Clean Up Extraneous Paragraph Tags and Table Whitespace in MDX on GatsbyJS

MDX on GatsbyJS wraps JSX components in paragraph tags, which can produce invalid HTML resembling:

<p>
    <div class="jsx-component">...</div>
</p>

Markdown tables can also end up with extra whitespace between their elements, which can produce HTML syntax warnings. Not showstopping, but annoying all the same.

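Create this plugin in your ./plugins directory (for example as ./plugins/gatsby-remark-cleaner/index.js, matching the path resolved in the config below) and add it to the end of your gatsbyRemarkPlugins in gatsby-config.js.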

const visit = require(`unist-util-visit`);
const remove = require(`unist-util-remove`);
const HTMLParser = require('node-html-parser');
/**
 * Reports whether the first node parsed from an HTML string carries at least
 * one of the given CSS classes.
 *
 * @param {Iterable<string>} classes - Class names to look for. A missing
 *   (null/undefined) or empty list never matches.
 * @param {string} value - Raw HTML source to parse.
 * @returns {boolean} `true` when the first parsed node has any listed class.
 */
const matches = (classes, value) => {
  // Nothing to look for: skip parsing instead of crashing on a missing option.
  if (classes == null) {
    return false;
  }

  const {firstChild} = HTMLParser.parse(value);

  // The parsed fragment may have no child nodes at all, and nodes without a
  // classList (e.g. plain text) can never match a class name.
  if (!firstChild || !firstChild.classList) {
    return false;
  }

  return [...classes].some(className => firstChild.classList.contains(className));
};
module.exports = (refs, {classes, stripTableWhitespaceNodes = true}) => {
const {markdownAST} = refs;
visit(markdownAST, 'paragraph', (node, index, parent) => {
const isCandidate = node.children.every(child => {
if (!child.value) {
return;
}
if (stripTableWhitespaceNodes) {
child.value = child.value.replace(/(table|tbody|tfoot|tr)([^>]*?)>\s+</g, '$1$2><').replace(/>\s+<\/(table|tbody|tfoot|tr)/g, '></$1').replace(/\/td>\s+<td/g, '/td><td');
}
return (
(child.type === 'html' && matches(classes, child.value))
|| (child.type === 'text' && child.value === '\n')
);
});
if (!isCandidate) {
return;
}
remove(node, 'text');
parent.children.splice(index, 1, ...node.children);
return index;
});
};
// File: gatsby-config.js
module.exports = {
// Other config
plugins: [
// Other plugins
{
resolve: 'gatsby-plugin-mdx',
options: {
// Other mdx options
gatsbyRemarkPlugins: [
// Other remark plugins
{
resolve: require.resolve('./plugins/gatsby-remark-cleaner'),
options: {
classes: [
// list of classes to remove wrapping p tag from, eg.
// 'gatsby-resp-iframe-wrapper' is used by 'gatsby-remark-responsive-iframe' (also 'gatsby-remark-embed-video')
// 'gist' is used by 'gatsby-remark-embed-gist'
],
// Removes table whitespace nodes that can raise errors, observed in 'gatsby-remark-embed-gist'. Defaults to true.
stripTableWhitespaceNodes: true,
},
},
],
},
},
],
};
Published October 9, 2021