Clean-up Extraneous Paragraph Tags and Table Whitespace in MDX on GatsbyJS
MDX on GatsbyJS wraps JSX components in paragraph tags, which can produce invalid HTML resembling:
<p>
<div class="jsx-component">...</div>
</p>
When using Markdown tables, it can also leave extra whitespace that can produce HTML syntax warnings. Not showstopping, but annoying all the same.
Create this plugin in your ./plugins directory (the example config below resolves it from ./plugins/gatsby-remark-cleaner) and add it to the end of your gatsbyRemarkPlugins in gatsby-config.js.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const visit = require(`unist-util-visit`);
const remove = require(`unist-util-remove`);
const HTMLParser = require('node-html-parser');
/**
 * Reports whether the first node of an HTML snippet carries at least one of
 * the given CSS classes.
 *
 * @param {Iterable<string>} [classes] - Class names to look for. When omitted
 *   (or otherwise falsy) nothing matches.
 * @param {string} value - Raw HTML source to inspect.
 * @returns {boolean} `true` when the snippet's first node has any of `classes`.
 */
const matches = (classes, value) => {
  const {firstChild} = HTMLParser.parse(value);

  // The parsed fragment can be empty (e.g. empty input, or input the parser
  // discards entirely), and non-element nodes expose no classList. Neither
  // case can match, and neither should throw.
  if (!firstChild || !firstChild.classList) {
    return false;
  }

  const {classList} = firstChild;
  return Array.from(classes || []).some(className => classList.contains(className));
};
module.exports = (refs, {classes, stripTableWhitespaceNodes = true}) => { | |
const {markdownAST} = refs; | |
visit(markdownAST, 'paragraph', (node, index, parent) => { | |
const isCandidate = node.children.every(child => { | |
if (!child.value) { | |
return; | |
} | |
if (stripTableWhitespaceNodes) { | |
child.value = child.value.replace(/(table|tbody|tfoot|tr)([^>]*?)>\s+</g, '$1$2><').replace(/>\s+<\/(table|tbody|tfoot|tr)/g, '></$1').replace(/\/td>\s+<td/g, '/td><td'); | |
} | |
return ( | |
(child.type === 'html' && matches(classes, child.value)) | |
|| (child.type === 'text' && child.value === '\n') | |
); | |
}); | |
if (!isCandidate) { | |
return; | |
} | |
remove(node, 'text'); | |
parent.children.splice(index, 1, ...node.children); | |
return index; | |
}); | |
}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// File: gatsby-config.js | |
module.exports = { | |
// Other config | |
plugins: [ | |
// Other plugins | |
{ | |
resolve: 'gatsby-plugin-mdx', | |
options: { | |
// Other mdx options | |
gatsbyRemarkPlugins: [ | |
// Other remark plugins | |
{ | |
resolve: require.resolve('./plugins/gatsby-remark-cleaner'), | |
options: { | |
classes: [ | |
// list of classes to remove wrapping p tag from, eg. | |
// 'gatsby-resp-iframe-wrapper' is used by 'gatsby-remark-responsive-iframe' (also 'gatsby-remark-embed-video') | |
// 'gist' is used by 'gatsby-remark-embed-gist' | |
], | |
// Removes table whitespace nodes that can raise errors, observed in 'gatsby-remark-embed-gist'. Defaults to true. | |
stripTableWhitespaceNodes: true, | |
}, | |
}, | |
], | |
}, | |
}, | |
], | |
}; |
Published October 9, 2021