diff --git a/sg/markdown.py b/sg/markdown.py
new file mode 100644
index 0000000..ec3a572
--- /dev/null
+++ b/sg/markdown.py
@@ -0,0 +1,111 @@
+from datetime import datetime
+import logging
+from urllib.parse import urlsplit, urlunsplit
+
+from markdown import Markdown
+from markdown.extensions import Extension
+from markdown.treeprocessors import Treeprocessor
+
+
+logger = logging.getLogger(__name__)
+
+
+def markdown_factory():
+    """Create a Markdown instance.
+
+    This method exists only to ensure we use the same Markdown instance
+    for tests as for the actual thing.
+
+    Returns
+    -------
+    markdown.Markdown
+
+    """
+    md = Markdown(
+        extensions=[
+            'meta', 'fenced_code', 'codehilite',
+            MarkdownLinkExtension()
+        ],
+        output_format='html5',
+    )
+    return md
+
+
+def convert_markdown(md, markdown):
+    """Convert markdown into html and extract meta data.
+
+    Parameters
+    ----------
+    md : markdown.Markdown instance
+    markdown : str
+
+    Returns
+    -------
+    str, dict :
+        html and metadata
+
+    Raises
+    ------
+    ValueError
+        If the ``date`` value is not accepted by ``datetime.fromisoformat``.
+
+    """
+    md.reset()
+    content = md.convert(markdown)
+
+    # markdown's metadata is a list of strings per key -- one item per
+    # line. Join them into single strings; build a new dict so that
+    # md.Meta is not modified while we iterate over it.
+    meta = {key: '\n'.join(value) for key, value in md.Meta.items()}
+
+    # convert known metadata
+    # date: datetime
+    if 'date' in meta:
+        meta['date'] = datetime.fromisoformat(meta['date'])
+    # tags: list[str]
+    if 'tags' in meta:
+        # drop empty entries caused by trailing or doubled commas
+        tags = (t.strip() for t in meta['tags'].split(','))
+        meta['tags'] = [t for t in tags if t]
+
+    return content, meta
+
+
+class MarkdownLinkTreeprocessor(Treeprocessor):
+    """Converts relative links to .md files to .html
+
+    """
+
+    def run(self, root):
+        """Rewrite the href of every link in the element tree."""
+        for element in root.iter('a'):
+            url = element.get('href')
+            if url is None:
+                # nothing to convert; don't write href=None back
+                continue
+            element.set('href', self.convert(url))
+        return root
+
+    def convert(self, url):
+        """Return url with a trailing .md in its path replaced by .html.
+
+        URLs with a scheme or network location, and URLs without a
+        path, are returned unchanged.
+
+        """
+        scheme, netloc, path, query, fragment = urlsplit(url)
+        if scheme or netloc or not path:
+            return url
+        if path.endswith('.md'):
+            path = path[:-3] + '.html'
+
+        return urlunsplit((scheme, netloc, path, query, fragment))
+
+
+class MarkdownLinkExtension(Extension):
+    """Registers MarkdownLinkTreeprocessor with a Markdown instance."""
+
+    def extendMarkdown(self, md):
+        md.treeprocessors.register(
+            MarkdownLinkTreeprocessor(md), 'mdlink', 0,
+        )
diff --git a/sg/sg.py b/sg/sg.py
index 171e930..5a38189 100644
--- a/sg/sg.py
+++ b/sg/sg.py
@@ -12,15 +12,12 @@ import argparse
 import os
 import shutil
 import logging
-from datetime import datetime
-from urllib.parse import urlsplit, urlunsplit
 
-from markdown import Markdown
-from markdown.extensions import Extension
-from markdown.treeprocessors import Treeprocessor
 from jinja2 import Environment, ChoiceLoader, FileSystemLoader, PackageLoader
 import feedgenerator
 
+from sg.markdown import markdown_factory, convert_markdown
+
 logger = logging.getLogger(__name__)
 logging.basicConfig(
     level=logging.DEBUG,
@@ -89,27 +86,6 @@ def build(args):
     convert_to_html(convertibles, args.input_dir, args.output_dir)
 
 
-def markdown_factory():
-    """Create a Markdown instance.
-
-    This method exists only to ensure we use the same Markdown instance
-    for tests as for the actual thing.
-
-    Returns
-    -------
-    markdown.Markdown
-
-    """
-    md = Markdown(
-        extensions=[
-            'meta', 'fenced_code', 'codehilite',
-            MarkdownLinkExtension()
-        ],
-        output_format='html5',
-    )
-    return md
-
-
 def convert_to_html(convertibles, input_dir, output_dir):
 
     env = Environment(
@@ -192,74 +168,6 @@ def convert_to_html(convertibles, input_dir, output_dir):
 
 
 
-def convert_markdown(md, markdown):
-    """Convert markdown into html and extract meta data.
-
-    Parameters
-    ----------
-    md : markdown.Markdown instance
-    markdown : str
-
-    Returns
-    -------
-    str, dict :
-        html and metadata
-
-    """
-    md.reset()
-    content = md.convert(markdown)
-    meta = md.Meta
-
-    # markdowns metadata consists as list of strings -- one item per
-    # line. let's convert into single strings.
-    for key, value in meta.items():
-        value = '\n'.join(value)
-        meta[key] = value
-
-    # convert known metadata
-    # date: datetime
-    if 'date' in meta:
-        meta['date'] = datetime.fromisoformat(meta['date'])
-    # tags: list[str]
-    if 'tags' in meta:
-        tags = meta['tags'].split(',')
-        tags = [t.strip() for t in tags]
-        meta['tags'] = tags
-
-    return content, meta
-
-
-class MarkdownLinkTreeprocessor(Treeprocessor):
-    """Converts relative links to .md files to .html
-
-    """
-
-    def run(self, root):
-        for element in root.iter():
-            if element.tag == 'a':
-                url = element.get('href')
-                converted = self.convert(url)
-                element.set('href', converted)
-        return root
-
-    def convert(self, url):
-        scheme, netloc, path, query, fragment = urlsplit(url)
-        logger.debug(f'{url} -> scheme: {scheme} netloc: {netloc} path: {path} query: {query} fragment: {fragment}')
-        if (scheme or netloc or not path):
-            return url
-        if path.endswith('.md'):
-            path = path[:-3] + '.html'
-
-        url = urlunsplit((scheme, netloc, path, query, fragment))
-        return url
-
-
-class MarkdownLinkExtension(Extension):
-    def extendMarkdown(self, md):
-        md.treeprocessors.register(
-            MarkdownLinkTreeprocessor(md), 'mdlink', 0,
-        )
-
 
 if __name__ == '__main__':
     main()
diff --git a/tests/test_markdown.py b/tests/test_markdown.py
new file mode 100644
index 0000000..dd32a15
--- /dev/null
+++ b/tests/test_markdown.py
@@ -0,0 +1,58 @@
+from datetime import datetime
+from xml.etree import ElementTree
+
+import pytest
+import markdown
+
+from sg.markdown import (
+    MarkdownLinkTreeprocessor, convert_markdown, markdown_factory,
+)
+
+
+@pytest.mark.parametrize("input_, expected", [
+    # inline
+    ('[test](test.md)', 'test.html'),
+    ('[test](test.md "test")', 'test.html'),
+    ('[test](a/test.md)', 'a/test.html'),
+    ('[test](a/test.md "test")', 'a/test.html'),
+    ('[test](/test.md)', '/test.html'),
+    ('[test](/test.md "test")', '/test.html'),
+    ('[test](/a/test.md)', '/a/test.html'),
+    ('[test](/a/test.md "test")', '/a/test.html'),
+    # reference
+    ('[test][]\n[test]: test.md ''', 'test.html'),
+    ('[test][]\n[test]: test.md "test"', 'test.html'),
+    ('[test][]\n[test]: a/test.md', 'a/test.html'),
+    ('[test][]\n[test]: a/test.md "test"', 'a/test.html'),
+    ('[test][]\n[test]: /test.md', '/test.html'),
+    ('[test][]\n[test]: /test.md "test"', '/test.html'),
+    ('[test][]\n[test]: /a/test.md', '/a/test.html'),
+    ('[test][]\n[test]: /a/test.md "test"', '/a/test.html'),
+])
+def test_convert_markdown_links(input_, expected):
+    md = markdown_factory()
+    html, _ = convert_markdown(md, input_)
+    assert expected in html
+
+
+def test_link_without_href_is_left_untouched():
+    root = ElementTree.fromstring('<div><a id="top">top</a></div>')
+    MarkdownLinkTreeprocessor(markdown_factory()).run(root)
+    assert 'href' not in root.find('a').attrib
+
+
+@pytest.mark.parametrize("input_, expected", [
+    ('foo: bar', {'foo': 'bar'}),
+    ('tags: this, is, a, test\n', {'tags': ['this', 'is', 'a', 'test']}),
+    ('tags: trailing, comma,', {'tags': ['trailing', 'comma']}),
+    ('date: 2020-01-01 12:10', {'date': datetime(2020, 1, 1, 12, 10)}),
+])
+def test_convert_metadata(input_, expected):
+    md = markdown_factory()
+    _, meta = convert_markdown(md, input_)
+    assert expected == meta
+
+
+def test_markdown_factory():
+    md = markdown_factory()
+    assert isinstance(md, markdown.Markdown)
diff --git a/tests/test_sg.py b/tests/test_sg.py
index c287ec7..e69de29 100644
--- a/tests/test_sg.py
+++ b/tests/test_sg.py
@@ -1,48 +0,0 @@
-from datetime import datetime
-
-import markdown
-import pytest
-
-from sg import sg
-
-
-@pytest.mark.parametrize("input_, expected", [
-    # inline
-    ('[test](test.md)', 'test.html'),
-    ('[test](test.md "test")', 'test.html'),
-    ('[test](a/test.md)', 'a/test.html'),
-    ('[test](a/test.md "test")', 'a/test.html'),
-    ('[test](/test.md)', '/test.html'),
-    ('[test](/test.md "test")', '/test.html'),
-    ('[test](/a/test.md)', '/a/test.html'),
-    ('[test](/a/test.md "test")', '/a/test.html'),
-    # reference
-    ('[test][]\n[test]: test.md ''', 'test.html'),
-    ('[test][]\n[test]: test.md "test"', 'test.html'),
-    ('[test][]\n[test]: a/test.md', 'a/test.html'),
-    ('[test][]\n[test]: a/test.md "test"', 'a/test.html'),
-    ('[test][]\n[test]: /test.md', '/test.html'),
-    ('[test][]\n[test]: /test.md "test"', '/test.html'),
-    ('[test][]\n[test]: /a/test.md', '/a/test.html'),
-    ('[test][]\n[test]: /a/test.md "test"', '/a/test.html'),
-])
-def test_convert_markdown_links(input_, expected):
-    md = sg.markdown_factory()
-    html, _ = sg.convert_markdown(md, input_)
-    assert expected in html
-
-
-@pytest.mark.parametrize("input_, expected", [
-    ('foo: bar', {'foo': 'bar'}),
-    ('tags: this, is, a, test\n', {'tags': ['this', 'is', 'a', 'test']}),
-    ('date: 2020-01-01 12:10', {'date': datetime(2020, 1, 1, 12, 10)}),
-])
-def test_convert_metadata(input_, expected):
-    md = sg.markdown_factory()
-    _, meta = sg.convert_markdown(md, input_)
-    assert expected == meta
-
-
-def test_markdown_factory():
-    md = sg.markdown_factory()
-    assert isinstance(md, markdown.Markdown)