1
0
mirror of https://github.com/venthur/blag.git synced 2025-11-25 20:52:43 +00:00

moved markdown stuff into separate module

This commit is contained in:
Bastian Venthur
2021-01-31 13:18:22 +01:00
parent 1c4f58ef02
commit f376352e90
4 changed files with 150 additions and 142 deletions

100
sg/markdown.py Normal file
View File

@@ -0,0 +1,100 @@
from datetime import datetime
import logging
from urllib.parse import urlsplit, urlunsplit
from markdown import Markdown
from markdown.extensions import Extension
from markdown.treeprocessors import Treeprocessor
logger = logging.getLogger(__name__)
def markdown_factory():
    """Build the Markdown converter used by the project.

    Going through this single factory guarantees that the tests use a
    converter configured exactly like the one used for the real thing.

    Returns
    -------
    markdown.Markdown
        Converter producing ``html5`` output with the ``meta``,
        ``fenced_code`` and ``codehilite`` extensions and a
        ``MarkdownLinkExtension`` instance enabled.

    """
    extensions = ['meta', 'fenced_code', 'codehilite']
    extensions.append(MarkdownLinkExtension())
    return Markdown(extensions=extensions, output_format='html5')
def convert_markdown(md, markdown):
    """Convert markdown into html and extract meta data.

    Every metadata value is returned as a single string (multi-line
    values are joined with newlines), except for the known keys:

    * ``date`` is parsed with ``datetime.fromisoformat``
    * ``tags`` is split on commas into a list of stripped strings;
      empty entries (e.g. caused by a trailing comma) are dropped

    Parameters
    ----------
    md : markdown.Markdown instance
        Has to expose the parsed metadata as ``md.Meta`` after
        converting, as the ``meta`` extension does.
    markdown : str

    Returns
    -------
    str, dict :
        html and metadata

    Raises
    ------
    ValueError
        If a ``date`` is given that is not a valid ISO format string.

    """
    md.reset()
    content = md.convert(markdown)

    # markdown's metadata is a list of strings per key -- one item per
    # line. Join them into single strings. A fresh dict is built so the
    # converter's own ``Meta`` attribute is not modified behind its back.
    meta = {key: '\n'.join(lines) for key, lines in md.Meta.items()}

    # convert known metadata
    # date: datetime
    if 'date' in meta:
        meta['date'] = datetime.fromisoformat(meta['date'])
    # tags: list[str]
    if 'tags' in meta:
        stripped = (tag.strip() for tag in meta['tags'].split(','))
        # 'a, b,' must not produce an empty tag
        meta['tags'] = [tag for tag in stripped if tag]

    return content, meta
class MarkdownLinkTreeprocessor(Treeprocessor):
    """Converts relative links to .md files to .html

    """

    def run(self, root):
        """Rewrite the ``href`` of every ``a`` element below `root`.

        Parameters
        ----------
        root : element tree node
            Modified in place.

        Returns
        -------
        The `root` that was passed in.

        """
        for element in root.iter('a'):
            url = element.get('href')
            # an anchor without href has nothing to convert, and None
            # must not be written back as an attribute value
            if url is None:
                continue
            element.set('href', self.convert(url))
        return root

    def convert(self, url):
        """Replace a trailing ``.md`` of a relative url's path by ``.html``.

        Urls that have a scheme or a network location, and urls without
        a path (e.g. pure fragments), are returned unchanged.

        Parameters
        ----------
        url : str

        Returns
        -------
        str

        """
        scheme, netloc, path, query, fragment = urlsplit(url)
        if scheme or netloc or not path:
            return url
        if path.endswith('.md'):
            path = path[:-3] + '.html'
        return urlunsplit((scheme, netloc, path, query, fragment))
class MarkdownLinkExtension(Extension):
    """Markdown extension that installs ``MarkdownLinkTreeprocessor``."""

    def extendMarkdown(self, md):
        """Register the link tree processor on `md` as ``'mdlink'``.

        Parameters
        ----------
        md : markdown.Markdown instance

        """
        processor = MarkdownLinkTreeprocessor(md)
        md.treeprocessors.register(processor, 'mdlink', 0)

View File

@@ -12,15 +12,12 @@ import argparse
import os import os
import shutil import shutil
import logging import logging
from datetime import datetime
from urllib.parse import urlsplit, urlunsplit
from markdown import Markdown
from markdown.extensions import Extension
from markdown.treeprocessors import Treeprocessor
from jinja2 import Environment, ChoiceLoader, FileSystemLoader, PackageLoader from jinja2 import Environment, ChoiceLoader, FileSystemLoader, PackageLoader
import feedgenerator import feedgenerator
from sg.markdown import markdown_factory, convert_markdown
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logging.basicConfig( logging.basicConfig(
level=logging.DEBUG, level=logging.DEBUG,
@@ -89,27 +86,6 @@ def build(args):
convert_to_html(convertibles, args.input_dir, args.output_dir) convert_to_html(convertibles, args.input_dir, args.output_dir)
def markdown_factory():
    """Build the Markdown converter used by the project.

    Going through this single factory guarantees that the tests use a
    converter configured exactly like the one used for the real thing.

    Returns
    -------
    markdown.Markdown
        Converter producing ``html5`` output with the ``meta``,
        ``fenced_code`` and ``codehilite`` extensions and a
        ``MarkdownLinkExtension`` instance enabled.

    """
    extensions = ['meta', 'fenced_code', 'codehilite']
    extensions.append(MarkdownLinkExtension())
    return Markdown(extensions=extensions, output_format='html5')
def convert_to_html(convertibles, input_dir, output_dir): def convert_to_html(convertibles, input_dir, output_dir):
env = Environment( env = Environment(
@@ -192,74 +168,6 @@ def convert_to_html(convertibles, input_dir, output_dir):
def convert_markdown(md, markdown):
    """Convert markdown into html and extract meta data.

    Every metadata value is returned as a single string (multi-line
    values are joined with newlines), except for the known keys:

    * ``date`` is parsed with ``datetime.fromisoformat``
    * ``tags`` is split on commas into a list of stripped strings;
      empty entries (e.g. caused by a trailing comma) are dropped

    Parameters
    ----------
    md : markdown.Markdown instance
        Has to expose the parsed metadata as ``md.Meta`` after
        converting, as the ``meta`` extension does.
    markdown : str

    Returns
    -------
    str, dict :
        html and metadata

    Raises
    ------
    ValueError
        If a ``date`` is given that is not a valid ISO format string.

    """
    md.reset()
    content = md.convert(markdown)

    # markdown's metadata is a list of strings per key -- one item per
    # line. Join them into single strings. A fresh dict is built so the
    # converter's own ``Meta`` attribute is not modified behind its back.
    meta = {key: '\n'.join(lines) for key, lines in md.Meta.items()}

    # convert known metadata
    # date: datetime
    if 'date' in meta:
        meta['date'] = datetime.fromisoformat(meta['date'])
    # tags: list[str]
    if 'tags' in meta:
        stripped = (tag.strip() for tag in meta['tags'].split(','))
        # 'a, b,' must not produce an empty tag
        meta['tags'] = [tag for tag in stripped if tag]

    return content, meta
class MarkdownLinkTreeprocessor(Treeprocessor):
    """Converts relative links to .md files to .html

    """

    def run(self, root):
        """Rewrite the ``href`` of every ``a`` element below `root`.

        Parameters
        ----------
        root : element tree node
            Modified in place.

        Returns
        -------
        The `root` that was passed in.

        """
        for element in root.iter('a'):
            url = element.get('href')
            # an anchor without href has nothing to convert, and None
            # must not be written back as an attribute value
            if url is None:
                continue
            element.set('href', self.convert(url))
        return root

    def convert(self, url):
        """Replace a trailing ``.md`` of a relative url's path by ``.html``.

        Urls that have a scheme or a network location, and urls without
        a path (e.g. pure fragments), are returned unchanged.

        Parameters
        ----------
        url : str

        Returns
        -------
        str

        """
        scheme, netloc, path, query, fragment = urlsplit(url)
        # lazy %-formatting: the message is only built if debug is on
        logger.debug(
            '%s -> scheme: %s netloc: %s path: %s query: %s fragment: %s',
            url, scheme, netloc, path, query, fragment,
        )
        if scheme or netloc or not path:
            return url
        if path.endswith('.md'):
            path = path[:-3] + '.html'
        return urlunsplit((scheme, netloc, path, query, fragment))
class MarkdownLinkExtension(Extension):
    """Markdown extension that installs ``MarkdownLinkTreeprocessor``."""

    def extendMarkdown(self, md):
        """Register the link tree processor on `md` as ``'mdlink'``.

        Parameters
        ----------
        md : markdown.Markdown instance

        """
        processor = MarkdownLinkTreeprocessor(md)
        md.treeprocessors.register(processor, 'mdlink', 0)
if __name__ == '__main__': if __name__ == '__main__':
main() main()

48
tests/test_markdown.py Normal file
View File

@@ -0,0 +1,48 @@
from datetime import datetime
import pytest
import markdown
from sg.markdown import convert_markdown, markdown_factory
@pytest.mark.parametrize("input_, expected", [
    # inline
    ('[test](test.md)', 'test.html'),
    ('[test](test.md "test")', 'test.html'),
    ('[test](a/test.md)', 'a/test.html'),
    ('[test](a/test.md "test")', 'a/test.html'),
    ('[test](/test.md)', '/test.html'),
    ('[test](/test.md "test")', '/test.html'),
    ('[test](/a/test.md)', '/a/test.html'),
    ('[test](/a/test.md "test")', '/a/test.html'),
    # reference
    ('[test][]\n[test]: test.md', 'test.html'),
    ('[test][]\n[test]: test.md "test"', 'test.html'),
    ('[test][]\n[test]: a/test.md', 'a/test.html'),
    ('[test][]\n[test]: a/test.md "test"', 'a/test.html'),
    ('[test][]\n[test]: /test.md', '/test.html'),
    ('[test][]\n[test]: /test.md "test"', '/test.html'),
    ('[test][]\n[test]: /a/test.md', '/a/test.html'),
    ('[test][]\n[test]: /a/test.md "test"', '/a/test.html'),
])
def test_convert_markdown_links(input_, expected):
    """Relative links to ``.md`` files are rewritten to ``.html``."""
    md = markdown_factory()
    html, _ = convert_markdown(md, input_)
    # compare the whole attribute: a bare 'test.html' would also be
    # found inside a longer, wrong path that merely ends with it
    assert f'href="{expected}"' in html
@pytest.mark.parametrize("input_, expected", [
    ('foo: bar', {'foo': 'bar'}),
    ('tags: this, is, a, test\n', {'tags': ['this', 'is', 'a', 'test']}),
    ('date: 2020-01-01 12:10', {'date': datetime(2020, 1, 1, 12, 10)}),
])
def test_convert_metadata(input_, expected):
    """Known metadata keys are converted, other keys stay strings."""
    converter = markdown_factory()
    metadata = convert_markdown(converter, input_)[1]
    assert expected == metadata
def test_markdown_factory():
    """The factory hands out a ``markdown.Markdown`` instance."""
    assert isinstance(markdown_factory(), markdown.Markdown)

View File

@@ -1,48 +0,0 @@
from datetime import datetime
import markdown
import pytest
from sg import sg
@pytest.mark.parametrize("input_, expected", [
    # inline
    ('[test](test.md)', 'test.html'),
    ('[test](test.md "test")', 'test.html'),
    ('[test](a/test.md)', 'a/test.html'),
    ('[test](a/test.md "test")', 'a/test.html'),
    ('[test](/test.md)', '/test.html'),
    ('[test](/test.md "test")', '/test.html'),
    ('[test](/a/test.md)', '/a/test.html'),
    ('[test](/a/test.md "test")', '/a/test.html'),
    # reference
    ('[test][]\n[test]: test.md', 'test.html'),
    ('[test][]\n[test]: test.md "test"', 'test.html'),
    ('[test][]\n[test]: a/test.md', 'a/test.html'),
    ('[test][]\n[test]: a/test.md "test"', 'a/test.html'),
    ('[test][]\n[test]: /test.md', '/test.html'),
    ('[test][]\n[test]: /test.md "test"', '/test.html'),
    ('[test][]\n[test]: /a/test.md', '/a/test.html'),
    ('[test][]\n[test]: /a/test.md "test"', '/a/test.html'),
])
def test_convert_markdown_links(input_, expected):
    """Relative links to ``.md`` files are rewritten to ``.html``."""
    md = sg.markdown_factory()
    html, _ = sg.convert_markdown(md, input_)
    # compare the whole attribute: a bare 'test.html' would also be
    # found inside a longer, wrong path that merely ends with it
    assert f'href="{expected}"' in html
@pytest.mark.parametrize("input_, expected", [
    ('foo: bar', {'foo': 'bar'}),
    ('tags: this, is, a, test\n', {'tags': ['this', 'is', 'a', 'test']}),
    ('date: 2020-01-01 12:10', {'date': datetime(2020, 1, 1, 12, 10)}),
])
def test_convert_metadata(input_, expected):
    """Known metadata keys are converted, other keys stay strings."""
    converter = sg.markdown_factory()
    metadata = sg.convert_markdown(converter, input_)[1]
    assert expected == metadata
def test_markdown_factory():
    """The factory hands out a ``markdown.Markdown`` instance."""
    assert isinstance(sg.markdown_factory(), markdown.Markdown)