mirror of
https://github.com/venthur/blag.git
synced 2025-11-25 20:52:43 +00:00
moved markdown stuff into separate module
This commit is contained in:
100
sg/markdown.py
Normal file
100
sg/markdown.py
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
import logging
|
||||||
|
from urllib.parse import urlsplit, urlunsplit
|
||||||
|
|
||||||
|
from markdown import Markdown
|
||||||
|
from markdown.extensions import Extension
|
||||||
|
from markdown.treeprocessors import Treeprocessor
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def markdown_factory():
    """Build the Markdown converter used throughout the project.

    Having a single factory guarantees that the tests exercise a
    Markdown instance configured exactly like the one used for real
    builds.

    Returns
    -------
    markdown.Markdown

    """
    extensions = [
        'meta',
        'fenced_code',
        'codehilite',
        MarkdownLinkExtension(),
    ]
    return Markdown(extensions=extensions, output_format='html5')
|
||||||
|
|
||||||
|
|
||||||
|
def convert_markdown(md, markdown):
    """Convert markdown into html and extract meta data.

    The Markdown instance is reset before converting, so the same
    instance can be reused for several documents.

    Known metadata keys are converted to richer types:

    * ``date`` is parsed with ``datetime.fromisoformat``
    * ``tags`` is split on commas into a list of stripped, non-empty
      strings

    All other values are returned as a single string, with the
    individual metadata lines joined by newlines.

    Parameters
    ----------
    md : markdown.Markdown instance
    markdown : str

    Returns
    -------
    str, dict :
        html and metadata

    Raises
    ------
    ValueError
        If a ``date`` entry is present but is not a valid ISO format
        date string.

    """
    md.reset()
    content = md.convert(markdown)

    # The metadata arrives as a list of strings -- one item per line.
    # Join the lines into single strings, building a *new* dict so the
    # state stored on the Markdown instance is not modified behind the
    # caller's back. A missing ``Meta`` attribute is treated as "no
    # metadata" instead of raising AttributeError.
    raw_meta = getattr(md, 'Meta', None) or {}
    meta = {key: '\n'.join(lines) for key, lines in raw_meta.items()}

    # convert known metadata
    # date: datetime
    if 'date' in meta:
        meta['date'] = datetime.fromisoformat(meta['date'])
    # tags: list[str]
    if 'tags' in meta:
        # Drop empty entries produced by trailing or doubled commas
        # (e.g. "a, b," or "a,,b") and by an empty tags value.
        stripped = (tag.strip() for tag in meta['tags'].split(','))
        meta['tags'] = [tag for tag in stripped if tag]

    return content, meta
|
||||||
|
|
||||||
|
|
||||||
|
class MarkdownLinkTreeprocessor(Treeprocessor):
    """Converts relative links to .md files to .html

    Only links without a scheme and without a network location are
    touched; absolute URLs and links with an empty path (such as pure
    fragment links) are left unchanged.

    """

    def run(self, root):
        """Rewrite the ``href`` of every ``<a>`` element below *root*.

        Parameters
        ----------
        root : xml.etree.ElementTree.Element

        Returns
        -------
        xml.etree.ElementTree.Element
            the same *root*, modified in place

        """
        for element in root.iter('a'):
            url = element.get('href')
            # An anchor without an href has nothing to rewrite. Skip it
            # so that ``None`` is neither parsed as a URL nor written
            # back as the attribute value.
            if url is None:
                continue
            element.set('href', self.convert(url))
        return root

    def convert(self, url):
        """Return *url* with a trailing ``.md`` path replaced by ``.html``.

        Parameters
        ----------
        url : str

        Returns
        -------
        str
            *url* itself if it has a scheme, a network location or an
            empty path; otherwise the URL reassembled from its parts,
            with the path suffix ``.md`` replaced by ``.html`` when
            present.

        """
        scheme, netloc, path, query, fragment = urlsplit(url)
        # Only relative links can point to pages of this site.
        if scheme or netloc or not path:
            return url
        if path.endswith('.md'):
            path = path[:-3] + '.html'

        return urlunsplit((scheme, netloc, path, query, fragment))
|
||||||
|
|
||||||
|
|
||||||
|
class MarkdownLinkExtension(Extension):
    """Markdown extension that installs the MarkdownLinkTreeprocessor."""

    def extendMarkdown(self, md):
        """Register the link tree processor on *md*.

        The processor is registered under the name ``'mdlink'`` with
        priority ``0``.

        """
        processor = MarkdownLinkTreeprocessor(md)
        md.treeprocessors.register(processor, 'mdlink', 0)
|
||||||
96
sg/sg.py
96
sg/sg.py
@@ -12,15 +12,12 @@ import argparse
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime
|
|
||||||
from urllib.parse import urlsplit, urlunsplit
|
|
||||||
|
|
||||||
from markdown import Markdown
|
|
||||||
from markdown.extensions import Extension
|
|
||||||
from markdown.treeprocessors import Treeprocessor
|
|
||||||
from jinja2 import Environment, ChoiceLoader, FileSystemLoader, PackageLoader
|
from jinja2 import Environment, ChoiceLoader, FileSystemLoader, PackageLoader
|
||||||
import feedgenerator
|
import feedgenerator
|
||||||
|
|
||||||
|
from sg.markdown import markdown_factory, convert_markdown
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.DEBUG,
|
level=logging.DEBUG,
|
||||||
@@ -89,27 +86,6 @@ def build(args):
|
|||||||
convert_to_html(convertibles, args.input_dir, args.output_dir)
|
convert_to_html(convertibles, args.input_dir, args.output_dir)
|
||||||
|
|
||||||
|
|
||||||
def markdown_factory():
|
|
||||||
"""Create a Markdown instance.
|
|
||||||
|
|
||||||
This method exists only to ensure we use the same Markdown instance
|
|
||||||
for tests as for the actual thing.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
markdown.Markdown
|
|
||||||
|
|
||||||
"""
|
|
||||||
md = Markdown(
|
|
||||||
extensions=[
|
|
||||||
'meta', 'fenced_code', 'codehilite',
|
|
||||||
MarkdownLinkExtension()
|
|
||||||
],
|
|
||||||
output_format='html5',
|
|
||||||
)
|
|
||||||
return md
|
|
||||||
|
|
||||||
|
|
||||||
def convert_to_html(convertibles, input_dir, output_dir):
|
def convert_to_html(convertibles, input_dir, output_dir):
|
||||||
|
|
||||||
env = Environment(
|
env = Environment(
|
||||||
@@ -192,74 +168,6 @@ def convert_to_html(convertibles, input_dir, output_dir):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
def convert_markdown(md, markdown):
|
|
||||||
"""Convert markdown into html and extract meta data.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
md : markdown.Markdown instance
|
|
||||||
markdown : str
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
str, dict :
|
|
||||||
html and metadata
|
|
||||||
|
|
||||||
"""
|
|
||||||
md.reset()
|
|
||||||
content = md.convert(markdown)
|
|
||||||
meta = md.Meta
|
|
||||||
|
|
||||||
# markdowns metadata consists as list of strings -- one item per
|
|
||||||
# line. let's convert into single strings.
|
|
||||||
for key, value in meta.items():
|
|
||||||
value = '\n'.join(value)
|
|
||||||
meta[key] = value
|
|
||||||
|
|
||||||
# convert known metadata
|
|
||||||
# date: datetime
|
|
||||||
if 'date' in meta:
|
|
||||||
meta['date'] = datetime.fromisoformat(meta['date'])
|
|
||||||
# tags: list[str]
|
|
||||||
if 'tags' in meta:
|
|
||||||
tags = meta['tags'].split(',')
|
|
||||||
tags = [t.strip() for t in tags]
|
|
||||||
meta['tags'] = tags
|
|
||||||
|
|
||||||
return content, meta
|
|
||||||
|
|
||||||
|
|
||||||
class MarkdownLinkTreeprocessor(Treeprocessor):
|
|
||||||
"""Converts relative links to .md files to .html
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def run(self, root):
|
|
||||||
for element in root.iter():
|
|
||||||
if element.tag == 'a':
|
|
||||||
url = element.get('href')
|
|
||||||
converted = self.convert(url)
|
|
||||||
element.set('href', converted)
|
|
||||||
return root
|
|
||||||
|
|
||||||
def convert(self, url):
|
|
||||||
scheme, netloc, path, query, fragment = urlsplit(url)
|
|
||||||
logger.debug(f'{url} -> scheme: {scheme} netloc: {netloc} path: {path} query: {query} fragment: {fragment}')
|
|
||||||
if (scheme or netloc or not path):
|
|
||||||
return url
|
|
||||||
if path.endswith('.md'):
|
|
||||||
path = path[:-3] + '.html'
|
|
||||||
|
|
||||||
url = urlunsplit((scheme, netloc, path, query, fragment))
|
|
||||||
return url
|
|
||||||
|
|
||||||
|
|
||||||
class MarkdownLinkExtension(Extension):
|
|
||||||
def extendMarkdown(self, md):
|
|
||||||
md.treeprocessors.register(
|
|
||||||
MarkdownLinkTreeprocessor(md), 'mdlink', 0,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|||||||
48
tests/test_markdown.py
Normal file
48
tests/test_markdown.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import markdown
|
||||||
|
|
||||||
|
from sg.markdown import convert_markdown, markdown_factory
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("input_, expected", [
    # inline links, with and without a title
    ('[test](test.md)', 'test.html'),
    ('[test](test.md "test")', 'test.html'),
    ('[test](a/test.md)', 'a/test.html'),
    ('[test](a/test.md "test")', 'a/test.html'),
    ('[test](/test.md)', '/test.html'),
    ('[test](/test.md "test")', '/test.html'),
    ('[test](/a/test.md)', '/a/test.html'),
    ('[test](/a/test.md "test")', '/a/test.html'),
    # reference links, with and without a title
    # NOTE(review): the first case is a URL followed by a single
    # trailing space and no title -- confirm that is what was intended.
    ('[test][]\n[test]: test.md ', 'test.html'),
    ('[test][]\n[test]: test.md "test"', 'test.html'),
    ('[test][]\n[test]: a/test.md', 'a/test.html'),
    ('[test][]\n[test]: a/test.md "test"', 'a/test.html'),
    ('[test][]\n[test]: /test.md', '/test.html'),
    ('[test][]\n[test]: /test.md "test"', '/test.html'),
    ('[test][]\n[test]: /a/test.md', '/a/test.html'),
    ('[test][]\n[test]: /a/test.md "test"', '/a/test.html'),
])
def test_convert_markdown_links(input_, expected):
    """Relative links to .md files must show up as .html in the output."""
    html, _meta = convert_markdown(markdown_factory(), input_)
    assert expected in html
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("input_, expected", [
    # unknown keys stay plain strings
    ('foo: bar', {'foo': 'bar'}),
    # tags become a list of stripped strings
    ('tags: this, is, a, test\n', {'tags': ['this', 'is', 'a', 'test']}),
    # dates become datetime objects
    ('date: 2020-01-01 12:10', {'date': datetime(2020, 1, 1, 12, 10)}),
])
def test_convert_metadata(input_, expected):
    """Metadata is extracted and known keys are converted."""
    _html, meta = convert_markdown(markdown_factory(), input_)
    assert expected == meta
|
||||||
|
|
||||||
|
|
||||||
|
def test_markdown_factory():
    """The factory returns a real markdown.Markdown instance."""
    assert isinstance(markdown_factory(), markdown.Markdown)
|
||||||
@@ -1,48 +0,0 @@
|
|||||||
from datetime import datetime
|
|
||||||
|
|
||||||
import markdown
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from sg import sg
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("input_, expected", [
|
|
||||||
# inline
|
|
||||||
('[test](test.md)', 'test.html'),
|
|
||||||
('[test](test.md "test")', 'test.html'),
|
|
||||||
('[test](a/test.md)', 'a/test.html'),
|
|
||||||
('[test](a/test.md "test")', 'a/test.html'),
|
|
||||||
('[test](/test.md)', '/test.html'),
|
|
||||||
('[test](/test.md "test")', '/test.html'),
|
|
||||||
('[test](/a/test.md)', '/a/test.html'),
|
|
||||||
('[test](/a/test.md "test")', '/a/test.html'),
|
|
||||||
# reference
|
|
||||||
('[test][]\n[test]: test.md ''', 'test.html'),
|
|
||||||
('[test][]\n[test]: test.md "test"', 'test.html'),
|
|
||||||
('[test][]\n[test]: a/test.md', 'a/test.html'),
|
|
||||||
('[test][]\n[test]: a/test.md "test"', 'a/test.html'),
|
|
||||||
('[test][]\n[test]: /test.md', '/test.html'),
|
|
||||||
('[test][]\n[test]: /test.md "test"', '/test.html'),
|
|
||||||
('[test][]\n[test]: /a/test.md', '/a/test.html'),
|
|
||||||
('[test][]\n[test]: /a/test.md "test"', '/a/test.html'),
|
|
||||||
])
|
|
||||||
def test_convert_markdown_links(input_, expected):
|
|
||||||
md = sg.markdown_factory()
|
|
||||||
html, _ = sg.convert_markdown(md, input_)
|
|
||||||
assert expected in html
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("input_, expected", [
|
|
||||||
('foo: bar', {'foo': 'bar'}),
|
|
||||||
('tags: this, is, a, test\n', {'tags': ['this', 'is', 'a', 'test']}),
|
|
||||||
('date: 2020-01-01 12:10', {'date': datetime(2020, 1, 1, 12, 10)}),
|
|
||||||
])
|
|
||||||
def test_convert_metadata(input_, expected):
|
|
||||||
md = sg.markdown_factory()
|
|
||||||
_, meta = sg.convert_markdown(md, input_)
|
|
||||||
assert expected == meta
|
|
||||||
|
|
||||||
|
|
||||||
def test_markdown_factory():
|
|
||||||
md = sg.markdown_factory()
|
|
||||||
assert isinstance(md, markdown.Markdown)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user