mirror of
https://github.com/venthur/blag.git
synced 2025-11-25 20:52:43 +00:00
101 lines
2.4 KiB
Python
101 lines
2.4 KiB
Python
from datetime import datetime
|
|
import logging
|
|
from urllib.parse import urlsplit, urlunsplit
|
|
|
|
from markdown import Markdown
|
|
from markdown.extensions import Extension
|
|
from markdown.treeprocessors import Treeprocessor
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def markdown_factory():
|
|
"""Create a Markdown instance.
|
|
|
|
This method exists only to ensure we use the same Markdown instance
|
|
for tests as for the actual thing.
|
|
|
|
Returns
|
|
-------
|
|
markdown.Markdown
|
|
|
|
"""
|
|
md = Markdown(
|
|
extensions=[
|
|
'meta', 'fenced_code', 'codehilite',
|
|
MarkdownLinkExtension()
|
|
],
|
|
output_format='html5',
|
|
)
|
|
return md
|
|
|
|
|
|
def convert_markdown(md, markdown):
|
|
"""Convert markdown into html and extract meta data.
|
|
|
|
Parameters
|
|
----------
|
|
md : markdown.Markdown instance
|
|
markdown : str
|
|
|
|
Returns
|
|
-------
|
|
str, dict :
|
|
html and metadata
|
|
|
|
"""
|
|
md.reset()
|
|
content = md.convert(markdown)
|
|
meta = md.Meta
|
|
|
|
# markdowns metadata consists as list of strings -- one item per
|
|
# line. let's convert into single strings.
|
|
for key, value in meta.items():
|
|
value = '\n'.join(value)
|
|
meta[key] = value
|
|
|
|
# convert known metadata
|
|
# date: datetime
|
|
if 'date' in meta:
|
|
meta['date'] = datetime.fromisoformat(meta['date'])
|
|
# tags: list[str]
|
|
if 'tags' in meta:
|
|
tags = meta['tags'].split(',')
|
|
tags = [t.strip() for t in tags]
|
|
meta['tags'] = tags
|
|
|
|
return content, meta
|
|
|
|
|
|
class MarkdownLinkTreeprocessor(Treeprocessor):
|
|
"""Converts relative links to .md files to .html
|
|
|
|
"""
|
|
|
|
def run(self, root):
|
|
for element in root.iter():
|
|
if element.tag == 'a':
|
|
url = element.get('href')
|
|
converted = self.convert(url)
|
|
element.set('href', converted)
|
|
return root
|
|
|
|
def convert(self, url):
|
|
scheme, netloc, path, query, fragment = urlsplit(url)
|
|
#logger.debug(f'{url} -> scheme: {scheme} netloc: {netloc} path: {path} query: {query} fragment: {fragment}')
|
|
if (scheme or netloc or not path):
|
|
return url
|
|
if path.endswith('.md'):
|
|
path = path[:-3] + '.html'
|
|
|
|
url = urlunsplit((scheme, netloc, path, query, fragment))
|
|
return url
|
|
|
|
|
|
class MarkdownLinkExtension(Extension):
|
|
def extendMarkdown(self, md):
|
|
md.treeprocessors.register(
|
|
MarkdownLinkTreeprocessor(md), 'mdlink', 0,
|
|
)
|