1
0
mirror of https://github.com/venthur/blag.git synced 2025-11-25 20:52:43 +00:00
Files
blag/blag/markdown.py
Bastian Venthur 875fd85d65 removed types from docstrings now that we have type annotations.
note, the return value still needs to be in there.
2022-09-01 10:51:32 +02:00

132 lines
3.5 KiB
Python

"""Markdown Processing.
This module contains the methods responsible for blag's markdown
processing.
"""
# remove when we don't support py38 anymore
from __future__ import annotations
from datetime import datetime
import logging
from urllib.parse import urlsplit, urlunsplit
from xml.etree.ElementTree import Element
from markdown import Markdown
from markdown.extensions import Extension
from markdown.treeprocessors import Treeprocessor
logger = logging.getLogger(__name__)
def markdown_factory() -> Markdown:
"""Create a Markdown instance.
This method exists only to ensure we use the same Markdown instance
for tests as for the actual thing.
Returns
-------
markdown.Markdown
"""
md = Markdown(
extensions=[
'meta', 'fenced_code', 'codehilite', 'smarty',
MarkdownLinkExtension()
],
output_format='html',
)
return md
def convert_markdown(
md: Markdown,
markdown: str,
) -> tuple[str, dict[str, str]]:
"""Convert markdown into html and extract meta data.
Some meta data is treated special:
* `date` is converted into datetime with local timezone
* `tags` is interpreted as a comma-separeted list of strings.
All strings are stripped and converted to lower case.
Parameters
----------
md
the Markdown instance
markdown
the markdown text that should be converted
Returns
-------
str, dict[str, str]
html and metadata
"""
md.reset()
content = md.convert(markdown)
meta = md.Meta # type: ignore
# markdowns metadata consists as list of strings -- one item per
# line. let's convert into single strings.
for key, value in meta.items():
value = '\n'.join(value)
meta[key] = value
# convert known metadata
# date: datetime
if 'date' in meta:
meta['date'] = datetime.fromisoformat(meta['date'])
meta['date'] = meta['date'].astimezone()
# tags: list[str] and lower case
if 'tags' in meta:
tags = meta['tags'].split(',')
tags = [t.lower() for t in tags]
tags = [t.strip() for t in tags]
meta['tags'] = tags
return content, meta
class MarkdownLinkTreeprocessor(Treeprocessor):
"""Converts relative links to .md files to .html
"""
def run(self, root: Element) -> Element:
for element in root.iter():
if element.tag == 'a':
url = element.get('href')
# element.get could also return None, we haven't seen this so
# far, so lets wait if we raise this
assert url is not None
url = str(url)
converted = self.convert(url)
element.set('href', converted)
return root
def convert(self, url: str) -> str:
scheme, netloc, path, query, fragment = urlsplit(url)
logger.debug(
f'{url}: {scheme=} {netloc=} {path=} {query=} {fragment=}'
)
if (scheme or netloc or not path):
return url
if path.endswith('.md'):
path = path[:-3] + '.html'
url = urlunsplit((scheme, netloc, path, query, fragment))
return url
class MarkdownLinkExtension(Extension):
"""markdown.extension that converts relative .md- to .html-links.
"""
def extendMarkdown(self, md: Markdown) -> None:
md.treeprocessors.register(
MarkdownLinkTreeprocessor(md), 'mdlink', 0,
)