mirror of
https://github.com/venthur/blag.git
synced 2025-11-25 20:52:43 +00:00
137 lines
3.6 KiB
Python
137 lines
3.6 KiB
Python
"""Markdown Processing.
|
|
|
|
This module contains the methods responsible for blag's markdown
|
|
processing.
|
|
|
|
"""
|
|
|
|
# remove when we don't support py38 anymore
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from datetime import datetime
|
|
from urllib.parse import urlsplit, urlunsplit
|
|
from xml.etree.ElementTree import Element
|
|
|
|
from markdown import Markdown
|
|
from markdown.extensions import Extension
|
|
from markdown.treeprocessors import Treeprocessor
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def markdown_factory() -> Markdown:
    """Build the Markdown converter used throughout blag.

    Having a single factory guarantees that the tests and the real
    application run with an identically configured Markdown instance.

    Returns
    -------
    markdown.Markdown
        a new instance with blag's set of extensions enabled

    """
    enabled_extensions = [
        "meta",
        "fenced_code",
        "codehilite",
        "smarty",
        # rewrites relative .md links to .html
        MarkdownLinkExtension(),
    ]
    return Markdown(extensions=enabled_extensions, output_format="html")
|
|
|
|
|
|
def convert_markdown(
    md: Markdown,
    markdown: str,
) -> tuple[str, dict[str, str]]:
    """Convert markdown into html and extract meta data.

    Some meta data is treated special:
        * `date` is converted into datetime with local timezone
        * `tags` is interpreted as a comma-separated list of strings.
          All strings are stripped and converted to lower case. Empty
          entries (e.g. caused by a trailing comma) are dropped.

    Parameters
    ----------
    md
        the Markdown instance
    markdown
        the markdown text that should be converted

    Returns
    -------
    str, dict[str, str]
        html and metadata

    Raises
    ------
    ValueError
        if a `date` entry is present but is not a valid ISO format
        string

    """
    # the instance may be reused between documents, so reset it before
    # every conversion
    md.reset()
    content = md.convert(markdown)
    meta = md.Meta  # type: ignore

    # markdown's metadata consists of a list of strings -- one item per
    # line. let's convert them into single strings.
    for key, lines in meta.items():
        meta[key] = "\n".join(lines)

    # convert known metadata
    # date: datetime
    if "date" in meta:
        # astimezone() treats a naive datetime as local time
        meta["date"] = datetime.fromisoformat(meta["date"]).astimezone()
    # tags: list[str], lower case, without empty entries
    if "tags" in meta:
        normalized = (t.strip().lower() for t in meta["tags"].split(","))
        # "foo, bar," or "foo,,bar" would otherwise yield an empty tag
        meta["tags"] = [t for t in normalized if t]

    return content, meta
|
|
|
|
|
|
class MarkdownLinkTreeprocessor(Treeprocessor):
    """Converts relative links to .md files to .html."""

    def run(self, root: Element) -> Element:
        """Process the ElementTree.

        The `href` attribute of every `a` element is passed through
        `convert` and written back in place.

        Parameters
        ----------
        root
            the root element of the tree to process

        Returns
        -------
        Element
            the same root element, with updated links

        """
        for element in root.iter("a"):
            url = element.get("href")
            if url is None:
                # An anchor without href has no link to rewrite. An
                # explicit check is used instead of `assert`, which is
                # stripped under `python -O` and would let "None" be
                # written as the href.
                continue
            element.set("href", self.convert(url))
        return root

    def convert(self, url: str) -> str:
        """Convert relative .md-links to .html-links.

        Parameters
        ----------
        url
            the link target as found in the document

        Returns
        -------
        str
            `url` unchanged if it has a scheme, a network location or
            an empty path; otherwise the re-assembled url, with a
            trailing `.md` in the path replaced by `.html`

        """
        scheme, netloc, path, query, fragment = urlsplit(url)
        logger.debug(
            "%s: scheme=%r netloc=%r path=%r query=%r fragment=%r",
            url,
            scheme,
            netloc,
            path,
            query,
            fragment,
        )
        # absolute urls and pure query/fragment links are left untouched
        if scheme or netloc or not path:
            return url
        if path.endswith(".md"):
            path = path[:-3] + ".html"

        return urlunsplit((scheme, netloc, path, query, fragment))
|
|
|
|
|
|
class MarkdownLinkExtension(Extension):
    """markdown.extension that converts relative .md- to .html-links."""

    def extendMarkdown(self, md: Markdown) -> None:
        """Hook the MarkdownLinkTreeprocessor into `md`.

        The processor is registered under the name "mdlink" with
        priority 0.
        """
        link_processor = MarkdownLinkTreeprocessor(md)
        md.treeprocessors.register(link_processor, "mdlink", 0)
|