BlogGenerator/markdown_parser.py

153 lines
4.9 KiB
Python

import html
import re
# Inline Markdown patterns, applied in this order by parse_line().
# Non-greedy groups keep each match as short as possible so that several
# constructs on the same line are converted independently.
_STRONG_RE = re.compile(r"\*\*(.+?)\*\*")
_EMPHASIS_RE = re.compile(r"\*(.+?)\*")
_IMAGE_RE = re.compile(r"!\[([^\]]*)\]\(([^)]*)\)")
_LINK_RE = re.compile(r"\[([^\]]*)\]\(([^)]*)\)")


def parse_line(line):
    """
    Convert the inline Markdown of a single line of text to HTML.

    HTML special characters (&, <, > and quotes) are escaped first, then
    **strong**, *emphasis*, ![alt](src) images and [text](href) links are
    replaced by their HTML tags. Markers that are not part of a complete
    construct (a lone "*", "[1]" without a target, ...) are left as text.

    line: the raw text to convert
    return: the line ready for HTML
    """
    # Escape before inserting tags so user text cannot break the markup or
    # the attribute values generated below.
    line = html.escape(line)
    # Strong must run before emphasis, otherwise "**" would be consumed as
    # two single "*" markers.
    line = _STRONG_RE.sub(r"<strong>\1</strong>", line)
    line = _EMPHASIS_RE.sub(r"<em>\1</em>", line)
    # Images must run before links: an image is a link prefixed with "!".
    line = _IMAGE_RE.sub(r'<img src="\2" alt="\1"/>', line)
    line = _LINK_RE.sub(r'<a href="\2">\1</a>', line)
    return line
def parse_md(filepath, env_vars):
    """
    Parse a markdown file and return the content to put into the template page.

    The file is read from
    env_vars["markdown_path"] / env_vars["lang"] / filepath. A front-matter
    block delimited by two "---" lines may provide "date:", "description:"
    and "tags:" (comma separated) metadata. The body supports fenced code
    blocks, block quotes, "-" lists, "#" / "##" / "###" headings, "---"
    thematic breaks and paragraphs; inline markup is delegated to
    parse_line().

    filepath: path of the markdown file, relative to the language directory
    env_vars: dictionary of environment variables; the keys "pages_path",
        "parent_path", "lang" and "markdown_path" are read
    return: a dictionary with the keys "content" (HTML body), "title",
        "date", "description", "tags" and "filepath" (path of the HTML page)
    raises: KeyError if a required key is missing from env_vars, OSError if
        the markdown file cannot be opened
    """
    content = {
        "content": "",
        "title": "",
        "date": "01-01-0001",
        "description": "",
        "tags": [],
        # NOTE(review): split(".")[0] truncates names containing several dots;
        # kept as is because other modules may derive the same path.
        "filepath": env_vars["pages_path"].replace(env_vars["parent_path"] + "/", "")
        + "/"
        + env_vars["lang"]
        + "/"
        + filepath.split(".")[0]
        + ".html",
    }
    source_path = env_vars["markdown_path"] + "/" + env_vars["lang"] + "/" + filepath

    body = []  # HTML fragments, joined once at the end
    inmeta, inquote, inpre, inul = False, False, False, False
    meta_done = False  # True once the front-matter block has been closed

    # The context manager guarantees the file is closed, even on error.
    with open(source_path, "r") as source:
        for raw_line in source:
            line = raw_line.strip()

            # Inside a code block nothing is interpreted as markdown: lines
            # are emitted verbatim (escaped, indentation kept) until the
            # closing fence.
            if inpre:
                if line.startswith("```"):
                    body.append("</code></pre>\n")
                    inpre = False
                else:
                    body.append(html.escape(raw_line.rstrip("\r\n")) + "\n")
                continue

            # Close quote if not quoting
            if inquote and not line.startswith(">"):
                body.append("</blockquote>\n")
                inquote = False
            # Close list if not listing ("---" is a fence/rule, not an item)
            if inul and (not line.startswith("-") or line.startswith("---")):
                body.append("</li>\n</ul>\n")
                inul = False

            # "---" opens then closes the front matter; once it has been
            # closed, further "---" lines are thematic breaks and must not
            # switch metadata mode back on.
            if line.startswith("---"):
                if inmeta:
                    inmeta = False
                    meta_done = True
                elif not meta_done:
                    inmeta = True
                else:
                    body.append("<hr/>\n")
                continue

            # Metadata lines never produce body content.
            if inmeta:
                # partition() splits on the first ":" only, so values that
                # themselves contain ":" are kept whole.
                key, _, value = line.partition(":")
                if key == "date":
                    content["date"] = value.strip()
                elif key == "description":
                    content["description"] = value.strip()
                elif key == "tags":
                    content["tags"] = [
                        tag.strip() for tag in value.split(",") if tag.strip()
                    ]
                continue

            # Opening fence of a code block, optionally followed by a language
            if line.startswith("```"):
                info = line.lstrip("`").split()
                if info:
                    body.append(
                        '<pre><code class="language-' + html.escape(info[0]) + '">'
                    )
                else:
                    body.append("<pre><code>")
                inpre = True
            # Quote: consecutive ">" lines are merged in a single blockquote
            elif line.startswith(">"):
                quoted = parse_line(line.lstrip("> "))
                if inquote:
                    # Separate merged lines so their words do not run together
                    body.append("\n" + quoted)
                else:
                    body.append("<blockquote>" + quoted)
                    inquote = True
            # List item: drop only the marker, not every leading "-"
            elif line.startswith("-"):
                item = parse_line(line[1:].lstrip())
                if inul:
                    body.append("</li>\n\t<li>" + item)
                else:
                    body.append("<ul>\n\t<li>" + item)
                    inul = True
            # Titles: drop only the run of "#" markers, then surrounding spaces
            elif line.startswith("###"):
                body.append("<h3>" + parse_line(line.lstrip("#").strip()) + "</h3>\n")
            elif line.startswith("##"):
                body.append("<h2>" + parse_line(line.lstrip("#").strip()) + "</h2>\n")
            elif line.startswith("#"):
                # A level-1 heading is the page title, not body content
                content["title"] += parse_line(line.lstrip("#").strip())
            # Anything else that is not blank is a paragraph
            elif line:
                body.append("<p>" + parse_line(line) + "</p>\n")

    # Make sure every tag still open at the end of the file is closed
    if inquote:
        body.append("</blockquote>\n")
    if inul:
        body.append("</li>\n</ul>\n")
    if inpre:
        body.append("</code></pre>\n")

    content["content"] = "".join(body)
    return content