# markdown_parser.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152

import html
import re


def parse_line(line):
    """
    Parse a line of text to replace HTML special chars, images, links and
    strong / emphasized markdown with their HTML equivalent.

    Markers that do not form a complete construct (a lone "*", a "[x]"
    without a "(target)") are left in the output as literal text.

    line: the inline markdown text to convert
    return: the line ready for HTML
    """
    # html.escape() returns a new string, so the result has to be kept.
    # Quotes are escaped as well because link targets and image titles end
    # up inside double-quoted attributes below.
    line = html.escape(line)

    # "**" pairs go first so they are not consumed as two single "*".
    # Only complete pairs are converted, which avoids leaving a tag open.
    line = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", line)
    line = re.sub(r"\*(.+?)\*", r"<em>\1</em>", line)

    # Images go before links, otherwise "![alt](src)" would be read as a
    # link preceded by "!". Each substitution is a single pass, so text that
    # merely contains brackets can neither raise nor loop forever.
    line = re.sub(r"!\[([^\]]*)\]\(([^)]*)\)", r'<img src="\2" alt="\1"/>', line)
    line = re.sub(r"\[([^\]]*)\]\(([^)]*)\)", r'<a href="\2">\1</a>', line)

    return line


def parse_md(filepath, env_vars):
    """
    Parse a markdown file and return the content to put into the template page.

    Supported constructs: a "---" delimited metadata block (date,
    description, tags), fenced code blocks, block quotes, "-" lists,
    "#" / "##" / "###" headings and plain paragraphs.

    env_vars: dictionary of environment variables; the keys read here are
        "pages_path", "parent_path", "lang" and "markdown_path"
    filepath: path of the markdown file, relative to
        <markdown_path>/<lang>/
    return: a dictionary with the keys "content" (generated HTML), "title",
        "date", "description", "tags" and "filepath" (local path of the
        resulting .html file)
    """
    content = {
        "content": "",
        "title": "",
        "date": "01-01-0001",
        "description": "",
        "tags": [],
        # Only the last extension is dropped so that dotted file names
        # such as "v1.2-notes.md" keep their full stem.
        "filepath": env_vars["pages_path"].replace(env_vars["parent_path"] + "/", "")
        + "/"
        + env_vars["lang"] + "/"
        + filepath.rsplit(".", 1)[0]
        + ".html",
    }

    html_parts = []
    title_parts = []
    inmeta, inquote, inpre, inul = False, False, False, False

    source_path = env_vars["markdown_path"] + "/" + env_vars["lang"] + "/" + filepath

    # Reading the content of the file and transforming it into html; the
    # context manager guarantees the file handle is closed.
    with open(source_path, "r", encoding="utf-8") as source:
        for raw_line in source:
            line = raw_line.strip()

            # Inside a code block every line is literal text: keep its
            # indentation, escape it, and only look for the closing fence.
            if inpre:
                if line.startswith("```"):
                    html_parts.append("</code></pre>\n")
                    inpre = False
                else:
                    code_text = raw_line.rstrip("\r\n")
                    html_parts.append(html.escape(code_text, quote=False) + "\n")
                continue

            is_list_item = line.startswith("-") and not line.startswith("---")

            # Close quote if not quoting
            if inquote and not line.startswith(">"):
                html_parts.append("</blockquote>\n")
                inquote = False

            # Close list if not listing
            if inul and not is_list_item:
                html_parts.append("</li>\n</ul>\n")
                inul = False

            # Open / close the metadata block
            if line.startswith("---"):
                inmeta = not inmeta
                continue

            # Metadata lines never produce page content. Values are split on
            # the first ":" only, so they may contain colons themselves.
            if inmeta:
                if line.startswith("date:"):
                    content["date"] = line.split(":", 1)[1].strip()
                elif line.startswith("description:"):
                    content["description"] = line.split(":", 1)[1].strip()
                elif line.startswith("tags:"):
                    raw_tags = line.split(":", 1)[1].split(",")
                    content["tags"] = [tag.strip() for tag in raw_tags if tag.strip()]
                continue

            # Opening fence of a code block; a language tag after the fence
            # becomes a class instead of leaking into the code text.
            if line.startswith("```"):
                info = line.lstrip("`").split()
                if info:
                    html_parts.append(
                        '<pre><code class="language-' + html.escape(info[0]) + '">'
                    )
                else:
                    html_parts.append("<pre><code>")
                inpre = True

            # Checking if it's a quote
            elif line.startswith(">"):
                if inquote:
                    # Separate consecutive quote lines so words do not merge
                    html_parts.append("\n" + parse_line(line.lstrip("> ")))
                else:
                    html_parts.append("<blockquote>" + parse_line(line.lstrip("> ")))
                    inquote = True

            # Checking if it's a list
            elif is_list_item:
                if inul:
                    html_parts.append("</li>\n")
                    html_parts.append("\t<li>" + parse_line(line.lstrip("- ")))
                else:
                    html_parts.append("<ul>\n\t<li>" + parse_line(line.lstrip("- ")))
                    inul = True

            # Checking if it's a title
            elif line.startswith("###"):
                html_parts.append("<h3>" + parse_line(line.lstrip("# ")) + "</h3>\n")
            elif line.startswith("##"):
                html_parts.append("<h2>" + parse_line(line.lstrip("# ")) + "</h2>\n")
            elif line.startswith("#"):
                title_parts.append(parse_line(line.lstrip("# ")))

            # else it's a paragraph (blank lines are skipped)
            elif line:
                html_parts.append("<p>" + parse_line(line) + "</p>\n")

    # Checking all tags are closed
    if inquote:
        html_parts.append("</blockquote>\n")

    if inul:
        html_parts.append("</li>\n</ul>\n")

    if inpre:
        html_parts.append("</code></pre>\n")

    content["content"] = "".join(html_parts)
    content["title"] = "".join(title_parts)

    return content