generator.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186

import os
import sys
import shutil
from dotenv import load_dotenv
from markdown_parser import parse_md
from datetime import datetime


def md2html(filename, env_vars):
    """
    Create the HTML webpage from the page template and a markdown file.

    filename: name of the markdown file to transform into HTML
    env_vars: dictionary of env variables; 'templates_folder', 'pages_path'
              and 'lang' are read here, and the whole dictionary is passed
              on to parse_md
    return: the dictionary returned by parse_md; its 'content', 'title',
            'date' and 'description' entries are injected into the template
    """
    # Getting parsed content of markdown file & page template
    data = parse_md(filename, env_vars)
    template_path = env_vars['templates_folder'] + "/" + env_vars["lang"] + "/page_template.html"
    with open(template_path, 'r') as template_file:
        template = template_file.read()

    # Build the whole page before touching the output file, so a missing key
    # in `data` cannot leave an empty or truncated HTML file behind.
    page = (template
            .replace("$CONTENT", data['content'])
            .replace("$TITLE", data['title'])
            .replace("$DATE", data['date'])
            .replace("$DESC", data['description']))

    # The HTML file is named after the part of `filename` before its first dot
    output_path = env_vars['pages_path'] + '/' + env_vars["lang"] + "/" + filename.split('.')[0] + '.html'
    with open(output_path, 'w') as output:
        output.write(page)

    return data


def generate_page_XML(data, env_vars):
    """
    Generate a RSS / Atom post for the page.

    data: dictionary generated by the markdown parser; 'title', 'date'
          (formatted DD-MM-YYYY), 'content' and 'filepath' are read
    env_vars: dictionary of env variables; 'templates_folder', 'lang' and
              'website_url' are read
    return: the post template with $TITLE, $DATE, $CONTENT and $URL filled in
    raises: ValueError if data['date'] does not match DD-MM-YYYY
    """
    template_path = env_vars['templates_folder'] + "/" + env_vars["lang"] + "/atom_post_template.xml"
    with open(template_path, 'r') as template_file:
        template = template_file.read()

    # Convert DD-MM-YYYY into an ISO 8601 timestamp with a trailing "Z"
    date = datetime.strptime(data['date'], "%d-%m-%Y").isoformat() + "Z"

    # The post URL is the website URL directly followed by the page's local path
    return (template
            .replace("$TITLE", data['title'])
            .replace("$DATE", date)
            .replace("$CONTENT", data['content'])
            .replace("$URL", env_vars['website_url'] + data['filepath']))


def generate_atom_feed(posts, env_vars):
    """
    Generate a RSS / Atom feed and write it to <parent_path>/atom_<lang>.xml.

    posts: list of data dictionaries obtained from the markdown pages
    env_vars: dictionary of env variables; 'templates_folder', 'lang' and
              'parent_path' are read
    """
    # Pages dated '01-01-0001' are treated as having no metadata and get no
    # RSS / Atom post.
    atom_content = "".join(
        generate_page_XML(post, env_vars)
        for post in posts
        if post['date'] != '01-01-0001'
    )

    # Generate RSS / atom feed
    template_path = env_vars['templates_folder'] + "/" + env_vars["lang"] + "/atom_feed_template.xml"
    with open(template_path, 'r') as template_file:
        template = template_file.read()

    # $DATE in the feed template receives today's date as YYYY-MM-DD
    feed = template.replace('$CONTENT', atom_content).replace('$DATE', datetime.today().strftime("%Y-%m-%d"))

    with open(env_vars['parent_path'] + "/atom_" + env_vars["lang"] + ".xml", "w") as output:
        output.write(feed)


def generate_tags_pages(tags_dict, env_vars):
    """
    Generate one page per tag, linking to every page carrying that tag.

    tags_dict: dictionary with the tag name as key and, as value, a list of
               [title, url] pairs for the pages with that tag
    env_vars: dictionary of env variables; 'pages_path', 'lang' and
              'templates_folder' are read
    """
    tags_folder = env_vars['pages_path'] + "/" + env_vars["lang"] + '/tags'
    # Creates missing parent folders too and is a no-op when the folder exists
    os.makedirs(tags_folder, exist_ok=True)

    if not tags_dict:
        return

    # The template is identical for every tag: read it once, outside the loop
    template_path = env_vars['templates_folder'] + "/" + env_vars["lang"] + "/tags_template.html"
    with open(template_path, 'r') as template_file:
        template = template_file.read()

    # Going on every tag and creating its page
    for tag, pages in tags_dict.items():
        # Links are prefixed with '../../../' to climb out of
        # <pages>/<lang>/tags/ before following the page's url
        links = "".join(
            '\t\t\t\t<li><a href="' + '../../../' + page[1] + '">' + page[0] + '</a></li>\n'
            for page in pages
        )
        content = "<ul>\n" + links + "\t\t\t</ul>\n"

        # Spaces in the tag name become underscores in the file name
        with open(tags_folder + '/' + tag.replace(' ', '_') + '.html', 'w') as output:
            output.write(template.replace("$CONTENT", content).replace("$TITLE", tag))


def generate_index(data, env_vars):
    """
    Generate the main page and write it to <parent_path>/index_<lang>.html.

    The page lists every dated page, followed by a "Tags" section linking to
    one page per tag. The tag pages themselves are generated here as well.

    data: list of data dictionaries obtained from the markdown pages; 'date',
          'filepath', 'title' and 'tags' are read from each
    env_vars: dictionary of env variables; 'pages_path', 'parent_path',
              'lang' and 'templates_folder' are read
    """
    tags_dict = {}
    page_links = []

    # Getting data of each page
    for page in data:
        # Pages dated '01-01-0001' are treated as having no metadata and are
        # left out of the index (they are still registered under their tags).
        if page['date'] != '01-01-0001':
            page_links.append('\t\t\t\t<li><a href="' + page['filepath'] + '">' + page['title'] + '</a><p>'
                              + page['date'] + '</p></li>\n')

        # Adding page into its tag categories
        for tag in page['tags']:
            tags_dict.setdefault(tag, []).append([page['title'], page['filepath']])

    # Tag links use the pages folder with the '<parent_path>/' prefix removed
    tags_url = env_vars['pages_path'].replace(env_vars['parent_path'] + '/', '') + "/" + env_vars["lang"] + '/tags/'
    tag_links = [
        '\t\t\t\t\t<li><a href="' + tags_url + tag.replace(' ', '_') + '.html' + '">' + tag + '</a></li>\n'
        for tag in tags_dict
    ]

    index_content = (
        "<ul>\n"
        + "".join(page_links)
        + '\t\t\t</ul>\n\t\t\t<h2>Tags:</h2>\n\t\t\t<div class="tags">\n\t\t\t\t<ul>\n'
        + "".join(tag_links)
        + '\t\t\t\t</ul>\n\t\t\t</div>'
    )

    # Generate every tag page once (not once per tag), and only if there are tags
    if tags_dict:
        generate_tags_pages(tags_dict, env_vars)

    # Generate main page
    template_path = env_vars['templates_folder'] + "/" + env_vars["lang"] + "/index_template.html"
    with open(template_path, 'r') as template_file:
        template = template_file.read()
    with open(env_vars['parent_path'] + "/index_" + env_vars["lang"] + ".html", 'w') as output:
        output.write(template.replace('$CONTENT', index_content))

def check_env():
    """
    Check that the required environment variables are set and not empty.

    On the first variable that is missing or empty, an error message is
    printed in red and the program exits with status 1.
    """
    # ANSI codes are kept local so the check does not depend on a `color`
    # table being defined at module level before it is called.
    red, end = '\033[1;31m', '\033[0m'

    env = ['PARENT_PATH', 'PAGES_PATH', 'MARKDOWN_PATH', 'TEMPLATES_FOLDER', 'WEBSITE_URL', 'LANGS']
    for variable in env:
        if variable not in os.environ:
            print(f"{red}{variable} isn't present in the .env file, please fix this {end}")
            # Non-zero status so shells and CI see the configuration error
            sys.exit(1)

        if not os.environ[variable]:
            print(f"{red}{variable} isn't setup in the .env file, please fix this {end}")
            sys.exit(1)


if __name__ == "__main__":
    # Load .env file into python environment
    load_dotenv()

    # Color for print
    color = {'red': '\033[1;31m', 'green': '\033[1;32m', 'end': '\033[0m'}

    # Checking if all environment variable are present & setup
    check_env()

    # Getting env variable
    env_vars = {
        'parent_path': os.environ.get('PARENT_PATH'),
        'pages_path': os.environ.get('PAGES_PATH'),
        'markdown_path': os.environ.get('MARKDOWN_PATH'),
        'templates_folder': os.environ.get('TEMPLATES_FOLDER'),
        'website_url': os.environ.get('WEBSITE_URL'),
    }
    # LANGS is a comma-separated list; tolerate spaces and empty entries ("en, fr,")
    langs = [lang.strip() for lang in os.environ.get('LANGS').lower().split(',') if lang.strip()]

    # Remove previously generated content, then always recreate the output
    # folders: the pages are written into <pages_path>/<lang>/ below, so the
    # folders must exist whether or not a previous run left something behind.
    if os.path.exists(env_vars['pages_path']):
        shutil.rmtree(env_vars['pages_path'])
    os.makedirs(env_vars['pages_path'])
    for lang in langs:
        for stale_file in (env_vars['parent_path'] + "/atom_" + lang + ".xml",
                           env_vars['parent_path'] + "/index_" + lang + ".html"):
            try:
                os.remove(stale_file)
            except FileNotFoundError:
                # Nothing to clean: first run for this lang, or the file was
                # already removed. It is regenerated below either way.
                pass
        os.makedirs(env_vars['pages_path'] + "/" + lang, exist_ok=True)

    # Generate for each lang
    for lang in langs:
        env_vars['lang'] = lang
        data = []  # A list for data generated by md2html

        # Generate all markdown file
        for file in os.listdir(env_vars['markdown_path'] + "/" + lang):

            # Generating HTML page
            print(f"{color['green']}Generating file: {file} in {lang} {color['end']}")
            data.append(md2html(file, env_vars))

        # Oldest page first; each 'date' is expected as DD-MM-YYYY
        sorted_data = sorted(data, key=lambda x: datetime.strptime(x['date'], '%d-%m-%Y'))

        # Generating atom feed
        print(f"{color['green']}Generating RSS / Atom feed in {lang} {color['end']}")
        generate_atom_feed(sorted_data, env_vars)

        # Generating index
        print(f"{color['green']}Generating main page in {lang} :{color['end']}")
        generate_index(sorted_data, env_vars)