generator.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174

import os
import shutil
from dotenv import load_dotenv
from markdown_parser import *
from datetime import datetime

def md2html(filename, env_vars):
    """
    Create the html webpage from template and markdown content
    filename: file to transform into HTML
    env_vars: dictionnary of env variables
    return: a dictionnary containing title, metadata, local path, content for HTML
    """
    # Getting parsed content of markdown file & page template
    data = parsemd(filename, env_vars)
    template = open(env_vars['template_page'], 'r').read()

    # Generating the HTML page
    output = open(env_vars['pages_path'] + '/' + filename.split('.')[0] + '.html', 'w')
    output.write(template.replace("$CONTENT", data['content']).replace("$TITLE", data['title']).
     replace("$DATE", data['date']).replace("$DESC", data['description']))
    output.close()

    return data


def generatePageXML(data, env_vars):
    """
    Generate a RSS / Atom post for the page
    data: dictionnary generated by the markdown parser
    env_vars: dictionnary of env variables
    return: RSS / Atom post
    """
    template = open(env_vars['template_atom_post'], 'r').read()
    date = datetime.strptime(data['date'],"%d-%m-%Y").isoformat() + "Z"
    return template.replace("$TITLE", data['title']).replace("$DATE", date).replace("$CONTENT", 
     data['content']).replace("$URL", env_vars['website_url'] + data['filepath'])
    

def generateAtomFeed(posts, env_vars):
    """
    Generate a RSS / Atom feed
    posts: list of data get from markdown pages
    env_vars: dictionnary of env variables
    """
    # Generate RSS / Atom post
    atom_content = ""
    for post in posts:
        # Checking if there is metadata, if not we don't create a RSS / Atom post
        if post['date'] != '01-01-0001':
            atom_content += generatePageXML(post, env_vars)

    # Generate RSS / atom feed
    template = open(env_vars['template_atom_feed'], 'r').read()
    output = open(env_vars['parent_path'] + '/atom.xml', 'w')
    output.write(template.replace('$CONTENT', atom_content).replace('$DATE', datetime.today().strftime("%Y-%m-%d")))
    output.close()


def generateTagsPages(tags_dict, env_vars):
    """
    Generate page for each tag, which link to each content tagged
    tags_dict: A dictionnary with tag name as key, and a list of post (title & url) with that tag
    env_vars: dictionnary of env variables
    """
    if not os.path.exists(env_vars['pages_path'] + '/tags'):
        os.mkdir(env_vars['pages_path'] + '/tags')

    # Going on every tag and creating it's page
    for tag, pages in tags_dict.items():
        template = open(env_vars['template_tags'], 'r').read()
        # Generating the HTML page
        output = open(env_vars['pages_path'] + '/tags/' + tag.replace(' ', '_') + '.html', 'w')

        # Adding all links for page with this tag
        content = "<ul>\n"
        for page in pages:
            content += '\t\t\t\t<li><a href="' + '../../' + page[1] + '">' + page[0] + '</a></li>\n'
        content += "\t\t\t</ul>\n"

        output.write(template.replace("$CONTENT", content).replace("$TITLE", tag))
        output.close()
    pass


def generateIndex(data, env_vars):
    """
    Generate the main page
    data: list of data get from markdown pages
    env_vars: dictionnary of env variables
    """
    # Create the index content
    tags_dict = {}
    index_content = "<ul>\n"

    # Getting data of each page
    for page in data:
        # Checking if there is metadata, if not we don't add the page in the index
        if page['date'] != '01-01-0001':
            index_content += ('\t\t\t\t<li><a href="' + page['filepath'] + '">' + page['title'] + '</a><p>'
            + page['date'] + '</p></li>\n')

        # Adding page into tags categorie
        for tag in page['tags']:
            if tag not in tags_dict:
                tags_dict[tag] = []
            tags_dict[tag].append([page['title'], page['filepath']])
        
    index_content += '\t\t\t</ul>\n\t\t\t<h2>Tags:</h2>\n\t\t\t<div class="tags">\n\t\t\t\t<ul>\n'

    # Adding tags browsing into the page
    for tag in list(tags_dict.keys()):
        index_content += ('\t\t\t\t\t<li><a href="' + env_vars['pages_path'].replace(env_vars['parent_path'] + '/', '') + '/tags/' + tag.replace(' ', '_') + '.html' + '">' + tag + '</a></li>\n')
        generateTagsPages(tags_dict, env_vars)
    index_content += '\t\t\t\t</ul>\n\t\t\t</div>'

    # Generate main page
    template = open(env_vars['template_index'], 'r').read()
    output = open(env_vars['parent_path'] + '/index.html', 'w')
    output.write(template.replace('$CONTENT', index_content))
    output.close()


if __name__=="__main__":
    # Load .env file into python environment
    load_dotenv()

    # Color for print
    color = { 'red': '\033[1;31m', 'green' : '\033[1;32m', 'end' : '\033[0m'}

    # Checking if all environment variable are present  & setup
    env = ['PARENT_PATH', 'PAGES_PATH', 'MARKDOWN_PATH', 'TEMPLATE_PAGE', 'TEMPLATE_ATOM_POST', 
           'TEMPLATE_ATOM_FEED', 'WEBSITE_URL', 'TEMPLATE_INDEX', 'TEMPLATE_TAGS']
    for variable in env:
        if variable not in os.environ:
            print(f"{color['red']}{variable} isn't present in the .env file, please fix this {color['end']}")
            quit()

        if (os.environ.get(variable) or '') == '':
            print(f"{color['red']}{variable} isn't setup in the .env file, please fix this {color['end']}")
            quit()

    # Getting env variable
    env_vars = { 'parent_path' : os.environ.get('PARENT_PATH'), 'pages_path' : os.environ.get('PAGES_PATH')
    , 'markdown_path' : os.environ.get('MARKDOWN_PATH'), 'template_page' : os.environ.get('TEMPLATE_PAGE')
    , 'template_atom_post' : os.environ.get('TEMPLATE_ATOM_POST'), 'template_atom_feed' : os.environ.get('TEMPLATE_ATOM_FEED')
    , 'website_url' : os.environ.get('WEBSITE_URL'), 'template_index' : os.environ.get('TEMPLATE_INDEX'), 'template_tags' : os.environ.get('TEMPLATE_TAGS') }

    # Checking if generate folder exist to remove previouly generated content, if not create it
    if os.path.exists(env_vars['pages_path']):
        shutil.rmtree(env_vars['pages_path'])
        os.remove(env_vars['parent_path'] + '/atom.xml')
        os.remove(env_vars['parent_path'] + '/index.html')
    else:
        os.mkdir(env_vars['pages_path'])

    data = [] # A list for data generated by md2html

    # Generate all markdown file
    for file in os.listdir(env_vars['markdown_path']):

        # Generating HTML page
        print(f"{color['green']}Generating file: {file} {color['end']}")
        data.append(md2html(file, env_vars))

    sorted_data = sorted(data, key=lambda x:datetime.strptime(x['date'], '%d-%m-%Y'))

    # Generating atom feed
    print(f"{color['green']}Generating RSS / Atom feed {color['end']}")
    generateAtomFeed(data, env_vars)
    
    # Generating index 
    print(f"{color['green']}Generating main page {color['end']}")
    generateIndex(data, env_vars)