BlogGenerator/generator.py

181 lines
7.0 KiB
Python

import os
import shutil
from dotenv import load_dotenv
from markdown_parser import parse_md
from datetime import datetime
def md2html(filename, env_vars):
    """
    Create the HTML web page of one markdown file from the page template.

    filename: name of the markdown file to transform into HTML (handed as-is to parse_md)
    env_vars: dictionary of env variables ('template_page' and 'pages_path' are read here)
    return: the dictionary produced by parse_md; its 'content', 'title', 'date' and
            'description' entries are the ones substituted into the template
    """
    # Getting parsed content of markdown file & page template
    data = parse_md(filename, env_vars)
    # The context manager closes the template handle instead of leaking it
    with open(env_vars['template_page'], 'r') as template_file:
        template = template_file.read()

    # Filling the placeholders in a fixed order: $CONTENT, $TITLE, $DATE, $DESC.
    # The page is built before the output file is opened so that a missing key
    # in data does not leave an empty HTML file behind.
    page = template.replace("$CONTENT", data['content'])
    page = page.replace("$TITLE", data['title'])
    page = page.replace("$DATE", data['date'])
    page = page.replace("$DESC", data['description'])

    # The HTML file is named after the part of the file name before its first dot
    output_path = env_vars['pages_path'] + '/' + filename.split('.')[0] + '.html'
    with open(output_path, 'w') as output:
        output.write(page)
    return data
def generate_page_XML(data, env_vars):
    """
    Generate a RSS / Atom post for the page.

    data: dictionary generated by the markdown parser; 'title', 'date' (formatted
          as day-month-year, e.g. '02-03-2020'), 'content' and 'filepath' are read
    env_vars: dictionary of env variables ('template_atom_post' and 'website_url' are read)
    return: the post template with $TITLE, $DATE, $CONTENT and $URL substituted
    raises: ValueError when data['date'] does not match the '%d-%m-%Y' format
    """
    # The context manager closes the template handle instead of leaking it
    with open(env_vars['template_atom_post'], 'r') as template_file:
        template = template_file.read()

    # The feed date is the ISO 8601 form of the page date with a trailing "Z"
    date = datetime.strptime(data['date'], "%d-%m-%Y").isoformat() + "Z"

    # Substitution order is kept as $TITLE, $DATE, $CONTENT, $URL
    post = template.replace("$TITLE", data['title'])
    post = post.replace("$DATE", date)
    post = post.replace("$CONTENT", data['content'])
    return post.replace("$URL", env_vars['website_url'] + data['filepath'])
def generate_atom_feed(posts, env_vars):
    """
    Generate a RSS / Atom feed and write it to '<parent_path>/atom.xml'.

    posts: list of data dictionaries obtained from the markdown pages
    env_vars: dictionary of env variables ('template_atom_feed' and 'parent_path' are read)
    """
    # Generate RSS / Atom posts. Pages dated '01-01-0001' (the value used for
    # pages without metadata) don't get a RSS / Atom post.
    atom_content = "".join(
        generate_page_XML(post, env_vars)
        for post in posts
        if post['date'] != '01-01-0001'
    )

    # Generate RSS / atom feed; context managers close both files even on error
    with open(env_vars['template_atom_feed'], 'r') as template_file:
        template = template_file.read()
    feed = template.replace('$CONTENT', atom_content).replace(
        '$DATE', datetime.today().strftime("%Y-%m-%d"))
    with open(env_vars['parent_path'] + '/atom.xml', 'w') as output:
        output.write(feed)
def generate_tags_pages(tags_dict, env_vars):
    """
    Generate a page for each tag, which links to each content tagged.

    tags_dict: a dictionary with the tag name as key, and a list of posts
               (title at index 0, url at index 1) with that tag as value
    env_vars: dictionary of env variables ('pages_path' and 'template_tags' are read)
    """
    tags_dir = env_vars['pages_path'] + '/tags'
    # exist_ok avoids the separate "exists then mkdir" check
    os.makedirs(tags_dir, exist_ok=True)

    # Without any tag there is no page to write, so the template is not needed
    if not tags_dict:
        return

    # The template is the same for every tag: read it once and close the handle
    with open(env_vars['template_tags'], 'r') as template_file:
        template = template_file.read()

    # Going on every tag and creating its page
    for tag, pages in tags_dict.items():
        # Adding all links for pages with this tag
        links = "".join(
            f'\t\t\t\t<li><a href="../../{page[1]}">{page[0]}</a></li>\n'
            for page in pages
        )
        content = "<ul>\n" + links + "\t\t\t</ul>\n"
        # Generating the HTML page; spaces in the tag become underscores in the file name
        with open(tags_dir + '/' + tag.replace(' ', '_') + '.html', 'w') as output:
            output.write(template.replace(
                "$CONTENT", content).replace("$TITLE", tag))
def generate_index(data, env_vars):
    """
    Generate the main page and write it to '<parent_path>/index.html'.

    The tag pages are generated as well, through generate_tags_pages.

    data: list of data dictionaries obtained from the markdown pages
    env_vars: dictionary of env variables ('pages_path', 'parent_path' and
              'template_index' are read here)
    """
    tags_dict = {}
    page_entries = []
    # Getting data of each page
    for page in data:
        # Pages dated '01-01-0001' (no metadata) are not added to the index
        if page['date'] == '01-01-0001':
            continue
        page_entries.append('\t\t\t\t<li><a href="' + page['filepath'] + '">' + page['title'] + '</a><p>'
                            + page['date'] + '</p></li>\n')
        # Adding page into tags category
        # NOTE(review): tags are collected only for pages listed in the index - confirm
        for tag in page['tags']:
            tags_dict.setdefault(tag, []).append([page['title'], page['filepath']])

    # pages_path with the leading "<parent_path>/" removed; the same for every
    # tag, so it is computed once
    pages_prefix = env_vars['pages_path'].replace(
        env_vars['parent_path'] + '/', '')
    # Adding tags browsing into the page
    tag_entries = [
        '\t\t\t\t\t<li><a href="' + pages_prefix + '/tags/' + tag.replace(' ', '_') + '.html' + '">' + tag + '</a></li>\n'
        for tag in tags_dict
    ]
    generate_tags_pages(tags_dict, env_vars)

    # Create the index content
    index_content = (
        "<ul>\n"
        + "".join(page_entries)
        + '\t\t\t</ul>\n\t\t\t<h2>Tags:</h2>\n\t\t\t<div class="tags">\n\t\t\t\t<ul>\n'
        + "".join(tag_entries)
        + '\t\t\t\t</ul>\n\t\t\t</div>'
    )

    # Generate main page; context managers close both files even on error
    with open(env_vars['template_index'], 'r') as template_file:
        template = template_file.read()
    with open(env_vars['parent_path'] + '/index.html', 'w') as output:
        output.write(template.replace('$CONTENT', index_content))
if __name__ == "__main__":
    # Load .env file into python environment
    load_dotenv()
    # Color for print
    color = {'red': '\033[1;31m', 'green': '\033[1;32m', 'end': '\033[0m'}

    # Checking if all environment variables are present & setup
    env = ['PARENT_PATH', 'PAGES_PATH', 'MARKDOWN_PATH', 'TEMPLATE_PAGE', 'TEMPLATE_ATOM_POST',
           'TEMPLATE_ATOM_FEED', 'WEBSITE_URL', 'TEMPLATE_INDEX', 'TEMPLATE_TAGS']
    for variable in env:
        if variable not in os.environ:
            print(
                f"{color['red']}{variable} isn't present in the .env file, please fix this {color['end']}")
            # Exit with a non-zero status so callers can detect the failure
            raise SystemExit(1)
        if not os.environ[variable]:
            print(
                f"{color['red']}{variable} isn't setup in the .env file, please fix this {color['end']}")
            raise SystemExit(1)

    # Getting env variables: each key is the lower-cased variable name
    env_vars = {variable.lower(): os.environ[variable] for variable in env}

    # Removing previously generated content, then (re)creating the pages folder.
    # The folder has to be created again after rmtree, otherwise writing the
    # pages fails on every run but the first one.
    if os.path.exists(env_vars['pages_path']):
        shutil.rmtree(env_vars['pages_path'])
    for generated in ('/atom.xml', '/index.html'):
        try:
            os.remove(env_vars['parent_path'] + generated)
        except FileNotFoundError:
            # Nothing to clean up, e.g. a previous run stopped before writing it
            pass
    os.mkdir(env_vars['pages_path'])

    data = []  # A list for data generated by md2html
    # Generate all markdown files
    for file in os.listdir(env_vars['markdown_path']):
        # Generating HTML page
        print(f"{color['green']}Generating file: {file} {color['end']}")
        data.append(md2html(file, env_vars))

    # Sorting pages by date (oldest first); os.listdir gives no ordering guarantee
    sorted_data = sorted(data, key=lambda x: datetime.strptime(x['date'], '%d-%m-%Y'))

    # Generating atom feed
    print(f"{color['green']}Generating RSS / Atom feed {color['end']}")
    generate_atom_feed(sorted_data, env_vars)

    # Generating index
    print(f"{color['green']}Generating main page {color['end']}")
    generate_index(sorted_data, env_vars)