# BlogGenerator/generator.py

import os
import shutil
from dotenv import load_dotenv
from markdown_parser import parse_md
from datetime import datetime


def md2html(filename, env_vars):
    """
    Create the HTML web page for one markdown file from the page template.

    filename: name of the markdown file to transform into HTML
    env_vars: dictionary of env variables (reads 'template_page' and
              'pages_path'; also forwarded to parse_md)
    return: the dictionary returned by parse_md; its 'content', 'title',
            'date' and 'description' entries are used to fill the template
    """
    # Getting parsed content of markdown file & page template
    data = parse_md(filename, env_vars)
    with open(env_vars['template_page'], 'r') as template_file:
        template = template_file.read()

    # Fill the placeholders one after the other; the order matters because
    # each replacement also applies to text inserted by the previous ones.
    html = (template
            .replace("$CONTENT", data['content'])
            .replace("$TITLE", data['title'])
            .replace("$DATE", data['date'])
            .replace("$DESC", data['description']))

    # The page is named after the part of the filename before its first dot.
    output_path = (env_vars['pages_path'] + '/' +
                   filename.split('.')[0] + '.html')

    # Generating the HTML page; `with` closes the file even if write fails
    with open(output_path, 'w') as output:
        output.write(html)

    return data


def generate_page_XML(data, env_vars):
    """
    Generate a RSS / Atom post for the page.

    data: dictionary generated by the markdown parser (reads 'title',
          'date' formatted as DD-MM-YYYY, 'content' and 'filepath')
    env_vars: dictionary of env variables (reads 'template_atom_post'
              and 'website_url')
    return: the post template with its placeholders filled, as a string
    raise: ValueError if data['date'] does not match DD-MM-YYYY
    """
    with open(env_vars['template_atom_post'], 'r') as template_file:
        template = template_file.read()

    # The page date carries no time of day, so the timestamp is midnight
    # with a literal "Z" suffix appended.
    date = datetime.strptime(data['date'], "%d-%m-%Y").isoformat() + "Z"

    # Replacements are applied in sequence; keep this order.
    return (template
            .replace("$TITLE", data['title'])
            .replace("$DATE", date)
            .replace("$CONTENT", data['content'])
            .replace("$URL", env_vars['website_url'] + data['filepath']))


def generate_atom_feed(posts, env_vars):
    """
    Generate a RSS / Atom feed and write it to <parent_path>/atom.xml.

    posts: list of data dictionaries obtained from the markdown pages
    env_vars: dictionary of env variables (reads 'template_atom_feed' and
              'parent_path'; also forwarded to generate_page_XML)
    """
    # Generate RSS / Atom posts. A date of '01-01-0001' marks a page without
    # metadata, for which no post is created.
    atom_content = "".join(
        generate_page_XML(post, env_vars)
        for post in posts
        if post['date'] != '01-01-0001'
    )

    # Generate RSS / Atom feed
    with open(env_vars['template_atom_feed'], 'r') as template_file:
        template = template_file.read()

    # NOTE(review): $DATE is filled with a date only (YYYY-MM-DD); confirm the
    # feed template supplies whatever time part the feed format needs.
    feed = template.replace('$CONTENT', atom_content).replace(
        '$DATE', datetime.today().strftime("%Y-%m-%d"))

    with open(env_vars['parent_path'] + '/atom.xml', 'w') as output:
        output.write(feed)


def generate_tags_pages(tags_dict, env_vars):
    """
    Generate a page for each tag, linking to every content with that tag.

    tags_dict: a dictionary with the tag name as key, and as value a list of
               posts, each given as [title, url]
    env_vars: dictionary of env variables (reads 'pages_path' and
              'template_tags')
    """
    tags_dir = env_vars['pages_path'] + '/tags'
    # exist_ok avoids the check-then-create race and also creates missing
    # parent directories.
    os.makedirs(tags_dir, exist_ok=True)

    if not tags_dict:
        return

    # The template is the same for every tag, so read it only once.
    with open(env_vars['template_tags'], 'r') as template_file:
        template = template_file.read()

    # Going over every tag and creating its page
    for tag, pages in tags_dict.items():
        # Adding all links for pages with this tag; page is [title, url] and
        # the url is prefixed with '../../' in the generated link.
        links = "".join(
            '\t\t\t\t<li><a href="' + '../../' + page[1] + '">' + page[0] + '</a></li>\n'
            for page in pages
        )
        content = "<ul>\n" + links + "\t\t\t</ul>\n"

        # Generating the HTML page; spaces in the tag become underscores
        # in the file name.
        with open(tags_dir + '/' + tag.replace(' ', '_') + '.html', 'w') as output:
            output.write(template.replace(
                "$CONTENT", content).replace("$TITLE", tag))


def generate_index(data, env_vars):
    """
    Generate the main page and write it to <parent_path>/index.html.

    Also generates the per-tag pages when at least one page has a tag.

    data: list of data dictionaries obtained from the markdown pages (reads
          'date', 'title', 'filepath' and 'tags' of each)
    env_vars: dictionary of env variables (reads 'template_index',
              'parent_path' and, when tags exist, 'pages_path')
    """
    # Create the index content
    tags_dict = {}
    entries = []

    # Getting data of each page
    for page in data:
        # A date of '01-01-0001' marks a page without metadata; such a page
        # is not listed in the index but is still registered under its tags.
        if page['date'] != '01-01-0001':
            entries.append('\t\t\t\t<li><a href="' + page['filepath'] + '">' + page['title'] + '</a><p>'
                           + page['date'] + '</p></li>\n')

        # Adding page into tags category
        for tag in page['tags']:
            tags_dict.setdefault(tag, []).append(
                [page['title'], page['filepath']])

    index_content = "<ul>\n" + "".join(entries)
    index_content += '\t\t\t</ul>\n\t\t\t<h2>Tags:</h2>\n\t\t\t<div class="tags">\n\t\t\t\t<ul>\n'

    # Adding tags browsing into the page
    if tags_dict:
        # Link prefix: pages_path with the leading "<parent_path>/" removed.
        tags_url = env_vars['pages_path'].replace(
            env_vars['parent_path'] + '/', '') + '/tags/'
        for tag in tags_dict:
            index_content += ('\t\t\t\t\t<li><a href="' + tags_url + tag.replace(' ', '_')
                              + '.html' + '">' + tag + '</a></li>\n')
        # Generate the tag pages once, not once per tag: a single call
        # already writes the page of every tag.
        generate_tags_pages(tags_dict, env_vars)
    index_content += '\t\t\t\t</ul>\n\t\t\t</div>'

    # Generate main page
    with open(env_vars['template_index'], 'r') as template_file:
        template = template_file.read()
    with open(env_vars['parent_path'] + '/index.html', 'w') as output:
        output.write(template.replace('$CONTENT', index_content))


if __name__ == "__main__":
    # Script entry point: validate the configuration, clean the previous
    # output, convert every markdown file, then build the feed and the index.

    # Load .env file into python environment
    load_dotenv()

    # Color for print
    color = {'red': '\033[1;31m', 'green': '\033[1;32m', 'end': '\033[0m'}

    # Checking if all environment variables are present & set up
    env = ['PARENT_PATH', 'PAGES_PATH', 'MARKDOWN_PATH', 'TEMPLATE_PAGE', 'TEMPLATE_ATOM_POST',
           'TEMPLATE_ATOM_FEED', 'WEBSITE_URL', 'TEMPLATE_INDEX', 'TEMPLATE_TAGS']
    for variable in env:
        if variable not in os.environ:
            print(
                f"{color['red']}{variable} isn't present in the .env file, please fix this {color['end']}")
            # SystemExit instead of quit(): quit() is only installed by the
            # site module and exits with status 0, hiding the failure.
            raise SystemExit(1)

        if not os.environ[variable]:
            print(
                f"{color['red']}{variable} isn't setup in the .env file, please fix this {color['end']}")
            raise SystemExit(1)

    # Getting env variables; each key is the lower-cased variable name
    env_vars = {variable.lower(): os.environ[variable] for variable in env}

    # Remove previously generated content. The pages folder is always
    # recreated afterwards, otherwise a second run would have nowhere to
    # write the pages.
    if os.path.exists(env_vars['pages_path']):
        shutil.rmtree(env_vars['pages_path'])
    for generated in ('/atom.xml', '/index.html'):
        generated_path = env_vars['parent_path'] + generated
        # These files may be missing (e.g. after an interrupted run).
        if os.path.exists(generated_path):
            os.remove(generated_path)
    os.makedirs(env_vars['pages_path'])

    data = []  # A list for data generated by md2html

    # Generate all markdown files
    for file in os.listdir(env_vars['markdown_path']):

        # Generating HTML page
        print(f"{color['green']}Generating file: {file} {color['end']}")
        data.append(md2html(file, env_vars))

    # Oldest first; os.listdir order is arbitrary, so sorting makes the
    # generated feed and index deterministic.
    sorted_data = sorted(data, key=lambda x: datetime.strptime(x['date'], '%d-%m-%Y'))

    # Generating atom feed
    print(f"{color['green']}Generating RSS / Atom feed {color['end']}")
    generate_atom_feed(sorted_data, env_vars)

    # Generating index
    print(f"{color['green']}Generating main page {color['end']}")
    generate_index(sorted_data, env_vars)