From 107578e6dc7e13dfda0b99ff71148d47beb7b1f9 Mon Sep 17 00:00:00 2001 From: Oxbian Date: Wed, 5 Jul 2023 23:03:11 +0200 Subject: [PATCH] Updating function name for more python style name --- generator.py | 82 +++++++++++++++------------- markdown_parser.py | 133 +++++++++++++++++++++++++-------------------- 2 files changed, 118 insertions(+), 97 deletions(-) diff --git a/generator.py b/generator.py index 16150e7..5ec9ea7 100644 --- a/generator.py +++ b/generator.py @@ -1,9 +1,10 @@ import os import shutil from dotenv import load_dotenv -from markdown_parser import * +from markdown_parser import parse_md from datetime import datetime + def md2html(filename, env_vars): """ Create the html webpage from template and markdown content @@ -12,19 +13,20 @@ def md2html(filename, env_vars): return: a dictionnary containing title, metadata, local path, content for HTML """ # Getting parsed content of markdown file & page template - data = parsemd(filename, env_vars) + data = parse_md(filename, env_vars) template = open(env_vars['template_page'], 'r').read() # Generating the HTML page - output = open(env_vars['pages_path'] + '/' + filename.split('.')[0] + '.html', 'w') - output.write(template.replace("$CONTENT", data['content']).replace("$TITLE", data['title']). - replace("$DATE", data['date']).replace("$DESC", data['description'])) + output = open(env_vars['pages_path'] + '/' + + filename.split('.')[0] + '.html', 'w') + output.write(template.replace("$CONTENT", data['content']).replace( + "$TITLE", data['title']).replace("$DATE", data['date']).replace("$DESC", data['description'])) output.close() return data -def generatePageXML(data, env_vars): +def generate_page_XML(data, env_vars): """ Generate a RSS / Atom post for the page data: dictionnary generated by the markdown parser @@ -32,12 +34,11 @@ def generatePageXML(data, env_vars): return: RSS / Atom post """ template = open(env_vars['template_atom_post'], 'r').read() - date = datetime.strptime(data['date'],"%d-%m-%Y").isoformat() + "Z" - return template.replace("$TITLE", data['title']).replace("$DATE", date).replace("$CONTENT", - data['content']).replace("$URL", env_vars['website_url'] + data['filepath']) - + date = datetime.strptime(data['date'], "%d-%m-%Y").isoformat() + "Z" + return template.replace("$TITLE", data['title']).replace("$DATE", date).replace("$CONTENT", data['content']).replace("$URL", env_vars['website_url'] + data['filepath']) -def generateAtomFeed(posts, env_vars): + +def generate_atom_feed(posts, env_vars): """ Generate a RSS / Atom feed posts: list of data get from markdown pages @@ -48,16 +49,17 @@ def generateAtomFeed(posts, env_vars): for post in posts: # Checking if there is metadata, if not we don't create a RSS / Atom post if post['date'] != '01-01-0001': - atom_content += generatePageXML(post, env_vars) + atom_content += generate_page_XML(post, env_vars) # Generate RSS / atom feed template = open(env_vars['template_atom_feed'], 'r').read() output = open(env_vars['parent_path'] + '/atom.xml', 'w') - output.write(template.replace('$CONTENT', atom_content).replace('$DATE', datetime.today().strftime("%Y-%m-%d"))) + output.write(template.replace('$CONTENT', atom_content).replace( + '$DATE', datetime.today().strftime("%Y-%m-%d"))) output.close() -def generateTagsPages(tags_dict, env_vars): +def generate_tags_pages(tags_dict, env_vars): """ Generate page for each tag, which link to each content tagged tags_dict: A dictionnary with tag name as key, and a list of post (title & url) with that tag @@ -70,20 +72,23 @@ def generateTagsPages(tags_dict, env_vars): for tag, pages in tags_dict.items(): template = open(env_vars['template_tags'], 'r').read() # Generating the HTML page - output = open(env_vars['pages_path'] + '/tags/' + tag.replace(' ', '_') + '.html', 'w') + output = open(env_vars['pages_path'] + '/tags/' + + tag.replace(' ', '_') + '.html', 'w') # Adding all links for page with this tag content = "\n" - output.write(template.replace("$CONTENT", content).replace("$TITLE", tag)) + output.write(template.replace( + "$CONTENT", content).replace("$TITLE", tag)) output.close() pass -def generateIndex(data, env_vars): +def generate_index(data, env_vars): """ Generate the main page data: list of data get from markdown pages @@ -98,20 +103,21 @@ def generateIndex(data, env_vars): # Checking if there is metadata, if not we don't add the page in the index if page['date'] != '01-01-0001': index_content += ('\t\t\t\t
  • ' + page['title'] + '

    ' - + page['date'] + '

  • \n') + + page['date'] + '

    \n') # Adding page into tags categorie for tag in page['tags']: if tag not in tags_dict: tags_dict[tag] = [] tags_dict[tag].append([page['title'], page['filepath']]) - + index_content += '\t\t\t\n\t\t\t

    Tags:

    \n\t\t\t
    \n\t\t\t\t\n\t\t\t
    ' # Generate main page @@ -121,30 +127,30 @@ def generateIndex(data, env_vars): output.close() -if __name__=="__main__": +if __name__ == "__main__": # Load .env file into python environment load_dotenv() # Color for print - color = { 'red': '\033[1;31m', 'green' : '\033[1;32m', 'end' : '\033[0m'} + color = {'red': '\033[1;31m', 'green': '\033[1;32m', 'end': '\033[0m'} - # Checking if all environment variable are present & setup - env = ['PARENT_PATH', 'PAGES_PATH', 'MARKDOWN_PATH', 'TEMPLATE_PAGE', 'TEMPLATE_ATOM_POST', + # Checking if all environment variable are present & setup + env = ['PARENT_PATH', 'PAGES_PATH', 'MARKDOWN_PATH', 'TEMPLATE_PAGE', 'TEMPLATE_ATOM_POST', 'TEMPLATE_ATOM_FEED', 'WEBSITE_URL', 'TEMPLATE_INDEX', 'TEMPLATE_TAGS'] for variable in env: if variable not in os.environ: - print(f"{color['red']}{variable} isn't present in the .env file, please fix this {color['end']}") + print( + f"{color['red']}{variable} isn't present in the .env file, please fix this {color['end']}") quit() if (os.environ.get(variable) or '') == '': - print(f"{color['red']}{variable} isn't setup in the .env file, please fix this {color['end']}") + print( + f"{color['red']}{variable} isn't setup in the .env file, please fix this {color['end']}") quit() # Getting env variable - env_vars = { 'parent_path' : os.environ.get('PARENT_PATH'), 'pages_path' : os.environ.get('PAGES_PATH') - , 'markdown_path' : os.environ.get('MARKDOWN_PATH'), 'template_page' : os.environ.get('TEMPLATE_PAGE') - , 'template_atom_post' : os.environ.get('TEMPLATE_ATOM_POST'), 'template_atom_feed' : os.environ.get('TEMPLATE_ATOM_FEED') - , 'website_url' : os.environ.get('WEBSITE_URL'), 'template_index' : os.environ.get('TEMPLATE_INDEX'), 'template_tags' : os.environ.get('TEMPLATE_TAGS') } + env_vars = {'parent_path': os.environ.get('PARENT_PATH'), 'pages_path': os.environ.get('PAGES_PATH'), 'markdown_path': os.environ.get('MARKDOWN_PATH'), 'template_page': os.environ.get('TEMPLATE_PAGE'), 'template_atom_post': os.environ.get( + 'TEMPLATE_ATOM_POST'), 'template_atom_feed': os.environ.get('TEMPLATE_ATOM_FEED'), 'website_url': os.environ.get('WEBSITE_URL'), 'template_index': os.environ.get('TEMPLATE_INDEX'), 'template_tags': os.environ.get('TEMPLATE_TAGS')} # Checking if generate folder exist to remove previouly generated content, if not create it if os.path.exists(env_vars['pages_path']): @@ -154,7 +160,7 @@ if __name__=="__main__": else: os.mkdir(env_vars['pages_path']) - data = [] # A list for data generated by md2html + data = [] # A list for data generated by md2html # Generate all markdown file for file in os.listdir(env_vars['markdown_path']): @@ -163,12 +169,12 @@ if __name__=="__main__": print(f"{color['green']}Generating file: {file} {color['end']}") data.append(md2html(file, env_vars)) - sorted_data = sorted(data, key=lambda x:datetime.strptime(x['date'], '%d-%m-%Y')) + sorted_data = sorted(data, key=lambda x: datetime.strptime(x['date'], '%d-%m-%Y')) # Generating atom feed print(f"{color['green']}Generating RSS / Atom feed {color['end']}") - generateAtomFeed(data, env_vars) - - # Generating index + generate_atom_feed(data, env_vars) + + # Generating index print(f"{color['green']}Generating main page {color['end']}") - generateIndex(data, env_vars) + generate_index(data, env_vars) diff --git a/markdown_parser.py b/markdown_parser.py index 403b9e4..b4bce38 100644 --- a/markdown_parser.py +++ b/markdown_parser.py @@ -1,135 +1,150 @@ import html -def parseline(line): + +def parse_line(line): """ - Parse a line of texte to replace HTML specialchars, link, and strong / emphased of markdown for HTML + Parse a line of texte to replace HTML specialchars, link, and strong + / emphased of markdown for HTML return: the line ready for HTML """ # Change &, <, > for HTML support html.escape(line) # Checking if there is strong or emphasized - while '*' in line: - line = line.replace('*', '', 1) - line = line.replace('*', '', 1) - while '**' in line: - line = line.replace('**', '', 1) - line = line.replace('**', '', 1) + while "*" in line: + line = line.replace("*", "", 1) + line = line.replace("*", "", 1) + while "**" in line: + line = line.replace("**", "", 1) + line = line.replace("**", "", 1) # Checking if there is image - while '![' in line and ']' in line: - title = line.split(']')[0].split('[')[1] - link = line.split(']')[1].split('(')[1].split(')')[0] - line = line.replace('![' + title + '](' + link + ')', '' + title + '') - - # Checkinf if there is link - while '[' in line and ']' in line: - title = line.split(']')[0].split('[')[1] - link = line.split(']')[1].split('(')[1].split(')')[0] - line = line.replace('[' + title + '](' + link + ')', '' + title + '') + while "![" in line and "]" in line: + title = line.split("]")[0].split("[")[1] + link = line.split("]")[1].split("(")[1].split(")")[0] + line = line.replace( + "![" + title + "](" + link + ")", + '' + title + '', + ) + + # Checking if there is link + while "[" in line and "]" in line: + title = line.split("]")[0].split("[")[1] + link = line.split("]")[1].split("(")[1].split(")")[0] + line = line.replace( + "[" + title + "](" + link + ")", '' + title + "" + ) return line -def parsemd(filepath, env_vars): +def parse_md(filepath, env_vars): """ - Parse the markdown file and return the content to put into the template page + Parse a markdown file and return the content to put into the template page env_vars: dictionnary of environment variable filepath: Filepath of the markdown file - return: a dictionnary containing title, metadata, local path, content for HTML + return: a dictionnary containing title, metadata, local path, content """ - content = {'content': '', 'title': '', 'date': '01-01-0001', 'description': '', 'tags' : [], 'filepath': env_vars['pages_path'].replace(env_vars['parent_path'] + '/', '') - + '/' + filepath.split('.')[0] + '.html'} - + content = { + "content": "", + "title": "", + "date": "01-01-0001", + "description": "", + "tags": [], + "filepath": env_vars["pages_path"].replace(env_vars["parent_path"] + "/", "") + + "/" + + filepath.split(".")[0] + + ".html", + } + inmeta, inquote, inpre, inul = False, False, False, False - - # Reading the content of the file and transform into html - for line in open(env_vars['markdown_path'] + '/' + filepath, "r"): + + # Reading the content of the file and transform into html + for line in open(env_vars["markdown_path"] + "/" + filepath, "r"): line = line.strip() # Open the metadata - if line.startswith('---'): + if line.startswith("---"): if inmeta: inmeta = False else: inmeta = True # Getting the date metadata - if inmeta and line.startswith('date:'): - content['date'] = line.split(':')[1].strip() - + if inmeta and line.startswith("date:"): + content["date"] = line.split(":")[1].strip() + # Getting the description metadata - if inmeta and line.startswith('description:'): - content['description'] = line.split(':')[1].strip() + if inmeta and line.startswith("description:"): + content["description"] = line.split(":")[1].strip() # Getting the tags metadata - if inmeta and line.startswith('tags:'): - tags = line.split(':')[1].split(',') + if inmeta and line.startswith("tags:"): + tags = line.split(":")[1].split(",") # Removing leading and ending white spaces for i in range(0, len(tags)): tags[i] = tags[i].strip() - content['tags'] = tags - + content["tags"] = tags + # Close quote if not quoting if inquote and not line.startswith(">"): - content['content'] += "\n" + content["content"] += "\n" inquote = False - + # Close list if not listing if inul and not line.startswith("-"): - content['content'] += "\n\n" + content["content"] += "\n\n" inul = False - + # Checking if it's a code block if line.startswith("```"): if inpre: - content['content'] += "\n" + content["content"] += "\n" - content['content'] += "
    " + line.lstrip("```")
    +            content["content"] += "
    " + line.lstrip("```")
                 inpre = True
     
             # Checking if it's a quote
             elif line.startswith(">"):
                 if inquote:
    -                content['content'] += parseline(line.lstrip("> "))
    -            else: 
    -                content['content'] += "
    " + parseline(line.lstrip("> ")) + content["content"] += parse_line(line.lstrip("> ")) + else: + content["content"] += "
    " + parse_line(line.lstrip("> ")) inquote = True # Checking if it's a list elif line.startswith("-") and not line.startswith("---"): if inul: - content['content'] += "\n" - content['content'] += "\t
  • " + parseline(line.lstrip("- ")) + content["content"] += "
  • \n" + content["content"] += "\t
  • " + parse_line(line.lstrip("- ")) else: - content['content'] += "
      \n\t
    • " + parseline(line.lstrip("- ")) + content["content"] += "
        \n\t
      • " + parse_line(line.lstrip("- ")) inul = True - + # Checking if it's a title elif line.startswith("###"): - content['content'] += "

        " + parseline(line.lstrip("# ")) + "

        \n" + content["content"] += "

        " + parse_line(line.lstrip("# ")) + "

        \n" elif line.startswith("##"): - content['content'] += "

        " + parseline(line.lstrip("# ")) + "

        \n" + content["content"] += "

        " + parse_line(line.lstrip("# ")) + "

        \n" elif line.startswith("#"): - content['title'] += parseline(line.lstrip("# ")) + content["title"] += parse_line(line.lstrip("# ")) # else it's a paragraph elif line != " " and line != "" and not inmeta and not line.startswith("---"): - content['content'] += "

        " + parseline(line) + "

        \n" + content["content"] += "

        " + parse_line(line) + "

        \n" # Checking all balise are closed if inquote: - content['content'] += "
  • \n" + content["content"] += "
    \n" inquote = False if inul: - content['content'] += "\n\n" + content["content"] += "\n\n" inul = False - + if inpre: - content['content'] += "
    \n" + content["content"] += "
    \n" inpre = False return content -