diff options
author | Oxbian <got.dacs@slmail.me> | 2023-07-05 23:03:11 +0200 |
---|---|---|
committer | Oxbian <got.dacs@slmail.me> | 2023-07-05 23:03:11 +0200 |
commit | 107578e6dc7e13dfda0b99ff71148d47beb7b1f9 (patch) | |
tree | 5d5667b4f167d50e63e70b3d89709fe98c928a31 | |
parent | 71623ad0674947b025eb0337429712560f7b5acb (diff) | |
download | blog-generator-107578e6dc7e13dfda0b99ff71148d47beb7b1f9.tar.gz blog-generator-107578e6dc7e13dfda0b99ff71148d47beb7b1f9.zip |
Rename functions to follow Python naming conventions
-rw-r--r-- | generator.py | 82 | ||||
-rw-r--r-- | markdown_parser.py | 133 |
2 files changed, 118 insertions, 97 deletions
diff --git a/generator.py b/generator.py index 16150e7..5ec9ea7 100644 --- a/generator.py +++ b/generator.py @@ -1,9 +1,10 @@ import os import shutil from dotenv import load_dotenv -from markdown_parser import * +from markdown_parser import parse_md from datetime import datetime + def md2html(filename, env_vars): """ Create the html webpage from template and markdown content @@ -12,19 +13,20 @@ def md2html(filename, env_vars): return: a dictionnary containing title, metadata, local path, content for HTML """ # Getting parsed content of markdown file & page template - data = parsemd(filename, env_vars) + data = parse_md(filename, env_vars) template = open(env_vars['template_page'], 'r').read() # Generating the HTML page - output = open(env_vars['pages_path'] + '/' + filename.split('.')[0] + '.html', 'w') - output.write(template.replace("$CONTENT", data['content']).replace("$TITLE", data['title']). - replace("$DATE", data['date']).replace("$DESC", data['description'])) + output = open(env_vars['pages_path'] + '/' + + filename.split('.')[0] + '.html', 'w') + output.write(template.replace("$CONTENT", data['content']).replace( + "$TITLE", data['title']).replace("$DATE", data['date']).replace("$DESC", data['description'])) output.close() return data -def generatePageXML(data, env_vars): +def generate_page_XML(data, env_vars): """ Generate a RSS / Atom post for the page data: dictionnary generated by the markdown parser @@ -32,12 +34,11 @@ def generatePageXML(data, env_vars): return: RSS / Atom post """ template = open(env_vars['template_atom_post'], 'r').read() - date = datetime.strptime(data['date'],"%d-%m-%Y").isoformat() + "Z" - return template.replace("$TITLE", data['title']).replace("$DATE", date).replace("$CONTENT", - data['content']).replace("$URL", env_vars['website_url'] + data['filepath']) - + date = datetime.strptime(data['date'], "%d-%m-%Y").isoformat() + "Z" + return template.replace("$TITLE", data['title']).replace("$DATE", date).replace("$CONTENT", 
data['content']).replace("$URL", env_vars['website_url'] + data['filepath']) + -def generateAtomFeed(posts, env_vars): +def generate_atom_feed(posts, env_vars): """ Generate a RSS / Atom feed posts: list of data get from markdown pages @@ -48,16 +49,17 @@ def generateAtomFeed(posts, env_vars): for post in posts: # Checking if there is metadata, if not we don't create a RSS / Atom post if post['date'] != '01-01-0001': - atom_content += generatePageXML(post, env_vars) + atom_content += generate_page_XML(post, env_vars) # Generate RSS / atom feed template = open(env_vars['template_atom_feed'], 'r').read() output = open(env_vars['parent_path'] + '/atom.xml', 'w') - output.write(template.replace('$CONTENT', atom_content).replace('$DATE', datetime.today().strftime("%Y-%m-%d"))) + output.write(template.replace('$CONTENT', atom_content).replace( + '$DATE', datetime.today().strftime("%Y-%m-%d"))) output.close() -def generateTagsPages(tags_dict, env_vars): +def generate_tags_pages(tags_dict, env_vars): """ Generate page for each tag, which link to each content tagged tags_dict: A dictionnary with tag name as key, and a list of post (title & url) with that tag @@ -70,20 +72,23 @@ def generateTagsPages(tags_dict, env_vars): for tag, pages in tags_dict.items(): template = open(env_vars['template_tags'], 'r').read() # Generating the HTML page - output = open(env_vars['pages_path'] + '/tags/' + tag.replace(' ', '_') + '.html', 'w') + output = open(env_vars['pages_path'] + '/tags/' + + tag.replace(' ', '_') + '.html', 'w') # Adding all links for page with this tag content = "<ul>\n" for page in pages: - content += '\t\t\t\t<li><a href="' + '../../' + page[1] + '">' + page[0] + '</a></li>\n' + content += '\t\t\t\t<li><a href="' + '../../' + \ + page[1] + '">' + page[0] + '</a></li>\n' content += "\t\t\t</ul>\n" - output.write(template.replace("$CONTENT", content).replace("$TITLE", tag)) + output.write(template.replace( + "$CONTENT", content).replace("$TITLE", tag)) output.close() 
pass -def generateIndex(data, env_vars): +def generate_index(data, env_vars): """ Generate the main page data: list of data get from markdown pages @@ -98,20 +103,21 @@ def generateIndex(data, env_vars): # Checking if there is metadata, if not we don't add the page in the index if page['date'] != '01-01-0001': index_content += ('\t\t\t\t<li><a href="' + page['filepath'] + '">' + page['title'] + '</a><p>' - + page['date'] + '</p></li>\n') + + page['date'] + '</p></li>\n') # Adding page into tags categorie for tag in page['tags']: if tag not in tags_dict: tags_dict[tag] = [] tags_dict[tag].append([page['title'], page['filepath']]) - + index_content += '\t\t\t</ul>\n\t\t\t<h2>Tags:</h2>\n\t\t\t<div class="tags">\n\t\t\t\t<ul>\n' # Adding tags browsing into the page for tag in list(tags_dict.keys()): - index_content += ('\t\t\t\t\t<li><a href="' + env_vars['pages_path'].replace(env_vars['parent_path'] + '/', '') + '/tags/' + tag.replace(' ', '_') + '.html' + '">' + tag + '</a></li>\n') - generateTagsPages(tags_dict, env_vars) + index_content += ('\t\t\t\t\t<li><a href="' + env_vars['pages_path'].replace( + env_vars['parent_path'] + '/', '') + '/tags/' + tag.replace(' ', '_') + '.html' + '">' + tag + '</a></li>\n') + generate_tags_pages(tags_dict, env_vars) index_content += '\t\t\t\t</ul>\n\t\t\t</div>' # Generate main page @@ -121,30 +127,30 @@ def generateIndex(data, env_vars): output.close() -if __name__=="__main__": +if __name__ == "__main__": # Load .env file into python environment load_dotenv() # Color for print - color = { 'red': '\033[1;31m', 'green' : '\033[1;32m', 'end' : '\033[0m'} + color = {'red': '\033[1;31m', 'green': '\033[1;32m', 'end': '\033[0m'} - # Checking if all environment variable are present & setup - env = ['PARENT_PATH', 'PAGES_PATH', 'MARKDOWN_PATH', 'TEMPLATE_PAGE', 'TEMPLATE_ATOM_POST', + # Checking if all environment variable are present & setup + env = ['PARENT_PATH', 'PAGES_PATH', 'MARKDOWN_PATH', 'TEMPLATE_PAGE', 'TEMPLATE_ATOM_POST', 
'TEMPLATE_ATOM_FEED', 'WEBSITE_URL', 'TEMPLATE_INDEX', 'TEMPLATE_TAGS'] for variable in env: if variable not in os.environ: - print(f"{color['red']}{variable} isn't present in the .env file, please fix this {color['end']}") + print( + f"{color['red']}{variable} isn't present in the .env file, please fix this {color['end']}") quit() if (os.environ.get(variable) or '') == '': - print(f"{color['red']}{variable} isn't setup in the .env file, please fix this {color['end']}") + print( + f"{color['red']}{variable} isn't setup in the .env file, please fix this {color['end']}") quit() # Getting env variable - env_vars = { 'parent_path' : os.environ.get('PARENT_PATH'), 'pages_path' : os.environ.get('PAGES_PATH') - , 'markdown_path' : os.environ.get('MARKDOWN_PATH'), 'template_page' : os.environ.get('TEMPLATE_PAGE') - , 'template_atom_post' : os.environ.get('TEMPLATE_ATOM_POST'), 'template_atom_feed' : os.environ.get('TEMPLATE_ATOM_FEED') - , 'website_url' : os.environ.get('WEBSITE_URL'), 'template_index' : os.environ.get('TEMPLATE_INDEX'), 'template_tags' : os.environ.get('TEMPLATE_TAGS') } + env_vars = {'parent_path': os.environ.get('PARENT_PATH'), 'pages_path': os.environ.get('PAGES_PATH'), 'markdown_path': os.environ.get('MARKDOWN_PATH'), 'template_page': os.environ.get('TEMPLATE_PAGE'), 'template_atom_post': os.environ.get( + 'TEMPLATE_ATOM_POST'), 'template_atom_feed': os.environ.get('TEMPLATE_ATOM_FEED'), 'website_url': os.environ.get('WEBSITE_URL'), 'template_index': os.environ.get('TEMPLATE_INDEX'), 'template_tags': os.environ.get('TEMPLATE_TAGS')} # Checking if generate folder exist to remove previouly generated content, if not create it if os.path.exists(env_vars['pages_path']): @@ -154,7 +160,7 @@ if __name__=="__main__": else: os.mkdir(env_vars['pages_path']) - data = [] # A list for data generated by md2html + data = [] # A list for data generated by md2html # Generate all markdown file for file in os.listdir(env_vars['markdown_path']): @@ -163,12 +169,12 @@ if 
__name__=="__main__": print(f"{color['green']}Generating file: {file} {color['end']}") data.append(md2html(file, env_vars)) - sorted_data = sorted(data, key=lambda x:datetime.strptime(x['date'], '%d-%m-%Y')) + sorted_data = sorted(data, key=lambda x: datetime.strptime(x['date'], '%d-%m-%Y')) # Generating atom feed print(f"{color['green']}Generating RSS / Atom feed {color['end']}") - generateAtomFeed(data, env_vars) - - # Generating index + generate_atom_feed(data, env_vars) + + # Generating index print(f"{color['green']}Generating main page {color['end']}") - generateIndex(data, env_vars) + generate_index(data, env_vars) diff --git a/markdown_parser.py b/markdown_parser.py index 403b9e4..b4bce38 100644 --- a/markdown_parser.py +++ b/markdown_parser.py @@ -1,135 +1,150 @@ import html -def parseline(line): + +def parse_line(line): """ - Parse a line of texte to replace HTML specialchars, link, and strong / emphased of markdown for HTML + Parse a line of texte to replace HTML specialchars, link, and strong + / emphased of markdown for HTML return: the line ready for HTML """ # Change &, <, > for HTML support html.escape(line) # Checking if there is strong or emphasized - while '*' in line: - line = line.replace('*', '<em>', 1) - line = line.replace('*', '</em>', 1) - while '**' in line: - line = line.replace('**', '<strong>', 1) - line = line.replace('**', '</strong>', 1) + while "*" in line: + line = line.replace("*", "<em>", 1) + line = line.replace("*", "</em>", 1) + while "**" in line: + line = line.replace("**", "<strong>", 1) + line = line.replace("**", "</strong>", 1) # Checking if there is image - while '![' in line and ']' in line: - title = line.split(']')[0].split('[')[1] - link = line.split(']')[1].split('(')[1].split(')')[0] - line = line.replace('', '<img src="' + link + '" alt="' + title + '"/>') - - # Checkinf if there is link - while '[' in line and ']' in line: - title = line.split(']')[0].split('[')[1] - link = 
line.split(']')[1].split('(')[1].split(')')[0] - line = line.replace('[' + title + '](' + link + ')', '<a href="' + link + '">' + title + '</a>') + while "![" in line and "]" in line: + title = line.split("]")[0].split("[")[1] + link = line.split("]")[1].split("(")[1].split(")")[0] + line = line.replace( + "", + '<img src="' + link + '" alt="' + title + '"/>', + ) + + # Checking if there is link + while "[" in line and "]" in line: + title = line.split("]")[0].split("[")[1] + link = line.split("]")[1].split("(")[1].split(")")[0] + line = line.replace( + "[" + title + "](" + link + ")", '<a href="' + link + '">' + title + "</a>" + ) return line -def parsemd(filepath, env_vars): +def parse_md(filepath, env_vars): """ - Parse the markdown file and return the content to put into the template page + Parse a markdown file and return the content to put into the template page env_vars: dictionnary of environment variable filepath: Filepath of the markdown file - return: a dictionnary containing title, metadata, local path, content for HTML + return: a dictionnary containing title, metadata, local path, content """ - content = {'content': '', 'title': '', 'date': '01-01-0001', 'description': '', 'tags' : [], 'filepath': env_vars['pages_path'].replace(env_vars['parent_path'] + '/', '') - + '/' + filepath.split('.')[0] + '.html'} - + content = { + "content": "", + "title": "", + "date": "01-01-0001", + "description": "", + "tags": [], + "filepath": env_vars["pages_path"].replace(env_vars["parent_path"] + "/", "") + + "/" + + filepath.split(".")[0] + + ".html", + } + inmeta, inquote, inpre, inul = False, False, False, False - - # Reading the content of the file and transform into html - for line in open(env_vars['markdown_path'] + '/' + filepath, "r"): + + # Reading the content of the file and transform into html + for line in open(env_vars["markdown_path"] + "/" + filepath, "r"): line = line.strip() # Open the metadata - if line.startswith('---'): + if line.startswith("---"): 
if inmeta: inmeta = False else: inmeta = True # Getting the date metadata - if inmeta and line.startswith('date:'): - content['date'] = line.split(':')[1].strip() - + if inmeta and line.startswith("date:"): + content["date"] = line.split(":")[1].strip() + # Getting the description metadata - if inmeta and line.startswith('description:'): - content['description'] = line.split(':')[1].strip() + if inmeta and line.startswith("description:"): + content["description"] = line.split(":")[1].strip() # Getting the tags metadata - if inmeta and line.startswith('tags:'): - tags = line.split(':')[1].split(',') + if inmeta and line.startswith("tags:"): + tags = line.split(":")[1].split(",") # Removing leading and ending white spaces for i in range(0, len(tags)): tags[i] = tags[i].strip() - content['tags'] = tags - + content["tags"] = tags + # Close quote if not quoting if inquote and not line.startswith(">"): - content['content'] += "</blockquote>\n" + content["content"] += "</blockquote>\n" inquote = False - + # Close list if not listing if inul and not line.startswith("-"): - content['content'] += "</li>\n</ul>\n" + content["content"] += "</li>\n</ul>\n" inul = False - + # Checking if it's a code block if line.startswith("```"): if inpre: - content['content'] += "</code></pre>\n" + content["content"] += "</code></pre>\n" - content['content'] += "<pre><code>" + line.lstrip("```") + content["content"] += "<pre><code>" + line.lstrip("```") inpre = True # Checking if it's a quote elif line.startswith(">"): if inquote: - content['content'] += parseline(line.lstrip("> ")) - else: - content['content'] += "<blockquote>" + parseline(line.lstrip("> ")) + content["content"] += parse_line(line.lstrip("> ")) + else: + content["content"] += "<blockquote>" + parse_line(line.lstrip("> ")) inquote = True # Checking if it's a list elif line.startswith("-") and not line.startswith("---"): if inul: - content['content'] += "</li>\n" - content['content'] += "\t<li>" + parseline(line.lstrip("- ")) 
+ content["content"] += "</li>\n" + content["content"] += "\t<li>" + parse_line(line.lstrip("- ")) else: - content['content'] += "<ul>\n\t<li>" + parseline(line.lstrip("- ")) + content["content"] += "<ul>\n\t<li>" + parse_line(line.lstrip("- ")) inul = True - + # Checking if it's a title elif line.startswith("###"): - content['content'] += "<h3>" + parseline(line.lstrip("# ")) + "</h3>\n" + content["content"] += "<h3>" + parse_line(line.lstrip("# ")) + "</h3>\n" elif line.startswith("##"): - content['content'] += "<h2>" + parseline(line.lstrip("# ")) + "</h2>\n" + content["content"] += "<h2>" + parse_line(line.lstrip("# ")) + "</h2>\n" elif line.startswith("#"): - content['title'] += parseline(line.lstrip("# ")) + content["title"] += parse_line(line.lstrip("# ")) # else it's a paragraph elif line != " " and line != "" and not inmeta and not line.startswith("---"): - content['content'] += "<p>" + parseline(line) + "</p>\n" + content["content"] += "<p>" + parse_line(line) + "</p>\n" # Checking all balise are closed if inquote: - content['content'] += "</blockquote>\n" + content["content"] += "</blockquote>\n" inquote = False if inul: - content['content'] += "</li>\n</ul>\n" + content["content"] += "</li>\n</ul>\n" inul = False - + if inpre: - content['content'] += "</code></pre>\n" + content["content"] += "</code></pre>\n" inpre = False return content - |