# Daily-Dose/main.py — builds a daily RSS digest as markdown, optionally
# converted to PDF and/or decorated HTML.
import ast
import configparser
from datetime import date

import feedparser
from mdutils import MdUtils
#################
# Configuration #
#################

# Output formats: markdown is the base artifact; pdf/html are converted from it.
markdown = True
pdf = False
html = True

# Add AOS scroll animations to the generated HTML page.
animations = True

html_stylesheet = "styles/simple.css"
title_animation = "fade-down"
heading_animation = "fade-right"
list_animation = "fade-left"
ConvertToHTML = True  # NOTE(review): appears unused below — `html` controls conversion; confirm before removing
# Document title and output basename: today's date, e.g. "10 November, 2020".
title = date.today().strftime('%d %B, %Y')
# Parser for feeds.ini (one section per RSS feed).
feeds = configparser.ConfigParser()
################
# Main Program #
################

# pdf/html output is produced by converting the markdown file, so warn when a
# conversion is requested but the markdown step is disabled.
if (pdf or html) and not markdown:
    print("Markdown should be True to convert to pdf/html")

# Bug fix: the original read feeds.ini twice (once inside the condition and
# again in the else branch).  ConfigParser.read() returns the list of files
# successfully parsed, so an empty result means the file is missing.
print("Reading feeds.ini")
if not feeds.read("feeds.ini"):
    print("feeds.ini does not exist!")
    exit(1)

# Every section of feeds.ini is a feed; drop the implicit DEFAULT section
# (always the first key).
rss_feeds = [x for x in feeds.keys()]
rss_feeds.pop(0)
print("Read %s feeds from the configuration file" % str(len(rss_feeds)))
def GetPosts(feed):
    """Fetch, filter and package entries for one section of feeds.ini.

    Options read from the section (all but URL optional):
      URL     -- feed address (required; exits the program if missing)
      IGNORE  -- Python list literal of words; entries whose title or summary
                 contains any of them (case-insensitive) are skipped
      SUMMARY -- include each entry's summary (default True)
      ToRead  -- number of entries to keep; -1 or too-large means all (default 5)
      LINK    -- include each entry's link (default False)

    Returns a dict keyed by post index whose values hold the utf-8 encoded
    title and optional summary/link, plus the bookkeeping keys "NoOfPosts",
    "Summary" and "ShowLink".
    """
    Posts = {}
    ToIgnore = []
    URL = None

    try:
        URL = feeds[feed]["URL"]
    except KeyError:
        # Bug fix: the original message was missing the "% feed" argument.
        print("Improper configuration for %s. No URL Specified" % feed)
        exit(1)

    # Security fix: the original used eval() on config values, which executes
    # arbitrary code from feeds.ini.  ast.literal_eval only accepts literals,
    # and getboolean/getint parse the scalar options safely.
    try:
        ToIgnore = ast.literal_eval(feeds[feed]["IGNORE"])
    except KeyError:
        pass  # no IGNORE option: keep nothing ignored
    ReadSummary = feeds.getboolean(feed, "SUMMARY", fallback=True)
    ShowLink = feeds.getboolean(feed, "LINK", fallback=False)
    try:
        ToRead = feeds.getint(feed, "ToRead", fallback=5)
    except ValueError:
        ToRead = 5  # malformed value: keep the default, as the original did

    rss = feedparser.parse(URL)
    maximum = len(rss.entries)
    if ToRead == -1 or ToRead > maximum:
        ToRead = maximum

    posts = []
    summaries = []
    links = []
    count = 0
    added = 0
    # Walk the entries in feed order until ToRead posts are kept or the feed
    # is exhausted, skipping any entry that matches an ignored word.
    while count != maximum and added != ToRead:
        Skip = False
        Title = str(rss.entries[count].title)
        Summary = rss.entries[count].summary
        for words in ToIgnore:
            if (words.lower() in Title.lower()) or (words.lower() in Summary.lower()):
                Skip = True
                break
        if not Skip:
            added += 1
            posts.append(Title)
            if ReadSummary:
                summaries.append(Summary)
            if ShowLink:
                links.append(rss.entries[count].link)
        count += 1

    # Package the kept posts; strings are utf-8 encoded bytes, matching what
    # the markdown writer decodes later.
    for idx in range(len(posts)):
        Posts[idx] = {}
        Posts[idx]["post"] = {}
        Posts[idx]["post"]["title"] = posts[idx].encode('utf-8')
        if ReadSummary:
            Posts[idx]["post"]["summary"] = summaries[idx].encode('utf-8')
        else:
            Posts[idx]["summary"] = None
        if ShowLink:
            Posts[idx]["post"]["link"] = links[idx].encode('utf-8')
        else:
            Posts[idx]["post"]["link"] = None
    Posts["NoOfPosts"] = len(posts)
    Posts["Summary"] = ReadSummary
    Posts["ShowLink"] = ShowLink
    return Posts
# Collect the filtered posts of every configured feed, keyed by section name.
posts = {feed: GetPosts(feed) for feed in rss_feeds}
if markdown:
    # Assemble the digest: one H1 per feed, one bullet per post, then a TOC.
    mdfile = MdUtils(file_name=title, title='Daily Dose')
    for feed in posts:
        mdfile.new_header(level=1, title="From %s" % feed)
        feed_posts = posts[feed]
        for idx in range(feed_posts["NoOfPosts"]):
            entry = feed_posts[idx]["post"]
            mdfile.write("* **" + entry["title"].decode('utf-8') + "**")
            if feed_posts["Summary"]:
                mdfile.write(" - ")
                mdfile.write(entry["summary"].decode('utf-8'))
            # NOTE(review): nesting reconstructed from a whitespace-mangled
            # source — the link branch is read as a sibling of the summary
            # branch; confirm against upstream.
            if feed_posts["ShowLink"]:
                mdfile.write(" - [link](%s)" % entry["link"].decode('utf-8'))
                mdfile.write("\n\n")
            elif feed_posts["Summary"]:
                mdfile.write("\n\n")
            else:
                mdfile.write("\n")
        mdfile.new_line("\n")
    mdfile.new_table_of_contents(table_title='Contents', depth=2)
    mdfile.create_md_file()
# pypandoc drives the markdown -> pdf/html conversions below.
import pypandoc

if pdf:
    # Pandoc renders the markdown file to PDF with compact page margins.
    ifname = str(title.strip()) + ".md"
    ofname = str(title.strip()) + ".pdf"
    convert = pypandoc.convert_file(
        ifname, 'pdf', outputfile=ofname,
        extra_args=['-V', 'geometry:margin=1.5cm'],
    )
if html:
    from bs4 import BeautifulSoup

    ifname = str(title.strip()) + ".md"
    ofname = str(title.strip()) + ".html"

    # '-s' makes pandoc emit a standalone document (with <head> and <body>).
    convert = pypandoc.convert_file(ifname, 'html', outputfile=ofname, extra_args=['-s'])
    # Bug fix: pypandoc returns '' when writing to a file succeeded; the
    # original checked this with a bare assert, which disappears under
    # `python -O`.  Raise explicitly instead.
    if convert != '':
        raise RuntimeError("pandoc HTML conversion failed: %r" % convert)

    with open(ofname, encoding="utf-8") as fp:
        soup = BeautifulSoup(fp, 'html5lib')

    # Add a <title> and a mobile viewport.  (Renamed from `title` to avoid
    # shadowing the global date string.)
    title_tag = soup.new_tag('title')
    title_tag.string = "DailyDose"
    soup.head.append(title_tag)
    viewport = soup.new_tag("meta", content="width=device-width, initial-scale=1.0")
    viewport.attrs["name"] = "viewport"
    soup.head.append(viewport)

    # The stylesheet is linked twice (cwd-relative and parent-relative),
    # presumably so the page renders when served from a subdirectory too —
    # TODO confirm against how the output is hosted.
    custom_css = soup.new_tag('link', href=html_stylesheet, rel='stylesheet')
    soup.head.append(custom_css)
    custom_css = soup.new_tag('link', href=str("../" + html_stylesheet), rel='stylesheet')
    soup.head.append(custom_css)

    if animations:
        # AOS (animate-on-scroll) assets and initialisation script.
        aos_css = soup.new_tag('link', href='https://unpkg.com/aos@2.3.1/dist/aos.css', rel='stylesheet')
        soup.head.append(aos_css)
        aos_js = soup.new_tag('script', src="https://unpkg.com/aos@2.3.1/dist/aos.js")
        soup.head.append(aos_js)
        aos_script = soup.new_tag('script')
        aos_script.string = "AOS.init();"
        soup.body.append(aos_script)

        # Tag each per-feed heading.  Pandoc slugifies "From <feed>" into the
        # id "from-<feed>" (lowercase, spaces -> dashes, colons dropped).
        for feed in rss_feeds:
            ToFindID = str("from-" + str(feed.strip().replace(":", "").replace(" ", "-").lower()))
            ToEdit = soup.find("h1", {"id": ToFindID})
            # Bug fix: use the configured heading_animation instead of the
            # hard-coded 'fade-right' (whose current value it duplicated).
            ToEdit['data-aos'] = heading_animation

        soup.find("h1", {"id": "daily-dose"})['data-aos'] = title_animation
        soup.find("h1", {"id": "contents"})['data-aos'] = heading_animation
        soup.find("h1", {"id": "contents"})['data-aos-anchor-placement'] = "top-bottom"

        paragraphs = soup.find_all("p")
        for paras in paragraphs:
            paras['data-aos'] = list_animation
            paras['data-aos-anchor-placement'] = "bottom-bottom"

        # Only animate list items without an <a> — presumably this leaves the
        # table-of-contents links untouched; confirm.
        lis = soup.find_all("li")
        for li in lis:
            if li.a is None:  # `is None`, not `== None`
                li['data-aos'] = list_animation
                li['data-aos-anchor-placement'] = "bottom-bottom"

    with open(ofname, "w", encoding="utf-8") as outf:
        outf.write(str(soup))