Comments, cleanup, and graceful failure in case of RSS download issues

This commit is contained in:
fordprefect
2022-01-14 22:57:48 +01:00
parent 251e787669
commit 8eb27f3105

View File

@@ -4,9 +4,13 @@ from defusedxml.ElementTree import fromstring
import urllib.request import urllib.request
import datetime import datetime
# rss-xml URL
url = "https://www1.wdr.de/mediathek/audio/zeitzeichen/zeitzeichen-podcast-100.podcast" url = "https://www1.wdr.de/mediathek/audio/zeitzeichen/zeitzeichen-podcast-100.podcast"
def findZander(today): def findZander(today):
"""
"""
# find metadata
for n, item in enumerate(today): for n, item in enumerate(today):
if item.tag.find("title") >= 0: if item.tag.find("title") >= 0:
title = today[n].text title = today[n].text
@@ -15,31 +19,36 @@ def findZander(today):
if item.tag.find("enclosure") >= 0: if item.tag.find("enclosure") >= 0:
podcasturl = today[n].attrib["url"] podcasturl = today[n].attrib["url"]
# check for Zander
if author.find("Zander") >= 0: if author.find("Zander") >= 0:
print(f"ZanderAlert: {title}\nDownload-URL: {podcasturl}") print(f"ZanderAlert: {title}\nDownload-URL: {podcasturl}")
return {"url": podcasturl, "title": title, "full_item": today} return {"url": podcasturl, "title": title, "full_item": today}
return None return None
# read rss feed
with urllib.request.urlopen(url) as response: with urllib.request.urlopen(url) as response:
assert response.status == 200, f"Webrequest fehlgeschlagen, stimmt die URL noch?"
feed = fromstring(response.read())[0] feed = fromstring(response.read())[0]
# read entries in rss feed until first entry (today)
for i in feed: for i in feed:
if i.tag == "item": if i.tag == "item":
metadata = findZander(i) metadata = findZander(i)
break break
# future feature: episode download and feed regeneration # download episode download and regenerate zanderzeichen feed
if metadata is not None: if metadata is not None:
# wdrzeitzeichen_2017-05-16_Voltaire wird verhaftet_16051717_wdr5.mp3
day, month, year = datetime.datetime.now().strftime("%d.%m.%Y").split(".") day, month, year = datetime.datetime.now().strftime("%d.%m.%Y").split(".")
title = metadata["title"] title = metadata["title"]
for i in (",", "(", ")"): for i in (",", "(", ")"): # get rid of unwanted characters in title
title = title.replace(i, "") title = title.replace(i, "")
refdate = title.split(" ")[-1].replace(".", "") refdate = title.split(" ")[-1].replace(".", "") # generate reference date
title = " ".join(title.split(" ")[:-1]) title = " ".join(title.split(" ")[:-1]) # strip reference date
# destination file name
destination = f"files/wdrzeitzeichen_{year}-{month}-{day}_{title}_{refdate}_wdr5.mp3" destination = f"files/wdrzeitzeichen_{year}-{month}-{day}_{title}_{refdate}_wdr5.mp3"
urllib.request.urlretrieve(metadata["url"], "test/"+destination) # actual download of file
urllib.request.urlretrieve(metadata["url"], destination)
# regenerate zanderzeichen feed
import genfeed import genfeed