add metadata from new columns and make every module fit to use this. add stringency of local measures as background of plot, add vaccination status to plots, mute unreasonable warnings

This commit is contained in:
fordprefect
2020-12-17 13:18:19 +01:00
parent e0d6508682
commit 22c65b1b68
10 changed files with 111 additions and 60 deletions

View File

@@ -6,6 +6,8 @@ from mpl_toolkits.axisartist.parasite_axes import HostAxes, ParasiteAxes
import numpy as np import numpy as np
import time as time_module import time as time_module
import pickle import pickle
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
basename="all_" basename="all_"
# manual y adjustments for new cases # manual y adjustments for new cases
@@ -28,10 +30,11 @@ def plot(data, countries, pop, **kwargs):
if loc == "International": if loc == "International":
continue continue
name = basename+loc name = basename+loc
time, new_cases, new_deaths, total_cases, total_deaths = data[loc] time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations'], data[loc]['stringency_index']
fig, ax1 = pp.subplots(num=name, figsize=figsize) fig, ax1 = pp.subplots(num=name, figsize=figsize)
ax2 = ax1.twinx() ax2 = ax1.twinx()
ax2.plot(time, np.array(total_deaths)*10, label="Total deaths (x10)", marker="", linestyle="--", color="green") ax2.plot(time, np.array(total_deaths)*10, label="Total deaths (x10)", marker="", linestyle="--", color="green")
ax2.plot(time, total_cases, label=f"Total cases", marker="", linestyle="-", color="blue") ax2.plot(time, total_cases, label=f"Total cases", marker="", linestyle="-", color="blue")
@@ -42,6 +45,10 @@ def plot(data, countries, pop, **kwargs):
ax1.plot(time, new_cases, label="raw new cases", color="grey", linestyle="-") ax1.plot(time, new_cases, label="raw new cases", color="grey", linestyle="-")
ax1.plot(time[3:-3], np.convolve(new_cases, np.ones((7,))/7, mode="valid"), label="new cases 7day mean", color="orange", linestyle="-", linewidth=2) ax1.plot(time[3:-3], np.convolve(new_cases, np.ones((7,))/7, mode="valid"), label="new cases 7day mean", color="orange", linestyle="-", linewidth=2)
if not np.isnan(total_vaccinations[-1]):
print(f"{loc} has vaccines, adding to plot")
ax2.plot(time, np.array(total_vaccinations), label=f"Total vaccinations", marker="", linestyle="-.", color="crimson")
# fix lower bound of plot # fix lower bound of plot
for ax in (ax1, ax2): for ax in (ax1, ax2):
axis = ax.axis() axis = ax.axis()
@@ -50,6 +57,12 @@ def plot(data, countries, pop, **kwargs):
axis = [axis[0], axis[1], axis[2], corr[loc]] axis = [axis[0], axis[1], axis[2], corr[loc]]
ax.axis([axis[0], axis[1], -1, axis[3]]) ax.axis([axis[0], axis[1], -1, axis[3]])
# fix population
#try:
# print(loc, pop[loc]['pop'] - metadata[loc]['population'])
#except:
# pop[loc]['pop'] = metadata[loc]['population']
# if we know population: plot 500 new cases / 1million inhabitants as a rough measure for comparison # if we know population: plot 500 new cases / 1million inhabitants as a rough measure for comparison
# also set color for infection level indicator # also set color for infection level indicator
infection_level_indicator = "grey" infection_level_indicator = "grey"
@@ -82,6 +95,12 @@ def plot(data, countries, pop, **kwargs):
print(f"=====> population unknown for {loc}, skipping plot enhancements") print(f"=====> population unknown for {loc}, skipping plot enhancements")
# stringency of countermeasures as background contourf
axbounds = ax1.axis()
ax1.contourf(time, [-1, 1e10], [stringency_index]*2, cmap="Greys", alpha=0.3, levels=99)
ax1.axis(axbounds)
# disabled for now: put second total-cases-per-million-inhabitants axis besides total-cases-axis # disabled for now: put second total-cases-per-million-inhabitants axis besides total-cases-axis
if loc in pop and False: if loc in pop and False:
# according to https://matplotlib.org/3.2.2/gallery/axisartist/demo_parasite_axes.html # according to https://matplotlib.org/3.2.2/gallery/axisartist/demo_parasite_axes.html
@@ -107,6 +126,8 @@ def plot(data, countries, pop, **kwargs):
if loc in pop: if loc in pop:
#pp.title(f"{loc}", population = "+f"{pop[loc]['pop']:,}".replace(",",".")) #pp.title(f"{loc}", population = "+f"{pop[loc]['pop']:,}".replace(",","."))
title += ", population = "+f"{pop[loc]['pop']:,}".replace(",",".") title += ", population = "+f"{pop[loc]['pop']:,}".replace(",",".")
if not np.isnan(total_vaccinations[-1]):
title += ", vac rate: "+f"{total_vaccinations[-1]/pop[loc]['pop']*100:1.3f}%"
ax1.set_title(title) ax1.set_title(title)
fig.tight_layout() fig.tight_layout()
pp.text(0.002,0.005, f"plot generated {time_module.strftime('%Y-%m-%d %H:%M')}, CC-by-sa-nc, origin: dukun.de/corona, datasource: ourworldindata.org/coronavirus-source-data", color="dimgrey", fontsize=8, transform=fig.transFigure) pp.text(0.002,0.005, f"plot generated {time_module.strftime('%Y-%m-%d %H:%M')}, CC-by-sa-nc, origin: dukun.de/corona, datasource: ourworldindata.org/coronavirus-source-data", color="dimgrey", fontsize=8, transform=fig.transFigure)

View File

@@ -14,7 +14,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data: for loc in data:
if loc not in countries: if loc not in countries:
continue continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc] time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
# total cases # total cases
tc.plot(time, total_cases, label=f"{loc}", marker=".") tc.plot(time, total_cases, label=f"{loc}", marker=".")

View File

@@ -20,11 +20,46 @@ plots = ["basics", "death_per_case",
"doubling_time", "doubling_time",
"all_countries", "all_countries",
] ]
# CSV column names treated as per-location metadata rather than time series.
# get_data() stores the first value seen for each of these columns per
# location and prints a notice if a later row carries a different value.
metadata_fields = [
"iso_code",
"continent",
"location",
"population",
"population_density",
"median_age",
"aged_65_older",
"aged_70_older",
"gdp_per_capita",
"extreme_poverty",
"cardiovasc_death_rate",
"diabetes_prevalence",
"female_smokers",
"male_smokers",
"handwashing_facilities",
"hospital_beds_per_thousand",
"life_expectancy",
"human_development_index",
]
### manual data ### manual data
# population: sourced ECDC data # population: sourced ECDC data
from population_repository import pop from population_repository import pop
### ###
def toint(a):
    """Convert *a* to ``int``, returning ``np.nan`` when it cannot be converted.

    Args:
        a: Any value accepted by ``int()`` (number or numeric string).

    Returns:
        ``int(a)`` on success, otherwise ``np.nan`` (e.g. for empty strings,
        ``None``, NaN or infinite floats).
    """
    try:
        return int(a)
    # Only swallow conversion failures: ValueError (bad string, NaN),
    # TypeError (None / unsupported type), OverflowError (infinite float).
    # A bare ``except`` would also hide KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        return np.nan
def tofloat(a):
    """Convert *a* to ``float``, returning ``np.nan`` when it cannot be converted.

    Args:
        a: Any value accepted by ``float()`` (number or numeric string).

    Returns:
        ``float(a)`` on success, otherwise ``np.nan`` (e.g. for empty strings
        or ``None``).
    """
    try:
        return float(a)
    # Only swallow conversion failures: ValueError (bad string),
    # TypeError (None / unsupported type), OverflowError (int too large).
    # A bare ``except`` would also hide KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        return np.nan
def addmeta(field, value):
    """Placeholder that currently does nothing and returns ``None``.

    Both *field* and *value* are accepted but ignored.
    """
def get_data(): def get_data():
"""fetch data from remote, cache locally and reorganize internal data """fetch data from remote, cache locally and reorganize internal data
not beautiful (at all), but effective!!""" not beautiful (at all), but effective!!"""
@@ -55,6 +90,7 @@ def get_data():
# processing # processing
data = {} data = {}
metadata = {}
with open(datafile, "r") as f: with open(datafile, "r") as f:
reader = csv.reader(f) reader = csv.reader(f)
for row in reader: for row in reader:
@@ -64,80 +100,61 @@ def get_data():
date,location,new_cases,new_deaths,total_cases,total_deaths,weekly_cases,weekly_deaths,biweekly_cases,biweekly_deaths = row date,location,new_cases,new_deaths,total_cases,total_deaths,weekly_cases,weekly_deaths,biweekly_cases,biweekly_deaths = row
elif len(row) == 50: elif len(row) == 50:
iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row
elif len(row) == 52:
iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,total_vaccinations_per_hundred,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row
else: else:
print(f"WARNING! Table format changed, new header:\n{row})") print(f"WARNING! Table format changed, new header:\n{row})")
exit(1) exit(1)
# break loop if header # break loop if header
if location=="location": if location=="location":
# table header # table header
header = row
continue continue
# cast to num type # cast to num type
if isinstance(total_cases, str): total_cases = tofloat(total_cases)
if total_cases in ('', ' ', '0.0'): new_cases = tofloat(new_cases)
total_cases = 0 total_deaths = tofloat(total_deaths)
try: new_deaths = tofloat(new_deaths)
total_cases = int(total_cases) total_vaccinations = tofloat(total_vaccinations)
except: stringency_index = tofloat(stringency_index)
try:
total_cases = float(total_cases)
except:
print("tc failed")
if isinstance(new_cases, str):
if new_cases in ('', ' ', '0.0'):
new_cases = 0
try:
new_cases = int(new_cases)
except:
try:
new_cases = float(new_cases)
except:
print("nc failed")
if isinstance(new_deaths, str):
if new_deaths in ('', ' ', '0.0'):
new_deaths = 0
try:
new_deaths = int(new_deaths)
except:
try:
new_deaths = float(new_deaths)
except:
print("nd failed")
if isinstance(total_deaths, str):
if total_deaths in ('', ' ', '0.0'):
total_deaths = 0
try:
total_deaths = int(total_deaths)
except:
try:
total_deaths = float(total_deaths)
except:
print("td failed")
if location not in data: if location not in data:
data[location] = [] data[location] = []
metadata[location] = {}
year, month, day = date.split("-") year, month, day = date.split("-")
def toint(a): data[location].append([datetime.date(int(year), int(month), int(day)), new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index])
try:
return int(a)
except: # catch all data fields
return np.nan #dfields = {field: row[n] for n, field in enumerate(header)}
data[location].append([datetime.date(int(year), int(month), int(day)), new_cases, new_deaths, total_cases, total_deaths]) # add metadata
for n, field in enumerate(header):
if field in metadata_fields:
if field not in metadata[location]:
metadata[location][field] = row[n]
else:
if metadata[location][field] != row[n]:
print(f"{location}: {field} seems not to be a constant ({metadata[location][field]} vs {row[n]})")
# reorganize data # reorganize data
data2 = {} data2 = {}
for loc in data: for loc in data:
time = [] time = []
new_cases, new_deaths, total_cases, total_deaths = [], [], [], [] new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index = [], [], [], [], [], []
for entry in data[loc]: for entry in data[loc]:
t_, new_cases_, new_deaths_, total_cases_, total_deaths_ = entry t_, new_cases_, new_deaths_, total_cases_, total_deaths_, total_vaccinations_, stringency_index_ = entry
time.append(t_) time.append(t_)
new_cases.append(toint(new_cases_)) new_cases.append(toint(new_cases_))
new_deaths.append(toint(new_deaths_)) new_deaths.append(toint(new_deaths_))
total_cases.append(toint(total_cases_)) total_cases.append(toint(total_cases_))
total_deaths.append(toint(total_deaths_)) total_deaths.append(toint(total_deaths_))
data2[loc] = [time, new_cases, new_deaths, total_cases, total_deaths] total_vaccinations.append(toint(total_vaccinations_))
return data2 stringency_index.append(toint(stringency_index_))
data2[loc] = {'time': time, 'new_cases': new_cases, 'new_deaths': new_deaths, 'total_cases': total_cases, 'total_deaths': total_deaths, 'total_vaccinations': total_vaccinations, "stringency_index": stringency_index}
return data2, metadata
data = get_data() data, metadata = get_data()
for plot in plots: for plot in plots:
i = importlib.import_module(plot) i = importlib.import_module(plot)
i.plot(data, countries, pop) i.plot(data, countries, pop, metadata=metadata)

View File

@@ -10,7 +10,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data: for loc in data:
if loc not in countries: if loc not in countries:
continue continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc] time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
# death/case # death/case
pp.figure(name, figsize=figsize) pp.figure(name, figsize=figsize)

View File

@@ -10,7 +10,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data: for loc in data:
if loc not in countries: if loc not in countries:
continue continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc] time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
pp.figure(name, figsize=figsize) pp.figure(name, figsize=figsize)
day_of_above_hundred_cases = np.argwhere(np.array(total_cases) > 100)[0][0] day_of_above_hundred_cases = np.argwhere(np.array(total_cases) > 100)[0][0]

View File

@@ -3,6 +3,8 @@ Plot doubling time
""" """
import matplotlib.pyplot as pp import matplotlib.pyplot as pp
import numpy as np import numpy as np
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
name="doubling_time" name="doubling_time"
def moving_average(x, w): def moving_average(x, w):
@@ -13,7 +15,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data: for loc in data:
if loc not in countries: if loc not in countries:
continue continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc] time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
pp.figure(name, figsize=figsize) pp.figure(name, figsize=figsize)
window_size = 7 window_size = 7

View File

@@ -56,6 +56,17 @@ Aktuelle Daten aus Deutschland mit vielen Hintergründen finden sich im <a href=
<summary><h2>Ausgewählte Länder</h2></summary> <summary><h2>Ausgewählte Länder</h2></summary>
<b>Achtung! Die Datenqualität variiert zwischen den Ländern enorm, damit sind einzelne Länder auch nur schwer miteinander zu vergleichen!</b> <b>Achtung! Die Datenqualität variiert zwischen den Ländern enorm, damit sind einzelne Länder auch nur schwer miteinander zu vergleichen!</b>
<br>
Die Plots werden immer komplexer, daher hier eine kurze Zusammenfassung der sichtbaren Informationen:
<ol>
<li>gemeldete Neuinfektionen: Rohdaten (grau durchgezogen) und 7-Tage-Mittel (orange) (linke Achse)</li>
<li>Absolute Zahl gemeldeter Infektionen (blau) (rechte Achse)</li>
<li>gemeldete neue Todesfälle: Rohdaten (grau gestrichelt) und 7-Tage-Mittel (schwarz gestrichelt) - der besseren Sichtbarkeit halber mit 10 multipliziert! (linke Achse)</li>
<li>Absolute Zahl gemeldeter Todesfälle (grün gestrichelt) - der besseren Sichtbarkeit halber mit 10 multipliziert! (rechte Achse)</li>
<li>Einschätzung der Strenge der Maßnahmen zur Einordnung der Gegenmaßnahmen als Hintergrundschattierung (<a href=https://ourworldindata.org/grapher/covid-stringency-index>Erklärung (EN)</a>, weiß kann entweder "keine Daten" oder "keine Einschränkungen" bedeuten)</li>
<li>Zahl der verabreichten Impfungen (dunkelrot Strichpunkt-Linie) (rechte Achse)</li>
<li>Referenzen für die Inzidenz (Anzahl der neuen Fälle pro 1Mio Einwohner pro Woche): 5, 50 und 500</li>
</ol>
<br>
<br> <br>
<details open> <details open>
<summary>Deutschland und europäische Nachbarn</summary> <summary>Deutschland und europäische Nachbarn</summary>

View File

@@ -10,7 +10,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data: for loc in data:
if loc not in countries: if loc not in countries:
continue continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc] time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
pp.figure(name, figsize=figsize) pp.figure(name, figsize=figsize)
day_of_first_death = np.argwhere(np.array(total_deaths) > 0)[0][0] day_of_first_death = np.argwhere(np.array(total_deaths) > 0)[0][0]

View File

@@ -10,7 +10,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data: for loc in data:
if loc not in countries: if loc not in countries:
continue continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc] time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
pp.figure(name, figsize=figsize) pp.figure(name, figsize=figsize)
day_of_100_cases = np.argwhere(np.array(total_cases) > 99)[0][0] day_of_100_cases = np.argwhere(np.array(total_cases) > 99)[0][0]

View File

@@ -13,7 +13,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data: for loc in data:
if loc not in countries: if loc not in countries:
continue continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc] time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
# moving average of relative new cases with a window size of 3 days # moving average of relative new cases with a window size of 3 days
window_size = 3 window_size = 3