Add metadata from new columns and make every module fit to use it. Add stringency of local measures as plot background, add vaccination status to plots, mute unreasonable warnings

This commit is contained in:
fordprefect
2020-12-17 13:18:19 +01:00
parent e0d6508682
commit 22c65b1b68
10 changed files with 111 additions and 60 deletions

View File

@@ -6,6 +6,8 @@ from mpl_toolkits.axisartist.parasite_axes import HostAxes, ParasiteAxes
import numpy as np
import time as time_module
import pickle
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
basename="all_"
# manual y adjustments for new cases
@@ -28,10 +30,11 @@ def plot(data, countries, pop, **kwargs):
if loc == "International":
continue
name = basename+loc
time, new_cases, new_deaths, total_cases, total_deaths = data[loc]
time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations'], data[loc]['stringency_index']
fig, ax1 = pp.subplots(num=name, figsize=figsize)
ax2 = ax1.twinx()
ax2.plot(time, np.array(total_deaths)*10, label="Total deaths (x10)", marker="", linestyle="--", color="green")
ax2.plot(time, total_cases, label=f"Total cases", marker="", linestyle="-", color="blue")
@@ -42,6 +45,10 @@ def plot(data, countries, pop, **kwargs):
ax1.plot(time, new_cases, label="raw new cases", color="grey", linestyle="-")
ax1.plot(time[3:-3], np.convolve(new_cases, np.ones((7,))/7, mode="valid"), label="new cases 7day mean", color="orange", linestyle="-", linewidth=2)
if not np.isnan(total_vaccinations[-1]):
print(f"{loc} has vaccines, adding to plot")
ax2.plot(time, np.array(total_vaccinations), label=f"Total vaccinations", marker="", linestyle="-.", color="crimson")
# fix lower bound of plot
for ax in (ax1, ax2):
axis = ax.axis()
@@ -50,6 +57,12 @@ def plot(data, countries, pop, **kwargs):
axis = [axis[0], axis[1], axis[2], corr[loc]]
ax.axis([axis[0], axis[1], -1, axis[3]])
# fix population
#try:
# print(loc, pop[loc]['pop'] - metadata[loc]['population'])
#except:
# pop[loc]['pop'] = metadata[loc]['population']
# if we know population: plot 500 new cases / 1million inhabitants as a rough measure for comparison
# also set color for infection level indicator
infection_level_indicator = "grey"
@@ -82,6 +95,12 @@ def plot(data, countries, pop, **kwargs):
print(f"=====> population unknown for {loc}, skipping plot enhancements")
# stringency of countermeasures as background contourf
axbounds = ax1.axis()
ax1.contourf(time, [-1, 1e10], [stringency_index]*2, cmap="Greys", alpha=0.3, levels=99)
ax1.axis(axbounds)
# disabled for now: put second total-cases-per-million-inhabitants axis besides total-cases-axis
if loc in pop and False:
# according to https://matplotlib.org/3.2.2/gallery/axisartist/demo_parasite_axes.html
@@ -107,6 +126,8 @@ def plot(data, countries, pop, **kwargs):
if loc in pop:
#pp.title(f"{loc}", population = "+f"{pop[loc]['pop']:,}".replace(",","."))
title += ", population = "+f"{pop[loc]['pop']:,}".replace(",",".")
if not np.isnan(total_vaccinations[-1]):
title += ", vac rate: "+f"{total_vaccinations[-1]/pop[loc]['pop']*100:1.3f}%"
ax1.set_title(title)
fig.tight_layout()
pp.text(0.002,0.005, f"plot generated {time_module.strftime('%Y-%m-%d %H:%M')}, CC-by-sa-nc, origin: dukun.de/corona, datasource: ourworldindata.org/coronavirus-source-data", color="dimgrey", fontsize=8, transform=fig.transFigure)

View File

@@ -14,7 +14,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data:
if loc not in countries:
continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc]
time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
# total cases
tc.plot(time, total_cases, label=f"{loc}", marker=".")

View File

@@ -20,11 +20,46 @@ plots = ["basics", "death_per_case",
"doubling_time",
"all_countries",
]
# CSV columns treated as per-location metadata: get_data() stores the first
# value seen for each of these per location and prints a notice when a later
# row carries a different value.
metadata_fields = [
    # identification
    "iso_code", "continent", "location",
    # demographics
    "population", "population_density", "median_age",
    "aged_65_older", "aged_70_older",
    # economy
    "gdp_per_capita", "extreme_poverty",
    # health indicators
    "cardiovasc_death_rate", "diabetes_prevalence",
    "female_smokers", "male_smokers",
    "handwashing_facilities", "hospital_beds_per_thousand",
    "life_expectancy", "human_development_index",
]
### manual data
# population: sourced ECDC data
from population_repository import pop
###
def toint(a):
    """Convert ``a`` to ``int``; return ``np.nan`` if it cannot be converted.

    Anything ``int()`` rejects yields ``np.nan``: empty or non-numeric
    strings, decimal strings such as ``"3.5"``, ``None``, NaN and infinity.
    Floats are truncated towards zero, as ``int()`` does.

    Only conversion failures are handled; unrelated exceptions such as
    ``KeyboardInterrupt`` propagate to the caller.
    """
    try:
        return int(a)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / unsupported type; ValueError: bad text or NaN;
        # OverflowError: +/- infinity.
        return np.nan
def tofloat(a):
    """Convert ``a`` to ``float``; return ``np.nan`` if it cannot be converted.

    Anything ``float()`` rejects yields ``np.nan``: empty or non-numeric
    strings, ``None``, and integers too large to represent as a float.

    Only conversion failures are handled; unrelated exceptions such as
    ``KeyboardInterrupt`` propagate to the caller.
    """
    try:
        return float(a)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / unsupported type; ValueError: bad text;
        # OverflowError: integer too large for a float.
        return np.nan
def addmeta(field, value):
    """Placeholder for recording a metadata value; currently a no-op.

    Both arguments are accepted and ignored, and ``None`` is returned.
    """
    return None
def get_data():
"""fetch data from remote, cache locally and reorganize internal data
not beautiful (at all), but effective!!"""
@@ -55,6 +90,7 @@ def get_data():
# processing
data = {}
metadata = {}
with open(datafile, "r") as f:
reader = csv.reader(f)
for row in reader:
@@ -64,80 +100,61 @@ def get_data():
date,location,new_cases,new_deaths,total_cases,total_deaths,weekly_cases,weekly_deaths,biweekly_cases,biweekly_deaths = row
elif len(row) == 50:
iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row
elif len(row) == 52:
iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,total_vaccinations_per_hundred,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row
else:
print(f"WARNING! Table format changed, new header:\n{row})")
exit(1)
# break loop if header
if location=="location":
# table header
header = row
continue
# cast to num type
if isinstance(total_cases, str):
if total_cases in ('', ' ', '0.0'):
total_cases = 0
try:
total_cases = int(total_cases)
except:
try:
total_cases = float(total_cases)
except:
print("tc failed")
if isinstance(new_cases, str):
if new_cases in ('', ' ', '0.0'):
new_cases = 0
try:
new_cases = int(new_cases)
except:
try:
new_cases = float(new_cases)
except:
print("nc failed")
if isinstance(new_deaths, str):
if new_deaths in ('', ' ', '0.0'):
new_deaths = 0
try:
new_deaths = int(new_deaths)
except:
try:
new_deaths = float(new_deaths)
except:
print("nd failed")
if isinstance(total_deaths, str):
if total_deaths in ('', ' ', '0.0'):
total_deaths = 0
try:
total_deaths = int(total_deaths)
except:
try:
total_deaths = float(total_deaths)
except:
print("td failed")
total_cases = tofloat(total_cases)
new_cases = tofloat(new_cases)
total_deaths = tofloat(total_deaths)
new_deaths = tofloat(new_deaths)
total_vaccinations = tofloat(total_vaccinations)
stringency_index = tofloat(stringency_index)
if location not in data:
data[location] = []
metadata[location] = {}
year, month, day = date.split("-")
def toint(a):
try:
return int(a)
except:
return np.nan
data[location].append([datetime.date(int(year), int(month), int(day)), new_cases, new_deaths, total_cases, total_deaths])
data[location].append([datetime.date(int(year), int(month), int(day)), new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index])
# catch all data fields
#dfields = {field: row[n] for n, field in enumerate(header)}
# add metadata
for n, field in enumerate(header):
if field in metadata_fields:
if field not in metadata[location]:
metadata[location][field] = row[n]
else:
if metadata[location][field] != row[n]:
print(f"{location}: {field} seems not to be a constant ({metadata[location][field]} vs {row[n]})")
# reorganize data
data2 = {}
for loc in data:
time = []
new_cases, new_deaths, total_cases, total_deaths = [], [], [], []
new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index = [], [], [], [], [], []
for entry in data[loc]:
t_, new_cases_, new_deaths_, total_cases_, total_deaths_ = entry
t_, new_cases_, new_deaths_, total_cases_, total_deaths_, total_vaccinations_, stringency_index_ = entry
time.append(t_)
new_cases.append(toint(new_cases_))
new_deaths.append(toint(new_deaths_))
total_cases.append(toint(total_cases_))
total_deaths.append(toint(total_deaths_))
data2[loc] = [time, new_cases, new_deaths, total_cases, total_deaths]
return data2
total_vaccinations.append(toint(total_vaccinations_))
stringency_index.append(toint(stringency_index_))
data2[loc] = {'time': time, 'new_cases': new_cases, 'new_deaths': new_deaths, 'total_cases': total_cases, 'total_deaths': total_deaths, 'total_vaccinations': total_vaccinations, "stringency_index": stringency_index}
return data2, metadata
data = get_data()
data, metadata = get_data()
for plot in plots:
i = importlib.import_module(plot)
i.plot(data, countries, pop)
i.plot(data, countries, pop, metadata=metadata)

View File

@@ -10,7 +10,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data:
if loc not in countries:
continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc]
time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
# death/case
pp.figure(name, figsize=figsize)

View File

@@ -10,7 +10,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data:
if loc not in countries:
continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc]
time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
pp.figure(name, figsize=figsize)
day_of_above_hundred_cases = np.argwhere(np.array(total_cases) > 100)[0][0]

View File

@@ -3,6 +3,8 @@ Plot doubling time
"""
import matplotlib.pyplot as pp
import numpy as np
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
name="doubling_time"
def moving_average(x, w):
@@ -13,7 +15,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data:
if loc not in countries:
continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc]
time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
pp.figure(name, figsize=figsize)
window_size = 7

View File

@@ -56,6 +56,17 @@ Aktuelle Daten aus Deutschland mit vielen Hintergründen finden sich im <a href=
<summary><h2>Ausgewählte Länder</h2></summary>
<b>Achtung! Die Datenqualität variiert zwischen den Ländern enorm, damit sind einzelne Länder auch nur schwer miteinander zu vergleichen!</b>
<br>
Die Plots werden immer komplexer, daher hier eine kurze Zusammenfassung der sichtbaren Informationen:
<ol>
<li>gemeldete Neuinfektion: Rohdaten (grau durchgezogen) und 7-Tage-Mittel (orange) (linke Achse)</li>
<li>Absolute Zahl gemeldeter Infektionen (blau) (rechte Achse)</li>
<li>gemeldete neue Todesfälle: Rohdaten (grau gestrichelt) und 7-Tage-Mittel (schwarz gestrichelt) - der besseren Sichtbarkeit halber mit 10 multipliziert! (linke Achse)</li>
<li>Absolute Zahl gemeldeter Todesfälle (grün gestrichelt) - der besseren Sichtbarkeit halber mit 10 multipliziert! (rechte Achse)</li>
<li>Einschätzung der Strenge der Maßnahmen zur Einordnung der Gegenmaßnahmen als Hintergrundschattierung (<a href=https://ourworldindata.org/grapher/covid-stringency-index>Erklärung (EN)</a>, weiß kann entweder "keine Daten" oder "keine Einschränkungen" bedeuten)</li>
<li>Zahl der verabreichten Impfungen (dunkelrot Strichpunkt-Linie) (rechte Achse)</li>
<li>Referenzen für die Inzidenz (Anzahl der neuen Fälle pro 1Mio Einwohner pro Woche): 5, 50 und 500</li>
</ol>
<br>
<br>
<details open>
<summary>Deutschland und europäische Nachbarn</summary>

View File

@@ -10,7 +10,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data:
if loc not in countries:
continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc]
time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
pp.figure(name, figsize=figsize)
day_of_first_death = np.argwhere(np.array(total_deaths) > 0)[0][0]

View File

@@ -10,7 +10,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data:
if loc not in countries:
continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc]
time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
pp.figure(name, figsize=figsize)
day_of_100_cases = np.argwhere(np.array(total_cases) > 99)[0][0]

View File

@@ -13,7 +13,7 @@ def plot(data, countries, pop, **kwargs):
for loc in data:
if loc not in countries:
continue
time, new_cases, new_deaths, total_cases, total_deaths = data[loc]
time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations']
# moving average of relative new cases with a window size of 3 days
window_size = 3