From 22c65b1b68a3dbca5b0ab967fc26ef7f8b70c68c Mon Sep 17 00:00:00 2001 From: fordprefect Date: Thu, 17 Dec 2020 13:18:19 +0100 Subject: [PATCH] add metadata from new columns and make every module fit to use this. add stringence of local measures as background of plot, add vaccination status to plots, mute unreasonable warnings --- all_countries.py | 23 ++++++- basics.py | 2 +- coronavis.py | 121 ++++++++++++++++++++--------------- death_per_case.py | 2 +- delay_from_usa.py | 2 +- doubling_time.py | 4 +- index.html | 11 ++++ normalized_to_first_death.py | 2 +- normalized_to_ten_cases.py | 2 +- percent_increase.py | 2 +- 10 files changed, 111 insertions(+), 60 deletions(-) diff --git a/all_countries.py b/all_countries.py index 0cbebce..805bd08 100644 --- a/all_countries.py +++ b/all_countries.py @@ -6,6 +6,8 @@ from mpl_toolkits.axisartist.parasite_axes import HostAxes, ParasiteAxes import numpy as np import time as time_module import pickle +import warnings +warnings.filterwarnings("ignore", category=UserWarning) basename="all_" # manual y adjustments for new cases @@ -28,10 +30,11 @@ def plot(data, countries, pop, **kwargs): if loc == "International": continue name = basename+loc - time, new_cases, new_deaths, total_cases, total_deaths = data[loc] + time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations'], data[loc]['stringency_index'] fig, ax1 = pp.subplots(num=name, figsize=figsize) + ax2 = ax1.twinx() ax2.plot(time, np.array(total_deaths)*10, label="Total deaths (x10)", marker="", linestyle="--", color="green") ax2.plot(time, total_cases, label=f"Total cases", marker="", linestyle="-", color="blue") @@ -42,6 +45,10 @@ def plot(data, countries, pop, **kwargs): ax1.plot(time, new_cases, label="raw new cases", color="grey", linestyle="-") ax1.plot(time[3:-3], 
np.convolve(new_cases, np.ones((7,))/7, mode="valid"), label="new cases 7day mean", color="orange", linestyle="-", linewidth=2) + if not np.isnan(total_vaccinations[-1]): + print(f"{loc} has vaccines, adding to plot") + ax2.plot(time, np.array(total_vaccinations), label=f"Total vaccinations", marker="", linestyle="-.", color="crimson") + # fix lower bound of plot for ax in (ax1, ax2): axis = ax.axis() @@ -50,6 +57,12 @@ def plot(data, countries, pop, **kwargs): axis = [axis[0], axis[1], axis[2], corr[loc]] ax.axis([axis[0], axis[1], -1, axis[3]]) + # fix population + #try: + # print(loc, pop[loc]['pop'] - metadata[loc]['population']) + #except: + # pop[loc]['pop'] = metadata[loc]['population'] + # if we know population: plot 500 new cases / 1million inhabitants as a rough measure for comparison # also set color for infection level indicator infection_level_indicator = "grey" @@ -82,6 +95,12 @@ def plot(data, countries, pop, **kwargs): print(f"=====> population unknown for {loc}, skipping plot enhancements") + # stringency of countermeasures as background contourf + axbounds = ax1.axis() + ax1.contourf(time, [-1, 1e10], [stringency_index]*2, cmap="Greys", alpha=0.3, levels=99) + ax1.axis(axbounds) + + # disabled for now: put second total-cases-per-million-inhabitants axis besides total-cases-axis if loc in pop and False: # according to https://matplotlib.org/3.2.2/gallery/axisartist/demo_parasite_axes.html @@ -107,6 +126,8 @@ def plot(data, countries, pop, **kwargs): if loc in pop: #pp.title(f"{loc}", population = "+f"{pop[loc]['pop']:,}".replace(",",".")) title += ", population = "+f"{pop[loc]['pop']:,}".replace(",",".") + if not np.isnan(total_vaccinations[-1]): + title += ", vac rate: "+f"{total_vaccinations[-1]/pop[loc]['pop']*100:1.3f}%" ax1.set_title(title) fig.tight_layout() pp.text(0.002,0.005, f"plot generated {time_module.strftime('%Y-%m-%d %H:%M')}, CC-by-sa-nc, origin: dukun.de/corona, datasource: ourworldindata.org/coronavirus-source-data", 
color="dimgrey", fontsize=8, transform=fig.transFigure) diff --git a/basics.py b/basics.py index 1913816..175a374 100644 --- a/basics.py +++ b/basics.py @@ -14,7 +14,7 @@ def plot(data, countries, pop, **kwargs): for loc in data: if loc not in countries: continue - time, new_cases, new_deaths, total_cases, total_deaths = data[loc] + time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations'] # total cases tc.plot(time, total_cases, label=f"{loc}", marker=".") diff --git a/coronavis.py b/coronavis.py index 9321048..04aa98d 100644 --- a/coronavis.py +++ b/coronavis.py @@ -20,11 +20,46 @@ plots = ["basics", "death_per_case", "doubling_time", "all_countries", ] + +metadata_fields = [ + "iso_code", + "continent", + "location", + "population", + "population_density", + "median_age", + "aged_65_older", + "aged_70_older", + "gdp_per_capita", + "extreme_poverty", + "cardiovasc_death_rate", + "diabetes_prevalence", + "female_smokers", + "male_smokers", + "handwashing_facilities", + "hospital_beds_per_thousand", + "life_expectancy", + "human_development_index", +] + ### manual data # population: sourced ECDC data from population_repository import pop ### +def toint(a): + try: + return int(a) + except: + return np.nan +def tofloat(a): + try: + return float(a) + except: + return np.nan +def addmeta(field, value): + pass + def get_data(): """fetch data from remote, cache locally and reorganize internal data not beautiful (at all), but effective!!""" @@ -55,6 +90,7 @@ def get_data(): # processing data = {} + metadata = {} with open(datafile, "r") as f: reader = csv.reader(f) for row in reader: @@ -64,80 +100,61 @@ def get_data(): date,location,new_cases,new_deaths,total_cases,total_deaths,weekly_cases,weekly_deaths,biweekly_cases,biweekly_deaths = row elif len(row) == 50: 
iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row + elif len(row) == 52: + iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,total_vaccinations_per_hundred,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row else: print(f"WARNING! 
Table format changed, new header:\n{row})") exit(1) # break loop if header if location=="location": # table header + header = row continue + # cast to num type - if isinstance(total_cases, str): - if total_cases in ('', ' ', '0.0'): - total_cases = 0 - try: - total_cases = int(total_cases) - except: - try: - total_cases = float(total_cases) - except: - print("tc failed") - if isinstance(new_cases, str): - if new_cases in ('', ' ', '0.0'): - new_cases = 0 - try: - new_cases = int(new_cases) - except: - try: - new_cases = float(new_cases) - except: - print("nc failed") - if isinstance(new_deaths, str): - if new_deaths in ('', ' ', '0.0'): - new_deaths = 0 - try: - new_deaths = int(new_deaths) - except: - try: - new_deaths = float(new_deaths) - except: - print("nd failed") - if isinstance(total_deaths, str): - if total_deaths in ('', ' ', '0.0'): - total_deaths = 0 - try: - total_deaths = int(total_deaths) - except: - try: - total_deaths = float(total_deaths) - except: - print("td failed") + total_cases = tofloat(total_cases) + new_cases = tofloat(new_cases) + total_deaths = tofloat(total_deaths) + new_deaths = tofloat(new_deaths) + total_vaccinations = tofloat(total_vaccinations) + stringency_index = tofloat(stringency_index) if location not in data: data[location] = [] + metadata[location] = {} year, month, day = date.split("-") - def toint(a): - try: - return int(a) - except: - return np.nan - data[location].append([datetime.date(int(year), int(month), int(day)), new_cases, new_deaths, total_cases, total_deaths]) + data[location].append([datetime.date(int(year), int(month), int(day)), new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index]) + + + # catch all data fields + #dfields = {field: row[n] for n, field in enumerate(header)} + # add metadata + for n, field in enumerate(header): + if field in metadata_fields: + if field not in metadata[location]: + metadata[location][field] = row[n] + else: + if metadata[location][field] != 
row[n]: + print(f"{location}: {field} seems not to be a constant ({metadata[location][field]} vs {row[n]})") + # reorganize data data2 = {} for loc in data: time = [] - new_cases, new_deaths, total_cases, total_deaths = [], [], [], [] + new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index = [], [], [], [], [], [] for entry in data[loc]: - t_, new_cases_, new_deaths_, total_cases_, total_deaths_ = entry + t_, new_cases_, new_deaths_, total_cases_, total_deaths_, total_vaccinations_, stringency_index_ = entry time.append(t_) new_cases.append(toint(new_cases_)) new_deaths.append(toint(new_deaths_)) total_cases.append(toint(total_cases_)) total_deaths.append(toint(total_deaths_)) - data2[loc] = [time, new_cases, new_deaths, total_cases, total_deaths] - return data2 + total_vaccinations.append(toint(total_vaccinations_)) + stringency_index.append(toint(stringency_index_)) + data2[loc] = {'time': time, 'new_cases': new_cases, 'new_deaths': new_deaths, 'total_cases': total_cases, 'total_deaths': total_deaths, 'total_vaccinations': total_vaccinations, "stringency_index": stringency_index} + return data2, metadata -data = get_data() +data, metadata = get_data() for plot in plots: i = importlib.import_module(plot) - i.plot(data, countries, pop) + i.plot(data, countries, pop, metadata=metadata) diff --git a/death_per_case.py b/death_per_case.py index 9d4b399..7963219 100644 --- a/death_per_case.py +++ b/death_per_case.py @@ -10,7 +10,7 @@ def plot(data, countries, pop, **kwargs): for loc in data: if loc not in countries: continue - time, new_cases, new_deaths, total_cases, total_deaths = data[loc] + time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations'] # death/case pp.figure(name, figsize=figsize) diff --git a/delay_from_usa.py b/delay_from_usa.py index 6d3e61b..c9d0b90 
100644 --- a/delay_from_usa.py +++ b/delay_from_usa.py @@ -10,7 +10,7 @@ def plot(data, countries, pop, **kwargs): for loc in data: if loc not in countries: continue - time, new_cases, new_deaths, total_cases, total_deaths = data[loc] + time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations'] pp.figure(name, figsize=figsize) day_of_above_hundred_cases = np.argwhere(np.array(total_cases) > 100)[0][0] diff --git a/doubling_time.py b/doubling_time.py index bcc6362..d31b0b1 100644 --- a/doubling_time.py +++ b/doubling_time.py @@ -3,6 +3,8 @@ Plot doubling time """ import matplotlib.pyplot as pp import numpy as np +import warnings +warnings.filterwarnings("ignore", category=RuntimeWarning) name="doubling_time" def moving_average(x, w): @@ -13,7 +15,7 @@ def plot(data, countries, pop, **kwargs): for loc in data: if loc not in countries: continue - time, new_cases, new_deaths, total_cases, total_deaths = data[loc] + time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations'] pp.figure(name, figsize=figsize) window_size = 7 diff --git a/index.html b/index.html index 93562f6..0e02d6e 100644 --- a/index.html +++ b/index.html @@ -56,6 +56,17 @@ Aktuelle Daten aus Deutschland mit vielen Hintergründen finden sich im

Ausgewählte Länder

Achtung! Die Datenqualität variiert zwischen den Ländern enorm, damit sind einzelne Länder auch nur schwer miteinander zu vergleichen! +
+Die Plots werden immer komplexer, daher hier eine kurze Zusammenfassung der sichtbaren Informationen:
    +
  1. gemeldete Neuinfektionen: Rohdaten (grau durchgezogen) und 7-Tage-Mittel (orange) (linke Achse)
  2. +
  3. Absolute Zahl gemeldeter Infektionen (blau) (rechte Achse)
  4. +
  5. gemeldete neue Todesfälle: Rohdaten (grau gestrichelt) und 7-Tage-Mittel (schwarz gestrichelt) - der besseren Sichtbarkeit halber mit 10 multipliziert! (linke Achse)
  6. +
  7. Absolute Zahl gemeldeter Todesfälle (grün gestrichelt) - der besseren Sichtbarkeit halber mit 10 multipliziert! (rechte Achse)
  8. +
  9. Einschätzung der Strenge der Maßnahmen zur Einordnung der Gegenmaßnahmen als Hintergrundschattierung (Erklärung (EN), weiß kann entweder "keine Daten" oder "keine Einschränkungen" bedeuten)
  10. +
  11. Zahl der verabreichten Impfungen (dunkelrot Strichpunkt-Linie) (rechte Achse)
  12. +
  13. Referenzen für die Inzidenz (Anzahl der neuen Fälle pro 1Mio Einwohner pro Woche): 5, 50 und 500
  14. +

    Deutschland und europäische Nachbarn diff --git a/normalized_to_first_death.py b/normalized_to_first_death.py index 68a497b..c881773 100644 --- a/normalized_to_first_death.py +++ b/normalized_to_first_death.py @@ -10,7 +10,7 @@ def plot(data, countries, pop, **kwargs): for loc in data: if loc not in countries: continue - time, new_cases, new_deaths, total_cases, total_deaths = data[loc] + time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations'] pp.figure(name, figsize=figsize) day_of_first_death = np.argwhere(np.array(total_deaths) > 0)[0][0] diff --git a/normalized_to_ten_cases.py b/normalized_to_ten_cases.py index 6f4636d..86eb611 100644 --- a/normalized_to_ten_cases.py +++ b/normalized_to_ten_cases.py @@ -10,7 +10,7 @@ def plot(data, countries, pop, **kwargs): for loc in data: if loc not in countries: continue - time, new_cases, new_deaths, total_cases, total_deaths = data[loc] + time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations'] pp.figure(name, figsize=figsize) day_of_100_cases = np.argwhere(np.array(total_cases) > 99)[0][0] diff --git a/percent_increase.py b/percent_increase.py index 952477c..df31361 100644 --- a/percent_increase.py +++ b/percent_increase.py @@ -13,7 +13,7 @@ def plot(data, countries, pop, **kwargs): for loc in data: if loc not in countries: continue - time, new_cases, new_deaths, total_cases, total_deaths = data[loc] + time, new_cases, new_deaths, total_cases, total_deaths, total_vaccinations = data[loc]['time'], data[loc]['new_cases'], data[loc]['new_deaths'], data[loc]['total_cases'], data[loc]['total_deaths'], data[loc]['total_vaccinations'] # moving average of relative new cases with a 
window size of 3 days window_size = 3