Add metadata from the new columns and adapt every module to use it. Add the stringency of local measures as plot background, add vaccination status to the plots, and mute unreasonable warnings.

This commit is contained in:
fordprefect
2020-12-17 13:18:19 +01:00
parent e0d6508682
commit 22c65b1b68
10 changed files with 111 additions and 60 deletions

View File

@@ -20,11 +20,46 @@ plots = ["basics", "death_per_case",
"doubling_time",
"all_countries",
]
# Column names that get_data() stores once per location as metadata
# instead of appending them to the per-day time series.
metadata_fields = [
    # identification
    "iso_code", "continent", "location",
    # population structure
    "population", "population_density",
    "median_age", "aged_65_older", "aged_70_older",
    # economy
    "gdp_per_capita", "extreme_poverty",
    # health indicators
    "cardiovasc_death_rate", "diabetes_prevalence",
    "female_smokers", "male_smokers",
    "handwashing_facilities", "hospital_beds_per_thousand",
    "life_expectancy", "human_development_index",
]
### manual data
# population: sourced ECDC data
from population_repository import pop
###
def toint(a):
    """Convert ``a`` to ``int``, falling back to ``np.nan`` on failure.

    Args:
        a: Any value accepted by ``int()`` (number, numeric string, ...).

    Returns:
        ``int(a)`` when the conversion succeeds, otherwise ``np.nan``.
    """
    try:
        return int(a)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None or unsupported types; ValueError: empty or
        # non-integer strings and NaN; OverflowError: infinities.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return np.nan
def tofloat(a):
    """Convert ``a`` to ``float``, falling back to ``np.nan`` on failure.

    Args:
        a: Any value accepted by ``float()`` (number, numeric string, ...).

    Returns:
        ``float(a)`` when the conversion succeeds, otherwise ``np.nan``.
    """
    try:
        return float(a)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None or unsupported types; ValueError: empty or
        # non-numeric strings; OverflowError: integers too large for a float.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return np.nan
def addmeta(field, value):
    """Placeholder: accepts ``field`` and ``value``, does nothing, returns None."""
def get_data():
"""fetch data from remote, cache locally and reorganize internal data
not beautiful (at all), but effective!!"""
@@ -55,6 +90,7 @@ def get_data():
# processing
data = {}
metadata = {}
with open(datafile, "r") as f:
reader = csv.reader(f)
for row in reader:
@@ -64,80 +100,61 @@ def get_data():
date,location,new_cases,new_deaths,total_cases,total_deaths,weekly_cases,weekly_deaths,biweekly_cases,biweekly_deaths = row
elif len(row) == 50:
iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row
elif len(row) == 52:
iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,total_vaccinations_per_hundred,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row
else:
print(f"WARNING! Table format changed, new header:\n{row})")
exit(1)
# break loop if header
if location=="location":
# table header
header = row
continue
# cast to num type
if isinstance(total_cases, str):
if total_cases in ('', ' ', '0.0'):
total_cases = 0
try:
total_cases = int(total_cases)
except:
try:
total_cases = float(total_cases)
except:
print("tc failed")
if isinstance(new_cases, str):
if new_cases in ('', ' ', '0.0'):
new_cases = 0
try:
new_cases = int(new_cases)
except:
try:
new_cases = float(new_cases)
except:
print("nc failed")
if isinstance(new_deaths, str):
if new_deaths in ('', ' ', '0.0'):
new_deaths = 0
try:
new_deaths = int(new_deaths)
except:
try:
new_deaths = float(new_deaths)
except:
print("nd failed")
if isinstance(total_deaths, str):
if total_deaths in ('', ' ', '0.0'):
total_deaths = 0
try:
total_deaths = int(total_deaths)
except:
try:
total_deaths = float(total_deaths)
except:
print("td failed")
total_cases = tofloat(total_cases)
new_cases = tofloat(new_cases)
total_deaths = tofloat(total_deaths)
new_deaths = tofloat(new_deaths)
total_vaccinations = tofloat(total_vaccinations)
stringency_index = tofloat(stringency_index)
if location not in data:
data[location] = []
metadata[location] = {}
year, month, day = date.split("-")
def toint(a):
try:
return int(a)
except:
return np.nan
data[location].append([datetime.date(int(year), int(month), int(day)), new_cases, new_deaths, total_cases, total_deaths])
data[location].append([datetime.date(int(year), int(month), int(day)), new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index])
# catch all data fields
#dfields = {field: row[n] for n, field in enumerate(header)}
# add metadata
for n, field in enumerate(header):
if field in metadata_fields:
if field not in metadata[location]:
metadata[location][field] = row[n]
else:
if metadata[location][field] != row[n]:
print(f"{location}: {field} seems not to be a constant ({metadata[location][field]} vs {row[n]})")
# reorganize data
data2 = {}
for loc in data:
time = []
new_cases, new_deaths, total_cases, total_deaths = [], [], [], []
new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index = [], [], [], [], [], []
for entry in data[loc]:
t_, new_cases_, new_deaths_, total_cases_, total_deaths_ = entry
t_, new_cases_, new_deaths_, total_cases_, total_deaths_, total_vaccinations_, stringency_index_ = entry
time.append(t_)
new_cases.append(toint(new_cases_))
new_deaths.append(toint(new_deaths_))
total_cases.append(toint(total_cases_))
total_deaths.append(toint(total_deaths_))
data2[loc] = [time, new_cases, new_deaths, total_cases, total_deaths]
return data2
total_vaccinations.append(toint(total_vaccinations_))
stringency_index.append(toint(stringency_index_))
data2[loc] = {'time': time, 'new_cases': new_cases, 'new_deaths': new_deaths, 'total_cases': total_cases, 'total_deaths': total_deaths, 'total_vaccinations': total_vaccinations, "stringency_index": stringency_index}
return data2, metadata
data = get_data()
data, metadata = get_data()
for plot in plots:
i = importlib.import_module(plot)
i.plot(data, countries, pop)
i.plot(data, countries, pop, metadata=metadata)