coronavis/coronavis.py

#!/usr/bin/python
import requests
import csv
import datetime
import os
import matplotlib.pyplot as pp
import numpy as np
import sys
import importlib
import time
sys.path.append(".")
#### config
# countries of interest
countries = ["Germany", "Italy", "India", "Japan", "Brazil", "Iran", "United States", "World", "United Kingdom", "Sweden"]
# enabled plots
plots = ["basics", "death_per_case",
         #"normalized_to_first_death", "delay_from_china", "delay_from_usa", "normalized_to_ten_cases", "percent_increase",
         "doubling_time",
         "all_countries",
         ]
metadata_fields = [
    "iso_code",
    "continent",
    "location",
    "population",
    "population_density",
    "median_age",
    "aged_65_older",
    "aged_70_older",
    "gdp_per_capita",
    "extreme_poverty",
    "cardiovasc_death_rate",
    "diabetes_prevalence",
    "female_smokers",
    "male_smokers",
    "handwashing_facilities",
    "hospital_beds_per_thousand",
    "life_expectancy",
    "human_development_index",
]
### manual data
# population: sourced ECDC data
from population_repository import pop
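# (pop is assumed here to map location names to population counts; its exact
#  layout lives in population_repository.py, which is not shown in this file)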
###
def toint(a):
    try:
        return int(a)
    except:
        return np.nan

def tofloat(a):
    try:
        return float(a)
    except:
        return np.nan

def addmeta(field, value):
    pass

def get_data():
    """fetch data from remote, cache locally and reorganize internal data
    not beautiful (at all), but effective!!"""
    tries = 10
    delay = 10
    dataurl = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
    date = datetime.date.today()
    datafile = f"{date}-full-data.csv"
    if not os.path.isfile(datafile):
        for n in range(tries):
            try:
                r = requests.get(dataurl)
            except:
                print(f"==> download failed, retrying after {delay}s up to another {tries-n} times…")
                time.sleep(delay)
                continue
            break
        with open(datafile, "wb") as f:
            f.write(r.content)
    else:
        print(f"file found: {datafile}")
    # processing
    data = {}
    metadata = {}
    with open(datafile, "r") as f:
        reader = csv.reader(f)
        for row in reader:
            # the OWID file layout is detected by its column count; older layouts
            # lack the vaccination/stringency columns, so default those to ""
            if len(row) == 6:
                date,location,new_cases,new_deaths,total_cases,total_deaths = row
                total_vaccinations, stringency_index = "", ""
            elif len(row) == 10:
                date,location,new_cases,new_deaths,total_cases,total_deaths,weekly_cases,weekly_deaths,biweekly_cases,biweekly_deaths = row
                total_vaccinations, stringency_index = "", ""
            elif len(row) == 50:
                iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row
                total_vaccinations = ""
            elif len(row) == 52:
                iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,total_vaccinations_per_hundred,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row
            else:
                print(f"WARNING! Table format changed, new header:\n{row}")
                sys.exit(1)
            # skip the header row (it is kept as `header` for the metadata lookup below)
            if location == "location":
                # table header
                header = row
                continue
            # cast to num type
            total_cases = tofloat(total_cases)
            new_cases = tofloat(new_cases)
            total_deaths = tofloat(total_deaths)
            new_deaths = tofloat(new_deaths)
            total_vaccinations = tofloat(total_vaccinations)
            stringency_index = tofloat(stringency_index)
            if location not in data:
                data[location] = []
                metadata[location] = {}
            year, month, day = date.split("-")
            data[location].append([datetime.date(int(year), int(month), int(day)), new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index])
            # catch all data fields
            #dfields = {field: row[n] for n, field in enumerate(header)}
            # add metadata
            for n, field in enumerate(header):
                if field in metadata_fields:
                    if field not in metadata[location]:
                        metadata[location][field] = row[n]
                    else:
                        if metadata[location][field] != row[n]:
                            print(f"{location}: {field} seems not to be a constant ({metadata[location][field]} vs {row[n]})")
    # reorganize data
    data2 = {}
    for loc in data:
        # use "times" so the local list does not shadow the imported time module
        times = []
        new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index = [], [], [], [], [], []
        for entry in data[loc]:
            t_, new_cases_, new_deaths_, total_cases_, total_deaths_, total_vaccinations_, stringency_index_ = entry
            times.append(t_)
            new_cases.append(toint(new_cases_))
            new_deaths.append(toint(new_deaths_))
            total_cases.append(toint(total_cases_))
            total_deaths.append(toint(total_deaths_))
            total_vaccinations.append(toint(total_vaccinations_))
            stringency_index.append(toint(stringency_index_))
        data2[loc] = {'time': times, 'new_cases': new_cases, 'new_deaths': new_deaths, 'total_cases': total_cases, 'total_deaths': total_deaths, 'total_vaccinations': total_vaccinations, "stringency_index": stringency_index}
    return data2, metadata
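
# Layout of the structures returned by get_data(): both dicts are keyed by
# location name; data["Germany"]["time"] is a list of datetime.date objects,
# data["Germany"]["total_cases"] etc. are parallel lists of numbers (np.nan where
# the source field was empty), and metadata["Germany"] holds the per-country
# constants listed in metadata_fields.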
data, metadata = get_data()
for plot in plots:
    i = importlib.import_module(plot)
    i.plot(data, countries, pop, metadata=metadata)
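
Each name in plots is resolved with importlib.import_module, so the script expects a module of that name next to coronavis.py (hence sys.path.append(".")) exposing a plot(data, countries, pop, metadata=...) function. The repository's actual plot modules (basics, death_per_case, doubling_time, all_countries) are not shown here; the following is only a minimal sketch of such a module, and everything beyond the call signature (file name, axis labels, output path) is an assumption.

# basics.py -- hypothetical minimal plot module; only the plot() signature is
# dictated by coronavis.py, the rest is illustrative
import matplotlib.pyplot as pp

def plot(data, countries, pop, metadata=None):
    fig, ax = pp.subplots()
    for country in countries:
        if country not in data:
            continue
        # total_cases may contain np.nan for missing days; matplotlib leaves gaps there
        ax.plot(data[country]["time"], data[country]["total_cases"], label=country)
    ax.set_yscale("log")
    ax.set_ylabel("total confirmed cases")
    ax.legend(fontsize="small")
    fig.autofmt_xdate()
    fig.savefig("basics.png")  # assumed output file name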