#!/usr/bin/python
"""Fetch the OWID COVID-19 dataset, cache it locally and run the enabled plots."""
import requests
import csv
import datetime
import os
import matplotlib.pyplot as pp
import numpy as np
import sys
import importlib
import time
import subprocess
import traceback

sys.path.append(".")

#### config
# countries of interest
countries = [
    "Germany",
    "Italy",
    "India",
    "Japan",
    "Brazil",
    "Iran",
    "United States",
    "World",
    "United Kingdom",
    "Sweden",
]

# enabled plots
plots = [
    "basics",
    "death_per_case",
    #"normalized_to_first_death",
    "delay_from_china",
    "delay_from_usa",
    "normalized_to_ten_cases",
    "percent_increase",
    "doubling_time",
    "all_countries",
    "country_details",
]

metadata_fields = [
    "iso_code",
    "continent",
    "location",
    "population",
    "population_density",
    "median_age",
    "aged_65_older",
    "aged_70_older",
    "gdp_per_capita",
    "extreme_poverty",
    "cardiovasc_death_rate",
    "diabetes_prevalence",
    "female_smokers",
    "male_smokers",
    "handwashing_facilities",
    "hospital_beds_per_thousand",
    "life_expectancy",
    "human_development_index",
]

### manual data
# population: sourced ECDC data
from population_repository import pop
###


def toint(a):
    """Return ``int(a)``, or NaN when *a* cannot be converted to an integer."""
    try:
        return int(a)
    except (TypeError, ValueError, OverflowError):
        # int(nan) raises ValueError, int(inf) raises OverflowError
        return np.nan


def tofloat(a):
    """Return ``float(a)``, or NaN when *a* cannot be converted to a float."""
    try:
        return float(a)
    except (TypeError, ValueError, OverflowError):
        return np.nan


def addmeta(field, value):
    """Placeholder; currently does nothing."""
    pass


def _int_value(raw):
    """Parse *raw* as a number and truncate it to int (NaN if unparsable)."""
    # Go through float first: the CSV stores counts such as "12.0", which
    # int() alone would reject.
    return toint(tofloat(raw))


# Columns that must be present for a row to be usable at all.
_REQUIRED_COLUMNS = ("location", "date")

# Columns copied into each location's time series, in output order, together
# with the conversion applied to the raw CSV string.  Columns missing from the
# file yield NaN (or "" for the textual ``tests_units``).
_SERIES_FIELDS = (
    ("new_cases", _int_value),
    ("new_deaths", _int_value),
    ("total_cases", _int_value),
    ("total_deaths", _int_value),
    ("total_vaccinations", _int_value),
    ("stringency_index", _int_value),
    ("reproduction_rate", tofloat),
    ("icu_patients", _int_value),
    ("hosp_patients", _int_value),
    ("weekly_icu_admissions", _int_value),
    ("weekly_hosp_admissions", _int_value),
    ("new_tests", _int_value),
    ("total_tests", _int_value),
    ("positive_rate", tofloat),
    ("tests_per_case", tofloat),
    ("tests_units", str),
    ("new_vaccinations", _int_value),
    ("people_fully_vaccinated", tofloat),
    ("total_boosters", tofloat),
)

# Locations whose population metadata is left untouched (not cast to int).
_NO_POPULATION_CAST = (
    "International",
    "Africa",
    "European Union",
    "Europe",
    "Asia",
    "North America",
    "South America",
    "Oceania",
    "Northern Cyprus",
)

_ARCHIVE_DIR = "/srv/http/dukun.de/corona/data/git"


def _download(url, target, tries, delay):
    """Download *url* to *target*, retrying up to *tries* times.

    Raises:
        RuntimeError: if every attempt failed.
    """
    last_error = None
    for attempt in range(1, tries + 1):
        try:
            response = requests.get(url)
            # Do not cache an HTTP error page as if it were today's data.
            response.raise_for_status()
        except requests.RequestException as err:
            last_error = err
            remaining = tries - attempt
            if remaining:
                print(f"==> download failed, retrying after {delay}s up to another {remaining} times…")
                time.sleep(delay)
            continue
        os.makedirs(os.path.dirname(target) or ".", exist_ok=True)
        with open(target, "wb") as f:
            f.write(response.content)
        return
    raise RuntimeError(f"download of {url} failed after {tries} attempts") from last_error


def _archive(date):
    """Copy the freshly downloaded file into the git archive and push it.

    Best effort: failures are reported on stdout and otherwise ignored.
    """
    try:
        subprocess.run(["/usr/bin/cp", f"../{date}-full-data.csv", "data.csv"], cwd=_ARCHIVE_DIR)
        subprocess.run(["/usr/bin/git", "commit", "-a", f"-m {date}"], cwd=_ARCHIVE_DIR)
        subprocess.run(["/usr/bin/git", "push"], cwd=_ARCHIVE_DIR)
    except Exception as e:
        print(f"File archiving failed with {e} - need for debugging here…")
        print(traceback.format_exc())
        print("\n\n\ncontinuing…\n")


def _read_csv(datafile):
    """Parse *datafile* into per-location time series and metadata.

    Columns are looked up by header name, so added, removed or reordered
    columns do not break parsing.

    Returns:
        ``(series, metadata)``: ``series[location]`` maps ``'time'`` and every
        name in ``_SERIES_FIELDS`` to a list with one entry per row;
        ``metadata[location]`` maps each present ``metadata_fields`` column to
        the first raw string seen for that location.
    """
    wanted_metadata = frozenset(metadata_fields)
    series = {}
    metadata = {}
    header = None
    with open(datafile, "r", encoding="utf-8", newline="") as f:
        for row in csv.reader(f):
            if not row:
                continue
            if header is None:
                header = row
                missing = [col for col in _REQUIRED_COLUMNS if col not in header]
                if missing:
                    print(f"WARNING! Table format changed, missing columns {missing}, new header:\n{row})")
                    sys.exit(1)
                continue
            if len(row) != len(header):
                print(f"WARNING! skipping malformed row with {len(row)} columns (expected {len(header)})")
                continue

            fields = dict(zip(header, row))
            location = fields["location"]
            if location not in series:
                series[location] = {"time": []}
                series[location].update((name, []) for name, _ in _SERIES_FIELDS)
                metadata[location] = {}

            entry = series[location]
            year, month, day = fields["date"].split("-")
            entry["time"].append(datetime.date(int(year), int(month), int(day)))
            for name, convert in _SERIES_FIELDS:
                entry[name].append(convert(fields.get(name, "")))

            # add metadata; report fields that change between rows
            known = metadata[location]
            for field, value in fields.items():
                if field not in wanted_metadata:
                    continue
                if field not in known:
                    known[field] = value
                elif known[field] != value:
                    print(f"{location}: {field} seems not to be a constant ({known[field]} vs {value})")
    return series, metadata


def _fetch_vaccines():
    """Return a dict mapping location name to the vaccines string from OWID."""
    vaccinesurl = "https://github.com/owid/covid-19-data/raw/master/public/data/vaccinations/locations.csv"
    lines = requests.get(vaccinesurl).content.decode("UTF8").split('\n')[1:]
    # Short rows (e.g. the empty one after a trailing newline) are skipped.
    return {row[0]: row[2] for row in csv.reader(lines) if len(row) > 2}


def _forward_fill(values):
    """Replace each NaN in *values* by the preceding non-NaN entry, in place."""
    for n in range(1, len(values)):
        if np.isnan(values[n]) and not np.isnan(values[n - 1]):
            values[n] = values[n - 1]


def get_data():
    """Fetch the dataset (cached once per day) and reorganize it for plotting.

    The CSV is downloaded to ``data/<today>-full-data.csv`` unless that file
    already exists; a fresh download is also archived via git (best effort).

    Returns:
        ``(data, metadata)``: ``data[location]`` is a dict of equally long
        lists keyed by ``'time'`` and the series names (``'new_cases'``,
        ``'total_deaths'``, ...); ``metadata[location]`` holds the per-location
        constants plus, where available, ``'vaccines'``.

    Raises:
        RuntimeError: if the dataset could not be downloaded.
    """
    tries = 10
    delay = 10
    dataurl = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
    today = datetime.date.today()
    datafile = f"data/{today}-full-data.csv"

    if not os.path.isfile(datafile):
        print("==> downloading data")
        _download(dataurl, datafile, tries, delay)
        ## file archiving: include error log just in case…
        _archive(today)
    else:
        print(f"file found: {datafile}")

    # processing
    data, metadata = _read_csv(datafile)

    # get data about vaccines
    vaccines_country_dict = _fetch_vaccines()

    for loc, entry in data.items():
        ### data tweaking and fixing goes here
        # fix vaccination data: not all countries report daily vaccinations
        _forward_fill(entry["total_vaccinations"])
        # fix vaccination data: not all countries report fully vaccinated
        _forward_fill(entry["people_fully_vaccinated"])
        ###

        # add vaccine info to metadata
        if loc in vaccines_country_dict:
            metadata[loc]['vaccines'] = vaccines_country_dict[loc]

        # cast population to int
        if loc not in _NO_POPULATION_CAST:
            try:
                metadata[loc]['population'] = int(float(metadata[loc]['population']))
            except (KeyError, TypeError, ValueError, OverflowError):
                metadata[loc]['population'] = np.nan

    return data, metadata


data, metadata = get_data()

## dump data instead of plotting
DUMP_ONLY = False
if DUMP_ONLY:
    print("dumping data, no plotting")
    import pickle
    with open("data.dump", "wb") as f:
        pickle.dump([data, metadata], f)
    sys.exit()

for plot in plots:
    print(f"==> starting plot: {plot}")
    i = importlib.import_module(plot)
    i.plot(data, countries, pop, metadata=metadata)