coronavis/coronavis.py

#!/usr/bin/python
import requests
import csv
import datetime
import os
import matplotlib.pyplot as pp
import numpy as np
import sys
import importlib
import time
sys.path.append(".")
#### config
# countries of interest
countries = ["Germany", "Italy", "India", "Japan", "Brazil", "Iran", "United States", "World", "United Kingdom", "Sweden"]
# enabled plots
plots = ["basics", "death_per_case",
         #"normalized_to_first_death", "delay_from_china", "delay_from_usa", "normalized_to_ten_cases", "percent_increase",
         "doubling_time",
         "all_countries",
         ]
metadata_fields = [
    "iso_code",
    "continent",
    "location",
    "population",
    "population_density",
    "median_age",
    "aged_65_older",
    "aged_70_older",
    "gdp_per_capita",
    "extreme_poverty",
    "cardiovasc_death_rate",
    "diabetes_prevalence",
    "female_smokers",
    "male_smokers",
    "handwashing_facilities",
    "hospital_beds_per_thousand",
    "life_expectancy",
    "human_development_index",
]
### manual data
# population: sourced ECDC data
from population_repository import pop
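# (pop is assumed here to map location names to population counts; its exact
#  layout lives in population_repository.py, which is not shown in this file)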
###
def toint(a):
    try:
        return int(a)
    except:
        return np.nan

def tofloat(a):
    try:
        return float(a)
    except:
        return np.nan

def addmeta(field, value):
    pass

def get_data():
    """fetch data from remote, cache locally and reorganize internal data
    not beautiful (at all), but effective!!"""
    tries = 10
    delay = 10
    dataurl = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
    date = datetime.date.today()
    datafile = f"{date}-full-data.csv"
    if not os.path.isfile(datafile):
        for n in range(tries):
            try:
                r = requests.get(dataurl)
            except:
                print(f"==> download failed, retrying after {delay}s up to another {tries-n} times…")
                time.sleep(delay)
                continue
            break
        with open(datafile, "wb") as f:
            f.write(r.content)
    else:
        print(f"file found: {datafile}")
    # processing
    data = {}
    metadata = {}
    with open(datafile, "r") as f:
        reader = csv.reader(f)
        for row in reader:
            # the OWID file layout is detected by its column count; older layouts
            # lack the vaccination/stringency columns, so default those to ""
            if len(row) == 6:
                date,location,new_cases,new_deaths,total_cases,total_deaths = row
                total_vaccinations, stringency_index = "", ""
            elif len(row) == 10:
                date,location,new_cases,new_deaths,total_cases,total_deaths,weekly_cases,weekly_deaths,biweekly_cases,biweekly_deaths = row
                total_vaccinations, stringency_index = "", ""
            elif len(row) == 50:
                iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row
                total_vaccinations = ""
            elif len(row) == 52:
                iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,total_vaccinations_per_hundred,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row
            else:
                print(f"WARNING! Table format changed, new header:\n{row}")
                sys.exit(1)
            # skip the header row (it is kept as `header` for the metadata lookup below)
            if location == "location":
                # table header
                header = row
                continue
            # cast to num type
            total_cases = tofloat(total_cases)
            new_cases = tofloat(new_cases)
            total_deaths = tofloat(total_deaths)
            new_deaths = tofloat(new_deaths)
            total_vaccinations = tofloat(total_vaccinations)
            stringency_index = tofloat(stringency_index)
            if location not in data:
                data[location] = []
                metadata[location] = {}
            year, month, day = date.split("-")
            data[location].append([datetime.date(int(year), int(month), int(day)), new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index])
            # catch all data fields
            #dfields = {field: row[n] for n, field in enumerate(header)}
            # add metadata
            for n, field in enumerate(header):
                if field in metadata_fields:
                    if field not in metadata[location]:
                        metadata[location][field] = row[n]
                    else:
                        if metadata[location][field] != row[n]:
                            print(f"{location}: {field} seems not to be a constant ({metadata[location][field]} vs {row[n]})")
    # reorganize data
    data2 = {}
    for loc in data:
        # use "times" so the local list does not shadow the imported time module
        times = []
        new_cases, new_deaths, total_cases, total_deaths, total_vaccinations, stringency_index = [], [], [], [], [], []
        for entry in data[loc]:
            t_, new_cases_, new_deaths_, total_cases_, total_deaths_, total_vaccinations_, stringency_index_ = entry
            times.append(t_)
            new_cases.append(toint(new_cases_))
            new_deaths.append(toint(new_deaths_))
            total_cases.append(toint(total_cases_))
            total_deaths.append(toint(total_deaths_))
            total_vaccinations.append(toint(total_vaccinations_))
            stringency_index.append(toint(stringency_index_))
        data2[loc] = {'time': times, 'new_cases': new_cases, 'new_deaths': new_deaths, 'total_cases': total_cases, 'total_deaths': total_deaths, 'total_vaccinations': total_vaccinations, "stringency_index": stringency_index}
    return data2, metadata
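
# Layout of the structures returned by get_data(): both dicts are keyed by
# location name; data["Germany"]["time"] is a list of datetime.date objects,
# data["Germany"]["total_cases"] etc. are parallel lists of numbers (np.nan where
# the source field was empty), and metadata["Germany"] holds the per-country
# constants listed in metadata_fields.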
data, metadata = get_data()
for plot in plots:
    i = importlib.import_module(plot)
    i.plot(data, countries, pop, metadata=metadata)
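
Each name in plots is resolved with importlib.import_module, so the script expects a module of that name next to coronavis.py (hence sys.path.append(".")) exposing a plot(data, countries, pop, metadata=...) function. The repository's actual plot modules (basics, death_per_case, doubling_time, all_countries) are not shown here; the following is only a minimal sketch of such a module, and everything beyond the call signature (file name, axis labels, output path) is an assumption.

# basics.py -- hypothetical minimal plot module; only the plot() signature is
# dictated by coronavis.py, the rest is illustrative
import matplotlib.pyplot as pp

def plot(data, countries, pop, metadata=None):
    fig, ax = pp.subplots()
    for country in countries:
        if country not in data:
            continue
        # total_cases may contain np.nan for missing days; matplotlib leaves gaps there
        ax.plot(data[country]["time"], data[country]["total_cases"], label=country)
    ax.set_yscale("log")
    ax.set_ylabel("total confirmed cases")
    ax.legend(fontsize="small")
    fig.autofmt_xdate()
    fig.savefig("basics.png")  # assumed output file name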