Various new features: new vaccination overview on the country-specific page, slightly changed text in the index header, moved data correction into the parser, made population an int, added the vaccines in use to the vaccination table, and some bug fixes here and there

2021-01-21 18:42:48 +01:00
parent 3f0ce1b621
commit 71c4ef8065
4 changed files with 166 additions and 22 deletions
--- a/coronavis.py
+++ b/coronavis.py
@@ -94,6 +94,7 @@ def get_data():
    metadata = {}
    with open(datafile, "r") as f:
        reader = csv.reader(f)
+
        for row in reader:
            if len(row) == 6:
                date,location,new_cases,new_deaths,total_cases,total_deaths = row
@@ -105,6 +106,8 @@ def get_data():
                iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,total_vaccinations_per_hundred,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row
            elif len(row) == 54:
                iso_code, continent, location, date, total_cases, new_cases, new_cases_smoothed, total_deaths, new_deaths, new_deaths_smoothed, total_cases_per_million, new_cases_per_million, new_cases_smoothed_per_million, total_deaths_per_million, new_deaths_per_million, new_deaths_smoothed_per_million, reproduction_rate, icu_patients, icu_patients_per_million, hosp_patients, hosp_patients_per_million, weekly_icu_admissions, weekly_icu_admissions_per_million, weekly_hosp_admissions, weekly_hosp_admissions_per_million, new_tests, total_tests, total_tests_per_thousand, new_tests_per_thousand, new_tests_smoothed, new_tests_smoothed_per_thousand, positive_rate, tests_per_case, tests_units, total_vaccinations, new_vaccinations, total_vaccinations_per_hundred, new_vaccinations_per_million, stringency_index, population, population_density, median_age, aged_65_older, aged_70_older, gdp_per_capita, extreme_poverty, cardiovasc_death_rate, diabetes_prevalence, female_smokers, male_smokers, handwashing_facilities, hospital_beds_per_thousand, life_expectancy, human_development_index = row
+            elif len(row) == 55:
+                iso_code, continent, location, date, total_cases, new_cases, new_cases_smoothed, total_deaths, new_deaths, new_deaths_smoothed, total_cases_per_million, new_cases_per_million, new_cases_smoothed_per_million, total_deaths_per_million, new_deaths_per_million, new_deaths_smoothed_per_million, reproduction_rate, icu_patients, icu_patients_per_million, hosp_patients, hosp_patients_per_million, weekly_icu_admissions, weekly_icu_admissions_per_million, weekly_hosp_admissions, weekly_hosp_admissions_per_million, new_tests, total_tests, total_tests_per_thousand, new_tests_per_thousand, new_tests_smoothed, new_tests_smoothed_per_thousand, positive_rate, tests_per_case, tests_units, total_vaccinations, new_vaccinations, new_vaccinations_smoothed, total_vaccinations_per_hundred, new_vaccinations_smoothed_per_million, stringency_index, population, population_density, median_age, aged_65_older, aged_70_older, gdp_per_capita, extreme_poverty, cardiovasc_death_rate, diabetes_prevalence, female_smokers, male_smokers, handwashing_facilities, hospital_beds_per_thousand, life_expectancy, human_development_index = row
            else:
                print(f"WARNING! Table format changed, length now {len(row)}, new header:\n{row})")
                exit(1)
@@ -130,6 +133,7 @@ def get_data():
            total_tests = tofloat(total_tests)
            positive_rate = tofloat(positive_rate)
            tests_per_case = tofloat(tests_per_case)
+            new_vaccinations = tofloat(new_vaccinations)
            tests_units = tests_units

            if location not in data:
@@ -141,7 +145,8 @@ def get_data():
                    new_cases, new_deaths, total_cases, total_deaths, total_vaccinations,
                    stringency_index, reproduction_rate, icu_patients, hosp_patients,
                    weekly_icu_admissions, weekly_hosp_admissions, new_tests,
-                    total_tests, positive_rate, tests_per_case, tests_units,]
+                    total_tests, positive_rate, tests_per_case, tests_units,
+                    new_vaccinations,]
                    )


@@ -155,6 +160,21 @@ def get_data():
                    else:
                        if metadata[location][field] != row[n]:
                            print(f"{location}: {field} seems not to be a constant ({metadata[location][field]} vs {row[n]})")
+
+    ### End of csv reading loop
+
+# get data about vaccines
+    vaccinesurl = "https://github.com/owid/covid-19-data/raw/master/public/data/vaccinations/locations.csv"
+    vacraw = requests.get(vaccinesurl).content.decode("UTF8").split('\n')[1:-1]
+    vacreader = csv.reader(vacraw)
+    vaccines_country_dict = {}
+
+    for row in vacreader:
+        land = row[0]
+        vaccines = row[2]
+        vaccines_country_dict[land] = vaccines
+    del(vaccinesurl, vacraw, vacreader)
+
    # reorganize data
    data2 = {}
    for loc in data:
@@ -170,8 +190,9 @@ def get_data():
        positive_rate = []
        tests_per_case = []
        tests_units = []
+        new_vaccinations = []
        for entry in data[loc]:
-            t_, new_cases_, new_deaths_, total_cases_, total_deaths_, total_vaccinations_, stringency_index_, reproduction_rate_, icu_patients_, hosp_patients_, weekly_icu_admissions_, weekly_hosp_admissions_, new_tests_, total_tests_, positive_rate_, tests_per_case_, tests_units_ = entry
+            t_, new_cases_, new_deaths_, total_cases_, total_deaths_, total_vaccinations_, stringency_index_, reproduction_rate_, icu_patients_, hosp_patients_, weekly_icu_admissions_, weekly_hosp_admissions_, new_tests_, total_tests_, positive_rate_, tests_per_case_, tests_units_, new_vaccinations_ = entry

            time.append(t_)
            new_cases.append(toint(new_cases_))
@@ -189,7 +210,21 @@ def get_data():
            total_tests.append(toint(total_tests_))
            positive_rate.append(positive_rate_)
            tests_per_case.append(tests_per_case_)
+            new_vaccinations.append(toint(new_vaccinations_))
            tests_units.append(tests_units_)
+
+        ### data tweaking and fixing goes here
+
+        # fix vaccination data: not all countries report daily vaccinations
+        for n in range(1, len(total_vaccinations)):
+            if np.isnan(total_vaccinations[n]) and not np.isnan(total_vaccinations[n-1]):
+                total_vaccinations[n] = total_vaccinations[n-1]
+
+
+        ###
+
+
+        # collecting data
        data2[loc] = {'time': time,
                    'new_cases': new_cases,
                    'new_deaths': new_deaths,
@@ -207,11 +242,28 @@ def get_data():
                    'positive_rate': positive_rate,
                    'tests_per_case': tests_per_case,
                    'tests_units': tests_units,
+                    'new_vaccinations': new_vaccinations,
        }
+        # add vaccine info to metadata
+        if loc in vaccines_country_dict:
+            metadata[loc]['vaccines'] = vaccines_country_dict[loc]
+        # cast population to int; fall back to NaN when the value is missing or not numeric
+        if loc != "International":
+            try: metadata[loc]['population'] = int(float(metadata[loc]['population']))
+            except (KeyError, TypeError, ValueError, OverflowError): metadata[loc]['population'] = np.nan
+
    return data2, metadata

 data, metadata = get_data()

+## dump data instead of plotting
+if False:
+    print("dumping data, no plotting")
+    import pickle
+    with open("data.dump", "wb") as f:
+        pickle.dump([data, metadata], f)
+    exit()
+
 for plot in plots:
    i = importlib.import_module(plot)
    i.plot(data, countries, pop, metadata=metadata)