Various new features: new vaccination overview on the country-specific page, slightly changed text in the index head, moved data correction to the parser, made population an int, added the vaccines used to the vaccination table, and some bug fixes here and there

This commit is contained in:
fordprefect
2021-01-21 18:42:48 +01:00
parent 3f0ce1b621
commit 71c4ef8065
4 changed files with 166 additions and 22 deletions

View File

@@ -94,6 +94,7 @@ def get_data():
metadata = {}
with open(datafile, "r") as f:
reader = csv.reader(f)
for row in reader:
if len(row) == 6:
date,location,new_cases,new_deaths,total_cases,total_deaths = row
@@ -105,6 +106,8 @@ def get_data():
iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,total_vaccinations_per_hundred,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index = row
elif len(row) == 54:
iso_code, continent, location, date, total_cases, new_cases, new_cases_smoothed, total_deaths, new_deaths, new_deaths_smoothed, total_cases_per_million, new_cases_per_million, new_cases_smoothed_per_million, total_deaths_per_million, new_deaths_per_million, new_deaths_smoothed_per_million, reproduction_rate, icu_patients, icu_patients_per_million, hosp_patients, hosp_patients_per_million, weekly_icu_admissions, weekly_icu_admissions_per_million, weekly_hosp_admissions, weekly_hosp_admissions_per_million, new_tests, total_tests, total_tests_per_thousand, new_tests_per_thousand, new_tests_smoothed, new_tests_smoothed_per_thousand, positive_rate, tests_per_case, tests_units, total_vaccinations, new_vaccinations, total_vaccinations_per_hundred, new_vaccinations_per_million, stringency_index, population, population_density, median_age, aged_65_older, aged_70_older, gdp_per_capita, extreme_poverty, cardiovasc_death_rate, diabetes_prevalence, female_smokers, male_smokers, handwashing_facilities, hospital_beds_per_thousand, life_expectancy, human_development_index = row
elif len(row) == 55:
iso_code, continent, location, date, total_cases, new_cases, new_cases_smoothed, total_deaths, new_deaths, new_deaths_smoothed, total_cases_per_million, new_cases_per_million, new_cases_smoothed_per_million, total_deaths_per_million, new_deaths_per_million, new_deaths_smoothed_per_million, reproduction_rate, icu_patients, icu_patients_per_million, hosp_patients, hosp_patients_per_million, weekly_icu_admissions, weekly_icu_admissions_per_million, weekly_hosp_admissions, weekly_hosp_admissions_per_million, new_tests, total_tests, total_tests_per_thousand, new_tests_per_thousand, new_tests_smoothed, new_tests_smoothed_per_thousand, positive_rate, tests_per_case, tests_units, total_vaccinations, new_vaccinations, new_vaccinations_smoothed, total_vaccinations_per_hundred, new_vaccinations_smoothed_per_million, stringency_index, population, population_density, median_age, aged_65_older, aged_70_older, gdp_per_capita, extreme_poverty, cardiovasc_death_rate, diabetes_prevalence, female_smokers, male_smokers, handwashing_facilities, hospital_beds_per_thousand, life_expectancy, human_development_index = row
else:
print(f"WARNING! Table format changed, length now {len(row)}, new header:\n{row})")
exit(1)
@@ -130,6 +133,7 @@ def get_data():
total_tests = tofloat(total_tests)
positive_rate = tofloat(positive_rate)
tests_per_case = tofloat(tests_per_case)
new_vaccinations = tofloat(new_vaccinations)
tests_units = tests_units
if location not in data:
@@ -141,7 +145,8 @@ def get_data():
new_cases, new_deaths, total_cases, total_deaths, total_vaccinations,
stringency_index, reproduction_rate, icu_patients, hosp_patients,
weekly_icu_admissions, weekly_hosp_admissions, new_tests,
total_tests, positive_rate, tests_per_case, tests_units,]
total_tests, positive_rate, tests_per_case, tests_units,
new_vaccinations,]
)
@@ -155,6 +160,21 @@ def get_data():
else:
if metadata[location][field] != row[n]:
print(f"{location}: {field} seems not to be a constant ({metadata[location][field]} vs {row[n]})")
### End of csv reading loop
# get data about vaccines: map each location name to the vaccines listed for it
vaccinesurl = "https://github.com/owid/covid-19-data/raw/master/public/data/vaccinations/locations.csv"
# a timeout keeps the script from hanging forever on a stalled connection
vacresponse = requests.get(vaccinesurl, timeout=30)
# fail loudly on an HTTP error instead of parsing an error page as CSV
vacresponse.raise_for_status()
# drop only the header line; blank lines (e.g. from a trailing newline) are
# skipped in the loop below, so the last data row is kept even when the file
# does not end with a newline
vacraw = vacresponse.content.decode("UTF8").split('\n')[1:]
vacreader = csv.reader(vacraw)
vaccines_country_dict = {}
for row in vacreader:
    # blank or truncated line: the columns read below are not all present
    if len(row) < 3:
        continue
    land = row[0]
    vaccines = row[2]
    vaccines_country_dict[land] = vaccines
del(vaccinesurl, vacresponse, vacraw, vacreader)
# reorganize data
data2 = {}
for loc in data:
@@ -170,8 +190,9 @@ def get_data():
positive_rate = []
tests_per_case = []
tests_units = []
new_vaccinations = []
for entry in data[loc]:
t_, new_cases_, new_deaths_, total_cases_, total_deaths_, total_vaccinations_, stringency_index_, reproduction_rate_, icu_patients_, hosp_patients_, weekly_icu_admissions_, weekly_hosp_admissions_, new_tests_, total_tests_, positive_rate_, tests_per_case_, tests_units_ = entry
t_, new_cases_, new_deaths_, total_cases_, total_deaths_, total_vaccinations_, stringency_index_, reproduction_rate_, icu_patients_, hosp_patients_, weekly_icu_admissions_, weekly_hosp_admissions_, new_tests_, total_tests_, positive_rate_, tests_per_case_, tests_units_, new_vaccinations_ = entry
time.append(t_)
new_cases.append(toint(new_cases_))
@@ -189,7 +210,21 @@ def get_data():
total_tests.append(toint(total_tests_))
positive_rate.append(positive_rate_)
tests_per_case.append(tests_per_case_)
new_vaccinations.append(toint(new_vaccinations_))
tests_units.append(tests_units_)
### data tweaking and fixing goes here
# fix vaccination data: not all countries report daily vaccinations
for n in range(1, len(total_vaccinations)):
if np.isnan(total_vaccinations[n]) and not np.isnan(total_vaccinations[n-1]):
total_vaccinations[n] = total_vaccinations[n-1]
###
# collecting data
data2[loc] = {'time': time,
'new_cases': new_cases,
'new_deaths': new_deaths,
@@ -207,11 +242,28 @@ def get_data():
'positive_rate': positive_rate,
'tests_per_case': tests_per_case,
'tests_units': tests_units,
'new_vaccinations': new_vaccinations,
}
# add vaccine info to metadata
if loc in vaccines_country_dict:
metadata[loc]['vaccines'] = vaccines_country_dict[loc]
# cast population to int; going through float() also accepts values that
# carry a decimal part (e.g. "1234.0")
if loc != "International":
    try:
        metadata[loc]['population'] = int(float(metadata[loc]['population']))
    except (KeyError, TypeError, ValueError, OverflowError):
        # missing or unparsable population: store NaN as the "unknown" marker
        metadata[loc]['population'] = np.nan
return data2, metadata
# parse the data once; every plot module below receives the same objects
data, metadata = get_data()

## dump data instead of plotting
if False:  # manual switch: change to True to pickle the parsed data and quit
    print("dumping data, no plotting")
    import pickle
    with open("data.dump", "wb") as dumpfile:
        pickle.dump([data, metadata], dumpfile)
    exit()

# import each plot module by name and let it draw its figure(s)
for plot in plots:
    plotmodule = importlib.import_module(plot)
    plotmodule.plot(data, countries, pop, metadata=metadata)