Get Covid19 data
Tags: #johnshopkins #opendata #analytics
Author: Florent Ravenel

Input

Import libraries

1
import pandas as pd
2
import naas
Copied!

Variables

1
# Base location of the JHU CSSE global time-series CSV files on GitHub.
# Kept in one place so the three dataset URLs below cannot drift apart.
JHU_BASE_URL = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series"
)

# Input URLs of the raw csv dataset.
# The "time_series_covid19_<indicator>_global.csv" file-name pattern matters:
# get_data_url() derives each row's "Indicator" label from it.
urls = [
    f"{JHU_BASE_URL}/time_series_covid19_{indicator}_global.csv"
    for indicator in ("confirmed", "deaths", "recovered")
]

# Output paths
title = "DB_COVID19_JHU"
output_csv = f"{title}.csv"
Copied!

Model

Get data from JHU

1
def get_data_url(urls):
    """Read every CSV in *urls* and stack the results into one DataFrame.

    Each file's rows get an ``Indicator`` column derived from the file name:
    the text between ``/time_series_covid19_`` and ``_global.csv``,
    capitalized (e.g. ``..._confirmed_global.csv`` -> ``"Confirmed"``).

    Args:
        urls: Iterable of CSV locations, each passed as-is to
            ``pd.read_csv``.

    Returns:
        The concatenation of all files in input order. Each file keeps its
        own row index, so index labels repeat across files. An empty
        DataFrame is returned when *urls* is empty.
    """
    frames = []
    for url in urls:
        tmp_df = pd.read_csv(url)
        indicator = url.split("/time_series_covid19_")[-1].split("_global.csv")[0]
        tmp_df["Indicator"] = indicator.capitalize()
        frames.append(tmp_df)

    # pd.concat raises on an empty list, so keep the "no input" result
    # an empty frame.
    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a frame inside the loop, which
    # would re-copy every previously loaded row on each iteration.
    return pd.concat(frames)
8
9
# Load the raw JHU time series listed in `urls` into a single frame.
df_init = get_data_url(urls)
# Bare expression: displayed as the cell output when run in a notebook.
df_init
Copied!

Clean, reshape and aggregate the JHU data (adds active cases)

1
def get_all_data(df_init):
    """Reshape the raw wide JHU frame into a long table with active cases.

    Steps:
      1. Drop the ``Province/State`` column (if present).
      2. Melt the remaining non-id columns into ``Date`` / ``Value`` rows,
         replace missing values with 0 and parse ``Date`` as datetime.
      3. Add an ``"Active cases"`` indicator: rows tagged ``"Deaths"`` or
         ``"Recovered"`` count negatively, every other row positively.
      4. Sum ``Value`` per (country, lat, long, indicator, date).
      5. Rename ``Country/Region`` to ``COUNTRY`` and upper-case all columns.

    Args:
        df_init: DataFrame with ``Country/Region``, ``Lat``, ``Long`` and
            ``Indicator`` columns plus one column per date. It is not
            modified.

    Returns:
        A new DataFrame with columns ``COUNTRY``, ``LAT``, ``LONG``,
        ``INDICATOR``, ``DATE`` and ``VALUE`` and a fresh 0..n-1 index.
    """
    # Cleaning. drop() already returns a new frame, so the caller's data is
    # untouched; errors="ignore" accepts input that has no province column.
    df = df_init.drop(columns="Province/State", errors="ignore")

    # Melt data: one row per (location, indicator, date).
    df = pd.melt(
        df,
        id_vars=["Country/Region", "Lat", "Long", "Indicator"],
        var_name="Date",
        value_name="Value",
    ).fillna(0)
    df["Date"] = pd.to_datetime(df["Date"])

    # Calc active cases: flip the sign of deaths/recoveries so that the
    # group-wise sum below nets them against the remaining rows.
    df_active = df.copy()
    is_outcome = df_active["Indicator"].isin(["Deaths", "Recovered"])
    df_active.loc[is_outcome, "Value"] *= -1
    df_active["Indicator"] = "Active cases"

    # Concat data
    df = pd.concat([df, df_active])

    # Group by country/region. Lat/Long are part of the key, so rows of the
    # same country with different coordinates stay separate.
    to_group = ["Country/Region", "Lat", "Long", "Indicator", "Date"]
    df = df.groupby(to_group, as_index=False).agg({"Value": "sum"})

    # Cleaning
    df = df.rename(columns={"Country/Region": "COUNTRY"})
    df.columns = df.columns.str.upper()
    return df.reset_index(drop=True)
29
30
# Build the long-format table (including "Active cases") from the raw frame.
df_clean = get_all_data(df_init)
# Bare expression: displayed as the cell output when run in a notebook.
df_clean
Copied!

Output

Save the dataframe to a CSV file

1
# Write the cleaned table to `output_csv`; the row index is left out of the file.
df_clean.to_csv(path_or_buf=output_csv, index=False)
Copied!
Copy link
Edit on GitHub