Ondřej Šrámek, Charles University, Prague, Czech Republic, ondrej.sramek@gmail.com, 6 April 2020
Uses data from CSSE JHU available on GitHub: https://github.com/CSSEGISandData/2019-nCoV Therefore, I first run
git clone https://github.com/CSSEGISandData/COVID-19.git
in the directory where I then run this script
jupyter-notebook covid19time.ipynb
You can download the script by changing the extension in the URL from html to ipynb. You need to have Jupyter installed; I use Anaconda distribution on my Mac.
See also http://91-divoc.com/pages/covid-visualization/, esp. for countries comparison visualisation.
[image stolen somewhere on the internets]
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from scipy.optimize import curve_fit
plt.rcParams['figure.figsize'] = (10,7)
x_label_rot = 60
# read global time-series dataset
filename = 'COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
#globdat = np.genfromtxt(filename, delimiter=',')
### the above did not work, entries like "Korea, North" cause trouble -> use pandas instead
globdat_pd = pd.read_csv(filename)
globdat_pd
c0 = 4 # index where the actual time series begins
# dates string array, convert to numpy.datetime64
days_str = globdat_pd.columns[c0:]
days = np.array([ np.datetime64( dt.datetime.strptime(days_str[i], '%m/%d/%y') ) for i in np.arange(days_str.size) ])
idays = np.arange(days.size)
# cases counts array
counts = globdat_pd.iloc[:, c0:].to_numpy()
countries = globdat_pd.iloc[:, 1].to_numpy()
# functions for curve fitting
def func0(x, x0, a):
return a * (x-x0) # linear
def func1(x, x0, a):
return np.exp(a*(x-x0)) # exponential
def func2(x, a, b, c):
return a + x * (b + x * c) # quadratic
def func3(x, x0, k, L):
return L / (1 + np.exp(-k*(x-x0))) # logistic
country = 'Czechia'
rowtuple = np.where(countries == country)
print ('Number of entries with country=={:s} is {:d}'.format(country, rowtuple[0].size))
row = rowtuple[0][0]
count = counts[row,:]
nonzero = np.where(count > 0)
xx = days[nonzero]
yy = count[nonzero]
ii = idays[nonzero]
# curve fitting
print('LINEAR FIT IN Y-LOG SPACE')
popt0, pcov0 = curve_fit(func0, ii, np.log(yy))
tdbl0 = np.log(2.)/popt0[1]
print('popt0 =', popt0, ', doubling time = {:.1f} days\n'.format(tdbl0))
#print(pcov0)
# curve fitting
print('EXP FIT IN Y-LIN SPACE')
popt1, pcov1 = curve_fit(func1, ii, yy)
tdbl1 = np.log(2.)/popt1[1]
print('popt1 =', popt1, ', doubling time = {:.1f} days\n'.format(tdbl1))
#print(pcov1)
# curve fitting
print('DEG-2 POLYNOMIAL FIT IN Y-LOG SPACE')
popt2, pcov2 = curve_fit(func2, ii, np.log(yy))
tdbl2 = np.log(2.)/(popt2[1]+2*ii[-1]*popt2[2])
print('popt2 =', popt2, ', instantaneous doubling time = {:.1f} days\n'.format(tdbl2))
#print(pcov2)
# curve fitting
print('LOGISTIC FIT')
popt3, pcov3 = curve_fit(func3, ii, yy, p0 = (70, 0.2, 6000))
print('popt3 =', popt3)
#print(pcov3)
# plotting
def fig_lin():
plt.plot(xx, yy, 'o-', label='data')
yyfit = np.exp(func0(ii, *popt0))
skip = 4
plt.plot(xx[0:xx.size-skip], yyfit[0:xx.size-skip], '--', label='linear fit in log-y space, doubling time = %.1f days' % tdbl0)
plt.plot(xx, func1(ii, *popt1), '--', label='exp fit in lin-y space, doubling time = %.1f days' % tdbl1)
plt.plot(xx, np.exp(func2(ii, *popt2)), '--', label='deg-2 polynomial fit in log-y space, current doubling time = %.1f days' % tdbl2)
#plt.plot(xx, func3(ii, *popt3), '--', label='logistic fit')
plt.xticks(rotation=x_label_rot)
plt.legend()
plt.ylabel('Cases')
plt.title('COVID-19 cases in the Czech Republic')
plt.savefig('fig_lin.pdf')
plt.savefig('fig_lin.png')
plt.show()
def fig_semilog():
plt.semilogy(xx, yy, 'o-', label='data')
plt.semilogy(xx, np.exp(func0(ii, *popt0)), '--', label='linear fit in log-y space, doubling time = %.1f days' % tdbl0)
plt.semilogy(xx, func1(ii, *popt1), '--', label='exp fit in lin-y space, doubling time = %.1f days' % tdbl1)
plt.semilogy(xx, np.exp(func2(ii, *popt2)), '--', label='deg-2 polynomial fit in log-y space, current doubling time = %.1f days' % tdbl2)
#plt.semilogy(xx, func3(ii, *popt3), '--', label='logistic fit')
plt.xticks(rotation=x_label_rot)
plt.legend()
plt.ylabel('Cases')
plt.title('COVID-19 cases in the Czech Republic')
plt.savefig('fig_log.pdf')
plt.savefig('fig_log.png')
plt.show()
fig_lin()
fig_semilog()
def fig_logistic():
plt.plot(ii-ii[0], yy, 'o-', label='data')
ii2 = np.arange(ii[0],ii[0]+62)
plt.plot(ii2-ii[0], func3(ii2, *popt3), '--', label='logistic fit')
plt.legend()
plt.xlabel('Day')
plt.ylabel('Cases')
plt.title('COVID-19 cases in the Czech Republic')
plt.savefig('fig_logistic.pdf')
plt.savefig('fig_logistic.png')
plt.show()
#fig_logistic()
As fitting the entire dataset is not too good to aproximate the curent trend.
ntail = 8
xx1 = xx[-ntail:]
yy1 = yy[-ntail:]
ii1 = ii[-ntail:]
# curve fitting
print('LINEAR FIT IN Y-LOG SPACE')
popt01, pcov01 = curve_fit(func0, ii1, np.log(yy1))
tdbl01 = np.log(2.)/popt01[1]
print('popt01 =', popt01, ', doubling time = {:.1f} days\n'.format(tdbl01))
#print(pcov01)
# curve fitting
print('EXP FIT IN Y-LIN SPACE')
popt11, pcov11 = curve_fit(func1, ii1, yy1)
tdbl11 = np.log(2.)/popt11[1]
print('popt11 =', popt11, ', doubling time = {:.1f} days\n'.format(tdbl11))
#print(pcov11)
# plotting
def fig_lin1():
plt.plot(xx1, yy1, 'o-', label='data')
#plt.plot(xx1, np.exp(func0(ii1, *popt01)), '--', label='linear fit in log-y space, doubling time = %.1f days' % tdbl01)
plt.plot(xx1, func1(ii1, *popt11), '--', label='exp fit in lin-y space, doubling time = %.1f days' % tdbl11)
plt.xticks(rotation=x_label_rot)
plt.legend()
plt.ylabel('Cases')
plt.title('COVID-19 cases in the Czech Republic')
plt.savefig('fig_lin1.pdf')
plt.savefig('fig_lin1.png')
plt.show()
def fig_semilog1():
plt.semilogy(xx1, yy1, 'o-', label='data')
#plt.semilogy(xx1, np.exp(func0(ii1, *popt01)), '--', label='linear fit in log-y space, doubling time = %.1f days' % tdbl01)
plt.semilogy(xx1, func1(ii1, *popt11), '--', label='exp fit in lin-y space, doubling time = %.1f days' % tdbl11)
plt.xticks(rotation=x_label_rot)
plt.legend()
plt.ylabel('Cases')
plt.title('COVID-19 cases in the Czech Republic')
plt.savefig('fig_log1.pdf')
plt.savefig('fig_log1.png')
plt.show()
fig_lin1()
fig_semilog1()
print ('hello bird and crocodile and mámo and amálko')