COVID-19 cases over time in Czech Republic

Ondřej Šrámek, Charles University, Prague, Czech Republic, ondrej.sramek@gmail.com, 6 April 2020

Uses data from CSSE JHU available on GitHub: https://github.com/CSSEGISandData/2019-nCoV Therefore, I first run

git clone https://github.com/CSSEGISandData/COVID-19.git

in the directory where I then run this script

jupyter-notebook covid19time.ipynb

You can download the script by changing the extension in the URL from html to ipynb. You need to have Jupyter installed; I use Anaconda distribution on my Mac.

See also http://91-divoc.com/pages/covid-visualization/, esp. for countries comparison visualisation.

KAAA4880.jpeg [image stolen somewhere on the internets]

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from scipy.optimize import curve_fit
In [2]:
plt.rcParams['figure.figsize'] = (10,7)
x_label_rot = 60
In [3]:
# read global time-series dataset

filename = 'COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'

#globdat = np.genfromtxt(filename, delimiter=',') 
### the above did not work, entries like "Korea, North" cause trouble -> use pandas instead

globdat_pd = pd.read_csv(filename)
globdat_pd
Out[3]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 3/27/20 3/28/20 3/29/20 3/30/20 3/31/20 4/1/20 4/2/20 4/3/20 4/4/20 4/5/20
0 NaN Afghanistan 33.000000 65.000000 0 0 0 0 0 0 ... 110 110 120 170 174 237 273 281 299 349
1 NaN Albania 41.153300 20.168300 0 0 0 0 0 0 ... 186 197 212 223 243 259 277 304 333 361
2 NaN Algeria 28.033900 1.659600 0 0 0 0 0 0 ... 409 454 511 584 716 847 986 1171 1251 1320
3 NaN Andorra 42.506300 1.521800 0 0 0 0 0 0 ... 267 308 334 370 376 390 428 439 466 501
4 NaN Angola -11.202700 17.873900 0 0 0 0 0 0 ... 4 5 7 7 7 8 8 8 10 14
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
257 NaN Malawi -13.254308 34.301525 0 0 0 0 0 0 ... 0 0 0 0 0 0 3 3 4 4
258 Falkland Islands (Islas Malvinas) United Kingdom -51.796300 -59.523600 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 2
259 Saint Pierre and Miquelon France 46.885200 -56.315900 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
260 NaN South Sudan 6.877000 31.307000 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
261 NaN Western Sahara 24.215500 -12.885800 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 4

262 rows × 79 columns

In [4]:
c0 = 4 # index where the actual time series begins

# dates string array, convert to numpy.datetime64
days_str = globdat_pd.columns[c0:]
days = np.array([ np.datetime64( dt.datetime.strptime(days_str[i], '%m/%d/%y') ) for i in np.arange(days_str.size) ])
idays = np.arange(days.size)

# cases counts array
counts = globdat_pd.iloc[:, c0:].to_numpy()
countries = globdat_pd.iloc[:, 1].to_numpy()
In [5]:
# functions for curve fitting

def func0(x, x0, a):
    return a * (x-x0) # linear

def func1(x, x0, a):
    return np.exp(a*(x-x0)) # exponential

def func2(x, a, b, c):
    return a + x * (b + x * c) # quadratic

def func3(x, x0, k, L):
    return L / (1 + np.exp(-k*(x-x0))) # logistic
In [6]:
country = 'Czechia'
rowtuple = np.where(countries == country)
print ('Number of entries with country=={:s} is {:d}'.format(country, rowtuple[0].size))
row = rowtuple[0][0]
count = counts[row,:]
nonzero = np.where(count > 0)
xx = days[nonzero]
yy = count[nonzero]
ii = idays[nonzero]
Number of entries with country==Czechia is 1

Fitting the entire timeseries

In [7]:
# curve fitting
print('LINEAR FIT IN Y-LOG SPACE')
popt0, pcov0 = curve_fit(func0, ii, np.log(yy))
tdbl0 = np.log(2.)/popt0[1]
print('popt0 =', popt0, ', doubling time = {:.1f} days\n'.format(tdbl0))
#print(pcov0)

# curve fitting
print('EXP FIT IN Y-LIN SPACE')
popt1, pcov1 = curve_fit(func1, ii, yy)
tdbl1 = np.log(2.)/popt1[1]
print('popt1 =', popt1, ', doubling time = {:.1f} days\n'.format(tdbl1))
#print(pcov1)

# curve fitting
print('DEG-2 POLYNOMIAL FIT IN Y-LOG SPACE')
popt2, pcov2 = curve_fit(func2, ii, np.log(yy))
tdbl2 = np.log(2.)/(popt2[1]+2*ii[-1]*popt2[2])
print('popt2 =', popt2, ', instantaneous doubling time = {:.1f} days\n'.format(tdbl2))
#print(pcov2)

# curve fitting
print('LOGISTIC FIT')
popt3, pcov3 = curve_fit(func3, ii, yy, p0 = (70, 0.2, 6000))
print('popt3 =', popt3)
#print(pcov3)
LINEAR FIT IN Y-LOG SPACE
popt0 = [30.09071497  0.21577323] , doubling time = 3.2 days

EXP FIT IN Y-LIN SPACE
popt1 = [-4.39133899  0.10909963] , doubling time = 6.4 days

DEG-2 POLYNOMIAL FIT IN Y-LOG SPACE
popt2 = [-2.30660805e+01  8.22965709e-01 -5.37338473e-03] , instantaneous doubling time = 25.0 days

LOGISTIC FIT
popt3 = [6.71528057e+01 2.05853417e-01 5.65978695e+03]
In [8]:
# plotting
def fig_lin():
    plt.plot(xx, yy, 'o-', label='data')
    yyfit = np.exp(func0(ii, *popt0))
    skip = 4
    plt.plot(xx[0:xx.size-skip], yyfit[0:xx.size-skip], '--', label='linear fit in log-y space, doubling time = %.1f days' % tdbl0)
    plt.plot(xx, func1(ii, *popt1), '--', label='exp fit in lin-y space, doubling time = %.1f days' % tdbl1)
    plt.plot(xx, np.exp(func2(ii, *popt2)), '--', label='deg-2 polynomial fit in log-y space, current doubling time = %.1f days' % tdbl2)
    #plt.plot(xx, func3(ii, *popt3), '--', label='logistic fit')
    plt.xticks(rotation=x_label_rot)
    plt.legend()
    plt.ylabel('Cases')
    plt.title('COVID-19 cases in the Czech Republic')
    plt.savefig('fig_lin.pdf')
    plt.savefig('fig_lin.png')
    plt.show()

def fig_semilog():
    plt.semilogy(xx, yy, 'o-', label='data')
    plt.semilogy(xx, np.exp(func0(ii, *popt0)),  '--', label='linear fit in log-y space, doubling time = %.1f days' % tdbl0)
    plt.semilogy(xx, func1(ii, *popt1), '--', label='exp fit in lin-y space, doubling time = %.1f days' % tdbl1)
    plt.semilogy(xx, np.exp(func2(ii, *popt2)), '--', label='deg-2 polynomial fit in log-y space, current doubling time = %.1f days' % tdbl2)
    #plt.semilogy(xx, func3(ii, *popt3), '--', label='logistic fit')
    plt.xticks(rotation=x_label_rot)
    plt.legend()
    plt.ylabel('Cases')
    plt.title('COVID-19 cases in the Czech Republic')
    plt.savefig('fig_log.pdf')
    plt.savefig('fig_log.png')
    plt.show()
In [9]:
fig_lin()
In [10]:
fig_semilog()
In [11]:
def fig_logistic():
    plt.plot(ii-ii[0], yy, 'o-', label='data')
    ii2 = np.arange(ii[0],ii[0]+62)
    plt.plot(ii2-ii[0], func3(ii2, *popt3), '--', label='logistic fit')
    plt.legend()
    plt.xlabel('Day')
    plt.ylabel('Cases')
    plt.title('COVID-19 cases in the Czech Republic')
    plt.savefig('fig_logistic.pdf')
    plt.savefig('fig_logistic.png')
    plt.show()

#fig_logistic()

Fitting the tail of the time series

As fitting the entire dataset is not too good to aproximate the curent trend.

In [12]:
ntail = 8
xx1 = xx[-ntail:]
yy1 = yy[-ntail:]
ii1 = ii[-ntail:]

# curve fitting
print('LINEAR FIT IN Y-LOG SPACE')
popt01, pcov01 = curve_fit(func0, ii1, np.log(yy1))
tdbl01 = np.log(2.)/popt01[1]
print('popt01 =', popt01, ', doubling time = {:.1f} days\n'.format(tdbl01))
#print(pcov01)

# curve fitting
print('EXP FIT IN Y-LIN SPACE')
popt11, pcov11 = curve_fit(func1, ii1, yy1)
tdbl11 = np.log(2.)/popt11[1]
print('popt11 =', popt11, ', doubling time = {:.1f} days\n'.format(tdbl11))
#print(pcov11)
LINEAR FIT IN Y-LOG SPACE
popt01 = [-41.73464398   0.07309246] , doubling time = 9.5 days

EXP FIT IN Y-LIN SPACE
popt11 = [-43.68266909   0.07185081] , doubling time = 9.6 days

In [13]:
# plotting
def fig_lin1():
    plt.plot(xx1, yy1, 'o-', label='data')
    #plt.plot(xx1, np.exp(func0(ii1, *popt01)), '--', label='linear fit in log-y space, doubling time = %.1f days' % tdbl01)
    plt.plot(xx1, func1(ii1, *popt11), '--', label='exp fit in lin-y space, doubling time = %.1f days' % tdbl11)
    plt.xticks(rotation=x_label_rot)
    plt.legend()
    plt.ylabel('Cases')
    plt.title('COVID-19 cases in the Czech Republic')
    plt.savefig('fig_lin1.pdf')
    plt.savefig('fig_lin1.png')
    plt.show()

def fig_semilog1():
    plt.semilogy(xx1, yy1, 'o-', label='data')
    #plt.semilogy(xx1, np.exp(func0(ii1, *popt01)),  '--', label='linear fit in log-y space, doubling time = %.1f days' % tdbl01)
    plt.semilogy(xx1, func1(ii1, *popt11), '--', label='exp fit in lin-y space, doubling time = %.1f days' % tdbl11)
    plt.xticks(rotation=x_label_rot)
    plt.legend()
    plt.ylabel('Cases')
    plt.title('COVID-19 cases in the Czech Republic')
    plt.savefig('fig_log1.pdf')
    plt.savefig('fig_log1.png')
    plt.show()
In [14]:
fig_lin1()
fig_semilog1()
In [15]:
print ('hello bird and crocodile and mámo and amálko')
hello bird and crocodile and mámo and amálko
In [ ]: