AdSense

Tuesday, June 2, 2020

Moving Average (x: date vs y: COVID-19 New Cases)

Moving Average (x: date vs y: COVID-19 New Cases)



0_MacOS_Python_setup.txt
# Install from the Terminal on macOS


#pip3 install -U pandas

#pip3 install -U numpy

#pip3 install -U matplotlib



1_MacOS_Terminal.txt
########## Run Terminal on MacOS and execute
### TO UPDATE
cd "YOUR_WORKING_DIRECTORY"


python3 df.py JPN date new_cases

python3 ma.py dfiso2.csv date new_cases isocodef.txt 7





Input Data files



isocodef.txt
JPN



dfiso2.csv
date,new_cases
2019-12-31,0
2020-01-01,0
2020-01-02,0
2020-01-03,0
2020-01-04,0
2020-01-05,0
2020-01-06,0
2020-01-07,0
2020-01-08,0
2020-01-09,0
2020-01-10,0
2020-01-11,0
2020-01-12,0
2020-01-13,0
2020-01-14,0
2020-01-15,1
2020-01-16,0
2020-01-17,0
2020-01-18,0
2020-01-19,0
2020-01-20,0
2020-01-21,0
2020-01-22,0
2020-01-23,0
2020-01-24,1
2020-01-25,0
2020-01-26,1
2020-01-27,0
2020-01-28,1
2020-01-29,3
2020-01-30,4
2020-01-31,3
2020-02-01,1
2020-02-02,4
2020-02-03,1
2020-02-04,0
2020-02-05,5
2020-02-06,0
2020-02-07,0
2020-02-08,0
2020-02-09,0
2020-02-10,0
2020-02-11,0
2020-02-12,0
2020-02-13,4
2020-02-14,1
2020-02-15,8
2020-02-16,14
2020-02-17,7
2020-02-18,0
2020-02-19,7
2020-02-20,18
2020-02-21,9
2020-02-22,12
2020-02-23,27
2020-02-24,12
2020-02-25,0
2020-02-26,20
2020-02-27,22
2020-02-28,24
2020-02-29,20
2020-03-01,9
2020-03-02,15
2020-03-03,0
2020-03-04,14
2020-03-05,49
2020-03-06,32
2020-03-07,59
2020-03-08,47
2020-03-09,33
2020-03-10,26
2020-03-11,54
2020-03-12,51
2020-03-13,56
2020-03-14,62
2020-03-15,43
2020-03-16,34
2020-03-17,10
2020-03-18,5
2020-03-19,44
2020-03-20,77
2020-03-21,57
2020-03-22,39
2020-03-23,43
2020-03-24,39
2020-03-25,65
2020-03-26,75
2020-03-27,96
2020-03-28,135
2020-03-29,194
2020-03-30,173
2020-03-31,87
2020-04-01,0
2020-04-02,225
2020-04-03,439
2020-04-04,318
2020-04-05,336
2020-04-06,383
2020-04-07,163
2020-04-08,89
2020-04-09,351
2020-04-10,410
2020-04-11,680
2020-04-12,1401
2020-04-13,507
2020-04-14,390
2020-04-15,455
2020-04-16,482
2020-04-17,585
2020-04-18,628
2020-04-19,566
2020-04-20,390
2020-04-21,367
2020-04-22,378
2020-04-23,276
2020-04-24,468
2020-04-25,652
2020-04-26,290
2020-04-27,203
2020-04-28,191
2020-04-29,276
2020-04-30,236
2020-05-01,193
2020-05-02,263
2020-05-03,295
2020-05-04,218
2020-05-05,174
2020-05-06,123
2020-05-07,109
2020-05-08,84
2020-05-09,81
2020-05-10,119
2020-05-11,51
2020-05-12,76
2020-05-13,150
2020-05-14,55
2020-05-15,114
2020-05-16,44
2020-05-17,48
2020-05-18,20
2020-05-19,60
2020-05-20,20
2020-05-21,39
2020-05-22,89
2020-05-23,23
2020-05-24,14
2020-05-25,31
2020-05-26,42
2020-05-27,28
2020-05-28,0
2020-05-29,68
2020-05-30,85
2020-05-31,47
2020-06-01,33
2020-06-02,46
2020-06-03,56
2020-06-04,32
2020-06-05,46
2020-06-06,39




Python files

df.py
########## Getting Data: COVID-19 ##########

##### Run this script as follows:
#
#python3 df.py (ISO code) date new_cases
#
# For instance,
#python3 df.py JPN date new_cases


#####Data Sources:
#
#https://ourworldindata.org/coronavirus-source-data
#https://covid.ourworldindata.org/data/owid-covid-data.csv


import pandas as pd
import io
import requests
import sys


# Positional command-line arguments, e.g.: JPN date new_cases
cli_args = sys.argv

# ISO country code used to select rows from the dataset (e.g. JPN).
isocode = str(cli_args[1])

# Save the ISO code to a small text file so that it can be read back later
# without passing it on the command line again.
isocodef = 'isocodef.txt'
with open(isocodef, 'w') as out:
    out.write(isocode)

# Column names for the x-axis and the y-axis.
xname = str(cli_args[2])    # e.g. date
yname = str(cli_args[3])    # e.g. new_cases


# Our World in Data: the complete COVID-19 dataset as a single CSV file.
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"

# The timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops here on an HTTP error instead of letting an
# error page be parsed as CSV below.
response = requests.get(url, timeout=60)
response.raise_for_status()
dfurl = response.content

# Decode the downloaded bytes and let pandas read them from an in-memory buffer.
df = pd.read_csv(io.StringIO(dfurl.decode('utf-8')))

# Keep a local copy of the full, unfiltered dataset.
df.to_csv("df.csv", header=True, index=False)

#print(df.columns)
'''
Index(['iso_code', 'location', 'date', 'total_cases', 'new_cases',
       'total_deaths', 'new_deaths', 'total_cases_per_million',
       'new_cases_per_million', 'total_deaths_per_million',
       'new_deaths_per_million', 'total_tests', 'new_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand', 'tests_units',
       'stringency_index', 'population', 'population_density', 'median_age',
       'aged_65_older', 'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
       'cvd_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'handwashing_facilities', 'hospital_beds_per_100k'],
      dtype='object')
'''


# Rows of the requested country only, keeping the identifying columns
# together with the chosen x and y columns.
country_rows = df['iso_code'] == isocode
dfiso1 = df[country_rows][['iso_code', 'location', xname, yname]]
dfiso1.to_csv("dfiso1.csv", header=True, index=False)

# Two-column (x, y) table for the same country.
dfiso2 = dfiso1[[xname, yname]]
dfiso2.to_csv("dfiso2.csv", header=True, index=False)



ma.py
########## Moving Average ##########

##### Run this script as follows:
#
#python3 ma.py (a file name that has two columns, x & y) (x-axis) (y-axis) (title text file) (moving average window)
#
# For instance,
#python3 ma.py dfiso2.csv date new_cases isocodef.txt 7


########## import ##########

import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import sys
import datetime

# Timestamp taken once at start-up; it is formatted with strftime further down
# to build the names of the saved figure files.
# Example: dt_now.strftime('%Y-%m-%d_%H:%M:%S') -> 2019-02-04_21:04:15
dt_now = datetime.datetime.now()


########## arguments ##########

# Positional command-line arguments, e.g.:
#   dfiso2.csv date new_cases isocodef.txt 7
argv = sys.argv

dffile = str(argv[1])      # CSV file holding the x and y columns
xname = str(argv[2])       # x-axis column name
yname = str(argv[3])       # y-axis column name
isocodef = str(argv[4])    # text file whose content is used as the plot title
xmawin = int(argv[5])      # moving average window


########## load data files ##########

# Input table; it must contain the xname and yname columns.
df = pd.read_csv(dffile)

# The title text file holds the ISO code (e.g. JPN). Surrounding whitespace is
# stripped so that a trailing newline (typical of a hand-edited file) does not
# leak into the plot title or into the names of the saved figure files.
with open(isocodef) as f:
    isocode = f.read().strip()

print(isocode)


########## data conversion ##########

# Work on an explicit copy: assigning a column on a selection of another
# DataFrame makes pandas emit SettingWithCopyWarning, because it cannot tell
# whether the original frame was meant to be modified.
df2 = df[[xname, yname]].copy()

# Parse the x column (YYYY-MM-DD text) into datetimes and use it as the index
# so that the plots below get a real date axis.
df2[xname] = pd.to_datetime(df2[xname], format='%Y-%m-%d')
df2.set_index(xname, inplace=True)

# Save the converted table.
df2.to_csv('df2.csv')


########## plot (raw data) ##########

#fig = plt.figure()
# One wide figure holding a single axes.
fig = plt.figure(figsize=(16, 8))
plt.rcParams["font.size"] = 10
ax = fig.add_subplot(111)

# Raw data as black bars.
ax.bar(df2.index, df2[yname], color='black', label=str(yname))

# One major tick every `xmawin` days, labelled as month-day.
ax.xaxis.set_major_locator(
    dates.DayLocator(bymonthday=None, interval=xmawin, tz=None)
)
ax.xaxis.set_major_formatter(dates.DateFormatter('%m-%d'))

# Axis labels, title, grid and a legend pinned to the upper-left corner.
ax.set_xlabel(xname)
ax.set_ylabel(yname)
ax.set_title(isocode)
ax.grid()
ax.legend(bbox_to_anchor=(0, 1), loc='upper left', borderaxespad=0, fontsize=12)

# Save under a name carrying the ISO code and the start-up timestamp, then show.
stamp = dt_now.strftime('%Y-%m-%d_%H%M%S')
plt.savefig(f"Figure_1_bar_chart_raw_data_{isocode}_{stamp}.png")
plt.show()


########## generate moving average data ##########
# Mean over a sliding window of the last `xmawin` rows. The first
# xmawin - 1 entries are empty (NaN) because their window is not yet full.
rolling_window = df2[yname].rolling(window=xmawin)
df2ma = rolling_window.mean()

# Save the smoothed series next to the raw data file.
df2ma.to_csv('df2ma.csv')


########## plot (raw data and moving average data) ##########

# One wide figure holding a single axes.
fig = plt.figure(figsize=(16, 8))
plt.rcParams["font.size"] = 10
ax = fig.add_subplot(111)

# Raw data as light gray bars in the background.
ax.bar(df2.index, df2[yname], color='lightgray', label=str(yname))

# Moving average drawn on top as a solid red line.
ma_label = f"{yname}: {xmawin}-calendar-day moving average"
ax.plot(df2ma, color='red', linestyle='solid', label=ma_label)

# One major tick every `xmawin` days, labelled as month-day.
ax.xaxis.set_major_locator(
    dates.DayLocator(bymonthday=None, interval=xmawin, tz=None)
)
ax.xaxis.set_major_formatter(dates.DateFormatter('%m-%d'))

# Axis labels, title, grid and a legend pinned to the upper-left corner.
ax.set_xlabel(xname)
ax.set_ylabel(yname)
ax.set_title(isocode)
ax.grid()
ax.legend(bbox_to_anchor=(0, 1), loc='upper left', borderaxespad=0, fontsize=12)

# Save under a name carrying the window size, the ISO code and the start-up
# timestamp, then show.
stamp = dt_now.strftime('%Y-%m-%d_%H%M%S')
plt.savefig(
    f"Figure_2_bar_chart_raw_data_and_line_chart_moving_average_{xmawin}_days_{isocode}_{stamp}.png"
)
plt.show()





Output Data files


df2.csv
date,new_cases
2019-12-31,0
2020-01-01,0
2020-01-02,0
2020-01-03,0
2020-01-04,0
2020-01-05,0
2020-01-06,0
2020-01-07,0
2020-01-08,0
2020-01-09,0
2020-01-10,0
2020-01-11,0
2020-01-12,0
2020-01-13,0
2020-01-14,0
2020-01-15,1
2020-01-16,0
2020-01-17,0
2020-01-18,0
2020-01-19,0
2020-01-20,0
2020-01-21,0
2020-01-22,0
2020-01-23,0
2020-01-24,1
2020-01-25,0
2020-01-26,1
2020-01-27,0
2020-01-28,1
2020-01-29,3
2020-01-30,4
2020-01-31,3
2020-02-01,1
2020-02-02,4
2020-02-03,1
2020-02-04,0
2020-02-05,5
2020-02-06,0
2020-02-07,0
2020-02-08,0
2020-02-09,0
2020-02-10,0
2020-02-11,0
2020-02-12,0
2020-02-13,4
2020-02-14,1
2020-02-15,8
2020-02-16,14
2020-02-17,7
2020-02-18,0
2020-02-19,7
2020-02-20,18
2020-02-21,9
2020-02-22,12
2020-02-23,27
2020-02-24,12
2020-02-25,0
2020-02-26,20
2020-02-27,22
2020-02-28,24
2020-02-29,20
2020-03-01,9
2020-03-02,15
2020-03-03,0
2020-03-04,14
2020-03-05,49
2020-03-06,32
2020-03-07,59
2020-03-08,47
2020-03-09,33
2020-03-10,26
2020-03-11,54
2020-03-12,51
2020-03-13,56
2020-03-14,62
2020-03-15,43
2020-03-16,34
2020-03-17,10
2020-03-18,5
2020-03-19,44
2020-03-20,77
2020-03-21,57
2020-03-22,39
2020-03-23,43
2020-03-24,39
2020-03-25,65
2020-03-26,75
2020-03-27,96
2020-03-28,135
2020-03-29,194
2020-03-30,173
2020-03-31,87
2020-04-01,0
2020-04-02,225
2020-04-03,439
2020-04-04,318
2020-04-05,336
2020-04-06,383
2020-04-07,163
2020-04-08,89
2020-04-09,351
2020-04-10,410
2020-04-11,680
2020-04-12,1401
2020-04-13,507
2020-04-14,390
2020-04-15,455
2020-04-16,482
2020-04-17,585
2020-04-18,628
2020-04-19,566
2020-04-20,390
2020-04-21,367
2020-04-22,378
2020-04-23,276
2020-04-24,468
2020-04-25,652
2020-04-26,290
2020-04-27,203
2020-04-28,191
2020-04-29,276
2020-04-30,236
2020-05-01,193
2020-05-02,263
2020-05-03,295
2020-05-04,218
2020-05-05,174
2020-05-06,123
2020-05-07,109
2020-05-08,84
2020-05-09,81
2020-05-10,119
2020-05-11,51
2020-05-12,76
2020-05-13,150
2020-05-14,55
2020-05-15,114
2020-05-16,44
2020-05-17,48
2020-05-18,20
2020-05-19,60
2020-05-20,20
2020-05-21,39
2020-05-22,89
2020-05-23,23
2020-05-24,14
2020-05-25,31
2020-05-26,42
2020-05-27,28
2020-05-28,0
2020-05-29,68
2020-05-30,85
2020-05-31,47
2020-06-01,33
2020-06-02,46
2020-06-03,56
2020-06-04,32
2020-06-05,46
2020-06-06,39



df2ma.csv
date,new_cases
2019-12-31,
2020-01-01,
2020-01-02,
2020-01-03,
2020-01-04,
2020-01-05,
2020-01-06,0.0
2020-01-07,0.0
2020-01-08,0.0
2020-01-09,0.0
2020-01-10,0.0
2020-01-11,0.0
2020-01-12,0.0
2020-01-13,0.0
2020-01-14,0.0
2020-01-15,0.14285714285714285
2020-01-16,0.14285714285714285
2020-01-17,0.14285714285714285
2020-01-18,0.14285714285714285
2020-01-19,0.14285714285714285
2020-01-20,0.14285714285714285
2020-01-21,0.14285714285714285
2020-01-22,0.0
2020-01-23,0.0
2020-01-24,0.14285714285714285
2020-01-25,0.14285714285714285
2020-01-26,0.2857142857142857
2020-01-27,0.2857142857142857
2020-01-28,0.42857142857142855
2020-01-29,0.8571428571428571
2020-01-30,1.4285714285714286
2020-01-31,1.7142857142857142
2020-02-01,1.8571428571428572
2020-02-02,2.2857142857142856
2020-02-03,2.4285714285714284
2020-02-04,2.2857142857142856
2020-02-05,2.5714285714285716
2020-02-06,2.0
2020-02-07,1.5714285714285714
2020-02-08,1.4285714285714286
2020-02-09,0.8571428571428571
2020-02-10,0.7142857142857143
2020-02-11,0.7142857142857143
2020-02-12,0.0
2020-02-13,0.5714285714285714
2020-02-14,0.7142857142857143
2020-02-15,1.8571428571428572
2020-02-16,3.857142857142857
2020-02-17,4.857142857142857
2020-02-18,4.857142857142857
2020-02-19,5.857142857142857
2020-02-20,7.857142857142857
2020-02-21,9.0
2020-02-22,9.571428571428571
2020-02-23,11.428571428571429
2020-02-24,12.142857142857142
2020-02-25,12.142857142857142
2020-02-26,14.0
2020-02-27,14.571428571428571
2020-02-28,16.714285714285715
2020-02-29,17.857142857142858
2020-03-01,15.285714285714286
2020-03-02,15.714285714285714
2020-03-03,15.714285714285714
2020-03-04,14.857142857142858
2020-03-05,18.714285714285715
2020-03-06,19.857142857142858
2020-03-07,25.428571428571427
2020-03-08,30.857142857142858
2020-03-09,33.42857142857143
2020-03-10,37.142857142857146
2020-03-11,42.857142857142854
2020-03-12,43.142857142857146
2020-03-13,46.57142857142857
2020-03-14,47.0
2020-03-15,46.42857142857143
2020-03-16,46.57142857142857
2020-03-17,44.285714285714285
2020-03-18,37.285714285714285
2020-03-19,36.285714285714285
2020-03-20,39.285714285714285
2020-03-21,38.57142857142857
2020-03-22,38.0
2020-03-23,39.285714285714285
2020-03-24,43.42857142857143
2020-03-25,52.0
2020-03-26,56.42857142857143
2020-03-27,59.142857142857146
2020-03-28,70.28571428571429
2020-03-29,92.42857142857143
2020-03-30,111.0
2020-03-31,117.85714285714286
2020-04-01,108.57142857142857
2020-04-02,130.0
2020-04-03,179.0
2020-04-04,205.14285714285714
2020-04-05,225.42857142857142
2020-04-06,255.42857142857142
2020-04-07,266.2857142857143
2020-04-08,279.0
2020-04-09,297.0
2020-04-10,292.85714285714283
2020-04-11,344.57142857142856
2020-04-12,496.7142857142857
2020-04-13,514.4285714285714
2020-04-14,546.8571428571429
2020-04-15,599.1428571428571
2020-04-16,617.8571428571429
2020-04-17,642.8571428571429
2020-04-18,635.4285714285714
2020-04-19,516.1428571428571
2020-04-20,499.42857142857144
2020-04-21,496.14285714285717
2020-04-22,485.14285714285717
2020-04-23,455.7142857142857
2020-04-24,439.0
2020-04-25,442.42857142857144
2020-04-26,403.0
2020-04-27,376.2857142857143
2020-04-28,351.14285714285717
2020-04-29,336.57142857142856
2020-04-30,330.85714285714283
2020-05-01,291.57142857142856
2020-05-02,236.0
2020-05-03,236.71428571428572
2020-05-04,238.85714285714286
2020-05-05,236.42857142857142
2020-05-06,214.57142857142858
2020-05-07,196.42857142857142
2020-05-08,180.85714285714286
2020-05-09,154.85714285714286
2020-05-10,129.71428571428572
2020-05-11,105.85714285714286
2020-05-12,91.85714285714286
2020-05-13,95.71428571428571
2020-05-14,88.0
2020-05-15,92.28571428571429
2020-05-16,87.0
2020-05-17,76.85714285714286
2020-05-18,72.42857142857143
2020-05-19,70.14285714285714
2020-05-20,51.57142857142857
2020-05-21,49.285714285714285
2020-05-22,45.714285714285715
2020-05-23,42.714285714285715
2020-05-24,37.857142857142854
2020-05-25,39.42857142857143
2020-05-26,36.857142857142854
2020-05-27,38.0
2020-05-28,32.42857142857143
2020-05-29,29.428571428571427
2020-05-30,38.285714285714285
2020-05-31,43.0
2020-06-01,43.285714285714285
2020-06-02,43.857142857142854
2020-06-03,47.857142857142854
2020-06-04,52.42857142857143
2020-06-05,49.285714285714285
2020-06-06,42.714285714285715




Figures
Figure_1_bar_chart_raw_data_JPN_2020-06-07_082326.png



Figure_2_bar_chart_raw_data_and_line_chart_moving_average_7_days_JPN_2020-06-07_082326.png



No comments:

Post a Comment

Deep Learning (Regression, Multiple Features/Explanatory Variables, Supervised Learning): Implementation and Showing Biases and Weights

Deep Learning (Regression, Multiple Features/Explanatory Variables, Supervised Learning): Implementation and Showing Biases and Weights ...