AdSense

Monday, June 1, 2020

Polynomial Regression (x: date vs y: COVID-19 New Cases)

Polynomial Regression (x: date vs y: COVID-19 New Cases)



0_MacOS_Python_setup.txt
# Install the required packages from the macOS Terminal (remove the leading "#" from each line below to run it)



#pip3 install -U matplotlib

#pip3 install -U numpy

#pip3 install -U pandas

#pip3 install -U sympy

#pip3 install -U requests

#pip3 install -U scikit-learn

#pip3 install -U scikit-learn

#pip3 install -U ipython




1_MacOS_Terminal.txt
########## Open Terminal on macOS and execute the following commands
### Replace YOUR_WORKING_DIRECTORY with the folder that contains df.py, bar.py and pr.py
cd "YOUR_WORKING_DIRECTORY"


python3 df.py JPN date new_cases


python3 bar.py dfiso2.csv date new_cases


python3 pr.py 8 dfiso2.csv new_cases 12





Data files



dfiso2.csv
date,new_cases
2019-12-31,0
2020-01-01,0
2020-01-02,0
2020-01-03,0
2020-01-04,0
2020-01-05,0
2020-01-06,0
2020-01-07,0
2020-01-08,0
2020-01-09,0
2020-01-10,0
2020-01-11,0
2020-01-12,0
2020-01-13,0
2020-01-14,0
2020-01-15,1
2020-01-16,0
2020-01-17,0
2020-01-18,0
2020-01-19,0
2020-01-20,0
2020-01-21,0
2020-01-22,0
2020-01-23,0
2020-01-24,1
2020-01-25,0
2020-01-26,1
2020-01-27,0
2020-01-28,1
2020-01-29,3
2020-01-30,4
2020-01-31,3
2020-02-01,1
2020-02-02,4
2020-02-03,1
2020-02-04,0
2020-02-05,5
2020-02-06,0
2020-02-07,0
2020-02-08,0
2020-02-09,0
2020-02-10,0
2020-02-11,0
2020-02-12,0
2020-02-13,4
2020-02-14,1
2020-02-15,8
2020-02-16,14
2020-02-17,7
2020-02-18,0
2020-02-19,7
2020-02-20,18
2020-02-21,9
2020-02-22,12
2020-02-23,27
2020-02-24,12
2020-02-25,0
2020-02-26,20
2020-02-27,22
2020-02-28,24
2020-02-29,20
2020-03-01,9
2020-03-02,15
2020-03-03,0
2020-03-04,14
2020-03-05,49
2020-03-06,32
2020-03-07,59
2020-03-08,47
2020-03-09,33
2020-03-10,26
2020-03-11,54
2020-03-12,51
2020-03-13,56
2020-03-14,62
2020-03-15,43
2020-03-16,34
2020-03-17,10
2020-03-18,5
2020-03-19,44
2020-03-20,77
2020-03-21,57
2020-03-22,39
2020-03-23,43
2020-03-24,39
2020-03-25,65
2020-03-26,75
2020-03-27,96
2020-03-28,135
2020-03-29,194
2020-03-30,173
2020-03-31,87
2020-04-01,0
2020-04-02,225
2020-04-03,439
2020-04-04,318
2020-04-05,336
2020-04-06,383
2020-04-07,163
2020-04-08,89
2020-04-09,351
2020-04-10,410
2020-04-11,680
2020-04-12,1401
2020-04-13,507
2020-04-14,390
2020-04-15,455
2020-04-16,482
2020-04-17,585
2020-04-18,628
2020-04-19,566
2020-04-20,390
2020-04-21,367
2020-04-22,378
2020-04-23,276
2020-04-24,468
2020-04-25,652
2020-04-26,290
2020-04-27,203
2020-04-28,191
2020-04-29,276
2020-04-30,236
2020-05-01,193
2020-05-02,263
2020-05-03,295
2020-05-04,218
2020-05-05,174
2020-05-06,123
2020-05-07,109
2020-05-08,84
2020-05-09,81
2020-05-10,119
2020-05-11,51
2020-05-12,76
2020-05-13,150
2020-05-14,55
2020-05-15,114
2020-05-16,44
2020-05-17,48
2020-05-18,20
2020-05-19,60
2020-05-20,20
2020-05-21,39
2020-05-22,89
2020-05-23,23
2020-05-24,14
2020-05-25,31
2020-05-26,42
2020-05-27,28
2020-05-28,0
2020-05-29,68
2020-05-30,85
2020-05-31,47




Python files

df.py
########## Getting Data: COVID-19 ##########

##### Run this script as follows:
#
#python3 df.py (ISO code) date new_cases
#
# For instance,
#python3 df.py JPN date new_cases


#####Data Sources:
#
#https://ourworldindata.org/coronavirus-source-data
#https://covid.ourworldindata.org/data/owid-covid-data.csv


import pandas as pd
import io
import requests
import sys


########## arguments ##########
#
# argv[1]: ISO country code used to filter the rows (e.g. JPN)
# argv[2]: name of the x column (e.g. date)
# argv[3]: name of the y column (e.g. new_cases)
isocode = str(sys.argv[1])
xname = str(sys.argv[2])    #date
yname = str(sys.argv[3])    #new_cases


# Save the ISO code to a small text file so that it can be read back later
# (bar.py reads isocodef.txt and uses its content as the chart title).
isocodef = 'isocodef.txt'
with open(isocodef, mode='w') as f:
    f.write(isocode)


########## download the full data set ##########

url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"

# The timeout keeps the script from waiting forever on a stalled connection.
# raise_for_status() aborts on an HTTP error (4xx/5xx) instead of letting
# the body of an error page be parsed as if it were the CSV data.
response = requests.get(url, timeout=60)
response.raise_for_status()
dfurl = response.content

df = pd.read_csv(io.StringIO(dfurl.decode('utf-8')))

# Keep an unfiltered local copy of everything that was downloaded.
df.to_csv("df.csv", header=True, index=False)

# Output of print(df.columns) as recorded by the original author
# (the upstream file may have gained or lost columns since then):
#
# Index(['iso_code', 'location', 'date', 'total_cases', 'new_cases',
#        'total_deaths', 'new_deaths', 'total_cases_per_million',
#        'new_cases_per_million', 'total_deaths_per_million',
#        'new_deaths_per_million', 'total_tests', 'new_tests',
#        'total_tests_per_thousand', 'new_tests_per_thousand',
#        'new_tests_smoothed', 'new_tests_smoothed_per_thousand', 'tests_units',
#        'stringency_index', 'population', 'population_density', 'median_age',
#        'aged_65_older', 'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
#        'cvd_death_rate', 'diabetes_prevalence', 'female_smokers',
#        'male_smokers', 'handwashing_facilities', 'hospital_beds_per_100k'],
#       dtype='object')


########## filter by ISO code and save ##########

# dfiso1: rows of the requested country, with identifying columns kept.
dfiso1 = df[df['iso_code'] == isocode][['iso_code', 'location', xname, yname]]

if dfiso1.empty:
    # The output files are still written (header only), as before, but the
    # user is told why they are empty.
    print("Warning: no rows found for ISO code '" + isocode + "'", file=sys.stderr)

dfiso1.to_csv("dfiso1.csv", header=True, index=False)

# dfiso2: only the two columns (x, y) used by the plotting/regression scripts.
dfiso2 = dfiso1[[xname, yname]]
dfiso2.to_csv("dfiso2.csv", header=True, index=False)



bar.py
########## Bar Chart Plot ##########

##### Run this script as follows:
#
#python3 bar.py (a file name that has two columns, date & new_cases) date new_cases
#
# For instance,
#python3 bar.py dfiso2.csv date new_cases



########## import ##########

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import sys
import pandas as pd


########## arguments ##########

# argv[1]: CSV file to plot
# argv[2]: name of the column used for the x-axis (dates)
# argv[3]: name of the column used for the y-axis (bar heights)
cli_args = sys.argv
dffile = str(cli_args[1])
xname = str(cli_args[2])
yname = str(cli_args[3])


########## load data files ##########

df = pd.read_csv(dffile)

# The chart title is taken from the text stored in isocodef.txt.
isocodef = 'isocodef.txt'
with open(isocodef) as code_file:
    isocode = code_file.read()

print(isocode)


########## Bar Chart ##########

# x values are converted to numpy datetimes; y values are used as stored.
x_dates = df[xname].to_numpy().astype(np.datetime64)
y_values = df[yname].to_numpy()

from pandas.plotting import register_matplotlib_converters

register_matplotlib_converters()

plt.rcParams.update({"font.size": 18})

fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(1, 1, 1)

ax.bar(
    x_dates,
    y_values,
    color='black',
    edgecolor="black",
)

# Tick labels show the full date (year-month-day).
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))

### axis labels
ax.set_xlabel("x: {}".format(xname))    #x-axis
ax.set_ylabel("y: {}".format(yname))    #y-axis

ax.set_title(isocode)
plt.savefig("Figure_1_bar_chart.png")
plt.show()







pr.py
########## Polynomial Regression (x: date vs y: COVID-19 New Cases) ##########

##### Run this script as follows:
#
#python3 pr.py (the degree/order of the polynomial regression model) (a file name that has two columns, date & new_cases) new_cases (the number of decimal places to which the polynomial regression model coefficients are rounded)
#
# For instance,
#python3 pr.py 8 dfiso2.csv new_cases 12


### import
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
import sys
import os
import sympy as sym
from sympy.plotting import plot
from IPython.display import display
import pandas as pd


### arguments
# argv[1]: degree of the polynomial to fit
# argv[2]: CSV file that contains the training data
# argv[3]: name of the y column in that file
# argv[4]: number of decimal places kept when rounding the coefficients
deg = int(sys.argv[1])
trainingcsv = sys.argv[2]
yname = sys.argv[3]
coefdecimals = int(sys.argv[4])


### Training Data
tmp_training = pd.read_csv(trainingcsv)
print(tmp_training)

# Rows without a y value cannot be used for the fit, so they are dropped.
# dropna() keeps the original row index, so x is still the row position
# counted from the top of the file.
tmp_training = tmp_training.dropna(subset=[yname])

# x is the row index (0, 1, 2, ...), not the date column itself.
x_training = tmp_training.index.values.tolist()
y_training = tmp_training[yname].values.tolist()


### LSM
#
#least-squares method (Degree of the polynomial fitting: n)
#LSM (Deg: n)
# Each entry is [label, fitting function returning the coefficients,
# highest power first].
cf = ["LSM (Deg: " + str(deg) + ")", lambda x, y: np.polyfit(x, y, deg)]


sym.init_printing(use_unicode=True)
x, y = sym.symbols("x y")


for method_name, method in [cf]:
    print(method_name)

    ### calculating coefficients
    # Fit only once; the same coefficients feed the displayed equation,
    # the R2 score and the plotted curve.
    raw_coefficients = method(x_training, y_training)

    # Rounded copy, used only for the equation that is displayed.
    coefficients = np.round(raw_coefficients, decimals=coefdecimals)

    ### sympy to show an equation
    expr = 0
    for index, coefficient in enumerate(coefficients):
        expr += coefficient * x ** (len(coefficients) - index - 1)
    equation = sym.Eq(y, expr)
    display(equation)

    ###R2
    # R2 and the curve use the unrounded coefficients.
    fitted_curve = np.poly1d(raw_coefficients)
    r2 = r2_score(y_training, fitted_curve(x_training))

    ### Scatter Plot
    plt.scatter(x_training, y_training, label="Training Data")
    #
    # data plotting and drawing a fitted model
    x_latent = np.linspace(min(x_training), max(x_training), 100)
    plt.plot(x_latent, fitted_curve(x_latent), c="red", label="Polynominal Regession")
    plt.xlabel('Days')
    plt.ylabel(yname)
    plt.grid()
    plt.legend(bbox_to_anchor=(0, 1), loc='upper left', borderaxespad=0, fontsize=8)
    # Equation and R2 are written inside the plot, at 80% and 70% of the
    # largest y value.
    plt.text(min(x_training), max(y_training)*0.80, equation, fontsize=8)
    plt.text(min(x_training), max(y_training)*0.70, "R2 = " + str(r2), fontsize=8)
    plt.savefig("Figure_2_Polynominal_Regression_deg_" + str(deg) + ".png")
    plt.show()



Figures
Figure_1_bar_chart.png


Figure_2_Polynominal_Regression_deg_8.png



No comments:

Post a Comment

Deep Learning (Regression, Multiple Features/Explanatory Variables, Supervised Learning): Implementation and Showing Biases and Weights

Deep Learning (Regression, Multiple Features/Explanatory Variables, Supervised Learning): Implementation and Showing Biases and Weights ...