Saturday, July 6, 2019

R: Logistic Regression (without regularization) - a simple example

data1.csv
y,x1,x2
0,3.6,60.1
1,4.1,52.0
0,3.7,62.5
0,4.9,60.6
1,4.4,54.1
0,3.6,63.6
1,4.9,68.0
0,4.8,38.2
1,4.1,59.5
0,4.3,47.3

binary.csv
admit,gre,gpa,rank
0,380,3.61,3
1,660,3.67,3
1,800,4,1
1,640,3.19,4
0,520,2.93,4
1,760,3,2
1,560,2.98,1
0,400,3.08,2
1,540,3.39,3
0,700,3.92,2
0,800,4,4
0,440,3.22,1
1,760,4,1
0,700,3.08,2
1,700,4,1
0,480,3.44,3
0,780,3.87,4
0,360,2.56,3
0,800,3.75,2
1,540,3.81,1
0,500,3.17,3
1,660,3.63,2
0,600,2.82,4
0,680,3.19,4
1,760,3.35,2
1,800,3.66,1
1,620,3.61,1
1,520,3.74,4
1,780,3.22,2
0,520,3.29,1
0,540,3.78,4
0,760,3.35,3
0,600,3.4,3
1,800,4,3
0,360,3.14,1
0,400,3.05,2
0,580,3.25,1
0,520,2.9,3
1,500,3.13,2
1,520,2.68,3
0,560,2.42,2
1,580,3.32,2
1,600,3.15,2
0,500,3.31,3
0,700,2.94,2
1,460,3.45,3
1,580,3.46,2
0,500,2.97,4
0,440,2.48,4
0,400,3.35,3
0,640,3.86,3
0,440,3.13,4
0,740,3.37,4
1,680,3.27,2
0,660,3.34,3
1,740,4,3
0,560,3.19,3
0,380,2.94,3
0,400,3.65,2
0,600,2.82,4
1,620,3.18,2
0,560,3.32,4
0,640,3.67,3
1,680,3.85,3
0,580,4,3
0,600,3.59,2
0,740,3.62,4
0,620,3.3,1
0,580,3.69,1
0,800,3.73,1
0,640,4,3
0,300,2.92,4
0,480,3.39,4
0,580,4,2
0,720,3.45,4
0,720,4,3
0,560,3.36,3
1,800,4,3
0,540,3.12,1
1,620,4,1
0,700,2.9,4
0,620,3.07,2
0,500,2.71,2
0,380,2.91,4
1,500,3.6,3
0,520,2.98,2
0,600,3.32,2
0,600,3.48,2
0,700,3.28,1
1,660,4,2
0,700,3.83,2
1,720,3.64,1
0,800,3.9,2
0,580,2.93,2
1,660,3.44,2
0,660,3.33,2
0,640,3.52,4
0,480,3.57,2
0,700,2.88,2
0,400,3.31,3
0,340,3.15,3
0,580,3.57,3
0,380,3.33,4
0,540,3.94,3
1,660,3.95,2
1,740,2.97,2
1,700,3.56,1
0,480,3.13,2
0,400,2.93,3
0,480,3.45,2
0,680,3.08,4
0,420,3.41,4
0,360,3,3
0,600,3.22,1
0,720,3.84,3
0,620,3.99,3
1,440,3.45,2
0,700,3.72,2
1,800,3.7,1
0,340,2.92,3
1,520,3.74,2
1,480,2.67,2
0,520,2.85,3
0,500,2.98,3
0,720,3.88,3
0,540,3.38,4
1,600,3.54,1
0,740,3.74,4
0,540,3.19,2
0,460,3.15,4
1,620,3.17,2
0,640,2.79,2
0,580,3.4,2
0,500,3.08,3
0,560,2.95,2
0,500,3.57,3
0,560,3.33,4
0,700,4,3
0,620,3.4,2
1,600,3.58,1
0,640,3.93,2
1,700,3.52,4
0,620,3.94,4
0,580,3.4,3
0,580,3.4,4
0,380,3.43,3
0,480,3.4,2
0,560,2.71,3
1,480,2.91,1
0,740,3.31,1
1,800,3.74,1
0,400,3.38,2
1,640,3.94,2
0,580,3.46,3
0,620,3.69,3
1,580,2.86,4
0,560,2.52,2
1,480,3.58,1
0,660,3.49,2
0,700,3.82,3
0,600,3.13,2
0,640,3.5,2
1,700,3.56,2
0,520,2.73,2
0,580,3.3,2
0,700,4,1
0,440,3.24,4
0,720,3.77,3
0,500,4,3
0,600,3.62,3
0,400,3.51,3
0,540,2.81,3
0,680,3.48,3
1,800,3.43,2
0,500,3.53,4
1,620,3.37,2
0,520,2.62,2
1,620,3.23,3
0,620,3.33,3
0,300,3.01,3
0,620,3.78,3
0,500,3.88,4
0,700,4,2
1,540,3.84,2
0,500,2.79,4
0,800,3.6,2
0,560,3.61,3
0,580,2.88,2
0,560,3.07,2
0,500,3.35,2
1,640,2.94,2
0,800,3.54,3
0,640,3.76,3
0,380,3.59,4
1,600,3.47,2
0,560,3.59,2
0,660,3.07,3
1,400,3.23,4
0,600,3.63,3
0,580,3.77,4
0,800,3.31,3
1,580,3.2,2
1,700,4,1
0,420,3.92,4
1,600,3.89,1
1,780,3.8,3
0,740,3.54,1
1,640,3.63,1
0,540,3.16,3
0,580,3.5,2
0,740,3.34,4
0,580,3.02,2
0,460,2.87,2
0,640,3.38,3
1,600,3.56,2
1,660,2.91,3
0,340,2.9,1
1,460,3.64,1
0,460,2.98,1
1,560,3.59,2
0,540,3.28,3
0,680,3.99,3
1,480,3.02,1
0,800,3.47,3
0,800,2.9,2
1,720,3.5,3
0,620,3.58,2
0,540,3.02,4
0,480,3.43,2
1,720,3.42,2
0,580,3.29,4
0,600,3.28,3
0,380,3.38,2
0,420,2.67,3
1,800,3.53,1
0,620,3.05,2
1,660,3.49,2
0,480,4,2
0,500,2.86,4
0,700,3.45,3
0,440,2.76,2
1,520,3.81,1
1,680,2.96,3
0,620,3.22,2
0,540,3.04,1
0,800,3.91,3
0,680,3.34,2
0,440,3.17,2
0,680,3.64,3
0,640,3.73,3
0,660,3.31,4
0,620,3.21,4
1,520,4,2
1,540,3.55,4
1,740,3.52,4
0,640,3.35,3
1,520,3.3,2
1,620,3.95,3
0,520,3.51,2
0,640,3.81,2
0,680,3.11,2
0,440,3.15,2
1,520,3.19,3
1,620,3.95,3
1,520,3.9,3
0,380,3.34,3
0,560,3.24,4
1,600,3.64,3
1,680,3.46,2
0,500,2.81,3
1,640,3.95,2
0,540,3.33,3
1,680,3.67,2
0,660,3.32,1
0,520,3.12,2
1,600,2.98,2
0,460,3.77,3
1,580,3.58,1
1,680,3,4
1,660,3.14,2
0,660,3.94,2
0,360,3.27,3
0,660,3.45,4
0,520,3.1,4
1,440,3.39,2
0,600,3.31,4
1,800,3.22,1
1,660,3.7,4
0,800,3.15,4
0,420,2.26,4
1,620,3.45,2
0,800,2.78,2
0,680,3.7,2
0,800,3.97,1
0,480,2.55,1
0,520,3.25,3
0,560,3.16,1
0,460,3.07,2
0,540,3.5,2
0,720,3.4,3
0,640,3.3,2
1,660,3.6,3
1,400,3.15,2
1,680,3.98,2
0,220,2.83,3
0,580,3.46,4
1,540,3.17,1
0,580,3.51,2
0,540,3.13,2
0,440,2.98,3
0,560,4,3
0,660,3.67,2
0,660,3.77,3
1,520,3.65,4
0,540,3.46,4
1,300,2.84,2
1,340,3,2
1,780,3.63,4
1,480,3.71,4
0,540,3.28,1
0,460,3.14,3
0,460,3.58,2
0,500,3.01,4
0,420,2.69,2
0,520,2.7,3
0,680,3.9,1
0,680,3.31,2
1,560,3.48,2
0,580,3.34,2
0,500,2.93,4
0,740,4,3
0,660,3.59,3
0,420,2.96,1
0,560,3.43,3
1,460,3.64,3
1,620,3.71,1
0,520,3.15,3
0,620,3.09,4
0,540,3.2,1
1,660,3.47,3
0,500,3.23,4
1,560,2.65,3
0,500,3.95,4
0,580,3.06,2
0,520,3.35,3
0,500,3.03,3
0,600,3.35,2
0,580,3.8,2
0,400,3.36,2
0,620,2.85,2
1,780,4,2
0,620,3.43,3
1,580,3.12,3
0,700,3.52,2
1,540,3.78,2
1,760,2.81,1
0,700,3.27,2
0,720,3.31,1
1,560,3.69,3
0,720,3.94,3
1,520,4,1
1,540,3.49,1
0,680,3.14,2
0,460,3.44,2
1,560,3.36,1
0,480,2.78,3
0,460,2.93,3
0,620,3.63,3
0,580,4,1
0,800,3.89,2
1,540,3.77,2
1,680,3.76,3
1,680,2.42,1
1,620,3.37,1
0,560,3.78,2
0,560,3.49,4
0,620,3.63,2
1,800,4,2
0,640,3.12,3
0,540,2.7,2
0,700,3.65,2
1,540,3.49,2
0,540,3.51,2
0,660,4,1
1,480,2.62,2
0,420,3.02,1
1,740,3.86,2
0,580,3.36,2
0,640,3.17,2
0,640,3.51,2
1,800,3.05,2
1,660,3.88,2
1,600,3.38,3
1,620,3.75,2
1,460,3.99,3
0,620,4,2
0,560,3.04,3
0,460,2.63,2
0,700,3.65,2
0,600,3.89,3


0_runme.txt

########## R: Logistic Regression

##### Run this script on your R Console

# Set your working directory in your R console.
# The following path is a placeholder - change it to the directory where you saved the R files below.
#setwd('/Users/XXX/Downloads/')

#source('1_install_packages.r') # Run this script only the first time, to install the required packages.

#source('2_library.r')

source('3_logistic_regression.r')

source('4_logistic_regression.r')



Source: https://qiita.com/katsu1110/items/e4ef613559f02f183af5



1_install_packages.r
# install.packages('ggplot2') # uncomment on first run; ggplot2 is used by 4_logistic_regression.r


2_library.r
#


3_logistic_regression.r
# Data loading
data1 <- read.csv("data1.csv", header = TRUE)

# Logistic regression with the logit link (the default link for the binomial family)
mylogit <- glm(y ~ x1 + x2, data = data1, family = binomial(link = "logit"))

# Summary
summary(mylogit)
#=>
# Call:
# glm(formula = y ~ x1 + x2, family = binomial(link = "logit"), data = data1)

# Deviance Residuals:
#     Min       1Q   Median       3Q      Max
# -1.4754  -0.8584  -0.8007   1.1905   1.5719

# Coefficients:
#             Estimate Std. Error z value Pr(>|z|)
# (Intercept) -9.44589    9.12237  -1.035    0.300
# x1           1.27158    1.49423   0.851    0.395
# x2           0.06424    0.08739   0.735    0.462

# (Dispersion parameter for binomial family taken to be 1)

#     Null deviance: 13.460  on 9  degrees of freedom
# Residual deviance: 12.345  on 7  degrees of freedom
# AIC: 18.345

# Number of Fisher Scoring iterations: 4


# Show the fitted (predicted) probabilities
fitted(mylogit)
#=>
#         1         2         3         4         5         6         7         8         9        10
# 0.2675020 0.2907201 0.3260736 0.6632579 0.4072105 0.3137831 0.7600997 0.2914609 0.3988916 0.2810008


# The fitted (predicted) probabilities are computed from the estimated equation:
# logit(π) = −9.44589 + 1.27158*x1 + 0.06424*x2
# However, as the Pr(>|z|) column in the summary shows, none of the coefficients
# is statistically significant (the sample has only 10 observations).
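# To verify, the fitted values above can be reproduced by hand from the estimated
# coefficients; a minimal sketch (not part of the original script), using
# plogis() as the inverse-logit function:
beta <- coef(mylogit)                                 # (Intercept), x1, x2
eta <- beta[1] + beta[2]*data1$x1 + beta[3]*data1$x2  # linear predictor logit(pi)
plogis(eta)                                           # inverse logit; matches fitted(mylogit)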


4_logistic_regression.r
# Data loading
#mydata <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")
mydata <- read.csv("binary.csv")
mydata$rank <- factor(mydata$rank) # treat rank (1-4) as categorical so glm() creates dummy variables rank2-rank4

# Logistic Regression
mylogit <- glm(admit ~ gre + gpa + rank, data = mydata, family = "binomial")

# Summary
summary(mylogit)
#=>
# Call:
# glm(formula = admit ~ gre + gpa + rank, family = "binomial", data = mydata)

# Deviance Residuals:
#     Min       1Q   Median       3Q      Max
# -1.6268  -0.8662  -0.6388   1.1490   2.0790

# Coefficients:
#              Estimate Std. Error z value Pr(>|z|)  
# (Intercept) -3.989979   1.139951  -3.500 0.000465 ***
# gre          0.002264   0.001094   2.070 0.038465 *
# gpa          0.804038   0.331819   2.423 0.015388 *
# rank2       -0.675443   0.316490  -2.134 0.032829 *
# rank3       -1.340204   0.345306  -3.881 0.000104 ***
# rank4       -1.551464   0.417832  -3.713 0.000205 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

# (Dispersion parameter for binomial family taken to be 1)

#     Null deviance: 499.98  on 399  degrees of freedom
# Residual deviance: 458.52  on 394  degrees of freedom
# AIC: 470.52

# Number of Fisher Scoring iterations: 4
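# The coefficients are on the log-odds scale; exponentiating them gives odds
# ratios (e.g., rank 2 versus rank 1 multiplies the odds of admission by
# exp(-0.675) ~ 0.51). A short sketch in the spirit of the UCLA reference below:
exp(coef(mylogit))                               # odds ratios
exp(cbind(OR = coef(mylogit), confint(mylogit))) # odds ratios with 95% profile CIs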

# Predicted admission probability at the mean gre and gpa for each rank
newdata1 <- with(mydata, data.frame(gre = mean(gre), gpa = mean(gpa), rank = factor(1:4)))
newdata1$rankP <- predict(mylogit, newdata = newdata1, type = "response")

# Grid of gre values (at the mean gpa) for each rank, for plotting
newdata2 <- with(mydata, data.frame(gre = rep(seq(from = 200, to = 800, length.out = 100),
    4), gpa = mean(gpa), rank = factor(rep(1:4, each = 100))))

# Predict on the link (log-odds) scale with standard errors, build 95% confidence
# bounds there, then map back to probabilities with plogis() so the bands stay in [0, 1]
newdata3 <- cbind(newdata2, predict(mylogit, newdata = newdata2, type = "link",
    se = TRUE))
newdata3 <- within(newdata3, {
    PredictedProb <- plogis(fit)
    LL <- plogis(fit - (1.96 * se.fit))  # lower 95% bound
    UL <- plogis(fit + (1.96 * se.fit))  # upper 95% bound
})
# Plot with ggplot2: predicted probability vs gre, with 95% confidence ribbons by rank
library("ggplot2")
ggplot(newdata3, aes(x = gre, y = PredictedProb)) + geom_ribbon(aes(ymin = LL,
    ymax = UL, fill = rank), alpha = 0.2) + geom_line(aes(colour = rank),
    size = 1)
# Save as an image file
ggsave("image.png")
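# As a quick in-sample sanity check (an addition, not in the original script),
# threshold the fitted probabilities at 0.5 and cross-tabulate against the
# observed outcomes:
pred <- as.integer(fitted(mylogit) > 0.5)        # classify at a 0.5 cutoff
table(observed = mydata$admit, predicted = pred) # in-sample confusion matrix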

# Reference:
# https://stats.idre.ucla.edu/r/dae/logit-regression/



