Dichotomous Logit and Probit
The purpose of this
session is to show you how to use R's "canned" procedures for doing
dichotomous Logit and Probit
analysis. This includes obtaining predicted probabilities, predictions of the
dependent variable, coefficients and marginal effects for the variables, model
diagnostics, hypothesis tests, and the heteroskedastic
Probit model. In addition, we provide programs that
obtain each of these outputs the "hard" way for illustrative purposes
only.
# The purpose of this
session is to show you how to use some of R's "canned" procedures for
doing
# dichotomous Probit and Logit analysis. We
will also obtain predicted probabilities,
# predictions of the
dependent variable, coefficients, marginal effects for the variables,
# model diagnostics, and
hypothesis tests. In addition, we provide programs that obtain each
# of these outputs the "hard" way for
illustrative purposes only.
# The data is a sample of
32 observations from a study by Spector and Mazzeo on pre- and post-tests
# for economic literacy.
The variables are:
# GPA = grade point average
# TUCE = test score on teaching college level
economics
# PSI = program participation variable (school
district)
# GRADE = response: 1 if grade on post is higher
than on pre,0 otherwise.
# First read the data.
Spector <- read.table("c:/users/B. Dan Wood/My Documents/My Teaching/Maximum
Likelihood/Data/AldrichNelson.txt", header=TRUE)
attach(Spector)
summary(Spector)
# We will first estimate a
Probit and Logit using glm.
library(car)
library(stats)
library(MASS)
# Probit
probitglm.out <- glm(GRADE ~ GPA + TUCE + PSI,
family = binomial(link=probit), data = Spector)
summary(probitglm.out)
# Logit
logitglm.out <- glm(GRADE ~ GPA + TUCE + PSI,
family = binomial, data = Spector)
summary(logitglm.out)
# However, the Zelig library is more flexible allowing us to do the same
things as are done
# in STATA with Clarify,
so we switch to Zelig for the remainder of the
lesson.
# You might want to visit
the Zelig web site to learn more about this program.
# It is at
http://gking.harvard.edu/zelig. Look especially at the documentation
# either on line or in pdf format.
# Note that the shift to logit
from probit is as easy as changing 'model = "probit"' to 'model = "logit"'.
library(Zelig)
probit.out <- zelig(GRADE ~ GPA + TUCE + PSI,
model = "probit", data = Spector)
summary(probit.out)
# Put the summary into an
R object
probitsummary <- summary(probit.out)
# We can do likelihood
ratio, Wald, and F tests on each of the individual coefficients in the model
# as follows. Note the
upper case Anova. Note that the F test is
inappropriate when using a binomial
# family distribution such
as this one.
Anova(probit.out, test = 'LR')
Anova(probit.out, test = 'Wald')
Anova(probit.out, test = 'F')
# We can do likelihood
ratio and F tests for subsets of coefficients as follows. Note the lower case
# anova.
Again, the F is inappropriate with the binomial family distribution.
anova(update(probit.out, GRADE ~ GPA + TUCE + PSI -PSI), probit.out, test = 'Chisq')
anova(update(probit.out, GRADE ~ GPA + TUCE + PSI -PSI), probit.out, test = 'F')
anova(update(probit.out, GRADE ~ GPA + TUCE + PSI -TUCE-PSI), probit.out, test = 'Chisq')
anova(update(probit.out, GRADE ~ GPA + TUCE + PSI -TUCE-PSI), probit.out, test = 'F')
# Execute the following to get the auxiliary
outputs from probit.out and the summary object.
names(probit.out)
names(probitsummary)
# Following are some interesting values that can be
extracted from the model for later use.
# The value of the
log-likelihood at the maximum
loglik <- -1/2*probit.out$deviance
loglik
# Akaike's Information
Criterion
aic <- probit.out$aic
aic
# Model Coefficients
beta <-probit.out$coefficients
beta
# Model Residuals
residuals <- probit.out$residuals
residuals
# Fitted Probabilities
fitted.probs <- probit.out$fitted.values
fitted.probs
# Covariance of the
coefficients.
covb <- probitsummary$cov.unscaled
covb
# Now, get the underlying
latent index, z, from some of this stuff and show how to get
# the set of probabilities
for each case the hard way. Note that these correspond
# to fitted.probs
above.
beta <- probit.out$coefficients
x <- cbind(1,as.matrix(Spector[,1:3]))
z <- x %*% beta
z
pz <- pnorm(c(z))
pz
# Generating the model
predictions and a prediction success table.
Predict <- recode(fitted.probs, 'lo:.499999=0 ; .5:hi=1; ; ', as.factor.result=FALSE)
Predict
table(Predict,GRADE)
# Now for interpretation
of the model.
#Simulate the predicted probability of making an A,
PSI=0, rest at mean
x.out <- setx(probit.out, PSI = 0)
s.out <- sim(probit.out, x = x.out)
summary(s.out)
#Simulate the predicted probability of making an A,
PSI=1, rest at mean
x.out <- setx(probit.out, PSI = 1)
s.out <- sim(probit.out, x = x.out)
summary(s.out)
# We could just take the
difference between these to get a first difference. However, Zelig
# can do this in one step.
# Simulated first difference and risk ratios when
Shifting PSI from 0 to 1 #
psi.low <- setx(probit.out, PSI = 0)
psi.high <- setx(probit.out, PSI = 1)
s.out <- sim(probit.out, x = psi.low, x1 = psi.high)
summary(s.out)
# You can also plot the
results from the simulations in Zelig.
plot(s.out)
# Another example
# First Difference when Shifting GPA from 2 to 3
with PSI=0 and TUCE at mean#
gpa.low <- setx(probit.out, GPA=2, PSI=0)
gpa.high <- setx(probit.out, GPA = 3, PSI=0)
s.out <- sim(probit.out, x = gpa.low, x1 = gpa.high)
summary(s.out)
plot(s.out)
# Calculate marginal effects with all variables at
their means from the probit
# coefficients and a scale
factor. These can be interpreted much like slopes.
# See table 21.1 on
p. 675 of Greene.
xbar <- as.matrix(mean(cbind(1,Spector[1:3])))
zxbar <- t(xbar) %*% beta
scalefactor <- dnorm(zxbar)
scalefactor
margin <- scalefactor * beta[2:4]
margin
# Again, all that is
needed to move to logit is changing the model =
"probit" option
# to model = "logit". Just change the output object to logit.out for the other
# calculations. The only
calculation that is different is for the marginal effects.
# Let's run the logit and calculate the marginal effects, leaving the other
calculations
# above for you to do on
your own.
logit.out <- zelig(GRADE ~ GPA + TUCE + PSI,
model = "logit", data = Spector)
summary(logit.out)
# Calculate marginal effects with all variables at
their means from the logit
# coefficients and a scale
factor. Again, these can be interpreted much like slopes.
# Compare the results to those on p. 675 of Greene.
beta <- logit.out$coefficients
xbar <- as.matrix(mean(cbind(1,Spector[1:3])))
zxbar <- t(xbar) %*% beta
lambda <- 1/(1+exp(-zxbar))
scalefactor <- lambda * (1-lambda)
scalefactor
margin <- scalefactor * beta[2:4]
margin
# Now let's estimate a Probit the hard way and obtain the interesting quantities.
# First, create the data
matrix, dependent vector, and other necessary quantities.
x <- cbind(1,GPA,TUCE,PSI)
y <- as.matrix(GRADE)
z <- as.matrix(PSI)
K <- ncol(x)
K
k <- ncol(z)
k
n <- nrow(x)
n
K1 <- K + 1
K1
Kk <- K + k
Kk
# Define Probit Log
Likelihood
llik.probit<-function(par, X, Y){
Y <- as.matrix(y)
X <- as.matrix(x)
b <-as.matrix(par[1:K])
phi<-pnorm(X%*%b, mean=0, sd=1, lower.tail = TRUE, log.p = FALSE)
sum(Y*log(phi)+(1-Y)*log(1-phi))
}
#Fit Probit Model
values.start <- lm(GRADE ~ GPA + TUCE + PSI)$coef
mod.probit<-optim(values.start, llik.probit,
Y=Y, X=X, method="BFGS",
control=list(maxit=2000, fnscale=-1, trace), hessian=T)
mod.probit
# Save the log likelihood for later use
LR <- mod.probit$value
LR
# Calculate the variance matrix from the Hessian
matrix.
v <- -solve(mod.probit$hessian)
v
# Calculate the standard errors from the variance
matrix.
se <- sqrt( diag(v))
se
# Calculate the z statistics from the coefficients
and standard errors
b <- mod.probit$par
b
zstat <-b/se
zstat
# Calculate p values from
the z statistics
pz <- 2*(1-pnorm(abs(zstat)))
pz
# Put the results together
in a table.
table <- cbind(b,se,zstat,pz)
table
# Obtain the underlying latent index, z.
z <- x %*% b
z
# Calculate the probabilities for each value of z
pz <- pnorm(c(z))
pz
# Now let's estimate a heteroskedastic probit model and
test for heteroskedasticity using a likelihood
# ratio test. Posit the heteroskedasticity is a function of PSI
#Define Heteroskedastic Probit Log-Likelihood
llik.hprobit<-function(par, X, Y, Z){
Y <- as.matrix(y)
X <- as.matrix(x)
Z <- as.matrix(z)
b <- as.matrix(par[1:K])
g <- as.matrix(par[K1:Kk])
phi<-pnorm((X %*% b)/exp(Z %*%
g), mean=0, sd=1, lower.tail
= TRUE, log.p = FALSE)
sum(Y*log(phi)+(1-Y)*log(1-phi))
}
#Fit Heteroskedastic Probit Model
values.start <- c(coefficients(probit.out),1)
values.start
mod.hprobit<-optim(values.start, llik.hprobit,
method="BFGS",
control=list(maxit=2000, fnscale=-1, trace), hessian=T)
mod.hprobit
# Save the log likelihood for later use
LU <- mod.hprobit$value
LU
# Calculate the variance matrix from the Hessian
matrix.
v <- -solve(mod.hprobit$hessian)
v
# Calculate the standard errors from the variance
matrix.
se <- sqrt( diag(v))
se
# Calculate the z statistics from the coefficients
and standard errors
b <- mod.hprobit$par
b
zstat <-b/se
zstat
# Calculate p values from
the z statistics
pz <- 2*(1-pnorm(abs(zstat)))
pz
# Put the results together
in a table.
table <- cbind(b,se,zstat,pz)
table
# Obtain the underlying latent index, zh.
zh <- x %*% b[1:4]
zh
# Calculate the probabilities for each value of zh
pz <- pnorm(c(zh))
pz
# Do a likelihood ratio test for the heteroskedastic model vs. the homoskedastic
model
j <- Kk - K1+1
lrtest <- -2*(LR-LU)
lrtest
lrpvalue <- 1-pchisq(lrtest, df=j)
lrpvalue
# Now estimate a logit the hard way.
# Define Logit Log
Likelihood
llik.logit<-function(par, X, Y){
Y <- as.matrix(y)
X <- as.matrix(x)
b <-as.matrix(par[1:K])
Lambda<-1/(1+exp(-X%*%b))
sum(Y*log(Lambda)+(1-Y)*log(1-Lambda))
}
#Fit Logit Model
values.start <- lm(GRADE ~ GPA + TUCE + PSI)$coef
mod.logit<-optim(values.start, llik.logit,
Y=Y, X=X, method="BFGS",
control=list(maxit=2000, fnscale=-1, trace), hessian=T)
mod.logit
# Save the log likelihood for later use
LR <- mod.logit$value
LR
# Calculate the variance matrix from the Hessian
matrix.
v <- -solve(mod.logit$hessian)
v
# Calculate the standard errors from the variance
matrix.
se <- sqrt( diag(v))
se
# Calculate the z statistics from the coefficients
and standard errors
b <- mod.logit$par
b
zstat <-b/se
zstat
# Calculate p values from
the z statistics
pz <- 2*(1-pnorm(abs(zstat)))
pz
# Put the results together
in a table.
table <- cbind(b,se,zstat,pz)
table
# Obtain the underlying logit
latent index, z.
z <- x %*% b
z
# Calculate the logit
probabilities for each value of z
pz <- 1/(1+exp(-c(z)))
pz