Introduction to
Parametric Duration Models
The
purpose of this session is to show you how to use some of R's procedures for
estimating parametric duration models. Note that we do not cover non-parametric
or semi-parametric duration models which are an important part of this
literature. Our purpose is simply to introduce
another application of maximum likelihood.
# The
purpose of this session is to show you how to use R's procedures for estimating
# parametric
duration models. Note that we do not cover non-parametric or semi-parametric
# duration
models which are an important part of this literature.
# This
file demonstrates the R procedures for evaluating duration data. First, let's
# read in the data. These
are data on the duration of labor strikes taken from Greene,
# p.
800. The variable T is the
duration of the strike. The variable PROD is the residual
# from a linear regression
of the log of industrial production on time, time squared,
# and
a set of monthly dummy variables. It measures aggregate industrial production
less
# trend
and seasonal components.
library(car)
strikes <- read.table("C:/users/B.
Dan Wood/My Documents/My Teaching/Maximum Likelihood/Data/strikes.txt",
header=TRUE)
attach(strikes)
summary(strikes)
# In contrast to Limdep, R does not expect the dependent variable to be
logged. You will
# also need
a censoring variable to indicate whether or not a process is still ongoing.
Below
# we
create the censoring variable, STATUS, for all durations below 80. Note that
unlike
# STATA and LIMDEP, R
expects the censoring variable to indicate observations that are NOT
# censored.
strikes$status <- recode(strikes$T,
'lo:79=1 ; 80:hi=0 ', as.factor.result=FALSE)
detach(strikes)
attach(strikes)
# Now
let's just estimate some duration models with no covariates. This would be
useful for
#
exploring the duration dependence of a process. First the Exponential model.
library(Design)
# Estimate the exponential
model
exp.model <- psm(Surv(T, status) ~ 1, strikes,
dist="exponential")
exp.model
# Calculate AIC for later
use in comparing models
ll.exp <- exp.model$loglik[2]
df.exp <- nrow(strikes)-exp.model$df.residual-1
aic.exp <- -2*ll.exp+2*df.exp+1
aic.exp
# Plot the exponential Survival
curve. Note that the hazard function is always a straight line
# for
this model and trivial to plot.
S <- Survival(exp.model)
times <- seq(0,max(T),by=1)
plot(times, S(times,exp.model$coef),
xlab="Days",ylab="S(t)",main="Plot
of Exponential Survival function",type='l') # plots survival curve at X*Beta hat=?
# Estimate the Weibull model
weibull.model <- psm(Surv(T, status) ~ 1, strikes,
dist="weibull")
weibull.model
# Calculate AIC for later
use in comparing models
ll.weibull <- weibull.model$loglik[2]
df.weibull <- nrow(strikes)-weibull.model$df.residual-1
aic.weibull <- -2*ll.weibull+2*df.weibull+1
aic.weibull
# Plot the survival and
hazard functions for the Weibull model
S <- Survival(weibull.model)
times <- seq(0,max(T),by=1)
plot(times, S(times,weibull.model$coef),
xlab="Days",ylab="S(t)",main="Plot
of Weibull Survival function",type='l') # plots survival curve at X*Beta hat=?
H <- Hazard(weibull.model)
plot(times, H(times,weibull.model$coef),
xlab="Days",ylab="H(t)",main="Plot
of Weibull Hazard function",type='l') # plots survival curve at X*Beta hat=?
# Estimate the lognormal
model
lognormal.model <- psm(Surv(T, status) ~ 1, strikes,
dist="lognormal")
lognormal.model
# Calculate AIC for later
use in comparing models
ll.lognormal <- lognormal.model$loglik[2]
df.lognormal <- nrow(strikes)-lognormal.model$df.residual-1
aic.lognormal <- -2*ll.lognormal+2*df.lognormal+1
aic.lognormal
# Plot the survival and
hazard functions for the lognormal model
S <- Survival(lognormal.model)
times <- seq(0,max(T),by=1)
plot(times, S(times,lognormal.model$coef),
xlab="Days",ylab="S(t)",main="Plot
of lognormal Survival function",type='l') # plots survival curve at X*Beta hat=?
H <- Hazard(lognormal.model)
plot(times, H(times,lognormal.model$coef),
xlab="Days",ylab="H(t)",main="Plot
of lognormal Hazard function",type='l') # plots survival curve at X*Beta hat=?
# Estimate the logistic
model
logistic.model <- psm(Surv(T, status) ~ 1, strikes,
dist="logistic")
logistic.model
# Calculate AIC for later
use in comparing models
ll.logistic <- logistic.model$loglik[2]
df.logistic <- nrow(strikes)-logistic.model$df.residual-1
aic.logistic <- -2*ll.logistic+2*df.logistic+1
aic.logistic
# Plot the survival and
hazard functions for the logistic model
S <- Survival(logistic.model)
times <- seq(0,max(T),by=1)
plot(times, S(times,logistic.model$coef),
xlab="Days",ylab="S(t)",main="Plot
of logistic Survival function",type='l') # plots survival curve at X*Beta hat=?
H <- Hazard(logistic.model)
plot(times, H(times,logistic.model$coef),
xlab="Days",ylab="H(t)",main="Plot
of logistic Hazard function",type='l') # plots survival curve at X*Beta hat=?
# Estimate the loglogistic model
loglogistic.model <- psm(Surv(T, status) ~ 1, strikes,
dist="loglogistic")
loglogistic.model
# Calculate AIC for later
use in comparing models
ll.loglogistic <- loglogistic.model$loglik[2]
df.loglogistic <- nrow(strikes)-loglogistic.model$df.residual-1
aic.loglogistic <- -2*ll.loglogistic+2*df.loglogistic+1
aic.loglogistic
# Plot the survival and
hazard functions for the loglogistic model
S <- Survival(loglogistic.model)
times <- seq(0,max(T),by=1)
plot(times, S(times,loglogistic.model$coef),
xlab="Days",ylab="S(t)",main="Plot
of loglogistic Survival function",type='l') # plots survival curve at X*Beta hat=?
H <- Hazard(loglogistic.model)
plot(times, H(times,loglogistic.model$coef),
xlab="Days",ylab="H(t)",main="Plot
of loglogistic Hazard function",type='l') # plots survival curve at X*Beta hat=?
# We
can use the AIC statistics as a model selection tool. The model with the lowest
AIC is best.
AIC.compare <- rbind(aic.exp,aic.weibull,aic.lognormal,aic.logistic,aic.loglogistic)
AIC.compare
# Based
on this comparison the lognormal model is the best model.
# We
can also include covariates in each of these models. Reestimate
the Weibull model with PROD
weibull.model <- psm(Surv(T, status) ~ 1 + PROD,
strikes, dist="weibull")
weibull.model
# Plot the survival and
hazard functions for the Weibull model at xbeta mean
xbar
<- c(1,mean(PROD))
xbarbeta <- xbar %*% weibull.model$coefficients
S <- Survival(weibull.model)
times <- seq(0,max(T),by=1)
plot(times, S(times,xbarbeta), xlab="Days",ylab="S(t)",main="Plot
of Weibull Survival function",type='l') # plots survival curve at X*Beta hat=?
H <- Hazard(weibull.model)
plot(times, H(times,xbarbeta), xlab="Days",ylab="H(t)",main="Plot
of Weibull Hazard function",type='l') # plots survival curve at X*Beta hat=?
# Another
diagnostic for model specification utilizes the cumulative hazard function. A
# well specified model
will have a cumulative hazard function that begins at zero and
# increases in linear
fashion. For example, here are the compare the cumulative hazard
# functions for the weibull versus lognormal models
# Reestimate
the Weibull model above
weibull.model <- psm(Surv(T, status) ~ 1, strikes,
dist="weibull")
weibull.model
# Plot the integrated
hazard function for the Weibull model
S <- Survival(weibull.model)
times <- seq(0,max(T),by=1)
plot(times, -log(S(times,weibull.model$coef)),
xlab="Days",ylab="Integrated
Hazard Function",main="Plot of Weibull Integrated Hazard Function",type='l') # plots survival curve at X*Beta hat=?
# Reestimate
the Lognormal model above
lognormal.model <- psm(Surv(T, status) ~ 1, strikes,
dist="lognormal")
lognormal.model
# Plot the integrated
hazard function for the lognormal model
S <- Survival(lognormal.model)
times <- seq(0,max(T),by=1)
plot(times, -log(S(times,lognormal.model$coef)),
xlab="Days",ylab="Integrated
Hazard Function",main="Plot of lognormal
Integrated Hazard Function",type='l') # plots survival curve at X*Beta hat=?
# Based
on this comparison, the Weibull model is the best
since it comes closest to
#
forming a straight line.
# This
lesson has provided a summary of parametric models of survival in R. As you can
see,
# there is considerable
expertise required to use and interpret these models. A future
# course
in survival analysis might be required to learn everything about this class of
# models.