# edvreg.R # # A function to fit the estimated dependent variable regression described in # Jeffrey B. Lewis and Drew A. Linzer (2005), ``Estimating Regression Models in Which the Dependent Variable # Is Based on Estimates.'' _Political_Analysis_ 13(4):345-364. Based on code written by Drew Linzer # in March of 2005 ( MonteCarlo_EDV.R). # # Jeff Lewis (2013) # library(MASS) # needed to use ginv(), the generalized inverse of a matrix library(car) # needed to access a fast wcrossprod function # Find the trace of a square matrix tr <- function(x) { sum(diag(x)) } # # edvreg # mod: Regression formula # # omegasq: Nx1 vector of estimated sampling/error variances of the dependent variable # (Note that unlike the STATA code, what is called for here is the sample variance not the # standard error) # # proportional: "True" if omegasq is only proportional to the sampling/error variances of # of the dependent variables # # Example call: res <- edvreg( y ~ x, omegasq = est.var.y ) # edvreg <- function(mod,omegasq,proportional=FALSE) { # Some housekeeping stolen from John Fox's corchrane-orcutt implementation X <- model.matrix(mod) xnames <- colnames(X) y <- model.response(model.frame(mod)) # When omegasq is known up to a proportion (for example 1/N_i) ... if (proportional) { residsq <- (resid(lm(y~X-1)))^2 steptwo <- lm(residsq~omegasq) zeroeps <- FALSE if (coef(steptwo)[1] < 0) { warning("Variance of epsilon estimated to be negative! Setting to 0...") steptwo <- lm(residsq~0+omegasq) zeroeps <- TRUE } predval <- fitted.values(steptwo) if (sum(predval<0) < 0) { warning("Negative omegaSq estimates encountered, setting to 0.0001") predval[predval<0] <- 0.0001 # error check against very rare possibility of negative weight } model.FGLS <- lm(y~X-1,weights=(1/predval)) model.FGLS$vareps <- ifelse(zeroeps,0,steptwo$coefficients[1]) model.FGLS$omegasqFactor <- ifelse(zeroeps,steptwo$coefficients[1],steptwo$coefficients[2]) model.FGLS$predval <- predval } # When omegasq is known exactly... else { N <- nrow(X) sigmahatsq <- (sum((residuals(lm(y~X-1)))^2) - sum(omegasq) + tr(ginv(crossprod(X)) %*% wcrossprod(X,X,w=omegasq)))/(N-ncol(X)-1) if (sigmahatsq < 0) { warning(sprintf("SigmaHatSq estimated to be %7.4f, setting to 0...",sigmahatsq)) sigmahatsq <- 0.0 } w <- 1/(omegasq+sigmahatsq) model.FGLS <- lm(y~X-1, weights=w) model.FGLS$sigmahatsq <- sigmahatsq } names(model.FGLS$coefficients) <- xnames model.FGLS } # # A few extra functions to run Monte Carlo experiments on edvreg (not needed for real applications)... # edvreg.sim <- function(N=1500,theta=0.9,rho=-0.9,bet0=0,bet1=1,Cval=0.9) { omegasq <- rgamma(N,1/theta,1/theta) u <- rnorm(N,mean=0,sd=sqrt(omegasq)) eps <- rnorm(N,mean=0,sd=1) x <- matrix((sqrt(1-(rho^2)))*rnorm(N,mean=0,sd=1) + rho*omegasq/sqrt(theta)) ystar <- bet0 + bet1*x + sqrt(Cval)*u + sqrt(1-Cval)*eps omegasq <- omegasq*Cval list(ystar=ystar,x=x,omegasq=omegasq) } edvreg.test <- function() { sims <- 1000 alpha <- matrix(NA,sims,6) beta <- alpha vareps <- rep(NA,sims) omegasqFactor <- vareps for (i in 1:sims) { cat(sprintf("Iteration %5i...\n",i)) dat <- edvreg.sim() omegasq <- dat$omegasq ystar <- dat$ystar x <- dat$x res0 <- summary(lm(ystar ~ x)) res1 <- summary( edvreg(ystar ~ x, omegasq=omegasq) ) res2a <- edvreg(ystar ~ x, omegasq=omegasq,proportional=TRUE) res2 <- summary(res2a) rr <- cbind(res0$coefficients[,1:2], res1$coefficients[,1:2], res2$coefficients[,1:2]) alpha[i,] <- rr[1,] beta[i,] <- rr[2,] omegasqFactor[i] <- res2a$omegasqFactor vareps[i] <- res2a$vareps } list(alpha=alpha,beta=beta,vareps=vareps,omegasqFactor=omegasqFactor) } edvreg.run.test <- function() { res <- edvreg.test() print( apply(res$beta[,c(1,3,5)],2,sd) ) print( apply(res$beta[,c(2,4,6)],2,mean) ) print( mean(res$vareps) ) print( sd( res$vareps) ) print( mean(res$omegasqFactor) ) print( sd(res$omegasqFactor) ) return( res ) }