n=100 p=10 MSE=PMSE=PMSE.rand=NULL for (i in 1:1000){ sigma=1 beta = (1:p) X = matrix(rnorm(n*p),nrow=n) Y = X%*%beta + sigma*rnorm(n) Ynew = X%*%beta + sigma*rnorm(n) Xnew = matrix(rnorm(n*p),nrow=n) Ynew.rand = Xnew%*%beta + sigma*rnorm(n) mod = lm (Y~X-1) Yhat = predict(mod) MSE = c(MSE, mean( (Y-Yhat)^2)) #Fixed-x prediction error PMSE = c(PMSE, mean( (Ynew-Yhat)^2)) # test set error Yhat.rand = Xnew%*%mod$coef PMSE.rand = c(PMSE.rand, mean( (Ynew.rand-Yhat.rand)^2)) } plot (density(MSE),xlim=c(min(MSE),max(PMSE.rand))) lines (density(PMSE),col=2) lines (density(PMSE.rand),col=3) # what is this supposed to be? (mean(PMSE)-mean(MSE))/2/sigma^2*n # what happens for true random-X prediction? (mean(PMSE.rand)-mean(MSE))/2/sigma^2*n # now lets add bias n=100 p=10 MSE=PMSE=PMSE.rand=NULL for (i in 1:1000){ sigma=1 beta = (1:p) X = matrix(rnorm(n*p),nrow=n) Y = X%*%beta + X[,1]^3*5 + sigma*rnorm(n) Ynew = X%*%beta + X[,1]^3*5 + sigma*rnorm(n) Xnew = matrix(rnorm(n*p),nrow=n) Ynew.rand = Xnew%*%beta + Xnew[,1]^3*5 + sigma*rnorm(n) mod = lm (Y~X-1) Yhat = predict(mod) MSE = c(MSE, mean( (Y-Yhat)^2)) #Fixed-x prediction error PMSE = c(PMSE, mean( (Ynew-Yhat)^2)) # test set error Yhat.rand = Xnew%*%mod$coef PMSE.rand = c(PMSE.rand, mean( (Ynew.rand-Yhat.rand)^2)) } plot (density(MSE),xlim=c(min(MSE),max(PMSE))) lines (density(PMSE),col=2) lines (density(PMSE.rand),col=3) # what is this supposed to be now? Different? (mean(PMSE)-mean(MSE))/2/sigma^2*n # what happens for true random-X prediction? (mean(PMSE.rand)-mean(MSE))/2/sigma^2*n