library(urca)data(inven, package='wooldridge')# automated ADF test using urca:summary( ur.df(log(inven$gdp) , type =c("trend"), lags =1) )
Example-18-4.R
library(dynlm)data(inven, package='wooldridge')# variable to test: y=log(gdp)inven$y <-log(inven$gdp)inven.ts<-ts(inven)# summary output of ADF regression:summary(dynlm( d(y) ~L(y) +L(d(y)) +trend(inven.ts), data=inven.ts))# automated ADF test using tseries:library(tseries)adf.test(inven$y, k=1)
Example-18-8.R
# load updataed data from URfIE Website since online file is incompletelibrary(dynlm); library(stargazer)data(phillips, package='wooldridge')tsdat=ts(phillips, start=1948)# Estimate models and display resultsres1 <-dynlm(unem ~ unem_1 , data=tsdat, end=1996)res2 <-dynlm(unem ~ unem_1+inf_1, data=tsdat, end=1996)stargazer(res1, res2 ,type="text", keep.stat=c("n","adj.rsq","ser"))# Predictions for 1997-2003 including 95% forecast intervals:predict(res1, newdata=window(tsdat,start=1997), interval="prediction")predict(res2, newdata=window(tsdat,start=1997), interval="prediction")
Example-18-9.R
# Note: run Example-18-8.R first to generate the results res1 and res2# Actual unemployment and forecasts:y <-window(tsdat,start=1997)[,"unem"]f1 <-predict( res1, newdata=window(tsdat,start=1997) )f2 <-predict( res2, newdata=window(tsdat,start=1997) )# Plot unemployment and forecasts:matplot(time(y), cbind(y,f1,f2), type="l", col="black",lwd=2,lty=1:3)legend("topleft",c("Unempl.","Forecast 1","Forecast 2"),lwd=2,lty=1:3)# Forecast errors:e1<- y - f1e2<- y - f2# RMSE:sqrt(mean(e1^2))sqrt(mean(e2^2))# MAE:mean(abs(e1))mean(abs(e2))
Simulate-Spurious-Regression-1.R
# Initialize Random Number Generatorset.seed(29846)# i.i.d. N(0,1) innovationsn <-50e <-rnorm(n)a <-rnorm(n)# independent random walksx <-cumsum(a)y <-cumsum(e)# plotplot(x,type="l",lty=1,lwd=1)lines(y ,lty=2,lwd=2)legend("topright",c("x","y"), lty=c(1,2), lwd=c(1,2))# Regression of y on xsummary( lm(y~x) )
Simulate-Spurious-Regression-2.R
# Initialize Random Number Generatorset.seed(29846)# generate 10,000 independent random walks# and store the p val of the t test pvals <-numeric(10000)for (r in1:10000) {# i.i.d. N(0,1) innovations n <-50 a <-rnorm(n) e <-rnorm(n)# independent random walks x <-cumsum(a) y <-cumsum(e)# regression summary regsum <-summary(lm(y~x))# p value: 2nd row, 4th column of regression table pvals[r] <- regsum$coef[2,4]}# How often is p<5% ?table(pvals<=0.05)
import wooldridge as wooimport pandas as pdimport numpy as npimport statsmodels.formula.api as smfimport matplotlib.pyplot as pltphillips = woo.dataWoo('phillips')# define yearly time series beginning in 1948:date_range = pd.date_range(start='1948', periods=len(phillips), freq='Y')phillips.index = date_range.year# estimate models:yt96 = (phillips['year'] <=1996)reg_1 = smf.ols(formula='unem ~ unem_1', data=phillips, subset=yt96)results_1 = reg_1.fit()reg_2 = smf.ols(formula='unem ~ unem_1 + inf_1', data=phillips, subset=yt96)results_2 = reg_2.fit()# predictions for 1997-2003 including 95% forecast intervals:yf97 = (phillips['year'] >1996)pred_1 = results_1.get_prediction(phillips[yf97])pred_1_FI = pred_1.summary_frame( alpha=0.05)[['mean', 'obs_ci_lower', 'obs_ci_upper']]pred_1_FI.index = date_range.year[yf97]print(f'pred_1_FI: \n{pred_1_FI}\n')pred_2 = results_2.get_prediction(phillips[yf97])pred_2_FI = pred_2.summary_frame( alpha=0.05)[['mean', 'obs_ci_lower', 'obs_ci_upper']]pred_2_FI.index = date_range.year[yf97]print(f'pred_2_FI: \n{pred_2_FI}\n')# forecast errors:e1 = phillips[yf97]['unem'] - pred_1_FI['mean']e2 = phillips[yf97]['unem'] - pred_2_FI['mean']# RMSE and MAE:rmse1 = np.sqrt(np.mean(e1 **2))print(f'rmse1: {rmse1}\n')rmse2 = np.sqrt(np.mean(e2 **2))print(f'rmse2: {rmse2}\n')mae1 = np.mean(abs(e1))print(f'mae1: {mae1}\n')mae2 = np.mean(abs(e2))print(f'mae2: {mae2}\n')# graph:plt.plot(phillips[yf97]['unem'], color='black', marker='', label='unem')plt.plot(pred_1_FI['mean'], color='black', marker='', linestyle='--', label='forecast without inflation')plt.plot(pred_2_FI['mean'], color='black', marker='', linestyle='-.', label='forecast with inflation')plt.ylabel('unemployment')plt.xlabel('time')plt.legend()plt.savefig('PyGraphs/Example-18-8.pdf')
Simulate-Spurious-Regression-1.py
import numpy as npimport pandas as pdimport statsmodels.formula.api as smfimport matplotlib.pyplot as pltimport scipy.stats as stats# set the random seed:np.random.seed(123456)# i.i.d. N(0,1) innovations:n =51e = stats.norm.rvs(0, 1, size=n)e[0] =0a = stats.norm.rvs(0, 1, size=n)a[0] =0# independent random walks:x = np.cumsum(a)y = np.cumsum(e)sim_data = pd.DataFrame({'y': y, 'x': x})# regression:reg = smf.ols(formula='y ~ x', data=sim_data)results = reg.fit()# print regression table:table = pd.DataFrame({'b': round(results.params, 4),'se': round(results.bse, 4),'t': round(results.tvalues, 4),'pval': round(results.pvalues, 4)})print(f'table: \n{table}\n')# graph:plt.plot(x, color='black', marker='', linestyle='-', label='x')plt.plot(y, color='black', marker='', linestyle='--', label='y')plt.ylabel('x,y')plt.legend()plt.savefig('PyGraphs/Simulate-Spurious-Regression-1.pdf')
Simulate-Spurious-Regression-2.py
import numpy as npimport pandas as pdimport statsmodels.formula.api as smfimport scipy.stats as stats# set the random seed:np.random.seed(123456)pvals = np.empty(10000)# repeat r times:for i inrange(10000):# i.i.d. N(0,1) innovations: n =51 e = stats.norm.rvs(0, 1, size=n) e[0] =0 a = stats.norm.rvs(0, 1, size=n) a[0] =0# independent random walks: x = np.cumsum(a) y = np.cumsum(e) sim_data = pd.DataFrame({'y': y, 'x': x})# regression: reg = smf.ols(formula='y ~ x', data=sim_data) results = reg.fit() pvals[i] = results.pvalues['x']# how often is p<=5%:count_pval_smaller = np.count_nonzero(pvals <=0.05) # counts True elementsprint(f'count_pval_smaller: {count_pval_smaller}\n')# how often is p>5%:count_pval_greater = np.count_nonzero(pvals >0.05)print(f'count_pval_greater: {count_pval_greater}\n')
usingRandom, Distributions, Statistics, Plots, GLM, DataFrames# set the random seed:Random.seed!(12345)pvals =zeros(10000)for i in1:10000# i.i.d. N(0,1) innovations: n =51e=rand(Normal(), n)e[1] =0 a =rand(Normal(), n) a[1] =0# independent random walks: x =cumsum(a) y =cumsum(e) sim_data =DataFrame(y=y, x=x)# regression: reg =lm(@formula(y ~ x), sim_data) reg_table =coeftable(reg)# save the p value of x: pvals[i] = reg_table.cols[4][2]end# how often is p<=5%:count_pval_smaller =sum(pvals .<=0.05) # counts true elementsprintln("count_pval_smaller = $count_pval_smaller\n")# how often is p>5%:count_pval_greater =sum(pvals .>0.05) # counts true elementsprintln("count_pval_greater = $count_pval_greater")