Chapter 18

Example-18-1.R
library(dynlm); library(stargazer)
data(hseinv, package='wooldridge')

# detrended variable: residual from a regression on the obs. index: 
trendreg <- dynlm( log(invpc) ~ trend(hseinv), data=hseinv )
hseinv$linv.detr <-  resid( trendreg )
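# optional sanity check: residuals from a trend regression with intercept
# have mean (essentially) zero by construction
mean(hseinv$linv.detr)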
# ts data:
hseinv.ts <- ts(hseinv)

# Koyck geometric d.l.:
gDL<-dynlm(linv.detr~gprice + L(linv.detr)            ,data=hseinv.ts)
# rational d.l.:
rDL<-dynlm(linv.detr~gprice + L(linv.detr) + L(gprice),data=hseinv.ts)

stargazer(gDL,rDL, type="text", keep.stat=c("n","adj.rsq"))

# LRP geometric DL:
b <- coef(gDL)
 b["gprice"]                 / (1-b["L(linv.detr)"])

# LRP rational DL:
b <- coef(rDL)
(b["gprice"]+b["L(gprice)"]) / (1-b["L(linv.detr)"])
Example-18-4-urca.R
library(urca)
data(inven, package='wooldridge')

# automated ADF test using urca:
summary( ur.df(log(inven$gdp) , type = c("trend"), lags = 1) )
Example-18-4.R
library(dynlm)
data(inven, package='wooldridge')

# variable to test: y=log(gdp)
inven$y <- log(inven$gdp)
inven.ts<- ts(inven)

# summary output of ADF regression:
summary(dynlm( d(y) ~ L(y) + L(d(y)) + trend(inven.ts), data=inven.ts))

# automated ADF test using tseries:
library(tseries)
adf.test(inven$y, k=1)
Example-18-8.R
# Note: the forecasts for 1997-2003 below require the updated phillips data;
# here they are loaded from the wooldridge package
library(dynlm); library(stargazer)
data(phillips, package='wooldridge')
tsdat=ts(phillips, start=1948)

# Estimate models and display results
res1 <- dynlm(unem ~ unem_1      , data=tsdat, end=1996)
res2 <- dynlm(unem ~ unem_1+inf_1, data=tsdat, end=1996)
stargazer(res1, res2 ,type="text", keep.stat=c("n","adj.rsq","ser"))

# Predictions for 1997-2003 including 95% forecast intervals:
predict(res1, newdata=window(tsdat,start=1997), interval="prediction")
predict(res2, newdata=window(tsdat,start=1997), interval="prediction")
Example-18-9.R
# Note: run Example-18-8.R first to generate the results res1 and res2

# Actual unemployment and forecasts:
y  <- window(tsdat,start=1997)[,"unem"]
f1 <- predict( res1, newdata=window(tsdat,start=1997) )
f2 <- predict( res2, newdata=window(tsdat,start=1997) )

# Plot unemployment and forecasts:
matplot(time(y), cbind(y,f1,f2), type="l",  col="black",lwd=2,lty=1:3)
legend("topleft",c("Unempl.","Forecast 1","Forecast 2"),lwd=2,lty=1:3)

# Forecast errors:
e1<- y - f1
e2<- y - f2

# RMSE:
sqrt(mean(e1^2))
sqrt(mean(e2^2))

# MAE:
mean(abs(e1))
mean(abs(e2))
Simulate-Spurious-Regression-1.R
# Initialize Random Number Generator
set.seed(29846)

# i.i.d. N(0,1) innovations
n <- 50
e <- rnorm(n)
a <- rnorm(n)
# independent random walks
x <- cumsum(a)
y <- cumsum(e)

# plot
plot(x,type="l",lty=1,lwd=1)
lines(y        ,lty=2,lwd=2)
legend("topright",c("x","y"), lty=c(1,2), lwd=c(1,2))

# Regression of y on x
summary( lm(y~x) )
Simulate-Spurious-Regression-2.R
# Initialize Random Number Generator
set.seed(29846)

# generate 10,000 pairs of independent random walks
# and store the p value of the t test on the slope
pvals <- numeric(10000)
for (r in 1:10000) {
  # i.i.d. N(0,1) innovations
  n <- 50
  a <- rnorm(n)
  e <- rnorm(n)
  # independent random walks
  x <- cumsum(a)
  y <- cumsum(e)
  # regression summary
  regsum <- summary(lm(y~x))
  # p value: 2nd row, 4th column of regression table
  pvals[r] <- regsum$coef[2,4]
}

# How often is p <= 5%?
table(pvals<=0.05)
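
# the same information as a rejection frequency (share of p values <= 5%):
mean(pvals<=0.05)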
Example-18-1.py
import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm

hseinv = woo.dataWoo('hseinv')

# add lags and detrend:
hseinv['linvpc_det'] = sm.tsa.tsatools.detrend(hseinv['linvpc'])
hseinv['gprice_lag1'] = hseinv['gprice'].shift(1)
hseinv['linvpc_det_lag1'] = hseinv['linvpc_det'].shift(1)

# Koyck geometric d.l.:
reg_koyck = smf.ols(formula='linvpc_det ~ gprice + linvpc_det_lag1',
                    data=hseinv)
results_koyck = reg_koyck.fit()

# print regression table:
table_koyck = pd.DataFrame({'b': round(results_koyck.params, 4),
                            'se': round(results_koyck.bse, 4),
                            't': round(results_koyck.tvalues, 4),
                            'pval': round(results_koyck.pvalues, 4)})
print(f'table_koyck: \n{table_koyck}\n')

# rational d.l.:
reg_rational = smf.ols(formula='linvpc_det ~ gprice + linvpc_det_lag1 +'
                               'gprice_lag1',
                       data=hseinv)
results_rational = reg_rational.fit()

# print regression table:
table_rational = pd.DataFrame({'b': round(results_rational.params, 4),
                               'se': round(results_rational.bse, 4),
                               't': round(results_rational.tvalues, 4),
                               'pval': round(results_rational.pvalues, 4)})
print(f'table_rational: \n{table_rational}\n')

# LRP:
lrp_koyck = results_koyck.params['gprice'] / (
        1 - results_koyck.params['linvpc_det_lag1'])
print(f'lrp_koyck: {lrp_koyck}\n')

lrp_rational = (results_rational.params['gprice'] +
                results_rational.params['gprice_lag1']) / (
                       1 - results_rational.params['linvpc_det_lag1'])
print(f'lrp_rational: {lrp_rational}\n')
Example-18-4.py
import wooldridge as woo
import numpy as np
import pandas as pd
import statsmodels.api as sm

inven = woo.dataWoo('inven')
inven['lgdp'] = np.log(inven['gdp'])

# automated ADF:
res_ADF_aut = sm.tsa.stattools.adfuller(inven['lgdp'], maxlag=1, autolag=None,
                                        regression='ct', regresults=True)
ADF_stat_aut = res_ADF_aut[0]
ADF_pval_aut = res_ADF_aut[1]
table = pd.DataFrame({'names': res_ADF_aut[3].resols.model.exog_names,
                      'b': np.round(res_ADF_aut[3].resols.params, 4),
                      'se': np.round(res_ADF_aut[3].resols.bse, 4),
                      't': np.round(res_ADF_aut[3].resols.tvalues, 4),
                      'pval': np.round(res_ADF_aut[3].resols.pvalues, 4)})
print(f'table: \n{table}\n')
print(f'ADF_stat_aut: {ADF_stat_aut}\n')
print(f'ADF_pval_aut: {ADF_pval_aut}\n')
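
# the ADF critical values are also returned by adfuller
# (with regresults=True they appear as the third element of the result tuple):
ADF_crit_aut = res_ADF_aut[2]
print(f'ADF_crit_aut: {ADF_crit_aut}\n')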
Example-18-8.py
import wooldridge as woo
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

phillips = woo.dataWoo('phillips')

# define yearly time series beginning in 1948:
date_range = pd.date_range(start='1948', periods=len(phillips), freq='Y')
phillips.index = date_range.year

# estimate models:
yt96 = (phillips['year'] <= 1996)
reg_1 = smf.ols(formula='unem ~ unem_1', data=phillips, subset=yt96)
results_1 = reg_1.fit()
reg_2 = smf.ols(formula='unem ~ unem_1 + inf_1', data=phillips, subset=yt96)
results_2 = reg_2.fit()

# predictions for 1997-2003 including 95% forecast intervals:
yf97 = (phillips['year'] > 1996)
pred_1 = results_1.get_prediction(phillips[yf97])
pred_1_FI = pred_1.summary_frame(
    alpha=0.05)[['mean', 'obs_ci_lower', 'obs_ci_upper']]
pred_1_FI.index = date_range.year[yf97]
print(f'pred_1_FI: \n{pred_1_FI}\n')

pred_2 = results_2.get_prediction(phillips[yf97])
pred_2_FI = pred_2.summary_frame(
    alpha=0.05)[['mean', 'obs_ci_lower', 'obs_ci_upper']]
pred_2_FI.index = date_range.year[yf97]
print(f'pred_2_FI: \n{pred_2_FI}\n')

# forecast errors:
e1 = phillips[yf97]['unem'] - pred_1_FI['mean']
e2 = phillips[yf97]['unem'] - pred_2_FI['mean']

# RMSE and MAE:
rmse1 = np.sqrt(np.mean(e1 ** 2))
print(f'rmse1: {rmse1}\n')
rmse2 = np.sqrt(np.mean(e2 ** 2))
print(f'rmse2: {rmse2}\n')
mae1 = np.mean(abs(e1))
print(f'mae1: {mae1}\n')
mae2 = np.mean(abs(e2))
print(f'mae2: {mae2}\n')

# graph:
plt.plot(phillips[yf97]['unem'], color='black', marker='', label='unem')
plt.plot(pred_1_FI['mean'], color='black',
         marker='', linestyle='--', label='forecast without inflation')
plt.plot(pred_2_FI['mean'], color='black',
         marker='', linestyle='-.', label='forecast with inflation')
plt.ylabel('unemployment')
plt.xlabel('time')
plt.legend()
plt.savefig('PyGraphs/Example-18-8.pdf')
Simulate-Spurious-Regression-1.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import scipy.stats as stats

# set the random seed:
np.random.seed(123456)

# i.i.d. N(0,1) innovations:
n = 51
e = stats.norm.rvs(0, 1, size=n)
e[0] = 0
a = stats.norm.rvs(0, 1, size=n)
a[0] = 0

# independent random walks:
x = np.cumsum(a)
y = np.cumsum(e)
sim_data = pd.DataFrame({'y': y, 'x': x})

# regression:
reg = smf.ols(formula='y ~ x', data=sim_data)
results = reg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')

# graph:
plt.plot(x, color='black', marker='', linestyle='-', label='x')
plt.plot(y, color='black', marker='', linestyle='--', label='y')
plt.ylabel('x,y')
plt.legend()
plt.savefig('PyGraphs/Simulate-Spurious-Regression-1.pdf')
Simulate-Spurious-Regression-2.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import scipy.stats as stats

# set the random seed:
np.random.seed(123456)

pvals = np.empty(10000)

# repeat 10,000 times:
for i in range(10000):
    # i.i.d. N(0,1) innovations:
    n = 51
    e = stats.norm.rvs(0, 1, size=n)
    e[0] = 0
    a = stats.norm.rvs(0, 1, size=n)
    a[0] = 0

    # independent random walks:
    x = np.cumsum(a)
    y = np.cumsum(e)
    sim_data = pd.DataFrame({'y': y, 'x': x})

    # regression:
    reg = smf.ols(formula='y ~ x', data=sim_data)
    results = reg.fit()
    pvals[i] = results.pvalues['x']

# how often is p<=5%:
count_pval_smaller = np.count_nonzero(pvals <= 0.05)  # counts True elements
print(f'count_pval_smaller: {count_pval_smaller}\n')

# how often is p>5%:
count_pval_greater = np.count_nonzero(pvals > 0.05)
print(f'count_pval_greater: {count_pval_greater}\n')
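
# the same information as a rejection frequency (share of p values <= 5%):
frac_reject = np.mean(pvals <= 0.05)
print(f'frac_reject: {frac_reject}\n')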
Example-18-1.jl
using WooldridgeDatasets, GLM, DataFrames

hseinv = DataFrame(wooldridge("hseinv"))

# add lags and detrend:
reg_trend = lm(@formula(linvpc ~ t), hseinv)
hseinv.linvpc_det = residuals(reg_trend)
hseinv.gprice_lag1 = lag(hseinv.gprice, 1)
hseinv.linvpc_det_lag1 = lag(hseinv.linvpc_det, 1)

# Koyck geometric d.l.:
reg_koyck = lm(@formula(linvpc_det ~ gprice +
                                     linvpc_det_lag1), hseinv)
table_koyck = coeftable(reg_koyck)
println("table_koyck: \n$table_koyck\n")

# rational d.l.:
reg_rational = lm(@formula(linvpc_det ~ gprice + linvpc_det_lag1 +
                                        gprice_lag1), hseinv)
table_rational = coeftable(reg_rational)
println("table_rational: \n$table_rational\n")

# calculate LRP as...
# gprice / (1 - linvpc_det_lag1):
lrp_koyck = coef(reg_koyck)[2] / (1 - coef(reg_koyck)[3])
println("lrp_koyck = $lrp_koyck\n")

# and (gprice + gprice_lag1) / (1 - linvpc_det_lag1):
lrp_rational = (coef(reg_rational)[2] + coef(reg_rational)[4]) /
               (1 - coef(reg_rational)[3])
println("lrp_rational = $lrp_rational")
Example-18-4.jl
using WooldridgeDatasets, DataFrames, HypothesisTests

inven = DataFrame(wooldridge("inven"))
inven.lgdp = log.(inven.gdp)

# automated ADF:
adf_lag = 1
res_ADF_aut = ADFTest(inven.lgdp, :trend, adf_lag)
println("res_ADF_aut: \n$res_ADF_aut")
Example-18-8.jl
using WooldridgeDatasets, GLM, DataFrames, Statistics, Plots

phillips = DataFrame(wooldridge("phillips"))

# estimate models:
yt96 = subset(phillips, :year => ByRow(<=(1996)))
reg_1 = lm(@formula(unem ~ unem_1), yt96)
reg_2 = lm(@formula(unem ~ unem_1 + inf_1), yt96)

# predictions for 1997-2003:
yf97 = subset(phillips, :year => ByRow(>(1996)))
pred_1 = round.(predict(reg_1, yf97), digits=5)
println("pred_1 = $pred_1\n")
pred_2 = round.(predict(reg_2, yf97), digits=5)
println("pred_2 = $pred_2\n")

# forecast errors:
e1 = yf97.unem .- pred_1
e2 = yf97.unem .- pred_2

# RMSE and MAE:
rmse1 = sqrt(mean(e1 .^ 2))
println("rmse1 = $rmse1\n")

rmse2 = sqrt(mean(e2 .^ 2))
println("rmse2 = $rmse2\n")

mae1 = mean(abs.(e1))
println("mae1 = $mae1\n")

mae2 = mean(abs.(e2))
println("mae2 = $mae2")

# graph:
plot(yf97.year, yf97.unem, color="black", linewidth=2,
    linestyle=:solid, label="unem", legend=:topleft)
plot!(yf97.year, pred_1, color="black", linewidth=2,
    linestyle=:dash, label="forecast without inflation")
plot!(yf97.year, pred_2, color="black", linewidth=2,
    linestyle=:dashdot, label="forecast with inflation")
ylabel!("unemployment")
xlabel!("time")
savefig("JlGraphs/Example-18-8.pdf")
Simulate-Spurious-Regression-1.jl
using Random, Distributions, Statistics, Plots, GLM, DataFrames

# set the random seed:
Random.seed!(12345)

# i.i.d. N(0,1) innovations:
n = 51
e = rand(Normal(), n)
e[1] = 0
a = rand(Normal(), n)
a[1] = 0

# independent random walks:
x = cumsum(a)
y = cumsum(e)
sim_data = DataFrame(y=y, x=x)

# regression:
reg = lm(@formula(y ~ x), sim_data)
reg_table = coeftable(reg)
println("reg_table: \n$reg_table")

# graph:
plot(x, color="black", linewidth=2, linestyle=:solid, label="x")
plot!(y, color="black", linewidth=2, linestyle=:dash, label="y")
ylabel!("x, y")
xlabel!("t")
savefig("JlGraphs/Simulate-Spurious-Regression-1.pdf")
Simulate-Spurious-Regression-2.jl
using Random, Distributions, Statistics, Plots, GLM, DataFrames

# set the random seed:
Random.seed!(12345)

pvals = zeros(10000)

for i in 1:10000
    # i.i.d. N(0,1) innovations:
    n = 51
    e = rand(Normal(), n)
    e[1] = 0
    a = rand(Normal(), n)
    a[1] = 0

    # independent random walks:
    x = cumsum(a)
    y = cumsum(e)
    sim_data = DataFrame(y=y, x=x)

    # regression:
    reg = lm(@formula(y ~ x), sim_data)
    reg_table = coeftable(reg)

    # save the p value of x:
    pvals[i] = reg_table.cols[4][2]
end

# how often is p<=5%:
count_pval_smaller = sum(pvals .<= 0.05)  # counts true elements
println("count_pval_smaller = $count_pval_smaller\n")

# how often is p>5%:
count_pval_greater = sum(pvals .> 0.05)  # counts true elements
println("count_pval_greater = $count_pval_greater")