Chapter 12

Example-12-1.R

library(dynlm);library(lmtest)
data(phillips, package='wooldridge')

# Define Yearly time series beginning in 1948
tsdata <- ts(phillips, start=1948)

# Estimation of static Phillips curve:
reg.s <- dynlm( inf ~ unem, data=tsdata, end=1996)
# residuals and AR(1) test:
residual.s <- resid(reg.s)
coeftest( dynlm(residual.s ~ L(residual.s)) )

# Same with expectations-augmented Phillips curve:
reg.ea <- dynlm( d(inf) ~ unem, data=tsdata, end=1996)
residual.ea <- resid(reg.ea)
coeftest( dynlm(residual.ea ~ L(residual.ea)) )

Example-12-3.R

library(dynlm);library(car);library(lmtest)
data(barium, package='wooldridge')

tsdata <- ts(barium, start=c(1978,2), frequency=12)

reg <- dynlm(log(chnimp)~log(chempi)+log(gas)+log(rtwex)+
                                  befile6+affile6+afdec6, data=tsdata )

# Pedestrian test: 
residual <- resid(reg)
resreg <- dynlm(residual ~ L(residual)+L(residual,2)+L(residual,3)+
                           log(chempi)+log(gas)+log(rtwex)+befile6+
                                          affile6+afdec6, data=tsdata )
linearHypothesis(resreg, 
                 c("L(residual)","L(residual, 2)","L(residual, 3)"))

# Automatic test:
bgtest(reg, order=3, type="F")

Example-12-4.R

library(dynlm);library(car);library(orcutt)
data(barium, package='wooldridge')

tsdata <- ts(barium, start=c(1978,2), frequency=12)

# OLS estimation
olsres <- dynlm(log(chnimp)~log(chempi)+log(gas)+log(rtwex)+
      befile6+affile6+afdec6, data=tsdata)

# Cochrane-Orcutt estimation
cochrane.orcutt(olsres)

Example-12-7.R

library(dynlm);library(lmtest);library(sandwich)
data(prminwge, package='wooldridge')

tsdata <- ts(prminwge, start=1950)

# OLS regression
reg<-dynlm(log(prepop)~log(mincov)+log(prgnp)+log(usgnp)+trend(tsdata), 
                                                          data=tsdata )
# results with usual SE
coeftest(reg)
# results with HAC SE
coeftest(reg, vcovHAC)

Example-12-9.R

library(dynlm);library(lmtest)
data(nyse, package='wooldridge')

tsdata <- ts(nyse)

# Linear regression of model:
reg <- dynlm(return ~ L(return), data=tsdata) 

# squared residual
residual.sq <- resid(reg)^2

# Model for squared residual:
ARCHreg <- dynlm(residual.sq ~ L(residual.sq)) 
coeftest(ARCHreg)

Example-ARCH.R

library(zoo);library(quantmod);library(dynlm);library(stargazer)

# Download data using the quantmod package:
getSymbols("AAPL", auto.assign = TRUE)

# Calculate return as the log difference
ret <- diff( log(AAPL$AAPL.Adjusted) )
# Subset 2008-2016 by special xts indexing:
ret <- ret["2008/2016"]

# AR(1) model for returns
ret <- as.zoo(ret)
reg <- dynlm( ret ~ L(ret) ) 

# squared residual
residual.sq <- resid(reg)^2

# Model for squared residual:
ARCHreg <- dynlm(residual.sq ~ L(residual.sq)) 
summary(ARCHreg)

Example-DWtest.R

library(dynlm);library(lmtest)
data(phillips, package='wooldridge')

tsdata <- ts(phillips, start=1948)

# Estimation of both Phillips curve models:
reg.s <- dynlm( inf ~ unem, data=tsdata, end=1996)
reg.ea <- dynlm( d(inf) ~ unem, data=tsdata, end=1996)

# DW tests
dwtest(reg.s)
dwtest(reg.ea)

Example-12-1.py

import wooldridge as woo
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

prminwge = woo.dataWoo('prminwge')
T = len(prminwge)
prminwge['time'] = prminwge['year'] - 1949
prminwge.index = pd.date_range(start='1950', periods=T, freq='YE').year

# OLS regression:
reg = smf.ols(formula='np.log(prepop) ~ np.log(mincov) + np.log(prgnp) +'
                      'np.log(usgnp) + time', data=prminwge)

# results with regular SE:
results_regu = reg.fit()

# print regression table:
table_regu = pd.DataFrame({'b': round(results_regu.params, 4),
                           'se': round(results_regu.bse, 4),
                           't': round(results_regu.tvalues, 4),
                           'pval': round(results_regu.pvalues, 4)})
print(f'table_regu: \n{table_regu}\n')

# results with HAC SE:
results_hac = reg.fit(cov_type='HAC', cov_kwds={'maxlags': 2})

# print regression table:
table_hac = pd.DataFrame({'b': round(results_hac.params, 4),
                          'se': round(results_hac.bse, 4),
                          't': round(results_hac.tvalues, 4),
                          'pval': round(results_hac.pvalues, 4)})
print(f'table_hac: \n{table_hac}\n')

Example-12-2-ExpAug.py

import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf

phillips = woo.dataWoo('phillips')
T = len(phillips)

# define yearly time series beginning in 1948:
date_range = pd.date_range(start='1948', periods=T, freq='YE')
phillips.index = date_range.year

# estimation of expectations-augmented Phillips curve:
yt96 = (phillips['year'] <= 1996)
phillips['inf_diff1'] = phillips['inf'].diff()
reg_ea = smf.ols(formula='inf_diff1 ~ unem', data=phillips, subset=yt96)
results_ea = reg_ea.fit()

phillips['resid_ea'] = results_ea.resid
phillips['resid_ea_lag1'] = phillips['resid_ea'].shift(1)
reg = smf.ols(formula='resid_ea ~ resid_ea_lag1', data=phillips, subset=yt96)
results = reg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')

Example-12-2-Static.py

import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf

phillips = woo.dataWoo('phillips')
T = len(phillips)

# define yearly time series beginning in 1948:
date_range = pd.date_range(start='1948', periods=T, freq='YE')
phillips.index = date_range.year

# estimation of static Phillips curve:
yt96 = (phillips['year'] <= 1996)
reg_s = smf.ols(formula='Q("inf") ~ unem', data=phillips, subset=yt96)
results_s = reg_s.fit()

# residuals and AR(1) test:
phillips['resid_s'] = results_s.resid
phillips['resid_s_lag1'] = phillips['resid_s'].shift(1)
reg = smf.ols(formula='resid_s ~ resid_s_lag1', data=phillips, subset=yt96)
results = reg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')

Example-12-4.py

import wooldridge as woo
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

barium = woo.dataWoo('barium')
T = len(barium)

# monthly time series starting Feb. 1978:
barium.index = pd.date_range(start='1978-02', periods=T, freq='ME')

reg = smf.ols(formula='np.log(chnimp) ~ np.log(chempi) + np.log(gas) +'
                      'np.log(rtwex) + befile6 + affile6 + afdec6',
              data=barium)
results = reg.fit()

# automatic test:
bg_result = sm.stats.diagnostic.acorr_breusch_godfrey(results, nlags=3)
fstat_auto = bg_result[2]
fpval_auto = bg_result[3]
print(f'fstat_auto: {fstat_auto}\n')
print(f'fpval_auto: {fpval_auto}\n')

# pedestrian test:
barium['resid'] = results.resid
barium['resid_lag1'] = barium['resid'].shift(1)
barium['resid_lag2'] = barium['resid'].shift(2)
barium['resid_lag3'] = barium['resid'].shift(3)

reg_manual = smf.ols(formula='resid ~ resid_lag1 + resid_lag2 + resid_lag3 +'
                             'np.log(chempi) + np.log(gas) + np.log(rtwex) +'
                             'befile6 + affile6 + afdec6', data=barium)
results_manual = reg_manual.fit()

hypotheses = ['resid_lag1 = 0', 'resid_lag2 = 0', 'resid_lag3 = 0']
ftest_manual = results_manual.f_test(hypotheses)
fstat_manual = ftest_manual.statistic
fpval_manual = ftest_manual.pvalue
print(f'fstat_manual: {fstat_manual}\n')
print(f'fpval_manual: {fpval_manual}\n')

Example-12-5.py

import wooldridge as woo
import pandas as pd
import numpy as np
import statsmodels.api as sm
import patsy as pt

barium = woo.dataWoo('barium')
T = len(barium)

# monthly time series starting Feb. 1978:
barium.index = pd.date_range(start='1978-02', periods=T, freq='ME')

# perform the Cochrane-Orcutt estimation (iterative procedure):
y, X = pt.dmatrices('np.log(chnimp) ~ np.log(chempi) + np.log(gas) +'
                    'np.log(rtwex) + befile6 + affile6 + afdec6',
                    data=barium, return_type='dataframe')
reg = sm.GLSAR(y, X)
CORC_results = reg.iterative_fit(maxiter=100)
table = pd.DataFrame({'b_CORC': CORC_results.params,
                      'se_CORC': CORC_results.bse})
print(f'reg.rho: {reg.rho}\n')
print(f'table: \n{table}\n')

Example-12-9.py

import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf

nyse = woo.dataWoo('nyse')
nyse['ret'] = nyse['return']
nyse['ret_lag1'] = nyse['ret'].shift(1)

# linear regression of model:
reg = smf.ols(formula='ret ~ ret_lag1', data=nyse)
results = reg.fit()

# squared residuals:
nyse['resid_sq'] = results.resid ** 2
nyse['resid_sq_lag1'] = nyse['resid_sq'].shift(1)

# model for squared residuals:
ARCHreg = smf.ols(formula='resid_sq ~ resid_sq_lag1', data=nyse)
results_ARCH = ARCHreg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results_ARCH.params, 4),
                      'se': round(results_ARCH.bse, 4),
                      't': round(results_ARCH.tvalues, 4),
                      'pval': round(results_ARCH.pvalues, 4)})
print(f'table: \n{table}\n')

Example-ARCH.py

import numpy as np
import pandas as pd
import yfinance as yf
import statsmodels.formula.api as smf

# download data for 'AAPL' (= Apple) and define start and end:
tickers = ['AAPL']
start_date = '2007-12-31'
end_date = '2016-12-31'

# use yfinance for the import:
AAPL_data = yf.download(tickers, start_date, end_date)

# calculate return as the difference of logged prices:
AAPL_data['ret'] = np.log(AAPL_data['Adj Close']).diff()
AAPL_data['ret_lag1'] = AAPL_data['ret'].shift(1)

# AR(1) model for returns:
reg = smf.ols(formula='ret ~ ret_lag1', data=AAPL_data)
results = reg.fit()

# squared residuals:
AAPL_data['resid_sq'] = results.resid ** 2
AAPL_data['resid_sq_lag1'] = AAPL_data['resid_sq'].shift(1)

# model for squared residuals:
ARCHreg = smf.ols(formula='resid_sq ~ resid_sq_lag1', data=AAPL_data)
results_ARCH = ARCHreg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results_ARCH.params, 4),
                      'se': round(results_ARCH.bse, 4),
                      't': round(results_ARCH.tvalues, 4),
                      'pval': round(results_ARCH.pvalues, 4)})
print(f'table: \n{table}\n')

Example-DWtest.py

import wooldridge as woo
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

phillips = woo.dataWoo('phillips')
T = len(phillips)

# define yearly time series beginning in 1948:
date_range = pd.date_range(start='1948', periods=T, freq='YE')
phillips.index = date_range.year

# estimation of both Phillips curve models:
yt96 = (phillips['year'] <= 1996)
phillips['inf_diff1'] = phillips['inf'].diff()
reg_s = smf.ols(formula='Q("inf") ~ unem', data=phillips, subset=yt96)
reg_ea = smf.ols(formula='inf_diff1 ~ unem', data=phillips, subset=yt96)
results_s = reg_s.fit()
results_ea = reg_ea.fit()

# DW tests:
DW_s = sm.stats.stattools.durbin_watson(results_s.resid)
DW_ea = sm.stats.stattools.durbin_watson(results_ea.resid)
print(f'DW_s: {DW_s}\n')
print(f'DW_ea: {DW_ea}\n')

calc-hac-se.jl

using LinearAlgebra

# for details, see Equations 12.41 - 12.43 in Wooldridge (2019)
function calc_hac_se(reg, g)
    n = nobs(reg)
    X = reg.mm.m
    n = size(X, 1)
    K = size(X, 2)
    u = residuals(reg)
    ser = sqrt(sum(u .^ 2) / (n - K))
    se_ols = coeftable(reg).cols[2]
    se_hac = zeros(K)

    for k in 1:K
        yk = X[:, k]
        Xk = X[:, (1:K).!=k]
        bk = inv(transpose(Xk) * Xk) * transpose(Xk) * yk
        rk = yk .- Xk * bk
        ak = rk .* u
        vk = sum(ak .^ 2)
        for h in 1:g
            sum_h = 2 * (1 - h / (g + 1)) * sum(ak[(h+1):n] .* ak[1:(n-h)])
            vk = vk + sum_h
        end

        se_hac[k] = (se_ols[k] / ser)^2 * sqrt(vk)
    end
    return se_hac
end

Example-12-1.jl

using WooldridgeDatasets, GLM, DataFrames
include("calc-hac-se.jl")

prminwge = DataFrame(wooldridge("prminwge"))
prminwge.time = prminwge.year .- 1949

# OLS with regular SE:
reg = lm(@formula(log(prepop) ~ log(mincov) + log(prgnp) +
                                log(usgnp) + time), prminwge)

# OLS with HAC SE: 
hac_se = calc_hac_se(reg, 2)

# print different SEs:
table = DataFrame(coefficients=coeftable(reg).rownms,
    b=round.(coef(reg), digits=5),
    se_default=round.(coeftable(reg).cols[2], digits=5),
    hac_se=round.(hac_se, digits=5))
println("table: \n$table")

Example-12-2-ExpAug.jl

using WooldridgeDatasets, GLM, DataFrames

phillips = DataFrame(wooldridge("phillips"))
yt96 = subset(phillips, :year => ByRow(<=(1996)))

# estimation of expectations-augmented Phillips curve:
yt96.inf_diff1 = vcat(missing, diff(yt96.inf))
yt96 = yt96[Not(1), :]
reg_ea = lm(@formula(inf_diff1 ~ unem), yt96)

yt96.resid_ea = residuals(reg_ea)
yt96.resid_ea_lag1 = lag(yt96.resid_ea, 1)

reg = lm(@formula(resid_ea ~ resid_ea_lag1), yt96)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg")

Example-12-2-Static.jl

using WooldridgeDatasets, GLM, DataFrames

phillips = DataFrame(wooldridge("phillips"))
yt96 = subset(phillips, :year => ByRow(<=(1996)))

# estimation of static Phillips curve:
reg_s = lm(@formula(inf ~ unem), yt96)

# residuals and AR(1) test:
yt96.resid_s = residuals(reg_s)
yt96.resid_s_lag1 = lag(yt96.resid_s, 1)

reg = lm(@formula(resid_s ~ resid_s_lag1), yt96)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg")

Example-12-4.jl

using WooldridgeDatasets, GLM, DataFrames

barium = DataFrame(wooldridge("barium"))

reg = lm(@formula(log(chnimp) ~ log(chempi) + log(gas) + log(rtwex) +
                                befile6 + affile6 + afdec6), barium)

# testing resid_lag1 = 0, resid_lag2 = 0 and resid_lag3 = 0:
barium.resid = residuals(reg)
barium.resid_lag1 = lag(barium.resid, 1)
barium.resid_lag2 = lag(barium.resid, 2)
barium.resid_lag3 = lag(barium.resid, 3)
barium = barium[Not(1:3), :]

reg_manual_ur = lm(@formula(resid ~ resid_lag1 + resid_lag2 + resid_lag3 +
                                    log(chempi) + log(gas) + log(rtwex) +
                                    befile6 + affile6 + afdec6), barium)
reg_manual_r = lm(@formula(resid ~ log(chempi) + log(gas) + log(rtwex) +
                                   befile6 + affile6 + afdec6), barium)
ftest_manual_res = ftest(reg_manual_r.model, reg_manual_ur.model)

fstat_manual = ftest_manual_res.fstat[2]
fpval_manual = ftest_manual_res.pval[2]
println("fstat_manual = $fstat_manual\n")
println("fpval_manual = $fpval_manual")

Example-12-5.jl

using PyCall, WooldridgeDatasets, GLM, DataFrames
# install Python's statsmodels with: using Conda; Conda.add("statsmodels")
sm = pyimport("statsmodels.api")
include("../03/getMats.jl")

barium = DataFrame(wooldridge("barium"))

# definition of model and hypotheses:
f = @formula(log(chnimp) ~ 1 + log(chempi) + log(gas) + log(rtwex) +
                           befile6 + affile6 + afdec6)
xy = getMats(f, barium)
y = xy[1]
X = xy[2]

# perform the Cochrane-Orcutt estimation (iterative procedure):
reg = sm.GLSAR(y, X)
CORC_results = reg.iterative_fit(maxiter=100)

reg_rho = reg.rho
table = DataFrame(
    coefnames=["Intercept", "log(chempi)", "log(gas)", "log(rtwex)",
        "befile6", "affile6", "afdec6"],
    b_CORC=CORC_results.params,
    se_CORC=round.(CORC_results.bse, digits=5))

println("reg_rho = $reg_rho\n")
println("table: \n$table")

Example-12-9.jl

using WooldridgeDatasets, GLM, DataFrames

nyse = DataFrame(wooldridge("nyse"))
nyse.ret = nyse.return
nyse.ret_lag1 = lag(nyse.ret, 1)
nyse = nyse[Not(1, 2), :]

# linear regression of model:
reg = lm(@formula(ret ~ ret_lag1), nyse)

# squared residuals:
nyse.resid_sq = residuals(reg) .^ 2
nyse.resid_sq_lag1 = lag(nyse.resid_sq, 1)

# model for squared residuals:
ARCHreg = lm(@formula(resid_sq ~ resid_sq_lag1), nyse)
table_ARCHreg = coeftable(ARCHreg)
println("table_ARCHreg: \n$table_ARCHreg")

Example-ARCH.jl

using DataFrames, GLM, Dates, MarketData

# download data for "AAPL" (= Apple) and define start and end:
ticker = "AAPL"
start_date = DateTime(2007, 12, 31)
end_date = DateTime(2017, 01, 01)

# import data as DataFrame:
AAPL_data = DataFrame(yahoo(ticker,
    YahooOpt(period1=start_date, period2=end_date)))

# calculate return as the difference of logged prices:
AAPL_data.ret = vcat(missing, diff(log.(AAPL_data.AdjClose)))
AAPL_data.ret_lag1 = lag(AAPL_data.ret, 1)
AAPL_data = AAPL_data[Not(1, 2), :]

# AR(1) model for returns:
reg = lm(@formula(ret ~ ret_lag1), AAPL_data)

# squared residuals:
AAPL_data.resid_sq = residuals(reg) .^ 2
AAPL_data.resid_sq_lag1 = lag(AAPL_data.resid_sq, 1)

# model for squared residuals:
ARCHreg = lm(@formula(resid_sq ~ resid_sq_lag1), AAPL_data)
table_ARCHreg = coeftable(ARCHreg)
println("table_ARCHreg: \n$table_ARCHreg")

Example-DWtest.jl

using WooldridgeDatasets, GLM, DataFrames, HypothesisTests
include("../03/getMats.jl")

phillips = DataFrame(wooldridge("phillips"))
yt96 = subset(phillips, :year => ByRow(<=(1996)))

# estimation of both Phillips curve models and 
# extraction of regressor matrices and residuals:
reg_s = lm(@formula(inf ~ unem), yt96)
X_s = getMats(formula(reg_s), yt96)[2]
resid_s = residuals(reg_s)

yt96.inf_diff1 = vcat(missing, diff(yt96.inf))
yt96 = yt96[Not(1), :]
reg_ea = lm(@formula(inf_diff1 ~ unem), yt96)
X_ea = getMats(formula(reg_ea), yt96)[2]
resid_ea = residuals(reg_ea)

# DW tests:
DW_s = DurbinWatsonTest(X_s, resid_s)
DW_ea = DurbinWatsonTest(X_ea, resid_ea)
println("DW_s: \n$DW_s\n")
println("DW_ea: \n$DW_ea")