Chapter 8

Example-8-2-cont.R

# F-Tests using different variance-covariance formulas:
myH0 <- c("black","white")
# Ususal VCOV
linearHypothesis(reg, myH0)
# Refined White VCOV
linearHypothesis(reg, myH0, vcov=hccm)
# Classical White VCOV
linearHypothesis(reg, myH0, vcov=hccm(reg,type="hc0"))

Example-8-2.R

data(gpa3, package='wooldridge')

# load packages (which need to be installed!)
library(lmtest); library(car)

# Estimate model (only for spring data)
reg <- lm(cumgpa~sat+hsperc+tothrs+female+black+white, 
                                     data=gpa3, subset=(spring==1))
# Usual SE:
coeftest(reg)
# Refined White heteroscedasticity-robust SE:
coeftest(reg, vcov=hccm)

Example-8-4.R

data(hprice1, package='wooldridge')

# Estimate model
reg <- lm(price~lotsize+sqrft+bdrms, data=hprice1)
reg

# Automatic BP test
library(lmtest)
bptest(reg)

# Manual regression of squared residuals 
summary(lm( resid(reg)^2 ~ lotsize+sqrft+bdrms, data=hprice1))

Example-8-5.R

data(hprice1, package='wooldridge')

# Estimate model
reg <- lm(log(price)~log(lotsize)+log(sqrft)+bdrms, data=hprice1)
reg

# BP test
library(lmtest)
bptest(reg)

# White test
bptest(reg, ~ fitted(reg) + I(fitted(reg)^2) )

Example-8-6.R

library(foreign)
d401k<-read.dta("http://fmwww.bc.edu/ec-p/data/wooldridge/401ksubs.dta")

# OLS (only for singles: fsize==1)
lm(nettfa ~ inc + I((age-25)^2) + male + e401k, 
                                         data=d401k, subset=(fsize==1))

# WLS
lm(nettfa ~ inc + I((age-25)^2) + male + e401k, weight=1/inc, 
                                         data=d401k, subset=(fsize==1))

Example-8-7.R

data(smoke, package='wooldridge')

# OLS
olsreg<-lm(cigs~log(income)+log(cigpric)+educ+age+I(age^2)+restaurn, 
                                                            data=smoke)
olsreg

# BP test
library(lmtest)
bptest(olsreg)

# FGLS: estimation of the variance function
logu2 <- log(resid(olsreg)^2)
varreg<-lm(logu2~log(income)+log(cigpric)+educ+age+I(age^2)+restaurn, 
                                                            data=smoke)

# FGLS: WLS
w <- 1/exp(fitted(varreg))
lm(cigs~log(income)+log(cigpric)+educ+age+I(age^2)+restaurn, 
                                                  weight=w ,data=smoke)

WLS-Robust.R

library(foreign)
d401k<-read.dta("http://fmwww.bc.edu/ec-p/data/wooldridge/401ksubs.dta")

# WLS
wlsreg <- lm(nettfa ~ inc + I((age-25)^2) + male + e401k, 
                             weight=1/inc, data=d401k, subset=(fsize==1))

# non-robust results
library(lmtest); library(car)
coeftest(wlsreg)

# robust results (Refined White SE:)
coeftest(wlsreg,hccm)

Example-8-2-cont.py

import wooldridge as woo
import statsmodels.formula.api as smf

gpa3 = woo.dataWoo('gpa3')

# definition of model and hypotheses:
reg = smf.ols(formula='cumgpa ~ sat + hsperc + tothrs + female + black + white',
              data=gpa3, subset=(gpa3['spring'] == 1))
hypotheses = ['black = 0', 'white = 0']

# F-Tests using different variance-covariance formulas:
# usual VCOV:
results_default = reg.fit()
ftest_default = results_default.f_test(hypotheses)
fstat_default = ftest_default.statistic
fpval_default = ftest_default.pvalue
print(f'fstat_default: {fstat_default}\n')
print(f'fpval_default: {fpval_default}\n')

# refined White VCOV:
results_hc3 = reg.fit(cov_type='HC3')
ftest_hc3 = results_hc3.f_test(hypotheses)
fstat_hc3 = ftest_hc3.statistic
fpval_hc3 = ftest_hc3.pvalue
print(f'fstat_hc3: {fstat_hc3}\n')
print(f'fpval_hc3: {fpval_hc3}\n')

# classical White VCOV:
results_hc0 = reg.fit(cov_type='HC0')
ftest_hc0 = results_hc0.f_test(hypotheses)
fstat_hc0 = ftest_hc0.statistic
fpval_hc0 = ftest_hc0.pvalue
print(f'fstat_hc0: {fstat_hc0}\n')
print(f'fpval_hc0: {fpval_hc0}\n')

Example-8-2.py

import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf

gpa3 = woo.dataWoo('gpa3')

# define regression model:
reg = smf.ols(formula='cumgpa ~ sat + hsperc + tothrs + female + black + white',
              data=gpa3, subset=(gpa3['spring'] == 1))

# estimate default model (only for spring data):
results_default = reg.fit()

table_default = pd.DataFrame({'b': round(results_default.params, 5),
                              'se': round(results_default.bse, 5),
                              't': round(results_default.tvalues, 5),
                              'pval': round(results_default.pvalues, 5)})
print(f'table_default: \n{table_default}\n')

# estimate model with White SE (only for spring data):
results_white = reg.fit(cov_type='HC0')

table_white = pd.DataFrame({'b': round(results_white.params, 5),
                            'se': round(results_white.bse, 5),
                            't': round(results_white.tvalues, 5),
                            'pval': round(results_white.pvalues, 5)})
print(f'table_white: \n{table_white}\n')

# estimate model with refined White SE (only for spring data):
results_refined = reg.fit(cov_type='HC3')

table_refined = pd.DataFrame({'b': round(results_refined.params, 5),
                              'se': round(results_refined.bse, 5),
                              't': round(results_refined.tvalues, 5),
                              'pval': round(results_refined.pvalues, 5)})
print(f'table_refined: \n{table_refined}\n')

Example-8-4.py

import wooldridge as woo
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import patsy as pt

hprice1 = woo.dataWoo('hprice1')

# estimate model:
reg = smf.ols(formula='price ~ lotsize + sqrft + bdrms', data=hprice1)
results = reg.fit()
table_results = pd.DataFrame({'b': round(results.params, 4),
                              'se': round(results.bse, 4),
                              't': round(results.tvalues, 4),
                              'pval': round(results.pvalues, 4)})
print(f'table_results: \n{table_results}\n')

# automatic BP test (LM version):
y, X = pt.dmatrices('price ~ lotsize + sqrft + bdrms',
                    data=hprice1, return_type='dataframe')
result_bp_lm = sm.stats.diagnostic.het_breuschpagan(results.resid, X)
bp_lm_statistic = result_bp_lm[0]
bp_lm_pval = result_bp_lm[1]
print(f'bp_lm_statistic: {bp_lm_statistic}\n')
print(f'bp_lm_pval: {bp_lm_pval}\n')

# manual BP test (F version):
hprice1['resid_sq'] = results.resid ** 2
reg_resid = smf.ols(formula='resid_sq ~ lotsize + sqrft + bdrms', data=hprice1)
results_resid = reg_resid.fit()
bp_F_statistic = results_resid.fvalue
bp_F_pval = results_resid.f_pvalue
print(f'bp_F_statistic: {bp_F_statistic}\n')
print(f'bp_F_pval: {bp_F_pval}\n')

Example-8-5.py

import wooldridge as woo
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import patsy as pt

hprice1 = woo.dataWoo('hprice1')

# estimate model:
reg = smf.ols(formula='np.log(price) ~ np.log(lotsize) + np.log(sqrft) + bdrms',
              data=hprice1)
results = reg.fit()

# BP test:
y, X_bp = pt.dmatrices('np.log(price) ~ np.log(lotsize) + np.log(sqrft) + bdrms',
                       data=hprice1, return_type='dataframe')
result_bp = sm.stats.diagnostic.het_breuschpagan(results.resid, X_bp)
bp_statistic = result_bp[0]
bp_pval = result_bp[1]
print(f'bp_statistic: {bp_statistic}\n')
print(f'bp_pval: {bp_pval}\n')

# White test:
X_wh = pd.DataFrame({'const': 1, 'fitted_reg': results.fittedvalues,
                     'fitted_reg_sq': results.fittedvalues ** 2})
result_white = sm.stats.diagnostic.het_breuschpagan(results.resid, X_wh)
white_statistic = result_white[0]
white_pval = result_white[1]
print(f'white_statistic: {white_statistic}\n')
print(f'white_pval: {white_pval}\n')

Example-8-6.py

import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf

k401ksubs = woo.dataWoo('401ksubs')

# subsetting data:
k401ksubs_sub = k401ksubs[k401ksubs['fsize'] == 1]

# OLS (only for singles, i.e. 'fsize'==1):
reg_ols = smf.ols(formula='nettfa ~ inc + I((age-25)**2) + male + e401k',
                  data=k401ksubs_sub)
results_ols = reg_ols.fit(cov_type='HC0')

# print regression table:
table_ols = pd.DataFrame({'b': round(results_ols.params, 4),
                          'se': round(results_ols.bse, 4),
                          't': round(results_ols.tvalues, 4),
                          'pval': round(results_ols.pvalues, 4)})
print(f'table_ols: \n{table_ols}\n')

# WLS:
wls_weight = list(1 / k401ksubs_sub['inc'])
reg_wls = smf.wls(formula='nettfa ~ inc + I((age-25)**2) + male + e401k',
                  weights=wls_weight, data=k401ksubs_sub)
results_wls = reg_wls.fit()

# print regression table:
table_wls = pd.DataFrame({'b': round(results_wls.params, 4),
                          'se': round(results_wls.bse, 4),
                          't': round(results_wls.tvalues, 4),
                          'pval': round(results_wls.pvalues, 4)})
print(f'table_wls: \n{table_wls}\n')

Example-8-7.py

import wooldridge as woo
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import patsy as pt

smoke = woo.dataWoo('smoke')

# OLS:
reg_ols = smf.ols(formula='cigs ~ np.log(income) + np.log(cigpric) +'
                          'educ + age + I(age**2) + restaurn',
                  data=smoke)
results_ols = reg_ols.fit()
table_ols = pd.DataFrame({'b': round(results_ols.params, 4),
                          'se': round(results_ols.bse, 4),
                          't': round(results_ols.tvalues, 4),
                          'pval': round(results_ols.pvalues, 4)})
print(f'table_ols: \n{table_ols}\n')

# BP test:
y, X = pt.dmatrices('cigs ~ np.log(income) + np.log(cigpric) + educ +'
                    'age + I(age**2) + restaurn',
                    data=smoke, return_type='dataframe')
result_bp = sm.stats.diagnostic.het_breuschpagan(results_ols.resid, X)
bp_statistic = result_bp[0]
bp_pval = result_bp[1]
print(f'bp_statistic: {bp_statistic}\n')
print(f'bp_pval: {bp_pval}\n')

# FGLS (estimation of the variance function):
smoke['logu2'] = np.log(results_ols.resid ** 2)
reg_fgls = smf.ols(formula='logu2 ~ np.log(income) + np.log(cigpric) +'
                           'educ + age + I(age**2) + restaurn', data=smoke)
results_fgls = reg_fgls.fit()
table_fgls = pd.DataFrame({'b': round(results_fgls.params, 4),
                           'se': round(results_fgls.bse, 4),
                           't': round(results_fgls.tvalues, 4),
                           'pval': round(results_fgls.pvalues, 4)})
print(f'table_fgls: \n{table_fgls}\n')

# FGLS (WLS):
wls_weight = list(1 / np.exp(results_fgls.fittedvalues))
reg_wls = smf.wls(formula='cigs ~ np.log(income) + np.log(cigpric) +'
                          'educ + age + I(age**2) + restaurn',
                  weights=wls_weight, data=smoke)
results_wls = reg_wls.fit()
table_wls = pd.DataFrame({'b': round(results_wls.params, 4),
                          'se': round(results_wls.bse, 4),
                          't': round(results_wls.tvalues, 4),
                          'pval': round(results_wls.pvalues, 4)})
print(f'table_wls: \n{table_wls}\n')

WLS-Robust.py

import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf

k401ksubs = woo.dataWoo('401ksubs')

# subsetting data:
k401ksubs_sub = k401ksubs[k401ksubs['fsize'] == 1]

# WLS:
wls_weight = list(1 / k401ksubs_sub['inc'])
reg_wls = smf.wls(formula='nettfa ~ inc + I((age-25)**2) + male + e401k',
                  weights=wls_weight, data=k401ksubs_sub)

# non-robust (default) results:
results_wls = reg_wls.fit()
table_default = pd.DataFrame({'b': round(results_wls.params, 4),
                              'se': round(results_wls.bse, 4),
                              't': round(results_wls.tvalues, 4),
                              'pval': round(results_wls.pvalues, 4)})
print(f'table_default: \n{table_default}\n')

# robust results (Refined White SE):
results_white = reg_wls.fit(cov_type='HC3')
table_white = pd.DataFrame({'b': round(results_white.params, 4),
                            'se': round(results_white.bse, 4),
                            't': round(results_white.tvalues, 4),
                            'pval': round(results_white.pvalues, 4)})
print(f'table_white: \n{table_white}\n')

calc-white-se.jl

using LinearAlgebra
include("../03/getMats.jl")

# for details, see Wooldridge (2010), p. 57
function calc_white_se(reg, df)
    f = formula(reg)
    xy = getMats(f, df)
    y = xy[1]
    X = xy[2]
    u = residuals(reg)
    invXX = inv(X' * X)
    sumterm = (X .* u)' * (X .* u)
    avar = invXX' * sumterm * invXX
    std_white = sqrt.(diag(avar))
    return std_white
end

Example-8-2-cont.jl

using PyCall, WooldridgeDatasets, GLM, DataFrames
include("../03/getMats.jl")

# install Python's statsmodels with: using Conda; Conda.add("statsmodels")
sm = pyimport("statsmodels.api")

gpa3 = DataFrame(wooldridge("gpa3"))
gpa3_subset = subset(gpa3, :spring => ByRow(==(1)))

# F test using usual VCOV in Julia:
reg_ur = lm(@formula(cumgpa ~ sat + hsperc + tothrs + female + black + white),
    gpa3_subset)
reg_r = lm(@formula(cumgpa ~ sat + hsperc + tothrs + female),
    gpa3_subset)
ftest_res = ftest(reg_r.model, reg_ur.model)
fstat_jl = ftest_res.fstat[2]
fpval_jl = ftest_res.pval[2]
println("fstat_jl = $fstat_jl\n")
println("fpval_jl = $fpval_jl\n")

# F test using different variance-covariance formulas:
# definition of model and hypotheses:
f = @formula(cumgpa ~ 1 + sat + hsperc + tothrs + female + black + white)
xy = getMats(f, gpa3_subset)
reg = sm.OLS(xy[1], xy[2])
hypotheses = ["x5 = 0", "x6 = 0"] # meaning "black = 0" and "white = 0"

# usual VCOV in Python:
results_default = reg.fit()
ftest_py_default = results_default.f_test(hypotheses)
fstat_py_default = ftest_py_default.statistic
fpval_py_default = ftest_py_default.pvalue
println("fstat_py_default = $fstat_py_default\n")
println("fpval_py_default = $fpval_py_default\n")

# classical White VCOV in Python:
results_hc0 = reg.fit(cov_type="HC0")
ftest_py_hc0 = results_hc0.f_test(hypotheses)
fstat_py_hc0 = ftest_py_hc0.statistic
fpval_py_hc0 = ftest_py_hc0.pvalue
println("fstat_py_hc0 = $fstat_py_hc0\n")
println("fpval_py_hc0 = $fpval_py_hc0")

Example-8-2-manual.jl

using WooldridgeDatasets, GLM, DataFrames
include("../03/getMats.jl")
gpa3 = DataFrame(wooldridge("gpa3"))

reg_default = lm(@formula(cumgpa ~ sat + hsperc + tothrs +
                                   female + black + white),
        subset(gpa3, :spring => ByRow(==(1))))

# extract formula parts for SE calculation:
f = formula(reg_default)
xy = getMats(f, subset(gpa3, :spring => ByRow(==(1))))
y = xy[1]
X = xy[2]
u = residuals(reg_default)
df = DataFrame(X, :auto)


# calculate all rij:
ri1 = residuals(lm(@formula(x1 ~ 0 + x2 + x3 + x4 + x5 + x6 + x7), df))
ri2 = residuals(lm(@formula(x2 ~ 0 + x1 + x3 + x4 + x5 + x6 + x7), df))
ri3 = residuals(lm(@formula(x3 ~ 0 + x1 + x2 + x4 + x5 + x6 + x7), df))
ri4 = residuals(lm(@formula(x4 ~ 0 + x1 + x2 + x3 + x5 + x6 + x7), df))
ri5 = residuals(lm(@formula(x5 ~ 0 + x1 + x2 + x3 + x4 + x6 + x7), df))
ri6 = residuals(lm(@formula(x6 ~ 0 + x1 + x2 + x3 + x4 + x5 + x7), df))
ri7 = residuals(lm(@formula(x7 ~ 0 + x1 + x2 + x3 + x4 + x5 + x6), df))

# calculate SE according to Wooldridge (2019), Ch. 8.2:
se1 = sqrt(sum((ri1 .^ 2) .* (u .^ 2)) / (sum((ri1 .^ 2))^2))
se2 = sqrt(sum((ri2 .^ 2) .* (u .^ 2)) / (sum((ri2 .^ 2))^2))
se3 = sqrt(sum((ri3 .^ 2) .* (u .^ 2)) / (sum((ri3 .^ 2))^2))
se4 = sqrt(sum((ri4 .^ 2) .* (u .^ 2)) / (sum((ri4 .^ 2))^2))
se5 = sqrt(sum((ri5 .^ 2) .* (u .^ 2)) / (sum((ri5 .^ 2))^2))
se6 = sqrt(sum((ri6 .^ 2) .* (u .^ 2)) / (sum((ri6 .^ 2))^2))
se7 = sqrt(sum((ri7 .^ 2) .* (u .^ 2)) / (sum((ri7 .^ 2))^2))

se_white = round.([se1, se2, se3, se4, se5, se6, se7], digits=5)
println("se_white = $se_white")

Example-8-2.jl

using WooldridgeDatasets, GLM, DataFrames
include("calc-white-se.jl")

gpa3 = DataFrame(wooldridge("gpa3"))

reg_default = lm(@formula(cumgpa ~ sat + hsperc + tothrs +
                                   female + black + white),
        subset(gpa3, :spring => ByRow(==(1))))

hc0 = calc_white_se(reg_default, subset(gpa3, :spring => ByRow(==(1))))

table_se = DataFrame(coefficients=coeftable(reg_default).rownms,
        b=round.(coef(reg_default), digits=5),
        se_default=round.(coeftable(reg_default).cols[2], digits=5),
        se_white=hc0)
println("table_se: \n$table_se")

Example-8-4.jl

using WooldridgeDatasets, GLM, DataFrames, HypothesisTests

hprice1 = DataFrame(wooldridge("hprice1"))

# estimate model:
f = @formula(price ~ 1 + lotsize + sqrft + bdrms)
reg = lm(f, hprice1)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg\n")

# automatic BP test (LM version),
# type = :linear specifies Breusch-Pagan test:
X = getMats(f, hprice1)[2]
result_bp_lm = WhiteTest(X, residuals(reg), type=:linear)
bp_lm_statistic = result_bp_lm.lm
bp_lm_pval = pvalue(result_bp_lm)
println("bp_lm_statistic = $bp_lm_statistic\n")
println("bp_lm_pval = $bp_lm_pval\n")

# manual BP test (F version):
hprice1.resid_sq = residuals(reg) .^ 2
reg_resid = lm(@formula(resid_sq ~ lotsize + sqrft + bdrms), hprice1)
bp_F = ftest(reg_resid.model)
bp_F_statistic = bp_F.fstat
bp_F_pval = bp_F.pval
println("bp_F_statistic = $bp_F_statistic\n")
println("bp_F_pval = $bp_F_pval")

Example-8-5.jl

using WooldridgeDatasets, GLM, DataFrames, HypothesisTests
include("../03/getMats.jl")

hprice1 = DataFrame(wooldridge("hprice1"))

# estimate model:
f = @formula(log(price) ~ 1 + log(lotsize) + log(sqrft) + bdrms)
reg = lm(f, hprice1)

# BP test:
X = getMats(f, hprice1)[2]
result_bp = WhiteTest(X, residuals(reg), type=:linear)
bp_statistic = result_bp.lm
bp_pval = pvalue(result_bp)
println("bp_statistic = $bp_statistic\n")
println("bp_pval = $bp_pval\n")

# White test:
X_wh = hcat(ones(size(X)[1]),
    predict(reg),
    predict(reg) .^ 2)
result_white = WhiteTest(X_wh, residuals(reg), type=:linear)
white_statistic = result_white.lm
white_pval = pvalue(result_white)
println("white_statistic = $white_statistic\n")
println("white_pval = $white_pval")

Example-8-6.jl

using WooldridgeDatasets, GLM, DataFrames
include("calc-white-se.jl")

k401ksubs = DataFrame(wooldridge("401ksubs"))

# subsetting data:
k401ksubs_sub = subset(k401ksubs, :fsize => ByRow(==(1)))

# OLS (only for singles, i.e. ’fsize’==1):
reg_ols = lm(@formula(nettfa ~ inc + ((age - 25)^2) + male + e401k),
        k401ksubs_sub)
hc0 = calc_white_se(reg_ols, k401ksubs_sub)

# print regression table with hc0:
table_ols = DataFrame(coefficients=coeftable(reg_ols).rownms,
        b=round.(coef(reg_ols), digits=5),
        se=round.(hc0, digits=5))
println("table_ols: \n$table_ols\n")

# WLS:
k401ksubs_sub.w = (1 ./ sqrt.(k401ksubs_sub.inc))
reg_wls = lm(@formula(identity(nettfa * w) ~ 0 + w + identity(inc * w) +
                                             identity((age - 25)^2 * w) +
                                             identity(male * w) +
                                             identity(e401k * w)), k401ksubs_sub)

# print regression table:
table_wls = DataFrame(coefficients=coeftable(reg_wls).rownms,
        b=round.(coef(reg_wls), digits=5),
        se=round.(coeftable(reg_wls).cols[2], digits=5))
println("table_wls: \n$table_wls\n")

Example-8-7.jl

using WooldridgeDatasets, GLM, DataFrames, HypothesisTests

smoke = DataFrame(wooldridge("smoke"))

# OLS:
reg_ols = lm(@formula(cigs ~ log(income) + log(cigpric) +
                             educ + age + age^2 + restaurn), smoke)
table_ols = DataFrame(coefficients=coeftable(reg_ols).rownms,
        b=round.(coef(reg_ols), digits=5),
        se=round.(stderror(reg_ols), digits=5))
println("table_ols: \n$table_ols\n")

# BP test:
X = modelmatrix(reg_ols)
result_bp = WhiteTest(X, residuals(reg_ols), type=:linear)
bp_statistic = result_bp.lm
bp_pval = pvalue(result_bp)
println("bp_statistic = $bp_statistic\n")
println("bp_pval = $bp_pval\n")

# FGLS (estimation of the variance function):
smoke.logu2 = log.(residuals(reg_ols) .^ 2)
reg_fgls = lm(@formula(logu2 ~ log(income) + log(cigpric) +
                               educ + age + age^2 + restaurn), smoke)

table_fgls = DataFrame(coefficients=coeftable(reg_fgls).rownms,
        b=round.(coef(reg_fgls), digits=5),
        se=round.(stderror(reg_fgls), digits=5))
println("table_fgls: \n$table_fgls\n")

# FGLS (WLS):
smoke.w = (1 ./ sqrt.(exp.(predict(reg_fgls))))

reg_wls = lm(@formula(identity(cigs * w) ~ 0 + w + identity(log(income) * w) +
                                           identity(log(cigpric) * w) +
                                           identity(educ * w) +
                                           identity(age * w) +
                                           identity(age^2 * w) +
                                           identity(restaurn * w)), smoke)

table_wls = DataFrame(coefficients=coeftable(reg_wls).rownms,
        b=round.(coef(reg_wls), digits=5),
        se=round.(stderror(reg_wls), digits=5))
println("table_wls: \n$table_wls")

WLS-Robust.jl

using WooldridgeDatasets, GLM, DataFrames
include("calc-white-se.jl")

k401ksubs = DataFrame(wooldridge("401ksubs"))

# subsetting data:
k401ksubs_sub = subset(k401ksubs, :fsize => ByRow(==(1)))

# WLS:
k401ksubs_sub.w = (1 ./ sqrt.(k401ksubs_sub.inc))
reg_wls = lm(@formula(identity(nettfa * w) ~ 0 + w + identity(inc * w) +
                                             identity((age - 25)^2 * w) +
                                             identity(male * w) +
                                             identity(e401k * w)), k401ksubs_sub)

# robust results (White SE):
hc0 = calc_white_se(reg_wls, k401ksubs_sub)

# print regression table:
table_default = DataFrame(coefficients=coeftable(reg_wls).rownms,
        b=round.(coef(reg_wls), digits=5),
        se_default=round.(coeftable(reg_wls).cols[2], digits=5),
        se_robust=round.(hc0, digits=5))
println("table_default: \n$table_default")