Chapter 3

Example-3-1.R
data(gpa1, package='wooldridge')

# Just obtain parameter estimates:
lm(colGPA ~ hsGPA+ACT, data=gpa1)

# Store results under "GPAres" and display full table:
GPAres <- lm(colGPA ~ hsGPA+ACT, data=gpa1)
summary(GPAres)
Example-3-2.R
data(wage1, package='wooldridge')

# OLS regression:
summary( lm(log(wage) ~ educ+exper+tenure, data=wage1) )
Example-3-3.R
# Import data from the Stata file (must be in the current working directory):
d401k <- rio::import("401k.dta")

# OLS regression:
summary( lm(prate ~ mrate+age, data=d401k) )
Example-3-5.R
data(crime1, package='wooldridge')

# Model without avgsen:
summary( lm(narr86 ~ pcnv+ptime86+qemp86, data=crime1) )

# Model with avgsen:
summary( lm(narr86 ~ pcnv+avgsen+ptime86+qemp86, data=crime1) )
Example-3-6.R
data(wage1, package='wooldridge')

# OLS regression:
summary( lm(log(wage) ~ educ, data=wage1) )
MLR-SE.R
data(gpa1, package='wooldridge')

# Full estimation results including automatic SE:
res <- lm(colGPA ~ hsGPA+ACT, data=gpa1)
summary(res)

# Extract SER (instead of calculating it via the residuals):
( SER <- summary(res)$sigma )

# Regressing hsGPA on ACT for calculation of R2 & VIF:
( R2.hsGPA  <- summary( lm(hsGPA~ACT, data=gpa1) )$r.squared )
( VIF.hsGPA <- 1/(1-R2.hsGPA) )

# Manual calculation of SE of hsGPA coefficient:
n <- nobs(res)
sdx <- sd(gpa1$hsGPA) * sqrt((n-1)/n)  # (Note: sd() uses the (n-1) version)
( SE.hsGPA <- 1/sqrt(n) * SER/sdx  * sqrt(VIF.hsGPA) )
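The last three lines implement the usual formula for the standard error of a single coefficient (shown here for reference; sd(x_j) denotes the standard deviation using the 1/n version):

\[ \operatorname{se}(\hat\beta_j) = \frac{1}{\sqrt{n}} \cdot \frac{\hat\sigma}{\operatorname{sd}(x_j)} \cdot \sqrt{\mathrm{VIF}_j}, \qquad \mathrm{VIF}_j = \frac{1}{1-R_j^2}, \]

where \(\hat\sigma\) is the SER and \(R_j^2\) is the R-squared from regressing \(x_j\) on the other regressors.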
MLR-VIF.R
data(wage1, package='wooldridge')

# OLS regression:
lmres <- lm(log(wage) ~ educ+exper+tenure, data=wage1)

# Regression output:
summary(lmres)

# Load package "car" (has to be installed):
library(car)
# Automatically calculate VIF:
vif(lmres)
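As a cross-check (a minimal sketch, not part of the original script; R2.educ and VIF.educ are illustrative names), each value reported by vif() can be reproduced from the R-squared of an auxiliary regression:

# Manual VIF for educ: regress educ on the remaining regressors
R2.educ <- summary( lm(educ ~ exper+tenure, data=wage1) )$r.squared
( VIF.educ <- 1/(1-R2.educ) )  # should match vif(lmres)["educ"]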
OLS-Matrices.R
data(gpa1, package='wooldridge')

# Determine sample size & no. of regressors:
n <- nrow(gpa1); k <- 2

# extract y
y <- gpa1$colGPA

# extract X & add a column of ones
X <- cbind(1, gpa1$hsGPA, gpa1$ACT)

# Display first rows of X:
head(X)

# Parameter estimates:
( bhat <- solve( t(X)%*%X ) %*% t(X)%*%y )

# Residuals, estimated variance of u and SER:
uhat <- y - X %*% bhat
sigsqhat <- as.numeric( t(uhat) %*% uhat / (n-k-1) )
( SER <- sqrt(sigsqhat) )

# Estimated variance of the parameter estimators and SE:
Vbetahat <- sigsqhat * solve( t(X)%*%X )
( se <- sqrt( diag(Vbetahat) ) )
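For reference, this script implements the standard matrix formulas

\[ \hat\beta = (X'X)^{-1}X'y, \qquad \hat u = y - X\hat\beta, \qquad \hat\sigma^2 = \frac{\hat u'\hat u}{n-k-1}, \qquad \widehat{\operatorname{Var}}(\hat\beta) = \hat\sigma^2 (X'X)^{-1}, \]

with the standard errors given by the square roots of the diagonal elements of \(\widehat{\operatorname{Var}}(\hat\beta)\).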
Omitted-Vars.R
data(gpa1, package='wooldridge')

# Parameter estimates for full and simple model:
beta.hat <- coef( lm(colGPA ~ ACT+hsGPA, data=gpa1) )
beta.hat

# Relation between regressors:
delta.tilde <- coef( lm(hsGPA ~ ACT, data=gpa1) )
delta.tilde

# Omitted variables formula for beta1.tilde:
beta.hat["ACT"] + beta.hat["hsGPA"]*delta.tilde["ACT"]

# Actual regression with hsGPA omitted:
lm(colGPA ~ ACT, data=gpa1)
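This script confirms the omitted variables formula: with \(\hat\beta\) from the long regression of colGPA on ACT and hsGPA, and \(\tilde\delta\) from regressing hsGPA on ACT,

\[ \tilde\beta_{\mathrm{ACT}} = \hat\beta_{\mathrm{ACT}} + \hat\beta_{\mathrm{hsGPA}} \, \tilde\delta_{\mathrm{ACT}}, \]

which reproduces the ACT coefficient from the short regression in the last line.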
Example-3-1.py
import wooldridge as woo
import statsmodels.formula.api as smf

gpa1 = woo.dataWoo('gpa1')

reg = smf.ols(formula='colGPA ~ hsGPA + ACT', data=gpa1)
results = reg.fit()
print(f'results.summary(): \n{results.summary()}\n')
Example-3-2.py
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf

wage1 = woo.dataWoo('wage1')

reg = smf.ols(formula='np.log(wage) ~ educ + exper + tenure', data=wage1)
results = reg.fit()
print(f'results.summary(): \n{results.summary()}\n')
Example-3-3.py
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf

k401k = woo.dataWoo('401k')

reg = smf.ols(formula='prate ~ mrate + age', data=k401k)
results = reg.fit()
print(f'results.summary(): \n{results.summary()}\n')
Example-3-5a.py
import wooldridge as woo
import statsmodels.formula.api as smf

crime1 = woo.dataWoo('crime1')

# model without avgsen:
reg = smf.ols(formula='narr86 ~ pcnv + ptime86 + qemp86', data=crime1)
results = reg.fit()
print(f'results.summary(): \n{results.summary()}\n')
Example-3-5b.py
import wooldridge as woo
import statsmodels.formula.api as smf

crime1 = woo.dataWoo('crime1')

# model with avgsen:
reg = smf.ols(formula='narr86 ~ pcnv + avgsen + ptime86 + qemp86', data=crime1)
results = reg.fit()
print(f'results.summary(): \n{results.summary()}\n')
Example-3-6.py
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf

wage1 = woo.dataWoo('wage1')

reg = smf.ols(formula='np.log(wage) ~ educ', data=wage1)
results = reg.fit()
print(f'results.summary(): \n{results.summary()}\n')
MLR-SE.py
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf

gpa1 = woo.dataWoo('gpa1')

# full estimation results including automatic SE:
reg = smf.ols(formula='colGPA ~ hsGPA + ACT', data=gpa1)
results = reg.fit()

# extract SER (instead of calculating it via residuals):
SER = np.sqrt(results.mse_resid)

# regressing hsGPA on ACT for calculation of R2 & VIF:
reg_hsGPA = smf.ols(formula='hsGPA ~ ACT', data=gpa1)
results_hsGPA = reg_hsGPA.fit()
R2_hsGPA = results_hsGPA.rsquared
VIF_hsGPA = 1 / (1 - R2_hsGPA)
print(f'VIF_hsGPA: {VIF_hsGPA}\n')

# manual calculation of SE of hsGPA coefficient:
n = results.nobs
sdx = np.std(gpa1['hsGPA'], ddof=1) * np.sqrt((n - 1) / n)  # (note: ddof=1 gives the (n-1) version)
SE_hsGPA = 1 / np.sqrt(n) * SER / sdx * np.sqrt(VIF_hsGPA)
print(f'SE_hsGPA: {SE_hsGPA}\n')
MLR-VIF.py
import wooldridge as woo
import numpy as np
import statsmodels.stats.outliers_influence as smo
import patsy as pt

wage1 = woo.dataWoo('wage1')

# extract matrices using patsy:
y, X = pt.dmatrices('np.log(wage) ~ educ + exper + tenure',
                    data=wage1, return_type='dataframe')

# get VIFs (the first entry refers to the intercept):
K = X.shape[1]
VIF = np.empty(K)
for i in range(K):
    VIF[i] = smo.variance_inflation_factor(X.values, i)
print(f'VIF: \n{VIF}\n')
OLS-Matrices.py
import wooldridge as woo
import numpy as np
import pandas as pd
import patsy as pt

gpa1 = woo.dataWoo('gpa1')

# determine sample size & no. of regressors:
n = len(gpa1)
k = 2

# extract y:
y = gpa1['colGPA']

# extract X & add a column of ones:
X = pd.DataFrame({'const': 1, 'hsGPA': gpa1['hsGPA'], 'ACT': gpa1['ACT']})

# alternative with patsy:
y2, X2 = pt.dmatrices('colGPA ~ hsGPA + ACT', data=gpa1, return_type='dataframe')

# display first rows of X:
print(f'X.head(): \n{X.head()}\n')

# parameter estimates:
X = np.array(X)
y = np.array(y).reshape(n, 1)  # creates a column vector
b = np.linalg.inv(X.T @ X) @ X.T @ y
print(f'b: \n{b}\n')

# residuals, estimated variance of u and SER:
u_hat = y - X @ b
sigsq_hat = (u_hat.T @ u_hat) / (n - k - 1)
SER = np.sqrt(sigsq_hat)
print(f'SER: {SER}\n')

# estimated variance of the parameter estimators and SE:
Vbeta_hat = sigsq_hat * np.linalg.inv(X.T @ X)
se = np.sqrt(np.diagonal(Vbeta_hat))
print(f'se: {se}\n')
Omitted-Vars.py
import wooldridge as woo
import statsmodels.formula.api as smf

gpa1 = woo.dataWoo('gpa1')

# parameter estimates for full and simple model:
reg = smf.ols(formula='colGPA ~ ACT + hsGPA', data=gpa1)
results = reg.fit()
b = results.params
print(f'b: \n{b}\n')

# relation between regressors:
reg_delta = smf.ols(formula='hsGPA ~ ACT', data=gpa1)
results_delta = reg_delta.fit()
delta_tilde = results_delta.params
print(f'delta_tilde: \n{delta_tilde}\n')

# omitted variables formula for b1_tilde:
b1_tilde = b['ACT'] + b['hsGPA'] * delta_tilde['ACT']
print(f'b1_tilde: \n{b1_tilde}\n')

# actual regression with hsGPA omitted:
reg_om = smf.ols(formula='colGPA ~ ACT', data=gpa1)
results_om = reg_om.fit()
b_om = results_om.params
print(f'b_om: \n{b_om}\n')
Example-3-1.jl
using WooldridgeDatasets, GLM, DataFrames

gpa1 = DataFrame(wooldridge("gpa1"))

reg = lm(@formula(colGPA ~ hsGPA + ACT), gpa1)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg\n")

r2_automatic = r2(reg)
println("r2_automatic = $r2_automatic")
Example-3-2.jl
using WooldridgeDatasets, GLM, DataFrames

wage1 = DataFrame(wooldridge("wage1"))

reg = lm(@formula(log(wage) ~ educ + exper + tenure), wage1)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg")
Example-3-3.jl
using WooldridgeDatasets, DataFrames, GLM

k401k = DataFrame(wooldridge("401k"))

reg = lm(@formula(prate ~ mrate + age), k401k)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg")
Example-3-5a.jl
using WooldridgeDatasets, DataFrames, GLM

crime1 = DataFrame(wooldridge("crime1"))

# model without avgsen:
reg = lm(@formula(narr86 ~ pcnv + ptime86 + qemp86), crime1)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg")
Example-3-5b.jl
using WooldridgeDatasets, DataFrames, GLM

crime1 = DataFrame(wooldridge("crime1"))

# model with avgsen:
reg = lm(@formula(narr86 ~ pcnv + avgsen + ptime86 + qemp86), crime1)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg")
Example-3-6.jl
using WooldridgeDatasets, DataFrames, GLM

wage1 = DataFrame(wooldridge("wage1"))

reg = lm(@formula(log(wage) ~ educ), wage1)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg")
getMats.jl
# Helper to build the response vector and design matrix from a formula
# (used below in OLS-Matrices-Formula.jl).
# For details, see https://juliastats.org/StatsModels.jl/stable/internals/
using StatsModels

function getMats(formula, df)
    f = apply_schema(formula, schema(formula, df))
    resp, pred = modelcols(f, df)
    return (resp, pred)
end
MLR-SE.jl
using WooldridgeDatasets, DataFrames, GLM, Statistics

gpa1 = DataFrame(wooldridge("gpa1"))

# full estimation results including automatic SE:
reg = lm(@formula(colGPA ~ hsGPA + ACT), gpa1)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg\n")

# calculation of SER via residuals:
n = nobs(reg)
k = length(coef(reg))  # k includes the intercept here, so divide by n - k
SER = sqrt(sum(residuals(reg) .^ 2) / (n - k))

# regressing hsGPA on ACT for calculation of R2 & VIF:
reg_hsGPA = lm(@formula(hsGPA ~ ACT), gpa1)
R2_hsGPA = r2(reg_hsGPA)
VIF_hsGPA = 1 / (1 - R2_hsGPA)
println("VIF_hsGPA = $VIF_hsGPA\n")

# manual calculation of SE of hsGPA coefficient:
sdx = std(gpa1.hsGPA) * sqrt((n - 1) / n)  # (note: std() uses the (n-1) version)
SE_hsGPA = 1 / sqrt(n) * SER / sdx * sqrt(VIF_hsGPA)
println("SE_hsGPA = $SE_hsGPA")
MLR-VIF.jl
using WooldridgeDatasets, DataFrames, GLM

wage1 = DataFrame(wooldridge("wage1"))

# get VIF:
m_1 = lm(@formula(educ ~ exper + tenure), wage1)
Vif_1 = 1 / (1 - r2(m_1))
println("Vif_1 = $Vif_1")

m_2 = lm(@formula(exper ~ educ + tenure), wage1)
Vif_2 = 1 / (1 - r2(m_2))
println("Vif_2 = $Vif_2")

m_3 = lm(@formula(tenure ~ educ + exper), wage1)
Vif_3 = 1 / (1 - r2(m_3))
println("Vif_3 = $Vif_3")
OLS-Matrices-Formula.jl
using WooldridgeDatasets, DataFrames, StatsModels, LinearAlgebra
include("getMats.jl")

gpa1 = DataFrame(wooldridge("gpa1"))

# build y and X from a formula:
f = @formula(colGPA ~ 1 + hsGPA + ACT)
xy = getMats(f, gpa1)
y = xy[1]
X = xy[2]

# parameter estimates:
b = inv(transpose(X) * X) * transpose(X) * y
println("b = $b")
OLS-Matrices.jl
using WooldridgeDatasets, DataFrames, LinearAlgebra

gpa1 = DataFrame(wooldridge("gpa1"))

# determine sample size & no. of regressors:
n = size(gpa1)[1]
k = 2

# extract y:
y = gpa1.colGPA

# extract X and add a column of ones:
X = hcat(ones(n), gpa1.hsGPA, gpa1.ACT)
# display first rows of X:
X_preview = round.(X[1:3, :], digits=5)
println("X_preview = $X_preview\n")

# parameter estimates:
b = inv(transpose(X) * X) * transpose(X) * y
println("b = $b\n")

# residuals, estimated variance of u and SER:
u_hat = y - X * b
sigsq_hat = (transpose(u_hat) * u_hat) / (n - k - 1)
SER = sqrt(sigsq_hat)
println("SER = $SER\n")

# estimated variance of the parameter estimators and SE:
Vbeta_hat = sigsq_hat .* inv(transpose(X) * X)
se = sqrt.(diag(Vbeta_hat))
println("se = $se")
Omitted-Vars.jl
using WooldridgeDatasets, DataFrames, GLM

gpa1 = DataFrame(wooldridge("gpa1"))

# parameter estimates for full and simple model:
reg = lm(@formula(colGPA ~ ACT + hsGPA), gpa1)
b = coef(reg)
println("b = $b\n")

# relation between regressors:
reg_delta = lm(@formula(hsGPA ~ ACT), gpa1)
delta_tilde = coef(reg_delta)
println("delta_tilde = $delta_tilde\n")

# omitted variables formula for b1_tilde:
b1_tilde = b[2] + b[3] * delta_tilde[2]
println("b1_tilde = $b1_tilde\n")

# actual regression with hsGPA omitted:
reg_om = lm(@formula(colGPA ~ ACT), gpa1)
b_om = coef(reg_om)
println("b_om = $b_om")