Example-3-1.R
data(gpa1, package='wooldridge')
# Just obtain parameter estimates:
lm(colGPA ~ hsGPA+ACT, data=gpa1)
# Store results under "GPAres" and display full table:
GPAres <- lm(colGPA ~ hsGPA+ACT, data=gpa1)
summary(GPAres)
data(gpa1, package='wooldridge')
# Full estimation results including automatic SE:
res <- lm(colGPA ~ hsGPA+ACT, data=gpa1)
summary(res)
# Extract SER (instead of calculation via residuals)
( SER <- summary(res)$sigma )
# regressing hsGPA on ACT for calculation of R2 & VIF
( R2.hsGPA <- summary( lm(hsGPA~ACT, data=gpa1) )$r.squared )
( VIF.hsGPA <- 1/(1-R2.hsGPA) )
# manual calculation of SE of hsGPA coefficient:
n <- nobs(res)
sdx <- sd(gpa1$hsGPA) * sqrt((n-1)/n) # (Note: sd() uses the (n-1) version)
( SE.hsGPA <- 1/sqrt(n) * SER/sdx * sqrt(VIF.hsGPA) )
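# Cross-check (sketch): summary() reports the same SE directly;
# both values should agree up to rounding.
summary(res)$coefficients["hsGPA", "Std. Error"]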
data(gpa1, package='wooldridge')
# Determine sample size & no. of regressors:
n <- nrow(gpa1); k<-2
# extract y
y <- gpa1$colGPA
# extract X & add a column of ones
X <- cbind(1, gpa1$hsGPA, gpa1$ACT)
# Display first rows of X:
head(X)
# Parameter estimates:
( bhat <- solve( t(X)%*%X ) %*% t(X)%*%y )
# Residuals, estimated variance of u and SER:
uhat <- y - X %*% bhat
sigsqhat <- as.numeric( t(uhat) %*% uhat / (n-k-1) )
( SER <- sqrt(sigsqhat) )
# Estimated variance of the parameter estimators and SE:
Vbetahat <- sigsqhat * solve( t(X)%*%X )
( se <- sqrt( diag(Vbetahat) ) )
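# Cross-check (sketch): lm() and vcov() should reproduce these SEs;
# "fit" is an illustrative name, not part of the script above.
fit <- lm(colGPA ~ hsGPA + ACT, data=gpa1)
sqrt( diag(vcov(fit)) )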
data(gpa1, package='wooldridge')
# Parameter estimates for full and simple model:
beta.hat <- coef( lm(colGPA ~ ACT+hsGPA, data=gpa1) )
beta.hat
# Relation between regressors:
delta.tilde <- coef( lm(hsGPA ~ ACT, data=gpa1) )
delta.tilde
# Omitted variables formula for beta1.tilde:
beta.hat["ACT"] + beta.hat["hsGPA"]*delta.tilde["ACT"]
# Actual regression with hsGPA omitted:
lm(colGPA ~ ACT, data=gpa1)
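# Cross-check (sketch): the simple-regression slope should equal the value
# implied by the omitted variables formula, up to floating point error.
beta.tilde <- coef( lm(colGPA ~ ACT, data=gpa1) )["ACT"]
all.equal( unname(beta.tilde),
           unname(beta.hat["ACT"] + beta.hat["hsGPA"]*delta.tilde["ACT"]) )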
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf
gpa1 = woo.dataWoo('gpa1')
# full estimation results including automatic SE:
reg = smf.ols(formula='colGPA ~ hsGPA + ACT', data=gpa1)
results = reg.fit()
# extract SER (instead of calculation via residuals):
SER = np.sqrt(results.mse_resid)
# regressing hsGPA on ACT for calculation of R2 & VIF:
reg_hsGPA = smf.ols(formula='hsGPA ~ ACT', data=gpa1)
results_hsGPA = reg_hsGPA.fit()
R2_hsGPA = results_hsGPA.rsquared
VIF_hsGPA = 1 / (1 - R2_hsGPA)
print(f'VIF_hsGPA: {VIF_hsGPA}\n')
# manual calculation of SE of hsGPA coefficient:
n = results.nobs
sdx = np.std(gpa1['hsGPA'], ddof=1) * np.sqrt((n - 1) / n)
SE_hsGPA = 1 / np.sqrt(n) * SER / sdx * np.sqrt(VIF_hsGPA)
print(f'SE_hsGPA: {SE_hsGPA}\n')
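# cross-check (sketch): statsmodels exposes the same SE via .bse;
# both values should agree up to rounding:
print(f'results.bse["hsGPA"]: {results.bse["hsGPA"]}\n')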
import wooldridge as woo
import numpy as np
import statsmodels.stats.outliers_influence as smo
import patsy as pt
wage1 = woo.dataWoo('wage1')
# extract matrices using patsy:
y, X = pt.dmatrices('np.log(wage) ~ educ + exper + tenure',
                    data=wage1, return_type='dataframe')
# get VIF:
K = X.shape[1]
VIF = np.empty(K)
for i in range(K):
    VIF[i] = smo.variance_inflation_factor(X.values, i)
print(f'VIF: \n{VIF}\n')
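# optional (sketch): pairing each VIF with its column name makes the output
# easier to read; the first entry belongs to the intercept:
import pandas as pd
VIF_named = pd.Series(VIF, index=X.columns)
print(f'VIF_named: \n{VIF_named}\n')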
import wooldridge as woo
import numpy as np
import pandas as pd
import patsy as pt
gpa1 = woo.dataWoo('gpa1')
# determine sample size & no. of regressors:
n = len(gpa1)
k = 2
# extract y:
y = gpa1['colGPA']
# extract X & add a column of ones:
X = pd.DataFrame({'const': 1, 'hsGPA': gpa1['hsGPA'], 'ACT': gpa1['ACT']})
# alternative with patsy:
y2, X2 = pt.dmatrices('colGPA ~ hsGPA + ACT', data=gpa1, return_type='dataframe')
# display first rows of X:
print(f'X.head(): \n{X.head()}\n')
# parameter estimates:
X = np.array(X)
y = np.array(y).reshape(n, 1) # creates a column vector
b = np.linalg.inv(X.T @ X) @ X.T @ y
print(f'b: \n{b}\n')
# residuals, estimated variance of u and SER:
u_hat = y - X @ b
sigsq_hat = (u_hat.T @ u_hat) / (n - k - 1)
SER = np.sqrt(sigsq_hat)
print(f'SER: {SER}\n')
# estimated variance of the parameter estimators and SE:
Vbeta_hat = sigsq_hat * np.linalg.inv(X.T @ X)
se = np.sqrt(np.diagonal(Vbeta_hat))
print(f'se: {se}\n')
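# alternative (sketch): np.linalg.lstsq solves the same least squares problem
# without explicitly inverting X'X, which is numerically more stable:
b_lstsq = np.linalg.lstsq(X, y, rcond=None)[0]
print(f'b_lstsq: \n{b_lstsq}\n')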
import wooldridge as woo
import statsmodels.formula.api as smf
gpa1 = woo.dataWoo('gpa1')
# parameter estimates for full and simple model:
reg = smf.ols(formula='colGPA ~ ACT + hsGPA', data=gpa1)
results = reg.fit()
b = results.params
print(f'b: \n{b}\n')
# relation between regressors:
reg_delta = smf.ols(formula='hsGPA ~ ACT', data=gpa1)
results_delta = reg_delta.fit()
delta_tilde = results_delta.params
print(f'delta_tilde: \n{delta_tilde}\n')
# omitted variables formula for b1_tilde:
b1_tilde = b['ACT'] + b['hsGPA'] * delta_tilde['ACT']
print(f'b1_tilde: \n{b1_tilde}\n')
# actual regression with hsGPA omitted:
reg_om = smf.ols(formula='colGPA ~ ACT', data=gpa1)
results_om = reg_om.fit()
b_om = results_om.params
print(f'b_om: \n{b_om}\n')
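# cross-check (sketch): the simple-regression slope should equal b1_tilde
# up to floating point error:
print(f'b_om["ACT"] - b1_tilde: {b_om["ACT"] - b1_tilde}\n')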
using WooldridgeDatasets, DataFrames, GLM, Statistics
gpa1 = DataFrame(wooldridge("gpa1"))
# full estimation results including automatic SE:
reg = lm(@formula(colGPA ~ hsGPA + ACT), gpa1)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg\n")
# calculation of SER via residuals:
n = nobs(reg)
k = length(coef(reg))
SER = sqrt(sum(residuals(reg) .^ 2) / (n - k))
# regressing hsGPA on ACT for calculation of R2 & VIF:
reg_hsGPA = lm(@formula(hsGPA ~ ACT), gpa1)
R2_hsGPA = r2(reg_hsGPA)
VIF_hsGPA = 1 / (1 - R2_hsGPA)
println("VIF_hsGPA = $VIF_hsGPA\n")
# manual calculation of SE of hsGPA coefficient:
sdx = std(gpa1.hsGPA) * sqrt((n - 1) / n)
SE_hsGPA = 1 / sqrt(n) * SER / sdx * sqrt(VIF_hsGPA)
println("SE_hsGPA = $SE_hsGPA")
using WooldridgeDatasets, DataFrames, GLM
wage1 = DataFrame(wooldridge("wage1"))
# get VIF:
m_1 = lm(@formula(educ ~ exper + tenure), wage1)
Vif_1 = 1 / (1 - r2(m_1))
println("Vif_1 = $Vif_1")
m_2 = lm(@formula(exper ~ educ + tenure), wage1)
Vif_2 = 1 / (1 - r2(m_2))
println("Vif_2 = $Vif_2")
m_3 = lm(@formula(tenure ~ educ + exper), wage1)
Vif_3 = 1 / (1 - r2(m_3))
println("Vif_3 = $Vif_3")
using WooldridgeDatasets, DataFrames, StatsModels, LinearAlgebra
include("getMats.jl")
gpa1 = DataFrame(wooldridge("gpa1"))
# build y and X from a formula:
f = @formula(colGPA ~ 1 + hsGPA + ACT)
xy = getMats(f, gpa1)
y = xy[1]
X = xy[2]
# parameter estimates:
b = inv(transpose(X) * X) * transpose(X) * y
println("b = $b")
using WooldridgeDatasets, DataFrames, LinearAlgebra
gpa1 = DataFrame(wooldridge("gpa1"))
# determine sample size & no. of regressors:
n = size(gpa1)[1]
k = 2
# extract y:
y = gpa1.colGPA
# extract X and add a column of ones:
X = hcat(ones(n), gpa1.hsGPA, gpa1.ACT)
# display first rows of X:
X_preview = round.(X[1:3, :], digits=5)
println("X_preview = $X_preview\n")
# parameter estimates:
b = inv(transpose(X) * X) * transpose(X) * y
println("b = $b\n")
# residuals, estimated variance of u and SER:
u_hat = y - X * b
sigsq_hat = (transpose(u_hat) * u_hat) / (n - k - 1)
SER = sqrt(sigsq_hat)
println("SER = $SER\n")
# estimated variance of the parameter estimators and SE:
Vbeta_hat = sigsq_hat .* inv(transpose(X) * X)
se = sqrt.(diag(Vbeta_hat))
println("se = $se")
using WooldridgeDatasets, DataFrames, GLM
gpa1 = DataFrame(wooldridge("gpa1"))
# parameter estimates for full and simple model:
reg = lm(@formula(colGPA ~ ACT + hsGPA), gpa1)
b = coef(reg)
println("b = $b\n")
# relation between regressors:
reg_delta = lm(@formula(hsGPA ~ ACT), gpa1)
delta_tilde = coef(reg_delta)
println("delta_tilde = $delta_tilde\n")
# omitted variables formula for b1_tilde:
b1_tilde = b[2] + b[3] * delta_tilde[2]
println("b1_tilde = $b1_tilde\n")
# actual regression with hsGPA omitted:
reg_om = lm(@formula(colGPA ~ ACT), gpa1)
b_om = coef(reg_om)
println("b_om = $b_om")