Example-4-1-cv.R
# CV for alpha=5% and 1% using the t distribution with 522 d.f.:
alpha <- c(0.05, 0.01)
qt(1-alpha, 522)
# Critical values for alpha=5% and 1% using the normal approximation:
qnorm(1-alpha)
data(meap93, package='wooldridge')
# define new variable within data frame
meap93$b_s <- meap93$benefits / meap93$salary
# Estimate three different models
model1 <- lm(log(salary) ~ b_s, data=meap93)
model2 <- lm(log(salary) ~ b_s+log(enroll)+log(staff), data=meap93)
model3 <- lm(log(salary) ~ b_s+log(enroll)+log(staff)+droprate+gradrate,
             data=meap93)
# Load package and display table of results
library(stargazer)
stargazer(list(model1,model2,model3),type="text",keep.stat=c("n","rsq"))
data(gpa1, package='wooldridge')
# Store results under "sumres" and display full table:
( sumres <- summary( lm(colGPA ~ hsGPA+ACT+skipped, data=gpa1) ) )
# Manually confirm the formulas: Extract coefficients and SE
regtable <- sumres$coefficients
bhat <- regtable[,1]
se <- regtable[,2]
# Reproduce t statistic
( tstat <- bhat / se )
# Reproduce p value
( pval <- 2*pt(-abs(tstat),137) )
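# Cross-check (not in the original script): the 137 degrees of freedom used
# above are simply the residual d.f. of the model, n - k - 1 = 141 - 3 - 1:
( df.res <- sumres$df[2] )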
data(mlb1, package='wooldridge')
# Unrestricted OLS regression:
res.ur <- lm(log(salary) ~ years+gamesyr+bavg+hrunsyr+rbisyr, data=mlb1)
# Restricted OLS regression:
res.r <- lm(log(salary) ~ years+gamesyr, data=mlb1)
# R2:
( r2.ur <- summary(res.ur)$r.squared )
( r2.r <- summary(res.r)$r.squared )
# F statistic:
( F <- (r2.ur-r2.r) / (1-r2.ur) * 347/3 )
# p value = 1-cdf of the appropriate F distribution:
1-pf(F, 3,347)
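# Optional cross-check (not in the original script): the same F test can be
# run automatically by comparing the two nested models; F and p should match:
anova(res.r, res.ur)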
import scipy.stats as stats
import numpy as np
# CV for alpha=5% and 1% using the t distribution with 522 d.f.:
alpha = np.array([0.05, 0.01])
cv_t = stats.t.ppf(1 - alpha, 522)
print(f'cv_t: {cv_t}\n')
# CV for alpha=5% and 1% using the normal approximation:
cv_n = stats.norm.ppf(1 - alpha)
print(f'cv_n: {cv_n}\n')
# Not in use...
#import wooldridge as woo
#import numpy as np
#import statsmodels.formula.api as smf
#import stargazer.stargazer as sg
#
#meap93 = woo.dataWoo('meap93')
#meap93['b_s'] = meap93['benefits'] / meap93['salary']
#
## OLS regression:
#reg1 = smf.ols(formula='np.log(salary) ~ b_s', data=meap93)
#results1 = reg1.fit()
#reg2 = smf.ols(formula='np.log(salary) ~ b_s + np.log(enroll) + np.log(staff)', data=meap93)
#results2 = reg2.fit()
#reg3 = smf.ols(formula='np.log(salary) ~ b_s + np.log(enroll) + np.log(staff) + droprate + gradrate', data=meap93)
#results3 = reg3.fit()
#
## create latex table of results with stargazer
#stargazer = sg.Stargazer([results1, results2, results3])
#print(stargazer.render_latex())
import scipy.stats as stats
import numpy as np
# CV for alpha=5% and 1% using the t distribution with 137 d.f.:
alpha = np.array([0.05, 0.01])
cv_t = stats.t.ppf(1 - alpha / 2, 137)
print(f'cv_t: {cv_t}\n')
# CV for alpha=5% and 1% using the normal approximation:
cv_n = stats.norm.ppf(1 - alpha / 2)
print(f'cv_n: {cv_n}\n')
import wooldridge as woo
import statsmodels.formula.api as smf
import scipy.stats as stats
gpa1 = woo.dataWoo('gpa1')
# store and display results:
reg = smf.ols(formula='colGPA ~ hsGPA + ACT + skipped', data=gpa1)
results = reg.fit()
print(f'results.summary(): \n{results.summary()}\n')
# manually confirm the formulas, i.e. extract coefficients and SE:
b = results.params
se = results.bse
# reproduce t statistic:
tstat = b / se
print(f'tstat: \n{tstat}\n')
# reproduce p value:
pval = 2 * stats.t.cdf(-abs(tstat), 137)
print(f'pval: \n{pval}\n')
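# Cross-check (not in the original script): statsmodels reports the same
# t statistics and p values directly:
print(f'results.tvalues: \n{results.tvalues}\n')
print(f'results.pvalues: \n{results.pvalues}\n')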
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf
rdchem = woo.dataWoo('rdchem')
# OLS regression:
reg = smf.ols(formula='np.log(rd) ~ np.log(sales) + profmarg', data=rdchem)
results = reg.fit()
print(f'results.summary(): \n{results.summary()}\n')
# 95% CI:
CI95 = results.conf_int(0.05)
print(f'CI95: \n{CI95}\n')
# 99% CI:
CI99 = results.conf_int(0.01)
print(f'CI99: \n{CI99}\n')
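# Manual cross-check (not in the original script): reproduce the 95% CI from
# the coefficients, standard errors, and a t quantile with 32 - 3 = 29 d.f.:
import scipy.stats as stats
cv = stats.t.ppf(1 - 0.05 / 2, results.df_resid)
CI95_manual_lower = results.params - cv * results.bse
CI95_manual_upper = results.params + cv * results.bse
print(f'CI95_manual_lower: \n{CI95_manual_lower}\n')
print(f'CI95_manual_upper: \n{CI95_manual_upper}\n')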
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf
mlb1 = woo.dataWoo('mlb1')
# OLS regression:
reg = smf.ols(
    formula='np.log(salary) ~ years + gamesyr + bavg + hrunsyr + rbisyr',
    data=mlb1)
results = reg.fit()
# automated F test:
hypotheses = ['bavg = 0', 'hrunsyr = 0', 'rbisyr = 0']
ftest = results.f_test(hypotheses)
fstat = ftest.statistic[0][0]
fpval = ftest.pvalue
print(f'fstat: {fstat}\n')
print(f'fpval: {fpval}\n')
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf
mlb1 = woo.dataWoo('mlb1')
# OLS regression:
reg = smf.ols(
    formula='np.log(salary) ~ years + gamesyr + bavg + hrunsyr + rbisyr',
    data=mlb1)
results = reg.fit()
# automated F test:
hypotheses = ['bavg = 0', 'hrunsyr = 2*rbisyr']
ftest = results.f_test(hypotheses)
fstat = ftest.statistic[0][0]
fpval = ftest.pvalue
print(f'fstat: {fstat}\n')
print(f'fpval: {fpval}\n')
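# Cross-check (not in the original script), mirroring the Julia version below:
# under the two restrictions, hrunsyr and rbisyr enter only through the
# combination 2*hrunsyr + rbisyr, so fit that restricted model explicitly:
mlb1['newvar'] = 2 * mlb1['hrunsyr'] + mlb1['rbisyr']
reg_r = smf.ols(formula='np.log(salary) ~ years + gamesyr + newvar', data=mlb1)
results_r = reg_r.fit()
fstat_r, fpval_r, _ = results.compare_f_test(results_r)
print(f'fstat_r: {fstat_r}\n')
print(f'fpval_r: {fpval_r}\n')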
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf
import scipy.stats as stats
mlb1 = woo.dataWoo('mlb1')
n = mlb1.shape[0]
# unrestricted OLS regression:
reg_ur = smf.ols(
    formula='np.log(salary) ~ years + gamesyr + bavg + hrunsyr + rbisyr',
    data=mlb1)
fit_ur = reg_ur.fit()
r2_ur = fit_ur.rsquared
print(f'r2_ur: {r2_ur}\n')
# restricted OLS regression:
reg_r = smf.ols(formula='np.log(salary) ~ years + gamesyr', data=mlb1)
fit_r = reg_r.fit()
r2_r = fit_r.rsquared
print(f'r2_r: {r2_r}\n')
# F statistic:
fstat = (r2_ur - r2_r) / (1 - r2_ur) * (n - 6) / 3
print(f'fstat: {fstat}\n')
# CV for alpha=1% using the F distribution with 3 and 347 d.f.:
cv = stats.f.ppf(1 - 0.01, 3, 347)
print(f'cv: {cv}\n')
# p value = 1-cdf of the appropriate F distribution:
fpval = 1 - stats.f.cdf(fstat, 3, 347)
print(f'fpval: {fpval}\n')
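# Cross-check (not in the original script): the built-in nested-model F test
# should reproduce the same statistic and p value:
fstat_auto, fpval_auto, _ = fit_ur.compare_f_test(fit_r)
print(f'fstat_auto: {fstat_auto}\n')
print(f'fpval_auto: {fpval_auto}\n')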
using Distributions
# CV for alpha=5% and 1% using the t distribution with 522 d.f.:
alpha = [0.05, 0.01]
cv_t = round.(quantile.(TDist(522), 1 .- alpha), digits=5)
println("cv_t = $cv_t\n")
# CV for alpha=5% and 1% using the normal approximation:
cv_n = round.(quantile.(Normal(), 1 .- alpha), digits=5)
println("cv_n = $cv_n")
using WooldridgeDatasets, GLM, DataFrames, RegressionTables
meap93 = DataFrame(wooldridge("meap93"))
meap93.b_s = meap93.benefits ./ meap93.salary
# estimate three different models:
reg1 = lm(@formula(log(salary) ~ b_s), meap93)
reg2 = lm(@formula(log(salary) ~ b_s + log(enroll) + log(staff)), meap93)
reg3 = lm(@formula(log(salary) ~
        b_s + log(enroll) + log(staff) + droprate + gradrate), meap93)
# print results with RegressionTables:
regtable(reg1, reg2, reg3)
using Distributions
# CV for alpha=5% and 1% using the t distribution with 137 d.f.:
alpha = [0.05, 0.01]
cv_t = round.(quantile.(TDist(137), 1 .- alpha ./ 2), digits=5)
println("cv_t = $cv_t\n")
# CV for alpha=5% and 1% using the normal approximation:
cv_n = round.(quantile.(Normal(), 1 .- alpha ./ 2), digits=5)
println("cv_n = $cv_n")
using WooldridgeDatasets, GLM, DataFrames, Distributions
gpa1 = DataFrame(wooldridge("gpa1"))
# store and display results:
reg = lm(@formula(colGPA ~ hsGPA + ACT + skipped), gpa1)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg\n")
# manually confirm the formulas, i.e. extract coefficients and SE:
b = coef(reg)
se = stderror(reg)
# reproduce t statistic:
tstat = round.(b ./ se, digits=5)
println("tstat = $tstat\n")
# reproduce p value:
pval = round.(2 * cdf.(TDist(137), -abs.(tstat)), digits=5)
println("pval = $pval")
using WooldridgeDatasets, GLM, DataFrames
rdchem = DataFrame(wooldridge("rdchem"))
# OLS regression:
reg_ur = lm(@formula(log(rd) ~ log(sales) + profmarg), rdchem)
reg_r = lm(@formula(log(rd) ~ 1), rdchem)
# automated F test:
ftest_res = ftest(reg_r.model, reg_ur.model)
fstat = ftest_res.fstat[2]
fpval = ftest_res.pval[2]
println("fstat = $fstat\n")
println("fpval = $fpval")
using WooldridgeDatasets, GLM, DataFrames, Distributions
rdchem = DataFrame(wooldridge("rdchem"))
# OLS regression:
reg = lm(@formula(log(rd) ~ log(sales) + profmarg), rdchem)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg\n")
# replicating 95% CI:
alpha = 0.05
CI95_upper = coef(reg) .+ stderror(reg) .* quantile(TDist(32 - 3), 1 - alpha / 2)
CI95_lower = coef(reg) .- stderror(reg) .* quantile(TDist(32 - 3), 1 - alpha / 2)
println("CI95_upper = $CI95_upper\n")
println("CI95_lower = $CI95_lower\n")
# calculating 99% CI:
alpha = 0.01
CI99_upper = coef(reg) .+ stderror(reg) .* quantile(TDist(32 - 3), 1 - alpha / 2)
CI99_lower = coef(reg) .- stderror(reg) .* quantile(TDist(32 - 3), 1 - alpha / 2)
println("CI99_upper = $CI99_upper\n")
println("CI99_lower = $CI99_lower")
using WooldridgeDatasets, GLM, DataFrames
mlb1 = DataFrame(wooldridge("mlb1"))
# OLS regression:
reg_ur = lm(@formula(log(salary) ~
        years + gamesyr + bavg + hrunsyr + rbisyr), mlb1)
reg_r = lm(@formula(log(salary) ~ years + gamesyr), mlb1)
# automated F test:
ftest_res = ftest(reg_r.model, reg_ur.model)
fstat = ftest_res.fstat[2]
fpval = ftest_res.pval[2]
println("fstat = $fstat\n")
println("fpval = $fpval")
using WooldridgeDatasets, GLM, DataFrames
mlb1 = DataFrame(wooldridge("mlb1"))
# OLS regression:
reg_ur = lm(@formula(log(salary) ~
        years + gamesyr + bavg + hrunsyr + rbisyr), mlb1)
# restrictions "bavg = 0" and "hrunsyr = 2*rbisyr":
mlb1.newvar = 2 * mlb1.hrunsyr + mlb1.rbisyr
reg_r = lm(@formula(log(salary) ~ years + gamesyr + newvar), mlb1)
# automated F test:
ftest_res = ftest(reg_r.model, reg_ur.model)
fstat = ftest_res.fstat[2]
fpval = ftest_res.pval[2]
println("fstat = $fstat\n")
println("fpval = $fpval")
using WooldridgeDatasets, GLM, DataFrames, Distributions
mlb1 = DataFrame(wooldridge("mlb1"))
# unrestricted OLS regression:
reg_ur = lm(@formula(log(salary) ~
        years + gamesyr + bavg + hrunsyr + rbisyr), mlb1)
r2_ur = r2(reg_ur)
println("r2_ur = $r2_ur\n")
# restricted OLS regression:
reg_r = lm(@formula(log(salary) ~ years + gamesyr), mlb1)
r2_r = r2(reg_r)
println("r2_r = $r2_r\n")
# F statistic:
n = nobs(reg_ur)
fstat = (r2_ur - r2_r) / (1 - r2_ur) * (n - 6) / 3
println("fstat = $fstat\n")
# CV for alpha=1% using the F distribution with 3 and 347 d.f.:
cv = quantile(FDist(3, 347), 1 - 0.01)
println("cv = $cv\n")
# p value = 1-cdf of the appropriate F distribution:
fpval = 1 - cdf(FDist(3, 347), fstat)
println("fpval = $fpval")