Chapter 6

Data-Scaling.R
data(bwght, package='wooldridge')

# Basic model:
lm( bwght ~ cigs+faminc, data=bwght)

# Weight in pounds, manual way:
bwght$bwghtlbs <- bwght$bwght/16
lm( bwghtlbs ~ cigs+faminc, data=bwght)

# Weight in pounds, direct way:
lm( I(bwght/16) ~ cigs+faminc, data=bwght)

# Packs of cigarettes:
lm( bwght ~ I(cigs/20) +faminc, data=bwght)
Effects-Automatic.R
# Repeating the regression from Example 6.2:
data(hprice2, package='wooldridge')

res <- lm( log(price) ~ log(nox)+log(dist)+rooms+I(rooms^2)+stratio,
                                                         data=hprice2)

# Automatic effects plot using the package "effects"
library(effects)
plot( effect("rooms",res) )
Effects-Manual.R
# Repeating the regression from Example 6.2:
data(hprice2, package='wooldridge')

res <- lm( log(price) ~ log(nox)+log(dist)+rooms+I(rooms^2)+stratio,
                                                         data=hprice2)

# Predictions: Values of the regressors:
# rooms = 4-8, all others at the sample mean:
X <- data.frame(rooms=seq(4,8),nox=5.5498,dist=3.7958,stratio=18.4593)

# Calculate predictions and confidence interval:
pred <- predict(res, X, interval = "confidence")

# Table of regressor values, predictions and CI:
cbind(X,pred)

# Plot 
matplot(X$rooms, pred, type="l", lty=c(1,2,2))
Example-6-1.R
data(hprice2, package='wooldridge')

# Estimate model with standardized variables:
lm(scale(price) ~ 0+scale(nox)+scale(crime)+scale(rooms)+
                              scale(dist)+scale(stratio), data=hprice2)
Example-6-2-Anova.R
library(car)
data(hprice2, package='wooldridge')
res <- lm(log(price)~log(nox)+log(dist)+poly(rooms,2,raw=TRUE)+
           stratio,data=hprice2)

# Manual F test for rooms:
linearHypothesis(res, matchCoefs(res,"rooms"))

# ANOVA (type 2) table:
Anova(res)
Example-6-2.R
data(hprice2, package='wooldridge')

res <- lm(log(price)~log(nox)+log(dist)+rooms+I(rooms^2)+
           stratio,data=hprice2)
summary(res)

# Using poly(...):
res <- lm(log(price)~log(nox)+log(dist)+poly(rooms,2,raw=TRUE)+
           stratio,data=hprice2)
summary(res)
Example-6-3.R
data(attend, package='wooldridge')

# Estimate model with interaction effect:
(myres<-lm(stndfnl~atndrte*priGPA+ACT+I(priGPA^2)+I(ACT^2), data=attend))

# Estimate for partial effect at priGPA=2.59:
b <- coef(myres)
b["atndrte"] + 2.59*b["atndrte:priGPA"] 

# Test partial effect for priGPA=2.59:
library(car)
linearHypothesis(myres,c("atndrte+2.59*atndrte:priGPA"))
Example-6-5.R
data(gpa2, package='wooldridge')

# Regress and report coefficients
reg <- lm(colgpa~sat+hsperc+hsize+I(hsize^2),data=gpa2)
reg

# Generate data set containing the regressor values for predictions
cvalues <- data.frame(sat=1200, hsperc=30, hsize=5)

# Point estimate of prediction
predict(reg, cvalues)

# Point estimate and 95% confidence interval
predict(reg, cvalues, interval = "confidence")

# Define three sets of regressor variables
cvalues <- data.frame(sat=c(1200,900,1400), hsperc=c(30,20,5), 
                                                 hsize=c(5,3,1))
cvalues
# Point estimates and 99% confidence intervals for these
predict(reg, cvalues, interval = "confidence", level=0.99)
Example-6-6.R
data(gpa2, package='wooldridge')

# Regress (as before)
reg <- lm(colgpa~sat+hsperc+hsize+I(hsize^2),data=gpa2)

# Define three sets of regressor variables (as before)
cvalues <- data.frame(sat=c(1200,900,1400), hsperc=c(30,20,5), 
                                                 hsize=c(5,3,1))

# Point estimates and 95% prediction intervals for these
predict(reg, cvalues, interval = "prediction")
Formula-Logarithm.R
data(hprice2, package='wooldridge')

# Estimate model with logs:
lm(log(price)~log(nox)+rooms, data=hprice2)
Confidence-Bands.py
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf
import pandas as pd
import matplotlib.pyplot as plt

gpa2 = woo.dataWoo('gpa2')

# regress and report coefficients:
reg = smf.ols(formula='colgpa ~ sat', data=gpa2)
results = reg.fit()
print(f'beta_hat: \n{results.params}\n')

# regressor (SAT) values for prediction from 400 to 1600 in steps of 100:
SAT = pd.DataFrame({'sat': np.arange(400, 1600 + 1, 100)})

# predictions and 95% confidence intervals:
colgpa_pred = results.get_prediction(SAT)
colgpa_pred_CI = colgpa_pred.summary_frame(alpha=0.05)[['mean', 'mean_ci_lower', 'mean_ci_upper']]
print(f'colgpa_pred_CI: \n{colgpa_pred_CI}\n')

# plot:
plt.plot(SAT, colgpa_pred_CI['mean'], color='black', linestyle='-', label='')
plt.plot(SAT, colgpa_pred_CI['mean_ci_upper'], color='green', linestyle='--', label='upper CI')
plt.plot(SAT, colgpa_pred_CI['mean_ci_lower'], color='red', linestyle='--', label='lower CI')
plt.ylabel('colgpa')
plt.xlabel('sat')
plt.legend()
plt.savefig('PyGraphs/Confidence-Bands.pdf')

# quadratic model as an alternative:
reg2 = smf.ols(formula='colgpa ~ sat + I(sat**2)', data=gpa2)
results2 = reg2.fit()
colgpa_pred2 = results2.get_prediction(SAT)
colgpa_pred2_CI = colgpa_pred2.summary_frame(alpha=0.05)[['mean', 'mean_ci_lower', 'mean_ci_upper']]

# plot:
plt.plot(SAT, colgpa_pred2_CI['mean'], color='black', linestyle='-', label='')
plt.plot(SAT, colgpa_pred2_CI['mean_ci_upper'], color='green', linestyle='--', label='upper CI')
plt.plot(SAT, colgpa_pred2_CI['mean_ci_lower'], color='red', linestyle='--', label='lower CI')
plt.ylabel('colgpa')
plt.xlabel('sat')
plt.legend()
plt.savefig('PyGraphs/Confidence-Bands2.pdf')
Data-Scaling.py
import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf

bwght = woo.dataWoo('bwght')

# regress and report coefficients:
reg = smf.ols(formula='bwght ~ cigs + faminc', data=bwght)
results = reg.fit()

# weight in pounds, manual way:
bwght['bwght_lbs'] = bwght['bwght'] / 16
reg_lbs = smf.ols(formula='bwght_lbs ~ cigs + faminc', data=bwght)
results_lbs = reg_lbs.fit()

# weight in pounds, direct way:
reg_lbs2 = smf.ols(formula='I(bwght/16) ~ cigs + faminc', data=bwght)
results_lbs2 = reg_lbs2.fit()

# packs of cigarettes:
reg_packs = smf.ols(formula='bwght ~ I(cigs/20) + faminc', data=bwght)
results_packs = reg_packs.fit()

# compare results:
table = pd.DataFrame({'b': round(results.params, 4),
                      'b_lbs': round(results_lbs.params, 4),
                      'b_lbs2': round(results_lbs2.params, 4),
                      'b_packs': round(results_packs.params, 4)})
print(f'table: \n{table}\n')
Effects-Manual.py
import wooldridge as woo
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

hprice2 = woo.dataWoo('hprice2')

# repeating the regression from Example 6.2:
reg = smf.ols(
    formula='np.log(price) ~ np.log(nox)+np.log(dist)+rooms+I(rooms**2)+stratio',
    data=hprice2)
results = reg.fit()

# predictions with rooms = 4-8, all others at the sample mean:
nox_mean = np.mean(hprice2['nox'])
dist_mean = np.mean(hprice2['dist'])
stratio_mean = np.mean(hprice2['stratio'])
X = pd.DataFrame({'rooms': np.linspace(4, 8, num=5),
                  'nox': nox_mean,
                  'dist': dist_mean,
                  'stratio': stratio_mean})
print(f'X: \n{X}\n')

# calculate 95% confidence interval:
lpr_PICI = results.get_prediction(X).summary_frame(alpha=0.05)
lpr_CI = lpr_PICI[['mean', 'mean_ci_lower', 'mean_ci_upper']]
print(f'lpr_CI: \n{lpr_CI}\n')

# plot:
plt.plot(X['rooms'], lpr_CI['mean'], color='black',
         linestyle='-', label='')
plt.plot(X['rooms'], lpr_CI['mean_ci_upper'], color='lightgrey',
         linestyle='--', label='upper CI')
plt.plot(X['rooms'], lpr_CI['mean_ci_lower'], color='darkgrey',
         linestyle='--', label='lower CI')
plt.ylabel('lprice')
plt.xlabel('rooms')
plt.legend()
plt.savefig('PyGraphs/Effects-Manual.pdf')
Example-6-1.py
import wooldridge as woo
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf


# define a function for the standardization:
def scale(x):
    x_mean = np.mean(x)
    x_var = np.var(x, ddof=1)
    x_scaled = (x - x_mean) / np.sqrt(x_var)
    return x_scaled


# standardize and estimate:
hprice2 = woo.dataWoo('hprice2')
hprice2['price_sc'] = scale(hprice2['price'])
hprice2['nox_sc'] = scale(hprice2['nox'])
hprice2['crime_sc'] = scale(hprice2['crime'])
hprice2['rooms_sc'] = scale(hprice2['rooms'])
hprice2['dist_sc'] = scale(hprice2['dist'])
hprice2['stratio_sc'] = scale(hprice2['stratio'])

reg = smf.ols(
    formula='price_sc ~ 0 + nox_sc + crime_sc + rooms_sc + dist_sc + stratio_sc',
    data=hprice2)
results = reg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')
Example-6-2-Ftest.py
import wooldridge as woo
import numpy as np
import statsmodels.formula.api as smf

hprice2 = woo.dataWoo('hprice2')
n = hprice2.shape[0]

reg = smf.ols(
    formula='np.log(price) ~ np.log(nox)+np.log(dist)+rooms+I(rooms**2)+stratio',
    data=hprice2)
results = reg.fit()

# implemented F test for rooms:
hypotheses = ['rooms = 0', 'I(rooms ** 2) = 0']
ftest = results.f_test(hypotheses)
fstat = ftest.statistic[0][0]
fpval = ftest.pvalue

print(f'fstat: {fstat}\n')
print(f'fpval: {fpval}\n')
Example-6-2.py
import wooldridge as woo
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

hprice2 = woo.dataWoo('hprice2')

reg = smf.ols(
    formula='np.log(price) ~ np.log(nox)+np.log(dist)+rooms+I(rooms**2)+stratio',
    data=hprice2)
results = reg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')
Example-6-3.py
import wooldridge as woo
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

attend = woo.dataWoo('attend')
n = attend.shape[0]

reg = smf.ols(formula='stndfnl ~ atndrte*priGPA + ACT + I(priGPA**2) + I(ACT**2)',
              data=attend)
results = reg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')

# estimate for partial effect at priGPA=2.59:
b = results.params
partial_effect = b['atndrte'] + 2.59 * b['atndrte:priGPA']
print(f'partial_effect: {partial_effect}\n')

# F test for partial effect at priGPA=2.59:
hypotheses = 'atndrte + 2.59 * atndrte:priGPA = 0'
ftest = results.f_test(hypotheses)
fstat = ftest.statistic[0][0]
fpval = ftest.pvalue

print(f'fstat: {fstat}\n')
print(f'fpval: {fpval}\n')
Example-6-5.py
import wooldridge as woo
import statsmodels.formula.api as smf
import pandas as pd

gpa2 = woo.dataWoo('gpa2')

reg = smf.ols(formula='colgpa ~ sat + hsperc + hsize + I(hsize**2)', data=gpa2)
results = reg.fit()

# define three sets of regressor variables:
cvalues2 = pd.DataFrame({'sat': [1200, 900, 1400, ],
                        'hsperc': [30, 20, 5], 'hsize': [5, 3, 1]},
                       index=['newPerson1', 'newPerson2', 'newPerson3'])

# point estimates and 95% confidence and prediction intervals:
colgpa_PICI_95 = results.get_prediction(cvalues2).summary_frame(alpha=0.05)
print(f'colgpa_PICI_95: \n{colgpa_PICI_95}\n')

# point estimates and 99% confidence and prediction intervals:
colgpa_PICI_99 = results.get_prediction(cvalues2).summary_frame(alpha=0.01)
print(f'colgpa_PICI_99: \n{colgpa_PICI_99}\n')
Formula-Logarithm.py
import wooldridge as woo
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

hprice2 = woo.dataWoo('hprice2')

reg = smf.ols(formula='np.log(price) ~ np.log(nox) + rooms', data=hprice2)
results = reg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')
Predictions.py
import wooldridge as woo
import statsmodels.formula.api as smf
import pandas as pd

gpa2 = woo.dataWoo('gpa2')

reg = smf.ols(formula='colgpa ~ sat + hsperc + hsize + I(hsize**2)', data=gpa2)
results = reg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')

# generate data set containing the regressor values for predictions:
cvalues1 = pd.DataFrame({'sat': [1200], 'hsperc': [30],
                        'hsize': [5]}, index=['newPerson1'])
print(f'cvalues1: \n{cvalues1}\n')

# point estimate of prediction (cvalues1):
colgpa_pred1 = results.predict(cvalues1)
print(f'colgpa_pred1: \n{colgpa_pred1}\n')

# define three sets of regressor variables:
cvalues2 = pd.DataFrame({'sat': [1200, 900, 1400, ],
                        'hsperc': [30, 20, 5], 'hsize': [5, 3, 1]},
                       index=['newPerson1', 'newPerson2', 'newPerson3'])
print(f'cvalues2: \n{cvalues2}\n')

# point estimate of prediction (cvalues2):
colgpa_pred2 = results.predict(cvalues2)
print(f'colgpa_pred2: \n{colgpa_pred2}\n')
Data-Scaling.jl
using WooldridgeDatasets, GLM, DataFrames, RegressionTables

bwght = DataFrame(wooldridge("bwght"))

# regress and report coefficients:
reg = lm(@formula(bwght ~ cigs + faminc), bwght)

# weight in pounds, manual way:
bwght.bwght_lbs = bwght.bwght ./ 16
reg_lbs1 = lm(@formula(bwght_lbs ~ cigs + faminc), bwght)

# weight in pounds, direct way:
reg_lbs2 = lm(@formula((bwght / 16) ~ cigs + faminc), bwght)

# packs of cigaretts:
reg_packs = lm(@formula(bwght ~ (cigs / 20) + faminc), bwght)

# weight in ounces using bwght_lbs:
reg_pds = lm(@formula(identity(bwght_lbs * 16) ~ cigs + faminc), bwght)

# print results with RegressionTables:
regtable(reg, reg_lbs1, reg_lbs2, reg_packs, reg_pds)
Effects-Manual.jl
using WooldridgeDatasets, GLM, DataFrames, Plots, Statistics

hprice2 = DataFrame(wooldridge("hprice2"))

# repeating the regression from Example 6.2:
reg = lm(@formula(log(price) ~
        log(nox) + log(dist) + rooms + (rooms^2) + stratio), hprice2)

# predictions with rooms = 4-8, all others at the sample mean:
nox_mean = mean(hprice2.nox)
dist_mean = mean(hprice2.dist)
stratio_mean = mean(hprice2.stratio)
X = DataFrame(
    rooms=4:8,
    nox=nox_mean,
    dist=dist_mean,
    stratio=stratio_mean)
println("X: \n$X\n")

# calculate 95% confidence interval:
lpr_CI = predict(reg, X, interval=:confidence)
println("lpr_CI: \n$lpr_CI\n")

# plot:
plot(X.rooms, lpr_CI.prediction, color="black", label=false, legend=:topleft)
plot!(X.rooms, lpr_CI.upper, color="lightgrey", linestyle=:dash, label="upper CI")
plot!(X.rooms, lpr_CI.lower, color="darkgrey", linestyle=:dash, label="lower CI")
ylabel!("lprice")
xlabel!("rooms")
savefig("JlGraphs/Effects-Manual.pdf")
Example-6-1.jl
using WooldridgeDatasets, GLM, DataFrames, Statistics

hprice2 = DataFrame(wooldridge("hprice2"))

# define a function for the standardization:
function scale(x)
    x_mean = mean(x)
    x_var = var(x)
    x_scaled = (x .- x_mean) ./ sqrt.(x_var)
    return x_scaled
end

# standardize and estimate:
hprice2.price_sc = scale(hprice2.price)
hprice2.nox_sc = scale(hprice2.nox)
hprice2.crime_sc = scale(hprice2.crime)
hprice2.rooms_sc = scale(hprice2.rooms)
hprice2.dist_sc = scale(hprice2.dist)
hprice2.stratio_sc = scale(hprice2.stratio)

reg = lm(@formula(price_sc ~
        0 + nox_sc + crime_sc + rooms_sc + dist_sc + stratio_sc),
    hprice2)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg")
Example-6-2-Ftest.jl
using WooldridgeDatasets, GLM, DataFrames

hprice2 = DataFrame(wooldridge("hprice2"))

reg_ur = lm(@formula(log(price) ~
                log(nox) + log(dist) + rooms + (rooms^2) + stratio), hprice2)

# testing hypotheses rooms = 0 and rooms^2 = 0:
reg_r = lm(@formula(log(price) ~
                log(nox) + log(dist) + stratio), hprice2)

ftest_res = ftest(reg_r.model, reg_ur.model)
fstat = ftest_res.fstat[2]
fpval = ftest_res.pval[2]
println("fstat = $fstat\n")
println("fpval = $fpval")
Example-6-2.jl
using WooldridgeDatasets, GLM, DataFrames

hprice2 = DataFrame(wooldridge("hprice2"))

reg = lm(@formula(log(price) ~
                log(nox) + log(dist) + rooms + (rooms^2) + stratio), hprice2)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg")
Example-6-3.jl
using WooldridgeDatasets, GLM, DataFrames

attend = DataFrame(wooldridge("attend"))

reg_ur = lm(@formula(stndfnl ~ atndrte * priGPA + ACT +
                               (priGPA^2) + (ACT^2)), attend)
table_reg_ur = coeftable(reg_ur)
println("table_reg_ur: \n$table_reg_ur\n")

# estimate for partial effect at priGPA=2.59:
b = coef(reg_ur)
partial_effect = b[2] + 2.59 * b[7]
println("partial_effect = $partial_effect\n")

# F test for partial effect at priGPA=2.59:
attend.pe = -2.59 .* attend.atndrte .+ attend.atndrte .* attend.priGPA
reg_r = lm(@formula(stndfnl ~ pe + priGPA + ACT +
                              (priGPA^2) + (ACT^2)), attend)
ftest_res = ftest(reg_r.model, reg_ur.model)
fstat = ftest_res.fstat[2]
fpval = ftest_res.pval[2]
println("fstat = $fstat\n")
println("fpval = $fpval")
Example-6-5.jl
using WooldridgeDatasets, GLM, DataFrames

gpa2 = DataFrame(wooldridge("gpa2"))

reg = lm(@formula(colgpa ~ sat + hsperc + hsize + (hsize^2)), gpa2)

# define three sets of regressor variables:
cvalues2 = DataFrame(
    id=["newPerson1", "newPerson2", "newPerson3"],
    sat=[1200, 900, 1400],
    hsperc=[30, 20, 5],
    hsize=[5, 3, 1])

# point estimates and 95% confidence and prediction intervals:
colgpa_CI_95 = predict(reg, cvalues2, interval=:confidence)
println("colgpa_CI_95: \n$colgpa_CI_95\n")
colgpa_PI_95 = predict(reg, cvalues2, interval=:prediction)
println("colgpa_PI_95: \n$colgpa_PI_95\n")

# point estimates and 99% confidence and prediction intervals:
colgpa_CI_99 = predict(reg, cvalues2, interval=:confidence, level=0.99)
println("colgpa_CI_99: \n$colgpa_CI_99\n")
colgpa_PI_99 = predict(reg, cvalues2, interval=:prediction, level=0.99)
println("colgpa_PI_99: \n$colgpa_PI_99")
Formula-Logarithm.jl
using WooldridgeDatasets, GLM, DataFrames

hprice2 = DataFrame(wooldridge("hprice2"))

reg = lm(@formula(log(price) ~ log(nox) + rooms), hprice2)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg")
Predictions.jl
using WooldridgeDatasets, GLM, DataFrames

gpa2 = DataFrame(wooldridge("gpa2"))

reg = lm(@formula(colgpa ~ sat + hsperc + hsize + (hsize^2)), gpa2)

# print regression table:
table_reg = coeftable(reg)
println("table_reg: \n$table_reg\n")

# generate data set containing the regressor values for predictions:
cvalues1 = DataFrame(id="newPerson1", sat=1200, hsperc=30, hsize=5)
println("cvalues1: \n$cvalues1\n")

# point estimate of prediction (cvalues1):
colgpa_pred1 = round.(predict(reg, cvalues1), digits=5)
println("colgpa_pred1 = $colgpa_pred1\n")

# define three sets of regressor variables:
cvalues2 = DataFrame(id=["newPerson1", "newPerson2", "newPerson3"],
    sat=[1200, 900, 1400],
    hsperc=[30, 20, 5], hsize=[5, 3, 1])
println("cvalues2: \n$cvalues2\n")

# point estimate of prediction (cvalues2):
colgpa_pred2 = round.(predict(reg, cvalues2), digits=5)
println("colgpa_pred2 = $colgpa_pred2")