# Example-13-2.R
# Load the cps78_85 data set from the 'wooldridge' package.
data(cps78_85, package = "wooldridge")

# OLS for lwage with detailed results. `y85 * (educ + female)` expands to the
# main effects plus the y85:educ and y85:female interaction terms; squared
# experience enters divided by 100.
wage_fit <- lm(
  lwage ~ y85 * (educ + female) + exper + I((exper^2) / 100) + union,
  data = cps78_85
)
summary(wage_fit)
# Load the kielmc data set from the 'wooldridge' package.
data(kielmc, package = "wooldridge")

# Separate regressions of rprice on nearinc for 1978 and 1981; only the
# coefficients are reported.
fit_1978 <- lm(rprice ~ nearinc, data = kielmc, subset = (year == 1978))
coef(fit_1978)
fit_1981 <- lm(rprice ~ nearinc, data = kielmc, subset = (year == 1981))
coef(fit_1981)

# Joint regression on all rows: `nearinc * y81` adds both main effects and
# the nearinc:y81 interaction. coeftest() comes from lmtest.
library(lmtest)
joint_fit <- lm(rprice ~ nearinc * y81, data = kielmc)
coeftest(joint_fit)
library(plm);library(lmtest)
# Load the crime4 data set from the 'wooldridge' package.
data(crime4, package = "wooldridge")

# Declare the panel structure: county identifies the unit, year the period.
crime4.p <- pdata.frame(crime4, index = c("county", "year"))
pdim(crime4.p)

# First difference of the crime rate, computed by hand and stored as a new
# column of the pdata.frame.
crime4.p$dcrmrte <- diff(crime4.p$crmrte)

# Show the level and its difference for observations 1-9.
shown_cols <- c("county", "year", "crmrte", "dcrmrte")
crime4.p[1:9, shown_cols]

# First-difference (FD) estimation with plm on the undifferenced panel;
# coeftest() prints the coefficient table.
fd_fit <- plm(
  log(crmrte) ~ d83 + d84 + d85 + d86 + d87 + lprbarr + lprbconv +
    lprbpris + lavgsen + lpolpc,
  data = crime4.p, model = "fd"
)
coeftest(fd_fit)
library(plm); library(lmtest)
# Load the crime2 data set from the 'wooldridge' package.
data(crime2, package = "wooldridge")

# A single integer `index` is plm's shorthand for a balanced panel: it gives
# the number of individuals (46 here) and the "id" and "time" columns shown
# below are generated by pdata.frame().
# NOTE(review): this relies on the rows being ordered by individual -- confirm.
crime2.p <- pdata.frame(crime2, index = 46)

# First differences of the crime rate and the unemployment rate, by hand.
crime2.p$dcrmrte <- diff(crime2.p$crmrte)
crime2.p$dunem <- diff(crime2.p$unem)

# Show levels and differences for observations 1-6.
shown_cols <- c("id", "time", "year", "crmrte", "dcrmrte", "unem", "dunem")
crime2.p[1:6, shown_cols]

# FD model, variant 1: plain lm() on the manually differenced columns.
fd_by_lm <- lm(dcrmrte ~ dunem, data = crime2.p)
coeftest(fd_by_lm)

# FD model, variant 2: plm() differences the original columns itself.
fd_by_plm <- plm(crmrte ~ unem, data = crime2.p, model = "fd")
coeftest(fd_by_plm)
library(plm)
# Load the crime4 data set from the 'wooldridge' package.
data(crime4, package = "wooldridge")

# Build the pdata.frame with county as unit index and year as time index.
crime4.p <- pdata.frame(crime4, index = c("county", "year"))

# Panel-aware transformations of the crime rate, each stored as a new column.
# The source column is extracted once; it is not modified by the assignments.
crmrte_panel <- crime4.p$crmrte
crime4.p$cr.l <- lag(crmrte_panel)      # lagged value
crime4.p$cr.d <- diff(crmrte_panel)     # first difference
crime4.p$cr.B <- Between(crmrte_panel)  # plm "between" transformation
crime4.p$cr.W <- Within(crmrte_panel)   # plm "within" transformation

# Show the original and the transformed series for observations 1-16.
shown_cols <- c("county", "year", "crmrte", "cr.l", "cr.d", "cr.B", "cr.W")
crime4.p[1:16, shown_cols]
import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf
cps78_85 = woo.dataWoo('cps78_85')

# OLS for lwage; 'y85*(educ+female)' expands to the main effects plus the
# y85:educ and y85:female interaction terms:
wage_formula = ('lwage ~ y85*(educ+female) + exper +'
                'I((exper**2)/100) + union')
reg = smf.ols(formula=wage_formula, data=cps78_85)
results = reg.fit()

# regression table (estimate, standard error, t statistic, p-value),
# rounded to four decimals in one step:
table = pd.DataFrame({'b': results.params,
                      'se': results.bse,
                      't': results.tvalues,
                      'pval': results.pvalues}).round(4)
print(f'table: \n{table}\n')
import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf
kielmc = woo.dataWoo('kielmc')

# Boolean row masks selecting each survey year:
y78 = (kielmc['year'] == 1978)
y81 = (kielmc['year'] == 1981)

# separate regressions of rprice on nearinc for 1978 and 1981:
reg78 = smf.ols(formula='rprice ~ nearinc', data=kielmc, subset=y78)
reg81 = smf.ols(formula='rprice ~ nearinc', data=kielmc, subset=y81)
results78 = reg78.fit()
results81 = reg81.fit()

# joint regression on all rows; year enters as a categorical variable
# interacted with nearinc:
reg_joint = smf.ols(formula='rprice ~ nearinc * C(year)', data=kielmc)
results_joint = reg_joint.fit()

# regression tables (estimate, standard error, t statistic, p-value),
# each rounded to four decimals:
table_78 = pd.DataFrame({'b': results78.params,
                         'se': results78.bse,
                         't': results78.tvalues,
                         'pval': results78.pvalues}).round(4)
print(f'table_78: \n{table_78}\n')

table_81 = pd.DataFrame({'b': results81.params,
                         'se': results81.bse,
                         't': results81.tvalues,
                         'pval': results81.pvalues}).round(4)
print(f'table_81: \n{table_81}\n')

table_joint = pd.DataFrame({'b': results_joint.params,
                            'se': results_joint.bse,
                            't': results_joint.tvalues,
                            'pval': results_joint.pvalues}).round(4)
print(f'table_joint: \n{table_joint}\n')
import wooldridge as woo
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
kielmc = woo.dataWoo('kielmc')

# difference in difference (DiD): log price on nearinc, the categorical
# year and their interaction:
did_formula = 'np.log(rprice) ~ nearinc*C(year)'
reg_did = smf.ols(formula=did_formula, data=kielmc)
results_did = reg_did.fit()

# regression table, rounded to four decimals:
table_did = pd.DataFrame({'b': results_did.params,
                          'se': results_did.bse,
                          't': results_did.tvalues,
                          'pval': results_did.pvalues}).round(4)
print(f'table_did: \n{table_did}\n')

# DiD with control variables added to the same specification:
didC_formula = ('np.log(rprice) ~ nearinc*C(year) + age +'
                'I(age**2) + np.log(intst) + np.log(land) +'
                'np.log(area) + rooms + baths')
reg_didC = smf.ols(formula=didC_formula, data=kielmc)
results_didC = reg_didC.fit()

# regression table, rounded to four decimals:
table_didC = pd.DataFrame({'b': results_didC.params,
                           'se': results_didC.bse,
                           't': results_didC.tvalues,
                           'pval': results_didC.pvalues}).round(4)
print(f'table_didC: \n{table_didC}\n')
import wooldridge as woo
import numpy as np
import linearmodels as plm
# Load the data and index it by (county, year); drop=False keeps both index
# levels available as ordinary columns, so 'year' can appear in the formula.
crime4 = woo.dataWoo('crime4').set_index(['county', 'year'], drop=False)

# estimate FD model:
# NOTE(review): 'year' is presumably included so that its first difference
# acts as the constant of the differenced equation -- confirm.
fd_formula = ('np.log(crmrte) ~ year + d83 + d84 + d85 + d86 + d87 +'
              'lprbarr + lprbconv + lprbpris + lavgsen + lpolpc')
reg = plm.FirstDifferenceOLS.from_formula(formula=fd_formula, data=crime4)
results = reg.fit()
print(f'results: \n{results}\n')
import wooldridge as woo
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import linearmodels as plm
crime2 = woo.dataWoo('crime2')

# create time variable dummy by converting a Boolean variable to an integer:
crime2['t'] = (crime2['year'] == 87).astype(int)  # False=0, True=1

# create an integer entity index for this balanced two-period data set:
# every entity is given two consecutive rows, so each id is repeated twice.
# The number of entities is taken from the data instead of being hard-coded.
# NOTE(review): assumes the rows arrive as consecutive pairs per entity -- confirm.
n_entities = len(crime2) // 2
crime2['id'] = np.repeat(np.arange(1, n_entities + 1), 2)

# manually calculate first differences per entity for crmrte and unem;
# the data are sorted and grouped once, and the assignments realign the
# differences on the original row index:
by_id = crime2.sort_values(['id', 'year']).groupby('id')
crime2['crmrte_diff1'] = by_id['crmrte'].diff()
crime2['unem_diff1'] = by_id['unem'].diff()
var_selection = ['id', 't', 'crimes', 'unem', 'crmrte_diff1', 'unem_diff1']
print(f'crime2[var_selection].head(): \n{crime2[var_selection].head()}\n')

# estimate FD model with statsmodels on differenced data:
reg_sm = smf.ols(formula='crmrte_diff1 ~ unem_diff1', data=crime2)
results_sm = reg_sm.fit()

# print results (rounded to four decimals):
table_sm = pd.DataFrame({'b': results_sm.params,
                         'se': results_sm.bse,
                         't': results_sm.tvalues,
                         'pval': results_sm.pvalues}).round(4)
print(f'table_sm: \n{table_sm}\n')

# estimate FD model with linearmodels, which needs an (entity, time) index:
crime2 = crime2.set_index(['id', 'year'])
reg_plm = plm.FirstDifferenceOLS.from_formula(formula='crmrte ~ t + unem',
                                              data=crime2)
results_plm = reg_plm.fit()

# print results (rounded to four decimals):
table_plm = pd.DataFrame({'b': results_plm.params,
                          'se': results_plm.std_errors,
                          't': results_plm.tstats,
                          'pval': results_plm.pvalues}).round(4)
print(f'table_plm: \n{table_plm}\n')
using WooldridgeDatasets, GLM, DataFrames, RegressionTables
kielmc = DataFrame(wooldridge("kielmc"))

# indicator for observations from 1981, used in the joint regression:
kielmc.is1981 = kielmc.year .== 1981

# separate regressions for 1978 and 1981, sharing one formula and selecting
# the rows of each year with a Boolean mask:
price_formula = @formula(rprice ~ nearinc)
y78 = kielmc[kielmc.year .== 1978, :]
y81 = kielmc[kielmc.year .== 1981, :]
reg78 = lm(price_formula, y78)
reg81 = lm(price_formula, y81)

# joint regression including an interaction term:
reg_joint = lm(@formula(rprice ~ nearinc * is1981), kielmc)

# print the three models side by side with RegressionTables:
regtable(reg78, reg81, reg_joint)
using WooldridgeDatasets, GLM, DataFrames
kielmc = DataFrame(wooldridge("kielmc"))

# indicator for observations from 1981:
kielmc.is1981 = kielmc.year .== 1981

# difference in difference (DiD): log price on nearinc, the 1981 indicator
# and their interaction:
did_formula = @formula(log(rprice) ~ nearinc * is1981)
reg_did = lm(did_formula, kielmc)
table_did = coeftable(reg_did)
println("table_did: \n$table_did\n")

# DiD with control variables added to the same specification:
didC_formula = @formula(log(rprice) ~ nearinc * is1981 + age + (age^2) +
                                      log(intst) + log(land) + log(area) +
                                      rooms + baths)
reg_didC = lm(didC_formula, kielmc)
table_didC = coeftable(reg_didC)
println("table_didC: \n$table_didC")
using WooldridgeDatasets, GLM, DataFrames
crime4 = DataFrame(wooldridge("crime4"))
crime4[!, :lcrmrte] = log.(crime4.crmrte)

# sort data by county and year so that diff() runs in time order per county:
sort!(crime4, [:county, :year])

# manually calculate first differences for multiple variables in one pass:
# each variable is differenced within its county and keeps its own name;
# keepkeys=false leaves the county column out of the result.
vars_to_diff = ["lcrmrte", "d83", "d84", "d85", "d86", "d87",
                "lprbarr", "lprbconv", "lprbpris", "lavgsen", "lpolpc"]
grouped_df = groupby(crime4, :county)
diff_df = combine(grouped_df, vars_to_diff .=> diff .=> vars_to_diff;
                  keepkeys=false)

# estimate FD model by OLS on the differenced data:
fd_formula = @formula(lcrmrte ~ d83 + d84 + d85 + d86 + d87 +
                                lprbarr + lprbconv + lprbpris +
                                lavgsen + lpolpc)
reg = lm(fd_formula, diff_df)
table_reg = coeftable(reg)
println("table_reg: \n$table_reg")
using WooldridgeDatasets, GLM, DataFrames
crime2 = DataFrame(wooldridge("crime2"))

# create an index in this balanced data set: every id from 1 to 46 is
# assigned to two consecutive rows:
id_tmp = 1:46
crime2.id = repeat(id_tmp, inner=2)

# sort data by id and year so that diff() runs in time order per entity:
sort!(crime2, [:id, :year])

# manually calculate first differences per entity for crmrte and unem in a
# single combine call; the result holds the id column followed by the two
# differenced columns:
grouped_df = groupby(crime2, :id)
diff_df = combine(grouped_df,
                  :crmrte => diff => :crmrte_diff1,
                  :unem => diff => :unem_diff1)
preview = diff_df[1:5, :]
println("preview: \n$preview\n")

# estimate FD model with OLS on differenced data:
fd_formula = @formula(crmrte_diff1 ~ unem_diff1)
reg_sm = lm(fd_formula, diff_df)
table_sm = coeftable(reg_sm)
println("table_sm: \n$table_sm")