library(plm);library(lmtest)data(crime4, package='wooldridge')# Generate pdata.frame:crime4.p <-pdata.frame(crime4, index=c("county","year") )# Estimate FD model:reg <- ( plm(log(crmrte)~d83+d84+d85+d86+d87+lprbarr+lprbconv+ lprbpris+lavgsen+lpolpc,data=crime4.p, model="fd") )# Regression table with standard SEcoeftest(reg)# Regression table with "clustered" SE (default type HC0):coeftest(reg, vcovHC)# Regression table with "clustered" SE (small-sample correction)# This is the default version used by Stata and reported by Wooldridge:coeftest(reg, vcovHC(reg, type="sss"))
library(plm);library(stargazer)data(wagepan, package='wooldridge')# Generate pdata.frame:wagepan.p <-pdata.frame(wagepan, index=c("nr","year") )pdim(wagepan.p)# Check variation of variables within individualspvar(wagepan.p)
Example-14-4-2.R
# Estimate different modelswagepan.p$yr<-factor(wagepan.p$year)reg.ols<- (plm(lwage~educ+black+hisp+exper+I(exper^2)+married+union+yr, data=wagepan.p, model="pooling") )reg.re <- (plm(lwage~educ+black+hisp+exper+I(exper^2)+married+union+yr, data=wagepan.p, model="random") )reg.fe <- (plm(lwage~I(exper^2)+married+union+yr, data=wagepan.p, model="within") )# Pretty table of selected results (not reporting year dummies)stargazer(reg.ols,reg.re,reg.fe, type="text", column.labels=c("OLS","RE","FE"),keep.stat=c("n","rsq"),keep=c("ed","bl","hi","exp","mar","un"))
Example-CRE-test-RE.R
# Note that the estimates "reg.cre" are calculated in# Script "Example-Dummy-CRE-1.R" which has to be run first.# RE test as an F test on the "Between" coefficients library(car)linearHypothesis(reg.cre, matchCoefs(reg.cre,"Between"))
# Note that the estimates "reg.fe" and "reg.re" are calculated in# Example 14.4. The scripts have to be run first.# Hausman test of RE vs. FE:phtest(reg.fe, reg.re)
Example-13-9-ClSE.py
import wooldridge as wooimport numpy as npimport pandas as pdimport linearmodels as plmcrime4 = woo.dataWoo('crime4')crime4 = crime4.set_index(['county', 'year'], drop=False)# estimate FD model:reg = plm.FirstDifferenceOLS.from_formula( formula='np.log(crmrte) ~ year + d83 + d84 + d85 + d86 + d87 +''lprbarr + lprbconv + lprbpris + lavgsen + lpolpc', data=crime4)# regression with standard SE:results_default = reg.fit()# regression with "clustered" SE:results_cluster = reg.fit(cov_type='clustered', cluster_entity=True, debiased=False)# regression with "clustered" SE (small-sample correction):results_css = reg.fit(cov_type='clustered', cluster_entity=True)# print results:table = pd.DataFrame({'b': round(results_default.params, 4),'se_default': round(results_default.std_errors, 4),'se_cluster': round(results_cluster.std_errors, 4),'se_css': round(results_css.std_errors, 4)})print(f'table: \n{table}\n')
Example-14-1.py
import wooldridge as wooimport pandas as pdimport statsmodels.formula.api as smfimport linearmodels as plmjtrain = woo.dataWoo('jtrain')jtrain['entity'] = jtrain['fcode']jtrain = jtrain.set_index(['fcode', 'year'])# Manual computation of deviations of entity means:jtrain['lscrap_w'] = jtrain['lscrap'] - jtrain.groupby('fcode').mean()['lscrap']jtrain['d88_w'] = jtrain['d88'] - jtrain.groupby('fcode').mean()['d88']jtrain['d89_w'] = jtrain['d89'] - jtrain.groupby('fcode').mean()['d89']jtrain['grant_w'] = jtrain['grant'] - jtrain.groupby('fcode').mean()['grant']jtrain['grant_1_w'] = jtrain['grant_1'] - jtrain.groupby('fcode').mean()['grant_1']# manual FE model estimation:results_man = smf.ols(formula='lscrap_w ~ 0 + d88_w + d89_w + grant_w + grant_1_w', data=jtrain).fit()table_man = pd.DataFrame({'b': round(results_man.params, 4),'se': round(results_man.bse, 4),'t': round(results_man.tvalues, 4),'pval': round(results_man.pvalues, 4)})print(f'table_man: \n{table_man}\n')# automatic FE model estimation:reg_aut = plm.PanelOLS.from_formula(formula='lscrap ~ d88 + d89 + grant + grant_1 + EntityEffects', data=jtrain)results_aut = reg_aut.fit()table_aut = pd.DataFrame({'b': round(results_aut.params, 4),'se': round(results_aut.std_errors, 4),'t': round(results_aut.tstats, 4),'pval': round(results_aut.pvalues, 4)})print(f'table_aut: \n{table_aut}\n')
Example-14-2.py
import wooldridge as wooimport pandas as pdimport linearmodels as plmwagepan = woo.dataWoo('wagepan')wagepan = wagepan.set_index(['nr', 'year'], drop=False)# FE model estimation:reg = plm.PanelOLS.from_formula( formula='lwage ~ married + union + C(year)*educ + EntityEffects', data=wagepan, drop_absorbed=True)results = reg.fit()# print regression table:table = pd.DataFrame({'b': round(results.params, 4),'se': round(results.std_errors, 4),'t': round(results.tstats, 4),'pval': round(results.pvalues, 4)})print(f'table: \n{table}\n')
Example-14-4-1.py
import wooldridge as woowagepan = woo.dataWoo('wagepan')# print relevant dimensions for panel:N = wagepan.shape[0]T = wagepan['year'].drop_duplicates().shape[0]n = wagepan['nr'].drop_duplicates().shape[0]print(f'N: {N}\n')print(f'T: {T}\n')print(f'n: {n}\n')# check non-varying variables# (I) across time and within individuals by calculating individual# specific variances for each variable:isv_nr = (wagepan.groupby('nr').var() ==0) # True, if variance is zero# choose variables where all grouped variances are zero:noVar_nr = isv_nr.all(axis=0) # which cols are completely Trueprint(f'isv_nr.columns[noVar_nr]: \n{isv_nr.columns[noVar_nr]}\n')# (II) across individuals within one point in time for each variable:isv_t = (wagepan.groupby('year').var() ==0)noVar_t = isv_t.all(axis=0)print(f'isv_t.columns[noVar_t]: \n{isv_t.columns[noVar_t]}\n')
import wooldridge as wooimport pandas as pdimport linearmodels as plmwagepan = woo.dataWoo('wagepan')wagepan['t'] = wagepan['year']wagepan['entity'] = wagepan['nr']wagepan = wagepan.set_index(['nr'])# include group specific means:wagepan['married_b'] = wagepan.groupby('nr').mean()['married']wagepan['union_b'] = wagepan.groupby('nr').mean()['union']wagepan = wagepan.set_index(['year'], append=True)# estimate CRE paramters:reg = plm.RandomEffects.from_formula( formula='lwage ~ married + union + educ +''black + hisp + married_b + union_b', data=wagepan)results = reg.fit()# print regression table:table = pd.DataFrame({'b': round(results.params, 4),'se': round(results.std_errors, 4),'t': round(results.tstats, 4),'pval': round(results.pvalues, 4)})print(f'table: \n{table}\n')
Example-CRE-test-RE.py
import wooldridge as wooimport linearmodels as plmwagepan = woo.dataWoo('wagepan')wagepan['t'] = wagepan['year']wagepan['entity'] = wagepan['nr']wagepan = wagepan.set_index(['nr'])# include group specific means:wagepan['married_b'] = wagepan.groupby('nr').mean()['married']wagepan['union_b'] = wagepan.groupby('nr').mean()['union']wagepan = wagepan.set_index(['year'], append=True)# estimate CRE:reg_cre = plm.RandomEffects.from_formula( formula='lwage ~ married + union + C(t)*educ + married_b + union_b', data=wagepan)results_cre = reg_cre.fit()# RE test as an Wald test on the CRE specific coefficients:wtest = results_cre.wald_test(formula='married_b = union_b = 0')print(f'wtest: \n{wtest}\n')
Example-Dummy-CRE-1.py
import wooldridge as wooimport pandas as pdimport statsmodels.formula.api as smfimport linearmodels as plmwagepan = woo.dataWoo('wagepan')wagepan['t'] = wagepan['year']wagepan['entity'] = wagepan['nr']wagepan = wagepan.set_index(['nr'])# include group specific means:wagepan['married_b'] = wagepan.groupby('nr').mean()['married']wagepan['union_b'] = wagepan.groupby('nr').mean()['union']wagepan = wagepan.set_index(['year'], append=True)# estimate FE parameters in 3 different ways:reg_we = plm.PanelOLS.from_formula( formula='lwage ~ married + union + C(t)*educ + EntityEffects', drop_absorbed=True, data=wagepan)results_we = reg_we.fit()reg_dum = smf.ols( formula='lwage ~ married + union + C(t)*educ + C(entity)', data=wagepan)results_dum = reg_dum.fit()reg_cre = plm.RandomEffects.from_formula( formula='lwage ~ married + union + C(t)*educ + married_b + union_b', data=wagepan)results_cre = reg_cre.fit()# compare to RE estimates:reg_re = plm.RandomEffects.from_formula( formula='lwage ~ married + union + C(t)*educ', data=wagepan)results_re = reg_re.fit()var_selection = ['married', 'union', 'C(t)[T.1982]:educ']# print results:table = pd.DataFrame({'b_we': round(results_we.params[var_selection], 4),'b_dum': round(results_dum.params[var_selection], 4),'b_cre': round(results_cre.params[var_selection], 4),'b_re': round(results_re.params[var_selection], 4)})print(f'table: \n{table}\n')
Example-HausmTest.py
import wooldridge as wooimport numpy as npimport linearmodels as plmimport scipy.stats as statswagepan = woo.dataWoo('wagepan')wagepan = wagepan.set_index(['nr', 'year'], drop=False)# estimation of FE and RE:reg_fe = plm.PanelOLS.from_formula(formula='lwage ~ I(exper**2) + married +''union + C(year) + EntityEffects', data=wagepan)results_fe = reg_fe.fit()b_fe = results_fe.paramsb_fe_cov = results_fe.covreg_re = plm.RandomEffects.from_formula( formula='lwage ~ educ + black + hisp + exper + I(exper**2)''+ married + union + C(year)', data=wagepan)results_re = reg_re.fit()b_re = results_re.paramsb_re_cov = results_re.cov# Hausman test of FE vs. RE# (I) find overlapping coefficients:common_coef =set(results_fe.params.index).intersection(results_re.params.index)# (II) calculate differences between FE and RE:b_diff = np.array(results_fe.params[common_coef] - results_re.params[common_coef])df =len(b_diff)b_diff.reshape((df, 1))b_cov_diff = np.array(b_fe_cov.loc[common_coef, common_coef] - b_re_cov.loc[common_coef, common_coef])b_cov_diff.reshape((df, df))# (III) calculate test statistic:stat =abs(np.transpose(b_diff) @ np.linalg.inv(b_cov_diff) @ b_diff)pval =1- stats.chi2.cdf(stat, df)print(f'stat: {stat}\n')print(f'pval: {pval}\n')