# Separate cumulative-GPA regressions for males and females, using only the
# rows of gpa3 where spring == 1.
data(gpa3, package='wooldridge')

# Estimate model for males (& spring data)
lm(cumgpa ~ sat + hsperc + tothrs, data = gpa3,
   subset = (spring == 1 & female == 0))

# Estimate model for females (& spring data)
lm(cumgpa ~ sat + hsperc + tothrs, data = gpa3,
   subset = (spring == 1 & female == 1))
Dummy-Interact.R
# Pooled cumulative-GPA regression in which the female dummy is interacted
# with every regressor, followed by a joint F-test on all "female" terms.
library(car)  # provides linearHypothesis() and matchCoefs()

data(gpa3, package='wooldridge')

# Model with full interactions with female dummy (only for spring data)
reg <- lm(cumgpa ~ female * (sat + hsperc + tothrs), data = gpa3,
          subset = (spring == 1))
summary(reg)

# F-Test from package "car". H0: every coefficient whose name contains
# "female" is zero. matchCoefs(...) selects by name, so the set covers the
# female main effect as well as the three female:x interaction terms.
linearHypothesis(reg, matchCoefs(reg, "female"))
Example-7-1-logical.R
# Wage regression in which the female indicator is supplied as a logical
# vector instead of its original coding.
data(wage1, package='wooldridge')

# replace "female" with logical variable
wage1$female <- as.logical(wage1$female)
table(wage1$female)

# regression with logical variable
lm(wage ~ female + educ + exper + tenure, data = wage1)
# Log-wage regression on CPS1985 using the gender and occupation columns
# directly in the formula, fitted twice: once with the data as loaded and once
# after changing the reference category of both columns.
data(CPS1985,package="AER")

# Table of categories and frequencies for two factor variables:
table(CPS1985$gender)
table(CPS1985$occupation)

# Directly using factor variables in regression formula:
lm(log(wage) ~ education + experience + gender + occupation, data = CPS1985)

# Manually redefine the reference category:
CPS1985$gender <- relevel(CPS1985$gender, "female")
CPS1985$occupation <- relevel(CPS1985$occupation, "management")

# Rerun regression:
lm(log(wage) ~ education + experience + gender + occupation, data = CPS1985)
Dummy-Interact-Sep.py
# Separate cumulative-GPA regressions for males and females, restricted to the
# rows of gpa3 where spring == 1. Each fit is summarised in a small table of
# coefficients, standard errors, t statistics and p-values.
import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf

gpa3 = woo.dataWoo('gpa3')

# estimate model for males (& spring data):
reg_m = smf.ols(formula='cumgpa ~ sat + hsperc + tothrs',
                data=gpa3,
                subset=(gpa3['spring'] == 1) & (gpa3['female'] == 0))
results_m = reg_m.fit()

# print regression table:
table_m = pd.DataFrame({'b': round(results_m.params, 4),
                        'se': round(results_m.bse, 4),
                        't': round(results_m.tvalues, 4),
                        'pval': round(results_m.pvalues, 4)})
print(f'table_m: \n{table_m}\n')

# estimate model for females (& spring data):
reg_f = smf.ols(formula='cumgpa ~ sat + hsperc + tothrs',
                data=gpa3,
                subset=(gpa3['spring'] == 1) & (gpa3['female'] == 1))
results_f = reg_f.fit()

# print regression table:
table_f = pd.DataFrame({'b': round(results_f.params, 4),
                        'se': round(results_f.bse, 4),
                        't': round(results_f.tvalues, 4),
                        'pval': round(results_f.pvalues, 4)})
print(f'table_f: \n{table_f}\n')
Dummy-Interact.py
# Pooled cumulative-GPA regression in which the female dummy is interacted
# with every regressor (spring == 1 rows only), followed by a joint F-test
# that the female main effect and all its interactions are zero.
import numpy as np
import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf

gpa3 = woo.dataWoo('gpa3')

# model with full interactions with female dummy (only for spring data):
reg = smf.ols(formula='cumgpa ~ female * (sat + hsperc + tothrs)',
              data=gpa3,
              subset=(gpa3['spring'] == 1))
results = reg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')

# F-Test for H0 (the interaction coefficients of 'female' are zero):
hypotheses = ['female = 0', 'female:sat = 0',
              'female:hsperc = 0', 'female:tothrs = 0']
ftest = results.f_test(hypotheses)

# Older statsmodels releases return the statistic as a 1x1 array, newer ones
# as a plain scalar; squeezing handles both, whereas indexing with [0][0]
# fails on a scalar.
fstat = float(np.squeeze(ftest.statistic))
fpval = ftest.pvalue

print(f'fstat: {fstat}\n')
print(f'fpval: {fpval}\n')
# Log-wage regression on CPS1985 with gender and occupation entered as
# categorical terms, fitted twice: once with the default reference categories
# and once with explicitly chosen ones.
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

CPS1985 = pd.read_csv('data/CPS1985.csv')

# rename variable to make outputs more compact:
CPS1985['oc'] = CPS1985['occupation']

# table of categories and frequencies for two categorical variables:
freq_gender = pd.crosstab(CPS1985['gender'], columns='count')
print(f'freq_gender: \n{freq_gender}\n')

freq_occupation = pd.crosstab(CPS1985['oc'], columns='count')
print(f'freq_occupation: \n{freq_occupation}\n')

# directly using categorical variables in regression formula:
reg = smf.ols(formula='np.log(wage) ~ education +'
                      'experience + C(gender) + C(oc)',
              data=CPS1985)
results = reg.fit()

# print regression table:
table = pd.DataFrame({'b': round(results.params, 4),
                      'se': round(results.bse, 4),
                      't': round(results.tvalues, 4),
                      'pval': round(results.pvalues, 4)})
print(f'table: \n{table}\n')

# rerun regression with different reference category
# ("male" for gender, "technical" for occupation):
reg_newref = smf.ols(formula='np.log(wage) ~ education + experience + '
                             'C(gender, Treatment("male")) + '
                             'C(oc, Treatment("technical"))',
                     data=CPS1985)
results_newref = reg_newref.fit()

# print results:
table_newref = pd.DataFrame({'b': round(results_newref.params, 4),
                             'se': round(results_newref.bse, 4),
                             't': round(results_newref.tvalues, 4),
                             'pval': round(results_newref.pvalues, 4)})
print(f'table_newref: \n{table_newref}\n')
Dummy-Interact-Sep.jl
# Separate cumulative-GPA regressions for males and females, restricted to the
# rows of gpa3 where spring == 1. Each fit is printed as a coefficient table.
using WooldridgeDatasets, GLM, DataFrames

gpa3 = DataFrame(wooldridge("gpa3"))

# estimate model for males (& spring data):
reg_m = lm(@formula(cumgpa ~ sat + hsperc + tothrs),
    subset(gpa3,
        :spring => ByRow(==(1)),
        :female => ByRow(==(0))))
table_reg_m = coeftable(reg_m)
println("table_reg_m: \n$table_reg_m")

# estimate model for females (& spring data):
reg_f = lm(@formula(cumgpa ~ sat + hsperc + tothrs),
    subset(gpa3,
        :spring => ByRow(==(1)),
        :female => ByRow(==(1))))
table_reg_f = coeftable(reg_f)
println("table_reg_f: \n$table_reg_f")
Dummy-Interact.jl
# Pooled cumulative-GPA regression in which the female dummy is interacted
# with every regressor (spring == 1 rows only), compared by F-test against
# the restricted model without any female terms.
using WooldridgeDatasets, GLM, DataFrames

gpa3 = DataFrame(wooldridge("gpa3"))

# model with full interactions with female dummy (only for spring data):
reg_ur = lm(@formula(cumgpa ~ female * (sat + hsperc + tothrs)),
    subset(gpa3, :spring => ByRow(==(1))))
table_reg_ur = coeftable(reg_ur)
println("table_reg_ur: \n$table_reg_ur\n")

# F test for H0 (the interaction coefficients of "female" are zero):
reg_r = lm(@formula(cumgpa ~ sat + hsperc + tothrs),
    subset(gpa3, :spring => ByRow(==(1))))

# NOTE(review): `.model` assumes lm() returns a wrapper exposing the fitted
# model in a `model` field; confirm against the installed GLM.jl version.
ftest_res = ftest(reg_r.model, reg_ur.model)

# Index 2 is taken from both result vectors; presumably the entry for the
# second model passed to ftest (unrestricted vs. restricted). TODO confirm.
fstat = ftest_res.fstat[2]
fpval = ftest_res.pval[2]

println("fstat = $fstat\n")
println("fpval = $fpval")