Example-17-1-1.R

library(car); library(lmtest)  # for robust SE

data(mroz, package='wooldridge')

# Estimate linear probability model
linprob <- lm(inlf~nwifeinc+educ+exper+I(exper^2)+age+kidslt6+kidsge6,
              data=mroz)

# Regression table with heteroscedasticity-robust SE and t tests:
coeftest(linprob, vcov=hccm)
Example-17-1-2.R
# predictions for two "extreme" women (run Example-17-1-1.R first!):
xpred <- list(nwifeinc=c(100,0), educ=c(5,17), exper=c(0,30), age=c(20,52),
              kidslt6=c(2,0), kidsge6=c(0,0))
predict(linprob, xpred)
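The following scripts use logitres and probitres from Example-17-1-3.R and Example-17-1-4.R, which are not reproduced in this section. A minimal stand-in so the later code runs (the book's actual scripts also report summaries, log likelihoods, and pseudo R-squareds):

# minimal stand-in for Example-17-1-3.R / Example-17-1-4.R:
logitres  <- glm(inlf~nwifeinc+educ+exper+I(exper^2)+age+kidslt6+kidsge6,
                 family=binomial(link=logit),  data=mroz)
probitres <- glm(inlf~nwifeinc+educ+exper+I(exper^2)+age+kidslt6+kidsge6,
                 family=binomial(link=probit), data=mroz)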
Example-17-1-5.R

################################################################
# Test of overall significance:
# Manual calculation of the LR test statistic:
probitres$null.deviance - probitres$deviance

# Automatic calculations including p-values,...:
library(lmtest)
lrtest(probitres)

################################################################
# Test of H0: experience and age are irrelevant
restr <- glm(inlf~nwifeinc+educ+kidslt6+kidsge6,
             family=binomial(link=probit), data=mroz)  # probit link, to nest within probitres
lrtest(restr, probitres)
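The p-value for the manually calculated LR statistic can also be obtained directly from the chi-squared distribution; a sketch, assuming df = 7 (the number of regressors restricted to zero in the overall test):

# sketch: p-value for the manual LR statistic (df = 7 restrictions):
LR <- probitres$null.deviance - probitres$deviance
1 - pchisq(LR, df=7)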
Example-17-1-6.R
# Predictions from linear probability, probit and logit model:
# (run 17-1-1.R through 17-1-4.R first to define the variables!)
predict(linprob, xpred, type="response")
predict(logitres, xpred, type="response")
predict(probitres, xpred, type="response")
Example-17-1-7.R
# APEs (run 17-1-1.R through 17-1-4.R first to define the variables!)

# Calculation of linear index at individual values:
xb.log  <- predict(logitres)
xb.prob <- predict(probitres)
# APE factors = average(g(xb))
factor.log  <- mean( dlogis(xb.log) )
factor.prob <- mean( dnorm(xb.prob) )
cbind(factor.log, factor.prob)

# average partial effects = beta*factor:
APE.lin  <- coef(linprob) * 1
APE.log  <- coef(logitres) * factor.log
APE.prob <- coef(probitres) * factor.prob

# Table of APEs
cbind(APE.lin, APE.log, APE.prob)
Example-17-1-8.R
# Automatic APE calculations with package mfx
library(mfx)
logitmfx(inlf~nwifeinc+educ+exper+I(exper^2)+age+kidslt6+kidsge6,
         data=mroz, atmean=FALSE)
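Package mfx offers the analogous command for the probit model; this sketch mirrors the call above and should match the manual APE.prob values from Example-17-1-7.R:

# analogous automatic APEs for the probit model (sketch):
probitmfx(inlf~nwifeinc+educ+exper+I(exper^2)+age+kidslt6+kidsge6,
          data=mroz, atmean=FALSE)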
Example-17-2-survreg.R
# Estimate Tobit model using survreg:
library(survival)
res <- survreg(Surv(hours, hours>0, type="left") ~ nwifeinc+educ+exper+
               I(exper^2)+age+kidslt6+kidsge6, data=mroz, dist="gaussian")
summary(res)
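Note that summary(res) reports Log(scale); if the sigma estimate itself is needed, it can be read from the fitted object (a small addition, assuming survival's standard result components):

# the estimate of sigma (the summary only shows its log):
res$scale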
Example-17-2.R
data(mroz, package='wooldridge')

# Estimate Tobit model using censReg:
library(censReg)
TobitRes <- censReg(hours~nwifeinc+educ+exper+I(exper^2)+
                    age+kidslt6+kidsge6, data=mroz)
summary(TobitRes)

# Partial Effects at the average x:
margEff(TobitRes)
Example-17-2.py

import wooldridge as woo
import numpy as np
import patsy as pt
import scipy.stats as stats
import statsmodels.formula.api as smf
import statsmodels.base.model as smclass

mroz = woo.dataWoo('mroz')
y, X = pt.dmatrices('hours ~ nwifeinc + educ + exper + '
                    'I(exper**2) + age + kidslt6 + kidsge6',
                    data=mroz, return_type='dataframe')

# generate starting solution:
reg_ols = smf.ols(formula='hours ~ nwifeinc + educ + exper + I(exper**2) + '
                          'age + kidslt6 + kidsge6', data=mroz)
results_ols = reg_ols.fit()
sigma_start = np.log(sum(results_ols.resid ** 2) / len(results_ols.resid))
params_start = np.concatenate((np.array(results_ols.params), sigma_start),
                              axis=None)

# extend statsmodels class by defining nloglikeobs:
class Tobit(smclass.GenericLikelihoodModel):
    # define a function that returns the negative log likelihood per observation
    # for a set of parameters that is provided by the argument "params":
    def nloglikeobs(self, params):
        # objects in "self" are defined in the parent class:
        X = self.exog
        y = self.endog
        p = X.shape[1]
        # for details on the implementation see Wooldridge (2019), formula 17.22:
        beta = params[0:p]
        sigma = np.exp(params[p])
        y_hat = np.dot(X, beta)
        y_eq = (y == 0)
        y_g = (y > 0)
        ll = np.empty(len(y))
        ll[y_eq] = np.log(stats.norm.cdf(-y_hat[y_eq] / sigma))
        ll[y_g] = np.log(stats.norm.pdf((y - y_hat)[y_g] / sigma)) - np.log(sigma)
        # return an array of negative log likelihoods for each observation:
        return -ll

# results of MLE:
reg_tobit = Tobit(endog=y, exog=X)
results_tobit = reg_tobit.fit(start_params=params_start, maxiter=10000, disp=0)
print(f'results_tobit.summary(): \n{results_tobit.summary()}\n')
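As in the R examples, average partial effects for the Tobit model scale each coefficient by the sample average of the factor norm.cdf(x*beta/sigma). A minimal sketch under that standard formula (not part of the book script; APE_tobit is a name introduced here):

# sketch: average partial effects on E(y|x) for the Tobit model:
p = X.shape[1]
beta_hat = results_tobit.params[0:p]
sigma_hat = np.exp(results_tobit.params[p])
factor = np.mean(stats.norm.cdf(np.dot(X, beta_hat) / sigma_hat))
APE_tobit = beta_hat * factor
print(f'APE_tobit: \n{APE_tobit}\n')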
Example-17-3.py
import wooldridge as woo
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

crime1 = woo.dataWoo('crime1')

# estimate linear model:
reg_lin = smf.ols(formula='narr86 ~ pcnv + avgsen + tottime + ptime86 + '
                          'qemp86 + inc86 + black + hispan + born60',
                  data=crime1)
results_lin = reg_lin.fit()

# print regression table:
table_lin = pd.DataFrame({'b': round(results_lin.params, 4),
                          'se': round(results_lin.bse, 4),
                          't': round(results_lin.tvalues, 4),
                          'pval': round(results_lin.pvalues, 4)})
print(f'table_lin: \n{table_lin}\n')

# estimate Poisson model:
reg_poisson = smf.poisson(formula='narr86 ~ pcnv + avgsen + tottime + '
                                  'ptime86 + qemp86 + inc86 + black + '
                                  'hispan + born60', data=crime1)
results_poisson = reg_poisson.fit(disp=0)

# print regression table:
table_poisson = pd.DataFrame({'b': round(results_poisson.params, 4),
                              'se': round(results_poisson.bse, 4),
                              't': round(results_poisson.tvalues, 4),
                              'pval': round(results_poisson.pvalues, 4)})
print(f'table_poisson: \n{table_poisson}\n')

# estimate Quasi-Poisson model:
reg_qpoisson = smf.glm(formula='narr86 ~ pcnv + avgsen + tottime + ptime86 + '
                               'qemp86 + inc86 + black + hispan + born60',
                       family=sm.families.Poisson(), data=crime1)
# the argument scale controls for the dispersion in exponential dispersion models,
# see the module documentation for more details:
results_qpoisson = reg_qpoisson.fit(scale='X2', disp=0)

# print regression table:
table_qpoisson = pd.DataFrame({'b': round(results_qpoisson.params, 4),
                               'se': round(results_qpoisson.bse, 4),
                               't': round(results_qpoisson.tvalues, 4),
                               'pval': round(results_qpoisson.pvalues, 4)})
print(f'table_qpoisson: \n{table_qpoisson}\n')
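The dispersion estimate requested via scale='X2' (Pearson chi-squared divided by the degrees of freedom) is stored in the results object; its square root is the factor by which the quasi-Poisson standard errors exceed the plain Poisson ones. A quick check:

# sketch: inspect the estimated dispersion (sigma^2) and sigma:
print(f'dispersion: {results_qpoisson.scale}\n')
print(f'sigma: {results_qpoisson.scale ** 0.5}\n')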
Example-17-4.py
import wooldridge as woo
import numpy as np
import patsy as pt
import scipy.stats as stats
import statsmodels.formula.api as smf
import statsmodels.base.model as smclass

recid = woo.dataWoo('recid')

# define dummy for censored observations:
censored = recid['cens'] != 0
y, X = pt.dmatrices('ldurat ~ workprg + priors + tserved + felon + '
                    'alcohol + drugs + black + married + educ + age',
                    data=recid, return_type='dataframe')

# generate starting solution:
reg_ols = smf.ols(formula='ldurat ~ workprg + priors + tserved + felon + '
                          'alcohol + drugs + black + married + educ + age',
                  data=recid)
results_ols = reg_ols.fit()
sigma_start = np.log(sum(results_ols.resid ** 2) / len(results_ols.resid))
params_start = np.concatenate((np.array(results_ols.params), sigma_start),
                              axis=None)

# extend statsmodels class by defining nloglikeobs:
class CensReg(smclass.GenericLikelihoodModel):
    def __init__(self, endog, cens, exog):
        self.cens = cens
        super(smclass.GenericLikelihoodModel, self).__init__(endog, exog,
                                                             missing='none')

    def nloglikeobs(self, params):
        X = self.exog
        y = self.endog
        cens = self.cens
        p = X.shape[1]
        beta = params[0:p]
        sigma = np.exp(params[p])
        y_hat = np.dot(X, beta)
        ll = np.empty(len(y))
        # uncensored:
        ll[~cens] = np.log(stats.norm.pdf((y - y_hat)[~cens] / sigma)) - np.log(sigma)
        # censored:
        ll[cens] = np.log(stats.norm.cdf(-(y - y_hat)[cens] / sigma))
        return -ll

# results of MLE:
reg_censReg = CensReg(endog=y, exog=X, cens=censored)
results_censReg = reg_censReg.fit(start_params=params_start, maxiter=10000,
                                  method='BFGS', disp=0)
print(f'results_censReg.summary(): \n{results_censReg.summary()}\n')
Example-17-5.py
import wooldridge as woo
import statsmodels.formula.api as smf
import scipy.stats as stats

mroz = woo.dataWoo('mroz')

# step 1 (use all n observations to estimate a probit model of s_i on z_i):
reg_probit = smf.probit(formula='inlf ~ educ + exper + I(exper**2) + '
                                'nwifeinc + age + kidslt6 + kidsge6',
                        data=mroz)
results_probit = reg_probit.fit(disp=0)
pred_inlf = results_probit.fittedvalues
mroz['inv_mills'] = stats.norm.pdf(pred_inlf) / stats.norm.cdf(pred_inlf)

# step 2 (regress y_i on x_i and inv_mills in sample selection):
reg_heckit = smf.ols(formula='lwage ~ educ + exper + I(exper**2) + inv_mills',
                     subset=(mroz['inlf'] == 1), data=mroz)
results_heckit = reg_heckit.fit()

# print results:
print(f'results_heckit.summary(): \n{results_heckit.summary()}\n')
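A t test on the inv_mills coefficient is the usual check for sample selection; a minimal sketch pulling it from the step-2 results (keeping in mind that these OLS standard errors ignore the estimation error from the step-1 probit):

# sketch: test H0 "no selection" via the coefficient on inv_mills
# (caveat: step-2 OLS standard errors ignore step-1 estimation error)
b_mills = results_heckit.params['inv_mills']
t_mills = results_heckit.tvalues['inv_mills']
print(f'b_mills: {b_mills}\nt_mills: {t_mills}\n')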
Tobit-CondMean-Simulation.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import statsmodels.api as sm
import statsmodels.base.model as smclass

# set the random seed:
np.random.seed(1234567)

x = np.sort(stats.norm.rvs(0, 1, size=100) + 4)
xb = -4 + 1 * x
y_star = xb + stats.norm.rvs(0, 1, size=100)
y = np.copy(y_star)
y[y_star < 0] = 0
x_wc = pd.DataFrame({'const': 1, 'x': x})

# extend statsmodels class:
class Tobit(smclass.GenericLikelihoodModel):
    def nloglikeobs(self, params):
        X = self.exog
        y = self.endog
        p = X.shape[1]
        beta = params[0:p]
        sigma = np.exp(params[p])
        y_hat = np.dot(X, beta)
        y_eq = (y == 0)
        y_g = (y > 0)
        ll = np.empty(len(y))
        ll[y_eq] = np.log(stats.norm.cdf(-y_hat[y_eq] / sigma))
        ll[y_g] = np.log(stats.norm.pdf((y - y_hat)[y_g] / sigma)) - np.log(sigma)
        return -ll

# predictions:
reg_ols = sm.OLS(endog=y, exog=x_wc)
results_ols = reg_ols.fit()
yhat_ols = results_ols.fittedvalues
sigma_start = np.log(sum(results_ols.resid ** 2) / len(results_ols.resid))
params_start = np.concatenate((np.array(results_ols.params), sigma_start),
                              axis=None)
reg_tobit = Tobit(endog=y, exog=x_wc)
results_tobit = reg_tobit.fit(start_params=params_start, disp=0)
yhat_tobit = np.dot(x_wc, np.transpose(results_tobit.params[0:2]))

# plot data and model predictions:
plt.axhline(y=0, linewidth=0.5, linestyle='-', color='grey')
plt.plot(x, y_star, color='black', marker='x', linestyle='', label='real data')
plt.plot(x, y, color='black', marker='+', linestyle='', label='censored data')
plt.plot(x, yhat_ols, color='black', marker='', linestyle='-', label='OLS fit')
plt.plot(x, yhat_tobit, color='black', marker='', linestyle='--',
         label='Tobit fit')
plt.ylabel('y')
plt.xlabel('x')
plt.legend()
plt.savefig('PyGraphs/Tobit-CondMean-Simulation.pdf')
Tobit-CondMean.py
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

x = np.sort(stats.norm.rvs(0, 1, size=100) + 4)
xb = -4 + 1 * x
y_star = xb + stats.norm.rvs(0, 1, size=100)
y = np.copy(y_star)
y[y_star < 0] = 0

# conditional means:
Eystar = xb
Ey = stats.norm.cdf(xb / 1) * xb + 1 * stats.norm.pdf(xb / 1)

# plot data and conditional means:
plt.axhline(y=0, linewidth=0.5, linestyle='-', color='grey')
plt.plot(x, y_star, color='black', marker='x', linestyle='', label='y*')
plt.plot(x, y, color='black', marker='+', linestyle='', label='y')
plt.plot(x, Eystar, color='black', marker='', linestyle='-', label='E(y*)')
plt.plot(x, Ey, color='black', marker='', linestyle='--', label='E(y)')
plt.ylabel('y')
plt.xlabel('x')
plt.legend()
plt.savefig('PyGraphs/Tobit-CondMean.pdf')
TruncReg-Simulation.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

x = np.sort(stats.norm.rvs(0, 1, size=100) + 4)
y = -4 + 1 * x + stats.norm.rvs(0, 1, size=100)

# complete observations and observed (truncated) sample:
compl = pd.DataFrame({'x': x, 'y': y})
sample = compl.loc[y > 0]

# predictions OLS (on the truncated sample):
reg_ols = smf.ols(formula='y ~ x', data=sample)
results_ols = reg_ols.fit()
yhat_ols = results_ols.fittedvalues

# predictions "truncated regression": OLS on the complete data recovers the
# true line that a truncated regression estimator targets:
reg_tr = smf.ols(formula='y ~ x', data=compl)
results_tr = reg_tr.fit()
yhat_tr = results_tr.fittedvalues

# plot data and conditional means:
plt.axhline(y=0, linewidth=0.5, linestyle='-', color='grey')
plt.plot(compl['x'], compl['y'], color='black', marker='o', fillstyle='none',
         linestyle='', label='all data')
plt.plot(sample['x'], sample['y'], color='black', marker='o', fillstyle='full',
         linestyle='', label='sample data')
plt.plot(sample['x'], yhat_ols, color='black', marker='', linestyle='--',
         label='OLS fit')
plt.plot(compl['x'], yhat_tr, color='black', marker='', linestyle='-',
         label='Trunc. Reg. fit')
plt.ylabel('y')
plt.xlabel('x')
plt.legend()
plt.savefig('PyGraphs/TruncReg-Simulation.pdf')