Example-5-3.R
data(crime1, package='wooldridge')

# 1. Estimate restricted model:
restr <- lm(narr86 ~ pcnv+ptime86+qemp86, data=crime1)

# 2. Regression of residuals from restricted model:
utilde <- resid(restr)
LMreg <- lm(utilde ~ pcnv+ptime86+qemp86+avgsen+tottime, data=crime1)
# R-squared:
( r2 <- summary(LMreg)$r.squared )

# 3. Calculation of LM test statistic:
LM <- r2 * nobs(LMreg)
LM

# 4. Critical value from chi-squared distribution, alpha=10%:
qchisq(1-0.10, 2)

# Alternative to critical value: p value
1-pchisq(LM, 2)

# Alternative: automatic F test (see above)
library(car)
unrestr <- lm(narr86 ~ pcnv+ptime86+qemp86+avgsen+tottime, data=crime1)
linearHypothesis(unrestr, c("avgsen=0","tottime=0"))
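For reference, the statistic that all three Example-5-3 scripts compute is the standard LM (score) test of the joint hypothesis that avgsen and tottime have zero coefficients: estimate the restricted model, regress its residuals on the full set of regressors, and scale the R-squared of this auxiliary regression by the sample size,

LM = n \cdot R^2_{\tilde{u}} \;\overset{a}{\sim}\; \chi^2_q, \qquad q = 2,

which is then compared to a chi-squared critical value with two degrees of freedom or converted to a p value, exactly as in the numbered steps of each script.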
Sim-Asy-OLS-chisq.R
# Note: We'll have to set the sample size first, e.g. by uncommenting:
# n <- 100
# Set the random seed
set.seed(1234567)
# set true parameters: intercept & slope
b0 <- 1; b1 <- 0.5
# initialize b1hat to store 10000 results:
b1hat <- numeric(10000)
# Draw a sample of x, fixed over replications:
x <- rnorm(n,4,1)
# repeat r times:
for(j in 1:10000) {
  # Draw a sample of u (standardized chi-squared[1]):
  u <- ( rchisq(n,1)-1 ) / sqrt(2)
  # Draw a sample of y:
  y <- b0 + b1*x + u
  # regress y on x and store slope estimate at position j
  bhat <- coef( lm(y~x) )
  b1hat[j] <- bhat["x"]
}
Sim-Asy-OLS-norm.R
# Note: We'll have to set the sample size first, e.g. by uncommenting:
# n <- 100
# Set the random seed
set.seed(1234567)
# set true parameters: intercept & slope
b0 <- 1; b1 <- 0.5
# initialize b1hat to store 10000 results:
b1hat <- numeric(10000)
# Draw a sample of x, fixed over replications:
x <- rnorm(n,4,1)
# repeat r times:
for(j in 1:10000) {
  # Draw a sample of u (std. normal):
  u <- rnorm(n)
  # Draw a sample of y:
  y <- b0 + b1*x + u
  # regress y on x and store slope estimate at position j
  bhat <- coef( lm(y~x) )
  b1hat[j] <- bhat["x"]
}
Sim-Asy-OLS-uncond.R
# Note: We'll have to set the sample size first, e.g. by uncommenting:
# n <- 100
# Set the random seed
set.seed(1234567)
# set true parameters: intercept & slope
b0 <- 1; b1 <- 0.5
# initialize b1hat to store 10000 results:
b1hat <- numeric(10000)
# repeat r times:
for(j in 1:10000) {
  # Draw a sample of x, varying over replications:
  x <- rnorm(n,4,1)
  # Draw a sample of u (std. normal):
  u <- rnorm(n)
  # Draw a sample of y:
  y <- b0 + b1*x + u
  # regress y on x and store slope estimate at position j
  bhat <- coef( lm(y~x) )
  b1hat[j] <- bhat["x"]
}
Sim-Asy-OLS-unif.R
# Note: We'll have to set the sample size first, e.g. by uncommenting:
# n <- 100
# Set the random seed
set.seed(1234567)
# set true parameters: intercept & slope
b0 <- 1; b1 <- 0.5
# initialize b1hat to store 10000 results:
b1hat <- numeric(10000)
# Draw a sample of x, fixed over replications:
x <- rnorm(n,4,1)
# repeat r times:
for(j in 1:10000) {
  # Draw a sample of u (uniform between -sqrt(3) and sqrt(3)):
  u <- runif(n,-sqrt(3),sqrt(3))
  # Draw a sample of y:
  y <- b0 + b1*x + u
  # regress y on x and store slope estimate
  bhat <- coef( lm(y~x) )
  b1hat[j] <- bhat["x"]
}
Example-5-3.py
import wooldridge as woo
import statsmodels.formula.api as smf
import scipy.stats as stats

crime1 = woo.dataWoo('crime1')

# 1. estimate restricted model:
reg_r = smf.ols(formula='narr86 ~ pcnv + ptime86 + qemp86', data=crime1)
fit_r = reg_r.fit()
r2_r = fit_r.rsquared
print(f'r2_r: {r2_r}\n')

# 2. regression of residuals from restricted model:
crime1['utilde'] = fit_r.resid
reg_LM = smf.ols(formula='utilde ~ pcnv + ptime86 + qemp86 + avgsen + tottime',
                 data=crime1)
fit_LM = reg_LM.fit()
r2_LM = fit_LM.rsquared
print(f'r2_LM: {r2_LM}\n')

# 3. calculation of LM test statistic:
LM = r2_LM * fit_LM.nobs
print(f'LM: {LM}\n')

# 4. critical value from chi-squared distribution, alpha=10%:
cv = stats.chi2.ppf(1 - 0.10, 2)
print(f'cv: {cv}\n')

# 5. p value (alternative to critical value):
pval = 1 - stats.chi2.cdf(LM, 2)
print(f'pval: {pval}\n')

# 6. compare to F-test:
reg = smf.ols(formula='narr86 ~ pcnv + ptime86 + qemp86 + avgsen + tottime',
              data=crime1)
results = reg.fit()
hypotheses = ['avgsen = 0', 'tottime = 0']
ftest = results.f_test(hypotheses)
fstat = ftest.statistic[0][0]
fpval = ftest.pvalue
print(f'fstat: {fstat}\n')
print(f'fpval: {fpval}\n')
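As an aside, statsmodels can also produce an LM test directly from two fitted results objects via compare_lm_test. The following is a minimal sketch reusing results (unrestricted) and fit_r (restricted) from the script above; note that its default score-test variant may not match the hand-computed n * R-squared value exactly:

# LM test computed by statsmodels (sketch; reuses fit objects from above):
lm_value, p_value, df_diff = results.compare_lm_test(fit_r)
print(f'lm_value: {lm_value}, p_value: {p_value}, df_diff: {df_diff}')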
Sim-Asy-OLS-chisq-fig.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import matplotlib.pyplot as plt
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample size and number of simulations:
n = [5, 10, 100, 1000]
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

for j in n:
    # initialize b1 to store results later:
    b1 = np.empty(r)
    # draw a sample of x, fixed over replications:
    x = stats.norm.rvs(ex, sx, size=j)
    # repeat r times:
    for i in range(r):
        # draw a sample of u (standardized chi-squared[1]):
        u = (stats.chi2.rvs(1, size=j) - 1) / np.sqrt(2)
        y = beta0 + beta1 * x + u
        df = pd.DataFrame({'y': y, 'x': x})
        # estimate conditional OLS:
        reg = smf.ols(formula='y ~ x', data=df)
        results = reg.fit()
        b1[i] = results.params['x']

    # simulated density:
    kde = sm.nonparametric.KDEUnivariate(b1)
    kde.fit()

    # normal density / compute mu and se:
    X = pd.DataFrame({'const': 1, 'x': x})
    Vbhat = sx * np.linalg.inv(X.T @ X)
    se = np.sqrt(np.diagonal(Vbhat))
    x_range = np.linspace(min(b1), max(b1))
    y = stats.norm.pdf(x_range, beta1, se[1])

    # plotting:
    filename = 'PyGraphs/MCSim-olsasy-chisq-n' + str(j) + '.pdf'
    # plt.ylim(top=2)
    # plt.xlim(8.5, 11.5)
    plt.plot(kde.support, kde.density, color='black', label='b1')
    plt.plot(x_range, y, linestyle='--', color='black',
             label='normal distribution')
    plt.ylabel('density')
    plt.xlabel('')
    plt.legend()
    plt.savefig(filename)
    plt.close()

# Normal vs. Chisq:
# support of normal density:
x_range = np.linspace(-4, 4, num=100)

# pdf for all these values (the density of (chi2[1]-1)/sqrt(2) requires the
# Jacobian factor sqrt(2) so that it integrates to one):
pdf_n = stats.norm.pdf(x_range)
pdf_c = np.sqrt(2) * stats.chi2.pdf(x_range * np.sqrt(2) + 1, 1)

# plot:
plt.plot(x_range, pdf_n, linestyle='-', color='black',
         label='standard normal')
plt.plot(x_range, pdf_c, linestyle='--', color='black',
         label='standardized chi squared[1]')
plt.xlabel('x')
plt.ylabel('dx')
plt.legend()
plt.savefig('PyGraphs/MCSim-olsasy-stdchisq.pdf')
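The standardization used above deserves a quick sanity check. This small sketch (not part of the original script) verifies by simulation that the standardized chi-squared errors indeed have mean 0 and variance 1, since chi-squared[1] has mean 1 and variance 2:

import numpy as np
import scipy.stats as stats

# simulate the standardized chi-squared[1] transformation used above:
z = (stats.chi2.rvs(1, size=1000000) - 1) / np.sqrt(2)
print(z.mean(), z.var())  # should be close to (0, 1)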
Sim-Asy-OLS-chisq.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = np.empty(r)

# draw a sample of x, fixed over replications:
x = stats.norm.rvs(ex, sx, size=n)

# repeat r times:
for i in range(r):
    # draw a sample of u (standardized chi-squared[1]):
    u = (stats.chi2.rvs(1, size=n) - 1) / np.sqrt(2)
    y = beta0 + beta1 * x + u
    df = pd.DataFrame({'y': y, 'x': x})
    # estimate conditional OLS:
    reg = smf.ols(formula='y ~ x', data=df)
    results = reg.fit()
    b1[i] = results.params['x']
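Once this (or any of the other Sim-Asy-OLS-*.py scripts) has run, the simulated sampling distribution can be summarized in a few lines; a sketch reusing the b1 array filled by the loop above:

import numpy as np

# the mean of the 10000 slope estimates should be close to beta1 = 0.5:
print(np.mean(b1))
# spread of the estimates around the true value:
print(np.std(b1, ddof=1))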
Sim-Asy-OLS-norm-fig.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import matplotlib.pyplot as plt
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample size and number of simulations:
n = [5, 10, 100, 1000]
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

for j in n:
    # initialize b1 to store results later:
    b1 = np.empty(r)
    # draw a sample of x, fixed over replications:
    x = stats.norm.rvs(ex, sx, size=j)
    # repeat r times:
    for i in range(r):
        # draw a sample of u (std. normal):
        u = stats.norm.rvs(0, 1, size=j)
        y = beta0 + beta1 * x + u
        df = pd.DataFrame({'y': y, 'x': x})
        # estimate conditional OLS:
        reg = smf.ols(formula='y ~ x', data=df)
        results = reg.fit()
        b1[i] = results.params['x']

    # simulated density:
    kde = sm.nonparametric.KDEUnivariate(b1)
    kde.fit()

    # normal density / compute mu and se:
    X = pd.DataFrame({'const': 1, 'x': x})
    Vbhat = sx * np.linalg.inv(X.T @ X)
    se = np.sqrt(np.diagonal(Vbhat))
    x_range = np.linspace(min(b1), max(b1))
    y = stats.norm.pdf(x_range, beta1, se[1])

    # plotting:
    filename = 'PyGraphs/MCSim-olsasy-norm-n' + str(j) + '.pdf'
    # plt.ylim(top=2)
    # plt.xlim(8.5, 11.5)
    plt.plot(kde.support, kde.density, color='black', label='b1')
    plt.plot(x_range, y, linestyle='--', color='black',
             label='normal distribution')
    plt.ylabel('density')
    plt.xlabel('')
    plt.legend()
    plt.savefig(filename)
    plt.close()
Sim-Asy-OLS-norm.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = np.empty(r)

# draw a sample of x, fixed over replications:
x = stats.norm.rvs(ex, sx, size=n)

# repeat r times:
for i in range(r):
    # draw a sample of u (std. normal):
    u = stats.norm.rvs(0, 1, size=n)
    y = beta0 + beta1 * x + u
    df = pd.DataFrame({'y': y, 'x': x})
    # estimate conditional OLS:
    reg = smf.ols(formula='y ~ x', data=df)
    results = reg.fit()
    b1[i] = results.params['x']
Sim-Asy-OLS-uncond-fig.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import matplotlib.pyplot as plt
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample size and number of simulations:
n = [5, 10, 100, 1000]
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

for j in n:
    # initialize b1 to store results later:
    b1 = np.empty(r)
    # repeat r times:
    for i in range(r):
        # draw a sample of x, varying over replications:
        x = stats.norm.rvs(ex, sx, size=j)
        # draw a sample of u (std. normal):
        u = stats.norm.rvs(0, 1, size=j)
        y = beta0 + beta1 * x + u
        df = pd.DataFrame({'y': y, 'x': x})
        # estimate unconditional OLS:
        reg = smf.ols(formula='y ~ x', data=df)
        results = reg.fit()
        b1[i] = results.params['x']

    # simulated density:
    kde = sm.nonparametric.KDEUnivariate(b1)
    kde.fit()

    # normal density / compute mu and se:
    X = pd.DataFrame({'const': 1, 'x': x})
    Vbhat = sx * np.linalg.inv(X.T @ X)
    se = np.sqrt(np.diagonal(Vbhat))
    x_range = np.linspace(min(b1), max(b1))
    y = stats.norm.pdf(x_range, beta1, se[1])

    # plotting:
    filename = 'PyGraphs/MCSim-olsasy-uncond-n' + str(j) + '.pdf'
    # plt.ylim(top=2)
    # plt.xlim(8.5, 11.5)
    plt.plot(kde.support, kde.density, color='black', label='b1')
    plt.plot(x_range, y, linestyle='--', color='black',
             label='normal distribution')
    plt.ylabel('density')
    plt.xlabel('')
    plt.legend()
    plt.savefig(filename)
    plt.close()
Sim-Asy-OLS-uncond.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = np.empty(r)

# repeat r times:
for i in range(r):
    # draw a sample of x, varying over replications:
    x = stats.norm.rvs(ex, sx, size=n)
    # draw a sample of u (std. normal):
    u = stats.norm.rvs(0, 1, size=n)
    y = beta0 + beta1 * x + u
    df = pd.DataFrame({'y': y, 'x': x})
    # estimate unconditional OLS:
    reg = smf.ols(formula='y ~ x', data=df)
    results = reg.fit()
    b1[i] = results.params['x']
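Since the error standard deviation is 1, n = 100, and Var(x) = sx^2 = 1, the usual asymptotic approximation Var(b1hat) = sigma_u^2 / (n * Var(x)) implies a standard deviation of 0.1 for the slope estimator. A quick sketch of this comparison, reusing the b1 array from the loop above:

import numpy as np

# asymptotic approximation: sd(b1hat) = 1 / sqrt(100 * 1) = 0.1
print(np.std(b1, ddof=1))  # should be close to 0.1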
Sim-Asy-OLS-unif.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = np.empty(r)

# draw a sample of x, fixed over replications:
x = stats.norm.rvs(ex, sx, n)

# repeat r times:
for i in range(r):
    # draw a sample of u (uniform):
    u = np.random.uniform(-np.sqrt(3), np.sqrt(3), n)
    y = beta0 + beta1 * x + u
    df = pd.DataFrame({'y': y, 'x': x})
    # estimate conditional OLS:
    reg = smf.ols(formula='y ~ x', data=df)
    results = reg.fit()
    b1[i] = results.params['x']
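The bounds of plus/minus sqrt(3) in the uniform draw are not arbitrary: they standardize u to mean 0 and variance 1, matching the other error distributions in these simulations. A one-line check with scipy (a sketch, independent of the script):

import numpy as np
import scipy.stats as stats

# Uniform(a, b) has mean (a+b)/2 and variance (b-a)^2/12;
# with a = -sqrt(3), b = sqrt(3) this gives mean 0, variance 1:
print(stats.uniform(loc=-np.sqrt(3), scale=2 * np.sqrt(3)).stats(moments='mv'))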
Example-5-3.jl
using WooldridgeDatasets, GLM, DataFrames, Distributions

crime1 = DataFrame(wooldridge("crime1"))

# 1. estimate restricted model:
reg_r = lm(@formula(narr86 ~ pcnv + ptime86 + qemp86), crime1)
r2_r = r2(reg_r)
println("r2_r = $r2_r\n")

# 2. regression of residuals from restricted model:
crime1.utilde = residuals(reg_r)
reg_LM = lm(@formula(utilde ~ pcnv + ptime86 + qemp86 + avgsen + tottime), crime1)
r2_LM = r2(reg_LM)
println("r2_LM = $r2_LM\n")

# 3. calculation of LM test statistic:
LM = r2_LM * nobs(reg_LM)
println("LM = $LM\n")

# 4. critical value from chi-squared distribution, alpha=10%:
cv = quantile(Chisq(2), 1 - 0.10)
println("cv = $cv\n")

# 5. p value (alternative to critical value):
pval = 1 - cdf(Chisq(2), LM)
println("pval = $pval\n")

# 6. compare to F test:
reg_ur = lm(@formula(narr86 ~ pcnv + ptime86 + qemp86 + avgsen + tottime), crime1)
# hypotheses: "avgsen = 0" and "tottime = 0"
reg_r = lm(@formula(narr86 ~ pcnv + ptime86 + qemp86), crime1)
ftest_res = ftest(reg_r.model, reg_ur.model)
fstat = ftest_res.fstat[2]
fpval = ftest_res.pval[2]
println("fstat = $fstat\n")
println("fpval = $fpval")
Sim-Asy-OLS-chisq-fig.jl
using Distributions, DataFrames, GLM, Random, Plots, KernelDensity, LinearAlgebra

# set the random seed:
Random.seed!(12345)

# set sample size and number of simulations:
n = [5, 10, 100, 1000]
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

for j in n
    # initialize b1 to store results later:
    b1 = zeros(r)
    # draw a sample of x, fixed over replications:
    x = rand(Normal(ex, sx), j)
    # repeat r times:
    for i in 1:r
        # draw a sample of u (standardized chi-squared[1]):
        u = (rand(Chisq(1), j) .- 1) ./ sqrt(2)
        y = beta0 .+ beta1 .* x .+ u
        df = DataFrame(y=y, x=x)
        # estimate conditional OLS:
        reg = lm(@formula(y ~ x), df)
        b1[i] = coef(reg)[2]
    end

    # simulated density:
    kde_sim = KernelDensity.kde(b1)

    # normal density / compute mu and se:
    X = [fill(1.0, (length(x), 1)) x]
    Vbhat = sx .* inv(transpose(X) * X)
    se = sqrt.(diag(Vbhat))
    x_range = range(minimum(b1), maximum(b1), r)
    y = pdf.(Normal(beta1, se[2]), x_range)

    # plotting:
    filename = "JlGraphs/MCSim-olsasy-chisq-n$j.pdf"
    plot(kde_sim.x, kde_sim.density, color="black", label="b1")
    plot!(x_range, y, line=:dash, color="black", label="normal distribution")
    ylabel!("density")
    savefig("$filename")
end

# Normal vs. Chisq:
# support of normal density:
x_range = range(-4, 4, 100)

# pdf for all these values (the density of (chi2[1]-1)/sqrt(2) requires the
# Jacobian factor sqrt(2) so that it integrates to one):
pdf_n = pdf.(Normal(), x_range)
pdf_c = sqrt(2) .* pdf.(Chisq(1), x_range .* sqrt(2) .+ 1)

# plot:
plot(x_range, pdf_n, line=:solid, color=:black, linewidth=2,
    label="standard normal")
plot!(x_range, pdf_c, line=:dash, color=:black, linewidth=2,
    label="standardized chi squared[1]")
ylabel!("dx")
xlabel!("x")
savefig("JlGraphs/MCSim-olsasy-stdchisq.pdf")
Sim-Asy-OLS-chisq.jl
using Distributions, DataFrames, GLM, Random

# set the random seed:
Random.seed!(12345)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = zeros(r)

# draw a sample of x, fixed over replications:
x = rand(Normal(ex, sx), n)

# repeat r times:
for i in 1:r
    # draw a sample of u (standardized chi-squared[1]):
    u = (rand(Chisq(1), n) .- 1) ./ sqrt(2)
    y = beta0 .+ beta1 .* x .+ u
    df = DataFrame(y=y, x=x)
    # estimate conditional OLS:
    reg = lm(@formula(y ~ x), df)
    b1[i] = coef(reg)[2]
end
Sim-Asy-OLS-norm-fig.jl
using Distributions, DataFrames, GLM, Random, Plots, KernelDensity, LinearAlgebra

# set the random seed:
Random.seed!(12345)

# set sample size and number of simulations:
n = [5, 10, 100, 1000]
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

for j in n
    # initialize b1 to store results later:
    b1 = zeros(r)
    # draw a sample of x, fixed over replications:
    x = rand(Normal(ex, sx), j)
    # repeat r times:
    for i in 1:r
        # draw a sample of u (std. normal):
        u = rand(Normal(0, 1), j)
        y = beta0 .+ beta1 .* x .+ u
        df = DataFrame(y=y, x=x)
        # estimate conditional OLS:
        reg = lm(@formula(y ~ x), df)
        b1[i] = coef(reg)[2]
    end

    # simulated density:
    kde_sim = KernelDensity.kde(b1)

    # normal density / compute mu and se:
    X = [fill(1.0, (length(x), 1)) x]
    Vbhat = sx .* inv(transpose(X) * X)
    se = sqrt.(diag(Vbhat))
    x_range = range(minimum(b1), maximum(b1), r)
    y = pdf.(Normal(beta1, se[2]), collect(x_range))

    # plotting:
    filename = "JlGraphs/MCSim-olsasy-norm-n$j.pdf"
    plot(kde_sim.x, kde_sim.density, color="black", label="b1")
    plot!(x_range, y, line=:dash, color="black", label="normal distribution")
    ylabel!("density")
    savefig("$filename")
end
Sim-Asy-OLS-norm.jl
using Distributions, DataFrames, GLM, Random

# set the random seed:
Random.seed!(12345)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = zeros(r)

# draw a sample of x, fixed over replications:
x = rand(Normal(ex, sx), n)

# repeat r times:
for i in 1:r
    # draw a sample of u (std. normal):
    u = rand(Normal(0, 1), n)
    y = beta0 .+ beta1 .* x .+ u
    df = DataFrame(y=y, x=x)
    # estimate conditional OLS:
    reg = lm(@formula(y ~ x), df)
    b1[i] = coef(reg)[2]
end
Sim-Asy-OLS-uncond-fig.jl
using Distributions, DataFrames, GLM, Random, Plots, KernelDensity, LinearAlgebra

# set the random seed:
Random.seed!(12345678)

# set sample size and number of simulations:
n = [5, 10, 100, 1000]
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

for j in n
    # initialize b1 to store results later:
    b1 = zeros(r)
    # repeat r times:
    for i in 1:r
        # draw a sample of x, varying over replications:
        x = rand(Normal(ex, sx), j)
        # draw a sample of u (std. normal):
        u = rand(Normal(0, 1), j)
        y = beta0 .+ beta1 .* x .+ u
        df = DataFrame(y=y, x=x)
        # estimate unconditional OLS:
        reg = lm(@formula(y ~ x), df)
        b1[i] = coef(reg)[2]
    end

    # simulated density:
    kde_sim = KernelDensity.kde(b1)

    # normal density / compute mu and se (based on one new draw of x):
    x = rand(Normal(ex, sx), j)
    X = [fill(1.0, (length(x), 1)) x]
    Vbhat = sx .* inv(transpose(X) * X)
    se = sqrt.(diag(Vbhat))
    x_range = range(minimum(b1), maximum(b1), r)
    y = pdf.(Normal(beta1, se[2]), x_range)

    # plotting:
    filename = "JlGraphs/MCSim-olsasy-uncond-n$j.pdf"
    plot(kde_sim.x, kde_sim.density, color="black", label="b1")
    plot!(x_range, y, line=:dash, color="black", label="normal distribution")
    ylabel!("density")
    savefig("$filename")
end

# kurtosis:
using StatsBase

Random.seed!(12345678)
j = 5
b1 = zeros(r)
# repeat r times:
for i in 1:r
    # draw a sample of x, varying over replications:
    x = rand(Normal(ex, sx), j)
    # draw a sample of u (std. normal):
    u = rand(Normal(0, 1), j)
    y = beta0 .+ beta1 .* x .+ u
    df = DataFrame(y=y, x=x)
    # estimate unconditional OLS:
    reg = lm(@formula(y ~ x), df)
    b1[i] = coef(reg)[2]
end

# simulated density:
kde_sim = KernelDensity.kde(b1)

# normal density / compute mu and se (based on one new draw of x):
x = rand(Normal(ex, sx), j)
X = [fill(1.0, (length(x), 1)) x]
Vbhat = sx .* inv(transpose(X) * X)
se = sqrt.(diag(Vbhat))
x_range = range(minimum(b1), maximum(b1), r)
y = pdf.(Normal(beta1, se[2]), x_range)

plot(kde_sim.x, kde_sim.density, color="black", label="b1")
plot!(x_range, y, line=:dash, color="black", label="normal distribution")
ylabel!("density")

StatsBase.kurtosis(kde_sim.density) + 3
StatsBase.kurtosis(y)
x2 = rand(Normal(0, 1), 1000)
kurtosis(x2, 0.0)
Sim-Asy-OLS-uncond.jl
using Distributions, DataFrames, GLM, Random

# set the random seed:
Random.seed!(12345)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = zeros(r)

# repeat r times:
for i in 1:r
    # draw a sample of x, varying over replications:
    x = rand(Normal(ex, sx), n)
    # draw a sample of u (std. normal):
    u = rand(Normal(0, 1), n)
    y = beta0 .+ beta1 .* x .+ u
    df = DataFrame(y=y, x=x)
    # estimate unconditional OLS:
    reg = lm(@formula(y ~ x), df)
    b1[i] = coef(reg)[2]
end
Sim-Asy-OLS-unif.jl
using Distributions, DataFrames, GLM, Random, StatsBase

# set the random seed:
Random.seed!(12345)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = zeros(r)

# draw a sample of x, fixed over replications:
x = rand(Normal(ex, sx), n)

# repeat r times:
for i in 1:r
    # draw a sample of u (uniform):
    u = rand(Uniform(-sqrt(3), sqrt(3)), n)
    y = beta0 .+ beta1 .* x .+ u
    df = DataFrame(y=y, x=x)
    # estimate conditional OLS:
    reg = lm(@formula(y ~ x), df)
    b1[i] = coef(reg)[2]
end