Chapter 5

Example-5-3.R
data(crime1, package='wooldridge')

# 1. Estimate restricted model:
restr <- lm(narr86 ~ pcnv+ptime86+qemp86, data=crime1)

# 2. Regression of residuals from restricted model:
utilde <- resid(restr)
LMreg <- lm(utilde ~ pcnv+ptime86+qemp86+avgsen+tottime, data=crime1)
# R-squared:
(r2 <- summary(LMreg)$r.squared )

# 3. Calculation of LM test statistic:
LM <- r2 * nobs(LMreg)
LM

# 4. Critical value from chi-squared distribution, alpha=10%:
qchisq(1-0.10, 2)

# Alternative to critical value: p value
1-pchisq(LM, 2)

# Alternative: automatic F test (see above)
library(car)
unrestr <- lm(narr86 ~ pcnv+ptime86+qemp86+avgsen+tottime, data=crime1)
linearHypothesis(unrestr, c("avgsen=0","tottime=0"))
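
# Optional sketch: both tests address the same two exclusion restrictions,
# so their p values should be similar. Compare them side by side:
lh <- linearHypothesis(unrestr, c("avgsen=0","tottime=0"))
c(LM = 1-pchisq(LM, 2), F = lh$"Pr(>F)"[2])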
Sim-Asy-OLS-chisq.R
# Note: the sample size n must be set before running this script,
# e.g. by uncommenting:
# n <- 100
# Set the random seed
set.seed(1234567)
# set true parameters: intercept & slope 
b0 <- 1; b1 <- 0.5
# initialize b1hat to store 10000 results:
b1hat <- numeric(10000)

# Draw a sample of x, fixed over replications:
x <- rnorm(n,4,1)
# repeat 10000 times:
for(j in 1:10000) {
  # Draw a sample of u (standardized chi-squared[1]):
  u <- ( rchisq(n,1)-1 ) / sqrt(2)
  # Draw a sample of y:
  y <- b0 + b1*x + u
  # regress y on x and store slope estimate at position j
  bhat <- coef( lm(y~x) )
  b1hat[j] <- bhat["x"]
}
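
# Optional sketch (assumes n was set above): summarize the simulated estimates
# and compare their density with a normal distribution:
mean(b1hat); sd(b1hat)
plot(density(b1hat))
curve(dnorm(x, mean(b1hat), sd(b1hat)), add=TRUE, lty=2)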
Sim-Asy-OLS-norm.R
# Note: the sample size n must be set before running this script,
# e.g. by uncommenting:
# n <- 100
# Set the random seed
set.seed(1234567)
# set true parameters: intercept & slope 
b0 <- 1; b1 <- 0.5
# initialize b1hat to store 10000 results:
b1hat <- numeric(10000)

# Draw a sample of x, fixed over replications:
x <- rnorm(n,4,1)
# repeat 10000 times:
for(j in 1:10000) {
  # Draw a sample of u (std. normal):
  u <- rnorm(n)
  # Draw a sample of y:
  y <- b0 + b1*x + u
  # regress y on x and store slope estimate at position j
  bhat <- coef( lm(y~x) )
  b1hat[j] <- bhat["x"]
}
Sim-Asy-OLS-uncond.R
# Note: the sample size n must be set before running this script,
# e.g. by uncommenting:
# n <- 100
# Set the random seed
set.seed(1234567)
# set true parameters: intercept & slope 
b0 <- 1; b1 <- 0.5
# initialize b1hat to store 10000 results:
b1hat <- numeric(10000)

# repeat 10000 times:
for(j in 1:10000) {
  # Draw a sample of x, varying over replications:
  x <- rnorm(n,4,1)
  # Draw a sample of u (std. normal):
  u <- rnorm(n)
  # Draw a sample of y:
  y <- b0 + b1*x + u
  # regress y on x and store slope estimate at position j
  bhat <- coef( lm(y~x) )
  b1hat[j] <- bhat["x"]
}
Sim-Asy-OLS-unif.R
# Note: the sample size n must be set before running this script,
# e.g. by uncommenting:
# n <- 100
# Set the random seed
set.seed(1234567)
# set true parameters: intercept & slope 
b0 <- 1; b1 <- 0.5
# initialize b1hat to store 10000 results:
b1hat <- numeric(10000)

# Draw a sample of x, fixed over replications:
x <- rnorm(n,4,1)
# repeat 10000 times:
for(j in 1:10000) {
  # Draw a sample of u (uniform between -sqrt(3) and sqrt(3), so var(u)=1):
  u <- runif(n,-sqrt(3),sqrt(3))
  # Draw a sample of y:
  y <- b0 + b1*x + u
  # regress y on x and store slope estimate
  bhat <- coef( lm(y~x) )
  b1hat[j] <- bhat["x"]
}
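
# Optional sketch: run one of the scripts above for several sample sizes
# (assumes the script files are in the working directory and n is left
# commented out inside them):
for (n in c(5, 10, 100, 1000)) {
  source("Sim-Asy-OLS-norm.R")
  cat("n =", n, " sd(b1hat) =", sd(b1hat), "\n")
}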
Example-5-3.py
import wooldridge as woo
import statsmodels.formula.api as smf
import scipy.stats as stats

crime1 = woo.dataWoo('crime1')

# 1. estimate restricted model:
reg_r = smf.ols(formula='narr86 ~ pcnv + ptime86 + qemp86', data=crime1)
fit_r = reg_r.fit()
r2_r = fit_r.rsquared
print(f'r2_r: {r2_r}\n')

# 2. regression of residuals from restricted model:
crime1['utilde'] = fit_r.resid
reg_LM = smf.ols(formula='utilde ~ pcnv + ptime86 + qemp86 + avgsen + tottime',
                 data=crime1)
fit_LM = reg_LM.fit()
r2_LM = fit_LM.rsquared
print(f'r2_LM: {r2_LM}\n')

# 3. calculation of LM test statistic:
LM = r2_LM * fit_LM.nobs
print(f'LM: {LM}\n')

# 4. critical value from chi-squared distribution, alpha=10%:
cv = stats.chi2.ppf(1 - 0.10, 2)
print(f'cv: {cv}\n')

# 5. p value (alternative to critical value):
pval = 1 - stats.chi2.cdf(LM, 2)
print(f'pval: {pval}\n')

# 6. compare to F-test:
reg = smf.ols(formula='narr86 ~ pcnv + ptime86 + qemp86 + avgsen + tottime',
              data=crime1)
results = reg.fit()
hypotheses = ['avgsen = 0', 'tottime = 0']
ftest = results.f_test(hypotheses)
# note: depending on the statsmodels version, ftest.statistic may be a
# scalar rather than a 1x1 array (then, drop the indexing):
fstat = ftest.statistic[0][0]
fpval = ftest.pvalue
print(f'fstat: {fstat}\n')
print(f'fpval: {fpval}\n')
Sim-Asy-OLS-chisq-fig.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import matplotlib.pyplot as plt
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample sizes and number of simulations:
n = [5, 10, 100, 1000]
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

for j in n:
    # initialize b1 to store results later:
    b1 = np.empty(r)
    # draw a sample of x, fixed over replications:
    x = stats.norm.rvs(ex, sx, size=j)
    # repeat r times:
    for i in range(r):
        # draw a sample of u (standardized chi-squared[1]):
        u = (stats.chi2.rvs(1, size=j) - 1) / np.sqrt(2)
        y = beta0 + beta1 * x + u
        df = pd.DataFrame({'y': y, 'x': x})

        # estimate conditional OLS:
        reg = smf.ols(formula='y ~ x', data=df)
        results = reg.fit()
        b1[i] = results.params['x']

    # simulated density:
    kde = sm.nonparametric.KDEUnivariate(b1)
    kde.fit()
    # normal density: compute se of b1 from the asymptotic variance
    # (the error variance is 1, coinciding with sx here):
    X = pd.DataFrame({'const': 1, 'x': x})
    Vbhat = sx * np.linalg.inv(X.T @ X)
    se = np.sqrt(np.diagonal(Vbhat))
    x_range = np.linspace(min(b1), max(b1))
    y = stats.norm.pdf(x_range, beta1, se[1])
    # plotting:
    filename = 'PyGraphs/MCSim-olsasy-chisq-n' + str(j) + '.pdf'
    plt.plot(kde.support, kde.density, color='black', label='b1')
    plt.plot(x_range, y, linestyle='--', color='black', label='normal distribution')
    plt.ylabel('density')
    plt.xlabel('')
    plt.legend()
    plt.savefig(filename)
    plt.close()

# Normal vs. Chisq

# support of normal density:
x_range = np.linspace(-4, 4, num=100)

# pdf for all these values:
pdf_n = stats.norm.pdf(x_range)
# for the standardized chi-squared, V = (W-1)/sqrt(2) with W ~ chi2(1),
# the change of variables gives f_V(v) = sqrt(2)*f_W(sqrt(2)*v + 1):
pdf_c = stats.chi2.pdf(x_range * np.sqrt(2) + 1, 1) * np.sqrt(2)
# plot:
plt.plot(x_range, pdf_n, linestyle='-', color='black', label='standard normal')
plt.plot(x_range, pdf_c, linestyle='--', color='black', label='standardized chi squared[1]')
plt.xlabel('x')
plt.ylabel('dx')
plt.legend()
plt.savefig('PyGraphs/MCSim-olsasy-stdchisq.pdf')
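
# optional sanity check (sketch): the standardized chi-squared[1] draws
# should have mean close to 0 and variance close to 1:
u_check = (stats.chi2.rvs(1, size=10 ** 6) - 1) / np.sqrt(2)
print(f'mean: {np.mean(u_check)}, var: {np.var(u_check)}')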
Sim-Asy-OLS-chisq.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = np.empty(r)

# draw a sample of x, fixed over replications:
x = stats.norm.rvs(ex, sx, size=n)

# repeat r times:
for i in range(r):
    # draw a sample of u (standardized chi-squared[1]):
    u = (stats.chi2.rvs(1, size=n) - 1) / np.sqrt(2)
    y = beta0 + beta1 * x + u
    df = pd.DataFrame({'y': y, 'x': x})

    # estimate conditional OLS:
    reg = smf.ols(formula='y ~ x', data=df)
    results = reg.fit()
    b1[i] = results.params['x']
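
# optional sketch: summarize the simulated estimates
# (the mean should be close to beta1 = 0.5):
print(f'mean: {np.mean(b1)}, sd: {np.std(b1, ddof=1)}')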
Sim-Asy-OLS-norm-fig.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import matplotlib.pyplot as plt
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample sizes and number of simulations:
n = [5, 10, 100, 1000]
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

for j in n:
    # initialize b1 to store results later:
    b1 = np.empty(r)
    # draw a sample of x, fixed over replications:
    x = stats.norm.rvs(ex, sx, size=j)
    # repeat r times:
    for i in range(r):
        # draw a sample of u (std. normal):
        u = stats.norm.rvs(0, 1, size=j)
        y = beta0 + beta1 * x + u
        df = pd.DataFrame({'y': y, 'x': x})

        # estimate conditional OLS:
        reg = smf.ols(formula='y ~ x', data=df)
        results = reg.fit()
        b1[i] = results.params['x']
    # simulated density:
    kde = sm.nonparametric.KDEUnivariate(b1)
    kde.fit()
    # normal density: compute se of b1 from the asymptotic variance
    # (the error variance is 1, coinciding with sx here):
    X = pd.DataFrame({'const': 1, 'x': x})
    Vbhat = sx * np.linalg.inv(X.T @ X)
    se = np.sqrt(np.diagonal(Vbhat))
    x_range = np.linspace(min(b1), max(b1))
    y = stats.norm.pdf(x_range, beta1, se[1])
    # plotting:
    filename = 'PyGraphs/MCSim-olsasy-norm-n' + str(j) + '.pdf'
    plt.plot(kde.support, kde.density, color='black', label='b1')
    plt.plot(x_range, y, linestyle='--', color='black', label='normal distribution')
    plt.ylabel('density')
    plt.xlabel('')
    plt.legend()
    plt.savefig(filename)
    plt.close()
Sim-Asy-OLS-norm.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = np.empty(r)

# draw a sample of x, fixed over replications:
x = stats.norm.rvs(ex, sx, size=n)

# repeat r times:
for i in range(r):
    # draw a sample of u (std. normal):
    u = stats.norm.rvs(0, 1, size=n)
    y = beta0 + beta1 * x + u
    df = pd.DataFrame({'y': y, 'x': x})

    # estimate conditional OLS:
    reg = smf.ols(formula='y ~ x', data=df)
    results = reg.fit()
    b1[i] = results.params['x']
Sim-Asy-OLS-uncond-fig.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import matplotlib.pyplot as plt
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample sizes and number of simulations:
n = [5, 10, 100, 1000]
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

for j in n:
    # initialize b1 to store results later:
    b1 = np.empty(r)
    # repeat r times:
    for i in range(r):
        # draw a sample of x, varying over replications:
        x = stats.norm.rvs(ex, sx, size=j)
        # draw a sample of u (std. normal):
        u = stats.norm.rvs(0, 1, size=j)
        y = beta0 + beta1 * x + u
        df = pd.DataFrame({'y': y, 'x': x})

        # estimate unconditional OLS:
        reg = smf.ols(formula='y ~ x', data=df)
        results = reg.fit()
        b1[i] = results.params['x']
    # simulated density:
    kde = sm.nonparametric.KDEUnivariate(b1)
    kde.fit()
    # normal density: compute se of b1 from the asymptotic variance, using the
    # last drawn x as a representative sample (the error variance is 1):
    X = pd.DataFrame({'const': 1, 'x': x})
    Vbhat = sx * np.linalg.inv(X.T @ X)
    se = np.sqrt(np.diagonal(Vbhat))
    x_range = np.linspace(min(b1), max(b1))
    y = stats.norm.pdf(x_range, beta1, se[1])
    # plotting:
    filename = 'PyGraphs/MCSim-olsasy-uncond-n' + str(j) + '.pdf'
    plt.plot(kde.support, kde.density, color='black', label='b1')
    plt.plot(x_range, y, linestyle='--', color='black', label='normal distribution')
    plt.ylabel('density')
    plt.xlabel('')
    plt.legend()
    plt.savefig(filename)
    plt.close()
Sim-Asy-OLS-uncond.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = np.empty(r)

# repeat r times:
for i in range(r):
    # draw a sample of x, varying over replications:
    x = stats.norm.rvs(ex, sx, size=n)

    # draw a sample of u (std. normal):
    u = stats.norm.rvs(0, 1, size=n)
    y = beta0 + beta1 * x + u
    df = pd.DataFrame({'y': y, 'x': x})

    # estimate unconditional OLS:
    reg = smf.ols(formula='y ~ x', data=df)
    results = reg.fit()
    b1[i] = results.params['x']
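
# optional sketch: excess kurtosis of the estimates
# (0 for an exactly normal distribution, positive for fatter tails):
print(f'excess kurtosis: {stats.kurtosis(b1)}')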
Sim-Asy-OLS-unif.py
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import scipy.stats as stats

# set the random seed:
np.random.seed(1234567)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = np.empty(r)

# draw a sample of x, fixed over replications:
x = stats.norm.rvs(ex, sx, size=n)

# repeat r times:
for i in range(r):
    # draw a sample of u (uniform):
    u = np.random.uniform(-np.sqrt(3), np.sqrt(3), n)
    y = beta0 + beta1 * x + u
    df = pd.DataFrame({'y': y, 'x': x})

    # estimate conditional OLS:
    reg = smf.ols(formula='y ~ x', data=df)
    results = reg.fit()
    b1[i] = results.params['x']
Example-5-3.jl
using WooldridgeDatasets, GLM, DataFrames, Distributions

crime1 = DataFrame(wooldridge("crime1"))

# 1. estimate restricted model:
reg_r = lm(@formula(narr86 ~ pcnv + ptime86 + qemp86), crime1)
r2_r = r2(reg_r)
println("r2_r = $r2_r\n")

# 2. regression of residuals from restricted model:
crime1.utilde = residuals(reg_r)
reg_LM = lm(@formula(utilde ~
                pcnv + ptime86 + qemp86 + avgsen + tottime), crime1)
r2_LM = r2(reg_LM)
println("r2_LM = $r2_LM\n")

# 3. calculation of LM test statistic:
LM = r2_LM * nobs(reg_LM)
println("LM = $LM\n")

# 4. critical value from chi-squared distribution, alpha=10%:
cv = quantile(Chisq(2), 1 - 0.10)
println("cv = $cv\n")

# 5. p value (alternative to critical value):
pval = 1 - cdf(Chisq(2), LM)
println("pval = $pval\n")

# 6. compare to F test:
reg_ur = lm(@formula(narr86 ~
                pcnv + ptime86 + qemp86 + avgsen + tottime), crime1)
# hypotheses: "avgsen = 0" and "tottime = 0" (reg_r from step 1 is the restricted model)
ftest_res = ftest(reg_r.model, reg_ur.model)

fstat = ftest_res.fstat[2]
fpval = ftest_res.pval[2]
println("fstat = $fstat\n")
println("fpval = $fpval")
Sim-Asy-OLS-chisq-fig.jl
using Distributions, DataFrames, GLM, Random, Plots, KernelDensity, LinearAlgebra

# set the random seed:
Random.seed!(12345)

# set sample sizes and number of simulations:
n = [5, 10, 100, 1000]
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

for j in n
    # initialize b1 to store results later:
    b1 = zeros(r)
    # draw a sample of x, fixed over replications:
    x = rand(Normal(ex, sx), j)
    # repeat r times:
    for i in 1:r
        # draw a sample of u (standardized chi-squared[1]):
        u = (rand(Chisq(1), j) .- 1) ./ sqrt(2)
        y = beta0 .+ beta1 .* x .+ u
        df = DataFrame(y=y, x=x)
        # estimate conditional OLS:
        reg = lm(@formula(y ~ x), df)
        b1[i] = coef(reg)[2]
    end
    # simulated density:
    kde_sim = KernelDensity.kde(b1)

    # normal density: compute se of b1 from the asymptotic variance
    # (the error variance is 1, coinciding with sx here):
    X = [fill(1.0, (length(x), 1)) x]
    Vbhat = sx .* inv(transpose(X) * X)
    se = sqrt.(diag(Vbhat))
    x_range = range(minimum(b1), maximum(b1), r)
    y = pdf.(Normal(beta1, se[2]), x_range)

    # plotting:
    filename = "JlGraphs/MCSim-olsasy-chisq-n$j.pdf"
    plot(kde_sim.x, kde_sim.density, color="black", label="b1")
    plot!(x_range, y, line=:dash, color="black", label="normal distribution")
    ylabel!("density")
    savefig(filename)
end


# Normal vs. Chisq

# support of normal density:
x_range = range(-4, 4, 100)

# pdf for all these values; for the standardized chi-squared, V = (W-1)/sqrt(2)
# with W ~ chi2(1), the change of variables gives f_V(v) = sqrt(2)*f_W(sqrt(2)*v + 1):
pdf_n = pdf.(Normal(), x_range)
pdf_c = pdf.(Chisq(1), x_range .* sqrt(2) .+ 1) .* sqrt(2)

# plot:
plot(x_range, pdf_n, line=:solid, color=:black, linewidth=2, label="standard normal")
plot!(x_range, pdf_c, line=:dash, color=:black, linewidth=2, label="standardized chi squared[1]")
ylabel!("dx")
xlabel!("x")
savefig("JlGraphs/MCSim-olsasy-stdchisq.pdf")
Sim-Asy-OLS-chisq.jl
using Distributions, DataFrames, GLM, Random

# set the random seed:
Random.seed!(12345)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = zeros(r)

# draw a sample of x, fixed over replications:
x = rand(Normal(ex, sx), n)

# repeat r times:
for i in 1:r
    # draw a sample of u (standardized chi-squared[1]):
    u = (rand(Chisq(1), n) .- 1) ./ sqrt(2)
    y = beta0 .+ beta1 .* x .+ u
    df = DataFrame(y=y, x=x)
    # estimate conditional OLS:
    reg = lm(@formula(y ~ x), df)
    b1[i] = coef(reg)[2]
end
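
# optional sketch: summarize the simulated estimates
# (the mean should be close to beta1 = 0.5):
using Statistics
println("mean = $(mean(b1)), sd = $(std(b1))")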
Sim-Asy-OLS-norm-fig.jl
using Distributions, DataFrames, GLM, Random, Plots, KernelDensity, LinearAlgebra

# set the random seed:
Random.seed!(12345)

# set sample sizes and number of simulations:
n = [5, 10, 100, 1000]
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

for j in n
    # initialize b1 to store results later:
    b1 = zeros(r)
    # draw a sample of x, fixed over replications:
    x = rand(Normal(ex, sx), j)
    # repeat r times:
    for i in 1:r
        # draw a sample of u (std. normal):
        u = rand(Normal(0, 1), j)
        y = beta0 .+ beta1 .* x .+ u
        df = DataFrame(y=y, x=x)
        # estimate conditional OLS:
        reg = lm(@formula(y ~ x), df)
        b1[i] = coef(reg)[2]
    end
    # simulated density:
    kde_sim = KernelDensity.kde(b1)

    # normal density: compute se of b1 from the asymptotic variance
    # (the error variance is 1, coinciding with sx here):
    X = [fill(1.0, (length(x), 1)) x]
    Vbhat = sx .* inv(transpose(X) * X)
    se = sqrt.(diag(Vbhat))
    x_range = range(minimum(b1), maximum(b1), r)
    y = pdf.(Normal(beta1, se[2]), x_range)

    # plotting:
    filename = "JlGraphs/MCSim-olsasy-norm-n$j.pdf"
    plot(kde_sim.x, kde_sim.density, color="black", label="b1")
    plot!(x_range, y, line=:dash, color="black", label="normal distribution")
    ylabel!("density")
    savefig(filename)
end
Sim-Asy-OLS-norm.jl
using Distributions, DataFrames, GLM, Random

# set the random seed:
Random.seed!(12345)

# set sample size and number of simulations:
n = 100
r = 10000
# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4
# initialize b1 to store results later:
b1 = zeros(r)
# draw a sample of x, fixed over replications:
x = rand(Normal(ex, sx), n)

# repeat r times:
for i in 1:r
    # draw a sample of u (std. normal):
    u = rand(Normal(0, 1), n)
    y = beta0 .+ beta1 .* x .+ u
    df = DataFrame(y=y, x=x)
    # estimate conditional OLS:
    reg = lm(@formula(y ~ x), df)
    b1[i] = coef(reg)[2]
end
Sim-Asy-OLS-uncond-fig.jl
using Distributions, DataFrames, GLM, Random, Plots, KernelDensity, LinearAlgebra

# set the random seed:
Random.seed!(12345678)

# set sample sizes and number of simulations:
n = [5, 10, 100, 1000]
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

for j in n
    # initialize b1 to store results later:
    b1 = zeros(r)
    # repeat r times:
    for i in 1:r
        # draw a sample of x, varying over replications:
        x = rand(Normal(ex, sx), j)
        # draw a sample of u (std. normal):
        u = rand(Normal(0, 1), j)
        y = beta0 .+ beta1 .* x .+ u
        df = DataFrame(y=y, x=x)

        # estimate unconditional OLS:
        reg = lm(@formula(y ~ x), df)
        b1[i] = coef(reg)[2]
    end
    # simulated density:
    kde_sim = KernelDensity.kde(b1)

    # normal density: compute se of b1 from the asymptotic variance, using a
    # fresh draw of x as a representative sample (the error variance is 1):
    x = rand(Normal(ex, sx), j)
    X = [fill(1.0, (length(x), 1)) x]
    Vbhat = sx .* inv(transpose(X) * X)
    se = sqrt.(diag(Vbhat))
    x_range = range(minimum(b1), maximum(b1), r)
    y = pdf.(Normal(beta1, se[2]), x_range)

    # plotting:
    filename = "JlGraphs/MCSim-olsasy-uncond-n$j.pdf"
    plot(kde_sim.x, kde_sim.density, color="black", label="b1")
    plot!(x_range, y, line=:dash, color="black", label="normal distribution")
    ylabel!("density")
    savefig(filename)
end


# kurtosis for the unconditional simulation with j = 5:
using StatsBase
Random.seed!(12345678)
j = 5
b1 = zeros(r)
# repeat r times:
for i in 1:r
    # draw a sample of x, varying over replications:
    x = rand(Normal(ex, sx), j)
    # draw a sample of u (std. normal):
    u = rand(Normal(0, 1), j)
    y = beta0 .+ beta1 .* x .+ u
    df = DataFrame(y=y, x=x)

    # estimate unconditional OLS:
    reg = lm(@formula(y ~ x), df)
    b1[i] = coef(reg)[2]
end
# simulated density:
kde_sim = KernelDensity.kde(b1)

# normal density: compute se of b1 from the asymptotic variance, using a
# fresh draw of x as a representative sample (the error variance is 1):
x = rand(Normal(ex, sx), j)
X = [fill(1.0, (length(x), 1)) x]
Vbhat = sx .* inv(transpose(X) * X)
se = sqrt.(diag(Vbhat))
x_range = range(minimum(b1), maximum(b1), r)
y = pdf.(Normal(beta1, se[2]), x_range)
plot(kde_sim.x, kde_sim.density, color="black", label="b1")
plot!(x_range, y, line=:dash, color="black", label="normal distribution")
ylabel!("density")

# kurtosis of the simulated estimates (3 for an exactly normal distribution):
StatsBase.kurtosis(b1) + 3

# benchmark: kurtosis of a standard normal sample:
x2 = rand(Normal(0, 1), 1000)
StatsBase.kurtosis(x2) + 3
Sim-Asy-OLS-uncond.jl
using Distributions, DataFrames, GLM, Random

# set the random seed:
Random.seed!(12345)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = zeros(r)

# repeat r times:
for i in 1:r
    # draw a sample of x, varying over replications:
    x = rand(Normal(ex, sx), n)
    # draw a sample of u (std. normal):
    u = rand(Normal(0, 1), n)
    y = beta0 .+ beta1 .* x .+ u
    df = DataFrame(y=y, x=x)
    # estimate unconditional OLS:
    reg = lm(@formula(y ~ x), df)
    b1[i] = coef(reg)[2]
end
Sim-Asy-OLS-unif.jl
using Distributions, DataFrames, GLM, Random, StatsBase

# set the random seed:
Random.seed!(12345)

# set sample size and number of simulations:
n = 100
r = 10000

# set true parameters:
beta0 = 1
beta1 = 0.5
sx = 1
ex = 4

# initialize b1 to store results later:
b1 = zeros(r)

# draw a sample of x, fixed over replications:
x = rand(Normal(ex, sx), n)

# repeat r times:
for i in 1:r
    # draw a sample of u (uniform):
    u = rand(Uniform(-sqrt(3), sqrt(3)), n)
    y = beta0 .+ beta1 .* x .+ u
    df = DataFrame(y=y, x=x)
    # estimate conditional OLS:
    reg = lm(@formula(y ~ x), df)
    b1[i] = coef(reg)[2]
end
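
# optional sketch: compare the simulated sd of b1 with the theoretical se
# implied by the fixed sample of x (the error variance is 1):
using Statistics, LinearAlgebra
X = [fill(1.0, (n, 1)) x]
se_b1 = sqrt.(diag(inv(transpose(X) * X)))[2]
println("simulated sd = $(std(b1)), theoretical se = $se_b1")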