# keefereuther's picture
# Update app.R
# a1b0840 verified
library(shiny)
library(ggplot2)
library(knitr)
library(kableExtra)
library(shinythemes)
ui <- fluidPage(
theme = shinytheme("spacelab"),
titlePanel("Learning about Confidence Intervals"),
h5("Created by Keefe Reuther"),
hr(),
tabsetPanel(
########################################
### TAB 1: Single Sample from Normal Distribution
########################################
# Tab 1 layout: the sidebar holds the population/sample controls and the
# guiding questions; the main panel holds the histogram, the summary table
# and the embedded chatbot.
tabPanel(
  title = "Single Sample",
  sidebarLayout(
    sidebarPanel(
      width = 3,
      h3(strong("The Population")),
      h4(strong("Population Mean: 0")),
      br(),
      # `min` is a multiple of `step` so that the default value (1) lies on
      # the browser's step grid (min + k * step); with min = 0.01 the spinner
      # arrows would jump from 1 to 1.01 / 0.91.
      numericInput(
        inputId = "normalSD_tab1",
        label = "Population SD:",
        value = 1,
        min = 0.1,
        step = 0.1
      ),
      hr(),
      h3(strong("Your Single Sample")),
      numericInput(
        inputId = "sampleSize_tab1",
        label = "Sample Size:",
        value = 50,
        min = 1
      ),
      sliderInput(
        inputId = "confidenceLevel_tab1",
        label = "Confidence Level (%)",
        min = 1,
        max = 99,
        value = 95,
        step = 1
      ),
      actionButton(
        inputId = "generateBtn_tab1",
        label = "Generate New Sample"
      ),
      hr(),
      h4(strong("Guiding Questions:")),
      h5("1. How does increasing _______ affect the certainty that your sample mean is close to the population mean?"),
      # Options that fill the blank in question 1.
      tags$ul(
        tags$li("Sample size"),
        tags$li("Standard deviation")
      ),
      hr(),
      # Question 2 asks for a single number, so it has no option list.
      h5("2. If the standard deviation is 4, what is the minimum sample size you should consider collecting if you want your sample mean to be likely within 1 of the population mean?")
    ),
    mainPanel(
      width = 9,
      plotOutput("plot_tab1"),
      hr(),
      uiOutput("stats_tab1"),
      h4(strong("AI Tutor")),
      h5("Have a conversation with the chatbot about this simulation!"),
      # Chatbot for this tab, embedded from its own hosted page.
      tags$iframe(
        src = "https://keefereuther-ci-tab1.hf.space",
        width = "100%",
        height = "500px",
        style = "border:none;"
      )
    )
  )
),
########################################
### TAB 2: Multiple Samples
########################################
# Tab 2 layout: the sidebar holds the population/sampling controls and the
# guiding questions; the main panel holds the dot plot of sample means, a
# text summary and the embedded chatbot.
tabPanel(
  title = "Multiple Samples",
  sidebarLayout(
    sidebarPanel(
      width = 3,
      h3(strong("The Population")),
      # Shown for consistency with the "Single Sample" tab: the plot's
      # reference line and legend on this tab both refer to a mean of 0.
      h4(strong("Population Mean: 0")),
      br(),
      numericInput(
        inputId = "normalSD_tab2",
        label = "Population SD:",
        value = 1,
        min = 0.1,
        step = 0.1
      ),
      hr(),
      h3(strong("Your Multiple Samples")),
      numericInput(
        inputId = "sampleSize_tab2",
        label = "Sample Size (each):",
        value = 50,
        min = 1
      ),
      numericInput(
        inputId = "numSamples_tab2",
        label = "Number of Random Samples:",
        value = 100,
        min = 1
      ),
      sliderInput(
        inputId = "confidenceLevel_tab2",
        label = "Confidence Level (%)",
        min = 50,
        max = 99,
        value = 95,
        step = 1
      ),
      actionButton(
        inputId = "generateBtn_tab2",
        label = "Generate Samples"
      ),
      hr(),
      h4(strong("Guiding Questions:")),
      h5("1. If you run 100 simulated experiments, how many experiments (red dots) failed to contain the true population mean within their 95% CI?"),
      h5("2. Why is it sometimes not exactly 5 red dots (sample means with 95% CIs that don't include the population mean)?"),
      h5("3. How does changing standard deviation and sample size affect the output?"),
      h5("4. What is the relationship between a confidence interval and standard error?")
    ),
    mainPanel(
      width = 9,
      plotOutput("plot_tab2"),
      hr(),
      verbatimTextOutput("stats_tab2"),
      h4(strong("AI Tutor")),
      h5("Have a conversation with the chatbot about this simulation!"),
      # Chatbot for this tab, embedded from its own hosted page.
      tags$iframe(
        src = "https://keefereuther-ci-tab2.hf.space",
        width = "100%",
        height = "700px",
        style = "border:none;"
      )
    )
  )
),
########################################
### TAB 3: Bootstrapping
########################################
# Tab 3 layout: the sidebar holds the population, initial-sample and
# bootstrap controls plus the guiding questions; the main panel holds the
# bootstrap histogram, the interval table and the embedded chatbot.
tabPanel(
  title = "Bootstrapping",
  sidebarLayout(
    sidebarPanel(
      width = 3,
      h3(strong("The Population")),
      # Shown for consistency with the "Single Sample" tab: the plot on this
      # tab marks the population mean at 0.
      h4(strong("Population Mean: 0")),
      br(),
      # `min` is a multiple of `step` so that the default value (1) lies on
      # the browser's step grid (min + k * step); with min = 0.01 the spinner
      # arrows would jump from 1 to 1.01 / 0.91.
      numericInput(
        inputId = "normalSD_tab3",
        label = "Population SD:",
        value = 1,
        min = 0.1,
        step = 0.1
      ),
      hr(),
      h3(strong("Your Initial Sample")),
      numericInput(
        inputId = "sampleSize_tab3",
        label = "Sample Size (initial):",
        value = 50,
        min = 1
      ),
      h5(strong("You must select this button before running the bootstrap:")),
      actionButton(
        inputId = "generateBtn_tab3",
        label = "Generate New Initial Sample"
      ),
      hr(),
      h3(strong("Bootstrapping")),
      numericInput(
        inputId = "bootstrapReps_tab3",
        label = "Number of Bootstrap Replicates:",
        value = 1000,
        min = 1
      ),
      sliderInput(
        inputId = "confidenceLevel_tab3",
        label = "Confidence Level (%)",
        min = 50,
        max = 99,
        value = 95,
        step = 1
      ),
      actionButton(
        inputId = "runBootstrap_tab3",
        label = "Run Bootstrap"
      ),
      hr(),
      h4(strong("Guiding Questions:")),
      h5("1. What is the effect on the 95% CI if you bootstrap 10,000X instead of 100X?"),
      h5("2. When you bootstrap 10,000X, does it seem to give a very similar 95% CI compared to the theoretical CI calculated from the sample size and standard deviation?")
    ),
    mainPanel(
      width = 9,
      plotOutput("plot_tab3"),
      hr(),
      uiOutput("stats_tab3"),
      h4(strong("AI Tutor")),
      h5("Have a conversation with the chatbot about this simulation!"),
      # Chatbot for this tab, embedded from its own hosted page.
      tags$iframe(
        src = "https://keefereuther-ci-tab3.hf.space",
        width = "100%",
        height = "500px",
        style = "border:none;"
      )
    )
  )
)
)
)
server <- function(input, output, session) {
#################################################
# TAB 1: Single Sample
#################################################
# Snapshot taken on each click of "Generate New Sample": the random draws
# together with the population SD they were drawn with. Keeping the SD next
# to the draws lets the plot show the population that actually produced the
# histogram, even if the SD box is edited afterwards without regenerating.
sampleDraw_tab1 <- eventReactive(input$generateBtn_tab1, {
  n <- input$sampleSize_tab1
  pop_sd <- input$normalSD_tab1
  # numericInput() yields NA for an empty box and does not enforce `min` on
  # typed values, so check before handing the values to rnorm().
  validate(
    need(isTRUE(n >= 1), "Sample size must be at least 1."),
    need(isTRUE(pop_sd > 0), "Population SD must be greater than 0.")
  )
  list(
    values = rnorm(n = n, mean = 0, sd = pop_sd),
    pop_sd = pop_sd
  )
})

# The current sample as a plain numeric vector.
singleSample <- reactive({
  sampleDraw_tab1()$values
})

# Summary table for the current sample: mean, SD and standard error of the
# mean (SD / sqrt(n)), each rounded to 3 decimals.
output$stats_tab1 <- renderUI({
  x <- singleSample()
  sample_sd <- sd(x)
  stats_df <- data.frame(
    Statistic = c("Sample Mean", "Sample SD", "Sample SE"),
    Value = round(c(mean(x), sample_sd, sample_sd / sqrt(length(x))), 3)
  )
  HTML(
    knitr::kable(stats_df, format = "html", align = c("l", "r")) |>
      kableExtra::kable_styling(full_width = FALSE)
  )
})

# Density histogram of the current sample, overlaid with the population
# density, the population and sample means, and a z-based confidence
# interval for the mean drawn as a bracket at y = 0.5.
output$plot_tab1 <- renderPlot({
  draw <- sampleDraw_tab1()
  x <- draw$values
  conf_pct <- input$confidenceLevel_tab1

  mn <- mean(x)
  se <- sd(x) / sqrt(length(x))
  # Two-sided standard-normal critical value for the selected level.
  z_crit <- qnorm(1 - (1 - conf_pct / 100) / 2)
  ci_lower <- mn - z_crit * se
  ci_upper <- mn + z_crit * se

  # Enlarge text for this plot only; restore the device settings on exit.
  op <- par(
    cex.main = 1.4,
    cex.lab = 1.2,
    cex.axis = 1.2
  )
  on.exit(par(op), add = TRUE)

  # Fixed axis limits: the plot does not rescale to the sample.
  hist(
    x,
    col = "lightblue",
    border = "white",
    main = "Single Sample Histogram",
    xlab = "Value",
    xlim = c(-7, 7),
    ylim = c(0, 0.5),
    freq = FALSE
  )

  # Population density, using the SD this sample was generated with.
  xvals <- seq(-7, 7, length.out = 400)
  yvals <- dnorm(xvals, mean = 0, sd = draw$pop_sd)
  lines(xvals, yvals, col = "red", lwd = 2)

  abline(v = 0, lty = 2, col = "red", lwd = 3)
  abline(v = mn, lty = 2, col = "blue", lwd = 3)

  # Confidence-interval bracket: horizontal bar plus two end ticks.
  segments(ci_lower, 0.5, ci_upper, 0.5, col = "black", lwd = 3)
  segments(ci_lower, 0.48, ci_lower, 0.52, col = "black", lwd = 3)
  segments(ci_upper, 0.48, ci_upper, 0.52, col = "black", lwd = 3)

  # The CI label follows the slider instead of always reading "95% CI".
  legend(
    "topright",
    legend = c(
      "Population distribution",
      "Population mean",
      "Sample mean",
      paste0(conf_pct, "% CI")
    ),
    col = c("red", "red", "blue", "black"),
    lty = c(1, 2, 2, 1),
    lwd = c(2, 3, 3, 3),
    bty = "n"
  )
})
#################################################
# TAB 2: Multiple Samples -> Beeswarm Dot Plot
#################################################
# Draws the requested number of samples from Normal(0, SD) on each click of
# "Generate Samples" and returns one row per sample with columns
# `sampleMean`, `sampleSD` and `sampleSize`. The size is stored with the
# data so later calculations use the size the samples were actually drawn
# with, not whatever the input box shows at render time.
manySampleData <- eventReactive(input$generateBtn_tab2, {
  n_each <- input$sampleSize_tab2
  n_samples <- input$numSamples_tab2
  pop_sd <- input$normalSD_tab2
  # numericInput() yields NA for an empty box and does not enforce `min` on
  # typed values, so check before simulating.
  validate(
    need(isTRUE(n_each >= 1), "Sample size must be at least 1."),
    need(
      isTRUE(n_samples >= 1),
      "Number of random samples must be at least 1."
    ),
    need(isTRUE(pop_sd > 0), "Population SD must be greater than 0.")
  )

  # One column per simulated sample: mean, SD and realised sample size.
  sims <- vapply(
    seq_len(n_samples),
    function(i) {
      x <- rnorm(n = n_each, mean = 0, sd = pop_sd)
      c(mean(x), sd(x), length(x))
    },
    numeric(3)
  )

  data.frame(
    sampleMean = sims[1, ],
    sampleSD = sims[2, ],
    sampleSize = sims[3, ]
  )
})

# Text summary of the simulated sample means, one statistic per line.
output$stats_tab2 <- renderPrint({
  df <- manySampleData()
  # sep = "" keeps cat() from inserting a space after each "\n", which
  # would indent every line after the first.
  cat(
    "Number of Samples: ", nrow(df), "\n",
    "Mean of Sample Means: ", round(mean(df$sampleMean), 3), "\n",
    "SD of Sample Means: ", round(sd(df$sampleMean), 3), "\n",
    sep = ""
  )
})

# Jittered dot plot of the sample means, coloured by whether each sample's
# z-based confidence interval excludes the population mean (0).
output$plot_tab2 <- renderPlot({
  df <- manySampleData()
  conf_pct <- input$confidenceLevel_tab2

  # Two-sided standard-normal critical value for the selected level.
  z_crit <- qnorm(1 - (1 - conf_pct / 100) / 2)
  df$sampleSE <- df$sampleSD / sqrt(df$sampleSize)
  df$ciLower <- df$sampleMean - z_crit * df$sampleSE
  df$ciUpper <- df$sampleMean + z_crit * df$sampleSE
  # Mark whether the CI excludes 0
  df$excludesZero <- df$ciLower > 0 | df$ciUpper < 0

  ggplot(df, aes(x = 1, y = sampleMean, color = excludesZero)) +
    geom_jitter(width = 0.2, size = 4, alpha = 0.7) +
    geom_hline(
      yintercept = 0,
      color = "red",
      linetype = "dashed",
      linewidth = 1.2
    ) +
    # Always keep the reference line at 0 inside the visible range, even
    # when every sample mean falls on one side of it.
    coord_cartesian(
      ylim = range(c(0, df$sampleMean)) + c(-0.05, 0.05)
    ) +
    labs(
      title = "Beeswarm Dot Plot of Sample Means",
      x = "",
      y = "Sample Means"
    ) +
    theme_minimal(base_size = 18) +
    theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    ) +
    scale_color_manual(
      name = paste0(conf_pct, "% CI excludes 0?"),
      values = c("TRUE" = "red", "FALSE" = "blue"),
      labels = c("FALSE" = "Includes 0", "TRUE" = "Excludes 0")
    )
})
#################################################
# TAB 3: Bootstrapping
#################################################
# 1) Generate a new initial sample from the population, Normal(0, SD), on
#    each click of "Generate New Initial Sample".
initialSample_tab3 <- eventReactive(input$generateBtn_tab3, {
  n <- input$sampleSize_tab3
  pop_sd <- input$normalSD_tab3
  # numericInput() yields NA for an empty box and does not enforce `min` on
  # typed values, so check before handing the values to rnorm().
  validate(
    need(isTRUE(n >= 1), "Sample size must be at least 1."),
    need(isTRUE(pop_sd > 0), "Population SD must be greater than 0.")
  )
  rnorm(n = n, mean = 0, sd = pop_sd)
})

# 2) Run the bootstrap on that single sample when "Run Bootstrap" is
#    clicked. The sample that was resampled is stored next to the bootstrap
#    means so a later change of the initial sample can be detected.
bootstrapRun_tab3 <- eventReactive(input$runBootstrap_tab3, {
  x <- initialSample_tab3()
  B <- input$bootstrapReps_tab3
  validate(
    need(
      isTRUE(B >= 1),
      "Number of bootstrap replicates must be at least 1."
    )
  )
  n <- length(x)
  boot_means <- vapply(
    seq_len(B),
    function(i) {
      # Resample by index: sample(x, ...) would draw from 1:x when x has
      # length 1.
      mean(x[sample.int(n, size = n, replace = TRUE)])
    },
    numeric(1)
  )
  list(sample = x, means = boot_means)
})

#    Bootstrap means for the *current* initial sample. If a new initial
#    sample was generated after the last bootstrap run, the stored means
#    belong to the previous sample, so show a prompt instead of mixing them
#    with the new sample's statistics.
bootstrapMeans_tab3 <- reactive({
  run <- bootstrapRun_tab3()
  validate(
    need(
      identical(run$sample, initialSample_tab3()),
      "The initial sample has changed. Click 'Run Bootstrap' to bootstrap the new sample."
    )
  )
  run$means
})

# 3) Summaries: lower/upper bounds of the bootstrap percentile CI and of the
#    z-based CI (sample mean +/- z * SE), rounded to 3 decimals.
output$stats_tab3 <- renderUI({
  x <- initialSample_tab3()
  boot_means <- bootstrapMeans_tab3()
  conf_pct <- input$confidenceLevel_tab3
  alpha <- 1 - conf_pct / 100

  # Bootstrap percentile CI (names dropped so they cannot leak into the
  # table).
  boot_ci <- unname(
    quantile(boot_means, probs = c(alpha / 2, 1 - alpha / 2))
  )

  # Z-based SE approach
  original_se <- sd(x) / sqrt(length(x))
  z_ci <- mean(x) + c(-1, 1) * qnorm(1 - alpha / 2) * original_se

  stats_df <- data.frame(
    Statistic = c(
      paste0("Bootstrap ", conf_pct, "% CI (Lower)"),
      paste0("Z-based ", conf_pct, "% CI (Lower)"),
      paste0("Bootstrap ", conf_pct, "% CI (Upper)"),
      paste0("Z-based ", conf_pct, "% CI (Upper)")
    ),
    Value = round(c(boot_ci[1], z_ci[1], boot_ci[2], z_ci[2]), 3)
  )
  HTML(
    knitr::kable(stats_df, format = "html", align = c("l", "r")) |>
      kableExtra::kable_styling(full_width = FALSE)
  )
})

# 4) Plot the distribution of bootstrap means with the population mean, the
#    original sample mean and the bootstrap percentile CI marked.
output$plot_tab3 <- renderPlot({
  x <- initialSample_tab3()
  boot_means <- bootstrapMeans_tab3()
  conf_pct <- input$confidenceLevel_tab3
  alpha <- 1 - conf_pct / 100

  # Percentile-based bootstrap CI
  boot_ci <- quantile(boot_means, probs = c(alpha / 2, 1 - alpha / 2))

  # Enlarge text for this plot only; restore the device settings on exit.
  op <- par(
    cex.main = 1.4,
    cex.lab = 1.2,
    cex.axis = 1.2
  )
  on.exit(par(op), add = TRUE)

  hist(
    boot_means,
    col = "lightgreen",
    border = "white",
    main = "Bootstrap Distribution of Means",
    xlab = "Bootstrap Means",
    freq = FALSE,
    # At least [-1, 1]; widened when the bootstrap means fall outside it
    # (large SD or small sample) so the histogram is never clipped.
    xlim = range(-1, 1, boot_means)
  )
  # Population mean (solid red) and original sample mean (dashed blue).
  abline(v = 0, col = "red", lty = 1, lwd = 3)
  abline(v = mean(x), col = "blue", lty = 2, lwd = 3)
  # Percentile CI bounds; darker than the bars so they stay visible where
  # they cross the histogram.
  abline(v = boot_ci, col = "darkgreen", lty = 3, lwd = 3)

  # The CI label follows the slider instead of always reading "95% CI".
  legend(
    "topright",
    legend = c("Population mean", "Sample mean", paste0(conf_pct, "% CI")),
    col = c("red", "blue", "darkgreen"),
    lty = c(1, 2, 3),
    lwd = c(3, 3, 3),
    bty = "n"
  )
})
}
# Create the Shiny app object from the `ui` and `server` defined above.
shinyApp(ui = ui, server = server)