Spaces:

keefereuther
/

confidence_intervals

Build error

App Files Files Community

confidence_intervals / app.R

keefereuther

Update app.R

a1b0840 verified 3 months ago

raw

history blame contribute delete

15.4 kB

	library(shiny)
	library(ggplot2)
	library(knitr)
	library(kableExtra)
	library(shinythemes)

	# Build the "AI Tutor" section shown at the bottom of every tab.
	#
	# src:    URL of the chatbot page embedded in the iframe.
	# height: CSS height of the iframe (e.g. "500px").
	# Returns a tagList (heading, prompt, iframe) that can be dropped into a
	# mainPanel() next to the other outputs.
	ai_tutor_panel <- function(src, height) {
	  tagList(
	    h4(strong("AI Tutor")),
	    h5("Have a conversation with the chatbot about this simulation!"),
	    tags$iframe(
	      src = src,
	      width = "100%",
	      height = height,
	      style = "border:none;"
	    )
	  )
	}

	# User interface: three tabs (single sample, multiple samples, bootstrapping),
	# each with a sidebar of inputs and guiding questions and a main panel holding
	# a plot, a statistics output and the embedded AI tutor.
	# Input/output IDs are referenced by the server function and must not change.
	ui <- fluidPage(
	  theme = shinytheme("spacelab"),
	  titlePanel("Learning about Confidence Intervals"),
	  h5("Created by Keefe Reuther"),
	  hr(),
	  tabsetPanel(

	    ########################################
	    ### TAB 1: Single Sample from Normal Distribution
	    ########################################
	    tabPanel(
	      title = "Single Sample",
	      sidebarLayout(
	        sidebarPanel(
	          width = 3,
	          h3(strong("The Population")),
	          h4(strong("Population Mean: 0")),
	          br(),
	          numericInput(
	            inputId = "normalSD_tab1",
	            label = "Population SD:",
	            value = 1,
	            min = 0.01,
	            step = 0.1
	          ),
	          hr(),
	          h3(strong("Your Single Sample")),
	          numericInput(
	            inputId = "sampleSize_tab1",
	            label = "Sample Size:",
	            value = 50,
	            min = 1
	          ),
	          sliderInput(
	            inputId = "confidenceLevel_tab1",
	            label = "Confidence Level (%)",
	            min = 1,
	            max = 99,
	            value = 95,
	            step = 1
	          ),
	          actionButton(
	            inputId = "generateBtn_tab1",
	            label = "Generate New Sample"
	          ),
	          hr(),
	          h4(strong("Guiding Questions:")),
	          h5("1. How does increasing _______ affect the certainty that your sample mean is close to the population mean?"),
	          # Candidate fill-ins for the blank in question 1.
	          tags$ul(
	            tags$li("Sample size"),
	            tags$li("Standard deviation")
	          ),
	          hr(),
	          # Question 2 asks for a number, so it has no fill-in list.
	          h5("2. If the standard deviation is 4, what is the minimum sample size you should consider collecting if you want your sample mean to be likely within 1 of the population mean?")
	        ),
	        mainPanel(
	          width = 9,
	          plotOutput("plot_tab1"),
	          hr(),
	          uiOutput("stats_tab1"),
	          ai_tutor_panel("https://keefereuther-ci-tab1.hf.space", "500px")
	        )
	      )
	    ),

	    ########################################
	    ### TAB 2: Multiple Samples
	    ########################################
	    tabPanel(
	      title = "Multiple Samples",
	      sidebarLayout(
	        sidebarPanel(
	          width = 3,
	          h3(strong("The Population")),
	          numericInput(
	            inputId = "normalSD_tab2",
	            label = "Population SD:",
	            value = 1,
	            # Same lower bound as the SD inputs on the other two tabs.
	            min = 0.01,
	            step = 0.1
	          ),
	          hr(),
	          h3(strong("Your Multiple Samples")),
	          numericInput(
	            inputId = "sampleSize_tab2",
	            label = "Sample Size (each):",
	            value = 50,
	            min = 1
	          ),
	          numericInput(
	            inputId = "numSamples_tab2",
	            label = "Number of Random Samples:",
	            value = 100,
	            min = 1
	          ),
	          sliderInput(
	            inputId = "confidenceLevel_tab2",
	            label = "Confidence Level (%)",
	            min = 50,
	            max = 99,
	            value = 95,
	            step = 1
	          ),
	          actionButton(
	            inputId = "generateBtn_tab2",
	            label = "Generate Samples"
	          ),
	          hr(),
	          h4(strong("Guiding Questions:")),
	          h5("1. If you run 100 simulated experiments, how many experiments (red dots) failed to contain the true population mean within their 95% CI?"),
	          h5("2. Why is it sometimes not exactly 5 red dots (sample means with 95% CIs that don't include the population mean)?"),
	          h5("3. How does changing standard deviation and sample size affect the output?"),
	          h5("4. What is the relationship between a confidence interval and standard error?")
	        ),
	        mainPanel(
	          width = 9,
	          plotOutput("plot_tab2"),
	          hr(),
	          verbatimTextOutput("stats_tab2"),
	          ai_tutor_panel("https://keefereuther-ci-tab2.hf.space", "700px")
	        )
	      )
	    ),

	    ########################################
	    ### TAB 3: Bootstrapping
	    ########################################
	    tabPanel(
	      title = "Bootstrapping",
	      sidebarLayout(
	        sidebarPanel(
	          width = 3,
	          h3(strong("The Population")),
	          numericInput(
	            inputId = "normalSD_tab3",
	            label = "Population SD:",
	            value = 1,
	            min = 0.01,
	            step = 0.1
	          ),
	          hr(),
	          h3(strong("Your Initial Sample")),
	          numericInput(
	            inputId = "sampleSize_tab3",
	            label = "Sample Size (initial):",
	            value = 50,
	            min = 1
	          ),
	          h5(strong("You must select this button before running the bootstrap:")),
	          actionButton(
	            inputId = "generateBtn_tab3",
	            label = "Generate New Initial Sample"
	          ),
	          hr(),
	          h3(strong("Bootstrapping")),
	          numericInput(
	            inputId = "bootstrapReps_tab3",
	            label = "Number of Bootstrap Replicates:",
	            value = 1000,
	            min = 1
	          ),
	          sliderInput(
	            inputId = "confidenceLevel_tab3",
	            label = "Confidence Level (%)",
	            min = 50,
	            max = 99,
	            value = 95,
	            step = 1
	          ),
	          actionButton(
	            inputId = "runBootstrap_tab3",
	            label = "Run Bootstrap"
	          ),
	          hr(),
	          h4(strong("Guiding Questions:")),
	          h5("1. What is the effect on the 95% CI if you bootstrap 10,000X instead of 100X?"),
	          h5("2. When you bootstrap 10,000X, does it seem to give a very similar 95% CI compared to the theoretical CI calculated from the sample size and standard deviation?")
	        ),
	        mainPanel(
	          width = 9,
	          plotOutput("plot_tab3"),
	          hr(),
	          uiOutput("stats_tab3"),
	          ai_tutor_panel("https://keefereuther-ci-tab3.hf.space", "500px")
	        )
	      )
	    )
	  )
	)

	# Server logic for the three tabs.
	#
	# input, output, session: the standard Shiny server arguments. The input and
	# output IDs used below are the ones declared in `ui`.
	#
	# Every simulation is drawn only when its button is pressed. The values that
	# were used for a draw (population SD, sample size, the resampled sample) are
	# stored with the result, so editing an input afterwards cannot make a plot or
	# table mix old data with new settings. Only the confidence level is read live.
	server <- function(input, output, session) {

	  #################################################
	  # Shared helpers
	  #################################################

	  # Two-sided standard-normal critical value for a confidence level given in
	  # percent (e.g. 95 -> about 1.96).
	  z_critical <- function(level_pct) {
	    alpha <- 1 - (level_pct / 100)
	    qnorm(1 - alpha / 2)
	  }

	  # Show a message in the dependent outputs instead of a raw R error when the
	  # sampling inputs are empty or out of range.
	  check_population <- function(n, pop_sd) {
	    validate(
	      need(isTRUE(n >= 1), "Sample size must be at least 1."),
	      need(isTRUE(pop_sd > 0), "Population SD must be greater than 0.")
	    )
	  }

	  # Render a Statistic/Value data frame as a compact HTML table.
	  stats_table <- function(stats_df) {
	    HTML(
	      knitr::kable(stats_df, format = "html", align = c("l", "r")) |>
	        kableExtra::kable_styling(full_width = FALSE)
	    )
	  }

	  #################################################
	  # TAB 1: Single Sample
	  #################################################

	  # One draw from N(0, pop_sd), kept with the SD it was drawn with so the
	  # population curve in the plot always matches the histogram.
	  singleSample <- eventReactive(input$generateBtn_tab1, {
	    n <- input$sampleSize_tab1
	    pop_sd <- input$normalSD_tab1
	    check_population(n, pop_sd)

	    list(
	      values = rnorm(n = n, mean = 0, sd = pop_sd),
	      pop_sd = pop_sd
	    )
	  })

	  output$stats_tab1 <- renderUI({
	    x <- singleSample()$values
	    sample_sd <- sd(x)

	    stats_table(data.frame(
	      Statistic = c("Sample Mean", "Sample SD", "Sample SE"),
	      Value = round(c(mean(x), sample_sd, sample_sd / sqrt(length(x))), 3)
	    ))
	  })

	  output$plot_tab1 <- renderPlot({
	    draw <- singleSample()
	    x <- draw$values
	    level <- input$confidenceLevel_tab1

	    mn <- mean(x)
	    halfwidth <- z_critical(level) * sd(x) / sqrt(length(x))
	    ci_lower <- mn - halfwidth
	    ci_upper <- mn + halfwidth

	    op <- par(
	      cex.main = 1.4,
	      cex.lab = 1.2,
	      cex.axis = 1.2
	    )
	    on.exit(par(op), add = TRUE)

	    hist(
	      x,
	      col = "lightblue",
	      border = "white",
	      main = "Single Sample Histogram",
	      xlab = "Value",
	      xlim = c(-7, 7),
	      ylim = c(0, 0.5),
	      freq = FALSE
	    )

	    # Density of the population this sample was actually drawn from.
	    xvals <- seq(-7, 7, length.out = 400)
	    yvals <- dnorm(xvals, mean = 0, sd = draw$pop_sd)
	    lines(xvals, yvals, col = "red", lwd = 2)

	    # Population mean (red) and sample mean (blue).
	    abline(v = 0, lty = 2, col = "red", lwd = 3)
	    abline(v = mn, lty = 2, col = "blue", lwd = 3)

	    # Confidence interval drawn as a bar with end caps along the top edge.
	    segments(ci_lower, 0.5, ci_upper, 0.5, col = "black", lwd = 3)
	    segments(ci_lower, 0.48, ci_lower, 0.52, col = "black", lwd = 3)
	    segments(ci_upper, 0.48, ci_upper, 0.52, col = "black", lwd = 3)

	    legend(
	      "topright",
	      legend = c(
	        "Population distribution",
	        "Population mean",
	        "Sample mean",
	        # Label follows the slider rather than a fixed "95%".
	        paste0(level, "% CI")
	      ),
	      col = c("red", "red", "blue", "black"),
	      lty = c(1, 2, 2, 1),
	      lwd = c(2, 3, 3, 3),
	      bty = "n"
	    )
	  })

	  #################################################
	  # TAB 2: Multiple Samples -> Beeswarm Dot Plot
	  #################################################

	  # One row per simulated sample: its mean, SD and standard error. The SE is
	  # computed here, from the sample itself, so it cannot drift if the sample
	  # size input is edited after the samples were generated.
	  manySampleData <- eventReactive(input$generateBtn_tab2, {
	    n_each <- input$sampleSize_tab2
	    pop_sd <- input$normalSD_tab2
	    n_samples <- input$numSamples_tab2
	    check_population(n_each, pop_sd)
	    validate(
	      need(
	        isTRUE(n_samples >= 1),
	        "Number of random samples must be at least 1."
	      )
	    )

	    sims <- replicate(
	      n = n_samples,
	      expr = {
	        x <- rnorm(n = n_each, mean = 0, sd = pop_sd)
	        c(mean(x), sd(x), sd(x) / sqrt(length(x)))
	      }
	    )
	    df <- as.data.frame(t(sims))
	    colnames(df) <- c("sampleMean", "sampleSD", "sampleSE")
	    df
	  })

	  output$stats_tab2 <- renderPrint({
	    df <- manySampleData()

	    # sep = "\n" gives one statistic per line with no stray leading spaces.
	    cat(
	      paste("Number of Samples:", nrow(df)),
	      paste("Mean of Sample Means:", round(mean(df$sampleMean), 3)),
	      paste("SD of Sample Means:", round(sd(df$sampleMean), 3)),
	      sep = "\n"
	    )
	  })

	  output$plot_tab2 <- renderPlot({
	    df <- manySampleData()
	    level <- input$confidenceLevel_tab2
	    z_crit <- z_critical(level)

	    df$ciLower <- df$sampleMean - z_crit * df$sampleSE
	    df$ciUpper <- df$sampleMean + z_crit * df$sampleSE

	    # Mark whether the CI excludes the true mean, 0.
	    df$excludesZero <- df$ciLower > 0 | df$ciUpper < 0

	    ggplot(df, aes(x = 1, y = sampleMean, color = excludesZero)) +
	      geom_jitter(width = 0.2, size = 4, alpha = 0.7) +
	      geom_hline(
	        yintercept = 0,
	        color = "red",
	        linetype = "dashed",
	        # `size` is deprecated for line widths in ggplot2 >= 3.4.0.
	        linewidth = 1.2
	      ) +
	      coord_cartesian(
	        ylim = range(df$sampleMean) + c(-0.05, 0.05)
	      ) +
	      labs(
	        title = "Beeswarm Dot Plot of Sample Means",
	        x = "",
	        y = "Sample Means"
	      ) +
	      theme_minimal(base_size = 18) +
	      theme(
	        axis.text.x = element_blank(),
	        axis.ticks.x = element_blank()
	      ) +
	      scale_color_manual(
	        name = paste0(level, "% CI excludes 0?"),
	        values = c("TRUE" = "red", "FALSE" = "blue"),
	        labels = c("FALSE" = "Includes 0", "TRUE" = "Excludes 0")
	      )
	  })

	  #################################################
	  # TAB 3: Bootstrapping
	  #################################################

	  # 1) Generate a new initial sample from the population.
	  initialSample_tab3 <- eventReactive(input$generateBtn_tab3, {
	    n <- input$sampleSize_tab3
	    pop_sd <- input$normalSD_tab3
	    check_population(n, pop_sd)

	    rnorm(n = n, mean = 0, sd = pop_sd)
	  })

	  # 2) Run the bootstrap on that sample. The result carries the sample that
	  #    was resampled, so the outputs never pair these bootstrap means with a
	  #    newer initial sample generated afterwards.
	  bootstrapRun_tab3 <- eventReactive(input$runBootstrap_tab3, {
	    x <- req(initialSample_tab3())
	    n_reps <- input$bootstrapReps_tab3
	    validate(
	      need(
	        isTRUE(n_reps >= 1),
	        "Number of bootstrap replicates must be at least 1."
	      )
	    )

	    # Resample by index: sample(x, ...) would treat a single value x >= 1 as
	    # the range 1:x instead of as the data.
	    n_obs <- length(x)
	    boot_means <- replicate(
	      n = n_reps,
	      expr = mean(x[sample.int(n_obs, replace = TRUE)])
	    )

	    list(sample = x, means = boot_means)
	  })

	  # 3) Table comparing the bootstrap percentile CI with the z-based CI.
	  output$stats_tab3 <- renderUI({
	    boot <- bootstrapRun_tab3()
	    x <- boot$sample
	    level <- input$confidenceLevel_tab3
	    alpha <- 1 - (level / 100)

	    # Bootstrap percentile CI.
	    pct_ci <- quantile(
	      boot$means,
	      probs = c(alpha / 2, 1 - alpha / 2),
	      names = FALSE
	    )

	    # Z-based CI from the original sample's standard error.
	    original_mean <- mean(x)
	    halfwidth <- z_critical(level) * sd(x) / sqrt(length(x))

	    stats_table(data.frame(
	      Statistic = c(
	        paste0("Bootstrap ", level, "% CI (Lower)"),
	        paste0("Z-based ", level, "% CI (Lower)"),
	        paste0("Bootstrap ", level, "% CI (Upper)"),
	        paste0("Z-based ", level, "% CI (Upper)")
	      ),
	      Value = round(
	        c(
	          pct_ci[1],
	          original_mean - halfwidth,
	          pct_ci[2],
	          original_mean + halfwidth
	        ),
	        3
	      )
	    ))
	  })

	  # 4) Plot the distribution of bootstrap means.
	  output$plot_tab3 <- renderPlot({
	    boot <- bootstrapRun_tab3()
	    boot_means <- boot$means
	    level <- input$confidenceLevel_tab3
	    alpha <- 1 - (level / 100)

	    # Percentile-based bootstrap CI.
	    q_low <- quantile(boot_means, probs = alpha / 2)
	    q_high <- quantile(boot_means, probs = 1 - alpha / 2)

	    op <- par(
	      cex.main = 1.4,
	      cex.lab = 1.2,
	      cex.axis = 1.2
	    )
	    on.exit(par(op), add = TRUE)

	    hist(
	      boot_means,
	      col = "lightgreen",
	      border = "white",
	      main = "Bootstrap Distribution of Means",
	      xlab = "Bootstrap Means",
	      freq = FALSE,
	      xlim = c(-1, 1)
	    )

	    # Population mean (solid red) and mean of the resampled sample (blue).
	    abline(v = 0, col = "red", lty = 1, lwd = 3)
	    abline(v = mean(boot$sample), col = "blue", lty = 2, lwd = 3)

	    # Bounds of the bootstrap percentile CI.
	    abline(v = q_low, col = "lightgreen", lty = 3, lwd = 3)
	    abline(v = q_high, col = "lightgreen", lty = 3, lwd = 3)

	    legend(
	      "topright",
	      # Label follows the slider rather than a fixed "95%".
	      legend = c("Population mean", "Sample mean", paste0(level, "% CI")),
	      col = c("red", "blue", "lightgreen"),
	      lty = c(1, 2, 3),
	      lwd = c(3, 3, 3),
	      bty = "n"
	    )
	  })

	}

	# Build the app object from the UI definition and server function above;
	# it must stay the last expression in the file.
	shinyApp(ui, server)