--- title: "Interactive apps: Confidence intervals & power (modern plots)" subtitle: "Week 02 — Foundations & reproducibility" author: "Bartosz Maćkiewicz" format: html: toc: true theme: cosmo server: shiny execute: echo: false warning: false message: false --- This file is a **Quarto + Shiny** document. How to run it: - In **RStudio**: open this `.qmd` and click **Run Document** - In a terminal: `quarto serve week02_ci_power_apps_modern.qmd` Important: `quarto render` creates a **static** HTML file. To use the interactive controls, you must run it with **Run Document** / `quarto serve`. If you get “there is no package called ‘shiny’”, install it with `install.packages("shiny")` (or use the course **Install R packages** script from the course page). ```{r} #| context: setup #| include: false library(shiny) library(ggplot2) library(scales) ``` ::: {.panel-tabset} ## CI: known $\sigma$ (normal approximation) ```{r} #| echo: false inputPanel( sliderInput("ci_known_k", "Number of replications (k)", min = 10, max = 500, value = 50, step = 10), sliderInput("ci_known_n", "Sample size (N)", min = 5, max = 200, value = 20, step = 1), sliderInput("ci_known_mu", "Population mean (μ)", min = -200, max = 200, value = 165, step = 1), sliderInput("ci_known_sigma", "Population standard deviation (σ)", min = 1, max = 50, value = 5, step = 1) ) uiOutput("ci_known_plot_ui") ``` ## CI: use sample SD $s$ (normal approximation) ```{r} #| echo: false inputPanel( sliderInput("ci_sd_k", "Number of replications (k)", min = 10, max = 500, value = 50, step = 10), sliderInput("ci_sd_n", "Sample size (N)", min = 5, max = 200, value = 20, step = 1), sliderInput("ci_sd_mu", "Population mean (μ)", min = -200, max = 200, value = 165, step = 1), sliderInput("ci_sd_sigma", "Population standard deviation (σ)", min = 1, max = 50, value = 5, step = 1) ) uiOutput("ci_sd_plot_ui") ``` ## CI: use sample SD $s$ with $t$ ```{r} #| echo: false inputPanel( sliderInput("ci_t_k", "Number of replications (k)", min = 10, max = 500, value = 50, step = 10), sliderInput("ci_t_n", "Sample size (N)", min = 5, max = 200, value = 20, step = 1), sliderInput("ci_t_mu", "Population mean (μ)", min = -200, max = 200, value = 165, step = 1), sliderInput("ci_t_sigma", "Population standard deviation (σ)", min = 1, max = 50, value = 5, step = 1) ) uiOutput("ci_t_plot_ui") ``` ## Power: known $\sigma$ (normal approximation) ```{r} #| echo: false inputPanel( sliderInput("pow_known_n", "Sample size per group (n)", min = 5, max = 500, value = 20, step = 1), sliderInput("pow_known_alpha", "Significance level (α)", min = 0.001, max = 0.2, value = 0.025, step = 0.001), sliderInput("pow_known_sigma", "Standard deviation (σ)", min = 1, max = 50, value = 5, step = 1), sliderInput("pow_known_diff", "Difference under H₁ (Δ)", min = 0.1, max = 20, value = 6, step = 0.1) ) plotOutput("pow_known_plot", height = "450px") ``` ## Power: unknown $\sigma$ (t statistic + simulation) ```{r} #| echo: false inputPanel( sliderInput("pow_unknown_n", "Sample size per group (n)", min = 5, max = 500, value = 20, step = 1), sliderInput("pow_unknown_alpha", "Significance level (α)", min = 0.001, max = 0.2, value = 0.025, step = 0.001), sliderInput("pow_unknown_sigma", "Standard deviation (σ)", min = 1, max = 50, value = 5, step = 1), sliderInput("pow_unknown_diff", "Difference under H₁ (Δ)", min = 0.1, max = 20, value = 6, step = 0.1), sliderInput("pow_unknown_nrep", "Number of replications", min = 100, max = 10000, value = 1000, step = 100) ) plotOutput("pow_unknown_plot", height = "450px") htmlOutput("pow_unknown_summary") ``` ::: ```{r} #| context: server #| echo: false col_h0 <- "#e34a33" col_h1 <- "#2b8cbe" col_hit <- "#2b8cbe" col_miss <- "#d7301f" col_mu <- "#111111" sim_ci <- function(k, N, mu, sigma, method = c("known_sigma", "sample_sd_z", "sample_sd_t")) { method <- match.arg(method) samples <- matrix(rnorm(N * k, mean = mu, sd = sigma), nrow = k, ncol = N) xbar <- rowMeans(samples) if (method == "known_sigma") { arm <- rep(qnorm(.975) * sigma / sqrt(N), k) } else { s <- apply(samples, 1, sd) if (method == "sample_sd_z") { arm <- qnorm(.975) * s / sqrt(N) } else { arm <- qt(.975, df = N - 1) * s / sqrt(N) } } lower <- xbar - arm upper <- xbar + arm hit <- mu >= lower & mu <= upper data.frame( rep = seq_len(k), xbar = xbar, lower = lower, upper = upper, width = upper - lower, hit = hit ) } plot_ci_segments <- function(df, mu, title, subtitle) { k <- nrow(df) seg_linewidth <- pmin(2.2, pmax(0.18, 110 / k)) point_size <- pmin(1.2, pmax(0.0, 1.2 * sqrt(50 / k))) point_alpha <- pmin(0.6, pmax(0.08, 50 / k)) df$status <- ifelse(df$hit, "Contains μ", "Misses μ") ggplot(df, aes(y = rep, yend = rep, x = lower, xend = upper, color = status)) + geom_segment(linewidth = seg_linewidth, lineend = "round", alpha = 0.9) + geom_point(aes(x = xbar), size = point_size, color = "#222222", alpha = point_alpha) + geom_vline(xintercept = mu, linetype = "dashed", linewidth = 0.9, color = col_mu) + scale_color_manual(values = c("Contains μ" = col_hit, "Misses μ" = col_miss)) + labs( title = title, subtitle = subtitle, x = expression(mu), y = NULL, color = NULL ) + theme_minimal(base_size = 13) + theme( legend.position = "top", panel.grid.major.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank() ) } ci_plot_height_px <- function(k) { round(pmin(900, pmax(450, 480 + 0.9 * (k - 50)))) } output$ci_known_plot_ui <- renderUI({ plotOutput("ci_known_plot", height = paste0(ci_plot_height_px(input$ci_known_k), "px")) }) output$ci_sd_plot_ui <- renderUI({ plotOutput("ci_sd_plot", height = paste0(ci_plot_height_px(input$ci_sd_k), "px")) }) output$ci_t_plot_ui <- renderUI({ plotOutput("ci_t_plot", height = paste0(ci_plot_height_px(input$ci_t_k), "px")) }) output$ci_known_plot <- renderPlot({ k <- input$ci_known_k N <- input$ci_known_n mu <- input$ci_known_mu sigma <- input$ci_known_sigma df <- sim_ci(k = k, N = N, mu = mu, sigma = sigma, method = "known_sigma") coverage <- mean(df$hit) plot_ci_segments( df, mu = mu, title = "95% confidence intervals for μ (known σ; normal approximation)", subtitle = sprintf( "Coverage: %s | CI width: %.2f | k=%d, N=%d, σ=%.2f", scales::percent(coverage, accuracy = 0.1), mean(df$width), k, N, sigma ) ) }) output$ci_sd_plot <- renderPlot({ k <- input$ci_sd_k N <- input$ci_sd_n mu <- input$ci_sd_mu sigma <- input$ci_sd_sigma df <- sim_ci(k = k, N = N, mu = mu, sigma = sigma, method = "sample_sd_z") coverage <- mean(df$hit) plot_ci_segments( df, mu = mu, title = "95% confidence intervals for μ (estimate σ with s; normal approximation)", subtitle = sprintf( "Coverage: %s | Mean width: %.2f | k=%d, N=%d, σ=%.2f", scales::percent(coverage, accuracy = 0.1), mean(df$width), k, N, sigma ) ) }) output$ci_t_plot <- renderPlot({ k <- input$ci_t_k N <- input$ci_t_n mu <- input$ci_t_mu sigma <- input$ci_t_sigma df <- sim_ci(k = k, N = N, mu = mu, sigma = sigma, method = "sample_sd_t") coverage <- mean(df$hit) plot_ci_segments( df, mu = mu, title = "95% confidence intervals for μ (estimate σ with s; t critical value)", subtitle = sprintf( "Coverage: %s | Mean width: %.2f | k=%d, N=%d, σ=%.2f", scales::percent(coverage, accuracy = 0.1), mean(df$width), k, N, sigma ) ) }) output$pow_known_plot <- renderPlot({ n <- input$pow_known_n alpha <- input$pow_known_alpha sigma <- input$pow_known_sigma diff <- input$pow_known_diff se <- sigma * sqrt(2 / n) crit <- qnorm(alpha, mean = 0, sd = se, lower.tail = FALSE) power <- pnorm(crit, mean = diff, sd = se, lower.tail = FALSE) x_min <- min(-4 * se, diff - 4 * se) x_max <- max(4 * se, diff + 4 * se) x <- seq(x_min, x_max, length.out = 512) df0 <- data.frame(x = x, density = dnorm(x, mean = 0, sd = se), hypothesis = "H₀ (Δ = 0)") df1 <- data.frame(x = x, density = dnorm(x, mean = diff, sd = se), hypothesis = "H₁ (Δ > 0)") df_lines <- rbind(df0, df1) ggplot(df_lines, aes(x = x, y = density, color = hypothesis)) + annotate( "rect", xmin = crit, xmax = Inf, ymin = -Inf, ymax = Inf, fill = "#000000", alpha = 0.04 ) + geom_area( data = df0[df0$x >= crit, ], aes(x = x, y = density), fill = col_h0, alpha = 0.18, color = NA, inherit.aes = FALSE ) + geom_area( data = df1[df1$x >= crit, ], aes(x = x, y = density), fill = col_h1, alpha = 0.18, color = NA, inherit.aes = FALSE ) + geom_line(linewidth = 1.1) + geom_vline(xintercept = crit, linetype = "dashed", linewidth = 0.9, color = "grey30") + scale_color_manual(values = c("H₀ (Δ = 0)" = col_h0, "H₁ (Δ > 0)" = col_h1)) + labs( title = "Power (known σ): sampling distributions and rejection region", subtitle = sprintf( "α=%.3f | n=%d per group | σ=%.2f | Δ=%.2f | Critical value=%.2f | Power≈%.3f", alpha, n, sigma, diff, crit, power ), x = expression(bar(X)[1] - bar(X)[2]), y = "Density", color = NULL ) + theme_minimal(base_size = 13) + theme(legend.position = "top") }) pow_unknown_stats <- reactive({ n <- input$pow_unknown_n sigma <- input$pow_unknown_sigma diff <- input$pow_unknown_diff nrep <- input$pow_unknown_nrep t_null <- replicate(nrep, { s1 <- rnorm(n, 0, sigma) s2 <- rnorm(n, 0, sigma) (mean(s1) - mean(s2)) / sqrt((var(s1) + var(s2)) / n) }) t_alt <- replicate(nrep, { s1 <- rnorm(n, diff, sigma) s2 <- rnorm(n, 0, sigma) (mean(s1) - mean(s2)) / sqrt((var(s1) + var(s2)) / n) }) list(t_null = t_null, t_alt = t_alt) }) output$pow_unknown_plot <- renderPlot({ n <- input$pow_unknown_n alpha <- input$pow_unknown_alpha sigma <- input$pow_unknown_sigma diff <- input$pow_unknown_diff se <- sigma * sqrt(2 / n) df <- 2 * n - 2 crit <- qt(alpha, df, lower.tail = FALSE) stats <- pow_unknown_stats() t_null <- stats$t_null t_alt <- stats$t_alt power_theory <- pt(crit, df = df, ncp = diff / se, lower.tail = FALSE) power_sim <- mean(t_alt >= crit) vals <- c(t_null, t_alt) lims <- as.numeric(stats::quantile(vals, c(0.001, 0.999), na.rm = TRUE)) pad <- max(1e-6, diff(lims) * 0.05) xlim <- c(lims[1] - pad, lims[2] + pad) df_plot <- data.frame( t = c(t_null, t_alt), hypothesis = c(rep("H₀ (Δ = 0)", length(t_null)), rep("H₁ (Δ > 0)", length(t_alt))) ) ggplot(df_plot, aes(x = t, fill = hypothesis)) + annotate( "rect", xmin = crit, xmax = Inf, ymin = -Inf, ymax = Inf, fill = "#000000", alpha = 0.04 ) + geom_histogram( aes(y = after_stat(density)), bins = 60, position = "identity", alpha = 0.25, color = NA ) + stat_function( fun = dt, args = list(df = df), color = col_h0, linewidth = 1.1, inherit.aes = FALSE ) + stat_function( fun = function(x) dt(x, df = df, ncp = diff / se), color = col_h1, linewidth = 1.1, inherit.aes = FALSE ) + geom_vline(xintercept = crit, linetype = "dashed", linewidth = 0.9, color = "grey30") + scale_fill_manual(values = c("H₀ (Δ = 0)" = col_h0, "H₁ (Δ > 0)" = col_h1)) + coord_cartesian(xlim = xlim) + labs( title = "Power (unknown σ): simulated t statistics", subtitle = sprintf( "α=%.3f | n=%d per group | σ=%.2f | Δ=%.2f | Critical value=%.2f | Power (theory)≈%.3f | (sim)≈%.3f", alpha, n, sigma, diff, crit, power_theory, power_sim ), x = "t statistic", y = "Density", fill = NULL ) + theme_minimal(base_size = 13) + theme(legend.position = "top") }) output$pow_unknown_summary <- renderUI({ n <- input$pow_unknown_n alpha <- input$pow_unknown_alpha sigma <- input$pow_unknown_sigma diff <- input$pow_unknown_diff se <- sigma * sqrt(2 / n) df <- 2 * n - 2 crit <- qt(alpha, df, lower.tail = FALSE) stats <- pow_unknown_stats() power_theory <- pt(crit, df = df, ncp = diff / se, lower.tail = FALSE) power_sim <- mean(stats$t_alt >= crit) HTML(sprintf( "Theoretical power: %.3f
Simulated power: %.3f", power_theory, power_sim )) }) ```