---
title: "Interactive apps: Confidence intervals & power (modern plots)"
subtitle: "Week 02 — Foundations & reproducibility"
author: "Bartosz Maćkiewicz"
format:
html:
toc: true
theme: cosmo
server: shiny
execute:
echo: false
warning: false
message: false
---
This file is a **Quarto + Shiny** document.
How to run it:
- In **RStudio**: open this `.qmd` and click **Run Document**
- In a terminal: `quarto serve week02_ci_power_apps_modern.qmd`
Important: `quarto render` creates a **static** HTML file. To use the interactive controls, you must run it with **Run Document** / `quarto serve`.
If you get “there is no package called ‘shiny’”, install it with `install.packages("shiny")` (or use the course **Install R packages** script from the course page).
```{r}
#| context: setup
#| include: false
library(shiny)
library(ggplot2)
library(scales)
```
::: {.panel-tabset}
## CI: known $\sigma$ (normal approximation)
```{r}
#| echo: false
inputPanel(
sliderInput("ci_known_k", "Number of replications (k)", min = 10, max = 500, value = 50, step = 10),
sliderInput("ci_known_n", "Sample size (N)", min = 5, max = 200, value = 20, step = 1),
sliderInput("ci_known_mu", "Population mean (μ)", min = -200, max = 200, value = 165, step = 1),
sliderInput("ci_known_sigma", "Population standard deviation (σ)", min = 1, max = 50, value = 5, step = 1)
)
uiOutput("ci_known_plot_ui")
```
## CI: use sample SD $s$ (normal approximation)
```{r}
#| echo: false
inputPanel(
sliderInput("ci_sd_k", "Number of replications (k)", min = 10, max = 500, value = 50, step = 10),
sliderInput("ci_sd_n", "Sample size (N)", min = 5, max = 200, value = 20, step = 1),
sliderInput("ci_sd_mu", "Population mean (μ)", min = -200, max = 200, value = 165, step = 1),
sliderInput("ci_sd_sigma", "Population standard deviation (σ)", min = 1, max = 50, value = 5, step = 1)
)
uiOutput("ci_sd_plot_ui")
```
## CI: use sample SD $s$ with $t$
```{r}
#| echo: false
inputPanel(
sliderInput("ci_t_k", "Number of replications (k)", min = 10, max = 500, value = 50, step = 10),
sliderInput("ci_t_n", "Sample size (N)", min = 5, max = 200, value = 20, step = 1),
sliderInput("ci_t_mu", "Population mean (μ)", min = -200, max = 200, value = 165, step = 1),
sliderInput("ci_t_sigma", "Population standard deviation (σ)", min = 1, max = 50, value = 5, step = 1)
)
uiOutput("ci_t_plot_ui")
```
## Power: known $\sigma$ (normal approximation)
```{r}
#| echo: false
inputPanel(
sliderInput("pow_known_n", "Sample size per group (n)", min = 5, max = 500, value = 20, step = 1),
sliderInput("pow_known_alpha", "Significance level (α)", min = 0.001, max = 0.2, value = 0.025, step = 0.001),
sliderInput("pow_known_sigma", "Standard deviation (σ)", min = 1, max = 50, value = 5, step = 1),
sliderInput("pow_known_diff", "Difference under H₁ (Δ)", min = 0.1, max = 20, value = 6, step = 0.1)
)
plotOutput("pow_known_plot", height = "450px")
```
## Power: unknown $\sigma$ (t statistic + simulation)
```{r}
#| echo: false
inputPanel(
sliderInput("pow_unknown_n", "Sample size per group (n)", min = 5, max = 500, value = 20, step = 1),
sliderInput("pow_unknown_alpha", "Significance level (α)", min = 0.001, max = 0.2, value = 0.025, step = 0.001),
sliderInput("pow_unknown_sigma", "Standard deviation (σ)", min = 1, max = 50, value = 5, step = 1),
sliderInput("pow_unknown_diff", "Difference under H₁ (Δ)", min = 0.1, max = 20, value = 6, step = 0.1),
sliderInput("pow_unknown_nrep", "Number of replications", min = 100, max = 10000, value = 1000, step = 100)
)
plotOutput("pow_unknown_plot", height = "450px")
htmlOutput("pow_unknown_summary")
```
:::
```{r}
#| context: server
#| echo: false
col_h0 <- "#e34a33"
col_h1 <- "#2b8cbe"
col_hit <- "#2b8cbe"
col_miss <- "#d7301f"
col_mu <- "#111111"
sim_ci <- function(k, N, mu, sigma, method = c("known_sigma", "sample_sd_z", "sample_sd_t")) {
method <- match.arg(method)
samples <- matrix(rnorm(N * k, mean = mu, sd = sigma), nrow = k, ncol = N)
xbar <- rowMeans(samples)
if (method == "known_sigma") {
arm <- rep(qnorm(.975) * sigma / sqrt(N), k)
} else {
s <- apply(samples, 1, sd)
if (method == "sample_sd_z") {
arm <- qnorm(.975) * s / sqrt(N)
} else {
arm <- qt(.975, df = N - 1) * s / sqrt(N)
}
}
lower <- xbar - arm
upper <- xbar + arm
hit <- mu >= lower & mu <= upper
data.frame(
rep = seq_len(k),
xbar = xbar,
lower = lower,
upper = upper,
width = upper - lower,
hit = hit
)
}
plot_ci_segments <- function(df, mu, title, subtitle) {
k <- nrow(df)
seg_linewidth <- pmin(2.2, pmax(0.18, 110 / k))
point_size <- pmin(1.2, pmax(0.0, 1.2 * sqrt(50 / k)))
point_alpha <- pmin(0.6, pmax(0.08, 50 / k))
df$status <- ifelse(df$hit, "Contains μ", "Misses μ")
ggplot(df, aes(y = rep, yend = rep, x = lower, xend = upper, color = status)) +
geom_segment(linewidth = seg_linewidth, lineend = "round", alpha = 0.9) +
geom_point(aes(x = xbar), size = point_size, color = "#222222", alpha = point_alpha) +
geom_vline(xintercept = mu, linetype = "dashed", linewidth = 0.9, color = col_mu) +
scale_color_manual(values = c("Contains μ" = col_hit, "Misses μ" = col_miss)) +
labs(
title = title,
subtitle = subtitle,
x = expression(mu),
y = NULL,
color = NULL
) +
theme_minimal(base_size = 13) +
theme(
legend.position = "top",
panel.grid.major.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)
}
ci_plot_height_px <- function(k) {
round(pmin(900, pmax(450, 480 + 0.9 * (k - 50))))
}
output$ci_known_plot_ui <- renderUI({
plotOutput("ci_known_plot", height = paste0(ci_plot_height_px(input$ci_known_k), "px"))
})
output$ci_sd_plot_ui <- renderUI({
plotOutput("ci_sd_plot", height = paste0(ci_plot_height_px(input$ci_sd_k), "px"))
})
output$ci_t_plot_ui <- renderUI({
plotOutput("ci_t_plot", height = paste0(ci_plot_height_px(input$ci_t_k), "px"))
})
output$ci_known_plot <- renderPlot({
k <- input$ci_known_k
N <- input$ci_known_n
mu <- input$ci_known_mu
sigma <- input$ci_known_sigma
df <- sim_ci(k = k, N = N, mu = mu, sigma = sigma, method = "known_sigma")
coverage <- mean(df$hit)
plot_ci_segments(
df,
mu = mu,
title = "95% confidence intervals for μ (known σ; normal approximation)",
subtitle = sprintf(
"Coverage: %s | CI width: %.2f | k=%d, N=%d, σ=%.2f",
scales::percent(coverage, accuracy = 0.1),
mean(df$width),
k, N, sigma
)
)
})
output$ci_sd_plot <- renderPlot({
k <- input$ci_sd_k
N <- input$ci_sd_n
mu <- input$ci_sd_mu
sigma <- input$ci_sd_sigma
df <- sim_ci(k = k, N = N, mu = mu, sigma = sigma, method = "sample_sd_z")
coverage <- mean(df$hit)
plot_ci_segments(
df,
mu = mu,
title = "95% confidence intervals for μ (estimate σ with s; normal approximation)",
subtitle = sprintf(
"Coverage: %s | Mean width: %.2f | k=%d, N=%d, σ=%.2f",
scales::percent(coverage, accuracy = 0.1),
mean(df$width),
k, N, sigma
)
)
})
output$ci_t_plot <- renderPlot({
k <- input$ci_t_k
N <- input$ci_t_n
mu <- input$ci_t_mu
sigma <- input$ci_t_sigma
df <- sim_ci(k = k, N = N, mu = mu, sigma = sigma, method = "sample_sd_t")
coverage <- mean(df$hit)
plot_ci_segments(
df,
mu = mu,
title = "95% confidence intervals for μ (estimate σ with s; t critical value)",
subtitle = sprintf(
"Coverage: %s | Mean width: %.2f | k=%d, N=%d, σ=%.2f",
scales::percent(coverage, accuracy = 0.1),
mean(df$width),
k, N, sigma
)
)
})
output$pow_known_plot <- renderPlot({
n <- input$pow_known_n
alpha <- input$pow_known_alpha
sigma <- input$pow_known_sigma
diff <- input$pow_known_diff
se <- sigma * sqrt(2 / n)
crit <- qnorm(alpha, mean = 0, sd = se, lower.tail = FALSE)
power <- pnorm(crit, mean = diff, sd = se, lower.tail = FALSE)
x_min <- min(-4 * se, diff - 4 * se)
x_max <- max(4 * se, diff + 4 * se)
x <- seq(x_min, x_max, length.out = 512)
df0 <- data.frame(x = x, density = dnorm(x, mean = 0, sd = se), hypothesis = "H₀ (Δ = 0)")
df1 <- data.frame(x = x, density = dnorm(x, mean = diff, sd = se), hypothesis = "H₁ (Δ > 0)")
df_lines <- rbind(df0, df1)
ggplot(df_lines, aes(x = x, y = density, color = hypothesis)) +
annotate(
"rect",
xmin = crit, xmax = Inf,
ymin = -Inf, ymax = Inf,
fill = "#000000",
alpha = 0.04
) +
geom_area(
data = df0[df0$x >= crit, ],
aes(x = x, y = density),
fill = col_h0,
alpha = 0.18,
color = NA,
inherit.aes = FALSE
) +
geom_area(
data = df1[df1$x >= crit, ],
aes(x = x, y = density),
fill = col_h1,
alpha = 0.18,
color = NA,
inherit.aes = FALSE
) +
geom_line(linewidth = 1.1) +
geom_vline(xintercept = crit, linetype = "dashed", linewidth = 0.9, color = "grey30") +
scale_color_manual(values = c("H₀ (Δ = 0)" = col_h0, "H₁ (Δ > 0)" = col_h1)) +
labs(
title = "Power (known σ): sampling distributions and rejection region",
subtitle = sprintf(
"α=%.3f | n=%d per group | σ=%.2f | Δ=%.2f | Critical value=%.2f | Power≈%.3f",
alpha, n, sigma, diff, crit, power
),
x = expression(bar(X)[1] - bar(X)[2]),
y = "Density",
color = NULL
) +
theme_minimal(base_size = 13) +
theme(legend.position = "top")
})
pow_unknown_stats <- reactive({
n <- input$pow_unknown_n
sigma <- input$pow_unknown_sigma
diff <- input$pow_unknown_diff
nrep <- input$pow_unknown_nrep
t_null <- replicate(nrep, {
s1 <- rnorm(n, 0, sigma)
s2 <- rnorm(n, 0, sigma)
(mean(s1) - mean(s2)) / sqrt((var(s1) + var(s2)) / n)
})
t_alt <- replicate(nrep, {
s1 <- rnorm(n, diff, sigma)
s2 <- rnorm(n, 0, sigma)
(mean(s1) - mean(s2)) / sqrt((var(s1) + var(s2)) / n)
})
list(t_null = t_null, t_alt = t_alt)
})
output$pow_unknown_plot <- renderPlot({
n <- input$pow_unknown_n
alpha <- input$pow_unknown_alpha
sigma <- input$pow_unknown_sigma
diff <- input$pow_unknown_diff
se <- sigma * sqrt(2 / n)
df <- 2 * n - 2
crit <- qt(alpha, df, lower.tail = FALSE)
stats <- pow_unknown_stats()
t_null <- stats$t_null
t_alt <- stats$t_alt
power_theory <- pt(crit, df = df, ncp = diff / se, lower.tail = FALSE)
power_sim <- mean(t_alt >= crit)
vals <- c(t_null, t_alt)
lims <- as.numeric(stats::quantile(vals, c(0.001, 0.999), na.rm = TRUE))
pad <- max(1e-6, diff(lims) * 0.05)
xlim <- c(lims[1] - pad, lims[2] + pad)
df_plot <- data.frame(
t = c(t_null, t_alt),
hypothesis = c(rep("H₀ (Δ = 0)", length(t_null)), rep("H₁ (Δ > 0)", length(t_alt)))
)
ggplot(df_plot, aes(x = t, fill = hypothesis)) +
annotate(
"rect",
xmin = crit, xmax = Inf,
ymin = -Inf, ymax = Inf,
fill = "#000000",
alpha = 0.04
) +
geom_histogram(
aes(y = after_stat(density)),
bins = 60,
position = "identity",
alpha = 0.25,
color = NA
) +
stat_function(
fun = dt,
args = list(df = df),
color = col_h0,
linewidth = 1.1,
inherit.aes = FALSE
) +
stat_function(
fun = function(x) dt(x, df = df, ncp = diff / se),
color = col_h1,
linewidth = 1.1,
inherit.aes = FALSE
) +
geom_vline(xintercept = crit, linetype = "dashed", linewidth = 0.9, color = "grey30") +
scale_fill_manual(values = c("H₀ (Δ = 0)" = col_h0, "H₁ (Δ > 0)" = col_h1)) +
coord_cartesian(xlim = xlim) +
labs(
title = "Power (unknown σ): simulated t statistics",
subtitle = sprintf(
"α=%.3f | n=%d per group | σ=%.2f | Δ=%.2f | Critical value=%.2f | Power (theory)≈%.3f | (sim)≈%.3f",
alpha, n, sigma, diff, crit, power_theory, power_sim
),
x = "t statistic",
y = "Density",
fill = NULL
) +
theme_minimal(base_size = 13) +
theme(legend.position = "top")
})
output$pow_unknown_summary <- renderUI({
n <- input$pow_unknown_n
alpha <- input$pow_unknown_alpha
sigma <- input$pow_unknown_sigma
diff <- input$pow_unknown_diff
se <- sigma * sqrt(2 / n)
df <- 2 * n - 2
crit <- qt(alpha, df, lower.tail = FALSE)
stats <- pow_unknown_stats()
power_theory <- pt(crit, df = df, ncp = diff / se, lower.tail = FALSE)
power_sim <- mean(stats$t_alt >= crit)
HTML(sprintf(
"Theoretical power: %.3f
Simulated power: %.3f",
power_theory,
power_sim
))
})
```