set.seed(2025)

hack_the_p <- function(n0 = 40,        # initial N per group
                       max_extra = 40, # maximum additional participants to add during 'peeking'
                       trim_sd = 2) {  # outlier-trimming threshold (in SD units)
  # Generate data under the null hypothesis (both groups drawn from the same distribution)
  x <- rnorm(n0)
  y <- rnorm(n0)

  # 1) Classic two-sided t-test
  p <- t.test(x, y, var.equal = TRUE)$p.value
  if (p < .05) return(p)

  # 2) One-sided test in the more 'favourable' direction
  #    (for a symmetric test this simply halves the two-sided p-value)
  p_one <- min(
    t.test(x, y, alternative = "less",    var.equal = TRUE)$p.value,
    t.test(x, y, alternative = "greater", var.equal = TRUE)$p.value
  )
  if (p_one < .05) return(p_one)

  # 3) Remove outliers (> trim_sd SD) and rerun the two-sided test, hoping for significance
  x_trim <- x[abs(scale(x)) < trim_sd]
  y_trim <- y[abs(scale(y)) < trim_sd]
  p_trim <- t.test(x_trim, y_trim, var.equal = TRUE)$p.value
  if (p_trim < .05) return(p_trim)

  # 4) Non-parametric alternative (Wilcoxon): maybe it turns significant
  p_rank <- wilcox.test(x, y)$p.value
  if (p_rank < .05) return(p_rank)

  # 5) Sequential sampling ('peeking'): add participants and retest after each batch
  add_each <- 2                 # add two participants per group at each peek
  reps <- max_extra / add_each
  for (i in seq_len(reps)) {
    x <- c(x, rnorm(add_each))
    y <- c(y, rnorm(add_each))
    p_seq <- t.test(x, y, var.equal = TRUE)$p.value # rerun the test
    if (p_seq < .05) return(p_seq)
  }

  # If we never achieve p < .05, return the smallest p among all tests
  # (the most 'appealing' result)
  return(min(p, p_one, p_trim, p_rank, p_seq))
}

B <- 5000 # number of simulated studies
p_vals <- replicate(B, hack_the_p())

# Visualise the distribution of the final p-values produced by the p-hacking strategy
hist(
  p_vals,
  breaks = 40,
  main   = "Distribution of final p-values\n(p-hacking)",
  xlab   = "p-value",
  col    = "steelblue",
  border = NA
)
abline(v = .05, lty = 2, lwd = 2)

# Zoom in on the significant p-values only
hist(
  p_vals[p_vals < 0.05],
  breaks = 10,
  main   = "Final p-values below .05\n(p-hacking)",
  xlab   = "p-value",
  col    = "steelblue",
  border = NA
)
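
# How often does the strategy 'succeed'? All data were generated under the
# null, so every p < .05 is a false positive. A minimal sketch of the check
# (the names `fp_rate` and `p_honest` are illustrative, not part of the
# original script): compare the p-hacked rejection rate against a single
# honest two-sided t-test per study, which should sit near the nominal 5%.
fp_rate  <- mean(p_vals < .05)
p_honest <- replicate(B, t.test(rnorm(40), rnorm(40), var.equal = TRUE)$p.value)
cat(sprintf("p-hacked false-positive rate: %.3f\n", fp_rate))
cat(sprintf("honest false-positive rate:   %.3f\n", mean(p_honest < .05)))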