set.seed(2025)

hack_the_p <- function(n0 = 40,        # initial N per group
                       max_extra = 40, # maximum additional participants to add during 'peeking'
                       trim_sd = 2) {  # outlier-trimming threshold (in SD units)
  # Generate data under the null hypothesis (both groups drawn from the same distribution)
  x <- rnorm(n0)
  y <- rnorm(n0)

  # 1) Classic two-sided t-test
  p <- t.test(x, y, var.equal = TRUE)$p.value
  if (p < .05) return(p)

  # 2) One-sided test in the more 'favourable' direction
  #    (for a symmetric test this simply halves the two-sided p-value)
  p_one <- min(
    t.test(x, y, alternative = "less",    var.equal = TRUE)$p.value,
    t.test(x, y, alternative = "greater", var.equal = TRUE)$p.value
  )
  if (p_one < .05) return(p_one)

  # 3) Remove outliers (> trim_sd SD) and rerun the two-sided test, hoping for significance
  x_trim <- x[abs(scale(x)) < trim_sd]
  y_trim <- y[abs(scale(y)) < trim_sd]
  p_trim <- t.test(x_trim, y_trim, var.equal = TRUE)$p.value
  if (p_trim < .05) return(p_trim)

  # 4) Non-parametric alternative (Wilcoxon): maybe it turns significant
  p_rank <- wilcox.test(x, y)$p.value
  if (p_rank < .05) return(p_rank)

  # 5) Sequential sampling ('peeking'): add participants and retest after each batch
  add_each <- 2                 # add two participants per group at each peek
  reps <- max_extra / add_each
  for (i in seq_len(reps)) {
    x <- c(x, rnorm(add_each))
    y <- c(y, rnorm(add_each))
    p_seq <- t.test(x, y, var.equal = TRUE)$p.value # rerun the test
    if (p_seq < .05) return(p_seq)
  }

  # If we never achieve p < .05, return the smallest p among all tests
  # (the most 'appealing' result)
  return(min(p, p_one, p_trim, p_rank, p_seq))
}

B <- 5000 # number of simulated studies
p_vals <- replicate(B, hack_the_p())

# Visualise the distribution of the final p-values produced by the p-hacking strategy
hist(
  p_vals,
  breaks = 40,
  main   = "Distribution of final p-values\n(p-hacking)",
  xlab   = "p-value",
  col    = "steelblue",
  border = NA
)
abline(v = .05, lty = 2, lwd = 2)

# Zoom in on the significant p-values only
hist(
  p_vals[p_vals < 0.05],
  breaks = 10,
  main   = "Final p-values below .05\n(p-hacking)",
  xlab   = "p-value",
  col    = "steelblue",
  border = NA
)
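
# How often does the strategy 'succeed'? All data were generated under the
# null, so every p < .05 is a false positive. A minimal sketch of the check
# (the names `fp_rate` and `p_honest` are illustrative, not part of the
# original script): compare the p-hacked rejection rate against a single
# honest two-sided t-test per study, which should sit near the nominal 5%.
fp_rate  <- mean(p_vals < .05)
p_honest <- replicate(B, t.test(rnorm(40), rnorm(40), var.equal = TRUE)$p.value)
cat(sprintf("p-hacked false-positive rate: %.3f\n", fp_rate))
cat(sprintf("honest false-positive rate:   %.3f\n", mean(p_honest < .05)))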