## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----install_cran, eval = FALSE----------------------------------------------- # install.packages("batchtma") ## ----install_github, eval = FALSE--------------------------------------------- # # install.packages("remotes") # The "remotes" package needs to be installed # remotes::install_github("stopsack/batchtma") ## ----message = FALSE---------------------------------------------------------- library(batchtma) library(dplyr) library(ggplot2) ## ----------------------------------------------------------------------------- set.seed(123) # for reproducibility df <- tibble( # Batches: batch = rep( paste0("batch", LETTERS[1:4]), times = 100 ), batchnum = rep( c(1, 5, 2, 3), times = 100 ), # Participants: person = rep( letters[1:10], each = 40 ), # Instead of a confounder, we will use a random variable for now: random = runif( n = 400, min = -2, max = 2 ), # The true (usually unobservable biomarker value): true = rep( c(2, 2.5, 3, 5, 6, 8, 10, 12, 15, 12), each = 40 ), # The observed biomarker value with random error ("noise"): noisy = true + runif(max = true / 3, n = 400) * 4 ) df ## ----------------------------------------------------------------------------- df |> plot_batch( marker = noisy, batch = batch, color = person ) ## ----------------------------------------------------------------------------- df <- df |> # Multiply by batch number to differentially change variance by batch, # divide by mean batch number to keep overall variance the same: mutate( noisy_batch = noisy * batchnum / mean(batchnum) + # Similarly, change mean value per batch, keeping overall mean the same: batchnum * 3 - mean(batchnum) * 3 ) df |> plot_batch( marker = noisy_batch, batch = batch, color = person ) ## ----------------------------------------------------------------------------- df |> adjust_batch( markers = noisy_batch, batch = batch, method = simple ) |> plot_batch( marker = noisy_batch_adj2, batch = batch, color = person ) ## ----------------------------------------------------------------------------- df |> adjust_batch( markers = noisy_batch, batch = batch, method = standardize, confounders = random ) |> plot_batch( marker = noisy_batch_adj3, batch = batch, color = person ) ## ----------------------------------------------------------------------------- df |> adjust_batch( markers = noisy_batch, batch = batch, method = ipw, confounders = random ) |> plot_batch( marker = noisy_batch_adj4, batch = batch, color = person ) ## ----------------------------------------------------------------------------- df |> adjust_batch( markers = noisy_batch, batch = batch, method = quantreg, confounders = random ) |> plot_batch( marker = noisy_batch_adj5, batch = batch, color = person ) ## ----------------------------------------------------------------------------- df |> adjust_batch( markers = noisy_batch, batch = batch, method = quantnorm ) |> plot_batch( marker = noisy_batch_adj6, batch = batch, color = person ) ## ----------------------------------------------------------------------------- set.seed(123) # for reproducibility df <- df |> # Make confounder associated with batch: mutate( confounder = round(batchnum + runif(n = 200, max = 2)), # Make biomarker values associated with confounder: noisy_conf = noisy + confounder * 3 - mean(confounder) * 3 ) df |> plot_batch( marker = noisy_conf, batch = batch, color = confounder ) ## ----------------------------------------------------------------------------- df <- df |> # Add batch effects to confounded biomarker values: mutate( noisy_conf_batch = noisy_conf * batchnum / mean(batchnum) + batchnum * 3 - mean(batchnum) * 3 ) df |> plot_batch( marker = noisy_conf_batch, batch = batch, color = confounder ) ## ----------------------------------------------------------------------------- df |> adjust_batch( markers = noisy_conf_batch, batch = batch, method = standardize, confounders = confounder ) |> plot_batch( marker = noisy_conf_batch_adj3, batch = batch, color = confounder ) ## ----------------------------------------------------------------------------- df |> adjust_batch( markers = noisy_conf_batch, batch = batch, method = ipw, confounders = confounder ) |> plot_batch( marker = noisy_conf_batch_adj4, batch = batch, color = confounder ) ## ----------------------------------------------------------------------------- df |> adjust_batch( markers = noisy_conf_batch, batch = batch, method = quantreg, confounders = confounder ) |> plot_batch( marker = noisy_conf_batch_adj5, batch = batch, color = confounder ) ## ----------------------------------------------------------------------------- df |> adjust_batch( markers = noisy_conf_batch, batch = batch, method = simple, confounders = confounder ) |> plot_batch( marker = noisy_conf_batch_adj2, batch = batch, color = confounder ) ## ----------------------------------------------------------------------------- df |> adjust_batch( markers = noisy_conf_batch, batch = batch, method = quantnorm, confounders = confounder ) |> plot_batch( marker = noisy_conf_batch_adj6, batch = batch, color = confounder ) ## ----------------------------------------------------------------------------- # df2 is the new dataset that also contains "noisy_conf_batch_adj2": df2 <- df |> adjust_batch( markers = noisy_conf_batch, batch = batch, method = standardize, confounders = confounder ) # Show overview of model diagnostics: diagnose_models(df2) ## ----------------------------------------------------------------------------- fit <- diagnose_models(data = df2)$model_fits[[1]][[1]] summary(fit) ## ----------------------------------------------------------------------------- diagnose_models(df2)$adjust_parameters ## ----------------------------------------------------------------------------- tibble( fitted = fitted.values(fit), residuals = residuals(fit) ) |> ggplot( mapping = aes( x = fitted, y = residuals ) ) + geom_point() + theme_minimal()