## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----install_cran, eval = FALSE-----------------------------------------------
# install.packages("batchtma")

## ----install_github, eval = FALSE---------------------------------------------
# # install.packages("remotes")  # The "remotes" package needs to be installed
# remotes::install_github("stopsack/batchtma")

## ----message = FALSE----------------------------------------------------------
library(batchtma)
library(dplyr)
library(ggplot2)

## -----------------------------------------------------------------------------
set.seed(123)  # for reproducibility
df <- tibble(
  # Batches:
  batch = rep(
    paste0("batch", LETTERS[1:4]), 
    times = 100
  ),
  batchnum = rep(
    c(1, 5, 2, 3), 
    times = 100
  ),
  # Participants:
  person = rep(
    letters[1:10], 
    each = 40
  ),
  # Instead of a confounder, we will use a random variable for now:
  random = runif(
    n = 400, 
    min = -2, 
    max = 2
  ),
  # The true (usually unobservable biomarker value):
  true = rep(
    c(2, 2.5, 3, 5, 6, 8, 10, 12, 15, 12), 
    each = 40
  ),
  # The observed biomarker value with random error ("noise"):
  noisy = true + 
    runif(max = true / 3, n = 400) * 4
)

df

## -----------------------------------------------------------------------------
df |> 
  plot_batch(
    marker = noisy, 
    batch = batch, 
    color = person
  )

## -----------------------------------------------------------------------------
df <- df |>
  # Multiply by batch number to differentially change variance by batch,
  # divide by mean batch number to keep overall variance the same:
  mutate(
    noisy_batch = noisy * batchnum / mean(batchnum) + 
      # Similarly, change mean value per batch, keeping overall mean the same:
      batchnum * 3 - 
      mean(batchnum) * 3
  )

df |> 
  plot_batch(
    marker = noisy_batch, 
    batch = batch, 
    color = person
  )

## -----------------------------------------------------------------------------
df |> 
  adjust_batch(
    markers = noisy_batch, 
    batch = batch, 
    method = simple
  ) |>
  plot_batch(
    marker = noisy_batch_adj2, 
    batch = batch, 
    color = person
  )

## -----------------------------------------------------------------------------
df |> 
  adjust_batch(
    markers = noisy_batch, 
    batch = batch, 
    method = standardize, 
    confounders = random
  ) |>
  plot_batch(
    marker = noisy_batch_adj3, 
    batch = batch, 
    color = person
  )

## -----------------------------------------------------------------------------
df |> 
  adjust_batch(
    markers = noisy_batch, 
    batch = batch, 
    method = ipw, 
    confounders = random
  ) |>
  plot_batch(
    marker = noisy_batch_adj4, 
    batch = batch, 
    color = person
  )

## -----------------------------------------------------------------------------
df |> 
  adjust_batch(
    markers = noisy_batch, 
    batch = batch, 
    method = quantreg, 
    confounders = random
  ) |>
  plot_batch(
    marker = noisy_batch_adj5, 
    batch = batch, 
    color = person
  )

## -----------------------------------------------------------------------------
df |> 
  adjust_batch(
    markers = noisy_batch, 
    batch = batch, 
    method = quantnorm
    ) |>
  plot_batch(
    marker = noisy_batch_adj6, 
    batch = batch, 
    color = person
  )

## -----------------------------------------------------------------------------
set.seed(123)  # for reproducibility
df <- df |>
  # Make confounder associated with batch:
  mutate(
    confounder = round(batchnum + runif(n = 200, max = 2)),
    # Make biomarker values associated with confounder:
    noisy_conf = noisy + confounder * 3 - mean(confounder) * 3
  )

df |> 
  plot_batch(
    marker = noisy_conf, 
    batch = batch, 
    color = confounder
  )

## -----------------------------------------------------------------------------
df <- df |> 
  # Add batch effects to confounded biomarker values:
  mutate(
    noisy_conf_batch = noisy_conf * batchnum / mean(batchnum) + 
      batchnum * 3 - 
      mean(batchnum) * 3
  )

df |> 
  plot_batch(
    marker = noisy_conf_batch, 
    batch = batch, 
    color = confounder
  )

## -----------------------------------------------------------------------------
df |> 
  adjust_batch(
    markers = noisy_conf_batch, 
    batch = batch, 
    method = standardize, 
    confounders = confounder
  ) |>
  plot_batch(
    marker = noisy_conf_batch_adj3, 
    batch = batch, 
    color = confounder
  )

## -----------------------------------------------------------------------------
df |> 
  adjust_batch(
    markers = noisy_conf_batch, 
    batch = batch, 
    method = ipw, 
    confounders = confounder
  ) |>
  plot_batch(
    marker = noisy_conf_batch_adj4, 
    batch = batch, 
    color = confounder
  )

## -----------------------------------------------------------------------------
df |> 
  adjust_batch(
    markers = noisy_conf_batch, 
    batch = batch, 
    method = quantreg, 
    confounders = confounder
  ) |>
  plot_batch(
    marker = noisy_conf_batch_adj5, 
    batch = batch, 
    color = confounder
  )

## -----------------------------------------------------------------------------
df |> 
  adjust_batch(
    markers = noisy_conf_batch, 
    batch = batch, 
    method = simple, 
    confounders = confounder
  ) |>
  plot_batch(
    marker = noisy_conf_batch_adj2, 
    batch = batch, 
    color = confounder
  )

## -----------------------------------------------------------------------------
df |> 
  adjust_batch(
    markers = noisy_conf_batch, 
    batch = batch, 
    method = quantnorm, 
    confounders = confounder
  ) |>
  plot_batch(
    marker = noisy_conf_batch_adj6, 
    batch = batch, 
    color = confounder
  )

## -----------------------------------------------------------------------------
# df2 is the new dataset that also contains "noisy_conf_batch_adj2":
df2 <- df |> 
  adjust_batch(
    markers = noisy_conf_batch, 
    batch = batch, 
    method = standardize, 
    confounders = confounder
  )

# Show overview of model diagnostics:
diagnose_models(df2)

## -----------------------------------------------------------------------------
fit <- diagnose_models(data = df2)$model_fits[[1]][[1]]
summary(fit)

## -----------------------------------------------------------------------------
diagnose_models(df2)$adjust_parameters

## -----------------------------------------------------------------------------
tibble(
  fitted = fitted.values(fit),
  residuals = residuals(fit)
) |>
  ggplot(
    mapping = aes(
      x = fitted, 
      y = residuals
    )
  ) +
  geom_point() +
  theme_minimal()