Reproducible Supplement

Load the required packages

Show code

library(openxlsx)
library(here)
library(janitor)
library(tidyverse)
library(labelled)
library(zoo)
library(latrend)
library(ggplot2)
library(ggnewscale)
library(scales)
library(ggrepel)
library(gtsummary)
library(prodlim)
library(ggsurvfit)
library(Epi)
library(pmsampsize)
library(caret)
library(rms)
library(tidymodels)
library(glmnet)
library(probably)
library(vip)
library(runway)
library(cutpointr)
library(dcurves)
library(survival)
library(patchwork)
theme_set(theme_bw())
theme_gtsummary_compact()

Automated eICAHT grading

Import data

Show code

# Clinical data
# Already excludes the patients who ho died prior to day +14 after CAR T-cell infusion, 
# who had more than 7 consecutive days of missing ANC values, or 
# who had a second CAR T-cell infusion within 30 days of the first CAR T-cell infusion 
df <-
  read.xlsx(
    here(
      "dfs",
      "Outputs",
      "File for eIPM Supplement.xlsx"
    ),
    detectDates = TRUE
  )

# CBC data
df_cbc <-
  read.xlsx(here("dfs", "EPIC Data Exports", "2024.05.29.hems.xlsx"),
            detectDates = TRUE)
df_cbc <- clean_names(df_cbc)
df_cbc <- df_cbc %>%
  filter(cart_num == 1)

Create data frame with longitudinal daily ANC values (including interpolating missing data)

Show code

# Create data frame with longitudinal ANC values
# If last lab draw occurs after 23:00, assign ANC value to the following day
df_ancx <- df_cbc %>%
  filter(upn %in% df$upn) %>%
  select(upn, cart_date, date, time, ancx) %>%
  mutate(time = as.POSIXct(time, format = "%H:%M:%S"),
         new_date  = as.Date(ifelse(
           time >= as.POSIXct("23:00:00", format = "%H:%M:%S"),
           date + days(1),
           date
         ))) %>%
  ungroup()

df_ancx <- df_ancx %>% 
  select(-date, -time) %>% 
  rename(date = new_date)

df_ancx <- df_ancx %>% 
  mutate(time_post_inf = as.numeric(date - cart_date)) %>%
  filter(time_post_inf %in% 0:30) %>%
  distinct(upn, time_post_inf, ancx, .keep_all = TRUE) %>% # Keep unique combinations of upn, time_post_inf, and anc
  group_by(upn, time_post_inf) %>% 
  slice_min(ancx) %>% # For each combination of upn and time_post_inf, keep the lowest ANC value
  ungroup()

# Fill in missing time_post_inf and replicate UPNs by first creating df_extract
df_extract <- df %>%
  distinct(upn, cart_date, datelast)

# Create replicate_ids() to replicate UPNs according to duration of follow-up
replicate_ids <- function(upn, cart_date, datelast) {
  if (is.na(datelast) || (datelast - cart_date) >= 30) {
    return(replicate(31, upn))
  } else {
    return(replicate(datelast - cart_date + 1, upn)) # Add 1 to account for day +0
  }
}

# Create df_id with replicated UPNs
df_id <- df_extract %>%
  dplyr::rowwise() %>%
  do(data.frame(upn = replicate_ids(.$upn, .$cart_date, .$datelast))) %>% 
  group_by(upn) %>% 
  mutate(time_post_inf = row_number() - 1) %>% 
  ungroup()

# Merge df_id with df
df_ancx <- df_ancx %>%
  group_by(upn) %>%
  merge(.,
        df_id,
        by = c("upn", "time_post_inf"),
        all.y = TRUE) %>% 
  ungroup()

# Fill in cart_date and date columns
df_ancx <- df_ancx %>%
  group_by(upn) %>%
  mutate(cart_date = as.Date(ifelse(
    is.na(cart_date), max(cart_date, na.rm = TRUE), cart_date
  )),
  date = as.Date(cart_date + time_post_inf)) %>% 
  ungroup()

# Linearly interpolate missing ANC values for up to 7 consecutive NAs
df_ancx <- df_ancx %>%
  arrange(upn, time_post_inf) %>%
  group_by(upn) %>% 
  mutate(ancx = na.approx(ancx, maxgap = 7, rule = 2)) %>% 
  ungroup()

# Round ANC values to nearest integer divisible by 10 (like real ANC values)
df_ancx <- df_ancx %>% 
  mutate(ancx = round(ancx/10) * 10)

# Add UWIDs
df_ancx <- df_ancx %>%
  inner_join(., df %>% select(upn, uwid), by = "upn") %>%
  select(upn, uwid, cart_date, date, time_post_inf, ancx)

# Log10-transform ANC and select variables for clustering
df_anc <- df_ancx %>%
  mutate(anc = ifelse(ancx == 0, 0.01, ancx),
         anc = log10(anc)) %>%
  select(upn, time_post_inf, anc)

var_label(df_anc$time_post_inf) <- "Days after CAR T-cell infusion"
var_label(df_anc$anc) <- "ANC"

Automatically assign eICAHT grades using {heatwaveR}

Show code

# For eICAHT threshold of ANC ≤ 500 cells/μL
df_exceedances_below_501 <-
  # ddply splits a data frame, applies a function, and combines results into a data frame
  plyr::ddply(.data = df_ancx, .variables = c("upn", "uwid"), .fun = function(data) {
    heatwaveR::exceedance(
      data,
      x = date,
      y = ancx,
      threshold = 501,
      below = TRUE, # Whether to detect events below the threshold,
      minDuration = 1, # Minimum duration for acceptance of detected events
      joinAcrossGaps = TRUE, # Whether to join events which occur before/after gap
      maxGap = 2 # Maximum length of gap allowed for joining of events
    )$exceedance
  })

# Create data frame containing maximum duration of events for each UPN
df_anc_500 <- df_exceedances_below_501 %>%
  select(upn, uwid, exceedance_no, duration) %>%
  group_by(upn, uwid) %>%
  dplyr::summarize(duration_below_501_max = max(duration)) %>%
  mutate(duration_below_501_max = ifelse( # Replace NAs with 0 (never below threshold)
    is.na(duration_below_501_max),
    0,
    duration_below_501_max
  ))

# For eICAHT threshold of ANC ≤ 100 cells/μL
df_exceedances_below_101 <-
  plyr::ddply(.data = df_ancx, .variables = c("upn"), .fun = function(data) {
    heatwaveR::exceedance(
      data,
      x = date,
      y = ancx,
      threshold = 101,
      below = TRUE,
      minDuration = 1,
      joinAcrossGaps = TRUE,
      maxGap = 2,
      maxPadLength = FALSE
    )$exceedance
  })

df_anc_100 <- df_exceedances_below_101 %>%
  select(upn, exceedance_no, duration) %>%
  group_by(upn) %>%
  dplyr::summarize(duration_below_101_max = max(duration)) %>%
  mutate(duration_below_101_max = ifelse(
    is.na(duration_below_101_max),
    0,
    duration_below_101_max
  ))

# Create data frame with exceedances below each threshold for each UPN
df_icaht <- df_anc_500 %>% 
  left_join(., df_anc_100, by = "upn") %>% 
  ungroup()

# Create column with eICAHT grades
df_icaht <- df_icaht %>%
  mutate(
    icaht_grade = case_when(
      duration_below_501_max == 0 & duration_below_101_max == 0 ~ "Grade 0",
      duration_below_501_max %in% 1:6 & duration_below_101_max < 7 ~ "Grade 1",
      duration_below_501_max %in% 7:13 & duration_below_101_max < 7 ~ "Grade 2",
      (duration_below_501_max %in% 14:30 & duration_below_101_max < 7) |
        (duration_below_501_max < 31 & duration_below_101_max %in% 7:13) ~ "Grade 3",
      (duration_below_501_max >= 31 & duration_below_101_max < 14) | duration_below_101_max >= 14 ~ "Grade 4"
    )
  )

Additionally define grade 4 eICAHT

Show code

df_exceedances_below_501 <- df_exceedances_below_501 %>%
  filter(!is.na(exceedance_no)) %>%  # Filter out patients who never had neutropenia
  select(upn, 
         exceedance_below_501_no = exceedance_no, 
         exceedance_below_501_date_start = date_start,
         exceedance_below_501_date_end = date_end) %>% 
  mutate(exceedance_below_501_date_start = as.Date(exceedance_below_501_date_start), # Convert dates to class "Date"
         exceedance_below_501_date_end = as.Date(exceedance_below_501_date_end))

# Add date of last available ANC value
df_last_time_post_inf <- df_ancx %>%
  arrange(upn, time_post_inf) %>%
  group_by(upn) %>% 
  slice_tail(n = 1) %>% # Select last row
  ungroup() %>% 
  select(upn, 
         cart_date,
         last_lab_date = date)

df_exceedances_below_501 <- df_exceedances_below_501 %>% 
  left_join(., df_last_time_post_inf, by = "upn")

# Classify patients who had exceedances below ANC ≤ 500 cells/μL 
# starting between days 0-3 and ending on "last_lab_date" as grade 4 eICAHT
df_exceedances_below_501 <- df_exceedances_below_501 %>%
  mutate(
    icaht_grade_4 = ifelse(
      exceedance_below_501_date_start - cart_date <= 3 &
        exceedance_below_501_date_end == last_lab_date,
      "Yes",
      "No"
    )
  )

df_icaht <- df_icaht %>%
  left_join(.,
            df_exceedances_below_501 %>% select(upn, icaht_grade_4),
            by = "upn")

df_icaht <- df_icaht %>%
  mutate(
    icaht_grade = dplyr::if_else(
      icaht_grade_4 == "Yes",
      "Grade 4",
      icaht_grade,
      missing = icaht_grade
    )
  ) %>%
  distinct(upn,
           icaht_grade,
           .keep_all = TRUE)

Update eICAHT grades for patients who were included in Liang, Rejeski, et al., Bone Marrow Transplantation 2024 (n = 605)

Show code

# Data frame contains automated grades assigned by {heatwaveR} approach, MSKCC manual code, and manual grades for discrepancies
df_check <-
  read.xlsx(here("dfs",
                 "Outputs",
                 "Manual vs. Automated Early ICAHT Grades.xlsx")) %>% 
  clean_names()

# Filter to cases incorrectly graded by {heatwaveR} approach
df_check <- df_check %>% 
  filter(center == "FHCC",
         disagreement_manual_heatwave_r == 1) %>% 
  select(upn = patient_id,
         manual_icaht_grade) %>% 
  mutate(manual_icaht_grade = paste0("Grade ", manual_icaht_grade))

# Replace grades in df_icaht with correct grades
df_icaht <- df_icaht %>% 
  left_join(., df_check, by = "upn")

df_icaht <- df_icaht %>%
  mutate(icaht_grade = ifelse(!is.na(manual_icaht_grade), manual_icaht_grade, icaht_grade)) %>% 
  select(-icaht_grade_4, -manual_icaht_grade)

# Add eICAHT grades to df
df <- df %>% 
  filter(upn %in% df_icaht$upn) %>% 
  left_join(., df_icaht %>% select(upn, icaht_grade), by = "upn")

df <- df %>%
  mutate(
    icaht_grade_num = case_when(
      icaht_grade == "Grade 0" ~ 0,
      icaht_grade == "Grade 1" ~ 1,
      icaht_grade == "Grade 2" ~ 2,
      icaht_grade == "Grade 3" ~ 3,
      icaht_grade == "Grade 4" ~ 4
    ),
    icaht_grade = fct_relevel(icaht_grade, "Grade 0"),
    icaht_grade_3_4 = ifelse(icaht_grade_num %in% 3:4, 1, 0)
  )

Plot ANC trajectories by ICAHT grade using {latrend}

Show code

# Use {latrend} to create lcModel object based on reference assignments using lcModelPartition()
options(latrend.id = "upn", latrend.time = "time_post_inf")

df_anc <- df_anc %>%
  left_join(., df %>% select(upn, icaht_grade_num), by = "upn") %>% 
  mutate(icaht_grade_latrend = icaht_grade_num + 1) # latrend requires integers ≥1
  
icaht_traj_assigns <- aggregate(icaht_grade_latrend ~ upn,
                                data = df_anc,
                                FUN = data.table::first)
icaht_model <-
  lcModelPartition(
    data = df_anc,
    response = "anc",
    trajectoryAssignments = icaht_traj_assigns$icaht_grade_latrend
  )
clusterNames(icaht_model) <-
  c("Grade 0", "Grade 1", "Grade 2", "Grade 3", "Grade 4")
icaht_model

Longitudinal cluster model using part
lcMethod specifying "undefined"
no arguments

Cluster sizes (K=5):
    Grade 0     Grade 1     Grade 2     Grade 3     Grade 4 
112 (16.2%)   366 (53%) 111 (16.1%)   67 (9.7%)   35 (5.1%) 

Number of obs: 21378, strata (upn): 691

Scaled residuals:
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-6.16331 -0.23806  0.04851  0.00000  0.37705  4.65705

Show code

# Plot individual ANC trajectories (black lines) and cluster trajectories (colored lines)
df_icaht_traj <- trajectories(icaht_model) # Extract individual trajectories and cluster assignments
df_icaht_cluster_traj = clusterTrajectories(icaht_model) # Extract cluster trajectories
df_hline <- data.frame(threshold = factor(c("≤100 cells/μL", "≤500 cells/μL")), y = c(100, 500)) # df to plot early ICAHT thresholds

# Create new labels including cluster percentages
cluster_percentages <- prop.table(table(df_icaht_traj$Cluster))
cluster_labels <- sprintf("%s (%d%%)",
                          names(cluster_percentages),
                          round(cluster_percentages * 100))
new_labels <- as_labeller(
  c(
    "Grade 0" = cluster_labels[1],
    "Grade 1" = cluster_labels[2],
    "Grade 2" = cluster_labels[3],
    "Grade 3" = cluster_labels[4],
    "Grade 4" = cluster_labels[5]
  )
)

ggplot() +
  geom_line(
    data = df_icaht_traj,
    aes(x = time_post_inf, y = anc, group = upn),
    color = "black",
    alpha = 0.5
  ) +
  geom_line(data = df_icaht_cluster_traj,
            aes(x = time_post_inf, y = anc, color = Cluster),
            size = 2,
            show.legend = FALSE) +
  new_scale_color() +
  geom_hline(
    data = df_hline,
    aes(
      yintercept = log10(y),
      linetype = threshold,
      colour = threshold
    ),
    size = 1
  ) +
  scale_colour_manual(
    values = c("red", "blue"),
    labels = c("≤100 cells/μL", "≤500 cells/μL"),
    name = "Early ICAHT Threshold"
  ) +
  scale_linetype_manual(
    values = c("dashed", "dashed"),
    labels = c("≤100 cells/μL", "≤500 cells/μL"),
    name = "Early ICAHT Threshold"
  ) +
  facet_wrap(~ Cluster,
             labeller = new_labels) +
  scale_x_continuous(name = "Days after CAR T-cell infusion") +
  scale_y_continuous(name = "ANC (log10(cells/μL)") +
  theme(strip.text.x = element_text(size = 12),
        legend.position = c(0.85, 0.35))

Import/wrangle additional data

Clinical data

Show code

df <- df %>%
  mutate(
    ethnih_cat = ifelse(
      ethnih == "Hispanic or Latino",
      "Hispanic or Latino",
      "Not Hispanic/Latino or Unknown"
    ),
    ethnih_cat = fct_relevel(as.factor(ethnih_cat), "Not Hispanic/Latino or Unknown"),
    raceethnih_cat = case_when(
      racenih == "White" ~ "White",
      racenih == "Black or African American" &
        ethnih == "Not Hispanic or Latino" ~ "Non-Hispanic Black or African American",
      .default = "Other"
    ),
    raceethnih_cat = fct_relevel(as.factor(raceethnih_cat), "White"),
    disease_cat =
      factor(
        disease_cat,
        levels = c(
          "Aggressive NHL",
          "Other indolent NHL",
          "ALL",
          "Mantle cell lymphoma",
          "MM/PCL"
        )
      ),
    LD_regimen_cat = factor(
      LD_regimen_cat,
      levels = c("Low-intensity Cy/Flu", "High-intensity Cy/Flu", "Other")
    ),
    product_cat = as.factor(product_cat),
    car_target = case_when(
      product_cat == "Commercial CD19-targeted CAR T-cell product with CD28 costimulatory domain" |
        product_cat == "Commercial CD19-targeted CAR T-cell product with 4-1BB costimulatory domain" |
        product_infused == "JCAR014" |
        product_infused == "JCAR021" ~ "CD19",
      product_infused == "Investigational CD20-targeted product" ~ "CD20",
      .default = "BCMA"
    ),
    car_target = factor(car_target, levels = c("BCMA", "CD19", "CD20"))
  )

var_label(df$cart_age) <- "Age"
var_label(df$gender_desc) <- "Sex"
var_label(df$racenih) <- "Race"
var_label(df$ethnih) <- "Ethnicity"
var_label(df$ethnih_cat) <- "Ethnicity"
var_label(df$raceethnih_cat) <- "Race/ethnicity"
var_label(df$disease_cat) <- "Disease"
var_label(df$prior_hct) <- "Prior HCT"
var_label(df$bm_disease_by_morph) <- "Bone marrow involvement by morphology (%)"
var_label(df$bm_disease_by_morph_log10) <- "Bone marrow involvement by morphology (log10(%))"
var_label(df$bm_disease_by_flow) <- "Bone marrow involvement by flow cytometry (%)"
var_label(df$bm_disease_by_flow_log10) <- "Bone marrow involvement by flow cytometry (log10(%))"
var_label(df$bridging_yn) <- "Bridging therapy"
var_label(df$LD_regimen_cat) <- "Lymphodepletion regimen"
var_label(df$LD_regimen_low_CyFlu_vs_other) <- "Lymphodepletion regimen (low-dose Cy/Flu vs. other)"
var_label(df$product_infused) <- "CAR T-cell product"
var_label(df$product_cat) <- "CAR T-cell product type"
var_label(df$car_target) <- "CAR target"
var_label(df$costim_domain) <- "Costimulatory domain"
var_label(df$cart_dose_total) <- "Total CAR T-cell dose (x 10^6 cells)"
var_label(df$cart_dose_total_log10) <- "Total CAR T-cell dose (log10(x 10^6 cells))"
var_label(df$anc_pre_ld) <- "Pre-LD ANC (x 10^3 cells/µL)"
var_label(df$hb_pre_ld) <- "Pre-LD Hb (g/dL)"
var_label(df$plt_pre_ld) <- "Pre-LD platelet count (x 10^3 cells/µL)"
var_label(df$anc_pre_ld_log10) <- "Pre-LD ANC (log10(x 10^3 cells/µL))"
var_label(df$anc_day_3_log10) <- "Day +3 ANC (log10(x 10^3 cells/µL))"
var_label(df$lym_pre_ld_log10) <- "Pre-LD ALC (log10(x 10^3 cells/µL))"
var_label(df$plt_pre_ld_log10) <- "Pre-LD platelet count (log10(x 10^3 /µL))"
var_label(df$plt_day_3_log10) <- "Day +3 platelet count (log10(x 10^3 /µL))"
var_label(df$ldh_pre_ld_log10) <- "Pre-LD LDH (log10(U/L))"
var_label(df$crp_pre_ld_log10) <- "Pre-LD CRP (log10(mg/L))"
var_label(df$crp_day_0_log10) <- "Day +0 CRP (log10(mg/L))"
var_label(df$crp_day_3_log10) <- "Day +3 CRP (log10(mg/L))"
var_label(df$crp_day_5_log10) <- "Day +5 CRP (log10(mg/L))"
var_label(df$crp_day_7_log10) <- "Day +7 CRP (log10(mg/L))"
var_label(df$crp_max_log10) <- "Peak CRP (log10(mg/L))"
var_label(df$ferritin_pre_ld_log10) <- "Pre-LD ferritin (log10(ng/mL))"
var_label(df$ferritin_day_0_log10) <- "Day +0 ferritin (log10(ng/mL))"
var_label(df$ferritin_day_3_log10) <- "Day +3 ferritin (log10(ng/mL))"
var_label(df$ferritin_day_5_log10) <- "Day +5 ferritin (log10(ng/mL))"
var_label(df$ferritin_day_7_log10) <- "Day +7 ferritin (log10(ng/mL))"
var_label(df$ferritin_max_log10) <- "Peak ferritin (log10(ng/mL))"
var_label(df$il6_pre_ld_log10) <- "Pre-LD IL-6 (log10(pg/mL))"
var_label(df$il6_day_0_log10) <- "Day +0 IL-6 (log10(pg/mL))"
var_label(df$il6_day_3_log10) <- "Day +3 IL-6 (log10(pg/mL))"
var_label(df$il6_day_5_log10) <- "Day +5 IL-6 (log10(pg/mL))"
var_label(df$il6_day_7_log10) <- "Day +7 IL-6 (log10(pg/mL))"
var_label(df$il6_max_log10) <- "Peak IL-6 (log10(pg/mL))"
var_label(df$fibrinogen_pre_ld_log10) <- "Pre-LD fibrinogen (log10(mg/dL))"
var_label(df$fibrinogen_day_0_log10) <- "Day +0 fibrinogen (log10(mg/dL))"
var_label(df$fibrinogen_day_3_log10) <- "Day +3 fibrinogen (log10(mg/dL))"
var_label(df$fibrinogen_day_5_log10) <- "Day +5 fibrinogen (log10(mg/dL))"
var_label(df$fibrinogen_day_7_log10) <- "Day +7 fibrinogen (log10(mg/dL))"
var_label(df$fibrinogen_min_log10) <- "Nadir fibrinogen (log10(mg/dL))"
var_label(df$d_dimer_pre_ld_log10) <- "Pre-LD D-dimer (log10(mg/L FEU))"
var_label(df$d_dimer_day_0_log10) <- "Day +0 D-dimer (log10(mg/L FEU))"
var_label(df$d_dimer_day_3_log10) <- "Day +3 D-dimer (log10(mg/L FEU))"
var_label(df$d_dimer_day_5_log10) <- "Day +5 D-dimer (log10(mg/L FEU))"
var_label(df$d_dimer_day_7_log10) <- "Day +7 D-dimer (log10(mg/L FEU))"
var_label(df$d_dimer_max_log10) <- "Peak D-dimer (log10(mg/L FEU))"
var_label(df$ptt_pre_ld_log10) <- "Pre-LD PTT (log10(seconds))"
var_label(df$ptt_day_0_log10) <- "Day +0 PTT (log10(seconds))"
var_label(df$ptt_day_3_log10) <- "Day +3 PTT (log10(seconds))"
var_label(df$ptt_day_5_log10) <- "Day +5 PTT (log10(seconds))"
var_label(df$ptt_day_7_log10) <- "Day +7 PTT (log10(seconds))"
var_label(df$ptt_max_log10) <- "Peak PTT (log10(seconds))"
var_label(df$icaht_grade) <- "eICAHT grade"
var_label(df$icaht_grade_3_4) <- "Grade 3-4 eICAHT"
var_label(df$gcsf) <- "Received G-CSF"
var_label(df$tpo_ra) <- "Received TPO-RA"
var_label(df$crs_grade) <- "CRS grade"
var_label(df$nt_grade) <- "Neurotoxicity grade"
var_label(df$iec_hs) <- "IEC-HS"

Table 1: Patient, disease, and treatment characteristics

Show code

df %>%
  select(
    cart_age,
    gender_desc,
    racenih,
    ethnih,
    prior_hct,
    disease_cat,
    bm_disease_by_morph,
    bm_disease_by_flow,
    bridging_yn,
    LD_regimen_cat,
    cart_dose_total,
    product_infused,
    product_cat,
    anc_pre_ld,
    hb_pre_ld, 
    plt_pre_ld
  ) %>%
  tbl_summary(
    missing_text = "Missing",
    type = list(all_continuous() ~ "continuous2"),
    statistic = list(
      all_continuous() ~ c("{median} ({p25}, {p75})", "{min} - {max}"),
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_categorical() ~ 0,
    sort = list(everything() ~ "frequency")
  ) %>%
  bold_labels()

Characteristic	N = 691¹
Age
Median (IQR)	61 (51, 68)
Range	19 - 84
Sex
Male	437 (63%)
Female	254 (37%)
Race
White	583 (84%)
Asian	58 (8%)
Black or African American	18 (3%)
American Indian or Alaska Native	13 (2%)
Multiple	8 (1%)
Unknown	7 (1%)
Native Hawaiian or other Pacific Islander	4 (1%)
Ethnicity
Not Hispanic or Latino	629 (91%)
Hispanic or Latino	52 (8%)
Unknown	10 (1%)
Prior HCT	234 (34%)
Disease
Aggressive NHL	318 (46%)
MM/PCL	120 (17%)
Other indolent NHL	117 (17%)
ALL	99 (14%)
Mantle cell lymphoma	37 (5%)
Bone marrow involvement by morphology (%)
Median (IQR)	0 (0, 30)
Range	0 - 100
Missing	267
Bone marrow involvement by flow cytometry (%)
Median (IQR)	0 (0, 19)
Range	0 - 99
Missing	252
Bridging therapy	422 (61%)
Lymphodepletion regimen
Low-intensity Cy/Flu	403 (58%)
High-intensity Cy/Flu	253 (37%)
Other	35 (5%)
Total CAR T-cell dose (x 10^6 cells)
Median (IQR)	170 (111, 402)
Range	5 - 43,692
CAR T-cell product
JCAR014	193 (28%)
YESCARTA	140 (20%)
BREYANZI	87 (13%)
CARVYKTI	52 (8%)
TECARTUS	47 (7%)
Investigational CD20-targeted product	45 (7%)
JCAR021	44 (6%)
Investigational BCMA-targeted product	38 (5%)
ABECMA	30 (4%)
KYMRIAH	15 (2%)
CAR T-cell product type
Investigational CAR T-cell product with 4-1BB costimulatory domain	320 (46%)
Commercial CD19-targeted CAR T-cell product with CD28 costimulatory domain	187 (27%)
Commercial CD19-targeted CAR T-cell product with 4-1BB costimulatory domain	102 (15%)
Commercial BCMA-targeted CAR T-cell product	82 (12%)
Pre-LD ANC (x 10^3 cells/µL)
Median (IQR)	2.84 (1.79, 4.29)
Range	0.00 - 54.03
Missing	1
Pre-LD Hb (g/dL)
Median (IQR)	10.70 (9.50, 12.35)
Range	7.00 - 17.10
Pre-LD platelet count (x 10^3 cells/µL)
Median (IQR)	142 (89, 202)
Range	6 - 790
¹ n (%)

Bridging therapy by disease type

Show code

df %>%
  select(disease_cat, bridging_yn) %>%
  tbl_summary(by = disease_cat) %>%
  bold_labels()

Characteristic	Aggressive NHL, N = 318¹	Other indolent NHL, N = 117¹	ALL, N = 99¹	Mantle cell lymphoma, N = 37¹	MM/PCL, N = 120¹
Bridging therapy	153 (48%)	69 (59%)	59 (60%)	33 (89%)	108 (90%)
¹ n (%)

Non-Cy/Flu lymphodepletion regimens

Show code

df_LD <-
  read.xlsx(
    here(
      "dfs",
      "EPIC Data Exports",
      "2024.05.29.lymphodepletionRegimen.xlsx"
    ),
    detectDates = TRUE
  ) %>%
  clean_names()
df_LD <- df_LD %>% filter(cart_num == 1)

df_LD <- df_LD %>%
  mutate(LD_regimen = case_when(
    grepl("cyclophosphamide", dep_regimen, ignore.case = TRUE) &
      grepl("fludarabine", dep_regimen, ignore.case = TRUE) ~ "Cy/Flu",
    .default = "Other"
  ))

df_LD %>% 
  filter(
    upn %in% df$upn,
    LD_regimen == "Other") %>% 
  select(dep_regimen) %>% 
  tbl_summary()

Characteristic	N = 35¹
dep_regimen
Bendamustine	4 (11%)
Bendamustine; Bendamustine	2 (5.7%)
Cyclophosphamide	10 (29%)
Cyclophosphamide + Etoposide + Dex; Etoposide	1 (2.9%)
Cyclophosphamide + Etoposide; Etoposide	6 (17%)
Cyclophosphamide + Mesna	9 (26%)
Cyclophosphamide + Mesna + Etoposide; Etoposide; Neulasta	1 (2.9%)
Fludarabine	1 (2.9%)
Gemcitabine; Start Growth factor; Cyclophosphamide + Mesna	1 (2.9%)
¹ n (%)

Rates of eICAHT by grade

Show code

df %>%
  select(icaht_grade) %>%
  tbl_summary(
    statistic = list(all_categorical() ~ "{n} ({p}%)"),
    digits = all_categorical() ~ 0,
    sort = list(everything() ~ "frequency")
  ) %>%
  bold_labels()

Characteristic	N = 691¹
eICAHT grade
Grade 1	366 (53%)
Grade 0	112 (16%)
Grade 2	111 (16%)
Grade 3	67 (10%)
Grade 4	35 (5%)
¹ n (%)

Rates of grade 3-4 eICAHT by disease type

Show code

df %>%
  select(
    icaht_grade_3_4,
    disease_cat
  ) %>%
  tbl_summary(
    by = disease_cat) %>%
  bold_labels()

Characteristic	Aggressive NHL, N = 318¹	Other indolent NHL, N = 117¹	ALL, N = 99¹	Mantle cell lymphoma, N = 37¹	MM/PCL, N = 120¹
Grade 3-4 eICAHT	35 (11%)	15 (13%)	36 (36%)	4 (11%)	12 (10%)
¹ n (%)

Rates of CRS and neurotoxicity

Show code

df %>%
  select(crs_grade, nt_grade) %>%
  tbl_summary(statistic = list(all_categorical() ~ "{n} ({p}%)"))

Characteristic	N = 691¹
CRS grade
0	175 (25%)
1	224 (32%)
2	257 (37%)
3	23 (3.3%)
4	10 (1.4%)
5	2 (0.3%)
Neurotoxicity grade
0	411 (59%)
1	72 (10%)
2	84 (12%)
3	103 (15%)
4	19 (2.7%)
5	2 (0.3%)
¹ n (%)

Receipt of G-CSF and TPO-RAs between days 0-30 after CAR T-cell infusion

Show code

df %>%
  select(gcsf, tpo_ra) %>%
  tbl_summary() %>%
  bold_labels()

Characteristic	N = 691¹
Received G-CSF	584 (85%)
Received TPO-RA	2 (0.3%)
¹ n (%)

Median follow-up and overall survival

Show code

df <- df %>%
  mutate(
    OS_status = ifelse(is.na(deathdat), 0, 1),
    OS_months = as.duration(cart_date %--% datelast) / dmonths(1)
  )

# Create data frame with OS landmarked at day +30
df_landmark_OS <- df %>% 
  filter(OS_months > 30/30.4167) %>% 
  mutate(OS_months_landmark = OS_months - 30/30.4167)

quantile(prodlim(Hist(OS_months, OS_status) ~ 1, data = df, reverse = TRUE))

Quantiles of the potential follow up time distribution based on the Kaplan-Meier method 
applied to the censored times reversing the roles of event status and censored.

Table of quantiles and corresponding confidence limits:
       q quantile lower upper
   <num>    <num> <num> <num>
1:  0.00       NA    NA    NA
2:  0.25    62.98 58.28    71
3:  0.50    32.92 26.61    37
4:  0.75    12.25 11.40    14
5:  1.00     0.92  0.92     1


Median with interquartile range (IQR):
          Median (IQR)
                <char>
1: 32.92 (12.25;62.98)

Show code

surv_OS <- survfit2(Surv(OS_months, OS_status) ~ 1, data = df)
surv_OS

Call: survfit(formula = Surv(OS_months, OS_status) ~ 1, data = df)

       n events median 0.95LCL 0.95UCL
[1,] 691    307   34.6    27.1    42.6

Show code

# 3-year estimate
summary(surv_OS, times = 36)

Call: survfit(formula = Surv(OS_months, OS_status) ~ 1, data = df)

 time n.risk n.event survival std.err lower 95% CI upper 95% CI
   36    157     274    0.485  0.0231        0.441        0.532

Univariate logistic regression models of grade 3-4 eICAHT

Using log10-transformed laboratory values, except pre-LD hemoglobin due to lack of convergence with log10(pre-LD hemoglobin)

Show code

tbl_uvregression <- df %>%
  select(
    icaht_grade_3_4,
    cart_age,
    gender_desc,
    raceethnih_cat,
    prior_hct,
    disease_cat,
    bm_disease_by_morph_log10,
    bm_disease_by_flow_log10,
    bridging_yn,
    LD_regimen_cat,
    LD_regimen_low_CyFlu_vs_other,
    cart_dose_total_log10,
    car_target,
    costim_domain,
    crs_grade,
    nt_grade,
    iec_hs,
    anc_pre_ld_log10,
    anc_day_3_log10,
    hb_pre_ld,
    plt_pre_ld_log10,
    plt_day_3_log10,
    ldh_pre_ld_log10,
    crp_pre_ld_log10,
    crp_day_0_log10,
    crp_day_3_log10,
    crp_day_5_log10,
    crp_day_7_log10,
    crp_max_log10,
    ferritin_pre_ld_log10,
    ferritin_day_0_log10,
    ferritin_day_3_log10,
    ferritin_day_5_log10,
    ferritin_day_7_log10,
    ferritin_max_log10,
    il6_pre_ld_log10,
    il6_day_0_log10,
    il6_day_3_log10,
    il6_day_5_log10,
    il6_day_7_log10,
    il6_max_log10,
    fibrinogen_pre_ld_log10,
    fibrinogen_day_0_log10,
    fibrinogen_day_3_log10,
    fibrinogen_day_5_log10,
    fibrinogen_day_7_log10,
    fibrinogen_min_log10,
    d_dimer_pre_ld_log10,
    d_dimer_day_0_log10,
    d_dimer_day_3_log10,
    d_dimer_day_5_log10,
    d_dimer_day_7_log10,
    d_dimer_max_log10,
    ptt_pre_ld_log10,
    ptt_day_0_log10,
    ptt_day_3_log10,
    ptt_day_5_log10,
    ptt_day_7_log10,
    ptt_max_log10
  ) %>%
  tbl_uvregression(
    method = glm,
    y = icaht_grade_3_4,
    method.args = list(family = binomial),
    exponentiate = TRUE,
    pvalue_fun = ~ style_pvalue(.x, digits = 2)
  ) %>%
  add_nevent() %>%
  bold_p() %>%
  bold_labels()
tbl_uvregression

Characteristic	N	Event N	OR¹	95% CI¹	p-value
Age	691	102	0.97	0.96, 0.98	<0.001
Sex	691	102
Female			—	—
Male			0.84	0.55, 1.30	0.44
Race/ethnicity	691	102
White			—	—
Non-Hispanic Black or African American			1.19	0.27, 3.73	0.79
Other			0.69	0.32, 1.31	0.29
Prior HCT	691	102
No			—	—
Yes			1.25	0.80, 1.92	0.31
Disease	691	102
Aggressive NHL			—	—
Other indolent NHL			1.19	0.61, 2.23	0.60
ALL			4.62	2.70, 7.95	<0.001
Mantle cell lymphoma			0.98	0.28, 2.65	0.97
MM/PCL			0.90	0.43, 1.75	0.76
Bone marrow involvement by morphology (log10(%))	424	65	1.43	1.22, 1.68	<0.001
Bone marrow involvement by flow cytometry (log10(%))	439	69	1.52	1.29, 1.81	<0.001
Bridging therapy	691	102
No			—	—
Yes			2.05	1.29, 3.35	0.003
Lymphodepletion regimen	691	102
Low-intensity Cy/Flu			—	—
High-intensity Cy/Flu			1.45	0.93, 2.25	0.10
Other			2.44	1.03, 5.35	0.031
Lymphodepletion regimen (low-dose Cy/Flu vs. other)	691	102
Low-intensity Cy/Flu			—	—
Other			1.56	1.02, 2.38	0.040
Total CAR T-cell dose (log10(x 10^6 cells))	691	102	0.42	0.29, 0.60	<0.001
CAR target	691	102
BCMA			—	—
CD19			1.78	0.98, 3.54	0.076
CD20			0.64	0.14, 2.15	0.51
Costimulatory domain	691	102
4-1BB			—	—
CD28			1.02	0.63, 1.62	0.93
Dual 4-1BB and CD28			0.40	0.09, 1.13	0.13
CRS grade	691	102	2.01	1.60, 2.56	<0.001
Neurotoxicity grade	691	102	1.51	1.29, 1.76	<0.001
IEC-HS	691	102
No			—	—
Yes			3.12	1.41, 6.51	0.003
Pre-LD ANC (log10(x 10^3 cells/µL))	690	102	0.15	0.08, 0.24	<0.001
Day +3 ANC (log10(x 10^3 cells/µL))	691	102	0.53	0.47, 0.61	<0.001
Pre-LD Hb (g/dL)	691	102	0.61	0.52, 0.70	<0.001
Pre-LD platelet count (log10(x 10^3 /µL))	691	102	0.06	0.03, 0.11	<0.001
Day +3 platelet count (log10(x 10^3 /µL))	691	102	0.06	0.03, 0.11	<0.001
Pre-LD LDH (log10(U/L))	677	101	9.95	4.72, 21.5	<0.001
Pre-LD CRP (log10(mg/L))	439	58	3.44	2.24, 5.43	<0.001
Day +0 CRP (log10(mg/L))	644	85	3.24	2.21, 4.84	<0.001
Day +3 CRP (log10(mg/L))	643	91	3.54	2.30, 5.65	<0.001
Day +5 CRP (log10(mg/L))	444	81	3.36	2.10, 5.57	<0.001
Day +7 CRP (log10(mg/L))	549	77	3.73	2.36, 6.08	<0.001
Peak CRP (log10(mg/L))	683	98	16.7	7.91, 38.4	<0.001
Pre-LD ferritin (log10(ng/mL))	486	69	5.57	3.42, 9.51	<0.001
Day +0 ferritin (log10(ng/mL))	658	90	9.87	5.82, 17.5	<0.001
Day +3 ferritin (log10(ng/mL))	649	94	9.24	5.65, 15.8	<0.001
Day +5 ferritin (log10(ng/mL))	445	80	6.74	4.17, 11.5	<0.001
Day +7 ferritin (log10(ng/mL))	555	81	5.49	3.65, 8.57	<0.001
Peak ferritin (log10(ng/mL))	691	102	6.90	4.80, 10.2	<0.001
Pre-LD IL-6 (log10(pg/mL))	130	14	4.13	1.58, 11.4	0.004
Day +0 IL-6 (log10(pg/mL))	336	41	2.95	1.68, 5.24	<0.001
Day +3 IL-6 (log10(pg/mL))	476	71	1.44	1.11, 1.86	0.005
Day +5 IL-6 (log10(pg/mL))	356	60	1.56	1.18, 2.07	0.002
Day +7 IL-6 (log10(pg/mL))	413	59	2.13	1.53, 2.99	<0.001
Peak IL-6 (log10(pg/mL))	551	79	2.60	2.00, 3.43	<0.001
Pre-LD fibrinogen (log10(mg/dL))	378	60	3.87	0.53, 29.4	0.19
Day +0 fibrinogen (log10(mg/dL))	606	79	7.19	1.13, 46.4	0.037
Day +3 fibrinogen (log10(mg/dL))	613	85	3.85	0.66, 23.2	0.14
Day +5 fibrinogen (log10(mg/dL))	404	69	0.80	0.16, 4.14	0.79
Day +7 fibrinogen (log10(mg/dL))	508	70	0.68	0.17, 2.78	0.58
Nadir fibrinogen (log10(mg/dL))	661	94	0.07	0.03, 0.18	<0.001
Pre-LD D-dimer (log10(mg/L FEU))	341	57	2.97	1.50, 5.92	0.002
Day +0 D-dimer (log10(mg/L FEU))	557	72	4.85	2.65, 9.00	<0.001
Day +3 D-dimer (log10(mg/L FEU))	561	79	4.20	2.41, 7.41	<0.001
Day +5 D-dimer (log10(mg/L FEU))	373	63	4.01	2.21, 7.44	<0.001
Day +7 D-dimer (log10(mg/L FEU))	455	60	3.58	2.09, 6.19	<0.001
Peak D-dimer (log10(mg/L FEU))	651	92	4.69	3.00, 7.49	<0.001
Pre-LD PTT (log10(seconds))	455	71	5.04	0.18, 114	0.32
Day +0 PTT (log10(seconds))	502	69	7.63	0.43, 111	0.14
Day +3 PTT (log10(seconds))	494	74	7.22	1.05, 45.2	0.038
Day +5 PTT (log10(seconds))	298	55	4.70	0.78, 26.0	0.079
Day +7 PTT (log10(seconds))	402	59	4.81	0.67, 30.0	0.10
Peak PTT (log10(seconds))	606	91	9.09	2.90, 27.9	<0.001
¹ OR = Odds Ratio, CI = Confidence Interval

Show code

df_regression <- tbl_uvregression$table_body %>% 
  mutate(color = case_when(estimate < 1 & p.value < 0.05 ~ "Low",
                           estimate > 1 & p.value < 0.05 ~ "High",
                           .default = "Other"),
         color = as.factor(color))

vp <-
  ggplot(data = df_regression, aes(x = estimate, y = -log2(p.value), color = color)) +
  geom_point() +
  scale_color_manual(values = c("red", "darkgreen", "darkgray")) + 
  geom_hline(yintercept = -log2(0.05), color = "black", linetype = "dashed") +
  geom_hline(yintercept = -log2(0.01), color = "black", linetype = "dashed") +
  geom_vline(xintercept = 1, color = "black", linetype = "dashed") +
  geom_text(aes(x = 19, y = 3, label = "p = 0.05"), color = "black", stat = "unique") +
  geom_text(aes(x = 19, y = 8, label = "p = 0.01"), color = "black", stat = "unique") +
  geom_text(aes(x = 2, y = 75, label = "OR = 1"), color = "black", stat = "unique") +
  scale_x_continuous(name = "Odds of grade 3-4 eICAHT", limits = c(0, 20)) +
  coord_cartesian(clip = "off") +
  theme(legend.position = "none")

options(ggrepel.max.overlaps = Inf)
vp + geom_text_repel(data = filter(df_regression, p.value < 0.05),
                  aes(label = var_label))

Summary statistics of predictors evaluated in univariate models

Show code

df %>%
  select(
    cart_age,
    gender_desc,
    raceethnih_cat,
    prior_hct,
    disease_cat,
    bm_disease_by_morph_log10,
    bm_disease_by_flow_log10,
    bridging_yn,
    LD_regimen_cat,
    LD_regimen_low_CyFlu_vs_other,
    car_target,
    costim_domain,
    cart_dose_total_log10,
    crs_grade,
    nt_grade,
    iec_hs,
    anc_pre_ld_log10,
    anc_day_3_log10,
    hb_pre_ld,
    plt_pre_ld_log10,
    plt_day_3_log10,
    ldh_pre_ld_log10,
    crp_pre_ld_log10,
    crp_day_0_log10,
    crp_day_3_log10,
    crp_day_5_log10,
    crp_day_7_log10,
    crp_max_log10,
    ferritin_pre_ld_log10,
    ferritin_day_0_log10,
    ferritin_day_3_log10,
    ferritin_day_5_log10,
    ferritin_day_7_log10,
    ferritin_max_log10,
    il6_pre_ld_log10,
    il6_day_0_log10,
    il6_day_3_log10,
    il6_day_5_log10,
    il6_day_7_log10,
    il6_max_log10,
    fibrinogen_pre_ld_log10,
    fibrinogen_day_0_log10,
    fibrinogen_day_3_log10,
    fibrinogen_day_5_log10,
    fibrinogen_day_7_log10,
    fibrinogen_min_log10,
    d_dimer_pre_ld_log10,
    d_dimer_day_0_log10,
    d_dimer_day_3_log10,
    d_dimer_day_5_log10,
    d_dimer_day_7_log10,
    d_dimer_max_log10,
    ptt_pre_ld_log10,
    ptt_day_0_log10,
    ptt_day_3_log10,
    ptt_day_5_log10,
    ptt_day_7_log10,
    ptt_max_log10
  ) %>%
  tbl_summary(
    missing_text = "Missing",
    type = list(all_continuous() ~ "continuous2"),
    statistic = list(
      all_continuous() ~ c("{median} ({p25}, {p75})", "{min} - {max}"),
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_categorical() ~ 0,
    sort = list(everything() ~ "frequency")
  ) %>%
  bold_labels()

Characteristic	N = 691¹
Age
Median (IQR)	61 (51, 68)
Range	19 - 84
Sex
Male	437 (63%)
Female	254 (37%)
Race/ethnicity
White	583 (84%)
Other	91 (13%)
Non-Hispanic Black or African American	17 (2%)
Prior HCT	234 (34%)
Disease
Aggressive NHL	318 (46%)
MM/PCL	120 (17%)
Other indolent NHL	117 (17%)
ALL	99 (14%)
Mantle cell lymphoma	37 (5%)
Bone marrow involvement by morphology (log10(%))
Median (IQR)	-2.00 (-2.00, 1.48)
Range	-2.00 - 2.00
Missing	267
Bone marrow involvement by flow cytometry (log10(%))
Median (IQR)	-1.28 (-2.00, 1.29)
Range	-3.00 - 1.99
Missing	252
Bridging therapy	422 (61%)
Lymphodepletion regimen
Low-intensity Cy/Flu	403 (58%)
High-intensity Cy/Flu	253 (37%)
Other	35 (5%)
Lymphodepletion regimen (low-dose Cy/Flu vs. other)
Low-intensity Cy/Flu	403 (58%)
Other	288 (42%)
CAR target
CD19	526 (76%)
BCMA	120 (17%)
CD20	45 (7%)
Costimulatory domain
4-1BB	459 (66%)
CD28	187 (27%)
Dual 4-1BB and CD28	45 (7%)
Total CAR T-cell dose (log10(x 10^6 cells))
Median (IQR)	2.23 (2.04, 2.60)
Range	0.69 - 4.64
CRS grade
2	257 (37%)
1	224 (32%)
0	175 (25%)
3	23 (3%)
4	10 (1%)
5	2 (0%)
Neurotoxicity grade
0	411 (59%)
3	103 (15%)
2	84 (12%)
1	72 (10%)
4	19 (3%)
5	2 (0%)
IEC-HS	33 (5%)
Pre-LD ANC (log10(x 10^3 cells/µL))
Median (IQR)	0.45 (0.25, 0.63)
Range	-2.00 - 1.73
Missing	1
Day +3 ANC (log10(x 10^3 cells/µL))
Median (IQR)	3.04 (2.52, 3.29)
Range	-2.00 - 3.95
Pre-LD Hb (g/dL)
Median (IQR)	10.70 (9.50, 12.35)
Range	7.00 - 17.10
Pre-LD platelet count (log10(x 10^3 /µL))
Median (IQR)	2.15 (1.95, 2.31)
Range	0.78 - 2.90
Day +3 platelet count (log10(x 10^3 /µL))
Median (IQR)	5.00 (4.75, 5.18)
Range	3.70 - 5.68
Pre-LD LDH (log10(U/L))
Median (IQR)	2.26 (2.16, 2.41)
Range	1.66 - 4.35
Missing	14
Pre-LD CRP (log10(mg/L))
Median (IQR)	0.81 (0.26, 1.38)
Range	-0.70 - 2.40
Missing	252
Day +0 CRP (log10(mg/L))
Median (IQR)	0.99 (0.52, 1.52)
Range	-0.52 - 2.55
Missing	47
Day +3 CRP (log10(mg/L))
Median (IQR)	1.54 (0.97, 1.97)
Range	-0.70 - 2.68
Missing	48
Day +5 CRP (log10(mg/L))
Median (IQR)	1.49 (1.03, 1.88)
Range	-0.40 - 2.66
Missing	247
Day +7 CRP (log10(mg/L))
Median (IQR)	1.35 (0.84, 1.75)
Range	-0.40 - 2.61
Missing	142
Peak CRP (log10(mg/L))
Median (IQR)	1.90 (1.51, 2.18)
Range	-0.15 - 2.68
Missing	8
Pre-LD ferritin (log10(ng/mL))
Median (IQR)	2.57 (2.08, 3.03)
Range	0.48 - 4.56
Missing	205
Day +0 ferritin (log10(ng/mL))
Median (IQR)	2.67 (2.30, 3.06)
Range	1.15 - 4.24
Missing	33
Day +3 ferritin (log10(ng/mL))
Median (IQR)	2.76 (2.35, 3.16)
Range	0.85 - 4.98
Missing	42
Day +5 ferritin (log10(ng/mL))
Median (IQR)	2.84 (2.34, 3.21)
Range	1.00 - 5.28
Missing	246
Day +7 ferritin (log10(ng/mL))
Median (IQR)	2.88 (2.40, 3.25)
Range	0.78 - 6.00
Missing	136
Peak ferritin (log10(ng/mL))
Median (IQR)	3.10 (2.59, 3.54)
Range	1.15 - 6.00
Pre-LD IL-6 (log10(pg/mL))
Median (IQR)	0.78 (0.48, 1.15)
Range	0.00 - 2.50
Missing	561
Day +0 IL-6 (log10(pg/mL))
Median (IQR)	0.90 (0.60, 1.33)
Range	0.00 - 3.07
Missing	355
Day +3 IL-6 (log10(pg/mL))
Median (IQR)	1.65 (1.18, 2.34)
Range	0.00 - 4.53
Missing	215
Day +5 IL-6 (log10(pg/mL))
Median (IQR)	1.68 (1.11, 2.47)
Range	0.30 - 4.52
Missing	335
Day +7 IL-6 (log10(pg/mL))
Median (IQR)	1.58 (1.08, 2.15)
Range	0.00 - 4.23
Missing	278
Peak IL-6 (log10(pg/mL))
Median (IQR)	2.20 (1.62, 3.09)
Range	0.30 - 5.32
Missing	140
Pre-LD fibrinogen (log10(mg/dL))
Median (IQR)	2.61 (2.53, 2.71)
Range	2.11 - 2.95
Missing	313
Day +0 fibrinogen (log10(mg/dL))
Median (IQR)	2.64 (2.56, 2.74)
Range	2.12 - 3.04
Missing	85
Day +3 fibrinogen (log10(mg/dL))
Median (IQR)	2.70 (2.60, 2.78)
Range	2.15 - 3.00
Missing	78
Day +5 fibrinogen (log10(mg/dL))
Median (IQR)	2.66 (2.56, 2.76)
Range	1.97 - 2.97
Missing	287
Day +7 fibrinogen (log10(mg/dL))
Median (IQR)	2.62 (2.50, 2.73)
Range	1.89 - 2.97
Missing	183
Nadir fibrinogen (log10(mg/dL))
Median (IQR)	2.41 (2.19, 2.53)
Range	1.53 - 2.95
Missing	30
Pre-LD D-dimer (log10(mg/L FEU))
Median (IQR)	-0.09 (-0.35, 0.20)
Range	-0.57 - 1.51
Missing	350
Day +0 D-dimer (log10(mg/L FEU))
Median (IQR)	-0.11 (-0.34, 0.20)
Range	-0.57 - 1.30
Missing	134
Day +3 D-dimer (log10(mg/L FEU))
Median (IQR)	0.06 (-0.21, 0.38)
Range	-0.57 - 1.50
Missing	130
Day +5 D-dimer (log10(mg/L FEU))
Median (IQR)	0.11 (-0.17, 0.39)
Range	-0.57 - 1.58
Missing	318
Day +7 D-dimer (log10(mg/L FEU))
Median (IQR)	0.12 (-0.20, 0.45)
Range	-0.57 - 1.60
Missing	236
Peak D-dimer (log10(mg/L FEU))
Median (IQR)	0.41 (0.05, 0.80)
Range	-0.55 - 1.60
Missing	40
Pre-LD PTT (log10(seconds))
Median (IQR)	1.46 (1.41, 1.51)
Range	1.30 - 2.02
Missing	236
Day +0 PTT (log10(seconds))
Median (IQR)	1.48 (1.45, 1.53)
Range	1.34 - 2.15
Missing	189
Day +3 PTT (log10(seconds))
Median (IQR)	1.53 (1.48, 1.59)
Range	1.32 - 2.19
Missing	197
Day +5 PTT (log10(seconds))
Median (IQR)	1.52 (1.46, 1.59)
Range	1.34 - 2.30
Missing	393
Day +7 PTT (log10(seconds))
Median (IQR)	1.51 (1.45, 1.57)
Range	1.32 - 2.23
Missing	289
Peak PTT (log10(seconds))
Median (IQR)	1.58 (1.52, 1.65)
Range	1.32 - 2.30
Missing	85
¹ n (%)

Calculate sample size required for predicting grade 3-4 eICAHT (based on Riley et al., BMJ 2020)

Show code

# Set Cox-Snell R squared = 0.1
# 6 predictors
pmsampsize(
  type = "b",
  nagrsquared = NA,
  csrsquared = 0.1,
  rsquared = NA,
  parameters = 6,
  shrinkage = 0.9,
  prevalence = 0.16,
  cstatistic = NA,
  seed = 123,
  rate = NA,
  timepoint = NA,
  meanfup = NA,
  intercept = NA,
  sd = NA,
  mmoe = 1.1)

NB: Assuming 0.05 acceptable difference in apparent & adjusted R-squared 
NB: Assuming 0.05 margin of error in estimation of intercept 
NB: Events per Predictor Parameter (EPP) assumes prevalence = 0.16  
 
           Samp_size Shrinkage Parameter CS_Rsq Max_Rsq Nag_Rsq   EPP
Criteria 1       510     0.900         6    0.1   0.585   0.171 13.60
Criteria 2       192     0.774         6    0.1   0.585   0.171  5.12
Criteria 3       207     0.900         6    0.1   0.585   0.171  5.52
Final            510     0.900         6    0.1   0.585   0.171 13.60
 
 Minimum sample size required for new model development based on user inputs = 510, 
 with 82 events (assuming an outcome prevalence = 0.16) and an EPP = 13.6

Development and validation of multivariable logistic regression model of grade 3-4 eICAHT: disease type, pre-LD ANC, pre-LD platelet count, pre-LD LDH, and pre-LD ferritin

Model development and validation using {tidymodels}

Create training and test sets (70%/30%)

Show code

# Create data frame for tidy model
df_tidy <- df %>%
  mutate(
    disease_cat = ifelse(disease_cat == "ALL", "ALL", "Other"),
    disease_cat = factor(disease_cat, levels = c("ALL", "Other")),
    bridging_yn = factor(bridging_yn, levels = c("No", "Yes"))
  ) %>%
  select(
    upn,
    icaht_grade_3_4,
    disease_cat,
    bridging_yn,
    LD_regimen_low_CyFlu_vs_other,
    anc_pre_ld_log10,
    anc_day_3_log10,
    plt_pre_ld_log10,
    plt_day_3_log10,
    ldh_pre_ld_log10,
    ferritin_pre_ld_log10,
    ferritin_day_0_log10,
    ferritin_day_3_log10,
    crp_day_3_log10,
    d_dimer_day_3_log10
  ) %>%
  mutate(icaht_grade_3_4 = factor(icaht_grade_3_4, levels = c("0", "1")))

# Split df into training set and testing set
set.seed(100)
df_split <-
  initial_split(df_tidy, prop = 0.7, strata = icaht_grade_3_4)
df_train <- training(df_split)
df_test <- testing(df_split)

df_train %>%
  count(icaht_grade_3_4) %>%
  mutate(prop = n / sum(n))

  icaht_grade_3_4   n      prop
1               0 412 0.8530021
2               1  71 0.1469979

Show code

df_test %>%
  count(icaht_grade_3_4) %>%
  mutate(prop = n / sum(n))

  icaht_grade_3_4   n      prop
1               0 177 0.8509615
2               1  31 0.1490385

Build and train logistic regression model with LASSO regularization

Show code

# Create model recipe to pre-process data
recipe <- recipe(icaht_grade_3_4 ~ ., data = df_train) %>%
  update_role(upn, new_role = "ID") %>% # Assign upn to identifier role (as opposed to predictor or outcome)
  update_role_requirements("ID", bake = FALSE) %>% 
  step_impute_bag(
    # Impute missing values using all other predictors
    # No missing values for disease category, LD regimen, bridging therapy, pre-LD plt, or day +3 ANC/plt
    anc_pre_ld_log10,
    ldh_pre_ld_log10,
    ferritin_pre_ld_log10,
    ferritin_day_0_log10,
    ferritin_day_3_log10,
    crp_day_3_log10,
    d_dimer_day_3_log10,
    seed_val = 100
  ) %>%
  # step_zv(all_numeric(), -all_outcomes()) %>% # Remove variables containing only a single value
  step_normalize(all_numeric(), -all_outcomes()) %>% # Normalize numeric data
  step_dummy(disease_cat, bridging_yn, LD_regimen_low_CyFlu_vs_other) # Create dummy variables (nominal to numeric binary)

# Prep (apply pre-processing steps) and tidy the recipe (return data frame of pre-processing steps)
prep(recipe) %>% 
  tidy()

# A tibble: 3 × 6
  number operation type       trained skip  id              
   <int> <chr>     <chr>      <lgl>   <lgl> <chr>           
1      1 step      impute_bag TRUE    FALSE impute_bag_mnEZ1
2      2 step      normalize  TRUE    FALSE normalize_RkOpV 
3      3 step      dummy      TRUE    FALSE dummy_Tbmf1

Show code

# Tidy step_normalize (extract mean and SD)
prep(recipe) %>% 
  tidy(number = 2)

# A tibble: 20 × 4
   terms                 statistic value id             
   <chr>                 <chr>     <dbl> <chr>          
 1 anc_pre_ld_log10      mean      0.408 normalize_RkOpV
 2 anc_day_3_log10       mean      2.61  normalize_RkOpV
 3 plt_pre_ld_log10      mean      2.09  normalize_RkOpV
 4 plt_day_3_log10       mean      4.96  normalize_RkOpV
 5 ldh_pre_ld_log10      mean      2.32  normalize_RkOpV
 6 ferritin_pre_ld_log10 mean      2.53  normalize_RkOpV
 7 ferritin_day_0_log10  mean      2.69  normalize_RkOpV
 8 ferritin_day_3_log10  mean      2.75  normalize_RkOpV
 9 crp_day_3_log10       mean      1.42  normalize_RkOpV
10 d_dimer_day_3_log10   mean      0.103 normalize_RkOpV
11 anc_pre_ld_log10      sd        0.405 normalize_RkOpV
12 anc_day_3_log10       sd        1.33  normalize_RkOpV
13 plt_pre_ld_log10      sd        0.318 normalize_RkOpV
14 plt_day_3_log10       sd        0.310 normalize_RkOpV
15 ldh_pre_ld_log10      sd        0.241 normalize_RkOpV
16 ferritin_pre_ld_log10 sd        0.645 normalize_RkOpV
17 ferritin_day_0_log10  sd        0.525 normalize_RkOpV
18 ferritin_day_3_log10  sd        0.576 normalize_RkOpV
19 crp_day_3_log10       sd        0.645 normalize_RkOpV
20 d_dimer_day_3_log10   sd        0.388 normalize_RkOpV

Show code

# Build model specification
spec_model <- logistic_reg(penalty = 0.1, mixture = 1) %>%
  set_engine("glmnet")

# Create model formula
model_formula <-
  icaht_grade_3_4 ~ disease_cat_Other + anc_pre_ld_log10 + plt_pre_ld_log10 + ldh_pre_ld_log10 + ferritin_pre_ld_log10

# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
  add_model(spec_model, formula = model_formula) %>% 
  add_recipe(recipe)

# Fit model on training set
fit_train <- wf %>%
  fit(data = df_train)

# Extract estimated coefficients
fit_train %>%
  extract_fit_parsnip() %>%
  tidy()

# A tibble: 6 × 3
  term                  estimate penalty
  <chr>                    <dbl>   <dbl>
1 (Intercept)           -1.77        0.1
2 disease_cat_Other      0           0.1
3 anc_pre_ld_log10      -0.0669      0.1
4 plt_pre_ld_log10      -0.135       0.1
5 ldh_pre_ld_log10       0           0.1
6 ferritin_pre_ld_log10  0.00556     0.1

Tune LASSO parameters

Show code

# Build set of bootstrap resamples
set.seed(123)
bootstrap_resamples <-
  bootstraps(df_train, strata = icaht_grade_3_4)

# Create tuning specifications
# Set penalty = tune() instead of to a single value
tune_spec <- logistic_reg(penalty = tune(), mixture = 1) %>%
  set_engine("glmnet")

# Use penalty() to set up an appropriate grid
lambda_grid <- grid_regular(penalty(), levels = 5)

# Tune grid using workflow object
doParallel::registerDoParallel()

wf_tune <- workflow() %>% 
  add_model(tune_spec, formula = model_formula) %>% 
  add_recipe(recipe)

set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
                        resamples = bootstrap_resamples,
                        grid = lambda_grid)

# lasso_grid <- tune_grid(
#   wf %>% update_model(tune_spec, formula = model_formula),
#   resamples = bootstrap_resamples,
#   grid = lambda_grid
# )

lasso_grid %>% collect_metrics()

# A tibble: 15 × 7
        penalty .metric     .estimator   mean     n std_err .config   
          <dbl> <chr>       <chr>       <dbl> <int>   <dbl> <chr>     
 1 0.0000000001 accuracy    binary     0.874     25 0.00302 Preproces…
 2 0.0000000001 brier_class binary     0.0999    25 0.00189 Preproces…
 3 0.0000000001 roc_auc     binary     0.778     25 0.00824 Preproces…
 4 0.0000000316 accuracy    binary     0.874     25 0.00302 Preproces…
 5 0.0000000316 brier_class binary     0.0999    25 0.00189 Preproces…
 6 0.0000000316 roc_auc     binary     0.778     25 0.00824 Preproces…
 7 0.00001      accuracy    binary     0.874     25 0.00302 Preproces…
 8 0.00001      brier_class binary     0.0999    25 0.00189 Preproces…
 9 0.00001      roc_auc     binary     0.778     25 0.00824 Preproces…
10 0.00316      accuracy    binary     0.875     25 0.00295 Preproces…
11 0.00316      brier_class binary     0.0997    25 0.00186 Preproces…
12 0.00316      roc_auc     binary     0.778     25 0.00831 Preproces…
13 1            accuracy    binary     0.855     25 0.00293 Preproces…
14 1            brier_class binary     0.124     25 0.00207 Preproces…
15 1            roc_auc     binary     0.5       25 0       Preproces…

Show code

# Visualize performance with regularization parameter
lasso_grid %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean, color = .metric)) +
  geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
  geom_line(linewidth = 1.5) +
  facet_wrap(~ .metric, scales = "free", nrow = 2) +
  scale_x_log10() +
  theme(legend.position = "none")

Show code

# Select highest AUROC
highest_auroc <- lasso_grid %>%
  select_best(metric = "roc_auc")

# Finalize workflow
wf_final <-
  finalize_workflow(wf_tune,
                    highest_auroc)

# Visualize most important variables
wf_final %>%
  fit(data = df_train) %>%
  pull_workflow_fit() %>%
  vi(lambda = highest_auroc$penalty) %>%
  mutate(Importance = abs(Importance),
         Variable = fct_reorder(Variable, Importance)) %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL)

Fit final best model on training set and evaluate on test set

Show code

# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
                      df_split)

# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>% 
  extract_workflow()
wf_final_trained

══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()

── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps

• step_impute_bag()
• step_normalize()
• step_dummy()

── Model ─────────────────────────────────────────────────────────────

Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.123300
2   2  2.56 0.112400
3   2  4.94 0.102400
4   3  7.05 0.093280
5   4  9.23 0.084990
6   4 11.35 0.077440
7   4 13.08 0.070560
8   4 14.52 0.064300
9   4 15.73 0.058580
10  4 16.75 0.053380
11  4 17.62 0.048640
12  4 18.36 0.044320
13  4 18.99 0.040380
14  4 19.54 0.036790
15  4 20.00 0.033520
16  4 20.40 0.030550
17  4 20.74 0.027830
18  4 21.04 0.025360
19  5 21.29 0.023110
20  5 21.52 0.021050
21  5 21.71 0.019180
22  5 21.88 0.017480
23  5 22.02 0.015930
24  5 22.14 0.014510
25  5 22.24 0.013220
26  5 22.33 0.012050
27  5 22.40 0.010980
28  5 22.47 0.010000
29  5 22.52 0.009114
30  5 22.56 0.008304
31  5 22.60 0.007566
32  5 22.63 0.006894
33  5 22.66 0.006282
34  5 22.68 0.005724
35  5 22.70 0.005215
36  5 22.71 0.004752
37  5 22.73 0.004330
38  5 22.74 0.003945
39  5 22.75 0.003595
40  5 22.75 0.003275
41  5 22.76 0.002984
42  5 22.76 0.002719
43  5 22.77 0.002478
44  5 22.77 0.002258
45  5 22.78 0.002057
46  5 22.78 0.001874

...
and 6 more lines.

Show code

# saveRDS(wf_final_trained, "eIPMPre.rds")

# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
  extract_fit_parsnip()
final_fit_mod

parsnip model object


Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.123300
2   2  2.56 0.112400
3   2  4.94 0.102400
4   3  7.05 0.093280
5   4  9.23 0.084990
6   4 11.35 0.077440
7   4 13.08 0.070560
8   4 14.52 0.064300
9   4 15.73 0.058580
10  4 16.75 0.053380
11  4 17.62 0.048640
12  4 18.36 0.044320
13  4 18.99 0.040380
14  4 19.54 0.036790
15  4 20.00 0.033520
16  4 20.40 0.030550
17  4 20.74 0.027830
18  4 21.04 0.025360
19  5 21.29 0.023110
20  5 21.52 0.021050
21  5 21.71 0.019180
22  5 21.88 0.017480
23  5 22.02 0.015930
24  5 22.14 0.014510
25  5 22.24 0.013220
26  5 22.33 0.012050
27  5 22.40 0.010980
28  5 22.47 0.010000
29  5 22.52 0.009114
30  5 22.56 0.008304
31  5 22.60 0.007566
32  5 22.63 0.006894
33  5 22.66 0.006282
34  5 22.68 0.005724
35  5 22.70 0.005215
36  5 22.71 0.004752
37  5 22.73 0.004330
38  5 22.74 0.003945
39  5 22.75 0.003595
40  5 22.75 0.003275
41  5 22.76 0.002984
42  5 22.76 0.002719
43  5 22.77 0.002478
44  5 22.77 0.002258
45  5 22.78 0.002057
46  5 22.78 0.001874
47  5 22.78 0.001708
48  5 22.78 0.001556
49  5 22.78 0.001418
50  5 22.78 0.001292
51  5 22.79 0.001177
52  5 22.79 0.001073

Show code

# Extract estimated coefficients from final_fit
df_final_fit_coefs <- final_fit %>%
  extract_fit_parsnip() %>%
  tidy()
df_final_fit_coefs

# A tibble: 6 × 3
  term                  estimate penalty
  <chr>                    <dbl>   <dbl>
1 (Intercept)             -1.97  0.00316
2 disease_cat_Other       -0.212 0.00316
3 anc_pre_ld_log10        -0.533 0.00316
4 plt_pre_ld_log10        -0.278 0.00316
5 ldh_pre_ld_log10         0.477 0.00316
6 ferritin_pre_ld_log10    0.457 0.00316

Show code

# Compute model-specific variable importance scores for fitted model
final_fit %>% 
  extract_fit_parsnip() %>% 
  vip()

Show code

# Evaluate final model on test set
final_fit %>% collect_metrics()

# A tibble: 3 × 4
  .metric     .estimator .estimate .config             
  <chr>       <chr>          <dbl> <chr>               
1 accuracy    binary        0.885  Preprocessor1_Model1
2 roc_auc     binary        0.873  Preprocessor1_Model1
3 brier_class binary        0.0868 Preprocessor1_Model1

Show code

# Plot calibration curve for test set using {runway}
# Augment df_test (i.e., add columns for predictions)
df_mod_pre_ferritin_pre_ld <- wf_final_trained %>%
  augment(new_data = df_test) %>% 
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

p_cal_pre_ferritin_pre_ld <- cal_plot(
  df_mod_pre_ferritin_pre_ld,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPre"
)
p_cal_pre_ferritin_pre_ld

Show code

cal_plot(
  df_mod_pre_ferritin_pre_ld,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 4,
  plot_title = "eIPMPre"
)

Show code

# Plot ROC curve for test set
p_roc_pre_ferritin_pre_ld <- final_fit %>%
  collect_predictions() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() +
  labs(title = "eIPMPre")
p_roc_pre_ferritin_pre_ld

Show code

# Plot threshold-performance plot for test set
runway::threshperf_plot(
  df_mod_pre_ferritin_pre_ld,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1"
)

Calculate sensitivity/specificity of final model on test dataset using an optimal cutpoint based on the Youden index

Show code

cp <- cutpointr::cutpointr(
  data = df_mod_pre_ferritin_pre_ld,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)

Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

   AUC   n n_pos n_neg
 0.873 208    31   177

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
           0.1173 0.6191 0.7212      0.9355      0.6836 29  2 56 121

Predictor summary: 
    Data       Min.         5%    1st Qu.     Median      Mean
 Overall 0.01485423 0.02350964 0.04473573 0.09134867 0.1566652
       0 0.01485423 0.02220945 0.03954348 0.07580228 0.1142895
       1 0.05883319 0.11199019 0.17600774 0.28972388 0.3986170
   3rd Qu.       95%      Max.        SD NAs
 0.1948714 0.6009895 0.9946001 0.1793896   0
 0.1519401 0.3293760 0.6288580 0.1080376   0
 0.6244815 0.9012574 0.9946001 0.2871989   0

Decision curve analysis of final model fitted on test set

Show code

df_mod_pre_ferritin_pre_ld <- df_mod_pre_ferritin_pre_ld %>% 
  mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))

dcurves::dca(icaht_grade_3_4 ~ .pred_1, data = df_mod_pre_ferritin_pre_ld,
    thresholds = seq(0, 0.35, by = 0.01)) %>% 
  plot(smooth = TRUE) +
  labs(x = "Threshold Probability (Physician/Patient Preference)")

Evaluate final model on subsets of test set (ALL vs. other disease types)

ALL

Show code

# Define test subset
df_test_all <- df_test %>% filter(disease_cat == "ALL")

# Augment test subset (i.e., add columns for predictions)
df_mod_pre_ferritin_pre_ld_all <- wf_final_trained %>%
  augment(new_data = df_test_all) 

# Calculate accuracy and AUROC
model_metrics <- metric_set(yardstick::accuracy, roc_auc)

df_mod_pre_ferritin_pre_ld_all %>%
  ungroup() %>%
  model_metrics(truth = icaht_grade_3_4,
                estimate = .pred_class,
                .pred_1,
                event_level = "second")

# A tibble: 2 × 3
  .metric  .estimator .estimate
  <chr>    <chr>          <dbl>
1 accuracy binary         0.714
2 roc_auc  binary         0.873

Show code

# Plot calibration curve using {runway}
df_mod_pre_ferritin_pre_ld_all <- df_mod_pre_ferritin_pre_ld_all %>%
  mutate(.pred_class = as.numeric(as.character(.pred_class))) %>% 
  ungroup()

cal_plot(
  df_mod_pre_ferritin_pre_ld_all,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 3,
  plot_title = "eIPMPre (ALL)"
)

Show code

# Plot ROC curve
df_mod_pre_ferritin_pre_ld_all %>%
  ungroup() %>% 
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() + 
  labs(title = "eIPMPre (ALL)")

Show code

# Calculate sensitivity/specificity using an optimal cutpoint based on the Youden index
cp <- cutpointr::cutpointr(
  data = df_mod_pre_ferritin_pre_ld_all,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)

Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

    AUC  n n_pos n_neg
 0.8727 21    11    10

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp tn
           0.2141 0.7091 0.8571      0.9091         0.8 10  1  2  8

Predictor summary: 
    Data       Min.         5%   1st Qu.    Median      Mean
 Overall 0.03447581 0.04478461 0.1464598 0.2521307 0.3632346
       0 0.03447581 0.03911477 0.0940455 0.1513242 0.1813287
       1 0.11784828 0.16598329 0.2757682 0.5768907 0.5286036
   3rd Qu.       95%      Max.        SD NAs
 0.6139658 0.8931991 0.8944719 0.2951087   0
 0.1740077 0.4605932 0.6139658 0.1670296   0
 0.7603098 0.8938355 0.8944719 0.2930836   0

Other disease types

Show code

# Define ther disease types test subset
df_test_other <- df_test %>% filter(disease_cat == "Other")

# Augment test subset (i.e., add columns for predictions)
df_mod_pre_ferritin_pre_ld_other <- wf_final_trained %>%
  augment(new_data = df_test_other) 

# Calculate accuracy and AUROC
df_mod_pre_ferritin_pre_ld_other %>%
  ungroup() %>%
  model_metrics(truth = icaht_grade_3_4,
                estimate = .pred_class,
                .pred_1,
                event_level = "second")

# A tibble: 2 × 3
  .metric  .estimator .estimate
  <chr>    <chr>          <dbl>
1 accuracy binary         0.904
2 roc_auc  binary         0.846

Show code

# Plot calibration curve using {runway}
df_mod_pre_ferritin_pre_ld_other <- df_mod_pre_ferritin_pre_ld_other %>%
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

cal_plot(
  df_mod_pre_ferritin_pre_ld_other,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE
)

Show code

# Plot ROC curve
df_mod_pre_ferritin_pre_ld_other %>%
  ungroup() %>% 
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot()

Show code

# Calculate sensitivity/specificity using an optimal cutpoint based on the Youden index
cp <- cutpointr::cutpointr(
  data = df_mod_pre_ferritin_pre_ld_other,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)

Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

    AUC   n n_pos n_neg
 0.8455 187    20   167

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
           0.1173 0.6066 0.7273         0.9      0.7066 18  2 49 118

Predictor summary: 
    Data       Min.         5%    1st Qu.     Median      Mean
 Overall 0.01485423 0.02281042 0.04275294 0.08216188 0.1334676
       0 0.01485423 0.02136819 0.03939678 0.07378063 0.1102752
       1 0.05883319 0.10424424 0.16568519 0.21035841 0.3271243
   3rd Qu.       95%      Max.        SD NAs
 0.1716741 0.3673488 0.9946001 0.1452267   0
 0.1455395 0.3265144 0.6288580 0.1028384   0
 0.3897169 0.9123707 0.9946001 0.2643194   0

Development and validation of multivariable logistic regression model of grade 3-4 ICAHT: disease type, receipt of bridging therapy, pre-LD ANC, pre-LD platelet count, pre-LD LDH, and pre-LD ferritin

Model development and validation using {tidymodels}

Build and train logistic regression model with LASSO regularization

Show code

# Create model formula
model_formula <-
  icaht_grade_3_4 ~ disease_cat_Other + bridging_yn_Yes + anc_pre_ld_log10 +
  plt_pre_ld_log10 + ldh_pre_ld_log10 + ferritin_pre_ld_log10

# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
  add_model(spec_model, formula = model_formula) %>% 
  add_recipe(recipe)

# Fit model on training set
fit_train <- wf %>%
  fit(data = df_train)

# Extract estimated coefficients
fit_train %>%
  extract_fit_parsnip() %>%
  tidy()

# A tibble: 7 × 3
  term                  estimate penalty
  <chr>                    <dbl>   <dbl>
1 (Intercept)           -1.77        0.1
2 disease_cat_Other      0           0.1
3 bridging_yn_Yes        0           0.1
4 anc_pre_ld_log10      -0.0669      0.1
5 plt_pre_ld_log10      -0.135       0.1
6 ldh_pre_ld_log10       0           0.1
7 ferritin_pre_ld_log10  0.00556     0.1

Tune LASSO parameters

Show code

# Tune grid using workflow object
doParallel::registerDoParallel()

wf_tune <- workflow() %>% 
  add_model(tune_spec, formula = model_formula) %>% 
  add_recipe(recipe)

set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
                        resamples = bootstrap_resamples,
                        grid = lambda_grid)

lasso_grid %>% collect_metrics()

# A tibble: 15 × 7
        penalty .metric     .estimator   mean     n std_err .config   
          <dbl> <chr>       <chr>       <dbl> <int>   <dbl> <chr>     
 1 0.0000000001 accuracy    binary     0.876     25 0.00294 Preproces…
 2 0.0000000001 brier_class binary     0.0999    25 0.00188 Preproces…
 3 0.0000000001 roc_auc     binary     0.775     25 0.00799 Preproces…
 4 0.0000000316 accuracy    binary     0.876     25 0.00294 Preproces…
 5 0.0000000316 brier_class binary     0.0999    25 0.00188 Preproces…
 6 0.0000000316 roc_auc     binary     0.775     25 0.00799 Preproces…
 7 0.00001      accuracy    binary     0.876     25 0.00294 Preproces…
 8 0.00001      brier_class binary     0.0999    25 0.00188 Preproces…
 9 0.00001      roc_auc     binary     0.775     25 0.00799 Preproces…
10 0.00316      accuracy    binary     0.875     25 0.00286 Preproces…
11 0.00316      brier_class binary     0.0996    25 0.00184 Preproces…
12 0.00316      roc_auc     binary     0.776     25 0.00814 Preproces…
13 1            accuracy    binary     0.855     25 0.00293 Preproces…
14 1            brier_class binary     0.124     25 0.00207 Preproces…
15 1            roc_auc     binary     0.5       25 0       Preproces…

Show code

# Visualize performance with regularization parameter
lasso_grid %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean, color = .metric)) +
  geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
  geom_line(linewidth = 1.5) +
  facet_wrap( ~ .metric, scales = "free", nrow = 2) +
  scale_x_log10() +
  theme(legend.position = "none")

Show code

# Select highest accuracy
highest_auroc <- lasso_grid %>%
  select_best(metric = "roc_auc")

# Finalize workflow
wf_final <-
  finalize_workflow(wf_tune,
                    highest_auroc)

# Visualize most important variables
wf_final %>%
  fit(data = df_train) %>%
  pull_workflow_fit() %>%
  vi(lambda = highest_auroc$penalty) %>%
  mutate(Importance = abs(Importance),
         Variable = fct_reorder(Variable, Importance)) %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL)

Fit final best model on training set and evaluate on test set

Show code

# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
                      df_split)

# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>% 
  extract_workflow()
wf_final_trained

══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()

── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps

• step_impute_bag()
• step_normalize()
• step_dummy()

── Model ─────────────────────────────────────────────────────────────

Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.123300
2   2  2.56 0.112400
3   2  4.94 0.102400
4   3  7.05 0.093280
5   4  9.23 0.084990
6   4 11.35 0.077440
7   4 13.08 0.070560
8   4 14.52 0.064300
9   4 15.73 0.058580
10  4 16.75 0.053380
11  4 17.62 0.048640
12  4 18.36 0.044320
13  4 18.99 0.040380
14  4 19.54 0.036790
15  4 20.00 0.033520
16  4 20.40 0.030550
17  4 20.74 0.027830
18  5 21.08 0.025360
19  6 21.41 0.023110
20  6 21.71 0.021050
21  6 21.96 0.019180
22  6 22.17 0.017480
23  6 22.35 0.015930
24  6 22.50 0.014510
25  6 22.63 0.013220
26  6 22.74 0.012050
27  6 22.84 0.010980
28  6 22.92 0.010000
29  6 22.98 0.009114
30  6 23.04 0.008304
31  6 23.09 0.007566
32  6 23.13 0.006894
33  6 23.16 0.006282
34  6 23.19 0.005724
35  6 23.21 0.005215
36  6 23.23 0.004752
37  6 23.25 0.004330
38  6 23.26 0.003945
39  6 23.27 0.003595
40  6 23.28 0.003275
41  6 23.29 0.002984
42  6 23.30 0.002719
43  6 23.30 0.002478
44  6 23.31 0.002258
45  6 23.31 0.002057
46  6 23.31 0.001874

...
and 7 more lines.

Show code

# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
  extract_fit_parsnip()
final_fit_mod

parsnip model object


Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.123300
2   2  2.56 0.112400
3   2  4.94 0.102400
4   3  7.05 0.093280
5   4  9.23 0.084990
6   4 11.35 0.077440
7   4 13.08 0.070560
8   4 14.52 0.064300
9   4 15.73 0.058580
10  4 16.75 0.053380
11  4 17.62 0.048640
12  4 18.36 0.044320
13  4 18.99 0.040380
14  4 19.54 0.036790
15  4 20.00 0.033520
16  4 20.40 0.030550
17  4 20.74 0.027830
18  5 21.08 0.025360
19  6 21.41 0.023110
20  6 21.71 0.021050
21  6 21.96 0.019180
22  6 22.17 0.017480
23  6 22.35 0.015930
24  6 22.50 0.014510
25  6 22.63 0.013220
26  6 22.74 0.012050
27  6 22.84 0.010980
28  6 22.92 0.010000
29  6 22.98 0.009114
30  6 23.04 0.008304
31  6 23.09 0.007566
32  6 23.13 0.006894
33  6 23.16 0.006282
34  6 23.19 0.005724
35  6 23.21 0.005215
36  6 23.23 0.004752
37  6 23.25 0.004330
38  6 23.26 0.003945
39  6 23.27 0.003595
40  6 23.28 0.003275
41  6 23.29 0.002984
42  6 23.30 0.002719
43  6 23.30 0.002478
44  6 23.31 0.002258
45  6 23.31 0.002057
46  6 23.31 0.001874
47  6 23.32 0.001708
48  6 23.32 0.001556
49  6 23.32 0.001418
50  6 23.32 0.001292
51  6 23.32 0.001177
52  6 23.33 0.001073
53  6 23.33 0.000977

Show code

# Extract estimated coefficients from final_fit
df_final_fit_coefs <- final_fit %>%
  extract_fit_parsnip() %>%
  tidy()
df_final_fit_coefs

# A tibble: 7 × 3
  term                  estimate penalty
  <chr>                    <dbl>   <dbl>
1 (Intercept)             -2.20  0.00316
2 disease_cat_Other       -0.246 0.00316
3 bridging_yn_Yes          0.421 0.00316
4 anc_pre_ld_log10        -0.541 0.00316
5 plt_pre_ld_log10        -0.242 0.00316
6 ldh_pre_ld_log10         0.466 0.00316
7 ferritin_pre_ld_log10    0.433 0.00316

Show code

# Compute model-specific variable importance scores for fitted model
final_fit %>% 
  extract_fit_parsnip() %>% 
  vip()

Show code

# Evaluate final model on test set
final_fit %>% collect_metrics()

# A tibble: 3 × 4
  .metric     .estimator .estimate .config             
  <chr>       <chr>          <dbl> <chr>               
1 accuracy    binary        0.885  Preprocessor1_Model1
2 roc_auc     binary        0.858  Preprocessor1_Model1
3 brier_class binary        0.0881 Preprocessor1_Model1

Show code

# Plot calibration curve for test set
# Using {probably}
final_fit %>%
  collect_predictions() %>%
  cal_plot_breaks(
    truth = icaht_grade_3_4,
    estimate = .pred_1,
    event_level = "second",
    num_breaks = 5
  )

Show code

# Using {runway}
# Augment df_test (i.e., add columns for predictions)
df_mod_pre_ferritin_pre_ld_bridging <- wf_final_trained %>%
  augment(new_data = df_test) %>% 
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

p_cal_pre_ferritin_pre_ld_bridging <- cal_plot(
  df_mod_pre_ferritin_pre_ld_bridging,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPre (+ bridging Y/N)"
)
p_cal_pre_ferritin_pre_ld_bridging

Show code

# Plot ROC curve for test set
p_roc_pre_ferritin_pre_ld_bridging <- final_fit %>%
  collect_predictions() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() +
  labs(title = "eIPMPre (+ bridging Y/N)")
p_roc_pre_ferritin_pre_ld_bridging

Calculate sensitivity/specificity of final model on test set using an optimal cutpoint based on the Youden index

Show code

cp <- cutpointr::cutpointr(
  data = df_mod_pre_ferritin_pre_ld_bridging,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)

Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

   AUC   n n_pos n_neg
 0.858 208    31   177

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
           0.1921 0.5611 0.8077      0.7419      0.8192 23  8 32 145

Predictor summary: 
    Data       Min.         5%    1st Qu.     Median      Mean
 Overall 0.01212559 0.02243276 0.04603704 0.09317768 0.1575520
       0 0.01212559 0.02125369 0.04148493 0.07912474 0.1156375
       1 0.06811707 0.08853358 0.18067349 0.29767356 0.3968701
   3rd Qu.       95%      Max.        SD NAs
 0.1972440 0.6122819 0.9945373 0.1808306   0
 0.1586298 0.3269365 0.6508477 0.1095079   0
 0.6323817 0.9080596 0.9945373 0.2927925   0

Decision curve analysis of final model fitted on test set

Show code

df_mod_pre_ferritin_pre_ld_bridging <- df_mod_pre_ferritin_pre_ld_bridging %>%
  mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))

dcurves::dca(
  icaht_grade_3_4 ~ .pred_1,
  data = df_mod_pre_ferritin_pre_ld_bridging,
  thresholds = seq(0, 0.35, by = 0.01)
) %>%
  plot(smooth = TRUE) +
  labs(x = "Threshold Probability (Physician/Patient Preference)")

Development and validation of multivariable logistic regression model of grade 3-4 eICAHT: disease type, lymphodepletion regimen, pre-LD ANC, pre-LD platelet count, pre-LD LDH, and pre-LD ferritin

Model development and validation using {tidymodels}

Build and train logistic regression model with LASSO regularization

Show code

# Create model formula
model_formula <-
  icaht_grade_3_4 ~ disease_cat_Other + LD_regimen_low_CyFlu_vs_other_Other + anc_pre_ld_log10 +
  plt_pre_ld_log10 + ldh_pre_ld_log10 + ferritin_pre_ld_log10

# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
  add_model(spec_model, formula = model_formula) %>% 
  add_recipe(recipe)

# Fit model on training set
fit_train <- wf %>%
  fit(data = df_train)

# Extract estimated coefficients
fit_train %>%
  extract_fit_parsnip() %>%
  tidy()

# A tibble: 7 × 3
  term                                estimate penalty
  <chr>                                  <dbl>   <dbl>
1 (Intercept)                         -1.77        0.1
2 disease_cat_Other                    0           0.1
3 LD_regimen_low_CyFlu_vs_other_Other  0           0.1
4 anc_pre_ld_log10                    -0.0669      0.1
5 plt_pre_ld_log10                    -0.135       0.1
6 ldh_pre_ld_log10                     0           0.1
7 ferritin_pre_ld_log10                0.00556     0.1

Tune LASSO parameters

Show code

# Tune grid using workflow object
doParallel::registerDoParallel()

wf_tune <- workflow() %>% 
  add_model(tune_spec, formula = model_formula) %>% 
  add_recipe(recipe)

set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
                        resamples = bootstrap_resamples,
                        grid = lambda_grid)

lasso_grid %>% collect_metrics()

# A tibble: 15 × 7
        penalty .metric     .estimator   mean     n std_err .config   
          <dbl> <chr>       <chr>       <dbl> <int>   <dbl> <chr>     
 1 0.0000000001 accuracy    binary     0.876     25 0.00311 Preproces…
 2 0.0000000001 brier_class binary     0.0965    25 0.00187 Preproces…
 3 0.0000000001 roc_auc     binary     0.794     25 0.00787 Preproces…
 4 0.0000000316 accuracy    binary     0.876     25 0.00311 Preproces…
 5 0.0000000316 brier_class binary     0.0965    25 0.00187 Preproces…
 6 0.0000000316 roc_auc     binary     0.794     25 0.00787 Preproces…
 7 0.00001      accuracy    binary     0.876     25 0.00311 Preproces…
 8 0.00001      brier_class binary     0.0965    25 0.00187 Preproces…
 9 0.00001      roc_auc     binary     0.794     25 0.00787 Preproces…
10 0.00316      accuracy    binary     0.876     25 0.00309 Preproces…
11 0.00316      brier_class binary     0.0965    25 0.00184 Preproces…
12 0.00316      roc_auc     binary     0.793     25 0.00799 Preproces…
13 1            accuracy    binary     0.855     25 0.00293 Preproces…
14 1            brier_class binary     0.124     25 0.00207 Preproces…
15 1            roc_auc     binary     0.5       25 0       Preproces…

Show code

# Visualize performance with regularization parameter
lasso_grid %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean, color = .metric)) +
  geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
  geom_line(linewidth = 1.5) +
  facet_wrap(~ .metric, scales = "free", nrow = 2) +
  scale_x_log10() +
  theme(legend.position = "none")

Show code

# Select highest accuracy
highest_auroc <- lasso_grid %>%
  select_best(metric = "roc_auc")

# Finalize workflow
wf_final <-
  finalize_workflow(wf_tune,
                    highest_auroc)

# Visualize most important variables
wf_final %>%
  fit(data = df_train) %>%
  pull_workflow_fit() %>%
  vi(lambda = highest_auroc$penalty) %>%
  mutate(Importance = abs(Importance),
         Variable = fct_reorder(Variable, Importance)) %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL)

Fit final best model on training set and evaluate on test set

Show code

# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
                      df_split)

# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>% 
  extract_workflow()
wf_final_trained

══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()

── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps

• step_impute_bag()
• step_normalize()
• step_dummy()

── Model ─────────────────────────────────────────────────────────────

Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.123300
2   2  2.56 0.112400
3   2  4.94 0.102400
4   3  7.05 0.093280
5   4  9.23 0.084990
6   4 11.35 0.077440
7   4 13.08 0.070560
8   4 14.52 0.064300
9   4 15.73 0.058580
10  4 16.75 0.053380
11  4 17.62 0.048640
12  4 18.36 0.044320
13  5 19.23 0.040380
14  5 20.03 0.036790
15  5 20.72 0.033520
16  5 21.31 0.030550
17  5 21.82 0.027830
18  6 22.26 0.025360
19  6 22.67 0.023110
20  6 23.01 0.021050
21  6 23.30 0.019180
22  6 23.56 0.017480
23  6 23.77 0.015930
24  6 23.95 0.014510
25  6 24.11 0.013220
26  6 24.24 0.012050
27  6 24.36 0.010980
28  6 24.45 0.010000
29  6 24.53 0.009114
30  6 24.60 0.008304
31  6 24.66 0.007566
32  6 24.71 0.006894
33  6 24.75 0.006282
34  6 24.78 0.005724
35  6 24.81 0.005215
36  6 24.84 0.004752
37  6 24.86 0.004330
38  6 24.87 0.003945
39  6 24.89 0.003595
40  6 24.90 0.003275
41  6 24.91 0.002984
42  6 24.92 0.002719
43  6 24.92 0.002478
44  6 24.93 0.002258
45  6 24.94 0.002057
46  6 24.94 0.001874

...
and 8 more lines.

Show code

# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
  extract_fit_parsnip()
final_fit_mod

parsnip model object


Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.123300
2   2  2.56 0.112400
3   2  4.94 0.102400
4   3  7.05 0.093280
5   4  9.23 0.084990
6   4 11.35 0.077440
7   4 13.08 0.070560
8   4 14.52 0.064300
9   4 15.73 0.058580
10  4 16.75 0.053380
11  4 17.62 0.048640
12  4 18.36 0.044320
13  5 19.23 0.040380
14  5 20.03 0.036790
15  5 20.72 0.033520
16  5 21.31 0.030550
17  5 21.82 0.027830
18  6 22.26 0.025360
19  6 22.67 0.023110
20  6 23.01 0.021050
21  6 23.30 0.019180
22  6 23.56 0.017480
23  6 23.77 0.015930
24  6 23.95 0.014510
25  6 24.11 0.013220
26  6 24.24 0.012050
27  6 24.36 0.010980
28  6 24.45 0.010000
29  6 24.53 0.009114
30  6 24.60 0.008304
31  6 24.66 0.007566
32  6 24.71 0.006894
33  6 24.75 0.006282
34  6 24.78 0.005724
35  6 24.81 0.005215
36  6 24.84 0.004752
37  6 24.86 0.004330
38  6 24.87 0.003945
39  6 24.89 0.003595
40  6 24.90 0.003275
41  6 24.91 0.002984
42  6 24.92 0.002719
43  6 24.92 0.002478
44  6 24.93 0.002258
45  6 24.94 0.002057
46  6 24.94 0.001874
47  6 24.94 0.001708
48  6 24.95 0.001556
49  6 24.95 0.001418
50  6 24.95 0.001292
51  6 24.95 0.001177
52  6 24.95 0.001073
53  6 24.95 0.000977
54  6 24.96 0.000890

Show code

# Extract estimated coefficients from final_fit
df_final_fit_coefs <- final_fit %>%
  extract_fit_parsnip() %>%
  tidy()
df_final_fit_coefs

# A tibble: 7 × 3
  term                                estimate      penalty
  <chr>                                  <dbl>        <dbl>
1 (Intercept)                           -2.39  0.0000000001
2 disease_cat_Other                     -0.281 0.0000000001
3 LD_regimen_low_CyFlu_vs_other_Other    0.883 0.0000000001
4 anc_pre_ld_log10                      -0.545 0.0000000001
5 plt_pre_ld_log10                      -0.377 0.0000000001
6 ldh_pre_ld_log10                       0.461 0.0000000001
7 ferritin_pre_ld_log10                  0.446 0.0000000001

Show code

# Compute model-specific variable importance scores for fitted model
final_fit %>% 
  extract_fit_parsnip() %>% 
  vip()

Show code

# Evaluate final model on test set
final_fit %>% collect_metrics()

# A tibble: 3 × 4
  .metric     .estimator .estimate .config             
  <chr>       <chr>          <dbl> <chr>               
1 accuracy    binary        0.885  Preprocessor1_Model1
2 roc_auc     binary        0.840  Preprocessor1_Model1
3 brier_class binary        0.0906 Preprocessor1_Model1

Show code

# Plot calibration curve for test set
# Using {probably}
final_fit %>%
  collect_predictions() %>%
  cal_plot_breaks(
    truth = icaht_grade_3_4,
    estimate = .pred_1,
    event_level = "second",
    num_breaks = 5
  )

Show code

# Using {runway}
# Augment df_test (i.e., add columns for predictions)
df_mod_pre_ferritin_pre_ld_LD_regimen <- wf_final_trained %>%
  augment(new_data = df_test) %>% 
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

p_cal_pre_ferritin_pre_ld_LD_regimen <- cal_plot(
  df_mod_pre_ferritin_pre_ld_LD_regimen,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPre (+ LD regimen)"
)
p_cal_pre_ferritin_pre_ld_LD_regimen

Show code

# Plot ROC curve for test set
p_roc_pre_ferritin_pre_ld_LD_regimen <- final_fit %>%
  collect_predictions() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() +
  labs(title = "eIPMPre (+ LD regimen)")
p_roc_pre_ferritin_pre_ld_LD_regimen

Calculate sensitivity/specificity of final model on test dataset using an optimal cutpoint based on the Youden index

Show code

cp <- cutpointr::cutpointr(
  data = df_mod_pre_ferritin_pre_ld_LD_regimen,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)

Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

    AUC   n n_pos n_neg
 0.8398 208    31   177

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
           0.1303 0.5562 0.7356      0.8387      0.7175 26  5 50 127

Predictor summary: 
    Data       Min.         5%    1st Qu.     Median      Mean
 Overall 0.01074805 0.01601036 0.03927698 0.08968762 0.1545562
       0 0.01074805 0.01448999 0.03428915 0.07383580 0.1122840
       1 0.03575507 0.07427495 0.13775421 0.27510320 0.3959172
   3rd Qu.       95%      Max.        SD NAs
 0.1747263 0.5592538 0.9965745 0.1893149   0
 0.1442390 0.3435103 0.7378751 0.1166286   0
 0.6494173 0.9094487 0.9965745 0.3110581   0

Decision curve analysis of final model fitted on test set

Show code

df_mod_pre_ferritin_pre_ld_LD_regimen <- df_mod_pre_ferritin_pre_ld_LD_regimen %>%
  mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))

dcurves::dca(
  icaht_grade_3_4 ~ .pred_1,
  data = df_mod_pre_ferritin_pre_ld_LD_regimen,
  thresholds = seq(0, 0.35, by = 0.01),
  label = list(.pred_1 = "eIPMPre")
) %>%
  plot(smooth = TRUE) +
  labs(x = "Threshold Probability (Physician/Patient Preference)")

Development and validation of multivariable logistic regression model of grade 3-4 eICAHT: disease type, pre-LD ANC, pre-LD platelet count, pre-LD LDH, and day +3 ferritin

Model development and validation using {tidymodels}

Build and train logistic regression model with LASSO regularization

Show code

# Create model formula
model_formula <-
  icaht_grade_3_4 ~ disease_cat_Other + anc_pre_ld_log10 + plt_pre_ld_log10 + ldh_pre_ld_log10 + ferritin_day_3_log10

# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
  add_model(spec_model, formula = model_formula) %>% 
  add_recipe(recipe)

# Fit model on training set
fit_train <- wf %>%
  fit(data = df_train)

# Extract estimated coefficients
fit_train %>%
  extract_fit_parsnip() %>%
  tidy()

# A tibble: 6 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)           -1.78       0.1
2 disease_cat_Other      0          0.1
3 anc_pre_ld_log10      -0.0685     0.1
4 plt_pre_ld_log10      -0.0684     0.1
5 ldh_pre_ld_log10       0          0.1
6 ferritin_day_3_log10   0.162      0.1

Tune LASSO parameters

Show code

# Tune grid using workflow object
doParallel::registerDoParallel()

wf_tune <- workflow() %>% 
  add_model(tune_spec, formula = model_formula) %>% 
  add_recipe(recipe)

set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
                        resamples = bootstrap_resamples,
                        grid = lambda_grid)

lasso_grid %>% collect_metrics()

# A tibble: 15 × 7
        penalty .metric     .estimator   mean     n std_err .config   
          <dbl> <chr>       <chr>       <dbl> <int>   <dbl> <chr>     
 1 0.0000000001 accuracy    binary     0.873     25 0.00385 Preproces…
 2 0.0000000001 brier_class binary     0.0972    25 0.00213 Preproces…
 3 0.0000000001 roc_auc     binary     0.814     25 0.00757 Preproces…
 4 0.0000000316 accuracy    binary     0.873     25 0.00385 Preproces…
 5 0.0000000316 brier_class binary     0.0972    25 0.00213 Preproces…
 6 0.0000000316 roc_auc     binary     0.814     25 0.00757 Preproces…
 7 0.00001      accuracy    binary     0.873     25 0.00385 Preproces…
 8 0.00001      brier_class binary     0.0972    25 0.00213 Preproces…
 9 0.00001      roc_auc     binary     0.814     25 0.00757 Preproces…
10 0.00316      accuracy    binary     0.874     25 0.00356 Preproces…
11 0.00316      brier_class binary     0.0968    25 0.00208 Preproces…
12 0.00316      roc_auc     binary     0.815     25 0.00764 Preproces…
13 1            accuracy    binary     0.855     25 0.00293 Preproces…
14 1            brier_class binary     0.124     25 0.00207 Preproces…
15 1            roc_auc     binary     0.5       25 0       Preproces…

Show code

# Visualize performance with regularization parameter
lasso_grid %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean, color = .metric)) +
  geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
  geom_line(linewidth = 1.5) +
  facet_wrap(~ .metric, scales = "free", nrow = 2) +
  scale_x_log10() +
  theme(legend.position = "none")

Show code

# Select highest AUROC
highest_auroc <- lasso_grid %>%
  select_best(metric = "roc_auc")

# Finalize workflow
wf_final <-
  finalize_workflow(wf_tune,
                    highest_auroc)

# Visualize most important variables
wf_final %>%
  fit(data = df_train) %>%
  pull_workflow_fit() %>%
  vi(lambda = highest_auroc$penalty) %>%
  mutate(Importance = abs(Importance),
         Variable = fct_reorder(Variable, Importance)) %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL)

Fit final best model on training set and evaluate on test set

Show code

# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
                      df_split)

# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>% 
  extract_workflow()
wf_final_trained

══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()

── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps

• step_impute_bag()
• step_normalize()
• step_dummy()

── Model ─────────────────────────────────────────────────────────────

Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.126200
2   3  3.15 0.115000
3   3  6.60 0.104800
4   3  9.30 0.095470
5   3 11.46 0.086990
6   4 13.32 0.079260
7   4 15.26 0.072220
8   4 16.87 0.065800
9   4 18.22 0.059960
10  4 19.36 0.054630
11  4 20.33 0.049780
12  4 21.15 0.045350
13  4 21.86 0.041330
14  4 22.47 0.037650
15  4 22.99 0.034310
16  4 23.43 0.031260
17  4 23.82 0.028480
18  4 24.15 0.025950
19  4 24.43 0.023650
20  4 24.67 0.021550
21  4 24.88 0.019630
22  5 25.06 0.017890
23  5 25.22 0.016300
24  5 25.36 0.014850
25  5 25.48 0.013530
26  5 25.58 0.012330
27  5 25.66 0.011230
28  5 25.73 0.010240
29  5 25.79 0.009327
30  5 25.85 0.008499
31  5 25.89 0.007744
32  5 25.93 0.007056
33  5 25.96 0.006429
34  5 25.98 0.005858
35  5 26.00 0.005337
36  5 26.02 0.004863
37  5 26.04 0.004431
38  5 26.05 0.004038
39  5 26.06 0.003679
40  5 26.07 0.003352
41  5 26.08 0.003054
42  5 26.08 0.002783
43  5 26.09 0.002536
44  5 26.09 0.002310
45  5 26.10 0.002105
46  5 26.10 0.001918

...
and 7 more lines.

Show code

# saveRDS(wf_final_trained, "eIPMPost.rds")

# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
  extract_fit_parsnip()
final_fit_mod

parsnip model object


Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.126200
2   3  3.15 0.115000
3   3  6.60 0.104800
4   3  9.30 0.095470
5   3 11.46 0.086990
6   4 13.32 0.079260
7   4 15.26 0.072220
8   4 16.87 0.065800
9   4 18.22 0.059960
10  4 19.36 0.054630
11  4 20.33 0.049780
12  4 21.15 0.045350
13  4 21.86 0.041330
14  4 22.47 0.037650
15  4 22.99 0.034310
16  4 23.43 0.031260
17  4 23.82 0.028480
18  4 24.15 0.025950
19  4 24.43 0.023650
20  4 24.67 0.021550
21  4 24.88 0.019630
22  5 25.06 0.017890
23  5 25.22 0.016300
24  5 25.36 0.014850
25  5 25.48 0.013530
26  5 25.58 0.012330
27  5 25.66 0.011230
28  5 25.73 0.010240
29  5 25.79 0.009327
30  5 25.85 0.008499
31  5 25.89 0.007744
32  5 25.93 0.007056
33  5 25.96 0.006429
34  5 25.98 0.005858
35  5 26.00 0.005337
36  5 26.02 0.004863
37  5 26.04 0.004431
38  5 26.05 0.004038
39  5 26.06 0.003679
40  5 26.07 0.003352
41  5 26.08 0.003054
42  5 26.08 0.002783
43  5 26.09 0.002536
44  5 26.09 0.002310
45  5 26.10 0.002105
46  5 26.10 0.001918
47  5 26.10 0.001748
48  5 26.11 0.001592
49  5 26.11 0.001451
50  5 26.11 0.001322
51  5 26.11 0.001205
52  5 26.11 0.001098
53  5 26.11 0.001000

Show code

# Extract estimated coefficients from final_fit
df_final_fit_coefs <- final_fit %>%
  extract_fit_parsnip() %>%
  tidy()
df_final_fit_coefs

# A tibble: 6 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)            -2.10  0.00316
2 disease_cat_Other      -0.177 0.00316
3 anc_pre_ld_log10       -0.548 0.00316
4 plt_pre_ld_log10       -0.199 0.00316
5 ldh_pre_ld_log10        0.446 0.00316
6 ferritin_day_3_log10    0.762 0.00316

Show code

# Compute model-specific variable importance scores for fitted model
final_fit %>% 
  extract_fit_parsnip() %>% 
  vip()

Show code

# Evaluate final model on test set
final_fit %>% collect_metrics()

# A tibble: 3 × 4
  .metric     .estimator .estimate .config             
  <chr>       <chr>          <dbl> <chr>               
1 accuracy    binary        0.889  Preprocessor1_Model1
2 roc_auc     binary        0.878  Preprocessor1_Model1
3 brier_class binary        0.0846 Preprocessor1_Model1

Show code

# Plot calibration curve for test set
# Using {probably}
final_fit %>%
  collect_predictions() %>%
  cal_plot_breaks(
    truth = icaht_grade_3_4,
    estimate = .pred_1,
    event_level = "second",
    num_breaks = 5
  )

Show code

# Using {runway}
# Augment df_test (i.e., add columns for predictions)
df_mod_post_ferritin_only <- wf_final_trained %>%
  augment(new_data = df_test) %>% 
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

p_cal_post_ferritin_only <- cal_plot(
  df_mod_post_ferritin_only,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPost"
)
p_cal_post_ferritin_only

Show code

cal_plot(
  df_mod_post_ferritin_only,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 4,
  plot_title = "eIPMPost"
)

Show code

# Plot ROC curve for test set
p_roc_post_ferritin_only <- final_fit %>%
  collect_predictions() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() +
  labs(title = "eIPMPost")
p_roc_post_ferritin_only

Show code

# Plot threshold-performance plot for test set
runway::threshperf_plot(
  df_mod_post_ferritin_only,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1"
)

Calculate sensitivity/specificity of final model on test dataset using an optimal cutpoint based on the Youden index

Show code

cp <- cutpointr::cutpointr(
  data = df_mod_post_ferritin_only,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)

Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

    AUC   n n_pos n_neg
 0.8784 208    31   177

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
            0.152 0.6184 0.7885      0.8387      0.7797 26  5 39 138

Predictor summary: 
    Data       Min.         5%    1st Qu.     Median      Mean
 Overall 0.00656279 0.01650296 0.03949988 0.09172247 0.1564294
       0 0.00656279 0.01591629 0.03520880 0.07534019 0.1108258
       1 0.05078428 0.10410927 0.17463322 0.29974585 0.4168115
   3rd Qu.       95%      Max.        SD NAs
 0.1923062 0.5830375 0.9966972 0.1883222   0
 0.1351734 0.3208616 0.6531640 0.1109754   0
 0.6345243 0.9629399 0.9966972 0.3002167   0

Decision curve analysis of final model fitted on test set

Show code

df_mod_post_ferritin_only <- df_mod_post_ferritin_only %>% 
  mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))

dcurves::dca(icaht_grade_3_4 ~ .pred_1, data = df_mod_post_ferritin_only,
    thresholds = seq(0, 0.35, by = 0.01)) %>% 
  plot(smooth = TRUE) +
  labs(x = "Threshold Probability (Physician/Patient Preference)")

Evaluate final model on subsets of test set (ALL vs. other disease types)

ALL

Show code

# Augment test subset (i.e., add columns for predictions)
df_mod_post_ferritin_only_all <- wf_final_trained %>%
  augment(new_data = df_test_all) 

# Calculate accuracy and AUROC
model_metrics <- metric_set(yardstick::accuracy, roc_auc)

df_mod_post_ferritin_only_all %>%
  ungroup() %>%
  model_metrics(truth = icaht_grade_3_4,
                estimate = .pred_class,
                .pred_1,
                event_level = "second")

# A tibble: 2 × 3
  .metric  .estimator .estimate
  <chr>    <chr>          <dbl>
1 accuracy binary         0.714
2 roc_auc  binary         0.9

Show code

# Plot calibration curve using {runway}
df_mod_post_ferritin_only_all <- df_mod_post_ferritin_only_all %>%
  mutate(.pred_class = as.numeric(as.character(.pred_class))) %>% 
  ungroup()

cal_plot(
  df_mod_post_ferritin_only_all,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 3,
  plot_title = "eIPMPost (ALL)"
)

Show code

# Plot ROC curve
df_mod_post_ferritin_only_all %>%
  ungroup() %>% 
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() + 
  labs(title = "eIPMPost (ALL)")

Show code

# Calculate sensitivity/specificity using an optimal cutpoint based on the Youden index
cp <- cutpointr::cutpointr(
  data = df_mod_post_ferritin_only_all,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)

Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

 AUC  n n_pos n_neg
 0.9 21    11    10

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp tn
           0.2266 0.8091 0.9048      0.9091         0.9 10  1  1  9

Predictor summary: 
    Data       Min.         5%    1st Qu.    Median      Mean
 Overall 0.04809059 0.04819017 0.14749837 0.2266330 0.3752421
       0 0.04809059 0.04813540 0.08353623 0.1650011 0.1830184
       1 0.10333398 0.16498350 0.25168933 0.5439176 0.5499909
   3rd Qu.       95%      Max.        SD NAs
 0.6442408 0.9539433 0.9617106 0.3151846   0
 0.1886184 0.4497377 0.6442408 0.1733286   0
 0.8029840 0.9578269 0.9617106 0.3179675   0

Other disease types

Show code

# Define ther disease types test subset
df_test_other <- df_test %>% filter(disease_cat == "Other")

# Augment test subset (i.e., add columns for predictions)
df_mod_post_ferritin_only_other <- wf_final_trained %>%
  augment(new_data = df_test_other) 

# Calculate accuracy and AUROC
df_mod_post_ferritin_only_other %>%
  ungroup() %>%
  model_metrics(truth = icaht_grade_3_4,
                estimate = .pred_class,
                .pred_1,
                event_level = "second")

# A tibble: 2 × 3
  .metric  .estimator .estimate
  <chr>    <chr>          <dbl>
1 accuracy binary         0.909
2 roc_auc  binary         0.856

Show code

# Plot calibration curve using {runway}
df_mod_post_ferritin_only_other <- df_mod_post_ferritin_only_other %>%
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

cal_plot(
  df_mod_post_ferritin_only_other,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPost (Other disease types)"
)

Show code

# Plot ROC curve
df_mod_post_ferritin_only_other %>%
  ungroup() %>% 
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() + 
  labs(title = "eIPMPost (Other disease types)")

Show code

# Calculate sensitivity/specificity using an optimal cutpoint based on the Youden index
cp <- cutpointr::cutpointr(
  data = df_mod_post_ferritin_only_other,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)

Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

    AUC   n n_pos n_neg
 0.8557 187    20   167

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
           0.1194 0.6126 0.7326         0.9      0.7126 18  2 48 119

Predictor summary: 
    Data       Min.         5%    1st Qu.     Median      Mean
 Overall 0.00656279 0.01608832 0.03616852 0.08339907 0.1318569
       0 0.00656279 0.01591243 0.03426449 0.07012160 0.1065029
       1 0.05078428 0.10217954 0.15663687 0.27126373 0.3435629
   3rd Qu.       95%      Max.        SD NAs
 0.1601082 0.3776642 0.9966972 0.1509132   0
 0.1288125 0.3141927 0.6531640 0.1053369   0
 0.3956410 0.9657956 0.9966972 0.2705217   0

Development and validation of multivariable logistic regression model of grade 3-4 ICAHT: disease type, receipt of bridging therapy, pre-LD ANC, pre-LD platelet count, pre-LD LDH, and day +3 ferritin

Model development and validation using {tidymodels}

Build and train logistic regression model with LASSO regularization

Show code

# Create model formula
model_formula <-
  icaht_grade_3_4 ~ disease_cat_Other + bridging_yn_Yes + anc_pre_ld_log10 +
  plt_pre_ld_log10 + ldh_pre_ld_log10 + ferritin_day_3_log10

# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
  add_model(spec_model, formula = model_formula) %>% 
  add_recipe(recipe)

# Fit model on training set
fit_train <- wf %>%
  fit(data = df_train)

# Extract estimated coefficients
fit_train %>%
  extract_fit_parsnip() %>%
  tidy()

# A tibble: 7 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)           -1.78       0.1
2 disease_cat_Other      0          0.1
3 bridging_yn_Yes        0          0.1
4 anc_pre_ld_log10      -0.0685     0.1
5 plt_pre_ld_log10      -0.0684     0.1
6 ldh_pre_ld_log10       0          0.1
7 ferritin_day_3_log10   0.162      0.1

Tune LASSO parameters

Show code

# Tune grid using workflow object
doParallel::registerDoParallel()

wf_tune <- workflow() %>% 
  add_model(tune_spec, formula = model_formula) %>% 
  add_recipe(recipe)

set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
                        resamples = bootstrap_resamples,
                        grid = lambda_grid)

lasso_grid %>% collect_metrics()

# A tibble: 15 × 7
        penalty .metric     .estimator   mean     n std_err .config   
          <dbl> <chr>       <chr>       <dbl> <int>   <dbl> <chr>     
 1 0.0000000001 accuracy    binary     0.872     25 0.00317 Preproces…
 2 0.0000000001 brier_class binary     0.0971    25 0.00204 Preproces…
 3 0.0000000001 roc_auc     binary     0.809     25 0.00721 Preproces…
 4 0.0000000316 accuracy    binary     0.872     25 0.00317 Preproces…
 5 0.0000000316 brier_class binary     0.0971    25 0.00204 Preproces…
 6 0.0000000316 roc_auc     binary     0.809     25 0.00721 Preproces…
 7 0.00001      accuracy    binary     0.872     25 0.00317 Preproces…
 8 0.00001      brier_class binary     0.0971    25 0.00204 Preproces…
 9 0.00001      roc_auc     binary     0.809     25 0.00721 Preproces…
10 0.00316      accuracy    binary     0.872     25 0.00323 Preproces…
11 0.00316      brier_class binary     0.0966    25 0.00200 Preproces…
12 0.00316      roc_auc     binary     0.810     25 0.00719 Preproces…
13 1            accuracy    binary     0.855     25 0.00293 Preproces…
14 1            brier_class binary     0.124     25 0.00207 Preproces…
15 1            roc_auc     binary     0.5       25 0       Preproces…

Show code

# Visualize performance with regularization parameter
lasso_grid %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean, color = .metric)) +
  geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
  geom_line(linewidth = 1.5) +
  facet_wrap( ~ .metric, scales = "free", nrow = 2) +
  scale_x_log10() +
  theme(legend.position = "none")

Show code

# Select highest accuracy
highest_auroc <- lasso_grid %>%
  select_best(metric = "roc_auc")

# Finalize workflow
wf_final <-
  finalize_workflow(wf_tune,
                    highest_auroc)

# Visualize most important variables
wf_final %>%
  fit(data = df_train) %>%
  pull_workflow_fit() %>%
  vi(lambda = highest_auroc$penalty) %>%
  mutate(Importance = abs(Importance),
         Variable = fct_reorder(Variable, Importance)) %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL)

Fit final best model on training set and evaluate on test set

Show code

# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
                      df_split)

# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>% 
  extract_workflow()
wf_final_trained

══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()

── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps

• step_impute_bag()
• step_normalize()
• step_dummy()

── Model ─────────────────────────────────────────────────────────────

Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.126200
2   3  3.15 0.115000
3   3  6.60 0.104800
4   3  9.30 0.095470
5   3 11.46 0.086990
6   4 13.32 0.079260
7   4 15.26 0.072220
8   4 16.87 0.065800
9   4 18.22 0.059960
10  4 19.36 0.054630
11  4 20.33 0.049780
12  4 21.15 0.045350
13  4 21.86 0.041330
14  4 22.47 0.037650
15  4 22.99 0.034310
16  4 23.43 0.031260
17  4 23.82 0.028480
18  5 24.22 0.025950
19  5 24.58 0.023650
20  5 24.90 0.021550
21  5 25.17 0.019630
22  6 25.41 0.017890
23  6 25.62 0.016300
24  6 25.80 0.014850
25  6 25.95 0.013530
26  6 26.08 0.012330
27  6 26.19 0.011230
28  6 26.28 0.010240
29  6 26.36 0.009327
30  6 26.43 0.008499
31  6 26.49 0.007744
32  6 26.53 0.007056
33  6 26.57 0.006429
34  6 26.61 0.005858
35  6 26.64 0.005337
36  6 26.66 0.004863
37  6 26.68 0.004431
38  6 26.70 0.004038
39  6 26.71 0.003679
40  6 26.72 0.003352
41  6 26.73 0.003054
42  6 26.74 0.002783
43  6 26.75 0.002536
44  6 26.75 0.002310
45  6 26.76 0.002105
46  6 26.76 0.001918

...
and 8 more lines.

Show code

# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
  extract_fit_parsnip()
final_fit_mod

parsnip model object


Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.126200
2   3  3.15 0.115000
3   3  6.60 0.104800
4   3  9.30 0.095470
5   3 11.46 0.086990
6   4 13.32 0.079260
7   4 15.26 0.072220
8   4 16.87 0.065800
9   4 18.22 0.059960
10  4 19.36 0.054630
11  4 20.33 0.049780
12  4 21.15 0.045350
13  4 21.86 0.041330
14  4 22.47 0.037650
15  4 22.99 0.034310
16  4 23.43 0.031260
17  4 23.82 0.028480
18  5 24.22 0.025950
19  5 24.58 0.023650
20  5 24.90 0.021550
21  5 25.17 0.019630
22  6 25.41 0.017890
23  6 25.62 0.016300
24  6 25.80 0.014850
25  6 25.95 0.013530
26  6 26.08 0.012330
27  6 26.19 0.011230
28  6 26.28 0.010240
29  6 26.36 0.009327
30  6 26.43 0.008499
31  6 26.49 0.007744
32  6 26.53 0.007056
33  6 26.57 0.006429
34  6 26.61 0.005858
35  6 26.64 0.005337
36  6 26.66 0.004863
37  6 26.68 0.004431
38  6 26.70 0.004038
39  6 26.71 0.003679
40  6 26.72 0.003352
41  6 26.73 0.003054
42  6 26.74 0.002783
43  6 26.75 0.002536
44  6 26.75 0.002310
45  6 26.76 0.002105
46  6 26.76 0.001918
47  6 26.77 0.001748
48  6 26.77 0.001592
49  6 26.77 0.001451
50  6 26.77 0.001322
51  6 26.77 0.001205
52  6 26.78 0.001098
53  6 26.78 0.001000
54  6 26.78 0.000911

Show code

# Extract estimated coefficients from final_fit
df_final_fit_coefs <- final_fit %>%
  extract_fit_parsnip() %>%
  tidy()
df_final_fit_coefs

# A tibble: 7 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)            -2.39  0.00316
2 disease_cat_Other      -0.197 0.00316
3 bridging_yn_Yes         0.473 0.00316
4 anc_pre_ld_log10       -0.562 0.00316
5 plt_pre_ld_log10       -0.153 0.00316
6 ldh_pre_ld_log10        0.434 0.00316
7 ferritin_day_3_log10    0.758 0.00316

Show code

# Compute model-specific variable importance scores for fitted model
final_fit %>% 
  extract_fit_parsnip() %>% 
  vip()

Show code

# Evaluate final model on test set
final_fit %>% collect_metrics()

# A tibble: 3 × 4
  .metric     .estimator .estimate .config             
  <chr>       <chr>          <dbl> <chr>               
1 accuracy    binary        0.889  Preprocessor1_Model1
2 roc_auc     binary        0.863  Preprocessor1_Model1
3 brier_class binary        0.0861 Preprocessor1_Model1

Show code

# Plot calibration curve for test set
# Using {probably}
final_fit %>%
  collect_predictions() %>%
  cal_plot_breaks(
    truth = icaht_grade_3_4,
    estimate = .pred_1,
    event_level = "second",
    num_breaks = 5
  )

Show code

# Using {runway}
# Augment df_test (i.e., add columns for predictions)
df_mod_post_bridging <- wf_final_trained %>%
  augment(new_data = df_test) %>% 
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

p_cal_post_bridging <- cal_plot(
  df_mod_post_bridging,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPost (+ bridging Y/N)"
)
p_cal_post_bridging

Show code

# Plot ROC curve for test set
p_roc_post_bridging <- final_fit %>%
  collect_predictions() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() +
  labs(title = "eIPMPost (+ bridging Y/N)")
p_roc_post_bridging

Calculate sensitivity/specificity of final model on test set using an optimal cutpoint based on the Youden index

Show code

cp <- cutpointr::cutpointr(
  data = df_mod_post_bridging,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)

Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

    AUC   n n_pos n_neg
 0.8631 208    31   177

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
            0.175 0.5821 0.8029      0.7742      0.8079 24  7 34 143

Predictor summary: 
    Data        Min.         5%    1st Qu.     Median      Mean
 Overall 0.005095741 0.01629999 0.03753716 0.08915127 0.1580132
       0 0.005095741 0.01503095 0.03390703 0.07706598 0.1125861
       1 0.060003793 0.07991962 0.17607529 0.28611468 0.4173873
   3rd Qu.       95%      Max.        SD NAs
 0.2015784 0.6020981 0.9967971 0.1914246   0
 0.1468183 0.3419500 0.6807804 0.1146879   0
 0.6448698 0.9672225 0.9967971 0.3065623   0

Decision curve analysis of final model fitted on test set

Show code

df_mod_post_bridging <- df_mod_post_bridging %>%
  mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))

dcurves::dca(
  icaht_grade_3_4 ~ .pred_1,
  data = df_mod_post_bridging,
  thresholds = seq(0, 0.35, by = 0.01)
) %>%
  plot(smooth = TRUE) +
  labs(x = "Threshold Probability (Physician/Patient Preference)")

Development and validation of multivariable logistic regression model of grade 3-4 eICAHT: disease type, pre-LD ANC, pre-LD platelet count, pre-LD LDH, day +3 ferritin, and day +3 CRP

Model development and validation using {tidymodels}

Build and train logistic regression model with LASSO regularization

Show code

# Create model formula
model_formula <-
  icaht_grade_3_4 ~ disease_cat_Other + anc_pre_ld_log10 + plt_pre_ld_log10 + ldh_pre_ld_log10 +
  ferritin_day_3_log10 + crp_day_3_log10

# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
  add_model(spec_model, formula = model_formula) %>% 
  add_recipe(recipe)

# Fit model on training set
fit_train <- wf %>%
  fit(data = df_train)

# Extract estimated coefficients
fit_train %>%
  extract_fit_parsnip() %>%
  tidy()

# A tibble: 7 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)           -1.78       0.1
2 disease_cat_Other      0          0.1
3 anc_pre_ld_log10      -0.0685     0.1
4 plt_pre_ld_log10      -0.0684     0.1
5 ldh_pre_ld_log10       0          0.1
6 ferritin_day_3_log10   0.162      0.1
7 crp_day_3_log10        0          0.1

Tune LASSO parameters

Show code

# Tune grid using workflow object
doParallel::registerDoParallel()

wf_tune <- workflow() %>% 
  add_model(tune_spec, formula = model_formula) %>% 
  add_recipe(recipe)

set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
                        resamples = bootstrap_resamples,
                        grid = lambda_grid)

lasso_grid %>% collect_metrics()

# A tibble: 15 × 7
        penalty .metric     .estimator   mean     n std_err .config   
          <dbl> <chr>       <chr>       <dbl> <int>   <dbl> <chr>     
 1 0.0000000001 accuracy    binary     0.873     25 0.00349 Preproces…
 2 0.0000000001 brier_class binary     0.0965    25 0.00208 Preproces…
 3 0.0000000001 roc_auc     binary     0.823     25 0.00748 Preproces…
 4 0.0000000316 accuracy    binary     0.873     25 0.00349 Preproces…
 5 0.0000000316 brier_class binary     0.0965    25 0.00208 Preproces…
 6 0.0000000316 roc_auc     binary     0.823     25 0.00748 Preproces…
 7 0.00001      accuracy    binary     0.873     25 0.00349 Preproces…
 8 0.00001      brier_class binary     0.0965    25 0.00208 Preproces…
 9 0.00001      roc_auc     binary     0.823     25 0.00748 Preproces…
10 0.00316      accuracy    binary     0.874     25 0.00304 Preproces…
11 0.00316      brier_class binary     0.0961    25 0.00201 Preproces…
12 0.00316      roc_auc     binary     0.823     25 0.00757 Preproces…
13 1            accuracy    binary     0.855     25 0.00293 Preproces…
14 1            brier_class binary     0.124     25 0.00207 Preproces…
15 1            roc_auc     binary     0.5       25 0       Preproces…

Show code

# Visualize performance with regularization parameter
lasso_grid %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean, color = .metric)) +
  geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
  geom_line(linewidth = 1.5) +
  facet_wrap(~ .metric, scales = "free", nrow = 2) +
  scale_x_log10() +
  theme(legend.position = "none")

Show code

# Select highest AUROC
highest_auroc <- lasso_grid %>%
  select_best(metric = "roc_auc")

# Finalize workflow
wf_final <-
  finalize_workflow(wf_tune,
                    highest_auroc)

# Visualize most important variables
wf_final %>%
  fit(data = df_train) %>%
  pull_workflow_fit() %>%
  vi(lambda = highest_auroc$penalty) %>%
  mutate(Importance = abs(Importance),
         Variable = fct_reorder(Variable, Importance)) %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL)

Fit final best model on training set and evaluate on test set

Show code

# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
                      df_split)

# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>% 
  extract_workflow()
wf_final_trained

══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()

── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps

• step_impute_bag()
• step_normalize()
• step_dummy()

── Model ─────────────────────────────────────────────────────────────

Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.126200
2   3  3.15 0.115000
3   3  6.60 0.104800
4   3  9.30 0.095470
5   3 11.46 0.086990
6   4 13.32 0.079260
7   4 15.26 0.072220
8   4 16.87 0.065800
9   4 18.22 0.059960
10  4 19.36 0.054630
11  5 20.49 0.049780
12  5 21.49 0.045350
13  5 22.34 0.041330
14  5 23.08 0.037650
15  5 23.72 0.034310
16  5 24.28 0.031260
17  5 24.75 0.028480
18  5 25.17 0.025950
19  5 25.52 0.023650
20  6 25.85 0.021550
21  6 26.14 0.019630
22  6 26.38 0.017890
23  6 26.60 0.016300
24  6 26.78 0.014850
25  6 26.94 0.013530
26  6 27.07 0.012330
27  6 27.18 0.011230
28  6 27.28 0.010240
29  6 27.36 0.009327
30  6 27.43 0.008499
31  6 27.49 0.007744
32  6 27.54 0.007056
33  6 27.58 0.006429
34  6 27.62 0.005858
35  6 27.65 0.005337
36  6 27.67 0.004863
37  6 27.70 0.004431
38  6 27.71 0.004038
39  6 27.73 0.003679
40  6 27.74 0.003352
41  6 27.75 0.003054
42  6 27.76 0.002783
43  6 27.77 0.002536
44  6 27.78 0.002310
45  6 27.78 0.002105
46  6 27.78 0.001918

...
and 9 more lines.

Show code

# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
  extract_fit_parsnip()
final_fit_mod

parsnip model object


Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.126200
2   3  3.15 0.115000
3   3  6.60 0.104800
4   3  9.30 0.095470
5   3 11.46 0.086990
6   4 13.32 0.079260
7   4 15.26 0.072220
8   4 16.87 0.065800
9   4 18.22 0.059960
10  4 19.36 0.054630
11  5 20.49 0.049780
12  5 21.49 0.045350
13  5 22.34 0.041330
14  5 23.08 0.037650
15  5 23.72 0.034310
16  5 24.28 0.031260
17  5 24.75 0.028480
18  5 25.17 0.025950
19  5 25.52 0.023650
20  6 25.85 0.021550
21  6 26.14 0.019630
22  6 26.38 0.017890
23  6 26.60 0.016300
24  6 26.78 0.014850
25  6 26.94 0.013530
26  6 27.07 0.012330
27  6 27.18 0.011230
28  6 27.28 0.010240
29  6 27.36 0.009327
30  6 27.43 0.008499
31  6 27.49 0.007744
32  6 27.54 0.007056
33  6 27.58 0.006429
34  6 27.62 0.005858
35  6 27.65 0.005337
36  6 27.67 0.004863
37  6 27.70 0.004431
38  6 27.71 0.004038
39  6 27.73 0.003679
40  6 27.74 0.003352
41  6 27.75 0.003054
42  6 27.76 0.002783
43  6 27.77 0.002536
44  6 27.78 0.002310
45  6 27.78 0.002105
46  6 27.78 0.001918
47  6 27.79 0.001748
48  6 27.79 0.001592
49  6 27.79 0.001451
50  6 27.80 0.001322
51  6 27.80 0.001205
52  6 27.80 0.001098
53  6 27.80 0.001000
54  6 27.80 0.000911
55  6 27.80 0.000830

Show code

# Extract estimated coefficients from final_fit
df_final_fit_coefs <- final_fit %>%
  extract_fit_parsnip() %>%
  tidy()
df_final_fit_coefs

# A tibble: 7 × 3
  term                 estimate      penalty
  <chr>                   <dbl>        <dbl>
1 (Intercept)            -2.08  0.0000000001
2 disease_cat_Other      -0.336 0.0000000001
3 anc_pre_ld_log10       -0.563 0.0000000001
4 plt_pre_ld_log10       -0.246 0.0000000001
5 ldh_pre_ld_log10        0.429 0.0000000001
6 ferritin_day_3_log10    0.555 0.0000000001
7 crp_day_3_log10         0.534 0.0000000001

Show code

# Compute model-specific variable importance scores for fitted model
final_fit %>% 
  extract_fit_parsnip() %>% 
  vip()

Show code

# Evaluate final model on test set
final_fit %>% collect_metrics()

# A tibble: 3 × 4
  .metric     .estimator .estimate .config             
  <chr>       <chr>          <dbl> <chr>               
1 accuracy    binary        0.889  Preprocessor1_Model1
2 roc_auc     binary        0.866  Preprocessor1_Model1
3 brier_class binary        0.0881 Preprocessor1_Model1

Show code

# Plot calibration curve for test set
# Using {probably}
final_fit %>%
  collect_predictions() %>%
  cal_plot_breaks(
    truth = icaht_grade_3_4,
    estimate = .pred_1,
    event_level = "second",
    num_breaks = 5
  )

Show code

# Using {runway}
# Augment df_test (i.e., add columns for predictions)
df_mod_post_crp <- wf_final_trained %>%
  augment(new_data = df_test) %>% 
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

p_cal_post_crp <- cal_plot(
  df_mod_post_crp,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPost (+ day +3 CRP)"
)
p_cal_post_crp

Show code

# Plot ROC curve for test set
p_roc_post_crp <- final_fit %>%
  collect_predictions() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() +
  labs(title = "eIPMPost (+ day +3 CRP)")
p_roc_post_crp

Calculate sensitivity/specificity of final model on test dataset using an optimal cutpoint based on the Youden index

Show code

cp <- cutpointr::cutpointr(
  data = df_mod_post_crp,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)

Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

    AUC   n n_pos n_neg
 0.8664 208    31   177

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
           0.2223 0.6063 0.8462      0.7419      0.8644 23  8 24 153

Predictor summary: 
    Data        Min.         5%    1st Qu.     Median      Mean
 Overall 0.004248709 0.01075153 0.03614793 0.08968716 0.1563897
       0 0.004248709 0.01060042 0.03134483 0.06847338 0.1114428
       1 0.049466971 0.10137204 0.19028993 0.27979496 0.4130221
   3rd Qu.       95%      Max.        SD NAs
 0.1770402 0.6453671 0.9971238 0.1947642   0
 0.1462608 0.3451540 0.7361069 0.1220691   0
 0.6923864 0.9713001 0.9971238 0.3071562   0

Decision curve analysis of final model fitted on test set

Show code

df_mod_post_crp <- df_mod_post_crp %>% 
  mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))

dcurves::dca(icaht_grade_3_4 ~ .pred_1, data = df_mod_post_crp,
    thresholds = seq(0, 0.35, by = 0.01)) %>% 
  plot(smooth = TRUE) +
  labs(x = "Threshold Probability (Physician/Patient Preference)")

Development and validation of multivariable logistic regression model of grade 3-4 eICAHT: disease type, pre-LD ANC, pre-LD platelet count, pre-LD LDH, day +3 ferritin, and day +3 D-dimer

Model development and validation using {tidymodels}

Build and train logistic regression model with LASSO regularization

Show code

# Create model formula
model_formula <-
  icaht_grade_3_4 ~ disease_cat_Other + anc_pre_ld_log10 + plt_pre_ld_log10 + ldh_pre_ld_log10 +
  ferritin_day_3_log10 + d_dimer_day_3_log10

# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
  add_model(spec_model, formula = model_formula) %>% 
  add_recipe(recipe)

# Fit model on training set
fit_train <- wf %>%
  fit(data = df_train)

# Extract estimated coefficients
fit_train %>%
  extract_fit_parsnip() %>%
  tidy()

# A tibble: 7 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)           -1.78       0.1
2 disease_cat_Other      0          0.1
3 anc_pre_ld_log10      -0.0685     0.1
4 plt_pre_ld_log10      -0.0684     0.1
5 ldh_pre_ld_log10       0          0.1
6 ferritin_day_3_log10   0.162      0.1
7 d_dimer_day_3_log10    0          0.1

Tune LASSO parameters

Show code

# Tune grid using workflow object
doParallel::registerDoParallel()

wf_tune <- workflow() %>% 
  add_model(tune_spec, formula = model_formula) %>% 
  add_recipe(recipe)

set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
                        resamples = bootstrap_resamples,
                        grid = lambda_grid)

lasso_grid %>% collect_metrics()

# A tibble: 15 × 7
        penalty .metric     .estimator   mean     n std_err .config   
          <dbl> <chr>       <chr>       <dbl> <int>   <dbl> <chr>     
 1 0.0000000001 accuracy    binary     0.874     25 0.00305 Preproces…
 2 0.0000000001 brier_class binary     0.0976    25 0.00216 Preproces…
 3 0.0000000001 roc_auc     binary     0.808     25 0.00802 Preproces…
 4 0.0000000316 accuracy    binary     0.874     25 0.00305 Preproces…
 5 0.0000000316 brier_class binary     0.0976    25 0.00216 Preproces…
 6 0.0000000316 roc_auc     binary     0.808     25 0.00802 Preproces…
 7 0.00001      accuracy    binary     0.874     25 0.00305 Preproces…
 8 0.00001      brier_class binary     0.0976    25 0.00216 Preproces…
 9 0.00001      roc_auc     binary     0.808     25 0.00802 Preproces…
10 0.00316      accuracy    binary     0.875     25 0.00303 Preproces…
11 0.00316      brier_class binary     0.0972    25 0.00210 Preproces…
12 0.00316      roc_auc     binary     0.809     25 0.00801 Preproces…
13 1            accuracy    binary     0.855     25 0.00293 Preproces…
14 1            brier_class binary     0.124     25 0.00207 Preproces…
15 1            roc_auc     binary     0.5       25 0       Preproces…

Show code

# Visualize performance with regularization parameter
lasso_grid %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean, color = .metric)) +
  geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
  geom_line(linewidth = 1.5) +
  facet_wrap(~ .metric, scales = "free", nrow = 2) +
  scale_x_log10() +
  theme(legend.position = "none")

Show code

# Select highest AUROC
highest_auroc <- lasso_grid %>%
  select_best(metric = "roc_auc")

# Finalize workflow
wf_final <-
  finalize_workflow(wf_tune,
                    highest_auroc)

# Visualize most important variables
wf_final %>%
  fit(data = df_train) %>%
  pull_workflow_fit() %>%
  vi(lambda = highest_auroc$penalty) %>%
  mutate(Importance = abs(Importance),
         Variable = fct_reorder(Variable, Importance)) %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL)

Fit final best model on training set and evaluate on test set

Show code

# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
                      df_split)

# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>% 
  extract_workflow()
wf_final_trained

══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()

── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps

• step_impute_bag()
• step_normalize()
• step_dummy()

── Model ─────────────────────────────────────────────────────────────

Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.126200
2   3  3.15 0.115000
3   3  6.60 0.104800
4   3  9.30 0.095470
5   3 11.46 0.086990
6   4 13.32 0.079260
7   4 15.26 0.072220
8   4 16.87 0.065800
9   4 18.22 0.059960
10  4 19.36 0.054630
11  4 20.33 0.049780
12  4 21.15 0.045350
13  4 21.86 0.041330
14  5 22.47 0.037650
15  5 23.03 0.034310
16  5 23.51 0.031260
17  5 23.93 0.028480
18  5 24.28 0.025950
19  5 24.58 0.023650
20  6 24.84 0.021550
21  6 25.09 0.019630
22  6 25.30 0.017890
23  6 25.48 0.016300
24  6 25.63 0.014850
25  6 25.76 0.013530
26  6 25.87 0.012330
27  6 25.97 0.011230
28  6 26.05 0.010240
29  6 26.12 0.009327
30  6 26.17 0.008499
31  6 26.22 0.007744
32  6 26.26 0.007056
33  6 26.30 0.006429
34  6 26.33 0.005858
35  6 26.35 0.005337
36  6 26.37 0.004863
37  6 26.39 0.004431
38  6 26.41 0.004038
39  6 26.42 0.003679
40  6 26.43 0.003352
41  6 26.44 0.003054
42  6 26.44 0.002783
43  6 26.45 0.002536
44  6 26.45 0.002310
45  6 26.46 0.002105
46  6 26.46 0.001918

...
and 7 more lines.

Show code

# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
  extract_fit_parsnip()
final_fit_mod

parsnip model object


Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.126200
2   3  3.15 0.115000
3   3  6.60 0.104800
4   3  9.30 0.095470
5   3 11.46 0.086990
6   4 13.32 0.079260
7   4 15.26 0.072220
8   4 16.87 0.065800
9   4 18.22 0.059960
10  4 19.36 0.054630
11  4 20.33 0.049780
12  4 21.15 0.045350
13  4 21.86 0.041330
14  5 22.47 0.037650
15  5 23.03 0.034310
16  5 23.51 0.031260
17  5 23.93 0.028480
18  5 24.28 0.025950
19  5 24.58 0.023650
20  6 24.84 0.021550
21  6 25.09 0.019630
22  6 25.30 0.017890
23  6 25.48 0.016300
24  6 25.63 0.014850
25  6 25.76 0.013530
26  6 25.87 0.012330
27  6 25.97 0.011230
28  6 26.05 0.010240
29  6 26.12 0.009327
30  6 26.17 0.008499
31  6 26.22 0.007744
32  6 26.26 0.007056
33  6 26.30 0.006429
34  6 26.33 0.005858
35  6 26.35 0.005337
36  6 26.37 0.004863
37  6 26.39 0.004431
38  6 26.41 0.004038
39  6 26.42 0.003679
40  6 26.43 0.003352
41  6 26.44 0.003054
42  6 26.44 0.002783
43  6 26.45 0.002536
44  6 26.45 0.002310
45  6 26.46 0.002105
46  6 26.46 0.001918
47  6 26.46 0.001748
48  6 26.47 0.001592
49  6 26.47 0.001451
50  6 26.47 0.001322
51  6 26.47 0.001205
52  6 26.47 0.001098
53  6 26.47 0.001000

Show code

# Extract estimated coefficients from final_fit
df_final_fit_coefs <- final_fit %>%
  extract_fit_parsnip() %>%
  tidy()
df_final_fit_coefs

# A tibble: 7 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)            -2.01  0.00316
2 disease_cat_Other      -0.277 0.00316
3 anc_pre_ld_log10       -0.548 0.00316
4 plt_pre_ld_log10       -0.200 0.00316
5 ldh_pre_ld_log10        0.430 0.00316
6 ferritin_day_3_log10    0.646 0.00316
7 d_dimer_day_3_log10     0.183 0.00316

Show code

# Compute model-specific variable importance scores for fitted model
final_fit %>% 
  extract_fit_parsnip() %>% 
  vip()

Show code

# Evaluate final model on test set
final_fit %>% collect_metrics()

# A tibble: 3 × 4
  .metric     .estimator .estimate .config             
  <chr>       <chr>          <dbl> <chr>               
1 accuracy    binary        0.885  Preprocessor1_Model1
2 roc_auc     binary        0.879  Preprocessor1_Model1
3 brier_class binary        0.0853 Preprocessor1_Model1

Show code

# Plot calibration curve for test set
# Using {probably}
final_fit %>%
  collect_predictions() %>%
  cal_plot_breaks(
    truth = icaht_grade_3_4,
    estimate = .pred_1,
    event_level = "second",
    num_breaks = 5
  )

Show code

# Using {runway}
# Augment df_test (i.e., add columns for predictions)
df_mod_post_d_dimer <- wf_final_trained %>%
  augment(new_data = df_test) %>% 
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

p_cal_post_d_dimer <- cal_plot(
  df_mod_post_d_dimer,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPost (+ day +3 D-dimer)"
)
p_cal_post_d_dimer

Show code

# Plot ROC curve for test set
p_roc_post_d_dimer <- final_fit %>%
  collect_predictions() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() +
  labs(title = "eIPMPost (+ day +3 D-dimer)")
p_roc_post_d_dimer

Calculate sensitivity/specificity of final model on test dataset using an optimal cutpoint based on the Youden index

Show code

cp <- cutpointr::cutpointr(
  data = df_mod_post_d_dimer,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)

Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

   AUC   n n_pos n_neg
 0.879 208    31   177

 optimal_cutpoint youden    acc sensitivity specificity tp fn fp  tn
           0.2175 0.6233 0.8606      0.7419      0.8814 23  8 21 156

Predictor summary: 
    Data        Min.         5%    1st Qu.     Median      Mean
 Overall 0.007698222 0.01753568 0.04001359 0.09004261 0.1573276
       0 0.007698222 0.01729843 0.03625101 0.07104828 0.1119078
       1 0.043622126 0.10359214 0.19091415 0.30097656 0.4166597
   3rd Qu.       95%      Max.        SD NAs
 0.1841708 0.5847027 0.9969185 0.1895310   0
 0.1552416 0.3695235 0.6825382 0.1146589   0
 0.6277296 0.9627989 0.9969185 0.2984432   0

Decision curve analysis of final model fitted on test set

Show code

df_mod_post_d_dimer <- df_mod_post_d_dimer %>% 
  mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))

dcurves::dca(icaht_grade_3_4 ~ .pred_1, data = df_mod_post_d_dimer,
    thresholds = seq(0, 0.35, by = 0.01)) %>% 
  plot(smooth = TRUE) +
  labs(x = "Threshold Probability (Physician/Patient Preference)")

Development and validation of multivariable logistic regression model of grade 3-4 ICAHT: disease type, pre-LD LDH, day +3 ANC, day +3 platelet count, and day +3 ferritin

Model development and validation using {tidymodels}

Build and train logistic regression model with LASSO regularization

Show code

# Create model formula
model_formula <-
  icaht_grade_3_4 ~ disease_cat_Other + anc_day_3_log10 + plt_day_3_log10 + ldh_pre_ld_log10 + ferritin_day_3_log10

# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
  add_model(spec_model, formula = model_formula) %>% 
  add_recipe(recipe)

# Fit model on training set
fit_train <- wf %>%
  fit(data = df_train)

# Extract estimated coefficients
fit_train %>%
  extract_fit_parsnip() %>%
  tidy()

# A tibble: 6 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)           -1.81       0.1
2 disease_cat_Other      0          0.1
3 anc_day_3_log10       -0.374      0.1
4 plt_day_3_log10        0          0.1
5 ldh_pre_ld_log10       0          0.1
6 ferritin_day_3_log10   0.0260     0.1

Tune LASSO parameters

Show code

# Tune grid using workflow object
doParallel::registerDoParallel()

wf_tune <- workflow() %>% 
  add_model(tune_spec, formula = model_formula) %>% 
  add_recipe(recipe)

set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
                        resamples = bootstrap_resamples,
                        grid = lambda_grid)

lasso_grid %>% collect_metrics()

# A tibble: 15 × 7
        penalty .metric     .estimator   mean     n std_err .config   
          <dbl> <chr>       <chr>       <dbl> <int>   <dbl> <chr>     
 1 0.0000000001 accuracy    binary     0.874     25 0.00212 Preproces…
 2 0.0000000001 brier_class binary     0.0985    25 0.00217 Preproces…
 3 0.0000000001 roc_auc     binary     0.834     25 0.00674 Preproces…
 4 0.0000000316 accuracy    binary     0.874     25 0.00212 Preproces…
 5 0.0000000316 brier_class binary     0.0985    25 0.00217 Preproces…
 6 0.0000000316 roc_auc     binary     0.834     25 0.00674 Preproces…
 7 0.00001      accuracy    binary     0.874     25 0.00212 Preproces…
 8 0.00001      brier_class binary     0.0985    25 0.00217 Preproces…
 9 0.00001      roc_auc     binary     0.834     25 0.00674 Preproces…
10 0.00316      accuracy    binary     0.874     25 0.00210 Preproces…
11 0.00316      brier_class binary     0.0982    25 0.00208 Preproces…
12 0.00316      roc_auc     binary     0.834     25 0.00684 Preproces…
13 1            accuracy    binary     0.855     25 0.00293 Preproces…
14 1            brier_class binary     0.124     25 0.00207 Preproces…
15 1            roc_auc     binary     0.5       25 0       Preproces…

Show code

# Visualize performance with regularization parameter
lasso_grid %>%
  collect_metrics() %>%
  ggplot(aes(x = penalty, y = mean, color = .metric)) +
  geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
  geom_line(linewidth = 1.5) +
  facet_wrap( ~ .metric, scales = "free", nrow = 2) +
  scale_x_log10() +
  theme(legend.position = "none")

Show code

# Select highest AUROC
highest_auroc <- lasso_grid %>%
  select_best(metric = "roc_auc")

# Finalize workflow
wf_final <-
  finalize_workflow(wf_tune,
                    highest_auroc)

# Visualize most important variables
wf_final %>%
  fit(data = df_train) %>%
  pull_workflow_fit() %>%
  vi(lambda = highest_auroc$penalty) %>%
  mutate(Importance = abs(Importance),
         Variable = fct_reorder(Variable, Importance)) %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL)

Fit final best model on training set and evaluate on test set

Show code

# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
                      df_split)

# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>% 
  extract_workflow()
wf_final_trained

══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()

── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps

• step_impute_bag()
• step_normalize()
• step_dummy()

── Model ─────────────────────────────────────────────────────────────

Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.164100
2   1  3.98 0.149600
3   1  6.82 0.136300
4   1  8.96 0.124200
5   1 10.63 0.113100
6   2 12.06 0.103100
7   2 14.01 0.093930
8   2 15.64 0.085590
9   2 17.02 0.077980
10  2 18.19 0.071060
11  4 19.24 0.064740
12  4 20.44 0.058990
13  4 21.44 0.053750
14  4 22.30 0.048980
15  5 23.03 0.044620
16  5 23.76 0.040660
17  5 24.38 0.037050
18  5 24.91 0.033760
19  5 25.36 0.030760
20  5 25.74 0.028030
21  5 26.06 0.025540
22  5 26.33 0.023270
23  5 26.56 0.021200
24  5 26.76 0.019320
25  5 26.93 0.017600
26  5 27.07 0.016040
27  5 27.19 0.014610
28  5 27.29 0.013310
29  5 27.37 0.012130
30  5 27.44 0.011050
31  5 27.50 0.010070
32  5 27.55 0.009177
33  5 27.60 0.008362
34  5 27.63 0.007619
35  5 27.66 0.006942
36  5 27.69 0.006325
37  5 27.71 0.005764
38  5 27.73 0.005251
39  5 27.74 0.004785
40  5 27.75 0.004360
41  5 27.76 0.003973
42  5 27.77 0.003620
43  5 27.78 0.003298
44  5 27.79 0.003005
45  5 27.79 0.002738
46  5 27.80 0.002495

...
and 8 more lines.

Show code

# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
  extract_fit_parsnip()
final_fit_mod

parsnip model object


Call:  glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial",      alpha = ~1) 

   Df  %Dev   Lambda
1   0  0.00 0.164100
2   1  3.98 0.149600
3   1  6.82 0.136300
4   1  8.96 0.124200
5   1 10.63 0.113100
6   2 12.06 0.103100
7   2 14.01 0.093930
8   2 15.64 0.085590
9   2 17.02 0.077980
10  2 18.19 0.071060
11  4 19.24 0.064740
12  4 20.44 0.058990
13  4 21.44 0.053750
14  4 22.30 0.048980
15  5 23.03 0.044620
16  5 23.76 0.040660
17  5 24.38 0.037050
18  5 24.91 0.033760
19  5 25.36 0.030760
20  5 25.74 0.028030
21  5 26.06 0.025540
22  5 26.33 0.023270
23  5 26.56 0.021200
24  5 26.76 0.019320
25  5 26.93 0.017600
26  5 27.07 0.016040
27  5 27.19 0.014610
28  5 27.29 0.013310
29  5 27.37 0.012130
30  5 27.44 0.011050
31  5 27.50 0.010070
32  5 27.55 0.009177
33  5 27.60 0.008362
34  5 27.63 0.007619
35  5 27.66 0.006942
36  5 27.69 0.006325
37  5 27.71 0.005764
38  5 27.73 0.005251
39  5 27.74 0.004785
40  5 27.75 0.004360
41  5 27.76 0.003973
42  5 27.77 0.003620
43  5 27.78 0.003298
44  5 27.79 0.003005
45  5 27.79 0.002738
46  5 27.80 0.002495
47  5 27.80 0.002273
48  5 27.80 0.002071
49  5 27.80 0.001887
50  5 27.81 0.001720
51  5 27.81 0.001567
52  5 27.81 0.001428
53  5 27.81 0.001301
54  5 27.81 0.001185

Show code

# Extract estimated coefficients from final_fit
df_final_fit_coefs <- final_fit %>%
  extract_fit_parsnip() %>%
  tidy()
df_final_fit_coefs

# A tibble: 6 × 3
  term                 estimate penalty
  <chr>                   <dbl>   <dbl>
1 (Intercept)            -1.81  0.00316
2 disease_cat_Other      -0.556 0.00316
3 anc_day_3_log10        -0.609 0.00316
4 plt_day_3_log10        -0.218 0.00316
5 ldh_pre_ld_log10        0.298 0.00316
6 ferritin_day_3_log10    0.576 0.00316

Show code

# Compute model-specific variable importance scores for fitted model
final_fit %>% 
  extract_fit_parsnip() %>% 
  vip()

Show code

# Evaluate final model on test set
final_fit %>% collect_metrics()

# A tibble: 3 × 4
  .metric     .estimator .estimate .config             
  <chr>       <chr>          <dbl> <chr>               
1 accuracy    binary        0.861  Preprocessor1_Model1
2 roc_auc     binary        0.877  Preprocessor1_Model1
3 brier_class binary        0.0923 Preprocessor1_Model1

Show code

# Plot calibration curve for test set
# Using {probably}
final_fit %>%
  collect_predictions() %>%
  cal_plot_breaks(
    truth = icaht_grade_3_4,
    estimate = .pred_1,
    event_level = "second",
    num_breaks = 5
  )

Show code

# Using {runway}
# Augment df_test (i.e., add columns for predictions)
df_mod_post_day_3 <- wf_final_trained %>%
  augment(new_data = df_test) %>% 
  mutate(.pred_class = as.numeric(as.character(.pred_class)))

p_cal_post_day_3 <- cal_plot(
  df_mod_post_day_3,
  outcome = "icaht_grade_3_4",
  positive = "1",
  prediction = ".pred_1",
  n_bins = 0,
  show_loess = TRUE,
  plot_title = "eIPMPost (day +3 ANC/plt)"
)
p_cal_post_day_3

Show code

# Plot ROC curve for test set
p_roc_post_day_3 <- final_fit %>%
  collect_predictions() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second") %>%
  autoplot() +
  labs(title = "eIPMPost (day +3 ANC/plt)")
p_roc_post_day_3

Calculate sensitivity/specificity of final model on test set using an optimal cutpoint based on the Youden index

Show code

cp <- cutpointr::cutpointr(
  data = df_mod_post_day_3,
  x = .pred_1,
  class = icaht_grade_3_4,
  direction = ">=",
  pos_class = 1,
  method = maximize_metric,
  metric = youden,
  na.rm = TRUE
)
summary(cp)

Method: maximize_metric 
Predictor: .pred_1 
Outcome: icaht_grade_3_4 
Direction: >= 

    AUC   n n_pos n_neg
 0.8772 208    31   177

 optimal_cutpoint youden   acc sensitivity specificity tp fn fp  tn
            0.115 0.6812 0.774      0.9355      0.7458 29  2 45 132

Predictor summary: 
    Data        Min.         5%    1st Qu.     Median      Mean
 Overall 0.008789295 0.01859198 0.03869103 0.07791277 0.1499486
       0 0.008789295 0.01834401 0.03576852 0.06418202 0.1082043
       1 0.046506841 0.10745259 0.15332250 0.25756441 0.3882952
   3rd Qu.       95%      Max.        SD NAs
 0.1697938 0.5909424 0.9789442 0.1915267   0
 0.1170436 0.3374924 0.7266540 0.1303143   0
 0.5510965 0.9580952 0.9789442 0.2906861   0

Decision curve analysis of final model fitted on test set

Show code

df_mod_post_day_3 <- df_mod_post_day_3 %>% 
  mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))

dcurves::dca(icaht_grade_3_4 ~ .pred_1,
             data = df_mod_post_day_3,
             thresholds = seq(0, 0.35, by = 0.01)) %>%
  plot(smooth = TRUE) +
  labs(x = "Threshold Probability (Physician/Patient Preference)")

Decision curve analyses of grade 3-4 eICAHT prediction models (eIPM)

eIPMPre vs. eIPMPost

Show code

df_mod_all <- df_mod_pre_ferritin_pre_ld %>%
  rename(predictions_pre_ferritin_pre_ld = .pred_1) %>%
  left_join(
    .,
    df_mod_pre_ferritin_pre_ld_bridging %>% select(upn, predictions_pre_ferritin_pre_ld_bridging = .pred_1),
    by = "upn"
  ) %>%
  left_join(
    .,
    df_mod_pre_ferritin_pre_ld_LD_regimen %>% select(upn, predictions_pre_ferritin_pre_ld_LD_regimen = .pred_1),
    by = "upn"
  ) %>%
  left_join(
    .,
    df_mod_post_ferritin_only %>% select(upn, predictions_post_ferritin_only = .pred_1),
    by = "upn"
  ) %>%
  left_join(.,
            df_mod_post_bridging %>% select(upn, predictions_post_bridging = .pred_1),
            by = "upn") %>%
  left_join(.,
            df_mod_post_crp %>% select(upn, predictions_post_crp = .pred_1),
            by = "upn") %>%
  left_join(.,
            df_mod_post_d_dimer %>% select(upn, predictions_post_d_dimer = .pred_1),
            by = "upn") %>%
  left_join(.,
            df_mod_post_day_3 %>% select(upn, predictions_post_day_3 = .pred_1),
            by = "upn") %>%
  select(
    upn,
    icaht_grade_3_4,
    predictions_pre_ferritin_pre_ld,
    predictions_pre_ferritin_pre_ld_bridging,
    predictions_pre_ferritin_pre_ld_LD_regimen,
    predictions_post_ferritin_only,
    predictions_post_bridging,
    predictions_post_crp,
    predictions_post_d_dimer,
    predictions_post_day_3
  )

dcurves::dca(
  icaht_grade_3_4 ~ predictions_pre_ferritin_pre_ld + predictions_post_ferritin_only,
  data = df_mod_all,
  thresholds = seq(0, 0.35, by = 0.01),
  label = list(
    predictions_pre_ferritin_pre_ld = "eIPMPre",
    predictions_post_ferritin_only = "eIPMPost"
  )
) %>%
  plot(smooth = TRUE) +
  labs(x = "Threshold Probability (Physician/Patient Preference)")

All eIPMPres and eIPMPosts

Show code

p_dca_pre_all <- dcurves::dca(
  icaht_grade_3_4 ~ predictions_pre_ferritin_pre_ld + predictions_pre_ferritin_pre_ld_bridging +
    predictions_pre_ferritin_pre_ld_LD_regimen,
  data = df_mod_all,
  thresholds = seq(0, 0.35, by = 0.01),
  label = list(
    predictions_pre_ferritin_pre_ld = "eIPMPre",
    predictions_pre_ferritin_pre_ld_bridging = "eIPMPre (+ bridging Y/N)",
    predictions_pre_ferritin_pre_ld_LD_regimen = "eIPMPre (+ LD regimen)"
  )
) %>%
  plot(smooth = TRUE) +
  labs(x = "Threshold Probability (Physician/Patient Preference)")

p_dca_post_all <- dcurves::dca(
  icaht_grade_3_4 ~ predictions_post_ferritin_only + predictions_post_bridging + predictions_post_crp +
    predictions_post_d_dimer + predictions_post_day_3,
  data = df_mod_all,
  thresholds = seq(0, 0.35, by = 0.01),
  label = list(
    predictions_post_ferritin_only = "eIPMPost",
    predictions_post_bridging = "eIPMPost (+ bridging Y/N)",
    predictions_post_crp ="eIPMPost (+ day +3 CRP)",
    predictions_post_d_dimer ="eIPMPost (+ day +3 D-dimer)",
    predictions_post_day_3 = "eIPMPost (day +3 ANC/plt)"
  )
) %>%
  plot(smooth = TRUE) +
  labs(x = "Threshold Probability (Physician/Patient Preference)")

p <- p_dca_pre_all + p_dca_post_all
p + plot_layout(ncol = 1) + plot_annotation(tag_levels = "A")

ROC curves of all eIPMPres and eIPMPosts

Show code

# Create ROC data frames for each model
df_roc_pre_ferritin_pre_ld <- df_mod_pre_ferritin_pre_ld %>%
  ungroup() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second")

df_roc_pre_ferritin_pre_ld_bridging <- df_mod_pre_ferritin_pre_ld_bridging %>%
  ungroup() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second")

df_roc_pre_ferritin_pre_ld_LD_regimen <- df_mod_pre_ferritin_pre_ld_LD_regimen %>%
  ungroup() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second")

df_roc_post_ferritin_only <- df_mod_post_ferritin_only %>%
  ungroup() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second")

df_roc_post_bridging <- df_mod_post_bridging %>%
  ungroup() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second")

df_roc_post_crp <- df_mod_post_crp %>%
  ungroup() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second")

df_roc_post_d_dimer <- df_mod_post_d_dimer %>%
  ungroup() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second")

df_roc_post_day_3 <- df_mod_post_day_3 %>%
  ungroup() %>%
  roc_curve(truth = icaht_grade_3_4, .pred_1, event_level = "second")

# eIPMPres
p_roc_pre_all <- df_roc_pre_ferritin_pre_ld %>%
  mutate(model = "eIPMPre") %>%
  bind_rows(
    df_roc_pre_ferritin_pre_ld_bridging %>% mutate(model = "eIPMPre (+ bridging Y/N)"),
    df_roc_pre_ferritin_pre_ld_LD_regimen %>% mutate(model = "eIPMPre (+ LD regimen)")
  ) %>%
  ggplot(aes(
    x = 1 - specificity,
    y = sensitivity,
    color = model
  )) +
  geom_path(size = 1) +
  geom_abline(lty = 3) +
  labs(color = "Model")

# eIPMPosts
p_roc_post_all <- df_roc_post_ferritin_only %>%
  mutate(model = "eIPMPost") %>%
  bind_rows(
    df_roc_post_bridging %>% mutate(model = "eIPMPost (+ bridging Y/N)"),
    df_roc_post_crp %>% mutate(model = "eIPMPost (+ day +3 CRP)"),
    df_roc_post_d_dimer %>% mutate(model = "eIPMPost (+ day +3 D-dimer)"),
    df_roc_post_day_3 %>% mutate(model = "eIPMPost (day +3 ANC/plt)")
  ) %>%
  ggplot(aes(
    x = 1 - specificity,
    y = sensitivity,
    color = model
  )) +
  geom_path(size = 1) +
  geom_abline(lty = 3) +
  labs(color = "Model")

p <- p_roc_pre_all + p_roc_post_all
p + plot_layout(ncol = 1) + plot_annotation(tag_levels = "A")

Characteristics of training and test sets

Show code

tbl_train <- df %>% 
  filter(upn %in% df_train$upn) %>% 
  select(
    cart_age,
    gender_desc,
    racenih,
    ethnih,
    prior_hct,
    disease_cat,
    bm_disease_by_morph,
    bm_disease_by_flow,
    bridging_yn,
    LD_regimen_cat,
    product_infused,
    product_cat,
    costim_domain,
    anc_pre_ld,
    hb_pre_ld, 
    plt_pre_ld
  ) %>%
  tbl_summary(
    missing_text = "Missing",
    type = list(all_continuous() ~ "continuous2"),
    statistic = list(
      all_continuous() ~ c("{median} ({p25}, {p75})", "{min} - {max}"),
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_categorical() ~ 0,
    sort = list(everything() ~ "frequency")
  ) %>%
  bold_labels()

tbl_test <- df %>% 
  filter(upn %in% df_test$upn) %>% 
  select(
    cart_age,
    gender_desc,
    racenih,
    ethnih,
    prior_hct,
    disease_cat,
    bm_disease_by_morph,
    bm_disease_by_flow,
    bridging_yn,
    LD_regimen_cat,
    product_infused,
    product_cat,
    costim_domain,
    anc_pre_ld,
    hb_pre_ld, 
    plt_pre_ld
  ) %>%
  tbl_summary(
    missing_text = "Missing",
    type = list(all_continuous() ~ "continuous2"),
    statistic = list(
      all_continuous() ~ c("{median} ({p25}, {p75})", "{min} - {max}"),
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_categorical() ~ 0,
    sort = list(everything() ~ "frequency")
  ) %>%
  bold_labels()

tbl_merge(
  tbls = list(tbl_train, tbl_test),
  tab_spanner = c("**Training cohort**", "**Test cohort**")
)

Characteristic	Training cohort	Test cohort
Characteristic	N = 483¹	N = 208¹
Age
Median (IQR)	62 (50, 68)	61 (53, 67)
Range	21 - 84	19 - 79
Sex
Male	306 (63%)	131 (63%)
Female	177 (37%)	77 (37%)
Race
White	413 (86%)	170 (82%)
Asian	35 (7%)	23 (11%)
Black or African American	13 (3%)	5 (2%)
American Indian or Alaska Native	11 (2%)	2 (1%)
Multiple	6 (1%)	2 (1%)
Unknown	3 (1%)	4 (2%)
Native Hawaiian or other Pacific Islander	2 (0%)	2 (1%)
Ethnicity
Not Hispanic or Latino	440 (91%)	189 (91%)
Hispanic or Latino	37 (8%)	15 (7%)
Unknown	6 (1%)	4 (2%)
Prior HCT	161 (33%)	73 (35%)
Disease
Aggressive NHL	216 (45%)	102 (49%)
Other indolent NHL	84 (17%)	33 (16%)
MM/PCL	79 (16%)	41 (20%)
ALL	78 (16%)	21 (10%)
Mantle cell lymphoma	26 (5%)	11 (5%)
Bone marrow involvement by morphology (%)
Median (IQR)	0 (0, 27)	0 (0, 40)
Range	0 - 100	0 - 95
Missing	188	79
Bone marrow involvement by flow cytometry (%)
Median (IQR)	0 (0, 20)	0 (0, 18)
Range	0 - 99	0 - 97
Missing	177	75
Bridging therapy	292 (60%)	130 (63%)
Lymphodepletion regimen
Low-intensity Cy/Flu	284 (59%)	119 (57%)
High-intensity Cy/Flu	175 (36%)	78 (38%)
Other	24 (5%)	11 (5%)
CAR T-cell product
JCAR014	134 (28%)	59 (28%)
YESCARTA	94 (19%)	46 (22%)
BREYANZI	64 (13%)	23 (11%)
CARVYKTI	35 (7%)	17 (8%)
Investigational CD20-targeted product	34 (7%)	11 (5%)
JCAR021	33 (7%)	11 (5%)
TECARTUS	32 (7%)	15 (7%)
ABECMA	22 (5%)	8 (4%)
Investigational BCMA-targeted product	22 (5%)	16 (8%)
KYMRIAH	13 (3%)	2 (1%)
CAR T-cell product type
Investigational CAR T-cell product with 4-1BB costimulatory domain	223 (46%)	97 (47%)
Commercial CD19-targeted CAR T-cell product with CD28 costimulatory domain	126 (26%)	61 (29%)
Commercial CD19-targeted CAR T-cell product with 4-1BB costimulatory domain	77 (16%)	25 (12%)
Commercial BCMA-targeted CAR T-cell product	57 (12%)	25 (12%)
Costimulatory domain
4-1BB	323 (67%)	136 (65%)
CD28	126 (26%)	61 (29%)
Dual 4-1BB and CD28	34 (7%)	11 (5%)
Pre-LD ANC (x 10^3 cells/µL)
Median (IQR)	2.86 (1.84, 4.22)	2.77 (1.58, 4.60)
Range	0.00 - 23.17	0.02 - 54.03
Missing	1
Pre-LD Hb (g/dL)
Median (IQR)	10.70 (9.50, 12.25)	10.70 (9.60, 12.42)
Range	7.30 - 17.10	7.00 - 15.60
Pre-LD platelet count (x 10^3 cells/µL)
Median (IQR)	146 (94, 203)	139 (82, 201)
Range	6 - 790	7 - 453
¹ n (%)

5th and 95th percentiles of numeric variables in eIPMs

Show code

df %>% 
  filter(upn %in% df_train$upn) %>% 
  plyr::summarize(
    anc_pre_ld_5p = quantile(anc_pre_ld, 0.05, na.rm = TRUE),
    plt_pre_ld_5p = quantile(plt_pre_ld, 0.05, na.rm = TRUE),
    ldh_pre_ld_5p = quantile(ldh_pre_ld, 0.05, na.rm = TRUE),
    ferritin_pre_ld_5p = quantile(ferritin_pre_ld, 0.05, na.rm = TRUE),
    ferritin_day_3_5p = quantile(ferritin_day_3, 0.05, na.rm = TRUE)
  )

  anc_pre_ld_5p plt_pre_ld_5p ldh_pre_ld_5p ferritin_pre_ld_5p
1          0.71          24.1        114.35               27.1
  ferritin_day_3_5p
1                68

Show code

df %>% 
  filter(upn %in% df_train$upn) %>% 
  plyr::summarize(
    anc_pre_ld_95p = quantile(anc_pre_ld, 0.95, na.rm = TRUE),
    plt_pre_ld_95p = quantile(plt_pre_ld, 0.95, na.rm = TRUE),
    ldh_pre_ld_95p = quantile(ldh_pre_ld, 0.95, na.rm = TRUE),
    ferritin_pre_ld_95p = quantile(ferritin_pre_ld, 0.95, na.rm = TRUE),
    ferritin_day_3_95p = quantile(ferritin_day_3, 0.95, na.rm = TRUE)
  )

  anc_pre_ld_95p plt_pre_ld_95p ldh_pre_ld_95p ferritin_pre_ld_95p
1         7.9815          287.4         644.75              3904.1
  ferritin_day_3_95p
1            4931.45

Duration of neutropenia (ANC ≤ 500 cells/μL) by probability of severe eICAHT predicted by eIPMs

Show code

# eIPMPre
df_mod_pre_ferritin_pre_ld <- df_mod_pre_ferritin_pre_ld %>%
  left_join(., df_icaht %>% select(upn, duration_below_501_max), by = "upn") %>%
  mutate(.pred_1_as_percentage = .pred_1 * 100)

# Calculate correlation coefficient and p-value
cor_test <- cor.test(
  df_mod_pre_ferritin_pre_ld$.pred_1_as_percentage,
  df_mod_pre_ferritin_pre_ld$duration_below_501_max
)
cor_coef <- cor_test$estimate
cor_coef

      cor 
0.5979965

Show code

p_value <- cor_test$p.value
p_value

[1] 1.472629e-21

Show code

p1 <- df_mod_pre_ferritin_pre_ld %>%
  ggplot(aes(x = .pred_1_as_percentage, y = duration_below_501_max)) +
  geom_point() +
  geom_smooth(method = "lm",
              se = FALSE,
              color = "blue") +  # Add linear regression line
  labs(x = "Probability of Severe eICAHT (%) Predicted by eIPMPre",
       y = "Duration of Neutropenia (Days)") +
  annotate(
    "text",
    x = 90,
    y = 5,
    label = paste("r =", round(cor_coef, 2), "\np < 0.001"),
    hjust = 0,
    vjust = 1,
    size = 4,
    color = "red"
  )

# eIPMPost
df_mod_post_ferritin_only <- df_mod_post_ferritin_only %>%
  left_join(., df_icaht %>% select(upn, duration_below_501_max), by = "upn") %>%
  mutate(.pred_1_as_percentage = .pred_1 * 100)

# Calculate correlation coefficient and p-value
cor_test <- cor.test(
  df_mod_post_ferritin_only$.pred_1_as_percentage,
  df_mod_post_ferritin_only$duration_below_501_max
)
cor_coef <- cor_test$estimate
cor_coef

      cor 
0.6136532

Show code

p_value <- cor_test$p.value
p_value

[1] 6.550573e-23

Show code

p2 <- df_mod_post_ferritin_only %>%
  ggplot(aes(x = .pred_1_as_percentage, y = duration_below_501_max)) +
  geom_point() +
  geom_smooth(method = "lm",
              se = FALSE,
              color = "blue") +  # Add linear regression line
  labs(x = "Probability of Severe eICAHT (%) Predicted by eIPMPost", 
       y = "Duration of Neutropenia (Days)") +
  annotate(
    "text",
    x = 90,
    y = 5,
    label = paste("r =", round(cor_coef, 2), "\np < 0.001"),
    hjust = 0,
    vjust = 1,
    size = 4,
    color = "red"
  )

p <- p1 + p2
p + plot_layout(ncol = 1) + plot_annotation(tag_levels = "A")

Univariate Cox regression models of overall survival by probability of severe eICAHT predicted by eIPMs

Show code

# eIPMPre
df_mod_pre_ferritin_pre_ld <- df_mod_pre_ferritin_pre_ld %>%
  left_join(., df %>% select(upn, OS_status, OS_months), by = "upn")
  
tbl_pre <- coxph(Surv(OS_months, OS_status) ~ .pred_1_as_percentage,
                 data = df_mod_pre_ferritin_pre_ld) %>%
  tbl_regression(exponentiate = TRUE) %>% 
  bold_p()

# eIPMPost
df_mod_post_ferritin_only <- df_mod_post_ferritin_only %>%
  left_join(., df %>% select(upn, OS_status, OS_months), by = "upn")

tbl_post <- coxph(Surv(OS_months, OS_status) ~ .pred_1_as_percentage, 
                  data = df_mod_post_ferritin_only) %>%
  tbl_regression(exponentiate = TRUE) %>% 
  bold_p()

tbl_stack(
  list(tbl_pre, tbl_post),
  group_header =
    c(
      "eIPMPre",
      "eIPMPost"
    )
) %>%
  modify_header(label = "**Probability of severe eICAHT predicted by by eIPM**")

Probability of severe eICAHT predicted by by eIPM	HR¹	95% CI¹	p-value
eIPMPre
.pred_1_as_percentage	1.04	1.03, 1.05	<0.001
eIPMPost
.pred_1_as_percentage	1.04	1.03, 1.04	<0.001
¹ HR = Hazard Ratio, CI = Confidence Interval

Evaluation of CAR-HEMATOTOX score on test set

Compute CAR-HEMATOTOX scores

Show code

# Impute missing data
df_car_hematotox <- df %>%
  filter(upn %in% df_test$upn) %>% 
  select(upn,
         icaht_grade_3_4,
         anc_pre_ld,
         hb_pre_ld,
         plt_pre_ld,
         crp_pre_ld,
         ferritin_pre_ld) 

recipe <- recipe(icaht_grade_3_4 ~ ., data = df_car_hematotox) %>%
  update_role(upn, new_role = "ID") %>%
  update_role_requirements("ID", bake = FALSE) %>%
  step_impute_bag(anc_pre_ld,
                  hb_pre_ld,
                  plt_pre_ld,
                  crp_pre_ld,
                  ferritin_pre_ld,
                  seed_val = 100) 

# Extract processed dataframe
df_car_hematotox <- recipe %>% 
  prep() %>% 
  bake(new_data = NULL)

# Calculate CAR-HEMATOTOX score (continuous)
df_car_hematotox <- df_car_hematotox %>%
  mutate(
    plt_hematotox_score = case_when(
      plt_pre_ld > 175 ~ 0,
      plt_pre_ld %in% c(75:175) ~ 1,
      plt_pre_ld < 75 ~ 2,
      is.na(plt_pre_ld) ~ NA_real_
    ),
    anc_hematotox_score = case_when(
      anc_pre_ld > 1.2 ~ 0,
      anc_pre_ld < 1.2 ~ 1,
      is.na(anc_pre_ld) ~ NA_real_
    ),
    hb_hematotox_score = case_when(hb_pre_ld > 9 ~ 0,
                                   hb_pre_ld < 9 ~ 1,
                                   is.na(hb_pre_ld) ~ NA_real_),
    crp_hematotox_score = case_when(crp_pre_ld < 3 ~ 0,
                                    crp_pre_ld > 3 ~ 1,
                                    is.na(crp_pre_ld) ~ NA_real_),
    ferritin_hematotox_score = case_when(
      ferritin_pre_ld < 650 ~ 0,
      ferritin_pre_ld %in% c(650:2000) ~ 1,
      ferritin_pre_ld > 2000 ~ 2,
      is.na(ferritin_pre_ld) ~ NA_real_
    )
  )

# Calculate CAR-HEMATOTOX score (categorical)
df_car_hematotox <- df_car_hematotox %>%
  mutate(
    car_hematotox_score = rowSums(across(
      plt_hematotox_score:ferritin_hematotox_score
    )),
    car_hematotox_categorical = case_when(car_hematotox_score < 2 ~ "Low",
                                        car_hematotox_score >= 2 ~ "High"),
    car_hematotox_categorical = as.factor(car_hematotox_categorical),
    car_hematotox_categorical = fct_relevel(car_hematotox_categorical, c("Low", "High"))
  )

var_label(df_car_hematotox$car_hematotox_score) <- "CAR-HEMATOTOX score (continuous)"
var_label(df_car_hematotox$car_hematotox_categorical) <- "CAR-HEMATOTOX score (categorical)"

Sensitivity, specificity, and AUROC for CAR-HEMATOTOX score in predicting grade 3-4 ICAHT

Show code

# Sensitivity and specificity based on confusion matrix
df_matrix <- df_car_hematotox %>% 
  select(upn, icaht_grade_3_4, car_hematotox_categorical) %>% 
  mutate(car_hematotox_high = ifelse(car_hematotox_categorical == "High", 1, 0)) %>% 
  drop_na()

observed_value <- factor(df_matrix$icaht_grade_3_4, levels = c("1", "0"))
predicted_value <- factor(df_matrix$car_hematotox_high, levels = c("1", "0"))

matrix <-
  confusionMatrix(data = predicted_value,
                  reference = observed_value,
                  positive = "1")
matrix

Confusion Matrix and Statistics

          Reference
Prediction  1  0
         1 28 87
         0  1 69
                                          
               Accuracy : 0.5243          
                 95% CI : (0.4498, 0.5981)
    No Information Rate : 0.8432          
    P-Value [Acc > NIR] : 1               
                                          
                  Kappa : 0.1848          
                                          
 Mcnemar's Test P-Value : <2e-16          
                                          
            Sensitivity : 0.9655          
            Specificity : 0.4423          
         Pos Pred Value : 0.2435          
         Neg Pred Value : 0.9857          
             Prevalence : 0.1568          
         Detection Rate : 0.1514          
   Detection Prevalence : 0.6216          
      Balanced Accuracy : 0.7039          
                                          
       'Positive' Class : 1

Show code

# ROC
Epi::ROC(df_matrix$car_hematotox_high, df_matrix$icaht_grade_3_4)

Computational environment

Show code

devtools::session_info()

─ Session info ─────────────────────────────────────────────────────
 setting  value
 version  R version 4.4.1 (2024-06-14)
 os       macOS 15.2
 system   aarch64, darwin20
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/Los_Angeles
 date     2025-01-10
 pandoc   3.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)

─ Packages ─────────────────────────────────────────────────────────
 package       * version    date (UTC) lib source
 assertthat      0.2.1      2019-03-21 [1] CRAN (R 4.4.0)
 backports       1.5.0      2024-05-23 [1] CRAN (R 4.4.0)
 base64enc       0.1-3      2015-07-28 [1] CRAN (R 4.4.0)
 broom         * 1.0.6      2024-05-17 [1] CRAN (R 4.4.0)
 broom.helpers   1.15.0     2024-04-05 [1] CRAN (R 4.4.0)
 bslib           0.8.0      2024-07-29 [1] CRAN (R 4.4.0)
 cachem          1.1.0      2024-05-16 [1] CRAN (R 4.4.0)
 caret         * 6.0-94     2023-03-21 [1] CRAN (R 4.4.0)
 checkmate       2.3.2      2024-07-29 [1] CRAN (R 4.4.0)
 class           7.3-22     2023-05-03 [1] CRAN (R 4.4.1)
 cli             3.6.3      2024-06-21 [1] CRAN (R 4.4.0)
 cluster         2.1.6      2023-12-01 [1] CRAN (R 4.4.1)
 cmprsk          2.2-12     2024-05-19 [1] CRAN (R 4.4.0)
 codetools       0.2-20     2024-03-31 [1] CRAN (R 4.4.1)
 colorspace      2.1-1      2024-07-26 [1] CRAN (R 4.4.0)
 commonmark      1.9.1      2024-01-30 [1] CRAN (R 4.4.0)
 cutpointr     * 1.1.2      2022-04-13 [1] CRAN (R 4.4.0)
 data.table      1.15.4     2024-03-30 [1] CRAN (R 4.4.0)
 dcurves       * 0.4.0      2022-12-23 [1] CRAN (R 4.4.0)
 devtools        2.4.5      2022-10-11 [1] CRAN (R 4.4.0)
 dials         * 1.2.1      2024-02-22 [1] CRAN (R 4.4.0)
 DiceDesign      1.10       2023-12-07 [1] CRAN (R 4.4.0)
 digest          0.6.36     2024-06-23 [1] CRAN (R 4.4.0)
 distill       * 1.6        2023-10-06 [1] CRAN (R 4.4.0)
 doParallel      1.0.17     2022-02-07 [1] CRAN (R 4.4.0)
 downlit         0.4.4      2024-06-10 [1] CRAN (R 4.4.0)
 dplyr         * 1.1.4      2023-11-17 [1] CRAN (R 4.4.0)
 e1071           1.7-14     2023-12-06 [1] CRAN (R 4.4.0)
 ellipsis        0.3.2      2021-04-29 [1] CRAN (R 4.4.0)
 Epi           * 2.53       2024-07-18 [1] CRAN (R 4.4.0)
 etm             1.1.1      2020-09-08 [1] CRAN (R 4.4.0)
 evaluate        0.24.0     2024-06-10 [1] CRAN (R 4.4.0)
 fansi           1.0.6      2023-12-08 [1] CRAN (R 4.4.0)
 farver          2.1.2      2024-05-13 [1] CRAN (R 4.4.0)
 fastmap         1.2.0      2024-05-15 [1] CRAN (R 4.4.0)
 forcats       * 1.0.0      2023-01-29 [1] CRAN (R 4.4.0)
 foreach         1.5.2      2022-02-02 [1] CRAN (R 4.4.0)
 foreign         0.8-86     2023-11-28 [1] CRAN (R 4.4.1)
 Formula         1.2-5      2023-02-24 [1] CRAN (R 4.4.0)
 fs              1.6.4      2024-04-25 [1] CRAN (R 4.4.0)
 furrr           0.3.1      2022-08-15 [1] CRAN (R 4.4.0)
 future          1.33.2     2024-03-26 [1] CRAN (R 4.4.0)
 future.apply    1.11.2     2024-03-28 [1] CRAN (R 4.4.0)
 generics        0.1.3      2022-07-05 [1] CRAN (R 4.4.0)
 ggeasy          0.1.4      2023-03-12 [1] CRAN (R 4.4.0)
 ggnewscale    * 0.4.10     2024-02-08 [1] CRAN (R 4.4.0)
 ggplot2       * 3.5.1      2024-04-23 [1] CRAN (R 4.4.0)
 ggrepel       * 0.9.5      2024-01-10 [1] CRAN (R 4.4.0)
 ggsurvfit     * 1.1.0      2024-05-08 [1] CRAN (R 4.4.0)
 glmnet        * 4.1-8      2023-08-22 [1] CRAN (R 4.4.0)
 globals         0.16.3     2024-03-08 [1] CRAN (R 4.4.0)
 glue            1.7.0      2024-01-09 [1] CRAN (R 4.4.0)
 gower           1.0.1      2022-12-22 [1] CRAN (R 4.4.0)
 GPfit           1.0-8      2019-02-08 [1] CRAN (R 4.4.0)
 gridExtra       2.3        2017-09-09 [1] CRAN (R 4.4.0)
 gt              0.11.0     2024-07-09 [1] CRAN (R 4.4.0)
 gtable          0.3.5      2024-04-22 [1] CRAN (R 4.4.0)
 gtsummary     * 1.7.2      2023-07-15 [1] CRAN (R 4.4.0)
 hardhat         1.4.0      2024-06-02 [1] CRAN (R 4.4.0)
 haven           2.5.4      2023-11-30 [1] CRAN (R 4.4.0)
 heatwaveR       0.4.6      2021-10-27 [1] CRAN (R 4.4.0)
 here          * 1.0.1      2020-12-13 [1] CRAN (R 4.4.0)
 highr           0.11       2024-05-26 [1] CRAN (R 4.4.0)
 Hmisc         * 5.1-3      2024-05-28 [1] CRAN (R 4.4.0)
 hms             1.1.3      2023-03-21 [1] CRAN (R 4.4.0)
 htmlTable       2.4.3      2024-07-21 [1] CRAN (R 4.4.0)
 htmltools       0.5.8.1    2024-04-04 [1] CRAN (R 4.4.0)
 htmlwidgets     1.6.4      2023-12-06 [1] CRAN (R 4.4.0)
 httpuv          1.6.15     2024-03-26 [1] CRAN (R 4.4.0)
 infer         * 1.0.7      2024-03-25 [1] CRAN (R 4.4.0)
 ipred           0.9-15     2024-07-18 [1] CRAN (R 4.4.0)
 iterators       1.0.14     2022-02-05 [1] CRAN (R 4.4.0)
 janitor       * 2.2.0      2023-02-02 [1] CRAN (R 4.4.0)
 jquerylib       0.1.4      2021-04-26 [1] CRAN (R 4.4.0)
 jsonlite        1.8.8      2023-12-04 [1] CRAN (R 4.4.0)
 knitr         * 1.48       2024-07-07 [1] CRAN (R 4.4.0)
 labeling        0.4.3      2023-08-29 [1] CRAN (R 4.4.0)
 labelled      * 2.13.0     2024-04-23 [1] CRAN (R 4.4.0)
 later           1.3.2      2023-12-06 [1] CRAN (R 4.4.0)
 latrend       * 1.6.1      2024-05-15 [1] CRAN (R 4.4.0)
 lattice       * 0.22-6     2024-03-20 [1] CRAN (R 4.4.1)
 lava            1.8.0      2024-03-05 [1] CRAN (R 4.4.0)
 lhs             1.2.0      2024-06-30 [1] CRAN (R 4.4.0)
 lifecycle       1.0.4      2023-11-07 [1] CRAN (R 4.4.0)
 listenv         0.9.1      2024-01-29 [1] CRAN (R 4.4.0)
 lubridate     * 1.9.3      2023-09-27 [1] CRAN (R 4.4.0)
 magrittr        2.0.3      2022-03-30 [1] CRAN (R 4.4.0)
 markdown        1.13       2024-06-04 [1] CRAN (R 4.4.0)
 MASS            7.3-60.2   2024-04-26 [1] CRAN (R 4.4.1)
 Matrix        * 1.7-0      2024-04-26 [1] CRAN (R 4.4.1)
 MatrixModels    0.5-3      2023-11-06 [1] CRAN (R 4.4.0)
 matrixStats     1.3.0      2024-04-11 [1] CRAN (R 4.4.0)
 memoise         2.0.1      2021-11-26 [1] CRAN (R 4.4.0)
 mgcv            1.9-1      2023-12-21 [1] CRAN (R 4.4.1)
 mime            0.12       2021-09-28 [1] CRAN (R 4.4.0)
 miniUI          0.1.1.1    2018-05-18 [1] CRAN (R 4.4.0)
 modeldata     * 1.4.0      2024-06-19 [1] CRAN (R 4.4.0)
 ModelMetrics    1.2.2.2    2020-03-17 [1] CRAN (R 4.4.0)
 multcomp        1.4-26     2024-07-18 [1] CRAN (R 4.4.0)
 munsell         0.5.1      2024-04-01 [1] CRAN (R 4.4.0)
 mvtnorm         1.2-5      2024-05-21 [1] CRAN (R 4.4.0)
 nlme            3.1-164    2023-11-27 [1] CRAN (R 4.4.1)
 nnet            7.3-19     2023-05-03 [1] CRAN (R 4.4.1)
 numDeriv        2016.8-1.1 2019-06-06 [1] CRAN (R 4.4.0)
 openxlsx      * 4.2.5.2    2023-02-06 [1] CRAN (R 4.4.0)
 parallelly      1.37.1     2024-02-29 [1] CRAN (R 4.4.0)
 parsnip       * 1.2.1      2024-03-22 [1] CRAN (R 4.4.0)
 patchwork     * 1.2.0      2024-01-08 [1] CRAN (R 4.4.0)
 pillar          1.9.0      2023-03-22 [1] CRAN (R 4.4.0)
 pkgbuild        1.4.4      2024-03-17 [1] CRAN (R 4.4.0)
 pkgconfig       2.0.3      2019-09-22 [1] CRAN (R 4.4.0)
 pkgload         1.4.0      2024-06-28 [1] CRAN (R 4.4.0)
 plyr            1.8.9      2023-10-02 [1] CRAN (R 4.4.0)
 pmsampsize    * 1.1.3      2023-12-06 [1] CRAN (R 4.4.0)
 polspline       1.1.25     2024-05-10 [1] CRAN (R 4.4.0)
 probably      * 1.0.3      2024-02-23 [1] CRAN (R 4.4.0)
 pROC            1.18.5     2023-11-01 [1] CRAN (R 4.4.0)
 prodlim       * 2024.06.25 2024-06-24 [1] CRAN (R 4.4.0)
 profvis         0.3.8      2023-05-02 [1] CRAN (R 4.4.0)
 promises        1.3.0      2024-04-05 [1] CRAN (R 4.4.0)
 proxy           0.4-27     2022-06-09 [1] CRAN (R 4.4.0)
 purrr         * 1.0.2      2023-08-10 [1] CRAN (R 4.4.0)
 quantreg        5.98       2024-05-26 [1] CRAN (R 4.4.0)
 R.methodsS3     1.8.2      2022-06-13 [1] CRAN (R 4.4.0)
 R.oo            1.26.0     2024-01-24 [1] CRAN (R 4.4.0)
 R.utils         2.12.3     2023-11-18 [1] CRAN (R 4.4.0)
 R6              2.5.1      2021-08-19 [1] CRAN (R 4.4.0)
 rbibutils       2.2.16     2023-10-25 [1] CRAN (R 4.4.0)
 Rcpp            1.0.13     2024-07-17 [1] CRAN (R 4.4.0)
 Rdpack          2.6        2023-11-08 [1] CRAN (R 4.4.0)
 readr         * 2.1.5      2024-01-10 [1] CRAN (R 4.4.0)
 recipes       * 1.1.0      2024-07-04 [1] CRAN (R 4.4.0)
 remotes         2.5.0      2024-03-17 [1] CRAN (R 4.4.0)
 reshape2        1.4.4      2020-04-09 [1] CRAN (R 4.4.0)
 rlang           1.1.4      2024-06-04 [1] CRAN (R 4.4.0)
 rmarkdown     * 2.27       2024-05-17 [1] CRAN (R 4.4.0)
 rms           * 6.8-1      2024-05-27 [1] CRAN (R 4.4.0)
 rpart           4.1.23     2023-12-05 [1] CRAN (R 4.4.1)
 rprojroot       2.0.4      2023-11-05 [1] CRAN (R 4.4.0)
 rsample       * 1.2.1      2024-03-25 [1] CRAN (R 4.4.0)
 rstudioapi      0.16.0     2024-03-24 [1] CRAN (R 4.4.0)
 runway        * 0.1.0      2024-08-08 [1] Github (ML4LHS/runway@e93d55e)
 sandwich        3.1-0      2023-12-11 [1] CRAN (R 4.4.0)
 sass            0.4.9      2024-03-15 [1] CRAN (R 4.4.0)
 scales        * 1.3.0      2023-11-28 [1] CRAN (R 4.4.0)
 sessioninfo     1.2.2      2021-12-06 [1] CRAN (R 4.4.0)
 shape           1.4.6.1    2024-02-23 [1] CRAN (R 4.4.0)
 shiny           1.8.1.1    2024-04-02 [1] CRAN (R 4.4.0)
 snakecase       0.11.1     2023-08-27 [1] CRAN (R 4.4.0)
 SparseM         1.84-2     2024-07-17 [1] CRAN (R 4.4.0)
 stringi         1.8.4      2024-05-06 [1] CRAN (R 4.4.0)
 stringr       * 1.5.1      2023-11-14 [1] CRAN (R 4.4.0)
 survival      * 3.6-4      2024-04-24 [1] CRAN (R 4.4.1)
 TH.data         1.1-2      2023-04-17 [1] CRAN (R 4.4.0)
 tibble        * 3.2.1      2023-03-20 [1] CRAN (R 4.4.0)
 tidymodels    * 1.2.0      2024-03-25 [1] CRAN (R 4.4.0)
 tidyr         * 1.3.1      2024-01-24 [1] CRAN (R 4.4.0)
 tidyselect      1.2.1      2024-03-11 [1] CRAN (R 4.4.0)
 tidyverse     * 2.0.0      2023-02-22 [1] CRAN (R 4.4.0)
 timechange      0.3.0      2024-01-18 [1] CRAN (R 4.4.0)
 timeDate        4032.109   2023-12-14 [1] CRAN (R 4.4.0)
 tune          * 1.2.1      2024-04-18 [1] CRAN (R 4.4.0)
 tzdb            0.4.0      2023-05-12 [1] CRAN (R 4.4.0)
 urlchecker      1.0.1      2021-11-30 [1] CRAN (R 4.4.0)
 usethis         2.2.3      2024-02-19 [1] CRAN (R 4.4.0)
 utf8            1.2.4      2023-10-22 [1] CRAN (R 4.4.0)
 vctrs           0.6.5      2023-12-01 [1] CRAN (R 4.4.0)
 vip           * 0.4.1      2023-08-21 [1] CRAN (R 4.4.0)
 withr           3.0.1      2024-07-31 [1] CRAN (R 4.4.0)
 workflows     * 1.1.4      2024-02-19 [1] CRAN (R 4.4.0)
 workflowsets  * 1.1.0      2024-03-21 [1] CRAN (R 4.4.0)
 xfun            0.46       2024-07-18 [1] CRAN (R 4.4.0)
 xml2            1.3.6      2023-12-04 [1] CRAN (R 4.4.0)
 xtable          1.8-4      2019-04-21 [1] CRAN (R 4.4.0)
 yaml            2.3.10     2024-07-26 [1] CRAN (R 4.4.0)
 yardstick     * 1.3.1      2024-03-21 [1] CRAN (R 4.4.0)
 zip             2.3.1      2024-01-27 [1] CRAN (R 4.4.0)
 zoo           * 1.8-12     2023-04-13 [1] CRAN (R 4.4.0)

 [1] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library

────────────────────────────────────────────────────────────────────