load("data/result.RData")
library(dplyr)
# Define the ICD-10 code ranges for each category
cardiovascular_codes <- c(paste0("I", 0:999))
respiratory_codes <- c(paste0("J", 0:999))
neoplasm_codes <- c(paste0("C", 0:99), paste0("D0", 0:99), paste0("D1", 0:99), paste0("D2", 0:99), paste0("D3", 0:99), paste0("D4", 0:99))
#external_causes <- c(paste0("V", 0:99), paste0("W", 0:99), paste0("X", 0:99), paste0("Y", 0:99))
# Create new variables for each category
death2 <- death %>%
  mutate(
    cardiovascular_death = ifelse(cofdmain %in% cardiovascular_codes, 1, 0),
    respiratory_death = ifelse(cofdmain %in% respiratory_codes, 1, 0),
    neoplasm_death = ifelse(cofdmain %in% neoplasm_codes, 1, 0)#,
    #external_cause_death = ifelse(cofdmain %in% external_causes, 1, 0)
  )
# Exclude external causes from the other categories
#death2 <- death %>%
 # mutate(
  #  cardiovascular_death = ifelse(external_cause_death == 1, 0, cardiovascular_death),
   # respiratory_death = ifelse(external_cause_death == 1, 0, respiratory_death),
    #neoplasm_death = ifelse(external_cause_death == 1, 0, neoplasm_death)
#  )Advanced survival analysis
Survival analysis stratified by cause of death
A survival analysis using all-cause mortality is informative for us to know that a disease, treatment or characteristic may increase the risk of someone dying. However, it is useful to have a better understanding on what may be driving the high mortality risk. Conducting a survival analysis stratifying by cause of death is a common approach.
Mortality data is often coded using International Disease Classification codes v.10 (ICD-10). There are multiple categories for types of death, but for simplicity here are some examples, which we would be using in this project.
| Cause of Death | ICD-10 Code | Description | 
|---|---|---|
| Natural Causes | R999 | Death due to natural processes such as aging or disease without specific cause specified. | 
| Cardiovascular Diseases | I00-I999 | Diseases of the circulatory system such as heart attack and strokes | 
| Respiratory Diseases | J00-J999 | Respiratory conditions such as pneumonia, COPD, asthma | 
| Neoplasm (cancer) | C00-D48 | Death due to any type of cancers categorised by organ or tissue affected | 
| External Causes | V01-Y89 | Death due to unintentional causes such as accidents or drowning | 
More info on International Classification of Disease V.10
Arrange mortality data by types of death using ICD codes
Note: Remember to remove external cause of death if present in your data. In this example there are no external causes of death, however the code to remove it has been added as a comment below
Censoring of the data using different types of death
#Same as above but now with the different types of death 
#Cardiovascular mortality 
death2$cardiovascular_death <- as.character(death2$cardiovascular_death)
death2$cardiovascular_death[is.na(death2$cardiovascular_death)] <- 0 #No event occured (ALIVE)
death2$cardiovascular_death[death2$cardiovascular_death > 0] <- 1
death2$cardiovascular_death <- as.numeric(death2$cardiovascular_death)
#Respiratory mortality
death2$respiratory_death <- as.character(death2$respiratory_death)
death2$respiratory_death[is.na(death2$respiratory_death)] <- 0 #No event occured (ALIVE)
death2$respiratory_death[death2$respiratory_death > 0] <- 1
death2$respiratory_death <- as.numeric(death2$respiratory_death)
#Neoplasm mortality
death2$neoplasm_death <- as.character(death2$neoplasm_death)
death2$neoplasm_death[is.na(death2$neoplasm_death)] <- 0 #No event occured (ALIVE)
death2$neoplasm_death[death2$neoplasm_death > 0] <- 1
death2$neoplasm_death <- as.numeric(death2$neoplasm_death)Multivariate survival analysis, stratified by cause of death
library(survival)
library(survminer)
library(ggplot2)
library(gtsummary)
#Order smoking status correctly
death2$Smoking_status <- as.factor(death2$Smoking_status)
death2$Smoking_status <- relevel(death2$Smoking_status, ref = "Never")
#All cause mortality adjusted for covariates
#E.g. Cardiovascular mortality 
LTRC_ats <- Surv(time =death2$Age, time2 = death2$time_years, event = death2$cardiovascular_death) #Change type of death here 
coxph_LTRC_airflow <- coxph(LTRC_ats ~ AO.fev1fvc + Sex + bmi + Ethnicity + Smoking_status + UKB_centre, data = death2)
coxph_LTRC_airflow %>% tbl_regression(exponentiate= TRUE) %>% bold_p(t = 0.05) | Characteristic | HR1 | 95% CI1 | p-value | 
|---|---|---|---|
| AO.fev1fvc | 1.69 | 1.52, 1.87 | <0.001 | 
| Sex | |||
| Female | — | — | |
| Male | 2.01 | 1.86, 2.18 | <0.001 | 
| bmi | 1.05 | 1.04, 1.05 | <0.001 | 
| Ethnicity | |||
| Other | — | — | |
| White | 1.00 | 0.85, 1.18 | >0.9 | 
| Smoking_status | |||
| Never | — | — | |
| Current | 3.10 | 2.79, 3.44 | <0.001 | 
| Previous | 1.20 | 1.10, 1.31 | <0.001 | 
| UKB_centre | |||
| Barts | — | — | |
| Birmingham | 0.90 | 0.66, 1.23 | 0.5 | 
| Bristol | 0.84 | 0.63, 1.11 | 0.2 | 
| Bury | 1.13 | 0.86, 1.50 | 0.4 | 
| Cardiff | 1.02 | 0.74, 1.39 | >0.9 | 
| Croydon | 0.86 | 0.63, 1.17 | 0.3 | 
| Edinburgh | 1.13 | 0.84, 1.54 | 0.4 | 
| Glasgow | 1.26 | 0.94, 1.69 | 0.13 | 
| Leeds | 0.85 | 0.65, 1.12 | 0.2 | 
| Liverpool | 0.98 | 0.73, 1.30 | 0.9 | 
| Manchester | 0.81 | 0.58, 1.13 | 0.2 | 
| Middlesborough | 0.91 | 0.67, 1.24 | 0.6 | 
| Newcastle | 0.91 | 0.69, 1.21 | 0.5 | 
| Nottingham | 1.03 | 0.77, 1.36 | 0.9 | 
| Oxford | 0.93 | 0.66, 1.31 | 0.7 | 
| Reading | 0.75 | 0.55, 1.02 | 0.063 | 
| Sheffield | 0.93 | 0.70, 1.25 | 0.6 | 
| Stoke | 0.72 | 0.52, 0.99 | 0.042 | 
| Swansea | 1.39 | 0.83, 2.33 | 0.2 | 
| Wrexham | 1.22 | 0.38, 3.88 | 0.7 | 
| 1 HR = Hazard Ratio, CI = Confidence Interval | |||
t1 <- coxph_LTRC_airflow %>% tbl_regression(exponentiate= TRUE) %>% bold_p(t = 0.05) 
#summary(coxph_LTRC_airflow)
#Respiratory mortality
LTRC_ats <- Surv(time =death2$Age, time2 = death2$time_years, event = death2$respiratory_death) #Change type of death here 
coxph_LTRC_airflow <- coxph(LTRC_ats ~ AO.fev1fvc + Sex + bmi + Ethnicity + Smoking_status + UKB_centre, data = death2)
coxph_LTRC_airflow %>% tbl_regression(exponentiate= TRUE) %>% bold_p(t = 0.05) | Characteristic | HR1 | 95% CI1 | p-value | 
|---|---|---|---|
| AO.fev1fvc | 4.67 | 4.02, 5.42 | <0.001 | 
| Sex | |||
| Female | — | — | |
| Male | 1.78 | 1.54, 2.06 | <0.001 | 
| bmi | 1.05 | 1.03, 1.06 | <0.001 | 
| Ethnicity | |||
| Other | — | — | |
| White | 1.23 | 0.89, 1.70 | 0.2 | 
| Smoking_status | |||
| Never | — | — | |
| Current | 3.25 | 2.69, 3.93 | <0.001 | 
| Previous | 1.38 | 1.17, 1.63 | <0.001 | 
| UKB_centre | |||
| Barts | — | — | |
| Birmingham | 0.72 | 0.39, 1.32 | 0.3 | 
| Bristol | 0.93 | 0.55, 1.57 | 0.8 | 
| Bury | 1.06 | 0.62, 1.79 | 0.8 | 
| Cardiff | 0.82 | 0.45, 1.51 | 0.5 | 
| Croydon | 1.08 | 0.62, 1.90 | 0.8 | 
| Edinburgh | 1.26 | 0.72, 2.20 | 0.4 | 
| Glasgow | 0.87 | 0.49, 1.56 | 0.6 | 
| Leeds | 0.88 | 0.53, 1.46 | 0.6 | 
| Liverpool | 0.98 | 0.58, 1.67 | >0.9 | 
| Manchester | 0.90 | 0.49, 1.65 | 0.7 | 
| Middlesborough | 1.06 | 0.61, 1.86 | 0.8 | 
| Newcastle | 1.09 | 0.65, 1.82 | 0.8 | 
| Nottingham | 0.82 | 0.48, 1.42 | 0.5 | 
| Oxford | 0.92 | 0.49, 1.74 | 0.8 | 
| Reading | 0.76 | 0.43, 1.34 | 0.3 | 
| Sheffield | 1.17 | 0.69, 2.00 | 0.6 | 
| Stoke | 0.93 | 0.52, 1.65 | 0.8 | 
| Swansea | 1.86 | 0.81, 4.28 | 0.15 | 
| Wrexham | 0.00 | 0.00, Inf | >0.9 | 
| 1 HR = Hazard Ratio, CI = Confidence Interval | |||
t2 <- coxph_LTRC_airflow %>% tbl_regression(exponentiate= TRUE) %>% bold_p(t = 0.05) 
#summary(coxph_LTRC_airflow)
#Respiratory mortality
LTRC_ats <- Surv(time =death2$Age, time2 = death2$time_years, event = death2$neoplasm_death) #Change type of death here 
coxph_LTRC_airflow <- coxph(LTRC_ats ~ AO.fev1fvc + Sex + bmi + Ethnicity + Smoking_status + UKB_centre, data = death2)
coxph_LTRC_airflow %>% tbl_regression(exponentiate= TRUE) %>% bold_p(t = 0.05) | Characteristic | HR1 | 95% CI1 | p-value | 
|---|---|---|---|
| AO.fev1fvc | 1.25 | 1.07, 1.47 | 0.006 | 
| Sex | |||
| Female | — | — | |
| Male | 1.65 | 1.49, 1.83 | <0.001 | 
| bmi | 1.01 | 1.00, 1.02 | 0.062 | 
| Ethnicity | |||
| Other | — | — | |
| White | 0.90 | 0.73, 1.10 | 0.3 | 
| Smoking_status | |||
| Never | — | — | |
| Current | 1.27 | 1.07, 1.51 | 0.007 | 
| Previous | 1.14 | 1.02, 1.27 | 0.021 | 
| UKB_centre | |||
| Barts | — | — | |
| Birmingham | 0.98 | 0.62, 1.54 | >0.9 | 
| Bristol | 1.16 | 0.78, 1.73 | 0.5 | 
| Bury | 1.35 | 0.90, 2.03 | 0.14 | 
| Cardiff | 1.07 | 0.68, 1.68 | 0.8 | 
| Croydon | 1.17 | 0.76, 1.80 | 0.5 | 
| Edinburgh | 1.20 | 0.77, 1.86 | 0.4 | 
| Glasgow | 1.28 | 0.83, 1.97 | 0.3 | 
| Leeds | 1.14 | 0.78, 1.68 | 0.5 | 
| Liverpool | 1.21 | 0.81, 1.82 | 0.3 | 
| Manchester | 0.97 | 0.60, 1.56 | 0.9 | 
| Middlesborough | 1.32 | 0.86, 2.03 | 0.2 | 
| Newcastle | 1.16 | 0.77, 1.74 | 0.5 | 
| Nottingham | 1.00 | 0.66, 1.52 | >0.9 | 
| Oxford | 1.12 | 0.70, 1.80 | 0.6 | 
| Reading | 1.24 | 0.83, 1.87 | 0.3 | 
| Sheffield | 1.11 | 0.73, 1.69 | 0.6 | 
| Stoke | 1.19 | 0.77, 1.83 | 0.4 | 
| Swansea | 1.84 | 0.92, 3.68 | 0.084 | 
| Wrexham | 0.94 | 0.13, 6.90 | >0.9 | 
| 1 HR = Hazard Ratio, CI = Confidence Interval | |||
t3 <- coxph_LTRC_airflow %>% tbl_regression(exponentiate= TRUE) %>% bold_p(t = 0.05) 
#summary(coxph_LTRC_airflow)
tbl_merge_ex2 <-
  tbl_merge(
    tbls = list(t1, t2, t3),
    tab_spanner = c("**Cardiovascular mortality**",
                    "**Respiratory mortality**", "**Neoplasm (cancer) mortality**")
  )
tbl_merge_ex2| Characteristic | Cardiovascular mortality | Respiratory mortality | Neoplasm (cancer) mortality | ||||||
|---|---|---|---|---|---|---|---|---|---|
| HR1 | 95% CI1 | p-value | HR1 | 95% CI1 | p-value | HR1 | 95% CI1 | p-value | |
| AO.fev1fvc | 1.69 | 1.52, 1.87 | <0.001 | 4.67 | 4.02, 5.42 | <0.001 | 1.25 | 1.07, 1.47 | 0.006 | 
| Sex | |||||||||
| Female | — | — | — | — | — | — | |||
| Male | 2.01 | 1.86, 2.18 | <0.001 | 1.78 | 1.54, 2.06 | <0.001 | 1.65 | 1.49, 1.83 | <0.001 | 
| bmi | 1.05 | 1.04, 1.05 | <0.001 | 1.05 | 1.03, 1.06 | <0.001 | 1.01 | 1.00, 1.02 | 0.062 | 
| Ethnicity | |||||||||
| Other | — | — | — | — | — | — | |||
| White | 1.00 | 0.85, 1.18 | >0.9 | 1.23 | 0.89, 1.70 | 0.2 | 0.90 | 0.73, 1.10 | 0.3 | 
| Smoking_status | |||||||||
| Never | — | — | — | — | — | — | |||
| Current | 3.10 | 2.79, 3.44 | <0.001 | 3.25 | 2.69, 3.93 | <0.001 | 1.27 | 1.07, 1.51 | 0.007 | 
| Previous | 1.20 | 1.10, 1.31 | <0.001 | 1.38 | 1.17, 1.63 | <0.001 | 1.14 | 1.02, 1.27 | 0.021 | 
| UKB_centre | |||||||||
| Barts | — | — | — | — | — | — | |||
| Birmingham | 0.90 | 0.66, 1.23 | 0.5 | 0.72 | 0.39, 1.32 | 0.3 | 0.98 | 0.62, 1.54 | >0.9 | 
| Bristol | 0.84 | 0.63, 1.11 | 0.2 | 0.93 | 0.55, 1.57 | 0.8 | 1.16 | 0.78, 1.73 | 0.5 | 
| Bury | 1.13 | 0.86, 1.50 | 0.4 | 1.06 | 0.62, 1.79 | 0.8 | 1.35 | 0.90, 2.03 | 0.14 | 
| Cardiff | 1.02 | 0.74, 1.39 | >0.9 | 0.82 | 0.45, 1.51 | 0.5 | 1.07 | 0.68, 1.68 | 0.8 | 
| Croydon | 0.86 | 0.63, 1.17 | 0.3 | 1.08 | 0.62, 1.90 | 0.8 | 1.17 | 0.76, 1.80 | 0.5 | 
| Edinburgh | 1.13 | 0.84, 1.54 | 0.4 | 1.26 | 0.72, 2.20 | 0.4 | 1.20 | 0.77, 1.86 | 0.4 | 
| Glasgow | 1.26 | 0.94, 1.69 | 0.13 | 0.87 | 0.49, 1.56 | 0.6 | 1.28 | 0.83, 1.97 | 0.3 | 
| Leeds | 0.85 | 0.65, 1.12 | 0.2 | 0.88 | 0.53, 1.46 | 0.6 | 1.14 | 0.78, 1.68 | 0.5 | 
| Liverpool | 0.98 | 0.73, 1.30 | 0.9 | 0.98 | 0.58, 1.67 | >0.9 | 1.21 | 0.81, 1.82 | 0.3 | 
| Manchester | 0.81 | 0.58, 1.13 | 0.2 | 0.90 | 0.49, 1.65 | 0.7 | 0.97 | 0.60, 1.56 | 0.9 | 
| Middlesborough | 0.91 | 0.67, 1.24 | 0.6 | 1.06 | 0.61, 1.86 | 0.8 | 1.32 | 0.86, 2.03 | 0.2 | 
| Newcastle | 0.91 | 0.69, 1.21 | 0.5 | 1.09 | 0.65, 1.82 | 0.8 | 1.16 | 0.77, 1.74 | 0.5 | 
| Nottingham | 1.03 | 0.77, 1.36 | 0.9 | 0.82 | 0.48, 1.42 | 0.5 | 1.00 | 0.66, 1.52 | >0.9 | 
| Oxford | 0.93 | 0.66, 1.31 | 0.7 | 0.92 | 0.49, 1.74 | 0.8 | 1.12 | 0.70, 1.80 | 0.6 | 
| Reading | 0.75 | 0.55, 1.02 | 0.063 | 0.76 | 0.43, 1.34 | 0.3 | 1.24 | 0.83, 1.87 | 0.3 | 
| Sheffield | 0.93 | 0.70, 1.25 | 0.6 | 1.17 | 0.69, 2.00 | 0.6 | 1.11 | 0.73, 1.69 | 0.6 | 
| Stoke | 0.72 | 0.52, 0.99 | 0.042 | 0.93 | 0.52, 1.65 | 0.8 | 1.19 | 0.77, 1.83 | 0.4 | 
| Swansea | 1.39 | 0.83, 2.33 | 0.2 | 1.86 | 0.81, 4.28 | 0.15 | 1.84 | 0.92, 3.68 | 0.084 | 
| Wrexham | 1.22 | 0.38, 3.88 | 0.7 | 0.00 | 0.00, Inf | >0.9 | 0.94 | 0.13, 6.90 | >0.9 | 
| 1 HR = Hazard Ratio, CI = Confidence Interval | |||||||||
#tbl_merge_ex2 %>%
 # as_flex_table() %>%
  #flextable::save_as_docx(path = "NAMEs.docx")Extra Resources
Here are some extra bits and pieces to help you with this ReCoDE project including:
R Packages Cheatsheet if you want to get fancier with your tables and graphs
Statistics glossary
