A survival analysis using all-cause mortality is informative for us to know that a disease, treatment or characteristic may increase the risk of someone dying. However, it is useful to have a better understanding on what may be driving the high mortality risk. Conducting a survival analysis stratifying by cause of death is a common approach.
Mortality data is often coded using International Disease Classification codes v.10 (ICD-10). There are multiple categories for types of death, but for simplicity here are some examples, which we would be using in this project.
Cause of Death
ICD-10 Code
Description
Natural Causes
R999
Death due to natural processes such as aging or disease without specific cause specified.
Cardiovascular Diseases
I00-I999
Diseases of the circulatory system such as heart attack and strokes
Respiratory Diseases
J00-J999
Respiratory conditions such as pneumonia, COPD, asthma
Neoplasm (cancer)
C00-D48
Death due to any type of cancers categorised by organ or tissue affected
External Causes
V01-Y89
Death due to unintentional causes such as accidents or drowning
Arrange mortality data by types of death using ICD codes
Note: Remember to remove external cause of death if present in your data. In this example there are no external causes of death, however the code to remove it has been added as a comment below
load("data/result.RData")library(dplyr)# Define the ICD-10 code ranges for each categorycardiovascular_codes <-c(paste0("I", 0:999))respiratory_codes <-c(paste0("J", 0:999))neoplasm_codes <-c(paste0("C", 0:99), paste0("D0", 0:99), paste0("D1", 0:99), paste0("D2", 0:99), paste0("D3", 0:99), paste0("D4", 0:99))#external_causes <- c(paste0("V", 0:99), paste0("W", 0:99), paste0("X", 0:99), paste0("Y", 0:99))# Create new variables for each categorydeath2 <- death %>%mutate(cardiovascular_death =ifelse(cofdmain %in% cardiovascular_codes, 1, 0),respiratory_death =ifelse(cofdmain %in% respiratory_codes, 1, 0),neoplasm_death =ifelse(cofdmain %in% neoplasm_codes, 1, 0)#,#external_cause_death = ifelse(cofdmain %in% external_causes, 1, 0) )# Exclude external causes from the other categories#death2 <- death %>%# mutate(# cardiovascular_death = ifelse(external_cause_death == 1, 0, cardiovascular_death),# respiratory_death = ifelse(external_cause_death == 1, 0, respiratory_death),#neoplasm_death = ifelse(external_cause_death == 1, 0, neoplasm_death)# )
Censoring of the data using different types of death
#Same as above but now with the different types of death #Cardiovascular mortality death2$cardiovascular_death <-as.character(death2$cardiovascular_death)death2$cardiovascular_death[is.na(death2$cardiovascular_death)] <-0#No event occured (ALIVE)death2$cardiovascular_death[death2$cardiovascular_death >0] <-1death2$cardiovascular_death <-as.numeric(death2$cardiovascular_death)#Respiratory mortalitydeath2$respiratory_death <-as.character(death2$respiratory_death)death2$respiratory_death[is.na(death2$respiratory_death)] <-0#No event occured (ALIVE)death2$respiratory_death[death2$respiratory_death >0] <-1death2$respiratory_death <-as.numeric(death2$respiratory_death)#Neoplasm mortalitydeath2$neoplasm_death <-as.character(death2$neoplasm_death)death2$neoplasm_death[is.na(death2$neoplasm_death)] <-0#No event occured (ALIVE)death2$neoplasm_death[death2$neoplasm_death >0] <-1death2$neoplasm_death <-as.numeric(death2$neoplasm_death)
Multivariate survival analysis, stratified by cause of death
library(survival)library(survminer)library(ggplot2)library(gtsummary)#Order smoking status correctlydeath2$Smoking_status <-as.factor(death2$Smoking_status)death2$Smoking_status <-relevel(death2$Smoking_status, ref ="Never")#All cause mortality adjusted for covariates#E.g. Cardiovascular mortality LTRC_ats <-Surv(time =death2$Age, time2 = death2$time_years, event = death2$cardiovascular_death) #Change type of death here coxph_LTRC_airflow <-coxph(LTRC_ats ~ AO.fev1fvc + Sex + bmi + Ethnicity + Smoking_status + UKB_centre, data = death2)coxph_LTRC_airflow %>%tbl_regression(exponentiate=TRUE) %>%bold_p(t =0.05)
Characteristic
HR1
95% CI1
p-value
AO.fev1fvc
1.69
1.52, 1.87
<0.001
Sex
Female
—
—
Male
2.01
1.86, 2.18
<0.001
bmi
1.05
1.04, 1.05
<0.001
Ethnicity
Other
—
—
White
1.00
0.85, 1.18
>0.9
Smoking_status
Never
—
—
Current
3.10
2.79, 3.44
<0.001
Previous
1.20
1.10, 1.31
<0.001
UKB_centre
Barts
—
—
Birmingham
0.90
0.66, 1.23
0.5
Bristol
0.84
0.63, 1.11
0.2
Bury
1.13
0.86, 1.50
0.4
Cardiff
1.02
0.74, 1.39
>0.9
Croydon
0.86
0.63, 1.17
0.3
Edinburgh
1.13
0.84, 1.54
0.4
Glasgow
1.26
0.94, 1.69
0.13
Leeds
0.85
0.65, 1.12
0.2
Liverpool
0.98
0.73, 1.30
0.9
Manchester
0.81
0.58, 1.13
0.2
Middlesborough
0.91
0.67, 1.24
0.6
Newcastle
0.91
0.69, 1.21
0.5
Nottingham
1.03
0.77, 1.36
0.9
Oxford
0.93
0.66, 1.31
0.7
Reading
0.75
0.55, 1.02
0.063
Sheffield
0.93
0.70, 1.25
0.6
Stoke
0.72
0.52, 0.99
0.042
Swansea
1.39
0.83, 2.33
0.2
Wrexham
1.22
0.38, 3.88
0.7
1 HR = Hazard Ratio, CI = Confidence Interval
t1 <- coxph_LTRC_airflow %>%tbl_regression(exponentiate=TRUE) %>%bold_p(t =0.05) #summary(coxph_LTRC_airflow)#Respiratory mortalityLTRC_ats <-Surv(time =death2$Age, time2 = death2$time_years, event = death2$respiratory_death) #Change type of death here coxph_LTRC_airflow <-coxph(LTRC_ats ~ AO.fev1fvc + Sex + bmi + Ethnicity + Smoking_status + UKB_centre, data = death2)coxph_LTRC_airflow %>%tbl_regression(exponentiate=TRUE) %>%bold_p(t =0.05)
Characteristic
HR1
95% CI1
p-value
AO.fev1fvc
4.67
4.02, 5.42
<0.001
Sex
Female
—
—
Male
1.78
1.54, 2.06
<0.001
bmi
1.05
1.03, 1.06
<0.001
Ethnicity
Other
—
—
White
1.23
0.89, 1.70
0.2
Smoking_status
Never
—
—
Current
3.25
2.69, 3.93
<0.001
Previous
1.38
1.17, 1.63
<0.001
UKB_centre
Barts
—
—
Birmingham
0.72
0.39, 1.32
0.3
Bristol
0.93
0.55, 1.57
0.8
Bury
1.06
0.62, 1.79
0.8
Cardiff
0.82
0.45, 1.51
0.5
Croydon
1.08
0.62, 1.90
0.8
Edinburgh
1.26
0.72, 2.20
0.4
Glasgow
0.87
0.49, 1.56
0.6
Leeds
0.88
0.53, 1.46
0.6
Liverpool
0.98
0.58, 1.67
>0.9
Manchester
0.90
0.49, 1.65
0.7
Middlesborough
1.06
0.61, 1.86
0.8
Newcastle
1.09
0.65, 1.82
0.8
Nottingham
0.82
0.48, 1.42
0.5
Oxford
0.92
0.49, 1.74
0.8
Reading
0.76
0.43, 1.34
0.3
Sheffield
1.17
0.69, 2.00
0.6
Stoke
0.93
0.52, 1.65
0.8
Swansea
1.86
0.81, 4.28
0.15
Wrexham
0.00
0.00, Inf
>0.9
1 HR = Hazard Ratio, CI = Confidence Interval
t2 <- coxph_LTRC_airflow %>%tbl_regression(exponentiate=TRUE) %>%bold_p(t =0.05) #summary(coxph_LTRC_airflow)#Respiratory mortalityLTRC_ats <-Surv(time =death2$Age, time2 = death2$time_years, event = death2$neoplasm_death) #Change type of death here coxph_LTRC_airflow <-coxph(LTRC_ats ~ AO.fev1fvc + Sex + bmi + Ethnicity + Smoking_status + UKB_centre, data = death2)coxph_LTRC_airflow %>%tbl_regression(exponentiate=TRUE) %>%bold_p(t =0.05)