# ---------Code for fitting CoxPH models to DRS profiles ---------
# Project: IDEA: Integrated Drug Expression Analysis
# Authors: Matthew H. Ung, Frederick S. Varn, Chao Cheng*
# Institution: Geisel School of Medicine at Dartmouth
# Last Updated: 3/27/15
library(survival)
# [1] Run survival analysis for PI3K inhibitors for all cell lines in METABRIC dataset
myinf1 = "DRS_4allsamples_output.txt"
myinf2 = "Patient_sample_info.txt"
myoutf1 = "surv_analysis_sample.txt"
# Calculate final DRS from DRS(up) - DRS(dn)
data = read.table(myinf1, sep="\t", header=T, row.names=1, quote="")
cnum = ncol(data)/2
data = data[, 1:cnum]
cnum = ncol(data)/2
data = data[, 1:cnum] - data[, (1+cnum):(2*cnum)]
xx = colnames(data)
xx = gsub("_up.ES", "", xx)
colnames(data) = xx
# Match patient samples between gene expression data and clinical information
info = read.table(myinf2, sep="\t", header=T, row.names=1)
row.names(info) = gsub("-", ".", row.names(info), fix=T)
comSam = intersect(row.names(info), row.names(data))
data = data[comSam, ]
info = info[comSam, ]
# Create Surv() object
mysurv = Surv(info$t.surv, info$e.surv)
# Fit Cox proportional hazards model to each DRS profile. Here, a univariate model
# was fitted; multivariate models correcting for potential clinical counfounders can
# be included in the fuction "coxph".
# Create empty matrix
cox_result = matrix(0, ncol(data), 5)
# Loop through, and fit model to all DRS profiles
for (i in 1:ncol(data)) {
drs = data[, i]
mycox = coxph(mysurv~drs) # univariate model
pvalue = summary(mycox)[["coefficients"]][5]
hr = exp(mycox[["coefficients"]])
lb = summary(mycox)[["conf.int"]][3]
ub = summary(mycox)[["conf.int"]][4]
cindex = summary(mycox)[["concordance"]][1]
cox_result[i, ] = c(pvalue, hr, lb, ub, cindex)
}
# Correct for multiple hypothesis testing using Benjamini-Hochberg procedure
cox_result = cbind(p.adjust(as.numeric(cox_result[, 1]), method = "BH"), cox_result)
# (Note that only 50 samples were selected from main dataset (n=~2000)
# which limits the power of analysis. However, effect sizes (hazard ratio) remain
# consistent with results shown in manuscript)
row.names(cox_result) = colnames(data)
colnames(cox_result) = c("adj_pval", "pval", "hr", "lb", "ub", "c_index")
write.table(cox_result, myoutf1, sep = "\t", row.names = T, col.names = T, quote = F)