Skip to content
Snippets Groups Projects
PTV_PRS_interaction.R 9.26 KiB
Newer Older
#######################################################
######### UKBB lipids PTV-PRS interaction #############
######### Author: Yunfeng Huang           #############
#######################################################

library(data.table)
library(dplyr)
library(MASS)

# Read phenotype data

lipids_pheno <- as.data.frame(fread("~/lipids_heiko/LoF_UKB/2021/lipids_lofs_pheno_300k.txt"))

# Test for PTV-PRS interaction through robust linear model fitting and extract BIC as well as interaction model coefficients (beta and t-value)

#HDL

genes <- colnames(lipids_pheno)[8:37]
lof_prs_results_hdl <- as.data.frame(matrix(NA,30,12))
for(i in 1:length(genes)){
  PRS <- fread(paste("/camhpc/home/cchen11/lipid_interaction/hdl.pst_eff_a1_b05_phiauto_prscs.no_",genes[i],".totalscore",sep = ""),sep = " ")
  data_sub <- merge(select(lipids_pheno,eid = eid.x,hdl_res,starts_with(genes[i])),select(PRS,eid = IID,totalscore = TOTALSCORE_AVG),by = "eid")
  data_sub$prs_std <- scale(data_sub$totalscore)[,1]
  data_sub <- na.omit(data_sub)
  colnames(data_sub) <- c("eid","hdl_res","ptv_gene","prs_total","prs_std")
  c1 <- sum(data_sub$ptv_gene>0)
  model1 <- rlm(hdl_res~ptv_gene,data = data_sub)
  model2 <- rlm(hdl_res~prs_std,data = data_sub)
  model3 <- rlm(hdl_res~ptv_gene+prs_std,data = data_sub)
  model4 <- rlm(hdl_res~ptv_gene+prs_std+ptv_gene:prs_std,data = data_sub)
  lof_prs_results_hdl[i,1] <- genes[i]
  lof_prs_results_hdl[i,2] <- BIC(model1)
  lof_prs_results_hdl[i,3] <- BIC(model2)
  lof_prs_results_hdl[i,4] <- BIC(model3)
  lof_prs_results_hdl[i,5] <- BIC(model4)
  lof_prs_results_hdl[i,6] <- summary(model4)$coeff[2,1]
  lof_prs_results_hdl[i,7] <- summary(model4)$coeff[3,1]
  lof_prs_results_hdl[i,8] <- summary(model4)$coeff[4,1]
  lof_prs_results_hdl[i,9] <- summary(model4)$coeff[2,3]
  lof_prs_results_hdl[i,10] <- summary(model4)$coeff[3,3]
  lof_prs_results_hdl[i,11] <- summary(model4)$coeff[4,3]
  lof_prs_results_hdl[i,12] <- c1
  if(ceiling(i/10)==(i/10)){print(i)}
}
colnames(lof_prs_results_hdl) <- c("PTV_gene","model1_BIC","model2_BIC","model3_BIC","model4_BIC",
                                   "PTV_E","PRS_E","PTV_PRS_E","PTV_t","PRS_t","PTV_PRS_t","N_carrier_PTV")


#LDL

genes <- colnames(lipids_pheno)[8:37]
lof_prs_results_ldl <- as.data.frame(matrix(NA,30,12))
for(i in 1:length(genes)){
  PRS <- fread(paste("/camhpc/home/cchen11/lipid_interaction/ldl.pst_eff_a1_b05_phiauto_prscs.no_",genes[i],".totalscore",sep = ""),sep = " ")
  data_sub <- merge(select(lipids_pheno,eid = eid.x,ldl_res,starts_with(genes[i])),select(PRS,eid = IID,totalscore = TOTALSCORE_AVG),by = "eid")
  data_sub$prs_std <- scale(data_sub$totalscore)[,1]
  data_sub <- na.omit(data_sub)
  colnames(data_sub) <- c("eid","ldl_res","ptv_gene","prs_total","prs_std")
  c1 <- sum(data_sub$ptv_gene>0)
  model1 <- rlm(ldl_res~ptv_gene,data = data_sub)
  model2 <- rlm(ldl_res~prs_std,data = data_sub)
  model3 <- rlm(ldl_res~ptv_gene+prs_std,data = data_sub)
  model4 <- rlm(ldl_res~ptv_gene+prs_std+ptv_gene:prs_std,data = data_sub)
  lof_prs_results_ldl[i,1] <- genes[i]
  lof_prs_results_ldl[i,2] <- BIC(model1)
  lof_prs_results_ldl[i,3] <- BIC(model2)
  lof_prs_results_ldl[i,4] <- BIC(model3)
  lof_prs_results_ldl[i,5] <- BIC(model4)
  lof_prs_results_ldl[i,6] <- summary(model4)$coeff[2,1]
  lof_prs_results_ldl[i,7] <- summary(model4)$coeff[3,1]
  lof_prs_results_ldl[i,8] <- summary(model4)$coeff[4,1]
  lof_prs_results_ldl[i,9] <- summary(model4)$coeff[2,3]
  lof_prs_results_ldl[i,10] <- summary(model4)$coeff[3,3]
  lof_prs_results_ldl[i,11] <- summary(model4)$coeff[4,3]
  lof_prs_results_ldl[i,12] <- c1
  if(ceiling(i/10)==(i/10)){print(i)}
}
colnames(lof_prs_results_ldl) <- c("PTV_gene","model1_BIC","model2_BIC","model3_BIC","model4_BIC",
                                "PTV_E","PRS_E","PTV_PRS_E","PTV_t","PRS_t","PTV_PRS_t","N_carrier_PTV")


#TC

genes <- colnames(lipids_pheno)[8:37]
lof_prs_results_tc <- as.data.frame(matrix(NA,30,12))
for(i in 1:length(genes)){
  PRS <- fread(paste("/camhpc/home/cchen11/lipid_interaction/tch.pst_eff_a1_b05_phiauto_prscs.no_",genes[i],".totalscore",sep = ""),sep = " ")
  data_sub <- merge(select(lipids_pheno,eid = eid.x,tc_res,starts_with(genes[i])),select(PRS,eid = IID,totalscore = TOTALSCORE_AVG),by = "eid")
  data_sub$prs_std <- scale(data_sub$totalscore)[,1]
  data_sub <- na.omit(data_sub)
  colnames(data_sub) <- c("eid","tc_res","ptv_gene","prs_total","prs_std")
  c1 <- sum(data_sub$ptv_gene>0)
  model1 <- rlm(tc_res~ptv_gene,data = data_sub)
  model2 <- rlm(tc_res~prs_std,data = data_sub)
  model3 <- rlm(tc_res~ptv_gene+prs_std,data = data_sub)
  model4 <- rlm(tc_res~ptv_gene+prs_std+ptv_gene:prs_std,data = data_sub)
  lof_prs_results_tc[i,1] <- genes[i]
  lof_prs_results_tc[i,2] <- BIC(model1)
  lof_prs_results_tc[i,3] <- BIC(model2)
  lof_prs_results_tc[i,4] <- BIC(model3)
  lof_prs_results_tc[i,5] <- BIC(model4)
  lof_prs_results_tc[i,6] <- summary(model4)$coeff[2,1]
  lof_prs_results_tc[i,7] <- summary(model4)$coeff[3,1]
  lof_prs_results_tc[i,8] <- summary(model4)$coeff[4,1]
  lof_prs_results_tc[i,9] <- summary(model4)$coeff[2,3]
  lof_prs_results_tc[i,10] <- summary(model4)$coeff[3,3]
  lof_prs_results_tc[i,11] <- summary(model4)$coeff[4,3]
  lof_prs_results_tc[i,12] <- c1
  if(ceiling(i/10)==(i/10)){print(i)}
}
colnames(lof_prs_results_tc) <- c("PTV_gene","model1_BIC","model2_BIC","model3_BIC","model4_BIC",
                               "PTV_E","PRS_E","PTV_PRS_E","PTV_t","PRS_t","PTV_PRS_t","N_carrier_PTV")


#TG

genes <- colnames(lipids_pheno)[8:37]
lof_prs_results_tg <- as.data.frame(matrix(NA,30,12))
for(i in 1:length(genes)){
  PRS <- fread(paste("/camhpc/home/cchen11/lipid_interaction/tri.pst_eff_a1_b05_phiauto_prscs.no_",genes[i],".totalscore",sep = ""),sep = " ")
  data_sub <- merge(select(lipids_pheno,eid = eid.x,tg_res,starts_with(genes[i])),select(PRS,eid = IID,totalscore = TOTALSCORE_AVG),by = "eid")
  data_sub$prs_std <- scale(data_sub$totalscore)[,1]
  data_sub <- na.omit(data_sub)
  colnames(data_sub) <- c("eid","tg_res","ptv_gene","prs_total","prs_std")
  c1 <- sum(data_sub$ptv_gene>0)
  model1 <- rlm(tg_res~ptv_gene,data = data_sub)
  model2 <- rlm(tg_res~prs_std,data = data_sub)
  model3 <- rlm(tg_res~ptv_gene+prs_std,data = data_sub)
  model4 <- rlm(tg_res~ptv_gene+prs_std+ptv_gene:prs_std,data = data_sub)
  lof_prs_results_tg[i,1] <- genes[i]
  lof_prs_results_tg[i,2] <- BIC(model1)
  lof_prs_results_tg[i,3] <- BIC(model2)
  lof_prs_results_tg[i,4] <- BIC(model3)
  lof_prs_results_tg[i,5] <- BIC(model4)
  lof_prs_results_tg[i,6] <- summary(model4)$coeff[2,1]
  lof_prs_results_tg[i,7] <- summary(model4)$coeff[3,1]
  lof_prs_results_tg[i,8] <- summary(model4)$coeff[4,1]
  lof_prs_results_tg[i,9] <- summary(model4)$coeff[2,3]
  lof_prs_results_tg[i,10] <- summary(model4)$coeff[3,3]
  lof_prs_results_tg[i,11] <- summary(model4)$coeff[4,3]
  lof_prs_results_tg[i,12] <- c1
  if(ceiling(i/10)==(i/10)){print(i)}
}
colnames(lof_prs_results_tg) <- c("PTV_gene","model1_BIC","model2_BIC","model3_BIC","model4_BIC",
                               "PTV_E","PRS_E","PTV_PRS_E","PTV_t","PRS_t","PTV_PRS_t","N_carrier_PTV")


# merge results for FDR-correction

lof_prs_results_hdl$trait <- "HDL"
lof_prs_results_ldl$trait <- "LDL"
lof_prs_results_tc$trait <- "TC"
lof_prs_results_tg$trait <- "TG"
lof_prs_results_hdl$PTV_P <- pnorm(abs(lof_prs_results_hdl$PTV_t),lower.tail=F)*2
lof_prs_results_hdl$PRS_P <- pnorm(abs(lof_prs_results_hdl$PRS_t),lower.tail=F)*2
lof_prs_results_hdl$PTV_PRS_P <- pnorm(abs(lof_prs_results_hdl$PTV_PRS_t),lower.tail=F)*2
lof_prs_results_ldl$PTV_P <- pnorm(abs(lof_prs_results_ldl$PTV_t),lower.tail=F)*2
lof_prs_results_ldl$PRS_P <- pnorm(abs(lof_prs_results_ldl$PRS_t),lower.tail=F)*2
lof_prs_results_ldl$PTV_PRS_P <- pnorm(abs(lof_prs_results_ldl$PTV_PRS_t),lower.tail=F)*2
lof_prs_results_tc$PTV_P <- pnorm(abs(lof_prs_results_tc$PTV_t),lower.tail=F)*2
lof_prs_results_tc$PRS_P <- pnorm(abs(lof_prs_results_tc$PRS_t),lower.tail=F)*2
lof_prs_results_tc$PTV_PRS_P <- pnorm(abs(lof_prs_results_tc$PTV_PRS_t),lower.tail=F)*2
lof_prs_results_tg$PTV_P <- pnorm(abs(lof_prs_results_tg$PTV_t),lower.tail=F)*2
lof_prs_results_tg$PRS_P <- pnorm(abs(lof_prs_results_tg$PRS_t),lower.tail=F)*2
lof_prs_results_tg$PTV_PRS_P <- pnorm(abs(lof_prs_results_tg$PTV_PRS_t),lower.tail=F)*2

all_results <- rbind(lof_prs_results_hdl,lof_prs_results_ldl,lof_prs_results_tc,lof_prs_results_tg)

all_results_long <- as.data.frame(matrix(NA,nrow(all_results)*3,4))
colnames(all_results_long) <- c("trait","PTV","name","P")
for(i in 1:nrow(all_results)){
  all_results_long[(3*i-2):(3*i),1] <- all_results$trait[i]
  all_results_long[(3*i-2):(3*i),2] <- all_results$PTV_gene[i]
  all_results_long[(3*i-2):(3*i),3] <- c("PTV","PRS","PTV_PRS")
  all_results_long[(3*i-2):(3*i),4] <- as.numeric(all_results[i,14:16])
  if(ceiling(i/100)==(i/100)){print(i)}
}
all_results_long$fdr_q <- p.adjust(all_results_long$P,"fdr")

temp <- as.data.frame(matrix(NA,nrow(all_results),5))
colnames(temp) <- c("trait","PTV_gene","PTV_fdr_q","PRS_fdr_q","PTV_PRS_fdr_q")
for(i in 1:nrow(temp)){
  temp[i,1] <- all_results_long$trait[(3*i-2)]
  temp[i,2] <- all_results_long$PTV[(3*i-2)]
  temp[i,3] <- all_results_long$fdr_q[(3*i-2)]
  temp[i,4] <- all_results_long$fdr_q[(3*i-1)]
  temp[i,5] <- all_results_long$fdr_q[(3*i)]
  if(ceiling(i/100)==(i/100)){print(i)}
}

all_results_merge <- merge(all_results,temp,by = c("trait","PTV_gene"))