Skip to content
Snippets Groups Projects
Commit 72cb0b4f authored by Yunfeng Huang's avatar Yunfeng Huang
Browse files

Upload New File - R code for interaction analysis on PTV-burden with PRS for lipids in the UKBB

parent 7a7d22c6
No related branches found
No related tags found
1 merge request!2Upload R scripts for the UKBB analysis
#######################################################
######### UKBB lipids PTV-PRS interaction #############
######### Author: Yunfeng Huang #############
#######################################################
library(data.table)
library(dplyr)
library(MASS)
# Read phenotype data
lipids_pheno <- as.data.frame(fread("~/lipids_heiko/LoF_UKB/2021/lipids_lofs_pheno_300k.txt"))
# Test for PTV-PRS interaction through robust linear model fitting and extract BIC as well as interaction model coefficients (beta and t-value)
#HDL
genes <- colnames(lipids_pheno)[8:37]
lof_prs_results_hdl <- as.data.frame(matrix(NA,30,12))
for(i in 1:length(genes)){
PRS <- fread(paste("/camhpc/home/cchen11/lipid_interaction/hdl.pst_eff_a1_b05_phiauto_prscs.no_",genes[i],".totalscore",sep = ""),sep = " ")
data_sub <- merge(select(lipids_pheno,eid = eid.x,hdl_res,starts_with(genes[i])),select(PRS,eid = IID,totalscore = TOTALSCORE_AVG),by = "eid")
data_sub$prs_std <- scale(data_sub$totalscore)[,1]
data_sub <- na.omit(data_sub)
colnames(data_sub) <- c("eid","hdl_res","ptv_gene","prs_total","prs_std")
c1 <- sum(data_sub$ptv_gene>0)
model1 <- rlm(hdl_res~ptv_gene,data = data_sub)
model2 <- rlm(hdl_res~prs_std,data = data_sub)
model3 <- rlm(hdl_res~ptv_gene+prs_std,data = data_sub)
model4 <- rlm(hdl_res~ptv_gene+prs_std+ptv_gene:prs_std,data = data_sub)
lof_prs_results_hdl[i,1] <- genes[i]
lof_prs_results_hdl[i,2] <- BIC(model1)
lof_prs_results_hdl[i,3] <- BIC(model2)
lof_prs_results_hdl[i,4] <- BIC(model3)
lof_prs_results_hdl[i,5] <- BIC(model4)
lof_prs_results_hdl[i,6] <- summary(model4)$coeff[2,1]
lof_prs_results_hdl[i,7] <- summary(model4)$coeff[3,1]
lof_prs_results_hdl[i,8] <- summary(model4)$coeff[4,1]
lof_prs_results_hdl[i,9] <- summary(model4)$coeff[2,3]
lof_prs_results_hdl[i,10] <- summary(model4)$coeff[3,3]
lof_prs_results_hdl[i,11] <- summary(model4)$coeff[4,3]
lof_prs_results_hdl[i,12] <- c1
if(ceiling(i/10)==(i/10)){print(i)}
}
colnames(lof_prs_results_hdl) <- c("PTV_gene","model1_BIC","model2_BIC","model3_BIC","model4_BIC",
"PTV_E","PRS_E","PTV_PRS_E","PTV_t","PRS_t","PTV_PRS_t","N_carrier_PTV")
#LDL
genes <- colnames(lipids_pheno)[8:37]
lof_prs_results_ldl <- as.data.frame(matrix(NA,30,12))
for(i in 1:length(genes)){
PRS <- fread(paste("/camhpc/home/cchen11/lipid_interaction/ldl.pst_eff_a1_b05_phiauto_prscs.no_",genes[i],".totalscore",sep = ""),sep = " ")
data_sub <- merge(select(lipids_pheno,eid = eid.x,ldl_res,starts_with(genes[i])),select(PRS,eid = IID,totalscore = TOTALSCORE_AVG),by = "eid")
data_sub$prs_std <- scale(data_sub$totalscore)[,1]
data_sub <- na.omit(data_sub)
colnames(data_sub) <- c("eid","ldl_res","ptv_gene","prs_total","prs_std")
c1 <- sum(data_sub$ptv_gene>0)
model1 <- rlm(ldl_res~ptv_gene,data = data_sub)
model2 <- rlm(ldl_res~prs_std,data = data_sub)
model3 <- rlm(ldl_res~ptv_gene+prs_std,data = data_sub)
model4 <- rlm(ldl_res~ptv_gene+prs_std+ptv_gene:prs_std,data = data_sub)
lof_prs_results_ldl[i,1] <- genes[i]
lof_prs_results_ldl[i,2] <- BIC(model1)
lof_prs_results_ldl[i,3] <- BIC(model2)
lof_prs_results_ldl[i,4] <- BIC(model3)
lof_prs_results_ldl[i,5] <- BIC(model4)
lof_prs_results_ldl[i,6] <- summary(model4)$coeff[2,1]
lof_prs_results_ldl[i,7] <- summary(model4)$coeff[3,1]
lof_prs_results_ldl[i,8] <- summary(model4)$coeff[4,1]
lof_prs_results_ldl[i,9] <- summary(model4)$coeff[2,3]
lof_prs_results_ldl[i,10] <- summary(model4)$coeff[3,3]
lof_prs_results_ldl[i,11] <- summary(model4)$coeff[4,3]
lof_prs_results_ldl[i,12] <- c1
if(ceiling(i/10)==(i/10)){print(i)}
}
colnames(lof_prs_results_ldl) <- c("PTV_gene","model1_BIC","model2_BIC","model3_BIC","model4_BIC",
"PTV_E","PRS_E","PTV_PRS_E","PTV_t","PRS_t","PTV_PRS_t","N_carrier_PTV")
#TC
genes <- colnames(lipids_pheno)[8:37]
lof_prs_results_tc <- as.data.frame(matrix(NA,30,12))
for(i in 1:length(genes)){
PRS <- fread(paste("/camhpc/home/cchen11/lipid_interaction/tch.pst_eff_a1_b05_phiauto_prscs.no_",genes[i],".totalscore",sep = ""),sep = " ")
data_sub <- merge(select(lipids_pheno,eid = eid.x,tc_res,starts_with(genes[i])),select(PRS,eid = IID,totalscore = TOTALSCORE_AVG),by = "eid")
data_sub$prs_std <- scale(data_sub$totalscore)[,1]
data_sub <- na.omit(data_sub)
colnames(data_sub) <- c("eid","tc_res","ptv_gene","prs_total","prs_std")
c1 <- sum(data_sub$ptv_gene>0)
model1 <- rlm(tc_res~ptv_gene,data = data_sub)
model2 <- rlm(tc_res~prs_std,data = data_sub)
model3 <- rlm(tc_res~ptv_gene+prs_std,data = data_sub)
model4 <- rlm(tc_res~ptv_gene+prs_std+ptv_gene:prs_std,data = data_sub)
lof_prs_results_tc[i,1] <- genes[i]
lof_prs_results_tc[i,2] <- BIC(model1)
lof_prs_results_tc[i,3] <- BIC(model2)
lof_prs_results_tc[i,4] <- BIC(model3)
lof_prs_results_tc[i,5] <- BIC(model4)
lof_prs_results_tc[i,6] <- summary(model4)$coeff[2,1]
lof_prs_results_tc[i,7] <- summary(model4)$coeff[3,1]
lof_prs_results_tc[i,8] <- summary(model4)$coeff[4,1]
lof_prs_results_tc[i,9] <- summary(model4)$coeff[2,3]
lof_prs_results_tc[i,10] <- summary(model4)$coeff[3,3]
lof_prs_results_tc[i,11] <- summary(model4)$coeff[4,3]
lof_prs_results_tc[i,12] <- c1
if(ceiling(i/10)==(i/10)){print(i)}
}
colnames(lof_prs_results_tc) <- c("PTV_gene","model1_BIC","model2_BIC","model3_BIC","model4_BIC",
"PTV_E","PRS_E","PTV_PRS_E","PTV_t","PRS_t","PTV_PRS_t","N_carrier_PTV")
#TG
genes <- colnames(lipids_pheno)[8:37]
lof_prs_results_tg <- as.data.frame(matrix(NA,30,12))
for(i in 1:length(genes)){
PRS <- fread(paste("/camhpc/home/cchen11/lipid_interaction/tri.pst_eff_a1_b05_phiauto_prscs.no_",genes[i],".totalscore",sep = ""),sep = " ")
data_sub <- merge(select(lipids_pheno,eid = eid.x,tg_res,starts_with(genes[i])),select(PRS,eid = IID,totalscore = TOTALSCORE_AVG),by = "eid")
data_sub$prs_std <- scale(data_sub$totalscore)[,1]
data_sub <- na.omit(data_sub)
colnames(data_sub) <- c("eid","tg_res","ptv_gene","prs_total","prs_std")
c1 <- sum(data_sub$ptv_gene>0)
model1 <- rlm(tg_res~ptv_gene,data = data_sub)
model2 <- rlm(tg_res~prs_std,data = data_sub)
model3 <- rlm(tg_res~ptv_gene+prs_std,data = data_sub)
model4 <- rlm(tg_res~ptv_gene+prs_std+ptv_gene:prs_std,data = data_sub)
lof_prs_results_tg[i,1] <- genes[i]
lof_prs_results_tg[i,2] <- BIC(model1)
lof_prs_results_tg[i,3] <- BIC(model2)
lof_prs_results_tg[i,4] <- BIC(model3)
lof_prs_results_tg[i,5] <- BIC(model4)
lof_prs_results_tg[i,6] <- summary(model4)$coeff[2,1]
lof_prs_results_tg[i,7] <- summary(model4)$coeff[3,1]
lof_prs_results_tg[i,8] <- summary(model4)$coeff[4,1]
lof_prs_results_tg[i,9] <- summary(model4)$coeff[2,3]
lof_prs_results_tg[i,10] <- summary(model4)$coeff[3,3]
lof_prs_results_tg[i,11] <- summary(model4)$coeff[4,3]
lof_prs_results_tg[i,12] <- c1
if(ceiling(i/10)==(i/10)){print(i)}
}
colnames(lof_prs_results_tg) <- c("PTV_gene","model1_BIC","model2_BIC","model3_BIC","model4_BIC",
"PTV_E","PRS_E","PTV_PRS_E","PTV_t","PRS_t","PTV_PRS_t","N_carrier_PTV")
# merge results for FDR-correction
lof_prs_results_hdl$trait <- "HDL"
lof_prs_results_ldl$trait <- "LDL"
lof_prs_results_tc$trait <- "TC"
lof_prs_results_tg$trait <- "TG"
lof_prs_results_hdl$PTV_P <- pnorm(abs(lof_prs_results_hdl$PTV_t),lower.tail=F)*2
lof_prs_results_hdl$PRS_P <- pnorm(abs(lof_prs_results_hdl$PRS_t),lower.tail=F)*2
lof_prs_results_hdl$PTV_PRS_P <- pnorm(abs(lof_prs_results_hdl$PTV_PRS_t),lower.tail=F)*2
lof_prs_results_ldl$PTV_P <- pnorm(abs(lof_prs_results_ldl$PTV_t),lower.tail=F)*2
lof_prs_results_ldl$PRS_P <- pnorm(abs(lof_prs_results_ldl$PRS_t),lower.tail=F)*2
lof_prs_results_ldl$PTV_PRS_P <- pnorm(abs(lof_prs_results_ldl$PTV_PRS_t),lower.tail=F)*2
lof_prs_results_tc$PTV_P <- pnorm(abs(lof_prs_results_tc$PTV_t),lower.tail=F)*2
lof_prs_results_tc$PRS_P <- pnorm(abs(lof_prs_results_tc$PRS_t),lower.tail=F)*2
lof_prs_results_tc$PTV_PRS_P <- pnorm(abs(lof_prs_results_tc$PTV_PRS_t),lower.tail=F)*2
lof_prs_results_tg$PTV_P <- pnorm(abs(lof_prs_results_tg$PTV_t),lower.tail=F)*2
lof_prs_results_tg$PRS_P <- pnorm(abs(lof_prs_results_tg$PRS_t),lower.tail=F)*2
lof_prs_results_tg$PTV_PRS_P <- pnorm(abs(lof_prs_results_tg$PTV_PRS_t),lower.tail=F)*2
all_results <- rbind(lof_prs_results_hdl,lof_prs_results_ldl,lof_prs_results_tc,lof_prs_results_tg)
all_results_long <- as.data.frame(matrix(NA,nrow(all_results)*3,4))
colnames(all_results_long) <- c("trait","PTV","name","P")
for(i in 1:nrow(all_results)){
all_results_long[(3*i-2):(3*i),1] <- all_results$trait[i]
all_results_long[(3*i-2):(3*i),2] <- all_results$PTV_gene[i]
all_results_long[(3*i-2):(3*i),3] <- c("PTV","PRS","PTV_PRS")
all_results_long[(3*i-2):(3*i),4] <- as.numeric(all_results[i,14:16])
if(ceiling(i/100)==(i/100)){print(i)}
}
all_results_long$fdr_q <- p.adjust(all_results_long$P,"fdr")
temp <- as.data.frame(matrix(NA,nrow(all_results),5))
colnames(temp) <- c("trait","PTV_gene","PTV_fdr_q","PRS_fdr_q","PTV_PRS_fdr_q")
for(i in 1:nrow(temp)){
temp[i,1] <- all_results_long$trait[(3*i-2)]
temp[i,2] <- all_results_long$PTV[(3*i-2)]
temp[i,3] <- all_results_long$fdr_q[(3*i-2)]
temp[i,4] <- all_results_long$fdr_q[(3*i-1)]
temp[i,5] <- all_results_long$fdr_q[(3*i)]
if(ceiling(i/100)==(i/100)){print(i)}
}
all_results_merge <- merge(all_results,temp,by = c("trait","PTV_gene"))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment