####################################################### ######### UKBB lipids PTV-PRS interaction ############# ######### Author: Yunfeng Huang ############# ####################################################### library(data.table) library(dplyr) library(MASS) # Read phenotype data lipids_pheno <- as.data.frame(fread("~/lipids_heiko/LoF_UKB/2021/lipids_lofs_pheno_300k.txt")) # Test for PTV-PRS interaction through robust linear model fitting and extract BIC as well as interaction model coefficients (beta and t-value) #HDL genes <- colnames(lipids_pheno)[8:37] lof_prs_results_hdl <- as.data.frame(matrix(NA,30,12)) for(i in 1:length(genes)){ PRS <- fread(paste("/camhpc/home/cchen11/lipid_interaction/hdl.pst_eff_a1_b05_phiauto_prscs.no_",genes[i],".totalscore",sep = ""),sep = " ") data_sub <- merge(select(lipids_pheno,eid = eid.x,hdl_res,starts_with(genes[i])),select(PRS,eid = IID,totalscore = TOTALSCORE_AVG),by = "eid") data_sub$prs_std <- scale(data_sub$totalscore)[,1] data_sub <- na.omit(data_sub) colnames(data_sub) <- c("eid","hdl_res","ptv_gene","prs_total","prs_std") c1 <- sum(data_sub$ptv_gene>0) model1 <- rlm(hdl_res~ptv_gene,data = data_sub) model2 <- rlm(hdl_res~prs_std,data = data_sub) model3 <- rlm(hdl_res~ptv_gene+prs_std,data = data_sub) model4 <- rlm(hdl_res~ptv_gene+prs_std+ptv_gene:prs_std,data = data_sub) lof_prs_results_hdl[i,1] <- genes[i] lof_prs_results_hdl[i,2] <- BIC(model1) lof_prs_results_hdl[i,3] <- BIC(model2) lof_prs_results_hdl[i,4] <- BIC(model3) lof_prs_results_hdl[i,5] <- BIC(model4) lof_prs_results_hdl[i,6] <- summary(model4)$coeff[2,1] lof_prs_results_hdl[i,7] <- summary(model4)$coeff[3,1] lof_prs_results_hdl[i,8] <- summary(model4)$coeff[4,1] lof_prs_results_hdl[i,9] <- summary(model4)$coeff[2,3] lof_prs_results_hdl[i,10] <- summary(model4)$coeff[3,3] lof_prs_results_hdl[i,11] <- summary(model4)$coeff[4,3] lof_prs_results_hdl[i,12] <- c1 if(ceiling(i/10)==(i/10)){print(i)} } colnames(lof_prs_results_hdl) <- c("PTV_gene","model1_BIC","model2_BIC","model3_BIC","model4_BIC", "PTV_E","PRS_E","PTV_PRS_E","PTV_t","PRS_t","PTV_PRS_t","N_carrier_PTV") #LDL genes <- colnames(lipids_pheno)[8:37] lof_prs_results_ldl <- as.data.frame(matrix(NA,30,12)) for(i in 1:length(genes)){ PRS <- fread(paste("/camhpc/home/cchen11/lipid_interaction/ldl.pst_eff_a1_b05_phiauto_prscs.no_",genes[i],".totalscore",sep = ""),sep = " ") data_sub <- merge(select(lipids_pheno,eid = eid.x,ldl_res,starts_with(genes[i])),select(PRS,eid = IID,totalscore = TOTALSCORE_AVG),by = "eid") data_sub$prs_std <- scale(data_sub$totalscore)[,1] data_sub <- na.omit(data_sub) colnames(data_sub) <- c("eid","ldl_res","ptv_gene","prs_total","prs_std") c1 <- sum(data_sub$ptv_gene>0) model1 <- rlm(ldl_res~ptv_gene,data = data_sub) model2 <- rlm(ldl_res~prs_std,data = data_sub) model3 <- rlm(ldl_res~ptv_gene+prs_std,data = data_sub) model4 <- rlm(ldl_res~ptv_gene+prs_std+ptv_gene:prs_std,data = data_sub) lof_prs_results_ldl[i,1] <- genes[i] lof_prs_results_ldl[i,2] <- BIC(model1) lof_prs_results_ldl[i,3] <- BIC(model2) lof_prs_results_ldl[i,4] <- BIC(model3) lof_prs_results_ldl[i,5] <- BIC(model4) lof_prs_results_ldl[i,6] <- summary(model4)$coeff[2,1] lof_prs_results_ldl[i,7] <- summary(model4)$coeff[3,1] lof_prs_results_ldl[i,8] <- summary(model4)$coeff[4,1] lof_prs_results_ldl[i,9] <- summary(model4)$coeff[2,3] lof_prs_results_ldl[i,10] <- summary(model4)$coeff[3,3] lof_prs_results_ldl[i,11] <- summary(model4)$coeff[4,3] lof_prs_results_ldl[i,12] <- c1 if(ceiling(i/10)==(i/10)){print(i)} } colnames(lof_prs_results_ldl) <- c("PTV_gene","model1_BIC","model2_BIC","model3_BIC","model4_BIC", "PTV_E","PRS_E","PTV_PRS_E","PTV_t","PRS_t","PTV_PRS_t","N_carrier_PTV") #TC genes <- colnames(lipids_pheno)[8:37] lof_prs_results_tc <- as.data.frame(matrix(NA,30,12)) for(i in 1:length(genes)){ PRS <- fread(paste("/camhpc/home/cchen11/lipid_interaction/tch.pst_eff_a1_b05_phiauto_prscs.no_",genes[i],".totalscore",sep = ""),sep = " ") data_sub <- merge(select(lipids_pheno,eid = eid.x,tc_res,starts_with(genes[i])),select(PRS,eid = IID,totalscore = TOTALSCORE_AVG),by = "eid") data_sub$prs_std <- scale(data_sub$totalscore)[,1] data_sub <- na.omit(data_sub) colnames(data_sub) <- c("eid","tc_res","ptv_gene","prs_total","prs_std") c1 <- sum(data_sub$ptv_gene>0) model1 <- rlm(tc_res~ptv_gene,data = data_sub) model2 <- rlm(tc_res~prs_std,data = data_sub) model3 <- rlm(tc_res~ptv_gene+prs_std,data = data_sub) model4 <- rlm(tc_res~ptv_gene+prs_std+ptv_gene:prs_std,data = data_sub) lof_prs_results_tc[i,1] <- genes[i] lof_prs_results_tc[i,2] <- BIC(model1) lof_prs_results_tc[i,3] <- BIC(model2) lof_prs_results_tc[i,4] <- BIC(model3) lof_prs_results_tc[i,5] <- BIC(model4) lof_prs_results_tc[i,6] <- summary(model4)$coeff[2,1] lof_prs_results_tc[i,7] <- summary(model4)$coeff[3,1] lof_prs_results_tc[i,8] <- summary(model4)$coeff[4,1] lof_prs_results_tc[i,9] <- summary(model4)$coeff[2,3] lof_prs_results_tc[i,10] <- summary(model4)$coeff[3,3] lof_prs_results_tc[i,11] <- summary(model4)$coeff[4,3] lof_prs_results_tc[i,12] <- c1 if(ceiling(i/10)==(i/10)){print(i)} } colnames(lof_prs_results_tc) <- c("PTV_gene","model1_BIC","model2_BIC","model3_BIC","model4_BIC", "PTV_E","PRS_E","PTV_PRS_E","PTV_t","PRS_t","PTV_PRS_t","N_carrier_PTV") #TG genes <- colnames(lipids_pheno)[8:37] lof_prs_results_tg <- as.data.frame(matrix(NA,30,12)) for(i in 1:length(genes)){ PRS <- fread(paste("/camhpc/home/cchen11/lipid_interaction/tri.pst_eff_a1_b05_phiauto_prscs.no_",genes[i],".totalscore",sep = ""),sep = " ") data_sub <- merge(select(lipids_pheno,eid = eid.x,tg_res,starts_with(genes[i])),select(PRS,eid = IID,totalscore = TOTALSCORE_AVG),by = "eid") data_sub$prs_std <- scale(data_sub$totalscore)[,1] data_sub <- na.omit(data_sub) colnames(data_sub) <- c("eid","tg_res","ptv_gene","prs_total","prs_std") c1 <- sum(data_sub$ptv_gene>0) model1 <- rlm(tg_res~ptv_gene,data = data_sub) model2 <- rlm(tg_res~prs_std,data = data_sub) model3 <- rlm(tg_res~ptv_gene+prs_std,data = data_sub) model4 <- rlm(tg_res~ptv_gene+prs_std+ptv_gene:prs_std,data = data_sub) lof_prs_results_tg[i,1] <- genes[i] lof_prs_results_tg[i,2] <- BIC(model1) lof_prs_results_tg[i,3] <- BIC(model2) lof_prs_results_tg[i,4] <- BIC(model3) lof_prs_results_tg[i,5] <- BIC(model4) lof_prs_results_tg[i,6] <- summary(model4)$coeff[2,1] lof_prs_results_tg[i,7] <- summary(model4)$coeff[3,1] lof_prs_results_tg[i,8] <- summary(model4)$coeff[4,1] lof_prs_results_tg[i,9] <- summary(model4)$coeff[2,3] lof_prs_results_tg[i,10] <- summary(model4)$coeff[3,3] lof_prs_results_tg[i,11] <- summary(model4)$coeff[4,3] lof_prs_results_tg[i,12] <- c1 if(ceiling(i/10)==(i/10)){print(i)} } colnames(lof_prs_results_tg) <- c("PTV_gene","model1_BIC","model2_BIC","model3_BIC","model4_BIC", "PTV_E","PRS_E","PTV_PRS_E","PTV_t","PRS_t","PTV_PRS_t","N_carrier_PTV") # merge results for FDR-correction lof_prs_results_hdl$trait <- "HDL" lof_prs_results_ldl$trait <- "LDL" lof_prs_results_tc$trait <- "TC" lof_prs_results_tg$trait <- "TG" lof_prs_results_hdl$PTV_P <- pnorm(abs(lof_prs_results_hdl$PTV_t),lower.tail=F)*2 lof_prs_results_hdl$PRS_P <- pnorm(abs(lof_prs_results_hdl$PRS_t),lower.tail=F)*2 lof_prs_results_hdl$PTV_PRS_P <- pnorm(abs(lof_prs_results_hdl$PTV_PRS_t),lower.tail=F)*2 lof_prs_results_ldl$PTV_P <- pnorm(abs(lof_prs_results_ldl$PTV_t),lower.tail=F)*2 lof_prs_results_ldl$PRS_P <- pnorm(abs(lof_prs_results_ldl$PRS_t),lower.tail=F)*2 lof_prs_results_ldl$PTV_PRS_P <- pnorm(abs(lof_prs_results_ldl$PTV_PRS_t),lower.tail=F)*2 lof_prs_results_tc$PTV_P <- pnorm(abs(lof_prs_results_tc$PTV_t),lower.tail=F)*2 lof_prs_results_tc$PRS_P <- pnorm(abs(lof_prs_results_tc$PRS_t),lower.tail=F)*2 lof_prs_results_tc$PTV_PRS_P <- pnorm(abs(lof_prs_results_tc$PTV_PRS_t),lower.tail=F)*2 lof_prs_results_tg$PTV_P <- pnorm(abs(lof_prs_results_tg$PTV_t),lower.tail=F)*2 lof_prs_results_tg$PRS_P <- pnorm(abs(lof_prs_results_tg$PRS_t),lower.tail=F)*2 lof_prs_results_tg$PTV_PRS_P <- pnorm(abs(lof_prs_results_tg$PTV_PRS_t),lower.tail=F)*2 all_results <- rbind(lof_prs_results_hdl,lof_prs_results_ldl,lof_prs_results_tc,lof_prs_results_tg) all_results_long <- as.data.frame(matrix(NA,nrow(all_results)*3,4)) colnames(all_results_long) <- c("trait","PTV","name","P") for(i in 1:nrow(all_results)){ all_results_long[(3*i-2):(3*i),1] <- all_results$trait[i] all_results_long[(3*i-2):(3*i),2] <- all_results$PTV_gene[i] all_results_long[(3*i-2):(3*i),3] <- c("PTV","PRS","PTV_PRS") all_results_long[(3*i-2):(3*i),4] <- as.numeric(all_results[i,14:16]) if(ceiling(i/100)==(i/100)){print(i)} } all_results_long$fdr_q <- p.adjust(all_results_long$P,"fdr") temp <- as.data.frame(matrix(NA,nrow(all_results),5)) colnames(temp) <- c("trait","PTV_gene","PTV_fdr_q","PRS_fdr_q","PTV_PRS_fdr_q") for(i in 1:nrow(temp)){ temp[i,1] <- all_results_long$trait[(3*i-2)] temp[i,2] <- all_results_long$PTV[(3*i-2)] temp[i,3] <- all_results_long$fdr_q[(3*i-2)] temp[i,4] <- all_results_long$fdr_q[(3*i-1)] temp[i,5] <- all_results_long$fdr_q[(3*i)] if(ceiling(i/100)==(i/100)){print(i)} } all_results_merge <- merge(all_results,temp,by = c("trait","PTV_gene"))