Goal

To look at what the proportion of trans-eQTLs that are colocalized with each of the traits analyzed.

Proportion of trans-eQTLs coloc with traits

# I/O & paras -----
file_res_coloc_reg_prop1 <- '/project2/xuanyao/llw/coloc/ukbb_coloc_blood_traits/data/coloc_region_prop_merged.txt'
file_res_coloc_reg_prop2 <- '/project2/xuanyao/llw/coloc/immune_traits/pmid_all/coloc_region_prop_merged.txt'
file_res_coloc_reg_prop3 <- '/project2/xuanyao/llw/coloc/ukbb_coloc_more_traits/all_trait/data/coloc_region_prop_merged.txt'


# read files -----
res_coloc_reg_prop1 <- fread(file_res_coloc_reg_prop1, header = TRUE)
res_coloc_reg_prop2 <- fread(file_res_coloc_reg_prop2, header = TRUE)
res_coloc_reg_prop3 <- fread(file_res_coloc_reg_prop3, header = TRUE)

res_coloc_reg_prop2$trait <- res_coloc_reg_prop2$Phenocode
res_coloc_reg_prop <- rbind(res_coloc_reg_prop1, res_coloc_reg_prop2, res_coloc_reg_prop3)


df_plt1 <- res_coloc_reg_prop1 %>%
  arrange(desc(propPvalColocMerg)) %>%
  pivot_longer(cols = c(nRegionPvalMerg), names_to = "if_merge", values_to = "Num_reg") %>%
  pivot_longer(cols = c(nRegionPvalColocMerg), names_to = "if_merge_coloc", values_to = "Num_reg_coloc") %>%
  pivot_longer(cols = c(propPvalColocMerg), names_to = "if_merge_prop", values_to = "prop") %>%
  mutate(if_merge = replace(if_merge, if_merge == "nRegionPvalMerg", "(trans)"),
         if_merge_coloc = replace(if_merge_coloc, if_merge_coloc == "nRegionPvalColocMerg", "(coloc)"),
         trait_type = paste("Blood", if_merge),
         trait_type_coloc = paste("Blood", if_merge_coloc),
         trait_type_prop = paste("Blood", if_merge_prop))

df_plt2 <- res_coloc_reg_prop2 %>%
  arrange(desc(propPvalColocMerg)) %>%
  pivot_longer(cols = c(nRegionPvalMerg), names_to = "if_merge", values_to = "Num_reg") %>%
  pivot_longer(cols = c(nRegionPvalColocMerg), names_to = "if_merge_coloc", values_to = "Num_reg_coloc") %>%
  pivot_longer(cols = c(propPvalColocMerg), names_to = "if_merge_prop", values_to = "prop") %>%
  mutate(if_merge = replace(if_merge, if_merge == "nRegionPvalMerg", "(trans)"),
         if_merge_coloc = replace(if_merge_coloc, if_merge_coloc == "nRegionPvalColocMerg", "(coloc)"),
         trait_type = paste("Autoimmune", if_merge),
         trait_type_coloc = paste("Autoimmune", if_merge_coloc),
         trait_type_prop = paste("Autoimmune", if_merge_prop))

df_plt3 <- res_coloc_reg_prop3 %>%
  arrange(desc(propPvalColocMerg)) %>%
  pivot_longer(cols = c(nRegionPvalMerg), names_to = "if_merge", values_to = "Num_reg") %>%
  pivot_longer(cols = c(nRegionPvalColocMerg), names_to = "if_merge_coloc", values_to = "Num_reg_coloc") %>%
  pivot_longer(cols = c(propPvalColocMerg), names_to = "if_merge_prop", values_to = "prop") %>%
  mutate(if_merge = replace(if_merge, if_merge == "nRegionPvalMerg", "(trans)"),
         if_merge_coloc = replace(if_merge_coloc, if_merge_coloc == "nRegionPvalColocMerg", "(coloc)"),
         trait_type = paste("Other", if_merge),
         trait_type_coloc = paste("Other", if_merge_coloc),
         trait_type_prop = paste("Other", if_merge_prop))

df_plt <- rbind(df_plt1, df_plt2, df_plt3)


# add trait order
df_plt$trait <- fct_inorder(factor(df_plt$trait))


y_lim <- max(unique(df_plt$Num_reg))
y_nudge <- 2


base_plt <- ggplot(df_plt, aes(x = trait)) +
  geom_bar(aes(y = Num_reg, fill = trait_type), stat = "identity", position = position_dodge(width = 1)) +
  geom_bar(aes(y = Num_reg_coloc, fill = trait_type_coloc), stat = "identity", position = position_dodge(width = 1)) +
  geom_point(aes(y = prop*y_lim, color = trait_type_prop),
             #position = position_dodge(width = 1),
             show.legend = FALSE,
             shape = 18, size = 2,
             position = position_nudge(y = y_nudge)) +
  geom_line(aes(y = prop*y_lim, group = trait_type_prop, color = trait_type_prop),
            #position = position_dodge(width = 1),
            show.legend = FALSE,
            position = position_nudge(y = y_nudge)) +
  labs(x = NULL, y = "Number of regions", fill = "Region", color = NULL)

base_plt +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  scale_y_continuous(limits = c(0, y_lim),
                     sec.axis = sec_axis(~./y_lim, name = "Coloc Proportion")) +
  #scale_fill_brewer(palette = "Paired", direction = -1)
  scale_fill_manual(
    breaks = c(
      "Blood (trans)", "Blood (coloc)",
      "Autoimmune (trans)", "Autoimmune (coloc)", 
      "Other (trans)", "Other (coloc)"
    ),
    values = c("Blood (trans)" = "#d9eaf3", "Autoimmune (trans)" = "#e2f3d4", "Other (trans)" = "#fde0e0",
               "Blood (coloc)" = "#186191", "Autoimmune (coloc)" = "#24731f", "Other (coloc)" = "#9e1213")
  ) +
  scale_color_manual(
    breaks = c("Blood propPvalColocMerg", "Autoimmune propPvalColocMerg", "Other propPvalColocMerg"),
    values = c("Blood propPvalColocMerg" = "blue",
               "Autoimmune propPvalColocMerg" = "#18e518",
               "Other propPvalColocMerg" = "#ff0000")
  ) +
  theme_classic() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 10, face = "bold"),
        #legend.background = element_rect(color = "black", linetype = "dashed"),
        legend.key.size= unit(0.5, "cm"),
        
        axis.line = element_line(colour="black"),
        plot.margin=unit(c(10,5,5,5),"mm"),
        
        #legend.margin = margin(-0.5,0,0,0, unit="cm"),
        
        axis.text.x = element_text(angle = 60, hjust=1, vjust = 1, size = 8, color = "black"),
        axis.text.y = element_text(colour = "black", size = 10),
        axis.title.y = element_text(size = 12),
        axis.title.y.right = element_text(angle = 90),
        axis.title.x = element_text(size = 12))

Dots show the proportion. Light shade bars show the number of trans-eQTL loci for coloc. Dark shade bars show the coloc regions with the corresponding traits on x-axis. Color for trait types.

Look into the coloc regions for each module

Next, I further looked into what are those coloc regions for each trait exactly, in terms of what are the correpsonding module that coloc with the trait, and how many coloc regions for each (module, trait) pair.

# I/O & paras -----
file_list_resColoc <- c(
  list.files(
    "/project2/xuanyao/llw/coloc/ukbb_coloc_blood_traits",
    "^.*coloc_reg_w_merged.txt$",
    full.names = TRUE, recursive = TRUE
  ) %>% set_names("blood"),
  list.files(
    "/project2/xuanyao/llw/coloc/immune_traits",
    "^.*coloc_reg_w_merged.txt$",
    full.names = TRUE, recursive = TRUE
  ) %>% set_names("immune"),
  list.files(
    "/project2/xuanyao/llw/coloc/ukbb_coloc_more_traits",
    "^.*coloc_reg_w_merged.txt$",
    full.names = TRUE, recursive = TRUE
  ) %>% set_names("other")
)
file_trait_type <- '/project2/xuanyao/llw/coloc/ukbb_coloc_blood_traits/ukbb_blood_traits.csv'

# read files -----
resColoc_all <- lapply(file_list_resColoc, fread, header = TRUE, drop = "Phenocode")
resColoc_all <- bind_rows(resColoc_all[(lapply(resColoc_all, nrow) %>% unlist()) > 0],
                          .id = "trait_type")
trait_type_blood <- fread(file_trait_type, header = TRUE, sep = ",")


# Figure data prep -----
## assign more specific blood traits groups -----
resColoc_all[resColoc_all$trait_type == "blood", "trait_type"] <- resColoc_all %>%
  filter(trait_type == "blood") %>%
  left_join(trait_type_blood, by = c("trait" = "Trait Abbreviation")) %>%
  pull(`GWAS Group`)

## summarize the number of un-merged and merged coloc regions for (module, trait) -----
resPlot <- resColoc_all %>%
  group_by(trait_type, trait, Module) %>%
  summarise(num_of_regions = n_distinct(Region),
            num_of_regions_merg = n_distinct(merged_region)) %>%
  ungroup()
## `summarise()` has grouped output by 'trait_type', 'trait'. You can override
## using the `.groups` argument.
## set module order by num of coloc regions and change labels -----
Module_order = resPlot %>%
  separate(Module, into = c(NA, "module_num"), sep = "module", remove = FALSE, convert = TRUE) %>%
  group_by(Module, module_num) %>%
  summarise(n_coloc_regions = sum(num_of_regions),
            n_coloc_regions_merg = sum(num_of_regions_merg),
            n_coloc_trait = n()) %>%
  ungroup() %>%
  arrange(desc(n_coloc_trait), desc(n_coloc_regions_merg), desc(n_coloc_regions))
## `summarise()` has grouped output by 'Module'. You can override using the
## `.groups` argument.
resPlot$Module = factor(resPlot$Module,
                        levels = Module_order$Module,
                        labels = paste0(Module_order$module_num))

## set phenotype order within each trait type by num of coloc regions -----
pheno_order <- resPlot %>%
  group_by(trait_type, trait) %>%
  summarise(n_coloc_regions = sum(num_of_regions),
            n_coloc_regions_merg = sum(num_of_regions_merg),
            n_coloc_m = n()) %>%
  ungroup() %>%
  group_by(trait_type) %>%
  arrange(n_coloc_m, n_coloc_regions_merg, n_coloc_regions, .by_group = TRUE) %>%
  ungroup()
## `summarise()` has grouped output by 'trait_type'. You can override using the
## `.groups` argument.
resPlot$trait = factor(resPlot$trait,
                           levels = pheno_order$trait)

## assign colocs to trait type -----
resPlot <- mutate(resPlot,
                  "trait_color" = recode(trait_type,
                                         "Platelets" = "#0028a1",
                                         "Red blood cells" = "#85192d",
                                         "White blood cells" = "#e89c31",
                                         "immune" = "#339900",
                                         "other" = "black"))
axis_text_color <- resPlot %>% slice(match(levels(trait), trait))


# tile plot of merged coloc regions for (module, trait) -----
base_plt <- ggplot(resPlot, aes(Module, trait)) +
  geom_tile(aes(fill = num_of_regions_merg)) +
  scale_y_discrete(labels = function(x) str_wrap(x, width = 10)) +
  scale_fill_gradientn(colors = RColorBrewer::brewer.pal(8, "Blues")[3:8],
                       na.value = "red",
                       limits = c(0, max(c(resPlot$num_of_regions, resPlot$num_of_regions_merg)))) +
  labs(y = NULL, x = "Module", fill = "Number of \n Coloc Regions")

base_plt +
  theme_classic() +
  theme(
    legend.title = element_text(size = 10, face = "bold"),
    legend.background = element_rect(color = "black", linetype = "dashed"),
    axis.text.y = element_text(colour = axis_text_color$trait_color, size = 8),
    axis.text.x = element_text(colour = "black", size = 8,
                               angle = 60, hjust = 1, vjust = 1)
  )
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.

Session info

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /scratch/midway2/liliw1/conda_env/rstudio-server/lib/libopenblasp-r0.3.18.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] data.table_1.14.2 forcats_0.5.1     stringr_1.4.0     dplyr_1.0.7      
##  [5] purrr_0.3.4       readr_2.1.2       tidyr_1.2.0       tibble_3.1.7     
##  [9] ggplot2_3.3.6     tidyverse_1.3.1  
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.1.1   xfun_0.29          bslib_0.3.1        haven_2.4.3       
##  [5] colorspace_2.0-3   vctrs_0.4.1        generics_0.1.2     htmltools_0.5.2   
##  [9] yaml_2.2.2         utf8_1.2.2         rlang_1.0.3        jquerylib_0.1.4   
## [13] pillar_1.7.0       withr_2.5.0        glue_1.6.2         DBI_1.1.3         
## [17] RColorBrewer_1.1-3 dbplyr_2.1.1       modelr_0.1.8       readxl_1.3.1      
## [21] lifecycle_1.0.1    cellranger_1.1.0   munsell_0.5.0      gtable_0.3.0      
## [25] rvest_1.0.2        evaluate_0.14      labeling_0.4.2     knitr_1.37        
## [29] tzdb_0.2.0         fastmap_1.1.0      fansi_1.0.3        highr_0.9         
## [33] broom_0.7.12       Rcpp_1.0.8.3       backports_1.4.1    scales_1.2.0      
## [37] jsonlite_1.7.3     farver_2.1.1       fs_1.5.2           hms_1.1.1         
## [41] digest_0.6.29      stringi_1.7.6      grid_4.1.2         cli_3.3.0         
## [45] tools_4.1.2        magrittr_2.0.3     sass_0.4.0         crayon_1.5.1      
## [49] pkgconfig_2.0.3    ellipsis_0.3.2     xml2_1.3.3         reprex_2.0.1      
## [53] lubridate_1.8.0    assertthat_0.2.1   rmarkdown_2.11     httr_1.4.2        
## [57] rstudioapi_0.13    R6_2.5.1           compiler_4.1.2