Introduction

Two experiments were conducted looking at how Astrangia poculata responds to both hot and cold thermal extremes. Differential expression was calculated seperately for both experiments. Here, we explore how these two experiments are related. Our motivations are to describe a universal stress response to thermal extremes. This document describes comparing the differentially expressed genes as well as the enriched GO terms for each GO category.

Libraries used

library(dplyr)
library(gplots)
library(VennDiagram)
library(ggplot2)
library(cowplot)
library(plotly)
library(tidyr)

Read in our result tables and isolate gene lists that significant in each experiment.

hot = read.csv("~/Documents/Experiments/Freezing_hot/hot/tables/hot_results.csv", row.names = 1) 
hot = row.names(hot[hot$padj<0.05 & !is.na(hot$padj),])
freezing = read.csv("~/Documents/Experiments/Freezing_hot/freezing/tables/cold_results.csv", row.names = 1) 
freezing = row.names(freezing[freezing$padj<0.05 & !is.na(freezing$padj),])

Set up a venn diagram looking for similar genes in each experiment.

all_shared = list("Hot" = hot, "Freezing" = freezing)
    prettyvenn=venn.diagram(
      x = all_shared,
      filename=NULL,
      col = "transparent",
      fill = c("#ea6227", "#68a2ff"),
      alpha = 0.5,
      # label.col = c("darkred", "white", "darkgreen", "white", "white", "white", "blue4"),
      cex = 2.5,
      fontfamily = "sans",
      fontface = "bold",
      cat.default.pos = "text",
      cat.col = "black",
      cat.cex = 2.5,
      cat.fontfamily = "sans",
      cat.dist = c(0.08, 0.08),
      cat.pos = 1
    );
grid.draw(prettyvenn)

Are the shared DEGs significantly enriched? Here I set up a hypergeometric test to look to see if we would expect a similar amount of overlapping genes by chance.

a = read.csv("~/Documents/Experiments/Freezing_hot/hot/tables/hot_results.csv")
h = read.csv("~/Documents/Experiments/Freezing_hot/hot/tables/hot_results.csv") %>%
  filter(padj < 0.05) 

f = read.csv("~/Documents/Experiments/Freezing_hot/freezing/tables/freezing_results.csv") %>%
  filter(padj < 0.05)

overlap = inner_join(h, f, by = "X")

phyper((nrow(overlap)-1), nrow(h), (nrow(a)-nrow(h)), nrow(f), lower.tail = F, log.p = FALSE)

## [1] 6.726686e-162

We see that that to get this amount of overlap, that it is a p = 7.62e-209 likely to happen by chance. The amount of overlap is very signficant, and suggests that there are a suite of genes that respond to both hot and cold similarly.

What is the log-fold change between these specific genes? Let’s set this up to plot.

annotations = read.delim("~/Documents/Experiments/Freezing_hot/wh_host_final_iso2gene.tab", sep = "\t")
plot = overlap %>%
  rename(Iso = X) %>%
  inner_join(annotations) %>%
  select(Gene, log2FoldChange.x, log2FoldChange.y) %>%
  rename(Hot_LogFoldChange = log2FoldChange.x) %>%
  rename(Cold_LogFoldChange = log2FoldChange.y)

## Joining, by = "Iso"

## Warning: Column `Iso` joining factors with different levels, coercing to
## character vector

plot = ggplot(plot, aes(Hot_LogFoldChange, Cold_LogFoldChange, text = Gene)) +
  geom_point(alpha = 7/10)
ggplotly(plot, tooltip = "Gene")

What about GO terms?

GO_terms = read.delim("~/Documents/Experiments/Freezing_hot/wh_host_final_iso2go.tab", sep = "\t")
colnames(GO_terms) = c("Iso", "GO_term")
plot = overlap %>%
  rename(Iso = X) %>%
  inner_join(GO_terms) %>%
  select(GO_term, log2FoldChange.x, log2FoldChange.y) %>%
  rename(Hot_LogFoldChange = log2FoldChange.x) %>%
  rename(Cold_LogFoldChange = log2FoldChange.y)

## Joining, by = "Iso"

## Warning: Column `Iso` joining factors with different levels, coercing to
## character vector

write.table(plot, "GO_term_list.txt", row.names = F, sep = "\t")

Up regulated genes

hot = read.csv("~/Documents/Experiments/Freezing_hot/hot/tables/hot_results.csv", row.names = 1) 
freezing = read.csv("~/Documents/Experiments/Freezing_hot/freezing/tables/freezing_results.csv", row.names = 1) 
hot = row.names(hot[hot$padj < 0.05 &
         !is.na(hot$padj) &
         hot$log2FoldChange > 0, ])
freezing= row.names(freezing[freezing$padj < 0.05 &
              !is.na(freezing$padj) &
              freezing$log2FoldChange > 0, ])

up = list("Hot" = hot, "Freezing" = freezing)
    prettyvenn=venn.diagram(
      x = up,
      filename=NULL,
      col = "transparent",
      fill = c("#ea6227", "#68a2ff"),
      alpha = 0.5,
      # label.col = c("darkred", "white", "darkgreen", "white", "white", "white", "blue4"),
      cex = 2.5,
      fontfamily = "sans",
      fontface = "bold",
      cat.default.pos = "text",
      cat.col = "black",
      cat.cex = 2.5,
      cat.fontfamily = "sans",
      cat.dist = c(0.08, 0.08),
      cat.pos = 1
    );
grid.draw(prettyvenn)

Down regulated

hot = read.csv("~/Documents/Experiments/Freezing_hot/hot/tables/hot_results.csv", row.names = 1) 
freezing = read.csv("~/Documents/Experiments/Freezing_hot/freezing/tables/freezing_results.csv", row.names = 1) 
hot = row.names(hot[hot$padj < 0.05 &
         !is.na(hot$padj) &
         hot$log2FoldChange < 0, ])
freezing= row.names(freezing[freezing$padj < 0.05 &
              !is.na(freezing$padj) &
              freezing$log2FoldChange < 0, ])
down = list("Hot" = hot, "Freezing" = freezing)
    prettyvenn=venn.diagram(
      x = down,
      filename=NULL,
      col = "transparent",
      fill = c("#ea6227", "#68a2ff"),
      alpha = 0.5,
      # label.col = c("darkred", "white", "darkgreen", "white", "white", "white", "blue4"),
      cex = 2.5,
      fontfamily = "sans",
      fontface = "bold",
      cat.default.pos = "text",
      cat.col = "black",
      cat.cex = 2.5,
      cat.fontfamily = "sans",
      cat.dist = c(0.08, 0.08),
      cat.pos = 1
    );
grid.draw(prettyvenn)

Version control

sessionInfo()

## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS  10.14.5
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_CA.UTF-8/en_CA.UTF-8/en_CA.UTF-8/C/en_CA.UTF-8/en_CA.UTF-8
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] tidyr_0.8.2         plotly_4.9.0        cowplot_0.9.4      
## [4] ggplot2_3.1.0       VennDiagram_1.6.20  futile.logger_1.4.3
## [7] gplots_3.0.1.1      dplyr_0.8.0.1      
## 
## loaded via a namespace (and not attached):
##  [1] prettydoc_0.2.1      gtools_3.8.1         tidyselect_0.2.5    
##  [4] xfun_0.5.3           purrr_0.3.0          colorspace_1.4-0    
##  [7] htmltools_0.3.6      viridisLite_0.3.0    yaml_2.2.0          
## [10] rlang_0.3.1          pillar_1.3.1         later_0.8.0         
## [13] glue_1.3.0.9000      withr_2.1.2          lambda.r_1.2.3      
## [16] plyr_1.8.4           stringr_1.4.0        munsell_0.5.0       
## [19] gtable_0.2.0         caTools_1.17.1.1     htmlwidgets_1.3     
## [22] codetools_0.2-16     evaluate_0.12        labeling_0.3        
## [25] knitr_1.21           Cairo_1.5-9          httpuv_1.5.0        
## [28] crosstalk_1.0.0      Rcpp_1.0.1           xtable_1.8-3        
## [31] KernSmooth_2.23-15   promises_1.0.1       scales_1.0.0        
## [34] formatR_1.5          gdata_2.18.0         jsonlite_1.6        
## [37] mime_0.6             digest_0.6.18        stringi_1.2.4       
## [40] shiny_1.2.0          tools_3.5.1          bitops_1.0-6        
## [43] magrittr_1.5         lazyeval_0.2.1       tibble_2.0.1        
## [46] futile.options_1.0.1 crayon_1.3.4         pkgconfig_2.0.2     
## [49] data.table_1.12.0    assertthat_0.2.0     rmarkdown_1.11      
## [52] httr_1.4.0           R6_2.4.0             compiler_3.5.1

Molecular Component

## Load Data 
hot=read.table("~/Documents/Experiments/Freezing_hot/hot/MWU_CC_go_input_saved.csv",header=T)
freezing=read.table("~/Documents/Experiments/Freezing_hot/freezing/MWU_CC_go_input_saved.csv",header=T)

# Terms in both sets
goods=intersect(hot$term,freezing$term)
data1=hot[hot$term %in% goods,]
data2=freezing[freezing$term %in% goods,]

# Combine them
ress=merge(data1,data2,by="term")

# This is to make colours by quadrant if you care
# red = both up
# blue = both down
# other colours for other things, you can figure it out. 

plot = ress %>%
  mutate(colour = 
           case_when( p.adj.x < 0.1 & p.adj.y < 0.1 & delta.rank.x > 0 & delta.rank.y > 0 ~ 'red',
                      p.adj.x < 0.1 & p.adj.y < 0.1 & delta.rank.x < 0 & delta.rank.y < 0 ~ 'blue',
                      p.adj.x < 0.1 & p.adj.y < 0.1 & delta.rank.x > 0 & delta.rank.y < 0 ~ 'purple',
                      p.adj.x < 0.1 & p.adj.y < 0.1 & delta.rank.x < 0 & delta.rank.y > 0 ~ 'green')) %>%
  replace_na(list(colour = "black")) 

# This is to manually look for interesting go terms, and you can play with it in excel
interest = plot %>%
  filter(p.adj.x <0.1) %>%
  filter(p.adj.y < 0.1)

write.csv(interest, "CC_interesting.csv")

# Read back in your manipulated csv for those that you want to use as labels
CC_interest = read.csv("CC_interesting.csv")

# Here is the actual plot, lots of it is redundant - but can be manipulated to the colour scales you want. 
# unhash the geom_text part if you want to include labels

plot = ggplot(plot, aes(delta.rank.x, delta.rank.y, label = name.y)) +
  geom_point(aes(color = colour), size = 2, show.legend = FALSE) + 
  scale_color_manual(values = c(red = "darkslateblue",
                                blue = "darkslateblue",
                                green = "darkslateblue",
                                red = "darkslateblue",
                                purple = "darkslateblue",
                                black = alpha("black", 0.15))) +
  scale_fill_manual(values = c(red = "orangered",
                               blue = "dodgerblue2",
                               green = "seagreen3",
                               red = "orangered2",
                               purple = "plum2",
                               black = "black")) +
  #  geom_text_repel(data = CC_interest, aes(),
  #                 segment.alpha = 1,
  #                 box.padding = .5,
  #                 direction = "both") + 
  scale_size("size") +
  labs( x = "Hot",
        y = "Freezing") +
  labs(title = "Cellular Components") +
  geom_vline(xintercept = 0, linetype = 2, alpha = 0.75) +
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.75) +
  theme_cowplot()

ggplotly(plot)

Biological Processes

## Load Data 
hot=read.table("~/Documents/Experiments/Freezing_hot/hot/MWU_BP_go_input_saved.csv",header=T)
freezing=read.table("~/Documents/Experiments/Freezing_hot/freezing/MWU_BP_go_input_saved.csv",header=T)

# Terms in both sets
goods=intersect(hot$term,freezing$term)
data1=hot[hot$term %in% goods,]
data2=freezing[freezing$term %in% goods,]

# Combine them
ress=merge(data1,data2,by="term")

# This is to make colours by quadrant if you care
# red = both up
# blue = both down
# other colours for other things, you can figure it out. 

plot = ress %>%
  mutate(colour = 
           case_when( p.adj.x < 0.1 & p.adj.y < 0.1 & delta.rank.x > 0 & delta.rank.y > 0 ~ 'red',
                      p.adj.x < 0.1 & p.adj.y < 0.1 & delta.rank.x < 0 & delta.rank.y < 0 ~ 'blue',
                      p.adj.x < 0.1 & p.adj.y < 0.1 & delta.rank.x > 0 & delta.rank.y < 0 ~ 'purple',
                      p.adj.x < 0.1 & p.adj.y < 0.1 & delta.rank.x < 0 & delta.rank.y > 0 ~ 'green')) %>%
  replace_na(list(colour = "black")) 

# This is to manually look for interesting go terms, and you can play with it in excel
interest = plot %>%
  filter(p.adj.x <0.1) %>%
  filter(p.adj.y < 0.1)

write.csv(interest, "BP_interesting.csv")

# Read back in your manipulated csv for those that you want to use as labels
BP_interest = read.csv("BP_interesting.csv")

# Here is the actual plot, lots of it is redundant - but can be manipulated to the colour scales you want. 
# unhash the geom_text part if you want to include labels

plot = ggplot(plot, aes(delta.rank.x, delta.rank.y, label = name.y)) +
  geom_point(aes(color = colour), size = 2, show.legend = FALSE) + 
  scale_color_manual(values = c(red = "darkslateblue",
                                blue = "darkslateblue",
                                green = "darkslateblue",
                                red = "darkslateblue",
                                purple = "darkslateblue",
                                black = alpha("black", 0.15))) +
  scale_fill_manual(values = c(red = "orangered",
                               blue = "dodgerblue2",
                               green = "seagreen3",
                               red = "orangered2",
                               purple = "plum2",
                               black = "black")) +
  #  geom_text_repel(data = BP_interest, aes(),
  #                 segment.alpha = 1,
  #                 box.padding = .5,
  #                 direction = "both") + 
  scale_size("size") +
  labs( x = "Hot",
        y = "Freezing") +
  labs(title = "Biological Process") +
  geom_vline(xintercept = 0, linetype = 2, alpha = 0.75) +
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.75) +
  theme_cowplot()

ggplotly(plot)

Molecular Functions

## Load Data 
hot=read.table("~/Documents/Experiments/Freezing_hot/hot/MWU_MF_go_input_saved.csv",header=T)
freezing=read.table("~/Documents/Experiments/Freezing_hot/freezing/MWU_MF_go_input_saved.csv",header=T)

# Terms in both sets
goods=intersect(hot$term,freezing$term)
data1=hot[hot$term %in% goods,]
data2=freezing[freezing$term %in% goods,]

# Combine them
ress=merge(data1,data2,by="term")

# This is to make colours by quadrant if you care
# red = both up
# blue = both down
# other colours for other things, you can figure it out. 

plot = ress %>%
  mutate(colour = 
           case_when( p.adj.x < 0.1 & p.adj.y < 0.1 & delta.rank.x > 0 & delta.rank.y > 0 ~ 'red',
                      p.adj.x < 0.1 & p.adj.y < 0.1 & delta.rank.x < 0 & delta.rank.y < 0 ~ 'blue',
                      p.adj.x < 0.1 & p.adj.y < 0.1 & delta.rank.x > 0 & delta.rank.y < 0 ~ 'purple',
                      p.adj.x < 0.1 & p.adj.y < 0.1 & delta.rank.x < 0 & delta.rank.y > 0 ~ 'green')) %>%
  replace_na(list(colour = "black")) 

# This is to manually look for interesting go terms, and you can play with it in excel
interest = plot %>%
  filter(p.adj.x <0.1) %>%
  filter(p.adj.y < 0.1)

write.csv(interest, "MF_interesting.csv")

# Read back in your manipulated csv for those that you want to use as labels
MF_interest = read.csv("MF_interesting.csv")

# Here is the actual plot, lots of it is redundant - but can be manipulated to the colour scales you want. 
# unhash the geom_text part if you want to include labels

plot = ggplot(plot, aes(delta.rank.x, delta.rank.y, label = name.y)) +
  geom_point(aes(color = colour), size = 2, show.legend = FALSE) + 
  scale_color_manual(values = c(red = "darkslateblue",
                                blue = "darkslateblue",
                                green = "darkslateblue",
                                red = "darkslateblue",
                                purple = "darkslateblue",
                                black = alpha("black", 0.15))) +
  scale_fill_manual(values = c(red = "orangered",
                               blue = "dodgerblue2",
                               green = "seagreen3",
                               red = "orangered2",
                               purple = "plum2",
                               black = "black")) +
  #  geom_text_repel(data = MF_interest, aes(),
  #                 segment.alpha = 1,
  #                 box.padding = .5,
  #                 direction = "both") + 
  scale_size("size") +
  labs( x = "Hot",
        y = "Freezing") +
  labs(title = "Molecular Functions") +
  geom_vline(xintercept = 0, linetype = 2, alpha = 0.75) +
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.75) +
  theme_cowplot()

ggplotly(plot)

sessionInfo()

## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS  10.14.5
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_CA.UTF-8/en_CA.UTF-8/en_CA.UTF-8/C/en_CA.UTF-8/en_CA.UTF-8
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] tidyr_0.8.2         plotly_4.9.0        cowplot_0.9.4      
## [4] ggplot2_3.1.0       VennDiagram_1.6.20  futile.logger_1.4.3
## [7] gplots_3.0.1.1      dplyr_0.8.0.1      
## 
## loaded via a namespace (and not attached):
##  [1] prettydoc_0.2.1      gtools_3.8.1         tidyselect_0.2.5    
##  [4] xfun_0.5.3           purrr_0.3.0          colorspace_1.4-0    
##  [7] htmltools_0.3.6      viridisLite_0.3.0    yaml_2.2.0          
## [10] rlang_0.3.1          pillar_1.3.1         later_0.8.0         
## [13] glue_1.3.0.9000      withr_2.1.2          lambda.r_1.2.3      
## [16] plyr_1.8.4           stringr_1.4.0        munsell_0.5.0       
## [19] gtable_0.2.0         caTools_1.17.1.1     htmlwidgets_1.3     
## [22] codetools_0.2-16     evaluate_0.12        labeling_0.3        
## [25] knitr_1.21           Cairo_1.5-9          httpuv_1.5.0        
## [28] crosstalk_1.0.0      Rcpp_1.0.1           xtable_1.8-3        
## [31] KernSmooth_2.23-15   promises_1.0.1       scales_1.0.0        
## [34] formatR_1.5          gdata_2.18.0         jsonlite_1.6        
## [37] mime_0.6             digest_0.6.18        stringi_1.2.4       
## [40] shiny_1.2.0          tools_3.5.1          bitops_1.0-6        
## [43] magrittr_1.5         lazyeval_0.2.1       tibble_2.0.1        
## [46] futile.options_1.0.1 crayon_1.3.4         pkgconfig_2.0.2     
## [49] data.table_1.12.0    assertthat_0.2.0     rmarkdown_1.11      
## [52] httr_1.4.0           R6_2.4.0             compiler_3.5.1