Relative Abundance Heatmaps: eDNA metabarcoding base script

.Rmd script

This script plots your relative abundance matrix as a heatmap. Figures produced are potentially part of the main figures of your manuscript/report.

Load libraries

library(ggplot2) ## for plotting
library(dplyr) ## for data table manipulation

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr) ## for data table manipulation
library(readxl) ## for reading in excel files
library(stringr) ## for data transformation
library(strex) ## for data transformation
library(purrr) ## for data transformation
library(funrar) ## for make_relative()
library(tidyverse) ## for data transformation

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ readr     2.1.5
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(naniar) ## replace_with_na_all function
library(ggh4x) ## for facet wrap options
library(tidytext)
library(forcats)
library(scales)

## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## 
## The following object is masked from 'package:purrr':
## 
##     discard

Load data

metadata <- read_xlsx("docs/eDNA 12S metab/example_input/metadata_tidal_cycle.xlsx") %>%
  mutate(`GMGI Sample ID` = gsub("-", "_", `GMGI Sample ID`),
         `GMGI Sample ID` = paste0(`GMGI Sample ID`, "_12S"))

data <- read_xlsx("docs/eDNA 12S metab/example_output/Results_matrix_relative.xlsx")

taxlevels <- read_excel(
  "C:/BoxDrive/Box/Science/Fisheries/Projects/eDNA/Metabarcoding Lab Resources/Reference Databases/GMGI_Vert_Ref.xlsx") %>% 
  dplyr::select("Species_name", "Kingdom", "Phylum", "Class", 
                "Order", "Family", "Genus", "species", "PhylogenySorting") %>%
  distinct()

df_annotated <- data %>% gather("GMGI Sample ID", "Rel_Ab", 4:last_col()) %>%
  left_join(., taxlevels, by = "Species_name") %>%
  left_join(., metadata, by = "GMGI Sample ID")

Adding any common names that are new to this project

## bringing in common names information for those not in our db
commonNames_annotated <- read_xlsx("docs/eDNA 12S metab/example_output/Taxonomic_assignments/CommonNames_required_edited.xlsx")

# Loop through each row of the dataframe to add taxonomic level information from required edited worksheet 
for (i in commonNames_annotated$Species_name) {
  # Extract the current row (will do this for each ASV_ID in the choice df)
  current_row <- commonNames_annotated %>% subset(Species_name==i)

  # Apply filter based on the current row's condition
df_annotated <- df_annotated %>%
  mutate(across(c(Common_name, Category, Kingdom, Phylum, Class, Order, Family, Genus, species, PhylogenySorting),
                ~case_when(Species_name == current_row$Species_name ~ current_row[[cur_column()]],
                           TRUE ~ .x)))
}

## tax list 
df_tax <- df_annotated %>% dplyr::select(Species_name, Kingdom:Genus) %>% distinct()

Remove Categories

If you want to plot relative abundance without human, other, or livestock categories. As FYI/warning, relative abundance is calculated with these categories included. Relative abundance can also be thought of as proportion of total reads, which is calculated from the total reads for that sample.

df_filtered <- df_annotated %>% 
  filter(!Category == "Other" & !Category == "Livestock" & !Category == "Unassigned" & !Category == "Human")

If targeted species heatmap is desired, replace df_annotated with df_filtered in the heatmap code below. If so, remember to change the file name exported.

Heatmap plot

reverse label order: scale y discrete limits reverse limits=rev

https://coolors.co/ (hit tools on the top right hand side)

## if subset of categories is desired, replace df below with df_filtered
df_annotated %>%

  ## replace zeros with NAs for plotting
  replace_with_na_all(condition = ~.x == 0.00000) %>%

  # Create a factor for Common_name ordered by Order within each Category
  group_by(Category) %>%
  mutate(Common_name = factor(Common_name, levels = unique(Common_name[order(Order, desc(PhylogenySorting))]))) %>%
  ungroup() %>%

  ## ggplot basic options (USER EDIT: X AND Y AXIS)
  ggplot(., aes(x = `Project Sample ID`, y = Common_name)) +
  geom_tile(aes(fill = Rel_Ab), color = "black") +

  ## x, y, and legend labels (USER EDITS IF DESIRED)
  ylab("Common name") +
  xlab("Site") +
  labs(fill = "Relative Abundance (%)") +

  ## color of the tile options; direction=1 will flip the low/high (USER EDITS IF DESIRED)
  scale_fill_gradient(na.value = "white", low = "lightskyblue2", high = "#0C4D66") + 

  ## facet grid with Category and project variables
  ## USER: REPLACE . WITH ANY FACTOR; eg: Category ~ Sample_Type
  facet_grid2(Category ~ ., 
              scales = "free", space = "free", 
              labeller = labeller(Category = label_wrap_gen(width = 10))) +

  ## graph theme options
  theme_classic() +
  theme(
    ## axis text 
    axis.text.x = element_text(angle = 90, size=6, color="grey25", hjust = 1),
    axis.text.y = element_text(colour = 'black', size = 8),

    ## legend text and title 
    legend.text = element_text(size = 8, color="black"),
    legend.title = element_text(margin = margin(t = 0, r = 0, b = 5, l = 0), size=10, color="black", face="bold"),
    legend.position = c(-0.4, -0.05), 
    legend.key.height = unit(5, 'mm'),
    legend.direction = "horizontal",
    legend.key.width = unit(5, 'mm'),
    legend.title.align = 0.5,
    legend.title.position = "top",

    ## axis titles 
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0), size=14, face="bold"),
    axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0), size=14, face="bold"),

    ## facet wrap labels
    strip.text.x = element_text(color = "black", face = "bold", size = 12),
    strip.text.y = element_text(color = "black", face = "bold", size = 12, angle=0),
    strip.background.y = element_blank(),
    strip.clip = "off"
    )

## USER EDITS WIDTH AND HEIGHT TO DESIRED   
ggsave("docs/eDNA 12S metab/example_output/figures/Relative_abundance.png", width = 22, height = 14)