Relative Abundance Heatmaps: eDNA metabarcoding base script

.Rmd script

This script plots your relative abundance matrix as a heatmap. Figures produced are potentially part of the main figures of your manuscript/report.

Load libraries

library(ggplot2) ## for plotting
library(dplyr) ## for data table manipulation

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr) ## for data table manipulation
library(readxl) ## for reading in excel files
library(stringr) ## for data transformation
library(strex) ## for data transformation
library(purrr) ## for data transformation
library(funrar) ## for make_relative()
library(tidyverse) ## for data transformation

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(naniar) ## replace_with_na_all function
library(ggh4x) ## for facet wrap options

## 
## Attaching package: 'ggh4x'
## 
## The following object is masked from 'package:ggplot2':
## 
##     guide_axis_logticks

library(tidytext)
library(forcats)
library(scales)

## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## 
## The following object is masked from 'package:purrr':
## 
##     discard

#remotes::install_github("davidsjoberg/ggsankey")
library(ggsankey)

Load data

df <- read_xlsx("example_output/Results_relative_abundance_long_format.xlsx") %>%
  mutate(across(c(relab), ~ round(.x, 5)))

taxlevels <- read_excel(
  "C:/BoxDrive/Box/Science/Fisheries/Projects/eDNA/Metabarcoding Lab Resources/Reference Databases/GMGI_Vert_Ref.xlsx") %>% 
  dplyr::select("Species_name", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "species") %>%
  distinct()

df_annotated <- df %>% left_join(., taxlevels, by = "Species_name")

## bringing in common names information for those not in our db
commonNames_annotated <- read_xlsx("example_output/Taxonomic_assignments/CommonNames_required_edited.xlsx")

# Loop through each row of the dataframe to add taxonomic level information from required edited worksheet 
for (i in commonNames_annotated$Species_name) {
  # Extract the current row (will do this for each ASV_ID in the choice df)
  current_row <- commonNames_annotated %>% subset(Species_name==i)

  # Apply filter based on the current row's condition
df_annotated <- df_annotated %>%
  mutate(across(c(Common_name, Category, Kingdom, Phylum, Class, Order, Family, Genus, species),
                ~case_when(Species_name == current_row$Species_name ~ current_row[[cur_column()]],
                           TRUE ~ .x)))
}

## tax list 
df_tax <- df_annotated %>% dplyr::select(Species_name, Kingdom:Genus) %>% distinct()

Remove Categories

If you want to plot relative abundance without human, other, or livestock categories. As FYI/warning, relative abundance is calculated with these categories included. Relative abundance can also be thought of as proportion of total reads, which is calculated from the total reads for that sample.

df_filtered <- df_annotated %>% 
  filter(!Category == "Other" & !Category == "Livestock" & !Category == "Unassigned" & !Category == "Human") 

df_average <- df_annotated %>%
  group_by(SampleType, Species_name) %>%
  ### average relative abundance by sample Type
  mutate(avg_relab = mean(relab, na.rm=TRUE))

If targeted species heatmap is desired, replace df_annotated with df_filtered in the heatmap code below. If so, remember to change the file name exported.

Heatmap plot

reverse label order: scale y discrete limits reverse limits=rev

https://coolors.co/ (hit tools on the top right hand side)

## if subset of categories is desired, replace df below with df_filtered
df_annotated %>%

  ## replace zeros with NAs for plotting
  replace_with_na_all(condition = ~.x == 0.00000) %>%

  # Create a factor for Common_name ordered by Order within each Category
  group_by(Category) %>%
  mutate(Common_name = factor(Common_name, levels = unique(Common_name[order(Order, desc(Common_name))]))) %>%
  ungroup() %>%

  ## ggplot basic options (USER EDIT: X AND Y AXIS)
  ggplot(., aes(x = sampleID, y = Common_name)) +
  geom_tile(aes(fill = relab), color = "black") +

  ## x, y, and legend labels (USER EDITS IF DESIRED)
  ylab("Common name") +
  xlab("Site") +
  labs(fill = "Relative Abundance (%)") +

  ## color of the tile options; direction=1 will flip the low/high (USER EDITS IF DESIRED)
  scale_fill_gradient(na.value = "white", low = "lightskyblue2", high = "#0C4D66") + 

  ## facet grid with Category and project variables
  facet_grid2(Category ~ SampleType, 
              scales = "free", space = "free", 
              labeller = labeller(Category = label_wrap_gen(width = 10))) +

  ## graph theme options
  theme_classic() +
  theme(
    ## axis text 
    axis.text.x = element_text(angle = 90, size=6, color="grey25", hjust = 1),
    axis.text.y = element_text(colour = 'black', size = 8),

    ## legend text and title 
    legend.text = element_text(size = 8, color="black"),
    legend.title = element_text(margin = margin(t = 0, r = 0, b = 5, l = 0), size=10, color="black", face="bold"),
    legend.position = c(-0.4, -0.05), 
    legend.key.height = unit(5, 'mm'),
    legend.direction = "horizontal",
    legend.key.width = unit(5, 'mm'),
    legend.title.align = 0.5,
    legend.title.position = "top",

    ## axis titles 
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0), size=14, face="bold"),
    axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0), size=14, face="bold"),

    ## facet wrap labels
    strip.text.x = element_text(color = "black", face = "bold", size = 12),
    strip.text.y = element_text(color = "black", face = "bold", size = 12, angle=0),
    strip.background.y = element_blank(),
    strip.clip = "off"
    )

## Warning: The `legend.title.align` argument of `theme()` is deprecated as of ggplot2
## 3.5.0.
## ℹ Please use theme(legend.title = element_text(hjust)) instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## USER EDITS WIDTH AND HEIGHT TO DESIRED   
ggsave("example_output/Figures/Relative_abundance.png", width = 22, height = 14)

Heatmap plot by sample type

df_average %>%
  dplyr::select(Species_name, Common_name, Category, SampleType, Kingdom:avg_relab) %>%

  ## replace zeros with NAs for plotting
  replace_with_na_all(condition = ~.x == 0.00000) %>%

  # Create a factor for Common_name ordered by Order within each Category
  group_by(Category) %>%
  mutate(Common_name = factor(Common_name, levels = unique(Common_name[order(Order, desc(Common_name))]))) %>%
  ungroup() %>%

    ## ggplot basic options (USER EDIT: X AND Y AXIS)
  ggplot(., aes(x = SampleType, y = Common_name)) +
  geom_tile(aes(fill = avg_relab), color = "black") +

  ## x, y, and legend labels (USER EDITS IF DESIRED)
  ylab("Common name") +
  xlab("") +
  labs(fill = "Relative Abundance (%)") +

  ## color of the tile options; direction=1 will flip the low/high (USER EDITS IF DESIRED)
  scale_fill_gradient(na.value = "white", low = "lightskyblue2", high = "#0C4D66") + 

  ## facet grid with Category and project variables
  facet_grid2(Category ~ SampleType, 
              scales = "free", space = "free", 
              labeller = labeller(Category = label_wrap_gen(width = 10))) +

  ## graph theme options
  theme_classic() +
  theme(
    ## axis text 
    axis.text.x = element_text(angle = 90, size=6, color="grey25", hjust = 1),
    axis.text.y = element_text(colour = 'black', size = 8),

    ## legend text and title 
    legend.text = element_text(size = 8, color="black"),
    legend.title = element_text(margin = margin(t = 0, r = 0, b = 5, l = 0), size=10, color="black", face="bold"),
    legend.position = c(-0.4, -0.05), 
    legend.key.height = unit(5, 'mm'),
    legend.direction = "horizontal",
    legend.key.width = unit(5, 'mm'),
    legend.title.align = 0.5,
    legend.title.position = "top",

    ## axis titles 
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0), size=14, face="bold"),
    axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0), size=14, face="bold"),

    ## facet wrap labels
    strip.text.x = element_text(color = "black", face = "bold", size = 12),
    strip.text.y = element_text(color = "black", face = "bold", size = 12, angle=0),
    strip.background.y = element_blank(),
    strip.clip = "off"
    )

ggsave("example_output/Figures/Relative_abundance_sampletype.png", width = 7, height = 14)

Top Species List visual

Used for contract report.

top_list <- read_xlsx("example_output/Results_rawreads_long_format.xlsx") %>%
  filter(!Category == "Other" & !Category == "Livestock" & !Category == "unassigned" & !Category == "Human") %>%
  group_by(Species_name, Common_name) %>%
  summarise(total = sum(reads),
            log = log10(total),
            total_M = total/1000000) %>% 
  arrange(desc(total)) %>%
  head(30)

## `summarise()` has grouped output by 'Species_name'. You can override using the
## `.groups` argument.

ggplot(top_list, aes(x = fct_reorder(Common_name, log), y = log)) +
  geom_segment(aes(xend = Common_name, yend = 0), color = "#97C1DE") +  # Lollipop stick
  geom_point(size = 3, shape=21, color='grey30', fill = "#97C1DE") +  # Lollipop head
  coord_flip() +  # Flip coordinates for horizontal lollipop chart
  labs(
       x = "",
       y = "Normalized Reads") +
  theme_bw() +
  theme(axis.text.y = element_text(size = 8, color='black'), #, face="italic"
        axis.text.x = element_text(size = 6),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0), size=10, face="bold"),
        axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0), size=10, face="bold"),
        axis.text.x.top = element_text(size = 8, color='black', face="italic", angle = 45, hjust = 0)) +
  scale_y_continuous(
    labels = comma,
    limits = c(0, max(top_list$log) + (max(top_list$log)*0.1))  # Set the upper limit to max value + 10%
  )

ggsave("example_output/Figures/Top_species_log.png", width=3.5, height=6)

Bubble plot

Used for contract report.

### I had different target groups trying to facet this bubble plot so it's easier to put on the report (that failed but still troubleshooting so left it)
raw_df <- read_xlsx("example_output/Results_rawreads_long_format.xlsx") %>%
  group_by(Species_name, Common_name, Category) %>%
  reframe(sum = sum(reads)/1000000,
          xaxis = "x") %>%
  left_join(., df_tax, by = "Species_name") %>%
  mutate(Target_group = case_when(
    Category == "Human" ~ "Nontarget",
    Category == "Other" ~ "Nontarget",
    Category == "Unassigned" ~ "Nontarget",
    Category == "Livestock" ~ "Nontarget",
    Category == "Bird" ~ "Target1",
    Category == "Elasmobranch" ~ "Target1",
    Category == "Marine Mammal" ~ "Target1",
    Category == "Sea Turtle" ~ "Target1",
    Category == "Teleost Fish" ~ "Target2"
  )) %>% filter(!Target_group == "Nontarget") 


raw_df %>% 
  filter(!Target_group == "Nontarget") %>%
  # Create a factor for Common_name ordered by Order within each Category
  group_by(Category) %>%
  mutate(Common_name = factor(Common_name, levels = unique(Common_name[order(Order, desc(Common_name))]))) %>%
  ungroup() %>%

  ggplot(., aes(x=xaxis, y=Common_name)) + 

  geom_point(aes(size=sum, fill=sum), color = 'black', shape=21) +
  scale_fill_gradient(na.value = "white", low = "lightskyblue2", high = "#0C4D66") +

  facet_grid2(Category ~ ., scales = "free", space = "free") +

  theme_bw() +
  labs(
    x="",
    y="",
    fill = "Reads (M)",
    size = "Reads (M)"
  ) +

  theme(
    axis.text.y = element_text(size = 8, color = 'black'),
    axis.text.x = element_blank(),
    axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0), size=12, face="bold"),
    axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0), size=12, face="bold"),
    ## facet wrap labels
    strip.text.x = element_text(color = "black", face = "bold", size = 12),
    #strip.text.y = element_text(color = "black", face = "bold", size = 12, angle=0),
    strip.text.y = element_blank(),
    strip.background.y = element_blank(),
    strip.clip = "off",
    # Combine legends
    legend.position = "right",
    legend.box = "vertical"
  ) +
  guides(
    fill = "none",
    size = guide_legend(order = 2, reverse = TRUE,
                        override.aes = list(fill = scales::seq_gradient_pal("#0C4D66", "lightskyblue2")
                                            (seq(0, 1, length.out = 5))))
  )

ggsave("example_output/Figures/Species_bubbleplot.png", width=4.5, height=16)