Chapter 4 Figure 4

Biomarkers heatmap. Heatmap showing the genera (y-axis) with significant associations discovered by Maaslin2. Comparisons were carried out using the type of buffer used and the storage conditions metadata information (x-axis). Storage condition included Temperature (Fridge:4oC or RT:20oC) and Days stored before being frozen at -80oC (1D: One day, 3D: Three days), Baseline samples were immediately frozen at -80oC. The no buffer at Baseline:–80oC group was used as the reference. Participant numbers were used for random effects for the model. Values on heatmap are (-log(qval)*sign(coeff)).

4.1 Libraries

library("phyloseq")
library("Maaslin2")
library("microbiome")
library("ggh4x")

4.2 Genus

4.2.1 Preprocess table and convert to genus table

#load in phyloseq object
load("./data/preprocess_physeq")

sample_data(physeq)

#subset samples to remove rnalater unwashed samples
physeq <- subset_samples(physeq, !(RNAlater_washed_status == "unwashed"))
#Remove anything that is not bacteria
physeq <- subset_taxa(physeq, Phylum != "Bacteria")

#Clostridium
#Get logical vector to know which rows 
clos_subset_vector <- as.vector(replace((tax_table(physeq)[,"Genus"] == "Clostridium"), 
        is.na(tax_table(physeq)[,"Genus"] == "Clostridium"),
        FALSE))
#Extract taxa names
clos_taxa_names <- taxa_names(physeq)[clos_subset_vector]
#Make new genus names for clositriudm
clos_new_genus_name <- paste(tax_table(physeq)[clos_taxa_names,"Family"], 
                             tax_table(physeq)[clos_taxa_names,"Genus"], 
                             sep = "_")
tax_table(physeq)[clos_taxa_names,"Genus"] <- clos_new_genus_name
#Ruminococcus
#Remove instances of square brackets
clos_subset_vector <- as.vector(replace((tax_table(physeq)[,"Genus"] == "[Ruminococcus]"), 
        is.na(tax_table(physeq)[,"Genus"] == "[Ruminococcus]"),
        FALSE))
#Remove []
clos_taxa_names <- taxa_names(physeq)[clos_subset_vector]
tax_table(physeq)[clos_taxa_names,"Genus"] <- "Ruminococcus"
#Get logical vector to know which rows 
clos_subset_vector <- as.vector(replace((tax_table(physeq)[,"Genus"] == "Ruminococcus"), 
        is.na(tax_table(physeq)[,"Genus"] == "Ruminococcus"),
        FALSE))
#Extract taxa names
clos_taxa_names <- taxa_names(physeq)[clos_subset_vector]
#Make new genus names for clositriudm
clos_new_genus_name <- paste(tax_table(physeq)[clos_taxa_names,"Family"], 
                             tax_table(physeq)[clos_taxa_names,"Genus"], 
                             sep = "_")
tax_table(physeq)[clos_taxa_names,"Genus"] <- clos_new_genus_name


#Create 
#Convert to genus table
physeq <- aggregate_taxa(physeq, "Genus")
#transform to relabund
physeq_relabund <- abundances(physeq, "compositional")
# Get data into maaslin format ####
#First need df of relabund
df_data <- t(as.data.frame(otu_table(physeq_relabund, taxa_are_rows=TRUE)))
#next metadata
#the nonsense is so it becomes a normal df
#rather than a weird phyloseq object within a df
df_metadata <- 
  as.data.frame(t(t(as.data.frame(sample_data(physeq)))))
#keep only columns of interest
df_metadata <- df_metadata[,c(2,5,10,11,12,13)]
#convert some columns to numeric
df_metadata$Storagetemp <- as.numeric(df_metadata$Storagetemp)
df_metadata$DaysstoredpriortoDNAextraction <- as.numeric(df_metadata$DaysstoredpriortoDNAextraction)
df_metadata$DNAconcng_ul <- as.numeric(df_metadata$DNAconcng_ul)
#remvo phyloseq object
rm(physeq)
rm(physeq_relabund)
#Edit metadata
df_metadata$Bufferused_and_Storageconditions <-
  paste0(df_metadata$Bufferused, "_", df_metadata$Storageconditions)
#Remove unwanted metadata columns
df_metadata <- df_metadata[,c(1,5,7)]
#Convert all columns to factors columns
df_metadata[] <- lapply( df_metadata, factor)

#Save objects
save(df_data, file = "./data/maaslin_genus_df")
save(df_metadata, file = "./data/maaslin_genus_metadata")

4.2.2 MaAsLin2 on genera

#Remove Days stored info
df_metadata <- df_metadata[,c(1,3)]
#fit data
fit_data <- Maaslin2::Maaslin2(input_data = df_data,
                     input_metadata = df_metadata,
                     output = "standard_maaslin2_genus",
                     #Random sample used for subject/patients
                     random_effects = "Patientnumber",
                     reference = c("Bufferused_and_Storageconditions", "No buffer_-80")
                     )

#Create heatmap of significant results
#Read in data
df <- read.csv(
  file = "./standard_maaslin2_genus/significant_results.tsv",
  sep = "\t", check.names=FALSE)
#Create (-log(qval)*sign(coeff)) column for value
df$val <- (-log(df$qval)*sign(df$coef))
#Create min and max val
lo <- floor(min(df$val))
up <- ceiling(max(df$val))
mid <- 0
#Change -80 to Baseline
df$value <- gsub(pattern = "_-80", replacement = "_Baseline", df$value)
#Create rounded value for text
df$val_round <- round(df$val, digits = 2)
#heatmap
heatmap_media <- ggplot(df,
                        aes(x = value, y = feature, fill = val)) +
                  #Produce ggplot as tile/heatmap style plot
                  ggplot2::geom_tile(colour = "black") +
                  #Customise colour gradient
                  ggplot2::scale_fill_gradient2(low = "blue", high = "red", mid = "white",
                                                na.value = "white",
                                                midpoint = mid, limit = c(lo,up)) +
                  #Add the values as text in the cells
                  ggplot2::geom_text(aes(value, feature, label = val_round),
                      colour = "black", size = 4) +
                  #Remove the x and y labels (NULL) and add a title
                  ggplot2::labs(x = NULL, y = NULL,
                    title = "Features with significant associations (-log(qval))") +
                  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
ggsave(filename = "./figures/maaslin2_heatmap_genus.png", plot = heatmap_media,
       device = "png", dpi = 300, units = "mm", height = 200, width = 300)