Chapter 19 Supp table 8

Spearman correlation values of 16S genera and SCFA abundance profiles stratified by buffer used. “All” are the results of using all data (no buffer, RNAlater, ethanol, and PSP). All non-zero values are significant results. Similarites table shows the number of similarities by significant correlations where the correlations are both positive or both negative.

19.1 Libraries

library(microbiome)
library(phyloseq)
library(dplyr)
library(reshape2)
library(DT)
library(funrar)

19.2 Preprocess tables

#load in phyloseq object
load("./data/preprocess_physeq")
#subset samples to remove rnalater unwashed samples
physeq <- 
  subset_samples(physeq, !(RNAlater_washed_status == "unwashed"))
#Subset samples to remove samples with NA lipid info
physeq <- subset_samples(physeq, !(Formate.MM. == "NA"))
#Remove anything that is not bacteria
physeq <- subset_taxa(physeq, Phylum != "Bacteria")

#Clostridium
#Get logical vector to know which rows 
clos_subset_vector <- as.vector(replace((tax_table(physeq)[,"Genus"] == "Clostridium"), 
        is.na(tax_table(physeq)[,"Genus"] == "Clostridium"),
        FALSE))
#Extract taxa names
clos_taxa_names <- taxa_names(physeq)[clos_subset_vector]
#Make new genus names for clositriudm
clos_new_genus_name <- paste(tax_table(physeq)[clos_taxa_names,"Family"], 
                             tax_table(physeq)[clos_taxa_names,"Genus"], 
                             sep = "_")
tax_table(physeq)[clos_taxa_names,"Genus"] <- clos_new_genus_name
#Ruminococcus
#Remove instances of square brackets
clos_subset_vector <- as.vector(replace((tax_table(physeq)[,"Genus"] == "[Ruminococcus]"), 
        is.na(tax_table(physeq)[,"Genus"] == "[Ruminococcus]"),
        FALSE))
#Remove []
clos_taxa_names <- taxa_names(physeq)[clos_subset_vector]
tax_table(physeq)[clos_taxa_names,"Genus"] <- "Ruminococcus"
#Get logical vector to know which rows 
clos_subset_vector <- as.vector(replace((tax_table(physeq)[,"Genus"] == "Ruminococcus"), 
        is.na(tax_table(physeq)[,"Genus"] == "Ruminococcus"),
        FALSE))
#Extract taxa names
clos_taxa_names <- taxa_names(physeq)[clos_subset_vector]
#Make new genus names for clositriudm
clos_new_genus_name <- paste(tax_table(physeq)[clos_taxa_names,"Family"], 
                             tax_table(physeq)[clos_taxa_names,"Genus"], 
                             sep = "_")
tax_table(physeq)[clos_taxa_names,"Genus"] <- clos_new_genus_name


#Convert to genus table
physeq <- aggregate_taxa(physeq, "Genus")
#COnvert to relative abundance
physeq_relabund <- microbiome::transform(physeq, "compositional")
#Extract genus table to a df
genus_df <- as.matrix(otu_table(physeq_relabund))
#transpose
genus_df_t <- as.matrix(t(genus_df))
#log10 the values
#genus_df_t_log10 <- log10(genus_df_t)

#extract lipid table
lipid_mat <- 
  as.matrix(meta(physeq)[,grepl(".MM.",colnames(meta(physeq)))])
#remove .MM. from colnames
colnames(lipid_mat) <- 
  gsub(pattern = ".MM.", replacement = "", colnames(lipid_mat))
#Remove "_b" from sample names
row.names(lipid_mat) <- gsub(pattern = "_b", replacement = "",
                             row.names(lipid_mat))
row.names(genus_df_t) <- gsub(pattern = "_b", replacement = "",
                             row.names(genus_df_t))
#Convert to relative abundance values
lipid_mat_relabund <- make_relative(lipid_mat)

#Sample_data as dataframe
metadf <- as.data.frame(sample_data(physeq))

#Save object
save(genus_df_t, file = "./data/16s_and_lipid_genus_physeq")
save(lipid_mat_relabund, file = "./data/lipid_relabund_matrix")

19.3 No buffer

19.3.1 Correlation

#load in phyloseq object
load("./data/16s_and_lipid_genus_physeq")
load("./data/lipid_relabund_matrix")
load("./data/preprocess_physeq")
metadf <- as.data.frame(sample_data(physeq))
#subset tables
sample_names <- unlist(metadf[metadf$Bufferused == "No buffer","Sample_name"])
subset_genus_df_t <- genus_df_t[sample_names,]
subset_lipid_mat_relabund <- lipid_mat_relabund[sample_names,]
#Correlation
correlation.table <- associate(
  subset_genus_df_t, 
  subset_lipid_mat_relabund, 
  method = "spearman", mode = "table", 
  p.adj.threshold = 0.05, n.signif = 1)
#Add column names
colnames(correlation.table)[1:2] <- c("Genus","Lipid")
#Write table with significant hits to file
write.table(correlation.table[correlation.table$p.adj < 0.05,], 
            file = "./data/Genus_lipid_sig_correlation_no_buffer.csv", 
            row.names = FALSE, sep = ",", quote = FALSE,
            col.names = TRUE)
#Display table with significant hits below
datatable(correlation.table[correlation.table$p.adj < 0.05,],
          options = list(pageLength = 50) )

19.4 RNAlater

19.4.1 Correlation

#load in phyloseq object
load("./data/16s_and_lipid_genus_physeq")
load("./data/lipid_relabund_matrix")
load("./data/preprocess_physeq")
metadf <- as.data.frame(sample_data(physeq))
#subset tables
sample_names <- unlist(metadf[metadf$Bufferused == "Rnalater","Sample_name"])
subset_genus_df_t <- genus_df_t[sample_names,]
subset_lipid_mat_relabund <- lipid_mat_relabund[sample_names,]
#Correlation
correlation.table <- associate(
  subset_genus_df_t, 
  subset_lipid_mat_relabund, 
  method = "spearman", mode = "table", 
  p.adj.threshold = 0.05, n.signif = 1)
#Add column names
colnames(correlation.table)[1:2] <- c("Genus","Lipid")
#Write table with significant hits to file
write.table(correlation.table[correlation.table$p.adj < 0.05,], 
            file = "./data/Genus_lipid_sig_correlation_rnalater.csv", 
            row.names = FALSE, sep = ",", quote = FALSE,
            col.names = TRUE)
#Display table with significant hits below
datatable(correlation.table[correlation.table$p.adj < 0.05,],
          options = list(pageLength = 50) )

19.5 Ethanol

19.5.1 Correlation

#load in phyloseq object
load("./data/16s_and_lipid_genus_physeq")
load("./data/lipid_relabund_matrix")
load("./data/preprocess_physeq")
metadf <- as.data.frame(sample_data(physeq))
#subset tables
sample_names <- unlist(metadf[metadf$Bufferused == "Ethanol","Sample_name"])
subset_genus_df_t <- genus_df_t[sample_names,]
subset_lipid_mat_relabund <- lipid_mat_relabund[sample_names,]
#Correlation
correlation.table <- associate(
  subset_genus_df_t, 
  subset_lipid_mat_relabund, 
  method = "spearman", mode = "table", 
  p.adj.threshold = 0.05, n.signif = 1)
#Add column names
colnames(correlation.table)[1:2] <- c("Genus","Lipid")
#Write table with significant hits to file
write.table(correlation.table[correlation.table$p.adj < 0.05,], 
            file = "./data/Genus_lipid_sig_correlation_ethanol.csv", 
            row.names = FALSE, sep = ",", quote = FALSE,
            col.names = TRUE)
#Display table with significant hits below
datatable(correlation.table[correlation.table$p.adj < 0.05,],
          options = list(pageLength = 50) )

19.6 PSP

19.6.1 Correlation

#load in phyloseq object
load("./data/16s_and_lipid_genus_physeq")
load("./data/lipid_relabund_matrix")
load("./data/preprocess_physeq")
metadf <- as.data.frame(sample_data(physeq))
#subset tables
sample_names <- unlist(metadf[metadf$Bufferused == "PSP","Sample_name"])
subset_genus_df_t <- genus_df_t[sample_names,]
subset_lipid_mat_relabund <- lipid_mat_relabund[sample_names,]
#Correlation
correlation.table <- associate(
  subset_genus_df_t, 
  subset_lipid_mat_relabund, 
  method = "spearman", mode = "table", 
  p.adj.threshold = 0.05, n.signif = 1)
#Add column names
colnames(correlation.table)[1:2] <- c("Genus","Lipid")
#Write table with significant hits to file
write.table(correlation.table[correlation.table$p.adj < 0.05,], 
            file = "./data/Genus_lipid_sig_correlation_psp.csv", 
            row.names = FALSE, sep = ",", quote = FALSE,
            col.names = TRUE)
#Display table with significant hits below
datatable(correlation.table[correlation.table$p.adj < 0.05,],
          options = list(pageLength = 50) )

19.7 Compare results

#read in data
all_df <- read.csv("data/Genus_lipid_sig_correlation.csv")
nobuffer_df <- read.csv("data/Genus_lipid_sig_correlation_no_buffer.csv")
rnalater_df <- read.csv("data/Genus_lipid_sig_correlation_rnalater.csv")
ethanol_df <- read.csv("data/Genus_lipid_sig_correlation_ethanol.csv")
psp_df <- read.csv("data/Genus_lipid_sig_correlation_psp.csv")
#remove p.adj value and add column for comparison
all_df <- all_df[,1:3]
all_df$comparison <- "All"
nobuffer_df <- nobuffer_df[,1:3]
nobuffer_df$comparison <- "No buffer"
rnalater_df <- rnalater_df[,1:3]
rnalater_df$comparison <- "Rnalater"
ethanol_df <- ethanol_df[,1:3]
ethanol_df$comparison <- "Ethanol"
psp_df <- psp_df[,1:3]
psp_df$comparison <- "PSP"
#COmbine into one long df
long_df <- rbind(all_df,nobuffer_df)
long_df <- rbind(long_df,rnalater_df)
long_df <- rbind(long_df,ethanol_df)
long_df <- rbind(long_df,psp_df)
#convert to wide
wide_df <- dcast(long_df, Genus+Lipid ~ comparison, value.var = "Correlation")
#convert NA to 0
wide_df[is.na(wide_df)] <- 0
#Write table
write.table(wide_df, 
            file = "./data/Genus_lipid_sig_correlation_comparisons.csv", 
            row.names = FALSE, sep = ",", quote = FALSE,
            col.names = TRUE)

#compare results
comparisons <- c("All", "No buffer", "Rnalater", "Ethanol", "PSP")


#Create matrix
comp_mat <- matrix(data = NA, nrow = length(comparisons), ncol = length(comparisons), 
                   dimnames = list(comparisons,comparisons))

#fill in matrix
for (ccol in comparisons) {
  for (crow in comparisons) {
    #Counter for similarities
    similarities <- 0
    #loop through rows
    for (r in 1:nrow(wide_df)) {
      corcol <- wide_df[r,ccol]
      corrow <- wide_df[r,crow]
      #Check if either value is 0, do nothing if the case
      if (corcol == 0) {
        similarities <- similarities
      } else if ( corrow == 0) {
        similarities <- similarities
      } else if (sign(corcol) == sign(corrow)) {
        #Check if both values are positive or if both negative
        similarities <- similarities + 1
      }
    }
      comp_mat[ccol,crow] <- similarities
  }
}

#write table
write.table(comp_mat, 
            file = "./data/Genus_lipid_sig_correlation_comparisons_similarities.csv", 
            sep = ",", quote = FALSE,
            col.names = TRUE)
#Display table
datatable(comp_mat,
          options = list(pageLength = 50),
          rownames = TRUE)