Chapter 19 Supp table 8
Spearman correlation values of 16S genera and SCFA abundance profiles stratified by buffer used. “All” are the results of using all data (no buffer, RNAlater, ethanol, and PSP). All non-zero values are significant results. Similarites table shows the number of similarities by significant correlations where the correlations are both positive or both negative.
19.2 Preprocess tables
#load in phyloseq object
load("./data/preprocess_physeq")
#subset samples to remove rnalater unwashed samples
physeq <-
subset_samples(physeq, !(RNAlater_washed_status == "unwashed"))
#Subset samples to remove samples with NA lipid info
physeq <- subset_samples(physeq, !(Formate.MM. == "NA"))
#Remove anything that is not bacteria
physeq <- subset_taxa(physeq, Phylum != "Bacteria")
#Clostridium
#Get logical vector to know which rows
clos_subset_vector <- as.vector(replace((tax_table(physeq)[,"Genus"] == "Clostridium"),
is.na(tax_table(physeq)[,"Genus"] == "Clostridium"),
FALSE))
#Extract taxa names
clos_taxa_names <- taxa_names(physeq)[clos_subset_vector]
#Make new genus names for clositriudm
clos_new_genus_name <- paste(tax_table(physeq)[clos_taxa_names,"Family"],
tax_table(physeq)[clos_taxa_names,"Genus"],
sep = "_")
tax_table(physeq)[clos_taxa_names,"Genus"] <- clos_new_genus_name
#Ruminococcus
#Remove instances of square brackets
clos_subset_vector <- as.vector(replace((tax_table(physeq)[,"Genus"] == "[Ruminococcus]"),
is.na(tax_table(physeq)[,"Genus"] == "[Ruminococcus]"),
FALSE))
#Remove []
clos_taxa_names <- taxa_names(physeq)[clos_subset_vector]
tax_table(physeq)[clos_taxa_names,"Genus"] <- "Ruminococcus"
#Get logical vector to know which rows
clos_subset_vector <- as.vector(replace((tax_table(physeq)[,"Genus"] == "Ruminococcus"),
is.na(tax_table(physeq)[,"Genus"] == "Ruminococcus"),
FALSE))
#Extract taxa names
clos_taxa_names <- taxa_names(physeq)[clos_subset_vector]
#Make new genus names for clositriudm
clos_new_genus_name <- paste(tax_table(physeq)[clos_taxa_names,"Family"],
tax_table(physeq)[clos_taxa_names,"Genus"],
sep = "_")
tax_table(physeq)[clos_taxa_names,"Genus"] <- clos_new_genus_name
#Convert to genus table
physeq <- aggregate_taxa(physeq, "Genus")
#COnvert to relative abundance
physeq_relabund <- microbiome::transform(physeq, "compositional")
#Extract genus table to a df
genus_df <- as.matrix(otu_table(physeq_relabund))
#transpose
genus_df_t <- as.matrix(t(genus_df))
#log10 the values
#genus_df_t_log10 <- log10(genus_df_t)
#extract lipid table
lipid_mat <-
as.matrix(meta(physeq)[,grepl(".MM.",colnames(meta(physeq)))])
#remove .MM. from colnames
colnames(lipid_mat) <-
gsub(pattern = ".MM.", replacement = "", colnames(lipid_mat))
#Remove "_b" from sample names
row.names(lipid_mat) <- gsub(pattern = "_b", replacement = "",
row.names(lipid_mat))
row.names(genus_df_t) <- gsub(pattern = "_b", replacement = "",
row.names(genus_df_t))
#Convert to relative abundance values
lipid_mat_relabund <- make_relative(lipid_mat)
#Sample_data as dataframe
metadf <- as.data.frame(sample_data(physeq))
#Save object
save(genus_df_t, file = "./data/16s_and_lipid_genus_physeq")
save(lipid_mat_relabund, file = "./data/lipid_relabund_matrix")
19.3 No buffer
19.3.1 Correlation
#load in phyloseq object
load("./data/16s_and_lipid_genus_physeq")
load("./data/lipid_relabund_matrix")
load("./data/preprocess_physeq")
metadf <- as.data.frame(sample_data(physeq))
#subset tables
sample_names <- unlist(metadf[metadf$Bufferused == "No buffer","Sample_name"])
subset_genus_df_t <- genus_df_t[sample_names,]
subset_lipid_mat_relabund <- lipid_mat_relabund[sample_names,]
#Correlation
correlation.table <- associate(
subset_genus_df_t,
subset_lipid_mat_relabund,
method = "spearman", mode = "table",
p.adj.threshold = 0.05, n.signif = 1)
#Add column names
colnames(correlation.table)[1:2] <- c("Genus","Lipid")
#Write table with significant hits to file
write.table(correlation.table[correlation.table$p.adj < 0.05,],
file = "./data/Genus_lipid_sig_correlation_no_buffer.csv",
row.names = FALSE, sep = ",", quote = FALSE,
col.names = TRUE)
#Display table with significant hits below
datatable(correlation.table[correlation.table$p.adj < 0.05,],
options = list(pageLength = 50) )
19.4 RNAlater
19.4.1 Correlation
#load in phyloseq object
load("./data/16s_and_lipid_genus_physeq")
load("./data/lipid_relabund_matrix")
load("./data/preprocess_physeq")
metadf <- as.data.frame(sample_data(physeq))
#subset tables
sample_names <- unlist(metadf[metadf$Bufferused == "Rnalater","Sample_name"])
subset_genus_df_t <- genus_df_t[sample_names,]
subset_lipid_mat_relabund <- lipid_mat_relabund[sample_names,]
#Correlation
correlation.table <- associate(
subset_genus_df_t,
subset_lipid_mat_relabund,
method = "spearman", mode = "table",
p.adj.threshold = 0.05, n.signif = 1)
#Add column names
colnames(correlation.table)[1:2] <- c("Genus","Lipid")
#Write table with significant hits to file
write.table(correlation.table[correlation.table$p.adj < 0.05,],
file = "./data/Genus_lipid_sig_correlation_rnalater.csv",
row.names = FALSE, sep = ",", quote = FALSE,
col.names = TRUE)
#Display table with significant hits below
datatable(correlation.table[correlation.table$p.adj < 0.05,],
options = list(pageLength = 50) )
19.5 Ethanol
19.5.1 Correlation
#load in phyloseq object
load("./data/16s_and_lipid_genus_physeq")
load("./data/lipid_relabund_matrix")
load("./data/preprocess_physeq")
metadf <- as.data.frame(sample_data(physeq))
#subset tables
sample_names <- unlist(metadf[metadf$Bufferused == "Ethanol","Sample_name"])
subset_genus_df_t <- genus_df_t[sample_names,]
subset_lipid_mat_relabund <- lipid_mat_relabund[sample_names,]
#Correlation
correlation.table <- associate(
subset_genus_df_t,
subset_lipid_mat_relabund,
method = "spearman", mode = "table",
p.adj.threshold = 0.05, n.signif = 1)
#Add column names
colnames(correlation.table)[1:2] <- c("Genus","Lipid")
#Write table with significant hits to file
write.table(correlation.table[correlation.table$p.adj < 0.05,],
file = "./data/Genus_lipid_sig_correlation_ethanol.csv",
row.names = FALSE, sep = ",", quote = FALSE,
col.names = TRUE)
#Display table with significant hits below
datatable(correlation.table[correlation.table$p.adj < 0.05,],
options = list(pageLength = 50) )
19.6 PSP
19.6.1 Correlation
#load in phyloseq object
load("./data/16s_and_lipid_genus_physeq")
load("./data/lipid_relabund_matrix")
load("./data/preprocess_physeq")
metadf <- as.data.frame(sample_data(physeq))
#subset tables
sample_names <- unlist(metadf[metadf$Bufferused == "PSP","Sample_name"])
subset_genus_df_t <- genus_df_t[sample_names,]
subset_lipid_mat_relabund <- lipid_mat_relabund[sample_names,]
#Correlation
correlation.table <- associate(
subset_genus_df_t,
subset_lipid_mat_relabund,
method = "spearman", mode = "table",
p.adj.threshold = 0.05, n.signif = 1)
#Add column names
colnames(correlation.table)[1:2] <- c("Genus","Lipid")
#Write table with significant hits to file
write.table(correlation.table[correlation.table$p.adj < 0.05,],
file = "./data/Genus_lipid_sig_correlation_psp.csv",
row.names = FALSE, sep = ",", quote = FALSE,
col.names = TRUE)
#Display table with significant hits below
datatable(correlation.table[correlation.table$p.adj < 0.05,],
options = list(pageLength = 50) )
19.7 Compare results
#read in data
all_df <- read.csv("data/Genus_lipid_sig_correlation.csv")
nobuffer_df <- read.csv("data/Genus_lipid_sig_correlation_no_buffer.csv")
rnalater_df <- read.csv("data/Genus_lipid_sig_correlation_rnalater.csv")
ethanol_df <- read.csv("data/Genus_lipid_sig_correlation_ethanol.csv")
psp_df <- read.csv("data/Genus_lipid_sig_correlation_psp.csv")
#remove p.adj value and add column for comparison
all_df <- all_df[,1:3]
all_df$comparison <- "All"
nobuffer_df <- nobuffer_df[,1:3]
nobuffer_df$comparison <- "No buffer"
rnalater_df <- rnalater_df[,1:3]
rnalater_df$comparison <- "Rnalater"
ethanol_df <- ethanol_df[,1:3]
ethanol_df$comparison <- "Ethanol"
psp_df <- psp_df[,1:3]
psp_df$comparison <- "PSP"
#COmbine into one long df
long_df <- rbind(all_df,nobuffer_df)
long_df <- rbind(long_df,rnalater_df)
long_df <- rbind(long_df,ethanol_df)
long_df <- rbind(long_df,psp_df)
#convert to wide
wide_df <- dcast(long_df, Genus+Lipid ~ comparison, value.var = "Correlation")
#convert NA to 0
wide_df[is.na(wide_df)] <- 0
#Write table
write.table(wide_df,
file = "./data/Genus_lipid_sig_correlation_comparisons.csv",
row.names = FALSE, sep = ",", quote = FALSE,
col.names = TRUE)
#compare results
comparisons <- c("All", "No buffer", "Rnalater", "Ethanol", "PSP")
#Create matrix
comp_mat <- matrix(data = NA, nrow = length(comparisons), ncol = length(comparisons),
dimnames = list(comparisons,comparisons))
#fill in matrix
for (ccol in comparisons) {
for (crow in comparisons) {
#Counter for similarities
similarities <- 0
#loop through rows
for (r in 1:nrow(wide_df)) {
corcol <- wide_df[r,ccol]
corrow <- wide_df[r,crow]
#Check if either value is 0, do nothing if the case
if (corcol == 0) {
similarities <- similarities
} else if ( corrow == 0) {
similarities <- similarities
} else if (sign(corcol) == sign(corrow)) {
#Check if both values are positive or if both negative
similarities <- similarities + 1
}
}
comp_mat[ccol,crow] <- similarities
}
}
#write table
write.table(comp_mat,
file = "./data/Genus_lipid_sig_correlation_comparisons_similarities.csv",
sep = ",", quote = FALSE,
col.names = TRUE)
#Display table
datatable(comp_mat,
options = list(pageLength = 50),
rownames = TRUE)