This notebook provides a simple example of how one can create barplots easily from PeakGroup data exported from TraceBase.
First, we load our required libraries.
# Load required libraries
library(readr, quietly = TRUE)
library(stringr, quietly = TRUE)
library(ggplot2, quietly = TRUE)
library(Rmisc, quietly = TRUE)
library(dplyr, quietly = TRUE, warn.conflicts = FALSE)
The PeakGroup data was downloaded from TraceBase by:
^ The study data used in this example (Serine synthesis from glucose in control vs ser/gly-free diet) is not publicly available, but by following this example, you can apply a similar analysis to any of the Studies available on tracebase.princeton.edu.
# Location of the PeakGroups export (TSV) for the serine synthesis study
serinesyn_peakgroup_data_table <-
  "data/serine_synthesis/PeakGroups_10.02.2023.16.17.17.tsv"

# Read the export into a tibble. Text following "#" is treated as a comment
# and skipped, and the literal string "None" is read as a missing value (NA).
serinesyn_peakgroup_data <- readr::read_tsv(
  file = serinesyn_peakgroup_data_table,
  comment = "#",
  na = "None"
)
## Rows: 561 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (18): Sample, Tissue, Peak Group, Measured Compound(s), Measured Compoun...
## dbl (8): Time Collected (m), Total Abundance, Enrichment Fraction, Enrichme...
## lgl (1): Age (weeks)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display the imported PeakGroup table
serinesyn_peakgroup_data
For this example, we are going to plot the “Normalized Labeling” of serine from a glucose tracer, for each tissue, split by “Feeding Status”.
Select those rows that used a glucose tracer and measured serine.
# Select rows: keep only measurements of serine taken from animals whose
# infusate name contains "glucose-" (i.e. a glucose tracer was used)
serinesyn_plot_data <- dplyr::filter(
  serinesyn_peakgroup_data,
  stringr::str_detect(Infusate, "glucose-") &
    `Measured Compound(s)` == "serine"
)

# Display the filtered rows
serinesyn_plot_data
We use the summarySE function from the Rmisc package to calculate summary statistics (count, mean, standard deviation, standard error, and confidence interval) for each group.
# Summarise "Normalized Labeling" per feeding status and tissue, ignoring
# missing values.
# NOTE(review): the backticks inside "`Feeding Status`" look deliberate --
# summarySE appears to hand group names to plyr, which parses them as R
# expressions, so a name containing a space must be quoted. Keep them.
serinesyn_summary_data <- Rmisc::summarySE(
  data = serinesyn_plot_data,
  measurevar = "Normalized Labeling",
  groupvars = c("`Feeding Status`", "Tissue"),
  na.rm = TRUE
)

# Display the per-group summary table
serinesyn_summary_data
Now we set up the plotting parameters.
# Set up the plot: one bar per tissue, placed side by side by feeding status
p <- ggplot(
  serinesyn_summary_data,
  aes(x = Tissue, y = `Normalized Labeling`, fill = `Feeding Status`)
) +
  # Bar heights are taken directly from the summary table (no counting)
  geom_col(position = "dodge") +
  # Error bars extend one standard deviation (the `sd` column) above and
  # below each bar; the dodge width of 0.9 lines them up with the bars
  geom_errorbar(
    aes(
      ymin = `Normalized Labeling` - sd,
      ymax = `Normalized Labeling` + sd
    ),
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  theme_gray(base_size = 22) +
  # Slant the tissue names so long labels do not overlap
  theme(axis.text.x = element_text(angle = -45, vjust = 0.5, hjust = 0)) +
  labs(
    title = "Normalized Labeling of Serine with Glucose Tracer",
    subtitle = "Serine Synthesis Study"
  )

# Display the plot
p
The PeakGroup data was downloaded from TraceBase by searching with:
* Output Format == “PeakGroups”
* Measured Compound == “serine”
* Tracer Compound == “glucose”
# Location of the PeakGroups export (TSV) covering serine across all studies
all_serine_peakgroup_table <-
  "data/PeakGroups-serine-all-studies10.02.2023.16.21.50.tsv"

# Read the export into a tibble. Text following "#" is treated as a comment
# and skipped, and the literal string "None" is read as a missing value (NA).
all_serine_peakgroup_data <- readr::read_tsv(
  file = all_serine_peakgroup_table,
  comment = "#",
  na = "None"
)
## Rows: 679 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (18): Sample, Tissue, Peak Group, Measured Compound(s), Measured Compoun...
## dbl (9): Time Collected (m), Total Abundance, Enrichment Fraction, Enrichme...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display the imported PeakGroup table
all_serine_peakgroup_data
For this example, we are going to plot the “Normalized Labeling” of serine from a glucose tracer, for each tissue, split by “Studies”, using only animals in the fasted state.
Select those rows that used a glucose tracer and measured serine for animals in the fasted state.
# Select rows: serine measurements from fasted animals whose infusate name
# contains "glucose-" (i.e. a glucose tracer was used)
all_serine_plot_data <- dplyr::filter(
  all_serine_peakgroup_data,
  stringr::str_detect(Infusate, "glucose-") &
    `Measured Compound(s)` == "serine" &
    `Feeding Status` == "fasted"
)

# Display the filtered rows
all_serine_plot_data
We use the summarySE function from the Rmisc package to calculate summary statistics (count, mean, standard deviation, standard error, and confidence interval) for each group.
# Build the per-study, per-tissue summary table used for plotting.
all_serine_summary_data <- all_serine_plot_data %>%
  # Calculate summary statistics for each study/tissue group
  Rmisc::summarySE(
    measurevar = "Normalized Labeling",
    groupvars = c("Studies", "Tissue"),
    na.rm = TRUE
  ) %>%
  # Plot only tissues with data in all four Studies: count the distinct
  # studies seen for each tissue and keep tissues that reach four
  group_by(Tissue) %>%
  filter(n_distinct(Studies) >= 4) %>%
  # Drop the per-tissue grouping so it does not silently carry over into
  # later operations on this table
  ungroup() %>%
  # Rename some studies so the legend labels are short and consistently cased
  mutate(Studies = recode(Studies,
    "fluxomics 2020" = "Fluxomics 2020",
    "Serine synthesis from glucose in control vs ser/gly-free diet" =
      "Serine synthesis"
  ))
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
# Display the per-group summary table
all_serine_summary_data
Now we set up the plotting parameters.
# Set up the plot: one bar per tissue, placed side by side by study
all_serine_plot <- ggplot(
  all_serine_summary_data,
  aes(x = Tissue, y = `Normalized Labeling`, fill = Studies)
) +
  # Bar heights are taken directly from the summary table (no counting)
  geom_col(position = "dodge") +
  # Error bars extend one standard deviation (the `sd` column) above and
  # below each bar; the dodge width of 0.9 lines them up with the bars
  geom_errorbar(
    aes(
      ymin = `Normalized Labeling` - sd,
      ymax = `Normalized Labeling` + sd
    ),
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  theme_gray(base_size = 22) +
  # Slant the tissue names and move the legend above the panel
  theme(
    axis.text.x = element_text(angle = -45, vjust = 0.5, hjust = 0),
    legend.position = "top"
  ) +
  # scale_fill_discrete(labels=c("Fluxomics 2020", "Serine Synthesis")) +
  labs(
    title = "Normalized Labeling of Serine with Glucose Tracer",
    subtitle = "Animals in Fasted State"
  )

# Display the plot
all_serine_plot