This notebook provides a simple example of how one can create barplots easily from PeakGroup data exported from TraceBase.

First, we load our required libraries.

# Load required libraries
library(readr, quietly = TRUE)
library(stringr, quietly = TRUE)
library(ggplot2, quietly = TRUE)
library(Rmisc, quietly = TRUE)
library(dplyr, quietly = TRUE, warn.conflicts = FALSE)

Serine Labeling from Serine Synthesis Study

Load PeakGroup data

The PeakGroup data was downloaded from TraceBase by:

  1. View a specific study^
  2. Click the “PeakGroup Data” link at the top
  3. Click on the “Export Data” button

^ The study data used in this example (Serine synthesis from glucose in control vs ser/gly-free diet) is not publicly available, but by following this example, you can apply a similar analysis to any of the Studies available on tracebase.princeton.edu.

# Location of the PeakGroups export (TSV) for the serine synthesis study
serinesyn_peakgroup_data_table <-
  "data/serine_synthesis/PeakGroups_10.02.2023.16.17.17.tsv"

# Parse the export: "#" is passed as the comment marker and the literal
# string "None" is read as a missing value (NA)
serinesyn_peakgroup_data <- readr::read_tsv(
  file = serinesyn_peakgroup_data_table,
  comment = "#",
  na = "None"
)
## Rows: 561 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (18): Sample, Tissue, Peak Group, Measured Compound(s), Measured Compoun...
## dbl  (8): Time Collected (m), Total Abundance, Enrichment Fraction, Enrichme...
## lgl  (1): Age (weeks)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display the loaded table
serinesyn_peakgroup_data

Plot Normalized Labeling of Serine (from a Glucose Tracer) by Feeding Status per Tissue

For this example, we are going to plot the “Normalized Labeling” of serine from a glucose tracer for each tissue, split by “Feeding Status”.

Select data

Select those rows that used a glucose tracer and measured serine.

# Select rows (all columns are kept): retain only rows whose Infusate
# contains "glucose-" and whose measured compound is exactly "serine"
serinesyn_plot_data <- dplyr::filter(
  serinesyn_peakgroup_data,
  str_detect(Infusate, "glucose-") & `Measured Compound(s)` == "serine"
)

# Display the filtered rows
serinesyn_plot_data

Calculate summary statistics

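We use the summarySE function (from the Rmisc package) to calculate summary statistics for each group: the count, mean, standard deviation, standard error, and confidence interval. Note that the error bars in the plot below show the standard deviation (the sd column).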

# Summarise "Normalized Labeling" for every Feeding Status / Tissue pair,
# ignoring missing values.
# NOTE(review): the backticks inside "`Feeding Status`" look intentional —
# summarySE appears to parse the grouping names as R expressions, so a column
# name containing a space has to be quoted inside the string. Confirm before
# removing them.
serinesyn_summary_data <- Rmisc::summarySE(
  data = serinesyn_plot_data,
  measurevar = "Normalized Labeling",
  groupvars = c("`Feeding Status`", "Tissue"),
  na.rm = TRUE
)

# Display the summary table
serinesyn_summary_data

Generate the bar plot

Now we set up the plotting parameters.

# Build the plot: tissue on the x axis, the summarised Normalized Labeling
# value as bar height, and one fill colour per feeding status
p <- ggplot(
  serinesyn_summary_data,
  aes(x = Tissue, y = `Normalized Labeling`, fill = `Feeding Status`)
) +
  # Bars are drawn at the values given in the data, side by side per tissue
  geom_col(position = "dodge") +
  # Error bars extend one `sd` above and below each bar; they are dodged by
  # 0.9 so that they sit over their own bar
  geom_errorbar(
    aes(
      ymin = `Normalized Labeling` - sd,
      ymax = `Normalized Labeling` + sd
    ),
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  # Large base font; tissue labels are rotated so they do not overlap
  theme_gray(base_size = 22) +
  theme(axis.text.x = element_text(angle = -45, vjust = 0.5, hjust = 0)) +
  labs(
    title = "Normalized Labeling of Serine with Glucose Tracer",
    subtitle = "Serine Synthesis Study"
  )

# Render the plot
p

Serine Labeling from Multiple Studies

Load PeakGroup data

The PeakGroup data was downloaded from TraceBase by searching with:

  * Output Format == “PeakGroups”
  * Measured Compound == “serine”
  * Tracer Compound == “glucose”

TraceBase Serine PeakGroups with Glucose Tracer
TraceBase Serine PeakGroups with Glucose Tracer
# Location of the PeakGroups export (TSV) covering serine across all studies
all_serine_peakgroup_table <-
  "data/PeakGroups-serine-all-studies10.02.2023.16.21.50.tsv"

# Parse the export: "#" is passed as the comment marker and the literal
# string "None" is read as a missing value (NA)
all_serine_peakgroup_data <- readr::read_tsv(
  file = all_serine_peakgroup_table,
  comment = "#",
  na = "None"
)
## Rows: 679 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (18): Sample, Tissue, Peak Group, Measured Compound(s), Measured Compoun...
## dbl  (9): Time Collected (m), Total Abundance, Enrichment Fraction, Enrichme...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display the loaded table
all_serine_peakgroup_data

Plot Normalized Labeling of Serine (from a Glucose Tracer) by Study per Tissue

For this example, we are going to plot the “Normalized Labeling” of serine from a glucose tracer for each tissue, split by study, for animals in the fasted state.

Select data

Select those rows that used a glucose tracer and measured serine for animals in the fasted state.

# Select rows: retain only rows whose Infusate contains "glucose-", whose
# measured compound is exactly "serine", and whose Feeding Status is "fasted"
all_serine_plot_data <- dplyr::filter(
  all_serine_peakgroup_data,
  str_detect(Infusate, "glucose-") &
    `Measured Compound(s)` == "serine" &
    `Feeding Status` == "fasted"
)

# Display the filtered rows
all_serine_plot_data

Calculate summary statistics

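We use the summarySE function (from the Rmisc package) to calculate summary statistics for each group: the count, mean, standard deviation, standard error, and confidence interval. Note that the error bars in the plot below show the standard deviation (the sd column).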

# Build the per-study, per-tissue summary table used by the bar plot below.
all_serine_summary_data <-
  # Calculate summary statistics for each study/tissue group
  all_serine_plot_data %>%
  Rmisc::summarySE(
    measurevar = "Normalized Labeling",
    groupvars = c("Studies", "Tissue"),
    na.rm = TRUE
  ) %>%
  # Plot only tissues with data in all four Studies (the threshold of 4
  # assumes the export contains four studies)
  group_by(Tissue) %>%
  filter(n_distinct(Studies) >= 4) %>%
  # Drop the grouping again so the stored table is a plain, ungrouped tibble
  # and later operations on it are not silently performed per tissue
  ungroup() %>%
  # Rename some studies
  mutate(Studies = recode(Studies,
    "fluxomics 2020" = "Fluxomics 2020",
    "Serine synthesis from glucose in control vs ser/gly-free diet" =
      "Serine synthesis"
  ))
# NOTE(review): the warning below comes from the confidence-interval step
# inside summarySE; presumably some study/tissue groups contain a single
# measurement (N - 1 = 0 degrees of freedom). The plot does not use the
# confidence interval.
## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced
all_serine_summary_data

Generate the bar plot

Now we set up the plotting parameters.

# Build the plot: tissue on the x axis, the summarised Normalized Labeling
# value as bar height, and one fill colour per study
all_serine_plot <- ggplot(
  all_serine_summary_data,
  aes(x = Tissue, y = `Normalized Labeling`, fill = Studies)
) +
  # Bars are drawn at the values given in the data, side by side per tissue
  geom_col(position = "dodge") +
  # Error bars extend one `sd` above and below each bar; they are dodged by
  # 0.9 so that they sit over their own bar
  geom_errorbar(
    aes(
      ymin = `Normalized Labeling` - sd,
      ymax = `Normalized Labeling` + sd
    ),
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  # Large base font, rotated tissue labels, legend placed above the panel
  theme_gray(base_size = 22) +
  theme(axis.text.x = element_text(angle = -45, vjust = 0.5, hjust = 0)) +
  theme(legend.position = "top") +
  # scale_fill_discrete(labels=c("Fluxomics 2020", "Serine Synthesis")) +
  labs(
    title = "Normalized Labeling of Serine with Glucose Tracer",
    subtitle = "Animals in Fasted State"
  )

# Render the plot
all_serine_plot