# load libraries
library(isoorbi) # for Orbitrap functions
library(dplyr) # for data wrangling
library(ggplot2) # for data visualization# read raw files including 1 spectrum
raw_files <-
c("ac5.RAW", "ac6.RAW", "s3744.RAW") |>
orbi_get_example_files() |>
orbi_read_raw(include_spectra = 100) |>
orbi_aggregate_raw()[32m✔[39m [38;5;249m[99ms][39m [1morbi_read_raw()[22m read [34mac5.RAW[39m from cache, included the [32mspectrum[39m from 1
[32mscan[39m
[32m✔[39m [38;5;249m[127ms][39m [1morbi_read_raw()[22m read [34mac6.RAW[39m from cache, included the [32mspectrum[39m from 1
[32mscan[39m
[32m✔[39m [38;5;249m[86ms][39m [1morbi_read_raw()[22m read [34ms3744.RAW[39m from cache, included the [32mspectrum[39m from
1 [32mscan[39m
[32m✔[39m [38;5;249m[420ms][39m [1morbi_read_raw()[22m finished reading 3 files
[32m✔[39m [38;5;249m[781ms][39m [1morbi_aggregate_raw()[22m aggregated [34mfile_info[39m (3), [34mscans[39m (16.63k), [34mpeaks[39m
(514.98k), and [34mspectra[39m (1.98k) from 3 files using the [1m[3mstandard[23m[22m aggregator
One spectrum from each file with the M+x regions highlighted.
# identify sulfate isotopocules
# could come from a tsv, csv, or xlsx spreadsheet instead
isotopocules <- tibble(
compound = "HSO4-",
isotopocule = c(
"M0", "33S", "17O", "34S", "18O",
# also look for a few clumped isotopocules
"33S18O", "34S17O", "36S", "18O18O"),
mass = c(
96.96000, 97.95940, 97.96420, 98.95580, 98.96430,
99.9632, 99.9590, 100.9551, 100.968046)
)
# identify
raw_files_w_isotopocules <- raw_files |>
orbi_identify_isotopocules(isotopocules) |>
# disregard unidentified isotopocules but keep missing
orbi_filter_isotopocules(keep_missing = TRUE) |>
# check for minor peaks that are in the same mass tolerance
# window of an isotopocule
orbi_flag_satellite_peaks()[33m![39m [38;5;249m[4.4s][39m [1morbi_identify_isotopocules()[22m identified 84.85k/514.98k peaks (16%)
representing 94% of the total ion current (TIC) as isotopocules [32mM0[39m, [32m33S[39m, [32m17O[39m,
[32m34S[39m, [32m18O[39m, [32m36S[39m, [32m33S18O[39m, [32m34S17O[39m, and [32m18O18O[39m using the [32mdefault_tolerance[39m of 1
mmu but encountered [33m1 warning[39m
→ [33m![39m isotopocules [32mM0[39m, [32m33S[39m, [32m17O[39m, [32m34S[39m, [32m18O[39m, [32m33S18O[39m, [32m34S17O[39m, [32m36S[39m, and [32m18O18O[39m are
missing from some scans (64.84k missing peaks in total) - make sure to
evaluate coverage with e.g. [1morbi_plot_isotopocule_coverage()[22m
[32m✔[39m [38;5;249m[30ms][39m [1morbi_filter_isotopocules()[22m removed 430.13k / 579.82k peaks (74%)
because they were [33munidentified[39m peaks (430.13k). Remaining isotopocules: [32mM0[39m,
[32m33S[39m, [32m17O[39m, [32m34S[39m, [32m18O[39m, [32m34S17O[39m, [32m33S18O[39m, [32m36S[39m, and [32m18O18O[39m.
[32m✔[39m [38;5;249m[3s][39m [1morbi_flag_satellite_peaks()[22m confirmed there are no [33msatellite[39m peaks
# plot again now with the isotopocules identified
raw_files_w_isotopocules |> orbi_plot_spectra(96, 102)Spectra with identified isotopocules (missing ones highlighted in pink).
# check coverage (as suggested during isotopocule identifiation)
raw_files_w_isotopocules |> orbi_plot_isotopocule_coverage()isotopocule coverage in each file across the scans.
It’s clear from the plots above that we don’t have good coverage for the clumped isotopocules and 36S (i.e. they are missing from some/many scans). We won’t work with these further anyways but it’s a good visual example of how much harder these are to detect.
# Preprocess data (this is exactly the same as with an isox file)
data <-
raw_files_w_isotopocules |>
# focus on the main single substitutions
orbi_filter_isotopocules(c("M0", "33S", "17O", "34S", "18O")) |>
# double check the remainng isotopcoules
orbi_flag_weak_isotopocules(min_percent = 99) |>
# flags outlying scans that have more than 2 times or less than
# 1/2 times the average number of ions in the Orbitrap analyzer
orbi_flag_outliers(agc_fold_cutoff = 2) |>
# sets one isotopocule in the dataset as the base peak
# (denominator) for ratio calculation
orbi_define_basepeak(basepeak_def = "M0") [32m✔[39m [38;5;249m[30ms][39m [1morbi_filter_isotopocules()[22m removed 66.66k / 149.69k peaks (45%)
because they were [33mmissing[39m isotopocules (64.84k), or [33mnot[39m the [33mselected[39m
isotopocule [32mM0[39m, [32m33S[39m, [32m17O[39m, [32m34S[39m, and [32m18O[39m (1.82k).
[32m✔[39m [38;5;249m[45ms][39m [1morbi_flag_weak_isotopocules()[22m confirmed there are no [33mweak[39m
isotopocules: all are detected in at least 99% of scans in each of the 15 data
groups (based on [32muidx[39m, [32mcompound[39m, and [32misotopocule[39m)
[32m✔[39m [38;5;249m[37ms][39m [1morbi_flag_outliers()[22m flagged 44/16632 scans (0.26%) as [33moutliers[39m based
on [32m2 fold AGC cutoff[39m, i.e. based on [3mscans below 1/2 and above 2 times the[23m
[3maverage number of ions [32mtic[39m * [32mit.ms[39m in the Orbitrap analyzer[23m, in 3 data groups
(based on [32muidx[39m) → use [1morbi_plot_raw_data(y = tic * it.ms)[22m to visualize them
[32m✔[39m [38;5;249m[2.1s][39m [1morbi_define_basepeak()[22m set [32mM0[39m as the ratio denominator and calculated
66.40k [32mratio[39m values for 4 isotopocules ([32m33S[39m, [32m17O[39m, [32m34S[39m, and [32m18O[39m)
Let’s take a look at the AGC cutoff flagged samples
Estimated ions in the analyzer
Isotopocules ions
Seems like the issue is mostly towards the end of the s3744 analysis. What about the resulting ratios?
Isotopocule ratios vs M0
The ratios suggest that there really is an issue at the end of the s3744 run. Having flagged these outliers, they will automatically not be included in the summary calculation of the resulting ratios.
# summarize the ratio summaries
data_summary <-
data |>
orbi_summarize_results(ratio_method = "sum")[32m✔[39m [38;5;249m[92ms][39m [1morbi_summarize_results()[22m summarized ratios from 66.33k peak ([3mexcluding[23m
68 flagged peaks; [3mexcluding[23m 0 unused peaks) using the [1m[3msum[23m[22m method and grouping
the data by [32muidx[39m, [32mfilename[39m, [32mcompound[39m, [32mbasepeak[39m, and [32misotopocule[39m
[32m✔[39m [38;5;249m[9ms][39m [1morbi_get_data()[22m retrieved 12 records from the combination of [34mfile_info[39m
(3) and [34msummary[39m (12) via [32muidx[39m
[38;5;246m# A tibble: 12 × 7[39m
uidx filename compound isotopocule ratio ratio_relative_sem_p…¹ ratio_sem
[3m[38;5;246m<int>[39m[23m [3m[38;5;246m<chr>[39m[23m [3m[38;5;246m<fct>[39m[23m [3m[38;5;246m<fct>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m 1[39m 1 ac5.RAW HSO4- 33S 0.009[4m0[24m[4m3[24m 1.18 1.06[38;5;246me[39m[31m-5[39m
[38;5;250m 2[39m 1 ac5.RAW HSO4- 17O 0.001[4m6[24m[4m5[24m 2.92 4.81[38;5;246me[39m[31m-6[39m
[38;5;250m 3[39m 1 ac5.RAW HSO4- 34S 0.059[4m9[24m 0.459 2.75[38;5;246me[39m[31m-5[39m
[38;5;250m 4[39m 1 ac5.RAW HSO4- 18O 0.010[4m7[24m 1.06 1.13[38;5;246me[39m[31m-5[39m
[38;5;250m 5[39m 2 ac6.RAW HSO4- 33S 0.009[4m0[24m[4m2[24m 0.99 8.92[38;5;246me[39m[31m-6[39m
[38;5;250m 6[39m 2 ac6.RAW HSO4- 17O 0.001[4m6[24m[4m4[24m 2.47 4.05[38;5;246me[39m[31m-6[39m
[38;5;250m 7[39m 2 ac6.RAW HSO4- 34S 0.059[4m5[24m 0.4 2.38[38;5;246me[39m[31m-5[39m
[38;5;250m 8[39m 2 ac6.RAW HSO4- 18O 0.010[4m6[24m 0.911 9.67[38;5;246me[39m[31m-6[39m
[38;5;250m 9[39m 3 s3744.RAW HSO4- 33S 0.008[4m9[24m[4m4[24m 1.01 9.07[38;5;246me[39m[31m-6[39m
[38;5;250m10[39m 3 s3744.RAW HSO4- 17O 0.001[4m6[24m[4m4[24m 2.58 4.23[38;5;246me[39m[31m-6[39m
[38;5;250m11[39m 3 s3744.RAW HSO4- 34S 0.058[4m4[24m 0.414 2.42[38;5;246me[39m[31m-5[39m
[38;5;250m12[39m 3 s3744.RAW HSO4- 18O 0.010[4m6[24m 0.944 1.00[38;5;246me[39m[31m-5[39m
[38;5;246m# ℹ abbreviated name: ¹ratio_relative_sem_permil[39m
# export file info and summary to excel
data_summary |> orbi_export_data_to_excel(
file = "output.xlsx",
include = c("file_info", "summary")
)[32m✔[39m [38;5;249m[72ms][39m [1morbi_export_data_to_excel()[22m exported the [32mdataset[39m (3 rows of [32mfile_info[39m
and 12 rows of [32msummary[39m) to [34moutput.xlsx[39m
fig <-
data_summary |>
# get out all summary data
orbi_get_data(summary = everything()) |>
# generate a custom label for the data panels
mutate(panel = glue::glue("ratio\n{isotopocule} / {basepeak}")) |>
# plot
ggplot() +
aes(
x = filename,
y = ratio, ymin = ratio - ratio_sem, ymax = ratio + ratio_sem,
color = filename, shape = filename
) +
geom_pointrange(size = 1) +
scale_color_brewer(palette = "Dark2") +
scale_y_continuous(breaks = scales::pretty_breaks(5)) +
facet_grid(panel ~ ., scales = "free_y", switch = "y") +
# styling
theme_bw() +
theme(
text = element_text(size = 16),
panel.grid = element_blank(),
strip.text.y.left = element_text(angle = 0),
strip.placement = "outside",
strip.background = element_blank()
) +
labs(y = NULL, x = NULL)[32m✔[39m [38;5;249m[9ms][39m [1morbi_get_data()[22m retrieved 12 records from the combination of [34mfile_info[39m
(3) and [34msummary[39m (12) via [32muidx[39m
Isotopocule ratios summary