From bbee9467c3d7d234cab77475766687f518afcbe4 Mon Sep 17 00:00:00 2001 From: junikimm717 <68165832+junikimm717@users.noreply.github.com> Date: Sat, 5 Jun 2021 22:09:53 -0400 Subject: [PATCH] added parallelization and changed nontarget versions --- specification.md | 20 ++++++++++++++++++++ src/script.def.R | 48 +++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/specification.md b/specification.md index bcb66a8..c54b863 100644 --- a/specification.md +++ b/specification.md @@ -19,3 +19,23 @@ isotopes. - Readable by R. - Manipulatable by Tidyverse - Include it in script? + +# Additional Notes + +- Independent of the ordering (it might get corrupted) +- Just search for chlorinated/brominated compounds. +- usually, use_charges will be 1 +- Headers to use in script. + +- Packages to do things in parallel (multiple threads) +- be able to configure the number of cores or threads used. +- make it a requirement to be tab-delimited. + +- eliminate clusters with only m/z. + +# Notes + +- cutint represents high enough intensity. +- mztol is in terms of ppm. +- mzfrac is in terms of absolute. 
+- PPM = true always diff --git a/src/script.def.R b/src/script.def.R index d548fa6..5c96dd0 100644 --- a/src/script.def.R +++ b/src/script.def.R @@ -1,44 +1,62 @@ library("nontarget") library("purrr") +library("funprog") library("enviPat") library("stringr") +library("parallel") # Configurations ############################################################# # file : decides which file to read in data from ############################# file <- "/path/to/file" -# separator: decides separator of file ####################################### -separator <- "separator" + +search_isos <- c("13C", "37Cl") + +# Minimum size of a cluster + +min_cluster_size <- 3 + +# Number of cores to be used (will be adjusted if not possible) +used_cores <- 3 ############################################################################## # Read in the Table ########################################################## -table <- read.table(file, header=TRUE, sep=separator) +table <- read.table(file, header=TRUE, sep=",") # Organize the tables by number ############################################## fragments <- max(table[,"Spectra_Number"]) + # The algorithm below guarantees linear complexity of looking up data points.# +# use_cores: the configured used_cores clamped to at least 1 and at most detectCores()-1 (an NA core count is ignored) +use_cores <- max(1, min(used_cores, detectCores()-1), na.rm=TRUE) + # minint is the lower bound of the interval. ################################# # maxint is the upper bound of the interval that contains the fragment. ###### -# initialized memoizes as to whether or not each fragment number exists. ##### +# usable memoizes whether each fragment number exists and spans at least min_cluster_size rows. ##### minint <- unlist(map(1:fragments, function(x) 0)) maxint <- unlist(map(1:fragments, function(x) 0)) -initialized <- unlist(map(1:fragments, function(x) FALSE)) +usable <- unlist(map(1:fragments, function(x) FALSE)) # Set all of the intervals ################################################### for (i in seq(1, nrow(table))) { fragment <- table[i, "Spectra_Number"] - if (! 
initialized[fragment]) { + if (! usable[fragment]) { minint[fragment] <- i } maxint[fragment] <- max(maxint[fragment], i) - initialized[fragment] <- TRUE + usable[fragment] <- TRUE +} + +for (i in 1:fragments) { + if (maxint[i] - minint[i] + 1 < min_cluster_size) + usable[i] = FALSE } getdata <- function(fragment, key) { - if (! initialized[fragment]) { + if (! usable[fragment]) { stop(str_interp("Fragment $[d]{fragment} does not exist in the data set", list(fragment=fragment))) } @@ -55,10 +73,22 @@ getdataframe <- function (fragment) { # Incomplete: cannot get pattern.search function to work per cluster. ######### # Incomplete: Get the diagnostics for a cluster and turn them into a portable# -# format. (such as through tinyverse) ######################################## +# format. (such as through tidyverse) ######################################## + +isos <- make.isos(isotopes,use_isotopes=search_isos, + use_charges=rep(1, length(search_isos))) diagnostics <- function(fragment) { points <- getdataframe(fragment) + ptrn <- pattern.search(points, isos) + return(ptrn) } +use <- Filter(function(x) usable[x], 1:fragments) + +results <- mclapply(use, diagnostics, mc.cores=use_cores) + +# Incomplete: Make the analysis more resilient to different sorting. +# Incomplete: How to process results. (is it actually supposed to be all negatives?) + ##############################################################################