added parallelization and changed nontarget versions

4 years ago · bbee9467c3
2 changed files with 59 additions and 9 deletions
--- a/specification.md
+++ b/specification.md
@ -19,3 +19,23 @@ isotopes.
 - Readable by R.
 - Manipulatable by Tidyverse
 - Include it in script?
+
+# Additional Notes
+
+- Independent of the ordering (it might get corrupted)
+- Just search for chlorinated/brominated compounds.
+- usually, use_charges will be 1
+- Headers to use in script.
+
+- Packages to do things in parallel (multiple threads)
+- be able to configure the number of cores or threads used.
+- make it a requirement to be tab-delimited.
+
+- eliminate clusters with only m/z.
+
+# Notes
+
+- cutint represents high enough intensity.
+- mztol is in terms of ppm.
+- mzfrac is in terms of absolute.
+- PPM = true always
--- a/src/script.def.R
+++ b/src/script.def.R
@ -1,44 +1,62 @@
 library("nontarget")
 library("purrr")
+library("funprog")
 library("enviPat")
 library("stringr")
+library("parallel")

 # Configurations #############################################################
 # file : decides which file to read in data from #############################
 file <- "/path/to/file"
-# separator: decides separator of file #######################################
-separator <- "separator"
+
+search_isos <- c("13C", "37Cl")
+
+# Minimum size of a cluster
+
+min_cluster_size <- 3
+
+# Number of cores to be used (will be adjusted if not possible)
+used_cores <- 3

 ##############################################################################
 # Read in the Table ##########################################################
-table <- read.table(file, header=TRUE, sep=separator)
+table <- read.table(file, header=TRUE, sep=",")

 # Organize the tables by number ##############################################
 fragments <- max(table[,"Spectra_Number"])


+
 # The algorithm below guarantees linear complexity of looking up data points.#

+# Clamp the configured used_cores to a safe amount: at least 1, one core spare
+use_cores <- max(1, min(used_cores, detectCores() - 1), na.rm = TRUE)
+
 # minint is the lower bound of the interval. #################################
 # maxint is the upper bound of the interval that contains the fragment. ######
-# initialized memoizes as to whether or not each fragment number exists. #####
+# usable records whether each fragment exists and spans min_cluster_size rows.
 minint <- unlist(map(1:fragments, function(x) 0))
 maxint <- unlist(map(1:fragments, function(x) 0))
-initialized <- unlist(map(1:fragments, function(x) FALSE))
+usable <- unlist(map(1:fragments, function(x) FALSE))


 # Set all of the intervals ###################################################
 for (i in seq(1, nrow(table))) {
    fragment <- table[i, "Spectra_Number"]
-    if (! initialized[fragment]) {
+    if (! usable[fragment]) {
        minint[fragment] <- i
    }
    maxint[fragment] <- max(maxint[fragment], i)
-    initialized[fragment] <- TRUE
+    usable[fragment] <- TRUE
+}
+
+for (i in 1:fragments) {
+    if (maxint[i] - minint[i] + 1 < min_cluster_size)
+        usable[i] = FALSE
 }

 getdata <- function(fragment, key) {
-    if (! initialized[fragment]) {
+    if (! usable[fragment]) {
        stop(str_interp("Fragment $[d]{fragment} does not exist in the data set", 
                    list(fragment=fragment)))
    }
@ -55,10 +73,22 @@ getdataframe <- function (fragment) {

 # Incomplete: cannot get pattern.search function to work per cluster. #########
 # Incomplete: Get the diagnostics for a cluster and turn them into a portable#
-# format. (such as through tinyverse) ########################################
+# format. (such as through tidyverse) ########################################
+
+isos <- make.isos(isotopes,use_isotopes=search_isos, 
+                  use_charges=rep(1, length(search_isos)))

 diagnostics <- function(fragment) {
    points <- getdataframe(fragment)
+    ptrn <- pattern.search(points, isos)
+    return(ptrn)
 }

+use <- Filter(function(x) usable[x], 1:fragments)
+
+results <- mclapply(use, diagnostics, mc.cores=use_cores)
+
+# Incomplete: Make the analysis more resilient to different sorting.
+# Incomplete: How to process results. (is it actually supposed to be all negatives?)
+
 ##############################################################################