added parallelization and changed nontarget versions

5 years ago · bbee9467c3
2 changed files with 59 additions and 9 deletions
--- a/specification.md
+++ b/specification.md
@ -19,3 +19,23 @@ isotopes.
 - Readable by R.
 - Manipulatable by Tidyverse
 - Include it in script?
 # Additional Notes
 - Independent of the ordering (it might get corrupted)
 - Just search for chlorinated/brominated compounds.
 - usually, use_charges will be 1
 - Headers to use in script.
 - Packages to do things in parallel (multiple threads)
 - be able to configure the number of cores or threads used.
 - make it a requirement to be tab-delimited.
 - eliminate clusters with only m/z.
 # Notes
 - cutint represents high enough intensity.
 - mztol is in terms of ppm.
 - mzfrac is in terms of absolute.
 - PPM = true always
--- a/src/script.def.R
+++ b/src/script.def.R
@ -1,44 +1,62 @@
 library("nontarget")
 library("purrr")
 library("funprog")
 library("enviPat")
 library("stringr")
 library("parallel")
 # Configurations #############################################################
 # file : decides which file to read in data from #############################
 file <- "/path/to/file"
 # separator: decides separator of file #######################################
 separator <- "separator"
 search_isos <- c("13C", "37Cl")
 # Minimum size of a cluster
 min_cluster_size <- 3
 # Number of cores to be used (will be adjusted if not possible)
 used_cores <- 3
 ##############################################################################
 # Read in the Table ##########################################################
 table <- read.table(file, header=TRUE, sep=separator)
 table <- read.table(file, header=TRUE, sep=",")
 # Organize the tables by number ##############################################
 fragments <- max(table[,"Spectra_Number"])
 # The algorithm below guarantees linear complexity of looking up data points.#
 # Clamp the configured worker count (used_cores) to a safe amount: at most one
 # fewer than the number of detected cores, but never fewer than one worker.
 # detectCores() may return NA when the core count is unknown, so NA is ignored
 # (na.rm = TRUE) and the configured value is used on its own in that case.
 use_cores <- max(1L, min(used_cores, detectCores() - 1L, na.rm = TRUE))
 # minint is the lower bound of the interval. #################################
 # maxint is the upper bound of the interval that contains the fragment. ######
 # initialized memoizes as to whether or not each fragment number exists. #####
 # usable memoizes as to whether or not each fragment number exists. #####
 minint <- unlist(map(1:fragments, function(x) 0))
 maxint <- unlist(map(1:fragments, function(x) 0))
 initialized <- unlist(map(1:fragments, function(x) FALSE))
 usable <- unlist(map(1:fragments, function(x) FALSE))
 # Set all of the intervals ###################################################
 for (i in seq(1, nrow(table))) {
    fragment <- table[i, "Spectra_Number"]
    if (! initialized[fragment]) {
    if (! usable[fragment]) {
        minint[fragment] <- i
    }
    maxint[fragment] <- max(maxint[fragment], i)
    initialized[fragment] <- TRUE
    usable[fragment] <- TRUE
 }
 # Mark every fragment whose row interval [minint, maxint] spans fewer than
 # min_cluster_size rows as unusable. The comparison is vectorised over all
 # fragments at once; fragments that never appeared keep minint = maxint = 0
 # and were already FALSE, so they simply stay FALSE.
 cluster_sizes <- maxint - minint + 1
 usable[cluster_sizes < min_cluster_size] <- FALSE
 getdata <- function(fragment, key) {
    if (! initialized[fragment]) {
    if (! usable[fragment]) {
        stop(str_interp("Fragment $[d]{fragment} does not exist in the data set", 
                    list(fragment=fragment)))
    }
@ -55,10 +73,22 @@ getdataframe <- function (fragment) {
 # Incomplete: cannot get pattern.search function to work per cluster. #########
 # Incomplete: Get the diagnostics for a cluster and turn them into a portable#
 # format. (such as through tinyverse) ########################################
 # format. (such as through tidyverse) ########################################
 # Build the isotope definition passed to pattern.search() below: restrict the
 # `isotopes` table to the isotopes listed in search_isos and request charge 1
 # for each of them (one use_charges entry per searched isotope).
 # NOTE(review): `isotopes` is not defined in the visible part of this script;
 # presumably it is the data set shipped with enviPat -- confirm it is loaded.
 isos <- make.isos(isotopes,use_isotopes=search_isos, 
                   use_charges=rep(1, length(search_isos)))
 # Run the isotope pattern search for one fragment.
 #   fragment: fragment (spectra) number to analyse.
 # Fetches the fragment's rows via getdataframe() and returns whatever
 # pattern.search() produces for them with the global `isos` definition.
 diagnostics <- function(fragment) {
    peaks <- getdataframe(fragment)
    pattern.search(peaks, isos)
 }
 # Indices of the fragments still flagged as usable, in ascending order.
 use <- which(usable)
 # Run the diagnostics for every usable fragment, spread over use_cores
 # worker processes; results holds one entry per element of `use`.
 results <- mclapply(
    use,
    diagnostics,
    mc.cores = use_cores
 )
 # Incomplete: Make the analysis more resilient to different sorting.
 # Incomplete: How to process results. (is it actually supposed to be all negatives?)
 ##############################################################################