# Patternmatch/src/script.def.R


								library("nontarget")

								library("purrr")

								library("funprog")

								library("enviPat")

								library("stringr")

								library("parallel")


								# Configuration ----
								# Every tunable value of the script is set in this section.

								# Requested number of cores; per the original note this is an upper bound
								# that is adjusted when the machine cannot provide that many.
								used_cores <- 3

								# Smallest row span a cluster must cover to be analysed.
								min_cluster_size <- 3

								# Isotope labels handed to make.isos() to build the search list.
								search_isos <- c("13C", "37Cl")

								# Path of the comma-separated input table.
								file <- "/path/to/file"


								##############################################################################

								# Read in the table ----
								# The input is a comma-separated file with a header row. The script reads
								# the columns "Spectra_Number", "mz", "time" and "Intensity" from it.
								table <- read.table(file, header = TRUE, sep = ",")

								# Fail early with a clear message; otherwise a missing column, an empty
								# table or NA spectra numbers only surface as cryptic errors further down.
								if (!("Spectra_Number" %in% names(table))) {
								    stop("Input table has no 'Spectra_Number' column", call. = FALSE)
								}
								if (nrow(table) == 0) {
								    stop("Input table contains no rows", call. = FALSE)
								}
								if (anyNA(table[, "Spectra_Number"])) {
								    stop("Input table has missing 'Spectra_Number' values", call. = FALSE)
								}

								# Organize the tables by number ----
								# Largest spectra (fragment) number; the per-fragment lookup vectors are
								# allocated with this length.
								fragments <- max(table[, "Spectra_Number"])


								# The algorithm below guarantees linear complexity of looking up data points.#


								# Derive the worker count for mclapply() from the configured `used_cores`
								# (the original line read the undefined name `use_cores`).
								# Leave one core free, ignore an unknown (NA) result from detectCores(),
								# and never drop below one worker.
								use_cores <- max(1, min(used_cores, detectCores() - 1, na.rm = TRUE))
								# mclapply() relies on forking, so more than one core is not supported
								# on Windows.
								if (.Platform$OS.type == "windows") {
								    use_cores <- 1
								}


								# Per-fragment lookup vectors, indexed by fragment (spectra) number:
								#   minint - first table row of the fragment (0 until one is recorded)
								#   maxint - last table row of the fragment  (0 until one is recorded)
								#   usable - whether the fragment can be analysed (FALSE until recorded)
								# numeric()/logical() allocate the zero/FALSE-filled vectors directly and,
								# unlike 1:fragments, do not misbehave when fragments is 0.
								minint <- numeric(fragments)
								maxint <- numeric(fragments)
								usable <- logical(fragments)


								# Set all of the intervals ----
								# For every fragment number present in the table, record the first and
								# the last row it occurs on and flag it as present.
								# NOTE: getdata() later slices minint:maxint, which presumes the rows of
								# one fragment are contiguous in the input.
								spectra_number <- table[, "Spectra_Number"]
								row_index <- seq_len(nrow(table))
								# First occurrence of each fragment number -> lower bound of its interval.
								is_first <- !duplicated(spectra_number)
								minint[spectra_number[is_first]] <- row_index[is_first]
								# Last occurrence of each fragment number -> upper bound of its interval.
								is_last <- !duplicated(spectra_number, fromLast = TRUE)
								maxint[spectra_number[is_last]] <- row_index[is_last]
								usable[spectra_number] <- TRUE


								# Disable fragments whose row span is below the minimum cluster size.
								# Fragments that never occurred keep minint = maxint = 0, i.e. a span of 1.
								too_small <- (maxint - minint + 1) < min_cluster_size
								usable[too_small] <- FALSE


								# Fetch one column of the table for a single fragment.
								#
								# fragment: fragment (spectra) number to look up.
								# key:      column selector passed to `[` on the table.
								#
								# Returns table[minint:maxint, key] for the fragment's row interval.
								# Stops with an error when the fragment is not flagged usable; this also
								# covers fragment numbers outside the range of the lookup vectors.
								getdata <- function(fragment, key) {
								    # isTRUE() maps an out-of-range lookup (NA or empty) onto the same
								    # error instead of failing inside `if` with an unrelated message.
								    if (!isTRUE(usable[fragment])) {
								        stop(sprintf("Fragment %d does not exist in the data set",
								                     fragment))
								    }
								    rows <- minint[fragment]:maxint[fragment]
								    table[rows, key]
								}


								# Collect the columns needed for evaluation into one data frame.
								#
								# fragment: fragment number, forwarded unchanged to getdata().
								#
								# Returns a data frame with the columns mz, time and Intensity, in that
								# order, each fetched through getdata().
								getdataframe <- function(fragment) {
								    data.frame(
								        mz = getdata(fragment, "mz"),
								        time = getdata(fragment, "time"),
								        Intensity = getdata(fragment, "Intensity")
								    )
								}


								# Incomplete: cannot get pattern.search function to work per cluster. #########

								# Incomplete: Get the diagnostics for a cluster and turn them into a portable#

								# format. (such as through tidyverse) ########################################


								# Isotope search list for pattern.search(): one entry per label in
								# search_isos, each with charge 1.
								# NOTE(review): the package examples load the `isotopes` table with
								# data() before use, so load it here unless it is already visible.
								if (!exists("isotopes")) {
								    data("isotopes", package = "enviPat")
								}
								isos <- make.isos(isotopes, use_isotopes = search_isos,
								                  use_charges = rep(1, length(search_isos)))


								# Run the isotope pattern search on one fragment.
								#
								# fragment: fragment number; getdata() stops if it is not flagged usable.
								#
								# Returns the result of pattern.search() for that fragment's peaks.
								diagnostics <- function(fragment) {
								    points <- getdataframe(fragment)
								    # nontarget's pattern.search() reads the peak list by column position:
								    # m/z, intensity, retention time. getdataframe() yields mz, time,
								    # Intensity, so reorder the columns before searching.
								    peaklist <- points[, c("mz", "Intensity", "time")]
								    pattern.search(peaklist, isos)
								}


								# Fragment numbers that are still flagged usable.
								use <- which(usable)
								# Run diagnostics() on each of them, using use_cores cores.
								results <- mclapply(X = use, FUN = diagnostics, mc.cores = use_cores)


								# Incomplete: Make the analysis more resilient to different sorting.

								# Incomplete: How to process results. (is it actually supposed to be all negatives?)


								##############################################################################