# Patternmatch/src/script.def.R

library("nontarget")
library("purrr")
library("funprog")
library("enviPat")
library("stringr")
library("parallel")

# Configuration ###############################################################

# Path of the comma-separated peak table that is read in below. ###############
file <- "/path/to/file"

# Isotopes handed to make.isos() for the pattern search. ######################
search_isos <- c("13C", "37Cl")

# Fragments spanning fewer rows than this are not analysed. ###################
min_cluster_size <- 3

# Requested number of cores for the parallel run (an upper bound that may be
# lowered to fit the machine).
used_cores <- 3

##############################################################################
# Load the peak table #########################################################
# The file is comma separated and its first line holds the column names.
table <- read.table(file = file, sep = ",", header = TRUE)

# Highest fragment number in the table; sizes the per-fragment vectors. #######
fragments <- max(table[, "Spectra_Number"])


# The algorithm below guarantees linear complexity of looking up data points.#

# Derive the worker count from the configured `used_cores`: leave one detected
# core free, but never go below 1 because mclapply() requires mc.cores >= 1.
# detectCores() may return NA when the count is unknown; na.rm = TRUE then
# keeps the configured value.
use_cores <- max(1, min(used_cores, detectCores() - 1, na.rm = TRUE))

# Row index per fragment ######################################################
# minint[f] is the first and maxint[f] the last row of `table` whose
# Spectra_Number is f; both stay 0 for fragment numbers that never occur.
# usable[f] records whether fragment f may be analysed.
# getdata() slices minint[f]:maxint[f], so the rows of one fragment are assumed
# to be contiguous in `table`.
spectra <- table[["Spectra_Number"]]

# Fragment numbers are used directly as vector indices, so reject anything
# that is not a positive whole number instead of indexing with it.
if (anyNA(spectra) || any(spectra < 1) || any(spectra %% 1 != 0)) {
    stop("Spectra_Number must hold positive whole fragment numbers",
         call. = FALSE)
}

minint <- numeric(fragments)
maxint <- numeric(fragments)
usable <- logical(fragments)

# duplicated() marks repeats, so its negation selects the first occurrence of
# each fragment number (or the last one when scanning from the end).
is_first <- !duplicated(spectra)
is_last <- !duplicated(spectra, fromLast = TRUE)
minint[spectra[is_first]] <- which(is_first)
maxint[spectra[is_last]] <- which(is_last)
usable[spectra] <- TRUE

# Drop fragments whose row span is shorter than the minimum cluster size.
usable[maxint - minint + 1 < min_cluster_size] <- FALSE

# Return column `key` of `table` for the rows that belong to `fragment`.
#
# fragment: fragment number (Spectra_Number) to look up.
# key:      column name of `table` to extract.
#
# Reads the globals `table`, `minint`, `maxint` and `usable`, and returns the
# rows minint[fragment]:maxint[fragment] as one contiguous slice.
# Stops with an error when `fragment` is not flagged usable. isTRUE() also
# covers fragment numbers beyond the length of `usable`, where the lookup
# yields NA and a bare `if` would fail with an unrelated message.
getdata <- function(fragment, key) {
    if (!isTRUE(usable[fragment])) {
        stop(sprintf("Fragment %s does not exist in the data set", fragment),
             call. = FALSE)
    }
    table[minint[fragment]:maxint[fragment], key]
}

# Collect the peak columns of one fragment into a data frame. #################
# fragment: fragment number forwarded to getdata().
# Returns a data.frame with the columns mz, time and Intensity, in that order.
getdataframe <- function (fragment) {
    wanted <- c("mz", "time", "Intensity")
    columns <- lapply(wanted, function(column) getdata(fragment, column))
    names(columns) <- wanted
    do.call(data.frame, columns)
}

# Incomplete: cannot get pattern.search function to work per cluster. #########
# Incomplete: Get the diagnostics for a cluster and turn them into a portable#
# format. (such as through tidyverse) ########################################

# Isotope table for the pattern search: every isotope in `search_isos` is
# paired with charge 1.
# NOTE(review): `isotopes` is not defined in this file; presumably it is the
# enviPat data set -- confirm it is available here (e.g. via data(isotopes)).
isos <- make.isos(
    isotopes,
    use_isotopes = search_isos,
    use_charges = rep(1, times = length(search_isos))
)

# Run the isotope pattern search on a single fragment.
#
# fragment: fragment number passed on to getdataframe().
#
# Returns the result of pattern.search() for that fragment's peaks, searched
# against the global `isos` table.
#
# pattern.search() documents its peaklist as three numeric columns in the order
# m/z, intensity, retention time. getdataframe() yields mz, time, Intensity, so
# the columns are reordered here; otherwise the two trailing columns are read
# in each other's place.
# NOTE(review): assumes `time` is the retention time -- confirm.
diagnostics <- function(fragment) {
    points <- getdataframe(fragment)
    peaklist <- points[, c("mz", "Intensity", "time")]
    pattern.search(peaklist, isos)
}

# Fragment numbers that are still flagged usable, in ascending order.
use <- which(usable)

# Run diagnostics() on each of them in parallel on up to `use_cores` cores.
results <- mclapply(X = use, FUN = diagnostics, mc.cores = use_cores)

# Incomplete: Make the analysis more resilient to different sorting.
# Incomplete: How to process results. (is it actually supposed to be all negatives?)

##############################################################################