|
|
library("nontarget") library("purrr") library("funprog") library("enviPat") library("stringr") library("parallel")
# Configuration ----------------------------------------------------------------

# file: decides which file to read in data from.
file <- "/path/to/file"

# Isotopes handed to make.isos() as the set to search for.
search_isos <- c("13C", "37Cl")

# Minimum size of a cluster.
min_cluster_size <- 3

# Number of cores to be used (will be adjusted if not possible).
used_cores <- 3
# Read in the table ------------------------------------------------------------
# The input is read as comma-separated text with a header row.
table <- read.table(file, header = TRUE, sep = ",")

# Fail early and clearly when the input cannot be indexed by the code below:
# the fragment index needs Spectra_Number, and the per-fragment data frames
# read the mz, time and Intensity columns.
required_columns <- c("Spectra_Number", "mz", "time", "Intensity")
missing_columns <- setdiff(required_columns, names(table))
if (length(missing_columns) > 0) {
  stop(
    "Input file is missing required column(s): ",
    paste(missing_columns, collapse = ", ")
  )
}
if (nrow(table) == 0) {
  stop("Input file contains no data rows")
}

# Spectra numbers are used directly as vector indices, so they must be
# non-missing numbers starting at 1.
spectra_numbers <- table[["Spectra_Number"]]
if (!is.numeric(spectra_numbers) || anyNA(spectra_numbers) ||
    any(spectra_numbers < 1)) {
  stop("Column Spectra_Number must contain only numbers >= 1")
}

# Organize the tables by number ------------------------------------------------
# The largest spectra number determines the length of the lookup vectors.
fragments <- max(spectra_numbers)
# The algorithm below guarantees linear complexity of looking up data points.
# Set use_cores to a safe amount -----------------------------------------------
# Start from the configured `used_cores`, leave one core free, and never drop
# below a single worker (mc.cores must be at least 1).
available_cores <- detectCores() - 1L
if (is.na(available_cores)) {
  # detectCores() returns NA when the number of cores cannot be determined.
  available_cores <- 1L
}
use_cores <- max(1L, min(used_cores, available_cores))
if (.Platform$OS.type == "windows") {
  # mclapply() relies on forking and only accepts mc.cores = 1 on Windows.
  use_cores <- 1L
}
# Fragment row index -----------------------------------------------------------
# minint[f] is the first row of `table` whose Spectra_Number is f.
# maxint[f] is the last row of `table` whose Spectra_Number is f.
# usable[f] memoizes whether fragment f exists and spans enough rows.
# Fragment numbers that never occur keep minint = maxint = 0, usable = FALSE.
spectra <- table[["Spectra_Number"]]
row_index <- seq_along(spectra)

minint <- numeric(fragments)
maxint <- numeric(fragments)
usable <- logical(fragments)

# Set all of the intervals: the first occurrence of a spectra number gives the
# lower bound and the last occurrence gives the upper bound.
is_first <- !duplicated(spectra)
is_last <- !duplicated(spectra, fromLast = TRUE)
minint[spectra[is_first]] <- row_index[is_first]
maxint[spectra[is_last]] <- row_index[is_last]
usable[spectra[is_first]] <- TRUE

# Drop fragments whose row interval is narrower than the minimum cluster size.
interval_width <- maxint - minint + 1
usable <- usable & interval_width >= min_cluster_size

# Lookups slice table[minint:maxint, ], which is only the fragment's own rows
# when those rows are contiguous. Warn instead of silently mixing fragments.
row_counts <- tabulate(spectra, nbins = fragments)
scattered <- which(usable & row_counts != interval_width)
if (length(scattered) > 0) {
  warning(
    "Rows are not contiguous for fragment(s): ",
    paste(scattered, collapse = ", "),
    "; sort the input by Spectra_Number"
  )
}
# Look up the values of `key` for every row that belongs to one fragment.
#
# fragment: a single spectra number; it must be marked TRUE in `usable`.
# key:      column name(s) of `table` to extract.
# Returns table[minint[fragment]:maxint[fragment], key].
# Stops with "Fragment <n> does not exist in the data set" when the fragment
# is unusable, out of range, missing, or not a single number.
getdata <- function(fragment, key) {
  # Guard the index first: usable[fragment] is NA or empty for out-of-range,
  # zero or NA values, which would make a bare if() fail with a cryptic error.
  is_index <- is.numeric(fragment) && length(fragment) == 1L &&
    !is.na(fragment) && fragment >= 1 && fragment <= length(usable)
  if (!is_index || !isTRUE(usable[fragment])) {
    label <- paste(format(fragment, scientific = FALSE), collapse = ", ")
    stop(sprintf("Fragment %s does not exist in the data set", label))
  }
  table[minint[fragment]:maxint[fragment], key]
}
# Build the per-fragment data frame needed for evaluation.
#
# fragment: spectra number, forwarded unchanged to getdata().
# Returns a data frame with the columns mz, time and Intensity (in that
# order), each fetched through getdata().
getdataframe <- function(fragment) {
  wanted <- c("mz", "time", "Intensity")
  columns <- lapply(wanted, function(column) getdata(fragment, column))
  names(columns) <- wanted
  do.call(data.frame, columns)
}
# Incomplete: cannot get the pattern.search function to work per cluster.
# Incomplete: get the diagnostics for a cluster and turn them into a portable
# format (such as through tidyverse).
# Isotope table for the pattern search -----------------------------------------
# `isotopes` is a dataset shipped with enviPat; load it explicitly so the
# script does not depend on it already being visible in the session.
data("isotopes", package = "enviPat")

# One charge entry per searched isotope, every one of them set to 1.
isos <- make.isos(
  isotopes,
  use_isotopes = search_isos,
  use_charges = rep(1, length(search_isos))
)
# Run the isotope pattern search on the data points of one fragment.
#
# fragment: spectra number, forwarded to getdataframe().
# Returns whatever pattern.search() returns for that fragment's peak list.
diagnostics <- function(fragment) {
  points <- getdataframe(fragment)
  # pattern.search() reads the peak list by column position (m/z, intensity,
  # retention time per the nontarget documentation), so reorder the
  # mz/time/Intensity frame instead of passing it through as is.
  peaklist <- points[, c("mz", "Intensity", "time")]
  pattern.search(peaklist, isos)
}
# Fragment numbers flagged as usable, in ascending order.
use <- which(usable)

# Run the diagnostics for every usable fragment, spread over use_cores workers.
results <- mclapply(X = use, FUN = diagnostics, mc.cores = use_cores)
# Incomplete: make the analysis more resilient to different sorting.
# Incomplete: how to process results (is it actually supposed to be all negatives?).
##############################################################################
|