library("nontarget")
library("purrr")
library("funprog")
library("enviPat")
library("stringr")
library("parallel")

# Configurations #############################################################
# file : decides which file to read in data from #############################
file <- "/path/to/file"
# Isotopes of interest
search_isos <- c("13C", "37Cl")
# Minimum size of a cluster
min_cluster_size <- 3
# Number of cores to be used (will be adjusted if not possible)
used_cores <- 3
##############################################################################

# Read in the Table ##########################################################
table <- read.table(file, header = TRUE, sep = ",")
if (nrow(table) == 0) {
  stop("The input table contains no rows", call. = FALSE)
}

# Organize the tables by number ##############################################
# NOTE: Spectra_Number is used directly as a vector index below, so it is
# assumed to hold positive whole numbers.
spectra <- table[, "Spectra_Number"]
fragments <- max(spectra)

# Set use_cores to a safe amount: never more than requested, leave one core
# free where possible, and never drop below 1 (detectCores() can return NA or
# 1, both of which would otherwise yield an invalid core count).
use_cores <- max(1L, min(used_cores, detectCores() - 1L, na.rm = TRUE))

# The lookup vectors below make fetching a fragment's rows a constant-time
# index operation after a single pass over the table.
# minint is the lower bound of the interval. #################################
# maxint is the upper bound of the interval that contains the fragment. ######
# usable memoizes whether or not each fragment number exists (and is large
# enough to be analysed).
# NOTE: the rows of one fragment are assumed to be contiguous in the table;
# the interval minint:maxint would otherwise include rows of other fragments.
minint <- numeric(fragments)
maxint <- numeric(fragments)

# Set all of the intervals ###################################################
present <- unique(spectra)
# First row of each fragment.
minint[present] <- match(present, spectra)
# Last row of each fragment: first match in the reversed vector, mapped back.
maxint[present] <- length(spectra) + 1L - match(present, rev(spectra))

# A fragment is usable when it occurs in the table and its row interval spans
# at least min_cluster_size rows.
usable <- seq_len(fragments) %in% present &
  (maxint - minint + 1 >= min_cluster_size)

# Fetch one column of the rows that belong to a fragment.
#
# fragment: fragment number (a value of the Spectra_Number column).
# key:      name of the column of `table` to return.
#
# Returns table[minint[fragment]:maxint[fragment], key].
# Stops with an error when the fragment is not marked usable, which includes
# fragment numbers outside the range seen in the table.
getdata <- function(fragment, key) {
  # isTRUE() also covers out-of-range fragment numbers, where usable[fragment]
  # is NA and a bare `!` test would fail with an unrelated error message.
  if (!isTRUE(usable[fragment])) {
    stop(str_interp("Fragment $[d]{fragment} does not exist in the data set",
                    list(fragment = fragment)))
  }
  table[minint[fragment]:maxint[fragment], key]
}

# Add all data frames as necessary for evaluation.
##############################################################################

# Build the per-fragment data frame from the columns "mz", "time" and
# "Intensity" of the input table.
#
# fragment: fragment number, passed through to getdata().
#
# Returns a data.frame with the columns mz, time, Intensity (in that order).
getdataframe <- function(fragment) {
  data.frame(
    mz = getdata(fragment, "mz"),
    time = getdata(fragment, "time"),
    Intensity = getdata(fragment, "Intensity")
  )
}

# Incomplete: cannot get pattern.search function to work per cluster. #########
# NOTE(review): the peak list used to be passed with the columns ordered
# (mz, time, Intensity); nontarget documents the order (m/z, intensity,
# retention time), so the columns are reordered in diagnostics() below.
# Confirm that this resolves the problem.
# Incomplete: Get the diagnostics for a cluster and turn them into a portable#
# format. (such as through tidyverse) ########################################

# Load the isotope table shipped with enviPat explicitly so that `isotopes`
# is defined regardless of whether the package lazy-loads its data sets.
data("isotopes", package = "enviPat")
isos <- make.isos(isotopes,
                  use_isotopes = search_isos,
                  use_charges = rep(1, length(search_isos)))

# Run the isotope pattern search on one fragment.
#
# fragment: fragment number, passed through to getdataframe().
#
# Returns the result of pattern.search() for that fragment's peaks.
diagnostics <- function(fragment) {
  points <- getdataframe(fragment)
  # pattern.search() reads the columns by position: m/z, intensity, RT.
  peaklist <- points[, c("mz", "Intensity", "time")]
  pattern.search(peaklist, isos)
}

# Indices of all fragments flagged usable.
use <- which(usable)
results <- mclapply(use, diagnostics, mc.cores = use_cores)

# Incomplete: Make the analysis more resilient to different sorting.
# Incomplete: How to process results. (is it actually supposed to be all
# negatives?)
##############################################################################