Browse Source

added parallelization and changed nontarget versions

master
junikimm717 4 years ago
parent
commit
bbee9467c3
  1. 20
      specification.md
  2. 48
      src/script.def.R

20
specification.md

@ -19,3 +19,23 @@ isotopes.
- Readable by R. - Readable by R.
- Manipulatable by Tidyverse - Manipulatable by Tidyverse
- Include it in script? - Include it in script?
# Additional Notes
- Independent of the ordering (it might get corrupted)
- Just search for chlorinated/brominated compounds.
- usually, use_charges will be 1
- Headers to use in script.
- Packages to do things in parallel (multiple threads)
- be able to configure the number of cores or threads used.
- make it a requirement to be tab-delimited.
- eliminate clusters with only m/z.
# Notes
- cutint represents high enough intensity.
- mztol is in terms of ppm.
- mzfrac is in terms of absolute.
- PPM = true always

48
src/script.def.R

@ -1,44 +1,62 @@
library("nontarget") library("nontarget")
library("purrr") library("purrr")
library("funprog")
library("enviPat") library("enviPat")
library("stringr") library("stringr")
library("parallel")
# Configurations ############################################################# # Configurations #############################################################
# file : decides which file to read in data from ############################# # file : decides which file to read in data from #############################
file <- "/path/to/file" file <- "/path/to/file"
# separator: decides separator of file #######################################
separator <- "separator"
search_isos <- c("13C", "37Cl")
# Minimum size of a cluster
min_cluster_size <- 3
# Number of cores to be used (will be adjusted if not possible)
used_cores <- 3
############################################################################## ##############################################################################
# Read in the Table ########################################################## # Read in the Table ##########################################################
table <- read.table(file, header=TRUE, sep=separator)
table <- read.table(file, header=TRUE, sep=",")
# Organize the tables by number ############################################## # Organize the tables by number ##############################################
fragments <- max(table[,"Spectra_Number"]) fragments <- max(table[,"Spectra_Number"])
# The algorithm below guarantees linear complexity of looking up data points.# # The algorithm below guarantees linear complexity of looking up data points.#
# Clamp the configured worker count (used_cores) to what this machine offers:
# leave one core free, ignore an unknown (NA) core count, and never drop below
# 1, since mclapply needs mc.cores >= 1.
use_cores <- max(1L, min(used_cores, detectCores() - 1L, na.rm = TRUE))
# minint is the lower bound of the interval. ################################# # minint is the lower bound of the interval. #################################
# maxint is the upper bound of the interval that contains the fragment. ###### # maxint is the upper bound of the interval that contains the fragment. ######
# initialized memoizes as to whether or not each fragment number exists. #####
# usable records whether each fragment exists and meets min_cluster_size. ####
minint <- unlist(map(1:fragments, function(x) 0)) minint <- unlist(map(1:fragments, function(x) 0))
maxint <- unlist(map(1:fragments, function(x) 0)) maxint <- unlist(map(1:fragments, function(x) 0))
initialized <- unlist(map(1:fragments, function(x) FALSE))
usable <- unlist(map(1:fragments, function(x) FALSE))
# Set all of the intervals ################################################### # Set all of the intervals ###################################################
for (i in seq(1, nrow(table))) { for (i in seq(1, nrow(table))) {
fragment <- table[i, "Spectra_Number"] fragment <- table[i, "Spectra_Number"]
if (! initialized[fragment]) {
if (! usable[fragment]) {
minint[fragment] <- i minint[fragment] <- i
} }
maxint[fragment] <- max(maxint[fragment], i) maxint[fragment] <- max(maxint[fragment], i)
initialized[fragment] <- TRUE
usable[fragment] <- TRUE
}
for (i in 1:fragments) {
if (maxint[i] - minint[i] + 1 < min_cluster_size)
usable[i] = FALSE
} }
getdata <- function(fragment, key) { getdata <- function(fragment, key) {
if (! initialized[fragment]) {
if (! usable[fragment]) {
stop(str_interp("Fragment $[d]{fragment} does not exist in the data set", stop(str_interp("Fragment $[d]{fragment} does not exist in the data set",
list(fragment=fragment))) list(fragment=fragment)))
} }
@ -55,10 +73,22 @@ getdataframe <- function (fragment) {
# Incomplete: cannot get pattern.search function to work per cluster. ######### # Incomplete: cannot get pattern.search function to work per cluster. #########
# Incomplete: Get the diagnostics for a cluster and turn them into a portable# # Incomplete: Get the diagnostics for a cluster and turn them into a portable#
# format. (such as through tinyverse) ########################################
# format. (such as through tidyverse) ########################################
isos <- make.isos(isotopes,use_isotopes=search_isos,
use_charges=rep(1, length(search_isos)))
diagnostics <- function(fragment) { diagnostics <- function(fragment) {
points <- getdataframe(fragment) points <- getdataframe(fragment)
ptrn <- pattern.search(points, isos)
return(ptrn)
} }
use <- Filter(function(x) usable[x], 1:fragments)
results <- mclapply(use, diagnostics, mc.cores=use_cores)
# Incomplete: Make the analysis more resilient to different sorting.
# Incomplete: How to process results. (is it actually supposed to be all negatives?)
############################################################################## ##############################################################################
Loading…
Cancel
Save