|
|
@ -1,15 +1,14 @@ |
|
|
|
library("nontarget") |
|
|
|
library("purrr") |
|
|
|
library("enviPat") |
|
|
|
library("stringr") |
|
|
|
library("parallel") |
|
|
|
|
|
|
|
# Configurations ############################################################# |
|
|
|
# file : decides which file to read in data from ############################# |
|
|
|
file <- "/path/to/script" |
|
|
|
file <- "/path/to/csv" |
|
|
|
|
|
|
|
# All isotopes to search for |
|
|
|
search_isos <- c("37Cl", "81Br") |
|
|
|
# All isotopes to check for |
|
|
|
check_isos <- c("37Cl", "81Br", "34S") |
|
|
|
|
|
|
|
# Minimum size of a cluster |
|
|
|
min_cluster_size <- 2 |
|
|
@ -28,50 +27,49 @@ columns.intensity <- "Intensity" |
|
|
|
# Column name for fragment numbers (only numbers accepted) |
|
|
|
columns.spectra <- "Spectra_Number" |
|
|
|
|
|
|
|
############################################################################## |
|
|
|
# Script |
|
|
|
# output options |
|
|
|
output.intensities <- 5 |
|
|
|
|
|
|
|
# Verbose output |
|
|
|
verbose.enable <- TRUE |
|
|
|
verbose.outputdir <- "/output/dir" |
|
|
|
|
|
|
|
|
|
|
|
# Script ##################################################################### |
|
|
|
|
|
|
|
## Defining/setting variables ################################################ |
|
|
|
|
|
|
|
# C-13 is required in the rules of pattern.search, so it must be included. ### |
|
|
|
search_isos <- check_isos |
|
|
|
iso_length <- length(search_isos) |
|
|
|
if (!("13C" %in% search_isos)) { |
|
|
|
search_isos <- append(search_isos, "13C") |
|
|
|
} |
|
|
|
# Read in the Table ########################################################## |
|
|
|
# Read in the Table and sort by spectra ID |
|
|
|
table <- read.table(file, header=TRUE, sep=",") |
|
|
|
|
|
|
|
# Sort table by spectra ID |
|
|
|
table <- table[order(table[,columns.spectra]),] |
|
|
|
|
|
|
|
# Organize the tables by number ############################################## |
|
|
|
# Organize the tables by number |
|
|
|
fragments <- max(table[,columns.spectra]) |
|
|
|
|
|
|
|
|
|
|
|
# The algorithm below guarantees linear complexity of looking up data points.# |
|
|
|
|
|
|
|
# set use_cores to safe amount |
|
|
|
use_cores = min(use_cores, detectCores()-1) |
|
|
|
|
|
|
|
# minint is the lower bound of the interval. ################################# |
|
|
|
# maxint is the upper bound of the interval that contains the fragment. ###### |
|
|
|
# usable memoizes as to whether or not each fragment number exists. ##### |
|
|
|
minint <- unlist(map(1:fragments, function(x) 0)) |
|
|
|
maxint <- unlist(map(1:fragments, function(x) 0)) |
|
|
|
usable <- unlist(map(1:fragments, function(x) FALSE)) |
|
|
|
# lower bound of indices per fragment |
|
|
|
# (sorting guarantees that data points of same fragment are together) |
|
|
|
minint <- rep(0,fragments) |
|
|
|
# upper bound of indices per fragment |
|
|
|
maxint <- rep(0,fragments) |
|
|
|
# memoizes as to |
|
|
|
usable <- rep(FALSE,fragments) |
|
|
|
|
|
|
|
# initialize isotopes. |
|
|
|
data(isotopes) |
|
|
|
isos <- make.isos(isotopes,use_isotopes=search_isos, |
|
|
|
use_charges=rep(1, length(search_isos))) |
|
|
|
|
|
|
|
# Set all of the intervals ################################################### |
|
|
|
for (i in seq(1, nrow(table))) { |
|
|
|
fragment <- table[i, columns.spectra] |
|
|
|
if (! usable[fragment]) { |
|
|
|
minint[fragment] <- i |
|
|
|
} |
|
|
|
maxint[fragment] <- max(maxint[fragment], i) |
|
|
|
usable[fragment] <- TRUE |
|
|
|
} |
|
|
|
|
|
|
|
for (i in 1:fragments) { |
|
|
|
if (maxint[i] - minint[i] + 1 < min_cluster_size) |
|
|
|
usable[i] = FALSE |
|
|
|
} |
|
|
|
## Setting functions ######################################################### |
|
|
|
|
|
|
|
getdata <- function(fragment, key) { |
|
|
|
if (! usable[fragment]) { |
|
|
@ -81,21 +79,18 @@ getdata <- function(fragment, key) { |
|
|
|
return(table[minint[fragment]:maxint[fragment],key]) |
|
|
|
} |
|
|
|
|
|
|
|
# Add all data frames as necessary for evaluation. ############################ |
|
|
|
# Add all data frames as necessary for evaluation. |
|
|
|
# Note: for pattern.search to work, the data must be ordered in |
|
|
|
# mz, intensity, time. |
|
|
|
getdataframe <- function (fragment) { |
|
|
|
mz <- getdata(fragment, columns.mz) |
|
|
|
time <- getdata(fragment, columns.time) |
|
|
|
Intensity <- getdata(fragment, columns.intensity) |
|
|
|
# Must be indexed in this order. |
|
|
|
return(data.frame(mz=mz, Intensity=Intensity,time=time)) |
|
|
|
} |
|
|
|
|
|
|
|
# initialize isotopes. |
|
|
|
data(isotopes) |
|
|
|
isos <- make.isos(isotopes,use_isotopes=search_isos, |
|
|
|
use_charges=rep(1, length(search_isos))) |
|
|
|
|
|
|
|
# returns both pattern.search result and fragment number. |
|
|
|
|
|
|
|
diagnostics <- function(fragment) { |
|
|
|
points <- getdataframe(fragment) |
|
|
|
ptrn <- pattern.search( |
|
|
@ -108,35 +103,88 @@ diagnostics <- function(fragment) { |
|
|
|
mzfrac=0.1, |
|
|
|
ppm=TRUE, |
|
|
|
inttol=0.05, |
|
|
|
# Do not modify anything below. |
|
|
|
rules=rep(TRUE,11), |
|
|
|
#rules=c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE), |
|
|
|
deter=FALSE, |
|
|
|
entry=50 |
|
|
|
); |
|
|
|
return(c(ptrn, fragment)) |
|
|
|
} |
|
|
|
|
|
|
|
use <- Filter(function(x) usable[x], 1:fragments) |
|
|
|
# checks if the result is positive. |
|
|
|
|
|
|
|
results <- mclapply(use, diagnostics, mc.cores=use_cores) |
|
|
|
positive <- function(result) { |
|
|
|
res <- result$`Counts of isotopes` |
|
|
|
v <- res[res$"isotope" %in% check_isos,"peak counts"] |
|
|
|
return (!all(v %in% c(0))) |
|
|
|
} |
|
|
|
|
|
|
|
handle_res <- function(result) { |
|
|
|
allzero <- function(v) { |
|
|
|
for (x in v) { |
|
|
|
if (x != 0) |
|
|
|
return (FALSE) |
|
|
|
} |
|
|
|
return (TRUE) |
|
|
|
} |
|
|
|
v <- result$`Counts of isotopes`[seq(1,iso_length),"peak counts"] |
|
|
|
if (!allzero(v)) { |
|
|
|
print(result[[13]]) |
|
|
|
derive.fragment <- function(result) { |
|
|
|
return(result[[13]]) |
|
|
|
} |
|
|
|
|
|
|
|
derive.iso <- function(result) { |
|
|
|
res <- result$`Counts of isotopes` |
|
|
|
return (res[res$"isotope" %in% check_isos,c("isotope", "group counts")]) |
|
|
|
} |
|
|
|
|
|
|
|
derive.intensity <- function(result) { |
|
|
|
ptrn <- result$Patterns |
|
|
|
N <- max(output.intensities, nrow(result)) |
|
|
|
inds <- order(ptrn[,columns.intensity], decreasing=TRUE)[1:N] |
|
|
|
return(ptrn[inds, c(2, 1, 3)]) |
|
|
|
} |
|
|
|
|
|
|
|
derive.average <- function(result) { |
|
|
|
ptrn <- result$Patterns[,2] |
|
|
|
return(mean(ptrn)) |
|
|
|
} |
|
|
|
|
|
|
|
derive.frame <- function(result) { |
|
|
|
res <- list( |
|
|
|
derive.fragment(result), |
|
|
|
derive.iso(result), |
|
|
|
derive.intensity(result), |
|
|
|
derive.average(result) |
|
|
|
) |
|
|
|
names(res) <- c("Fragment_ID", "Peaks", "Intensities", "Average_time") |
|
|
|
return(res) |
|
|
|
} |
|
|
|
|
|
|
|
# Execution ################################################################## |
|
|
|
|
|
|
|
## Set all of the intervals ################################################## |
|
|
|
|
|
|
|
for (i in seq(1, nrow(table))) { |
|
|
|
fragment <- table[i, columns.spectra] |
|
|
|
if (! usable[fragment]) { |
|
|
|
minint[fragment] <- i |
|
|
|
} |
|
|
|
maxint[fragment] <- max(maxint[fragment], i) |
|
|
|
usable[fragment] <- TRUE |
|
|
|
} |
|
|
|
|
|
|
|
# make fragment unusable if it does not contain enough data points. |
|
|
|
for (i in 1:fragments) { |
|
|
|
if (maxint[i] - minint[i] + 1 < min_cluster_size) |
|
|
|
usable[i] = FALSE |
|
|
|
} |
|
|
|
|
|
|
|
for (result in results) { |
|
|
|
handle_res(result) |
|
|
|
# Filter all usable fragments |
|
|
|
use <- Filter(function(x) usable[x], 1:fragments) |
|
|
|
|
|
|
|
# Apply diagnostics function to all usable fragments (in parallel). |
|
|
|
results <- mclapply(use, diagnostics, mc.cores=use_cores) |
|
|
|
|
|
|
|
results.positive <- Filter(function(x) positive(x), results) |
|
|
|
|
|
|
|
result.frame <- mapply(derive.frame, results.positive) |
|
|
|
result.fragment <- mapply(derive.fragment, results.positive) |
|
|
|
result.iso <- mapply(derive.iso, results.positive) |
|
|
|
result.intensity <- mapply(derive.intensity, results.positive) |
|
|
|
result.average <- mapply(derive.average, results.positive) |
|
|
|
|
|
|
|
for (x in result.frame) { |
|
|
|
print(x) |
|
|
|
} |
|
|
|
|
|
|
|
############################################################################## |