|
|
@ -4,18 +4,22 @@ library("stringr") |
|
|
|
library("parallel") |
|
|
|
|
|
|
|
# Configurations ############################################################# |
|
|
|
# file : decides which file to read in data from ############################# |
|
|
|
file <- "/path/to/csv" |
|
|
|
# file to read from |
|
|
|
input.filename <- "" |
|
|
|
# separator to use (\t for tabs, "," for CSV) |
|
|
|
input.sep <- "," |
|
|
|
|
|
|
|
# All isotopes to check for |
|
|
|
check_isos <- c("37Cl", "81Br", "34S") |
|
|
|
check_isos <- c("37Cl", "81Br") |
|
|
|
|
|
|
|
# Minimum size of a cluster |
|
|
|
min_cluster_size <- 2 |
|
|
|
|
|
|
|
# Number of cores to be used (will be adjusted if it exceeds the true number of cores) |
|
|
|
# Number of cores to be used in pattern.search evaluation |
|
|
|
# (will be adjusted if it exceeds the true number of cores) |
|
|
|
use_cores <- 6 |
|
|
|
|
|
|
|
|
|
|
|
# Table name configuration |
|
|
|
|
|
|
|
# Column name for m/z |
|
|
@ -28,56 +32,61 @@ columns.intensity <- "Intensity" |
|
|
|
columns.spectra <- "Spectra_Number" |
|
|
|
|
|
|
|
# output options |
|
|
|
|
|
|
|
# Number of highest intensities to display in output |
|
|
|
output.intensities <- 5 |
|
|
|
|
|
|
|
# Verbose output |
|
|
|
verbose.enable <- FALSE |
|
|
|
verbose.enable <- TRUE |
|
|
|
# output directory must already exist. |
|
|
|
verbose.outputdir <- "/path/to/directory" |
|
|
|
verbose.outputdir <- "" |
|
|
|
|
|
|
|
|
|
|
|
# Script ##################################################################### |
|
|
|
|
|
|
|
## Defining/setting variables ################################################ |
|
|
|
|
|
|
|
# C-13 is required in the rules of pattern.search, so it must be included. ### |
|
|
|
# C-13 is required in the rules of pattern.search. |
|
|
|
search_isos <- check_isos |
|
|
|
iso_length <- length(search_isos) |
|
|
|
if (!("13C" %in% search_isos)) { |
|
|
|
search_isos <- append(search_isos, "13C") |
|
|
|
} |
|
|
|
# Read in the Table and sort by spectra ID |
|
|
|
table <- read.table(file, header=TRUE, sep=",") |
|
|
|
table <- table[order(table[,columns.spectra]),] |
|
|
|
table <- read.table(input.filename, header = TRUE, sep = input.sep) |
|
|
|
table <- table[order(table[, columns.spectra]), ] |
|
|
|
|
|
|
|
# Organize the tables by number |
|
|
|
fragments <- max(table[,columns.spectra]) |
|
|
|
fragments <- max(table[, columns.spectra]) |
|
|
|
|
|
|
|
# set use_cores to safe amount |
|
|
|
use_cores = min(use_cores, detectCores()-1) |
|
|
|
use_cores = min(use_cores, detectCores() - 1) |
|
|
|
|
|
|
|
# lower bound of indices per fragment |
|
|
|
# lower bound of indices per fragment |
|
|
|
# (sorting guarantees that data points of same fragment are together) |
|
|
|
minint <- rep(0,fragments) |
|
|
|
minint <- rep(0, fragments) |
|
|
|
# upper bound of indices per fragment |
|
|
|
maxint <- rep(0,fragments) |
|
|
|
# memoizes as to |
|
|
|
usable <- rep(FALSE,fragments) |
|
|
|
maxint <- rep(0, fragments) |
|
|
|
# memoizes as to |
|
|
|
usable <- rep(FALSE, fragments) |
|
|
|
|
|
|
|
# initialize isotopes. |
|
|
|
data(isotopes) |
|
|
|
isos <- make.isos(isotopes,use_isotopes=search_isos, |
|
|
|
use_charges=rep(1, length(search_isos))) |
|
|
|
|
|
|
|
isos <- make.isos(isotopes, |
|
|
|
use_isotopes = search_isos, |
|
|
|
use_charges = rep(1, length(search_isos))) |
|
|
|
|
|
|
|
|
|
|
|
## Setting functions ######################################################### |
|
|
|
|
|
|
|
getdata <- function(fragment, key) { |
|
|
|
if (! usable[fragment]) { |
|
|
|
stop(str_interp("Fragment $[d]{fragment} does not exist in the data set", |
|
|
|
list(fragment=fragment))) |
|
|
|
if (!usable[fragment]) { |
|
|
|
stop(str_interp( |
|
|
|
"Fragment $[d]{fragment} does not exist in the data set", |
|
|
|
list(fragment = fragment) |
|
|
|
)) |
|
|
|
} |
|
|
|
return(table[minint[fragment]:maxint[fragment],key]) |
|
|
|
return(table[minint[fragment]:maxint[fragment], key]) |
|
|
|
} |
|
|
|
|
|
|
|
# Add all data frames as necessary for evaluation. |
|
|
@ -87,7 +96,11 @@ getdataframe <- function (fragment) { |
|
|
|
mz <- getdata(fragment, columns.mz) |
|
|
|
time <- getdata(fragment, columns.time) |
|
|
|
Intensity <- getdata(fragment, columns.intensity) |
|
|
|
return(data.frame(mz=mz, Intensity=Intensity,time=time)) |
|
|
|
return(data.frame( |
|
|
|
mz = mz, |
|
|
|
Intensity = Intensity, |
|
|
|
time = time |
|
|
|
)) |
|
|
|
} |
|
|
|
|
|
|
|
# returns both pattern.search result and fragment number. |
|
|
@ -97,17 +110,18 @@ diagnostics <- function(fragment) { |
|
|
|
ptrn <- pattern.search( |
|
|
|
points, |
|
|
|
isos, |
|
|
|
cutint=1000, |
|
|
|
rttol=c(-20,20), |
|
|
|
cutint = 1000, |
|
|
|
rttol = c(-20, 20), |
|
|
|
# kept because of gc limitations. |
|
|
|
mztol=3, |
|
|
|
mzfrac=0.1, |
|
|
|
ppm=TRUE, |
|
|
|
inttol=0.05, |
|
|
|
rules=rep(TRUE,11), |
|
|
|
deter=FALSE, |
|
|
|
entry=50 |
|
|
|
); |
|
|
|
mztol = 3, |
|
|
|
mzfrac = 0.1, |
|
|
|
ppm = TRUE, |
|
|
|
inttol = 0.05, |
|
|
|
rules = rep(TRUE, 11), |
|
|
|
deter = FALSE, |
|
|
|
entry = 50 |
|
|
|
) |
|
|
|
|
|
|
|
return(c(ptrn, fragment)) |
|
|
|
} |
|
|
|
|
|
|
@ -115,7 +129,7 @@ diagnostics <- function(fragment) { |
|
|
|
|
|
|
|
positive <- function(result) { |
|
|
|
res <- result$`Counts of isotopes` |
|
|
|
v <- res[res$"isotope" %in% check_isos,"peak counts"] |
|
|
|
v <- res[res$"isotope" %in% check_isos, "peak counts"] |
|
|
|
return (!all(v %in% c(0))) |
|
|
|
} |
|
|
|
|
|
|
@ -125,35 +139,38 @@ derive.fragment <- function(result) { |
|
|
|
|
|
|
|
derive.iso <- function(result) { |
|
|
|
res <- result$`Counts of isotopes` |
|
|
|
return (res[res$"isotope" %in% check_isos,c("isotope", "group counts")]) |
|
|
|
return (res[res$"isotope" %in% check_isos, c("isotope", "group counts")]) |
|
|
|
} |
|
|
|
|
|
|
|
derive.intensity <- function(result) { |
|
|
|
ptrn <- result$Patterns |
|
|
|
N <- max(output.intensities, nrow(result)) |
|
|
|
inds <- order(ptrn[,columns.intensity], decreasing=TRUE)[1:N] |
|
|
|
inds <- order(ptrn[, columns.intensity], decreasing = TRUE)[1:N] |
|
|
|
return(ptrn[inds, c(2, 1, 3)]) |
|
|
|
} |
|
|
|
|
|
|
|
derive.average <- function(result) { |
|
|
|
ptrn <- result$Patterns[,2] |
|
|
|
ptrn <- result$Patterns[, 2] |
|
|
|
return(mean(ptrn)) |
|
|
|
} |
|
|
|
|
|
|
|
derive.frame <- function(result) { |
|
|
|
res <- list( |
|
|
|
derive.fragment(result), |
|
|
|
derive.iso(result), |
|
|
|
derive.intensity(result), |
|
|
|
derive.average(result) |
|
|
|
) |
|
|
|
names(res) <- c("Fragment_ID", "Peaks", "Intensities", "Average_time") |
|
|
|
derive.fragment(result), |
|
|
|
derive.iso(result), |
|
|
|
derive.intensity(result), |
|
|
|
derive.average(result) |
|
|
|
) |
|
|
|
names(res) <- |
|
|
|
c("Fragment_ID", "Peaks", "Intensities", "Average_time") |
|
|
|
return(res) |
|
|
|
} |
|
|
|
|
|
|
|
verbose.dump <- function(result) { |
|
|
|
path <- file.path(verbose.outputdir, paste(derive.fragment(result), ".txt", |
|
|
|
sep="")) |
|
|
|
path <- |
|
|
|
file.path(verbose.outputdir, |
|
|
|
paste(derive.fragment(result), ".txt", |
|
|
|
sep = "")) |
|
|
|
sink(path) |
|
|
|
dput(result) |
|
|
|
sink() |
|
|
@ -165,7 +182,7 @@ verbose.dump <- function(result) { |
|
|
|
|
|
|
|
for (i in seq(1, nrow(table))) { |
|
|
|
fragment <- table[i, columns.spectra] |
|
|
|
if (! usable[fragment]) { |
|
|
|
if (!usable[fragment]) { |
|
|
|
minint[fragment] <- i |
|
|
|
} |
|
|
|
maxint[fragment] <- max(maxint[fragment], i) |
|
|
@ -175,39 +192,51 @@ for (i in seq(1, nrow(table))) { |
|
|
|
# make fragment unusable if it does not contain enough data points. |
|
|
|
for (i in 1:fragments) { |
|
|
|
if (maxint[i] - minint[i] + 1 < min_cluster_size) |
|
|
|
usable[i] = FALSE |
|
|
|
usable[i] <- FALSE |
|
|
|
} |
|
|
|
|
|
|
|
# Filter all usable fragments |
|
|
|
use <- Filter(function(x) usable[x], 1:fragments) |
|
|
|
use <- Filter(function(x) |
|
|
|
usable[x], 1:fragments) |
|
|
|
|
|
|
|
# Apply diagnostics function to all usable fragments (in parallel). |
|
|
|
results <- mclapply(use, diagnostics, mc.cores=use_cores) |
|
|
|
results <- mclapply(use, diagnostics, mc.cores = use_cores) |
|
|
|
|
|
|
|
results.positive <- Filter(function(x) positive(x), results) |
|
|
|
results.positive <- Filter(function(x) |
|
|
|
positive(x), results) |
|
|
|
|
|
|
|
# print to files if verbose output requested |
|
|
|
|
|
|
|
if (verbose.enable) { |
|
|
|
if (!dir.exists(verbose.outputdir)) { |
|
|
|
stop(str_interp("Directory $[s]{dir} does not exist", |
|
|
|
list(dir=verbose.outputdir))) |
|
|
|
stop(str_interp( |
|
|
|
"Directory $[s]{dir} does not exist", |
|
|
|
list(dir = verbose.outputdir) |
|
|
|
)) |
|
|
|
} |
|
|
|
res <- readline(prompt=paste("Are you sure you want to overwrite files in ", |
|
|
|
verbose.outputdir, "? [y/N] ")) |
|
|
|
if (res != "y") { |
|
|
|
stop("Process aborted.") |
|
|
|
if (interactive()) { |
|
|
|
res <- |
|
|
|
readline( |
|
|
|
prompt = paste( |
|
|
|
"Are you sure you want to overwrite files in ", |
|
|
|
verbose.outputdir, |
|
|
|
"? [y/N] " |
|
|
|
) |
|
|
|
) |
|
|
|
if (res != "y") |
|
|
|
{ |
|
|
|
stop("Process aborted.") |
|
|
|
} |
|
|
|
} |
|
|
|
print(paste("printing output to ", verbose.outputdir)) |
|
|
|
mapply(verbose.dump, results.positive) |
|
|
|
lapply(results.positive, verbose.dump) |
|
|
|
} |
|
|
|
|
|
|
|
result.frame <- mapply(derive.frame, results.positive) |
|
|
|
result.fragment <- mapply(derive.fragment, results.positive) |
|
|
|
result.iso <- mapply(derive.iso, results.positive) |
|
|
|
result.intensity <- mapply(derive.intensity, results.positive) |
|
|
|
result.average <- mapply(derive.average, results.positive) |
|
|
|
|
|
|
|
print(result.fragment) |
|
|
|
result.frame <- lapply(results.positive, derive.frame) |
|
|
|
result.fragment <- lapply(results.positive, derive.fragment) |
|
|
|
result.iso <- lapply(results.positive, derive.iso) |
|
|
|
result.intensity <- lapply(results.positive, derive.intensity) |
|
|
|
result.average <- lapply(results.positive, derive.average) |
|
|
|
|
|
|
|
############################################################################## |
|
|
|
print("All Fragments that contain desired isotopes:") |
|
|
|
print(result.frame) |