|
|
@ -4,18 +4,22 @@ library("stringr") |
|
|
|
library("parallel") |
|
|
|
|
|
|
|
# Configurations #############################################################

# Legacy input path. The table is read from `input.filename`; this assignment
# is retained only so that any remaining reference to `file` keeps resolving.
file <- "/path/to/csv"

# Path of the delimited text file to read the data table from.
input.filename <- ""

# Field separator of the input file ("\t" for tabs, "," for CSV).
input.sep <- ","

# All isotopes to check for.
check_isos <- c("37Cl", "81Br")

# Minimum number of data points a cluster (fragment) must contain.
min_cluster_size <- 2

# Number of cores to be used in pattern.search evaluation
# (will be adjusted if it exceeds the true number of cores)
use_cores <- 6
|
|
|
|
|
|
|
|
|
|
|
# Table name configuration |
|
|
|
|
|
|
|
# Column name for m/z |
|
|
@ -28,26 +32,28 @@ columns.intensity <- "Intensity" |
|
|
|
# Column name for the spectra number; the table is sorted by this column.
columns.spectra <- "Spectra_Number"

# output options

# Number of highest intensities to display in output
output.intensities <- 5

# Verbose output: when TRUE, every positive result is dumped to its own file
# inside `verbose.outputdir`.
verbose.enable <- TRUE

# Output directory for the verbose dumps. It must already exist, so set it to
# a real path before running with `verbose.enable <- TRUE`.
verbose.outputdir <- ""
|
|
|
|
|
|
|
|
|
|
|
# Script #####################################################################

## Defining/setting variables ################################################

# C-13 is required in the rules of pattern.search, so it is always searched
# for in addition to the configured isotopes.
search_isos <- check_isos
# Number of configured isotopes, taken before 13C is appended.
iso_length <- length(search_isos)
if (!("13C" %in% search_isos)) {
  search_isos <- append(search_isos, "13C")
}

# Read in the table once, from the configured file and separator, and sort
# its rows by spectra ID.
table <- read.table(input.filename, header = TRUE, sep = input.sep)
table <- table[order(table[, columns.spectra]), ]
|
|
|
|
|
|
|
# Organize the tables by number |
|
|
@ -66,7 +72,8 @@ usable <- rep(FALSE,fragments) |
|
|
|
|
|
|
|
# initialize isotopes.
# Load the `isotopes` data set and build the isotope list for every entry of
# `search_isos`, using charge 1 for each of them.
data(isotopes)
isos <- make.isos(
  isotopes,
  use_isotopes = search_isos,
  use_charges = rep(1, length(search_isos))
)
|
|
|
|
|
|
|
|
|
|
@ -74,8 +81,10 @@ isos <- make.isos(isotopes,use_isotopes=search_isos, |
|
|
|
|
|
|
|
# Returns column `key` of `table` for the rows that belong to `fragment`.
#
# fragment: index of the fragment; selects rows minint[fragment] through
#           maxint[fragment] of `table`.
# key:      column name (or index) to extract.
#
# Stops with an error when the fragment is not flagged TRUE in `usable`,
# which includes an out-of-range or NA index.
getdata <- function(fragment, key) {
  # isTRUE() turns an NA or empty lookup (e.g. an out-of-range index) into
  # the intended "does not exist" error instead of a failing `if` condition.
  if (!isTRUE(usable[fragment])) {
    stop(str_interp(
      "Fragment $[d]{fragment} does not exist in the data set",
      list(fragment = fragment)
    ))
  }
  table[minint[fragment]:maxint[fragment], key]
}
|
|
@ -87,7 +96,11 @@ getdataframe <- function (fragment) { |
|
|
|
mz <- getdata(fragment, columns.mz) |
|
|
|
time <- getdata(fragment, columns.time) |
|
|
|
Intensity <- getdata(fragment, columns.intensity) |
|
|
|
return(data.frame(mz=mz, Intensity=Intensity,time=time)) |
|
|
|
return(data.frame( |
|
|
|
mz = mz, |
|
|
|
Intensity = Intensity, |
|
|
|
time = time |
|
|
|
)) |
|
|
|
} |
|
|
|
|
|
|
|
# returns both pattern.search result and fragment number. |
|
|
@ -107,7 +120,8 @@ diagnostics <- function(fragment) { |
|
|
|
rules = rep(TRUE, 11), |
|
|
|
deter = FALSE, |
|
|
|
entry = 50 |
|
|
|
); |
|
|
|
) |
|
|
|
|
|
|
|
return(c(ptrn, fragment)) |
|
|
|
} |
|
|
|
|
|
|
@ -147,12 +161,15 @@ derive.frame <- function(result) { |
|
|
|
derive.intensity(result), |
|
|
|
derive.average(result) |
|
|
|
) |
|
|
|
names(res) <- c("Fragment_ID", "Peaks", "Intensities", "Average_time") |
|
|
|
names(res) <- |
|
|
|
c("Fragment_ID", "Peaks", "Intensities", "Average_time") |
|
|
|
return(res) |
|
|
|
} |
|
|
|
|
|
|
|
verbose.dump <- function(result) { |
|
|
|
path <- file.path(verbose.outputdir, paste(derive.fragment(result), ".txt", |
|
|
|
path <- |
|
|
|
file.path(verbose.outputdir, |
|
|
|
paste(derive.fragment(result), ".txt", |
|
|
|
sep = "")) |
|
|
|
sink(path) |
|
|
|
dput(result) |
|
|
@ -175,39 +192,51 @@ for (i in seq(1, nrow(table))) { |
|
|
|
# make fragment unusable if it does not contain enough data points.
# seq_len() keeps the loop empty when there are no fragments, and the braces
# make sure the flag is cleared only for fragments that are too small.
for (i in seq_len(fragments)) {
  if (maxint[i] - minint[i] + 1 < min_cluster_size) {
    usable[i] <- FALSE
  }
}

# Filter all usable fragments
use <- Filter(function(x) usable[x], seq_len(fragments))

# Apply diagnostics function to all usable fragments (in parallel).
results <- mclapply(use, diagnostics, mc.cores = use_cores)

# Keep only the results for which positive() holds.
results.positive <- Filter(positive, results)
|
|
|
|
|
|
|
# print to files if verbose output requested
if (verbose.enable) {
  if (!dir.exists(verbose.outputdir)) {
    stop(str_interp(
      "Directory $[s]{dir} does not exist",
      list(dir = verbose.outputdir)
    ))
  }
  # Ask for confirmation only when a user is present: in a non-interactive
  # session readline() returns "" immediately, which would always abort.
  if (interactive()) {
    res <- readline(prompt = paste(
      "Are you sure you want to overwrite files in ",
      verbose.outputdir,
      "? [y/N] "
    ))
    if (res != "y") {
      stop("Process aborted.")
    }
  }
  print(paste("printing output to ", verbose.outputdir))
  # Dump each positive result exactly once. invisible() keeps the list of
  # return values from being auto-printed at top level.
  invisible(lapply(results.positive, verbose.dump))
}
|
|
|
|
|
|
|
# Derive the summaries of the positive results. Each variable is a list with
# one element per entry of `results.positive`.
result.frame <- lapply(results.positive, derive.frame)
result.fragment <- lapply(results.positive, derive.fragment)
result.iso <- lapply(results.positive, derive.iso)
result.intensity <- lapply(results.positive, derive.intensity)
result.average <- lapply(results.positive, derive.average)

##############################################################################
print("All Fragments that contain desired isotopes:")
print(result.frame)