Browse Source

noninteractive corrections and reformat

master
Juni Kim 3 years ago
parent
commit
02b5115a28
  1. 2
      .gitignore
  2. 143
      src/script.def.R

2
.gitignore

@ -2,3 +2,5 @@
/data /data
/.RData /.RData
/.Rhistory /.Rhistory
/.Rproj.user
/*.Rproj

143
src/script.def.R

@ -4,18 +4,22 @@ library("stringr")
library("parallel") library("parallel")
# Configurations ############################################################# # Configurations #############################################################
# file : decides which file to read in data from #############################
file <- "/path/to/csv"
# file to read from
input.filename <- ""
# separator to use (\t for tabs, "," for CSV)
input.sep <- ","
# All isotopes to check for # All isotopes to check for
check_isos <- c("37Cl", "81Br", "34S")
check_isos <- c("37Cl", "81Br")
# Minimum size of a cluster # Minimum size of a cluster
min_cluster_size <- 2 min_cluster_size <- 2
# Number of cores to be used (will be adjusted if it exceeds the true number of cores)
# Number of cores to be used in pattern.search evaluation
# (will be adjusted if it exceeds the true number of cores)
use_cores <- 6 use_cores <- 6
# Table name configuration # Table name configuration
# Column name for m/z # Column name for m/z
@ -28,56 +32,61 @@ columns.intensity <- "Intensity"
columns.spectra <- "Spectra_Number" columns.spectra <- "Spectra_Number"
# output options # output options
# Number of highest intensities to display in output
output.intensities <- 5 output.intensities <- 5
# Verbose output # Verbose output
verbose.enable <- FALSE
verbose.enable <- TRUE
# output directory must already exist. # output directory must already exist.
verbose.outputdir <- "/path/to/directory"
verbose.outputdir <- ""
# Script ##################################################################### # Script #####################################################################
## Defining/setting variables ################################################ ## Defining/setting variables ################################################
# C-13 is required in the rules of pattern.search, so it must be included. ###
# C-13 is required in the rules of pattern.search.
search_isos <- check_isos search_isos <- check_isos
iso_length <- length(search_isos) iso_length <- length(search_isos)
if (!("13C" %in% search_isos)) { if (!("13C" %in% search_isos)) {
search_isos <- append(search_isos, "13C") search_isos <- append(search_isos, "13C")
} }
# Read in the Table and sort by spectra ID # Read in the Table and sort by spectra ID
table <- read.table(file, header=TRUE, sep=",")
table <- table[order(table[,columns.spectra]),]
table <- read.table(input.filename, header = TRUE, sep = input.sep)
table <- table[order(table[, columns.spectra]), ]
# Organize the tables by number # Organize the tables by number
fragments <- max(table[,columns.spectra])
fragments <- max(table[, columns.spectra])
# set use_cores to safe amount # set use_cores to safe amount
use_cores = min(use_cores, detectCores()-1)
use_cores = min(use_cores, detectCores() - 1)
# lower bound of indices per fragment # lower bound of indices per fragment
# (sorting guarantees that data points of same fragment are together) # (sorting guarantees that data points of same fragment are together)
minint <- rep(0,fragments)
minint <- rep(0, fragments)
# upper bound of indices per fragment # upper bound of indices per fragment
maxint <- rep(0,fragments)
maxint <- rep(0, fragments)
# memoizes as to # memoizes as to
usable <- rep(FALSE,fragments)
usable <- rep(FALSE, fragments)
# initialize isotopes. # initialize isotopes.
data(isotopes) data(isotopes)
isos <- make.isos(isotopes,use_isotopes=search_isos,
use_charges=rep(1, length(search_isos)))
isos <- make.isos(isotopes,
use_isotopes = search_isos,
use_charges = rep(1, length(search_isos)))
## Setting functions ######################################################### ## Setting functions #########################################################
getdata <- function(fragment, key) { getdata <- function(fragment, key) {
if (! usable[fragment]) {
stop(str_interp("Fragment $[d]{fragment} does not exist in the data set",
list(fragment=fragment)))
if (!usable[fragment]) {
stop(str_interp(
"Fragment $[d]{fragment} does not exist in the data set",
list(fragment = fragment)
))
} }
return(table[minint[fragment]:maxint[fragment],key])
return(table[minint[fragment]:maxint[fragment], key])
} }
# Add all data frames as necessary for evaluation. # Add all data frames as necessary for evaluation.
@ -87,7 +96,11 @@ getdataframe <- function (fragment) {
mz <- getdata(fragment, columns.mz) mz <- getdata(fragment, columns.mz)
time <- getdata(fragment, columns.time) time <- getdata(fragment, columns.time)
Intensity <- getdata(fragment, columns.intensity) Intensity <- getdata(fragment, columns.intensity)
return(data.frame(mz=mz, Intensity=Intensity,time=time))
return(data.frame(
mz = mz,
Intensity = Intensity,
time = time
))
} }
# returns both pattern.search result and fragment number. # returns both pattern.search result and fragment number.
@ -97,17 +110,18 @@ diagnostics <- function(fragment) {
ptrn <- pattern.search( ptrn <- pattern.search(
points, points,
isos, isos,
cutint=1000,
rttol=c(-20,20),
cutint = 1000,
rttol = c(-20, 20),
# kept because of gc limitations. # kept because of gc limitations.
mztol=3,
mzfrac=0.1,
ppm=TRUE,
inttol=0.05,
rules=rep(TRUE,11),
deter=FALSE,
entry=50
);
mztol = 3,
mzfrac = 0.1,
ppm = TRUE,
inttol = 0.05,
rules = rep(TRUE, 11),
deter = FALSE,
entry = 50
)
return(c(ptrn, fragment)) return(c(ptrn, fragment))
} }
@ -115,7 +129,7 @@ diagnostics <- function(fragment) {
positive <- function(result) { positive <- function(result) {
res <- result$`Counts of isotopes` res <- result$`Counts of isotopes`
v <- res[res$"isotope" %in% check_isos,"peak counts"]
v <- res[res$"isotope" %in% check_isos, "peak counts"]
return (!all(v %in% c(0))) return (!all(v %in% c(0)))
} }
@ -125,18 +139,18 @@ derive.fragment <- function(result) {
derive.iso <- function(result) { derive.iso <- function(result) {
res <- result$`Counts of isotopes` res <- result$`Counts of isotopes`
return (res[res$"isotope" %in% check_isos,c("isotope", "group counts")])
return (res[res$"isotope" %in% check_isos, c("isotope", "group counts")])
} }
derive.intensity <- function(result) { derive.intensity <- function(result) {
ptrn <- result$Patterns ptrn <- result$Patterns
N <- max(output.intensities, nrow(result)) N <- max(output.intensities, nrow(result))
inds <- order(ptrn[,columns.intensity], decreasing=TRUE)[1:N]
inds <- order(ptrn[, columns.intensity], decreasing = TRUE)[1:N]
return(ptrn[inds, c(2, 1, 3)]) return(ptrn[inds, c(2, 1, 3)])
} }
derive.average <- function(result) { derive.average <- function(result) {
ptrn <- result$Patterns[,2]
ptrn <- result$Patterns[, 2]
return(mean(ptrn)) return(mean(ptrn))
} }
@ -147,13 +161,16 @@ derive.frame <- function(result) {
derive.intensity(result), derive.intensity(result),
derive.average(result) derive.average(result)
) )
names(res) <- c("Fragment_ID", "Peaks", "Intensities", "Average_time")
names(res) <-
c("Fragment_ID", "Peaks", "Intensities", "Average_time")
return(res) return(res)
} }
verbose.dump <- function(result) { verbose.dump <- function(result) {
path <- file.path(verbose.outputdir, paste(derive.fragment(result), ".txt",
sep=""))
path <-
file.path(verbose.outputdir,
paste(derive.fragment(result), ".txt",
sep = ""))
sink(path) sink(path)
dput(result) dput(result)
sink() sink()
@ -165,7 +182,7 @@ verbose.dump <- function(result) {
for (i in seq(1, nrow(table))) { for (i in seq(1, nrow(table))) {
fragment <- table[i, columns.spectra] fragment <- table[i, columns.spectra]
if (! usable[fragment]) {
if (!usable[fragment]) {
minint[fragment] <- i minint[fragment] <- i
} }
maxint[fragment] <- max(maxint[fragment], i) maxint[fragment] <- max(maxint[fragment], i)
@ -175,39 +192,51 @@ for (i in seq(1, nrow(table))) {
# make fragment unusable if it does not contain enough data points. # make fragment unusable if it does not contain enough data points.
for (i in 1:fragments) { for (i in 1:fragments) {
if (maxint[i] - minint[i] + 1 < min_cluster_size) if (maxint[i] - minint[i] + 1 < min_cluster_size)
usable[i] = FALSE
usable[i] <- FALSE
} }
# Filter all usable fragments # Filter all usable fragments
use <- Filter(function(x) usable[x], 1:fragments)
use <- Filter(function(x)
usable[x], 1:fragments)
# Apply diagnostics function to all usable fragments (in parallel). # Apply diagnostics function to all usable fragments (in parallel).
results <- mclapply(use, diagnostics, mc.cores=use_cores)
results <- mclapply(use, diagnostics, mc.cores = use_cores)
results.positive <- Filter(function(x) positive(x), results)
results.positive <- Filter(function(x)
positive(x), results)
# print to files if verbose output requested # print to files if verbose output requested
if (verbose.enable) { if (verbose.enable) {
if (!dir.exists(verbose.outputdir)) { if (!dir.exists(verbose.outputdir)) {
stop(str_interp("Directory $[s]{dir} does not exist",
list(dir=verbose.outputdir)))
stop(str_interp(
"Directory $[s]{dir} does not exist",
list(dir = verbose.outputdir)
))
} }
res <- readline(prompt=paste("Are you sure you want to overwrite files in ",
verbose.outputdir, "? [y/N] "))
if (res != "y") {
if (interactive()) {
res <-
readline(
prompt = paste(
"Are you sure you want to overwrite files in ",
verbose.outputdir,
"? [y/N] "
)
)
if (res != "y")
{
stop("Process aborted.") stop("Process aborted.")
} }
}
print(paste("printing output to ", verbose.outputdir)) print(paste("printing output to ", verbose.outputdir))
mapply(verbose.dump, results.positive)
lapply(results.positive, verbose.dump)
} }
result.frame <- mapply(derive.frame, results.positive)
result.fragment <- mapply(derive.fragment, results.positive)
result.iso <- mapply(derive.iso, results.positive)
result.intensity <- mapply(derive.intensity, results.positive)
result.average <- mapply(derive.average, results.positive)
print(result.fragment)
result.frame <- lapply(results.positive, derive.frame)
result.fragment <- lapply(results.positive, derive.fragment)
result.iso <- lapply(results.positive, derive.iso)
result.intensity <- lapply(results.positive, derive.intensity)
result.average <- lapply(results.positive, derive.average)
##############################################################################
print("All Fragments that contain desired isotopes:")
print(result.frame)
Loading…
Cancel
Save