From 02b5115a28b5572c12c9fd48bd873618277b80dc Mon Sep 17 00:00:00 2001 From: Juni Kim Date: Tue, 20 Jul 2021 18:58:07 -0400 Subject: [PATCH] noninteractive corrections and reformat --- .gitignore | 2 + src/script.def.R | 161 ++++++++++++++++++++++++++++------------------- 2 files changed, 97 insertions(+), 66 deletions(-) diff --git a/.gitignore b/.gitignore index 2c5219a..821ad25 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ /data /.RData /.Rhistory +/.Rproj.user +/*.Rproj diff --git a/src/script.def.R b/src/script.def.R index 220ba3e..599b3ed 100644 --- a/src/script.def.R +++ b/src/script.def.R @@ -4,18 +4,22 @@ library("stringr") library("parallel") # Configurations ############################################################# -# file : decides which file to read in data from ############################# -file <- "/path/to/csv" +# file to read from +input.filename <- "" +# separator to use (\t for tabs, "," for CSV) +input.sep <- "," # All isotopes to check for -check_isos <- c("37Cl", "81Br", "34S") +check_isos <- c("37Cl", "81Br") # Minimum size of a cluster min_cluster_size <- 2 -# Number of cores to be used (will be adjusted if it exceeds the true number of cores) +# Number of cores to be used in pattern.search evaluation +# (will be adjusted if it exceeds the true number of cores) use_cores <- 6 + # Table name configuration # Column name for m/z @@ -28,56 +32,61 @@ columns.intensity <- "Intensity" columns.spectra <- "Spectra_Number" # output options + +# Number of highest intensities to display in output output.intensities <- 5 # Verbose output -verbose.enable <- FALSE +verbose.enable <- TRUE # output directory must already exist. -verbose.outputdir <- "/path/to/directory" +verbose.outputdir <- "" # Script ##################################################################### ## Defining/setting variables ################################################ -# C-13 is required in the rules of pattern.search, so it must be included. ### +# C-13 is required in the rules of pattern.search. search_isos <- check_isos iso_length <- length(search_isos) if (!("13C" %in% search_isos)) { search_isos <- append(search_isos, "13C") } # Read in the Table and sort by spectra ID -table <- read.table(file, header=TRUE, sep=",") -table <- table[order(table[,columns.spectra]),] +table <- read.table(input.filename, header = TRUE, sep = input.sep) +table <- table[order(table[, columns.spectra]), ] # Organize the tables by number -fragments <- max(table[,columns.spectra]) +fragments <- max(table[, columns.spectra]) # set use_cores to safe amount -use_cores = min(use_cores, detectCores()-1) +use_cores = min(use_cores, detectCores() - 1) -# lower bound of indices per fragment +# lower bound of indices per fragment # (sorting guarantees that data points of same fragment are together) -minint <- rep(0,fragments) +minint <- rep(0, fragments) # upper bound of indices per fragment -maxint <- rep(0,fragments) -# memoizes as to -usable <- rep(FALSE,fragments) +maxint <- rep(0, fragments) +# memoizes as to +usable <- rep(FALSE, fragments) # initialize isotopes. data(isotopes) -isos <- make.isos(isotopes,use_isotopes=search_isos, - use_charges=rep(1, length(search_isos))) - +isos <- make.isos(isotopes, + use_isotopes = search_isos, + use_charges = rep(1, length(search_isos))) + ## Setting functions ######################################################### getdata <- function(fragment, key) { - if (! usable[fragment]) { - stop(str_interp("Fragment $[d]{fragment} does not exist in the data set", - list(fragment=fragment))) + if (!usable[fragment]) { + stop(str_interp( + "Fragment $[d]{fragment} does not exist in the data set", + list(fragment = fragment) + )) } - return(table[minint[fragment]:maxint[fragment],key]) + return(table[minint[fragment]:maxint[fragment], key]) } # Add all data frames as necessary for evaluation. @@ -87,7 +96,11 @@ getdataframe <- function (fragment) { mz <- getdata(fragment, columns.mz) time <- getdata(fragment, columns.time) Intensity <- getdata(fragment, columns.intensity) - return(data.frame(mz=mz, Intensity=Intensity,time=time)) + return(data.frame( + mz = mz, + Intensity = Intensity, + time = time + )) } # returns both pattern.search result and fragment number. @@ -97,17 +110,18 @@ diagnostics <- function(fragment) { ptrn <- pattern.search( points, isos, - cutint=1000, - rttol=c(-20,20), + cutint = 1000, + rttol = c(-20, 20), # kept because of gc limitations. - mztol=3, - mzfrac=0.1, - ppm=TRUE, - inttol=0.05, - rules=rep(TRUE,11), - deter=FALSE, - entry=50 - ); + mztol = 3, + mzfrac = 0.1, + ppm = TRUE, + inttol = 0.05, + rules = rep(TRUE, 11), + deter = FALSE, + entry = 50 + ) + return(c(ptrn, fragment)) } @@ -115,7 +129,7 @@ diagnostics <- function(fragment) { positive <- function(result) { res <- result$`Counts of isotopes` - v <- res[res$"isotope" %in% check_isos,"peak counts"] + v <- res[res$"isotope" %in% check_isos, "peak counts"] return (!all(v %in% c(0))) } @@ -125,35 +139,38 @@ derive.fragment <- function(result) { derive.iso <- function(result) { res <- result$`Counts of isotopes` - return (res[res$"isotope" %in% check_isos,c("isotope", "group counts")]) + return (res[res$"isotope" %in% check_isos, c("isotope", "group counts")]) } derive.intensity <- function(result) { ptrn <- result$Patterns N <- max(output.intensities, nrow(result)) - inds <- order(ptrn[,columns.intensity], decreasing=TRUE)[1:N] + inds <- order(ptrn[, columns.intensity], decreasing = TRUE)[1:N] return(ptrn[inds, c(2, 1, 3)]) } derive.average <- function(result) { - ptrn <- result$Patterns[,2] + ptrn <- result$Patterns[, 2] return(mean(ptrn)) } derive.frame <- function(result) { res <- list( - derive.fragment(result), - derive.iso(result), - derive.intensity(result), - derive.average(result) - ) - names(res) <- c("Fragment_ID", "Peaks", "Intensities", "Average_time") + derive.fragment(result), + derive.iso(result), + derive.intensity(result), + derive.average(result) + ) + names(res) <- + c("Fragment_ID", "Peaks", "Intensities", "Average_time") return(res) } verbose.dump <- function(result) { - path <- file.path(verbose.outputdir, paste(derive.fragment(result), ".txt", - sep="")) + path <- + file.path(verbose.outputdir, + paste(derive.fragment(result), ".txt", + sep = "")) sink(path) dput(result) sink() @@ -165,7 +182,7 @@ verbose.dump <- function(result) { for (i in seq(1, nrow(table))) { fragment <- table[i, columns.spectra] - if (! usable[fragment]) { + if (!usable[fragment]) { minint[fragment] <- i } maxint[fragment] <- max(maxint[fragment], i) @@ -175,39 +192,51 @@ for (i in seq(1, nrow(table))) { # make fragment unusable if it does not contain enough data points. for (i in 1:fragments) { if (maxint[i] - minint[i] + 1 < min_cluster_size) - usable[i] = FALSE + usable[i] <- FALSE } # Filter all usable fragments -use <- Filter(function(x) usable[x], 1:fragments) +use <- Filter(function(x) + usable[x], 1:fragments) # Apply diagnostics function to all usable fragments (in parallel). -results <- mclapply(use, diagnostics, mc.cores=use_cores) +results <- mclapply(use, diagnostics, mc.cores = use_cores) -results.positive <- Filter(function(x) positive(x), results) +results.positive <- Filter(function(x) + positive(x), results) # print to files if verbose output requested if (verbose.enable) { if (!dir.exists(verbose.outputdir)) { - stop(str_interp("Directory $[s]{dir} does not exist", - list(dir=verbose.outputdir))) + stop(str_interp( + "Directory $[s]{dir} does not exist", + list(dir = verbose.outputdir) + )) } - res <- readline(prompt=paste("Are you sure you want to overwrite files in ", - verbose.outputdir, "? [y/N] ")) - if (res != "y") { - stop("Process aborted.") + if (interactive()) { + res <- + readline( + prompt = paste( + "Are you sure you want to overwrite files in ", + verbose.outputdir, + "? [y/N] " + ) + ) + if (res != "y") + { + stop("Process aborted.") + } } print(paste("printing output to ", verbose.outputdir)) - mapply(verbose.dump, results.positive) + lapply(results.positive, verbose.dump) } -result.frame <- mapply(derive.frame, results.positive) -result.fragment <- mapply(derive.fragment, results.positive) -result.iso <- mapply(derive.iso, results.positive) -result.intensity <- mapply(derive.intensity, results.positive) -result.average <- mapply(derive.average, results.positive) - -print(result.fragment) +result.frame <- lapply(results.positive, derive.frame) +result.fragment <- lapply(results.positive, derive.fragment) +result.iso <- lapply(results.positive, derive.iso) +result.intensity <- lapply(results.positive, derive.intensity) +result.average <- lapply(results.positive, derive.average) -############################################################################## +print("All Fragments that contain desired isotopes:") +print(result.frame) \ No newline at end of file