From 02b5115a28b5572c12c9fd48bd873618277b80dc Mon Sep 17 00:00:00 2001
From: Juni Kim <junikimm717@gmail.com>
Date: Tue, 20 Jul 2021 18:58:07 -0400
Subject: [PATCH] noninteractive corrections and reformat

---
 .gitignore       |   2 +
 src/script.def.R | 161 ++++++++++++++++++++++++++++-------------------
 2 files changed, 97 insertions(+), 66 deletions(-)

diff --git a/.gitignore b/.gitignore
index 2c5219a..821ad25 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
 /data
 /.RData
 /.Rhistory
+/.Rproj.user
+/*.Rproj
diff --git a/src/script.def.R b/src/script.def.R
index 220ba3e..599b3ed 100644
--- a/src/script.def.R
+++ b/src/script.def.R
@@ -4,18 +4,22 @@ library("stringr")
 library("parallel")
 
 # Configurations #############################################################
-# file : decides which file to read in data from #############################
-file <- "/path/to/csv"
+# file to read from
+input.filename <- ""
+# separator to use (\t for tabs, "," for CSV)
+input.sep <- ","
 
 # All isotopes to check for
-check_isos <- c("37Cl", "81Br", "34S")
+check_isos <- c("37Cl", "81Br")
 
 # Minimum size of a cluster
 min_cluster_size <- 2
 
-# Number of cores to be used (will be adjusted if it exceeds the true number of cores)
+# Number of cores to be used in pattern.search evaluation
+# (will be adjusted if it exceeds the true number of cores)
 use_cores <- 6
 
+
 # Table name configuration
 
 # Column name for m/z
@@ -28,56 +32,61 @@ columns.intensity <- "Intensity"
 columns.spectra <- "Spectra_Number"
 
 # output options
+
+# Number of highest intensities to display in output
 output.intensities <- 5
 
 # Verbose output
-verbose.enable <- FALSE
+verbose.enable <- TRUE
 # output directory must already exist.
-verbose.outputdir <- "/path/to/directory"
+verbose.outputdir <- ""
 
 
 # Script #####################################################################
 
 ## Defining/setting variables ################################################
 
-# C-13 is required in the rules of pattern.search, so it must be included. ###
+# C-13 is required in the rules of pattern.search.
 search_isos <- check_isos
 iso_length <- length(search_isos)
 if (!("13C" %in% search_isos)) {
     search_isos <- append(search_isos, "13C")
 }
 # Read in the Table and sort by spectra ID
-table <- read.table(file, header=TRUE, sep=",")
-table <- table[order(table[,columns.spectra]),]
+table <- read.table(input.filename, header = TRUE, sep = input.sep)
+table <- table[order(table[, columns.spectra]), ]
 
 # Organize the tables by number
-fragments <- max(table[,columns.spectra])
+fragments <- max(table[, columns.spectra])
 
 # set use_cores to safe amount
-use_cores = min(use_cores, detectCores()-1)
+use_cores = min(use_cores, detectCores() - 1)
 
-# lower bound of indices per fragment 
+# lower bound of indices per fragment
 # (sorting guarantees that data points of same fragment are together)
-minint <- rep(0,fragments)
+minint <- rep(0, fragments)
 # upper bound of indices per fragment
-maxint <- rep(0,fragments)
-# memoizes as to 
-usable <- rep(FALSE,fragments)
+maxint <- rep(0, fragments)
+# memoizes as to
+usable <- rep(FALSE, fragments)
 
 # initialize isotopes.
 data(isotopes)
-isos <- make.isos(isotopes,use_isotopes=search_isos, 
-                  use_charges=rep(1, length(search_isos)))
-                  
+isos <- make.isos(isotopes,
+                  use_isotopes = search_isos,
+                  use_charges = rep(1, length(search_isos)))
+
 
 ## Setting functions #########################################################
 
 getdata <- function(fragment, key) {
-    if (! usable[fragment]) {
-        stop(str_interp("Fragment $[d]{fragment} does not exist in the data set", 
-                    list(fragment=fragment)))
+    if (!usable[fragment]) {
+        stop(str_interp(
+            "Fragment $[d]{fragment} does not exist in the data set",
+            list(fragment = fragment)
+        ))
     }
-    return(table[minint[fragment]:maxint[fragment],key])
+    return(table[minint[fragment]:maxint[fragment], key])
 }
 
 # Add all data frames as necessary for evaluation.
@@ -87,7 +96,11 @@ getdataframe <- function (fragment) {
     mz <- getdata(fragment, columns.mz)
     time <- getdata(fragment, columns.time)
     Intensity <- getdata(fragment, columns.intensity)
-    return(data.frame(mz=mz, Intensity=Intensity,time=time))
+    return(data.frame(
+        mz = mz,
+        Intensity = Intensity,
+        time = time
+    ))
 }
 
 # returns both pattern.search result and fragment number.
@@ -97,17 +110,18 @@ diagnostics <- function(fragment) {
     ptrn <- pattern.search(
         points,
         isos,
-        cutint=1000,
-        rttol=c(-20,20),
+        cutint = 1000,
+        rttol = c(-20, 20),
         # kept because of gc limitations.
-        mztol=3,
-        mzfrac=0.1,
-        ppm=TRUE,
-        inttol=0.05,
-        rules=rep(TRUE,11),
-        deter=FALSE,
-        entry=50
-    );
+        mztol = 3,
+        mzfrac = 0.1,
+        ppm = TRUE,
+        inttol = 0.05,
+        rules = rep(TRUE, 11),
+        deter = FALSE,
+        entry = 50
+    )
+    
     return(c(ptrn, fragment))
 }
 
@@ -115,7 +129,7 @@ diagnostics <- function(fragment) {
 
 positive <- function(result) {
     res <- result$`Counts of isotopes`
-    v <- res[res$"isotope" %in% check_isos,"peak counts"]
+    v <- res[res$"isotope" %in% check_isos, "peak counts"]
     return (!all(v %in% c(0)))
 }
 
@@ -125,35 +139,38 @@ derive.fragment <- function(result) {
 
 derive.iso <- function(result) {
     res <- result$`Counts of isotopes`
-    return (res[res$"isotope" %in% check_isos,c("isotope", "group counts")])
+    return (res[res$"isotope" %in% check_isos, c("isotope", "group counts")])
 }
 
 derive.intensity <- function(result) {
     ptrn <- result$Patterns
     N <- max(output.intensities, nrow(result))
-    inds <- order(ptrn[,columns.intensity], decreasing=TRUE)[1:N]
+    inds <- order(ptrn[, columns.intensity], decreasing = TRUE)[1:N]
     return(ptrn[inds, c(2, 1, 3)])
 }
 
 derive.average <- function(result) {
-    ptrn <- result$Patterns[,2]
+    ptrn <- result$Patterns[, 2]
     return(mean(ptrn))
 }
 
 derive.frame <- function(result) {
     res <- list(
-            derive.fragment(result),
-            derive.iso(result),
-            derive.intensity(result),
-            derive.average(result)
-        )
-    names(res) <- c("Fragment_ID", "Peaks", "Intensities", "Average_time")
+        derive.fragment(result),
+        derive.iso(result),
+        derive.intensity(result),
+        derive.average(result)
+    )
+    names(res) <-
+        c("Fragment_ID", "Peaks", "Intensities", "Average_time")
     return(res)
 }
 
 verbose.dump <- function(result) {
-    path <- file.path(verbose.outputdir, paste(derive.fragment(result), ".txt",
-                                           sep=""))
+    path <-
+        file.path(verbose.outputdir,
+                  paste(derive.fragment(result), ".txt",
+                        sep = ""))
     sink(path)
     dput(result)
     sink()
@@ -165,7 +182,7 @@ verbose.dump <- function(result) {
 
 for (i in seq(1, nrow(table))) {
     fragment <- table[i, columns.spectra]
-    if (! usable[fragment]) {
+    if (!usable[fragment]) {
         minint[fragment] <- i
     }
     maxint[fragment] <- max(maxint[fragment], i)
@@ -175,39 +192,51 @@ for (i in seq(1, nrow(table))) {
 # make fragment unusable if it does not contain enough data points.
 for (i in 1:fragments) {
     if (maxint[i] - minint[i] + 1 < min_cluster_size)
-        usable[i] = FALSE
+        usable[i] <- FALSE
 }
 
 # Filter all usable fragments
-use <- Filter(function(x) usable[x], 1:fragments)
+use <- Filter(function(x)
+    usable[x], 1:fragments)
 
 # Apply diagnostics function to all usable fragments (in parallel).
-results <- mclapply(use, diagnostics, mc.cores=use_cores)
+results <- mclapply(use, diagnostics, mc.cores = use_cores)
 
-results.positive <- Filter(function(x) positive(x), results)
+results.positive <- Filter(function(x)
+    positive(x), results)
 
 # print to files if verbose output requested
 
 if (verbose.enable) {
     if (!dir.exists(verbose.outputdir)) {
-        stop(str_interp("Directory $[s]{dir} does not exist", 
-                        list(dir=verbose.outputdir)))
+        stop(str_interp(
+            "Directory $[s]{dir} does not exist",
+            list(dir = verbose.outputdir)
+        ))
     }
-    res <- readline(prompt=paste("Are you sure you want to overwrite files in ", 
-                                 verbose.outputdir, "? [y/N] "))
-    if (res != "y") {
-        stop("Process aborted.")
+    if (interactive()) {
+        res <-
+            readline(
+                prompt = paste(
+                    "Are you sure you want to overwrite files in ",
+                    verbose.outputdir,
+                    "? [y/N] "
+                )
+            )
+        if (res != "y")
+        {
+            stop("Process aborted.")
+        }
     }
     print(paste("printing output to ", verbose.outputdir))
-    mapply(verbose.dump, results.positive)
+    lapply(results.positive, verbose.dump)
 }
 
-result.frame <- mapply(derive.frame, results.positive)
-result.fragment <- mapply(derive.fragment, results.positive)
-result.iso <- mapply(derive.iso, results.positive)
-result.intensity <- mapply(derive.intensity, results.positive)
-result.average <- mapply(derive.average, results.positive)
-
-print(result.fragment)
+result.frame <- lapply(results.positive, derive.frame)
+result.fragment <- lapply(results.positive, derive.fragment)
+result.iso <- lapply(results.positive, derive.iso)
+result.intensity <- lapply(results.positive, derive.intensity)
+result.average <- lapply(results.positive, derive.average)
 
-##############################################################################
+print("All Fragments that contain desired isotopes:")
+print(result.frame)
\ No newline at end of file