noninteractive corrections and reformat

5 years ago · 02b5115a28
2 changed files with 97 additions and 66 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
 /data
 /.RData
 /.Rhistory
+/.Rproj.user
+/*.Rproj
--- a/src/script.def.R
+++ b/src/script.def.R
@@ -4,18 +4,22 @@ library("stringr")
 library("parallel")

 # Configurations #############################################################
-# file : decides which file to read in data from #############################
-file <- "/path/to/csv"
+# file to read from
+input.filename <- ""
+# separator to use ("\t" for tab-separated files, "," for CSV)
+input.sep <- ","

 # All isotopes to check for
-check_isos <- c("37Cl", "81Br", "34S")
+check_isos <- c("37Cl", "81Br")

 # Minimum size of a cluster
 min_cluster_size <- 2

-# Number of cores to be used (will be adjusted if it exceeds the true number of cores)
+# Number of cores to be used in pattern.search evaluation
+# (will be adjusted if it exceeds the true number of cores)
 use_cores <- 6

+
 # Table name configuration

 # Column name for m/z
@@ -28,56 +32,61 @@ columns.intensity <- "Intensity"
 columns.spectra <- "Spectra_Number"

 # output options
+
+# Number of highest intensities to display in output
 output.intensities <- 5

 # Verbose output
-verbose.enable <- FALSE
+verbose.enable <- TRUE
 # output directory must already exist.
-verbose.outputdir <- "/path/to/directory"
+verbose.outputdir <- ""


 # Script #####################################################################

 ## Defining/setting variables ################################################

-# C-13 is required in the rules of pattern.search, so it must be included. ###
+# C-13 is required in the rules of pattern.search.
 search_isos <- check_isos
 iso_length <- length(search_isos)
 if (!("13C" %in% search_isos)) {
    search_isos <- append(search_isos, "13C")
 }
 # Read in the Table and sort by spectra ID
-table <- read.table(file, header=TRUE, sep=",")
-table <- table[order(table[,columns.spectra]),]
+table <- read.table(input.filename, header = TRUE, sep = input.sep)
+table <- table[order(table[, columns.spectra]), ]

 # Organize the tables by number
-fragments <- max(table[,columns.spectra])
+fragments <- max(table[, columns.spectra])

 # set use_cores to safe amount
-use_cores = min(use_cores, detectCores()-1)
+use_cores = min(use_cores, detectCores() - 1)

-# lower bound of indices per fragment 
+# lower bound of indices per fragment
 # (sorting guarantees that data points of same fragment are together)
-minint <- rep(0,fragments)
+minint <- rep(0, fragments)
 # upper bound of indices per fragment
-maxint <- rep(0,fragments)
-# memoizes as to 
-usable <- rep(FALSE,fragments)
+maxint <- rep(0, fragments)
+# memoizes whether each fragment is usable (present in the data set)
+usable <- rep(FALSE, fragments)

 # initialize isotopes.
 data(isotopes)
-isos <- make.isos(isotopes,use_isotopes=search_isos, 
-                  use_charges=rep(1, length(search_isos)))
-                  
+isos <- make.isos(isotopes,
+                  use_isotopes = search_isos,
+                  use_charges = rep(1, length(search_isos)))
+

 ## Setting functions #########################################################

 getdata <- function(fragment, key) {
-    if (! usable[fragment]) {
-        stop(str_interp("Fragment $[d]{fragment} does not exist in the data set", 
-                    list(fragment=fragment)))
+    if (!usable[fragment]) {
+        stop(str_interp(
+            "Fragment $[d]{fragment} does not exist in the data set",
+            list(fragment = fragment)
+        ))
    }
-    return(table[minint[fragment]:maxint[fragment],key])
+    return(table[minint[fragment]:maxint[fragment], key])
 }

 # Add all data frames as necessary for evaluation.
@@ -87,7 +96,11 @@ getdataframe <- function (fragment) {
    mz <- getdata(fragment, columns.mz)
    time <- getdata(fragment, columns.time)
    Intensity <- getdata(fragment, columns.intensity)
-    return(data.frame(mz=mz, Intensity=Intensity,time=time))
+    return(data.frame(
+        mz = mz,
+        Intensity = Intensity,
+        time = time
+    ))
 }

 # returns both pattern.search result and fragment number.
@@ -97,17 +110,18 @@ diagnostics <- function(fragment) {
    ptrn <- pattern.search(
        points,
        isos,
-        cutint=1000,
-        rttol=c(-20,20),
+        cutint = 1000,
+        rttol = c(-20, 20),
        # kept because of gc limitations.
-        mztol=3,
-        mzfrac=0.1,
-        ppm=TRUE,
-        inttol=0.05,
-        rules=rep(TRUE,11),
-        deter=FALSE,
-        entry=50
-    );
+        mztol = 3,
+        mzfrac = 0.1,
+        ppm = TRUE,
+        inttol = 0.05,
+        rules = rep(TRUE, 11),
+        deter = FALSE,
+        entry = 50
+    )
+    
    return(c(ptrn, fragment))
 }

@@ -115,7 +129,7 @@ diagnostics <- function(fragment) {

 positive <- function(result) {
    res <- result$`Counts of isotopes`
-    v <- res[res$"isotope" %in% check_isos,"peak counts"]
+    v <- res[res$"isotope" %in% check_isos, "peak counts"]
    return (!all(v %in% c(0)))
 }

@@ -125,35 +139,38 @@ derive.fragment <- function(result) {

 derive.iso <- function(result) {
    res <- result$`Counts of isotopes`
-    return (res[res$"isotope" %in% check_isos,c("isotope", "group counts")])
+    return (res[res$"isotope" %in% check_isos, c("isotope", "group counts")])
 }

 derive.intensity <- function(result) {
    ptrn <- result$Patterns
    N <- max(output.intensities, nrow(result))
-    inds <- order(ptrn[,columns.intensity], decreasing=TRUE)[1:N]
+    inds <- order(ptrn[, columns.intensity], decreasing = TRUE)[1:N]
    return(ptrn[inds, c(2, 1, 3)])
 }

 derive.average <- function(result) {
-    ptrn <- result$Patterns[,2]
+    ptrn <- result$Patterns[, 2]
    return(mean(ptrn))
 }

 derive.frame <- function(result) {
    res <- list(
-            derive.fragment(result),
-            derive.iso(result),
-            derive.intensity(result),
-            derive.average(result)
-        )
-    names(res) <- c("Fragment_ID", "Peaks", "Intensities", "Average_time")
+        derive.fragment(result),
+        derive.iso(result),
+        derive.intensity(result),
+        derive.average(result)
+    )
+    names(res) <-
+        c("Fragment_ID", "Peaks", "Intensities", "Average_time")
    return(res)
 }

 verbose.dump <- function(result) {
-    path <- file.path(verbose.outputdir, paste(derive.fragment(result), ".txt",
-                                           sep=""))
+    path <-
+        file.path(verbose.outputdir,
+                  paste(derive.fragment(result), ".txt",
+                        sep = ""))
    sink(path)
    dput(result)
    sink()
@@ -165,7 +182,7 @@ verbose.dump <- function(result) {

 for (i in seq(1, nrow(table))) {
    fragment <- table[i, columns.spectra]
-    if (! usable[fragment]) {
+    if (!usable[fragment]) {
        minint[fragment] <- i
    }
    maxint[fragment] <- max(maxint[fragment], i)
@@ -175,39 +192,51 @@ for (i in seq(1, nrow(table))) {
 # make fragment unusable if it does not contain enough data points.
 for (i in 1:fragments) {
    if (maxint[i] - minint[i] + 1 < min_cluster_size)
-        usable[i] = FALSE
+        usable[i] <- FALSE
 }

 # Filter all usable fragments
-use <- Filter(function(x) usable[x], 1:fragments)
+use <- Filter(function(x)
+    usable[x], 1:fragments)

 # Apply diagnostics function to all usable fragments (in parallel).
-results <- mclapply(use, diagnostics, mc.cores=use_cores)
+results <- mclapply(use, diagnostics, mc.cores = use_cores)

-results.positive <- Filter(function(x) positive(x), results)
+results.positive <- Filter(function(x)
+    positive(x), results)

 # print to files if verbose output requested

 if (verbose.enable) {
    if (!dir.exists(verbose.outputdir)) {
-        stop(str_interp("Directory $[s]{dir} does not exist", 
-                        list(dir=verbose.outputdir)))
+        stop(str_interp(
+            "Directory $[s]{dir} does not exist",
+            list(dir = verbose.outputdir)
+        ))
    }
-    res <- readline(prompt=paste("Are you sure you want to overwrite files in ", 
-                                 verbose.outputdir, "? [y/N] "))
-    if (res != "y") {
-        stop("Process aborted.")
+    if (interactive()) {
+        res <-
+            readline(
+                prompt = paste(
+                    "Are you sure you want to overwrite files in ",
+                    verbose.outputdir,
+                    "? [y/N] "
+                )
+            )
+        if (res != "y")
+        {
+            stop("Process aborted.")
+        }
    }
    print(paste("printing output to ", verbose.outputdir))
-    mapply(verbose.dump, results.positive)
+    lapply(results.positive, verbose.dump)
 }

-result.frame <- mapply(derive.frame, results.positive)
-result.fragment <- mapply(derive.fragment, results.positive)
-result.iso <- mapply(derive.iso, results.positive)
-result.intensity <- mapply(derive.intensity, results.positive)
-result.average <- mapply(derive.average, results.positive)
-
-print(result.fragment)
+result.frame <- lapply(results.positive, derive.frame)
+result.fragment <- lapply(results.positive, derive.fragment)
+result.iso <- lapply(results.positive, derive.iso)
+result.intensity <- lapply(results.positive, derive.intensity)
+result.average <- lapply(results.positive, derive.average)

-##############################################################################
+print("All Fragments that contain desired isotopes:")
+print(result.frame)