From 7e40feedb034c5664221dfe7d73993628e2f1a49 Mon Sep 17 00:00:00 2001
From: junikimm717 <junikimm717@gmail.com>
Date: Tue, 20 Jul 2021 22:55:41 -0400
Subject: [PATCH] changed README and copyright notice

---
 README.md        | 23 +++++++++++++++--------
 src/script.def.R | 49 ++++++++++++++++++++++++++++++++++--------------
 2 files changed, 50 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 0ab1911..80bc8ec 100644
--- a/README.md
+++ b/README.md
@@ -6,19 +6,26 @@ choosing (preferably halogenated compounds).
 
 ## Installation
 
-The script has the following dependencies:
+The script uses the following packages:
+
 - latest version of the [Nontarget R Package](https://github.com/blosloos/nontarget) 
-(This should be installed
-through the devtools package)
-- [purrr](https://www.rdocumentation.org/packages/purrr/versions/0.2.5)
+(It is recommended to install this package through the devtools package)
 - [enviPat](https://rdocumentation.org/packages/enviPat/versions/2.2)
 - [stringr](https://www.rdocumentation.org/packages/stringr/versions/1.4.0)
 - [parallel](https://www.rdocumentation.org/packages/parallel/versions/3.6.2)
 
 Before running/configuring the script, call `make` while in the `/src`
-directory or copy `script.def.R` to `script.R`.
+directory or simply copy `/src/script.def.R` to `/src/script.R`.
+
+## Configuration
+
+All configuration variables are located at the top of the script.
+
+## Output
 
-## Additional Notes
+The script should print a list with data for all clusters that contain the
+isotopes to be searched for.
 
-In order to make the search run in linear time, as of right now, the entire
-data table will be sorted before searching.
+If `verbose.enable` is set to `TRUE`, files of the form `(fragment number).txt`
+containing the `pattern.search` output will be written to the directory
+specified in `verbose.outputdir`, assuming the directory already exists.
diff --git a/src/script.def.R b/src/script.def.R
index 599b3ed..551e5ec 100644
--- a/src/script.def.R
+++ b/src/script.def.R
@@ -1,18 +1,34 @@
+# Copyright (C) 2021 Juni Christopher Kim
+# This script is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This script is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this script.  If not, see <http://www.gnu.org/licenses/>.
+
 library("nontarget")
 library("enviPat")
 library("stringr")
 library("parallel")
 
 # Configurations #############################################################
+
 # file to read from
-input.filename <- ""
+input.filename <- " "
 # separator to use (\t for tabs, "," for CSV)
 input.sep <- ","
 
-# All isotopes to check for
+# All isotopes to check for in search
 check_isos <- c("37Cl", "81Br")
 
-# Minimum size of a cluster
+# Minimum size of a cluster (clusters smaller than this size will not
+# be accepted)
 min_cluster_size <- 2
 
 # Number of cores to be used in pattern.search evaluation
@@ -21,8 +37,7 @@ use_cores <- 6
 
 
 # Table name configuration
-
-# Column name for m/z
+# Column name for m/z values
 columns.mz <- "mz"
 # Column name for time in gc
 columns.time <- "time"
@@ -36,13 +51,13 @@ columns.spectra <- "Spectra_Number"
 # Number of highest intensities to display in output
 output.intensities <- 5
 
-# Verbose output
-verbose.enable <- TRUE
+# Verbose output (dumps the pattern.search output into per-fragment files)
+verbose.enable <- FALSE
 # output directory must already exist.
 verbose.outputdir <- ""
 
 
-# Script #####################################################################
+# Variables/Functions ########################################################
 
 ## Defining/setting variables ################################################
 
@@ -54,7 +69,7 @@ if (!("13C" %in% search_isos)) {
 }
 # Read in the Table and sort by spectra ID
 table <- read.table(input.filename, header = TRUE, sep = input.sep)
-table <- table[order(table[, columns.spectra]), ]
+table <- table[order(table[, columns.spectra]),]
 
 # Organize the tables by number
 fragments <- max(table[, columns.spectra])
@@ -67,7 +82,7 @@ use_cores = min(use_cores, detectCores() - 1)
 minint <- rep(0, fragments)
 # upper bound of indices per fragment
 maxint <- rep(0, fragments)
-# memoizes as to
+# memoizes which clusters are usable (they exist and contain enough data)
 usable <- rep(FALSE, fragments)
 
 # initialize isotopes.
@@ -139,7 +154,9 @@ derive.fragment <- function(result) {
 
 derive.iso <- function(result) {
     res <- result$`Counts of isotopes`
-    return (res[res$"isotope" %in% check_isos, c("isotope", "group counts")])
+    res <-
+        res[res$"isotope" %in% check_isos, c("isotope", "group counts")]
+    return(res)
 }
 
 derive.intensity <- function(result) {
@@ -162,7 +179,7 @@ derive.frame <- function(result) {
         derive.average(result)
     )
     names(res) <-
-        c("Fragment_ID", "Peaks", "Intensities", "Average_time")
+        c("Fragment_ID", "Isotopes", "Intensities", "Average_time")
     return(res)
 }
 
@@ -195,6 +212,8 @@ for (i in 1:fragments) {
         usable[i] <- FALSE
 }
 
+## Run pattern.search function ##############################################
+
 # Filter all usable fragments
 use <- Filter(function(x)
     usable[x], 1:fragments)
@@ -205,7 +224,7 @@ results <- mclapply(use, diagnostics, mc.cores = use_cores)
 results.positive <- Filter(function(x)
     positive(x), results)
 
-# print to files if verbose output requested
+## Verbose output handling ##################################################
 
 if (verbose.enable) {
     if (!dir.exists(verbose.outputdir)) {
@@ -232,6 +251,8 @@ if (verbose.enable) {
     lapply(results.positive, verbose.dump)
 }
 
+## Data Analysis/Evaluation #################################################
+
 result.frame <- lapply(results.positive, derive.frame)
 result.fragment <- lapply(results.positive, derive.fragment)
 result.iso <- lapply(results.positive, derive.iso)
@@ -239,4 +260,4 @@ result.intensity <- lapply(results.positive, derive.intensity)
 result.average <- lapply(results.positive, derive.average)
 
 print("All Fragments that contain desired isotopes:")
-print(result.frame)
\ No newline at end of file
+print(result.frame)