From 7e40feedb034c5664221dfe7d73993628e2f1a49 Mon Sep 17 00:00:00 2001 From: junikimm717 Date: Tue, 20 Jul 2021 22:55:41 -0400 Subject: [PATCH] changed README and copyright notice --- README.md | 23 +++++++++++++++-------- src/script.def.R | 49 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 0ab1911..80bc8ec 100644 --- a/README.md +++ b/README.md @@ -6,19 +6,26 @@ choosing (preferably halogenated compounds). ## Installation -The script has the following dependencies: +The script uses the following packages: + - latest version of the [Nontarget R Package](https://github.com/blosloos/nontarget) -(This should be installed -through the devtools package) -- [purrr](https://www.rdocumentation.org/packages/purrr/versions/0.2.5) +(It is recommended to install this package through the devtools package) - [enviPat](https://rdocumentation.org/packages/enviPat/versions/2.2) - [stringr](https://www.rdocumentation.org/packages/stringr/versions/1.4.0) - [parallel](https://www.rdocumentation.org/packages/parallel/versions/3.6.2) Before running/configuring the script, call `make` while in the `/src` -directory or copy `script.def.R` to `script.R`. +directory or simply copy `/src/script.def.R` to `/src/script.R`. + +## Configuration + +All configuration variables are located at the top of the script. + +## Output -## Additional Notes +The script should print a list with data for all clusters that contain the +isotopes to be searched for. -In order to make the search run in linear time, as of right now, the entire -data table will be sorted before searching. +If `verbose.enable` is set to `TRUE`, files of the form `(fragment number).txt` +containing the `pattern.search` output will be printed to the directory +specified in `verbose.outputdir`, assuming the directory already exists. 
diff --git a/src/script.def.R b/src/script.def.R index 599b3ed..551e5ec 100644 --- a/src/script.def.R +++ b/src/script.def.R @@ -1,18 +1,34 @@ +# Copyright (C) 2021-2021 Juni Christopher Kim +# This script is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This script is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Copyright Header. If not, see <https://www.gnu.org/licenses/>. + library("nontarget") library("enviPat") library("stringr") library("parallel") # Configurations ############################################################# + # file to read from -input.filename <- "" +input.filename <- " " # separator to use (\t for tabs, "," for CSV) input.sep <- "," -# All isotopes to check for +# All isotopes to check for in search check_isos <- c("37Cl", "81Br") -# Minimum size of a cluster +# Minimum size of a cluster (clusters smaller than this size will not +# be accepted) min_cluster_size <- 2 # Number of cores to be used in pattern.search evaluation @@ -21,8 +37,7 @@ use_cores <- 6 # Table name configuration - -# Column name for m/z +# Column name for m/z values columns.mz <- "mz" # Column name for time in gc columns.time <- "time" @@ -36,13 +51,13 @@ columns.spectra <- "Spectra_Number" # Number of highest intensities to display in output output.intensities <- 5 -# Verbose output -verbose.enable <- TRUE +# Verbose output (dumping all output into a file) +verbose.enable <- FALSE # output directory must already exist. 
verbose.outputdir <- "" -# Script ##################################################################### +# Variables/Functions ######################################################## ## Defining/setting variables ################################################ @@ -54,7 +69,7 @@ if (!("13C" %in% search_isos)) { } # Read in the Table and sort by spectra ID table <- read.table(input.filename, header = TRUE, sep = input.sep) -table <- table[order(table[, columns.spectra]), ] +table <- table[order(table[, columns.spectra]),] # Organize the tables by number fragments <- max(table[, columns.spectra]) @@ -67,7 +82,7 @@ use_cores = min(use_cores, detectCores() - 1) minint <- rep(0, fragments) # upper bound of indices per fragment maxint <- rep(0, fragments) -# memoizes as to +# memoizes what clusters are usable (exist and enough data sets) usable <- rep(FALSE, fragments) # initialize isotopes. @@ -139,7 +154,9 @@ derive.fragment <- function(result) { derive.iso <- function(result) { res <- result$`Counts of isotopes` - return (res[res$"isotope" %in% check_isos, c("isotope", "group counts")]) + res <- + res[res$"isotope" %in% check_isos, c("isotope", "group counts")] + return(res) } derive.intensity <- function(result) { @@ -162,7 +179,7 @@ derive.frame <- function(result) { derive.average(result) ) names(res) <- - c("Fragment_ID", "Peaks", "Intensities", "Average_time") + c("Fragment_ID", "Isotopes", "Intensities", "Average_time") return(res) } @@ -195,6 +212,8 @@ for (i in 1:fragments) { usable[i] <- FALSE } +## Run pattern.search function ############################################## + # Filter all usable fragments use <- Filter(function(x) usable[x], 1:fragments) @@ -205,7 +224,7 @@ results <- mclapply(use, diagnostics, mc.cores = use_cores) results.positive <- Filter(function(x) positive(x), results) -# print to files if verbose output requested +## Verbose output handling ################################################## if (verbose.enable) { if 
(!dir.exists(verbose.outputdir)) { @@ -232,6 +251,8 @@ if (verbose.enable) { lapply(results.positive, verbose.dump) } +## Data Analysis/Evaluation :################################################ + result.frame <- lapply(results.positive, derive.frame) result.fragment <- lapply(results.positive, derive.fragment) result.iso <- lapply(results.positive, derive.iso) @@ -239,4 +260,4 @@ result.intensity <- lapply(results.positive, derive.intensity) result.average <- lapply(results.positive, derive.average) print("All Fragments that contain desired isotopes:") -print(result.frame) \ No newline at end of file +print(result.frame)