added README and streamlined configuration

4 years ago · 21b89f4e66
2 changed files with 45 additions and 13 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,19 @@
 # Patternmatch
 This is a short R wrapper around `pattern.search` that takes in a CSV file containing
 HRMS peaks and prints all of the fragment numbers that may contain isotopes of one's
 choosing (preferably those found in halogenated compounds).
 ## Installation
 The script has the following dependencies:
 - latest version of the [Nontarget R Package](https://github.com/blosloos/nontarget) (This should be installed through the devtools package)
 - [purrr](https://www.rdocumentation.org/packages/purrr/versions/0.2.5)
 - [enviPat](https://rdocumentation.org/packages/enviPat/versions/2.2)
 - [stringr](https://www.rdocumentation.org/packages/stringr/versions/1.4.0)
 - [parallel](https://www.rdocumentation.org/packages/parallel/versions/3.6.2)
 ## Additional Notes
 To keep data lookups during the search linear in time, the entire data table
 is currently sorted by spectra number before searching.
--- a/src/script.def.R
+++ b/src/script.def.R
@ -6,22 +6,31 @@ library("parallel")
 # Configurations #############################################################
 # file : decides which file to read in data from #############################
 file <- "/home/junikim/programming/patternmatch/data/allcluster_mz.csv"
 #file <- "/home/junikim/programming/patternmatch/data/15_Clusters_for_Tuning_29June21.txt"
 # check cluster 2846
 file <- "/path/to/script"
 # All isotopes to search for
 search_isos <- c("37Cl", "81Br")
 # Minimum size of a cluster
 min_cluster_size <- 2
 # Number of cores to be used (will be adjusted if not possible)
 # Number of cores to be used (will be adjusted if it exceeds the true number of cores)
 use_cores <- 6
 # Do not edit below.
 # Table name configuration
 # Column name for m/z
 columns.mz <- "mz"
 # Column name for time in gc
 columns.time <- "time"
 # Column name for intensities
 columns.intensity <- "Intensity"
 # Column name for fragment numbers (only numbers accepted)
 columns.spectra <- "Spectra_Number"
 ##############################################################################
 # Script
 iso_length <- length(search_isos)
 if (!("13C" %in% search_isos)) {
    search_isos <- append(search_isos, "13C")
@ -29,8 +38,11 @@ if (!("13C" %in% search_isos)) {
 # Read in the Table ##########################################################
 table <- read.table(file, header=TRUE, sep=",")
 # Sort table by spectra ID
 table <- table[order(table[,columns.spectra]),]
 # Organize the tables by number ##############################################
 fragments <- max(table[,"Spectra_Number"])
 fragments <- max(table[,columns.spectra])
 # The algorithm below guarantees linear complexity of looking up data points.#
@ -48,7 +60,7 @@ usable <- unlist(map(1:fragments, function(x) FALSE))
 # Set all of the intervals ###################################################
 for (i in seq(1, nrow(table))) {
    fragment <- table[i, "Spectra_Number"]
    fragment <- table[i, columns.spectra]
    if (! usable[fragment]) {
        minint[fragment] <- i
    }
@ -71,9 +83,10 @@ getdata <- function(fragment, key) {
 # Add all data frames as necessary for evaluation. ############################
 getdataframe <- function (fragment) {
    # Builds the data frame for a single fragment from three getdata() lookups
    # (m/z, time, intensity) and returns it with columns mz, Intensity, time.
    # NOTE(review): the next three lookups use hard-coded column names and are
    # immediately overwritten by the columns.* lookups that follow; they look
    # like the pre-change lines of this diff hunk shown next to their
    # replacements -- confirm that only the configurable set is meant to remain.
    mz <- getdata(fragment, "mz")
    time <- getdata(fragment, "time")
    Intensity <- getdata(fragment, "Intensity")
    # Lookups keyed by the configurable column names (columns.mz, columns.time,
    # columns.intensity), so renamed input columns are picked up here.
    mz <- getdata(fragment, columns.mz)
    time <- getdata(fragment, columns.time)
    Intensity <- getdata(fragment, columns.intensity)
    # Must be indexed in this order.
    # The output column names are fixed (mz, Intensity, time) regardless of the
    # configured input column names.
    return(data.frame(mz=mz, Intensity=Intensity,time=time))
 }