Browse Source

added README and streamlined configuration

master
junikimm717 3 years ago
parent
commit
21b89f4e66
  1. 19
      README.md
  2. 39
      src/script.def.R

19
README.md

@ -0,0 +1,19 @@
# Patternmatch
This is a short R wrapper around `pattern.search` that takes a CSV file of
HRMS peaks and prints the numbers of all fragments that may contain isotopes of
your choosing (it is intended primarily for finding halogenated compounds).
## Installation
The script has the following dependencies:
- the latest version of the [nontarget R package](https://github.com/blosloos/nontarget) (this should be installed through the devtools package)
- [purrr](https://www.rdocumentation.org/packages/purrr/versions/0.2.5)
- [enviPat](https://rdocumentation.org/packages/enviPat/versions/2.2)
- [stringr](https://www.rdocumentation.org/packages/stringr/versions/1.4.0)
- [parallel](https://www.rdocumentation.org/packages/parallel/versions/3.6.2)
## Additional Notes
So that the data points of each fragment can be looked up in linear time, the
entire data table is currently sorted by spectra number before searching.

39
src/script.def.R

@ -6,22 +6,31 @@ library("parallel")
# Configurations #############################################################
# file : decides which file to read in data from #############################
file <- "/home/junikim/programming/patternmatch/data/allcluster_mz.csv"
#file <- "/home/junikim/programming/patternmatch/data/15_Clusters_for_Tuning_29June21.txt"
# check cluster 2846
file <- "/path/to/script"
# All isotopes to search for
search_isos <- c("37Cl", "81Br")
# Minimum size of a cluster
min_cluster_size <- 2
# Number of cores to be used (will be adjusted if not possible)
# Number of cores to be used (will be adjusted if it exceeds the true number of cores)
use_cores <- 6
# Do not edit below.
# Table name configuration
# Column name for m/z
columns.mz <- "mz"
# Column name for time in gc
columns.time <- "time"
# Column name for intensities
columns.intensity <- "Intensity"
# Column name for fragment numbers (only numbers accepted)
columns.spectra <- "Spectra_Number"
##############################################################################
# Script
iso_length <- length(search_isos)
if (!("13C" %in% search_isos)) {
search_isos <- append(search_isos, "13C")
@ -29,8 +38,11 @@ if (!("13C" %in% search_isos)) {
# Read in the Table ##########################################################
table <- read.table(file, header=TRUE, sep=",")
# Sort table by spectra ID
table <- table[order(table[,columns.spectra]),]
# Organize the tables by number ##############################################
fragments <- max(table[,"Spectra_Number"])
fragments <- max(table[,columns.spectra])
# The algorithm below guarantees linear complexity of looking up data points.#
@ -48,7 +60,7 @@ usable <- unlist(map(1:fragments, function(x) FALSE))
# Set all of the intervals ###################################################
for (i in seq(1, nrow(table))) {
fragment <- table[i, "Spectra_Number"]
fragment <- table[i, columns.spectra]
if (! usable[fragment]) {
minint[fragment] <- i
}
@ -71,9 +83,10 @@ getdata <- function(fragment, key) {
# Add all data frames as necessary for evaluation. ############################
getdataframe <- function (fragment) {
mz <- getdata(fragment, "mz")
time <- getdata(fragment, "time")
Intensity <- getdata(fragment, "Intensity")
mz <- getdata(fragment, columns.mz)
time <- getdata(fragment, columns.time)
Intensity <- getdata(fragment, columns.intensity)
# Must be indexed in this order.
return(data.frame(mz=mz, Intensity=Intensity,time=time))
}

Loading…
Cancel
Save