Browse Source

cleaned up codebase

master
junikimm717 4 years ago
parent
commit
9c4fa981ca
  1. 19
      specification.md
  2. 162
      src/script.def.R

19
specification.md

@ -62,7 +62,24 @@ isotopes.
## Data Summary
- Flags for certainty of halogenated compounds
Per row:
- Fragment number
- Number of peaks
- top 5 (configurable) most intense m/z values.
- Averaged time output
### Derivations for above
- Fragment number - trivial
- Number of each isotope (check group counts per isotope)
- most intense - loop through the fragment and isolate. (Preserve all three categories)
- Averaged time output.
## Output num 2
- each cluster of elements, dump into a file (cluster number).
# Test Notes

162
src/script.def.R

@ -1,15 +1,14 @@
library("nontarget")
library("purrr")
library("enviPat")
library("stringr")
library("parallel")
# Configurations #############################################################
# file : decides which file to read in data from #############################
file <- "/path/to/script"
file <- "/path/to/csv"
# All isotopes to search for
search_isos <- c("37Cl", "81Br")
# All isotopes to check for
check_isos <- c("37Cl", "81Br", "34S")
# Minimum size of a cluster
min_cluster_size <- 2
@ -28,50 +27,49 @@ columns.intensity <- "Intensity"
# Column name for fragment numbers (only numbers accepted)
columns.spectra <- "Spectra_Number"
##############################################################################
# Script
# output options
output.intensities <- 5
# Verbose output
verbose.enable <- TRUE
verbose.outputdir <- "/output/dir"
# Script #####################################################################
## Defining/setting variables ################################################
# C-13 is required in the rules of pattern.search, so it must be included. ###
search_isos <- check_isos
iso_length <- length(search_isos)
if (!("13C" %in% search_isos)) {
search_isos <- append(search_isos, "13C")
}
# Read in the Table ##########################################################
# Read in the Table and sort by spectra ID
table <- read.table(file, header=TRUE, sep=",")
# Sort table by spectra ID
table <- table[order(table[,columns.spectra]),]
# Organize the tables by number ##############################################
# Organize the tables by number
fragments <- max(table[,columns.spectra])
# The algorithm below guarantees linear complexity of looking up data points.#
# set use_cores to safe amount
use_cores = min(use_cores, detectCores()-1)
# minint is the lower bound of the interval. #################################
# maxint is the upper bound of the interval that contains the fragment. ######
# usable memoizes as to whether or not each fragment number exists. #####
minint <- unlist(map(1:fragments, function(x) 0))
maxint <- unlist(map(1:fragments, function(x) 0))
usable <- unlist(map(1:fragments, function(x) FALSE))
# lower bound of indices per fragment
# (sorting guarantees that data points of same fragment are together)
minint <- rep(0,fragments)
# upper bound of indices per fragment
maxint <- rep(0,fragments)
# memoizes as to
usable <- rep(FALSE,fragments)
# initialize isotopes.
data(isotopes)
isos <- make.isos(isotopes,use_isotopes=search_isos,
use_charges=rep(1, length(search_isos)))
# Set all of the intervals ###################################################
for (i in seq(1, nrow(table))) {
fragment <- table[i, columns.spectra]
if (! usable[fragment]) {
minint[fragment] <- i
}
maxint[fragment] <- max(maxint[fragment], i)
usable[fragment] <- TRUE
}
for (i in 1:fragments) {
if (maxint[i] - minint[i] + 1 < min_cluster_size)
usable[i] = FALSE
}
## Setting functions #########################################################
getdata <- function(fragment, key) {
if (! usable[fragment]) {
@ -81,21 +79,18 @@ getdata <- function(fragment, key) {
return(table[minint[fragment]:maxint[fragment],key])
}
# Add all data frames as necessary for evaluation. ############################
# Add all data frames as necessary for evaluation.
# Note: for pattern.search to work, the data must be ordered in
# mz, intensity, time.
getdataframe <- function (fragment) {
mz <- getdata(fragment, columns.mz)
time <- getdata(fragment, columns.time)
Intensity <- getdata(fragment, columns.intensity)
# Must be indexed in this order.
return(data.frame(mz=mz, Intensity=Intensity,time=time))
}
# initialize isotopes.
data(isotopes)
isos <- make.isos(isotopes,use_isotopes=search_isos,
use_charges=rep(1, length(search_isos)))
# returns both pattern.search result and fragment number.
diagnostics <- function(fragment) {
points <- getdataframe(fragment)
ptrn <- pattern.search(
@ -108,35 +103,88 @@ diagnostics <- function(fragment) {
mzfrac=0.1,
ppm=TRUE,
inttol=0.05,
# Do not modify anything below.
rules=rep(TRUE,11),
#rules=c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE),
deter=FALSE,
entry=50
);
return(c(ptrn, fragment))
}
use <- Filter(function(x) usable[x], 1:fragments)
# checks if the result is positive.
results <- mclapply(use, diagnostics, mc.cores=use_cores)
positive <- function(result) {
res <- result$`Counts of isotopes`
v <- res[res$"isotope" %in% check_isos,"peak counts"]
return (!all(v %in% c(0)))
}
handle_res <- function(result) {
allzero <- function(v) {
for (x in v) {
if (x != 0)
return (FALSE)
}
return (TRUE)
}
v <- result$`Counts of isotopes`[seq(1,iso_length),"peak counts"]
if (!allzero(v)) {
print(result[[13]])
derive.fragment <- function(result) {
return(result[[13]])
}
derive.iso <- function(result) {
res <- result$`Counts of isotopes`
return (res[res$"isotope" %in% check_isos,c("isotope", "group counts")])
}
derive.intensity <- function(result) {
ptrn <- result$Patterns
N <- max(output.intensities, nrow(result))
inds <- order(ptrn[,columns.intensity], decreasing=TRUE)[1:N]
return(ptrn[inds, c(2, 1, 3)])
}
derive.average <- function(result) {
ptrn <- result$Patterns[,2]
return(mean(ptrn))
}
derive.frame <- function(result) {
res <- list(
derive.fragment(result),
derive.iso(result),
derive.intensity(result),
derive.average(result)
)
names(res) <- c("Fragment_ID", "Peaks", "Intensities", "Average_time")
return(res)
}
# Execution ##################################################################
## Set all of the intervals ##################################################
for (i in seq(1, nrow(table))) {
fragment <- table[i, columns.spectra]
if (! usable[fragment]) {
minint[fragment] <- i
}
maxint[fragment] <- max(maxint[fragment], i)
usable[fragment] <- TRUE
}
# make fragment unusable if it does not contain enough data points.
for (i in 1:fragments) {
if (maxint[i] - minint[i] + 1 < min_cluster_size)
usable[i] = FALSE
}
for (result in results) {
handle_res(result)
# Filter all usable fragments
use <- Filter(function(x) usable[x], 1:fragments)
# Apply diagnostics function to all usable fragments (in parallel).
results <- mclapply(use, diagnostics, mc.cores=use_cores)
results.positive <- Filter(function(x) positive(x), results)
result.frame <- mapply(derive.frame, results.positive)
result.fragment <- mapply(derive.fragment, results.positive)
result.iso <- mapply(derive.iso, results.positive)
result.intensity <- mapply(derive.intensity, results.positive)
result.average <- mapply(derive.average, results.positive)
for (x in result.frame) {
print(x)
}
##############################################################################
Loading…
Cancel
Save