|
@ -1,27 +1,33 @@ |
|
|
library("nontarget") |
|
|
library("nontarget") |
|
|
library("purrr") |
|
|
library("purrr") |
|
|
#library("funprog") |
|
|
|
|
|
library("enviPat") |
|
|
library("enviPat") |
|
|
library("stringr") |
|
|
library("stringr") |
|
|
library("parallel") |
|
|
library("parallel") |
|
|
|
|
|
|
|
|
# Configurations #############################################################

# file : decides which file to read in data from #############################
file <- "/home/junikim/programming/patternmatch/data/allcluster_mz.csv"
#file <- "/home/junikim/programming/patternmatch/data/15_Clusters_for_Tuning_29June21.txt"

# check cluster 2846

# Isotopes to search for.
search_isos <- c("37Cl", "81Br")

# Minimum size of a cluster
min_cluster_size <- 2

# Number of cores to be used (will be adjusted if not possible)
use_cores <- 6

# Do not edit below.

# iso_length is captured BEFORE "13C" is appended, so it counts only the
# isotopes listed in search_isos above.
iso_length <- length(search_isos)
if (!("13C" %in% search_isos)) {
  search_isos <- append(search_isos, "13C")
}

# Read in the Table ##########################################################
# The input is read as a comma-separated file with a header row.
table <- read.table(file, header = TRUE, sep = ",")

# Organize the tables by number ##############################################
# Largest value found in the Spectra_Number column.
fragments <- max(table[, "Spectra_Number"])
|
@ -68,40 +74,56 @@ getdataframe <- function (fragment) { |
|
|
mz <- getdata(fragment, "mz") |
|
|
mz <- getdata(fragment, "mz") |
|
|
time <- getdata(fragment, "time") |
|
|
time <- getdata(fragment, "time") |
|
|
Intensity <- getdata(fragment, "Intensity") |
|
|
Intensity <- getdata(fragment, "Intensity") |
|
|
return(data.frame(mz=mz, time=time, Intensity=Intensity)) |
|
|
|
|
|
|
|
|
return(data.frame(mz=mz, Intensity=Intensity,time=time)) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
# Incomplete: Get the diagnostics for a cluster and turn them into a portable#
# format. (such as through tidyverse) ########################################

# initialize isotopes.
# Loads the `isotopes` data set, then builds the isotope search table for
# every entry of search_isos, each with charge 1.
data(isotopes)
isos <- make.isos(
  isotopes,
  use_isotopes = search_isos,
  use_charges = rep(1, length(search_isos))
)
|
|
|
|
|
|
|
|
|
|
|
# Runs pattern.search on one fragment.
#
# fragment: fragment number, passed on to getdataframe().
#
# Returns both the pattern.search result and the fragment number: the fragment
# number is appended after the elements of the pattern.search result, and the
# rest of this script reads it back as result[[13]].
diagnostics <- function(fragment) {
  points <- getdataframe(fragment)
  ptrn <- pattern.search(
    points,
    isos,
    cutint = 1000,
    rttol = c(-20, 20),
    # kept because of gc limitations.
    mztol = 3,
    mzfrac = 0.1,
    ppm = TRUE,
    inttol = 0.05,
    # Do not modify anything below.
    rules = rep(TRUE, 11),
    #rules=c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE),
    deter = FALSE,
    entry = 50
  )
  return(c(ptrn, fragment))
}
|
|
|
|
|
|
|
|
# Fragment numbers whose entry in `usable` is TRUE.
# seq_len() is used instead of 1:fragments so that a zero count yields an
# empty sequence rather than c(1, 0).
use <- Filter(function(x) usable[x], seq_len(fragments))

# Run diagnostics() once per selected fragment, spread over use_cores cores.
results <- mclapply(use, diagnostics, mc.cores = use_cores)
|
|
|
|
|
|
|
|
|
|
|
# Prints the fragment number of a result when at least one of the first
# iso_length rows of its "Counts of isotopes" table has a non-zero
# "peak counts" value.
#
# result: one element of `results`; a list carrying a `Counts of isotopes`
#         table and the fragment number as element 13.
#
# NOTE(review): this assumes the first iso_length rows of the table belong to
# the isotopes originally listed in search_isos - confirm the row order.
handle_res <- function(result) {
  # mclapply() returns a "try-error" object in place of the value when a
  # worker fails; `$` on such an object would abort the whole run.
  if (inherits(result, "try-error")) {
    warning(
      "Skipping failed result: ", trimws(as.character(result)[1]),
      call. = FALSE
    )
    return(invisible(NULL))
  }

  # seq_len() yields an empty selection when iso_length is 0, where
  # seq(1, 0) would have selected rows 1 and 0.
  counts <- result$`Counts of isotopes`[seq_len(iso_length), "peak counts"]

  # Missing counts are ignored rather than raising an error inside if ().
  if (any(counts != 0, na.rm = TRUE)) {
    print(result[[13]])
  }
}
|
|
|
|
|
|
|
|
# Incomplete: Make the analysis more resilient to different sorting. |
|
|
|
|
|
|
|
|
# Report every result; handle_res() prints the fragment number of each hit.
invisible(lapply(results, handle_res))
|
|
|
|
|
|
|
|
############################################################################## |
|
|
############################################################################## |