noninteractive corrections and reformat

5 years ago · 02b5115a28
2 changed files with 97 additions and 66 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,3 +2,5 @@
 /data
 /.RData
 /.Rhistory
 /.Rproj.user
 /*.Rproj
--- a/src/script.def.R
+++ b/src/script.def.R
@ -4,18 +4,22 @@ library("stringr")
 library("parallel")
 # Configurations #############################################################
 # file : decides which file to read in data from #############################
 file <- "/path/to/csv"
 # file to read from
 input.filename <- ""
 # separator to use (\t for tabs, "," for CSV)
 input.sep <- ","
 # All isotopes to check for
 check_isos <- c("37Cl", "81Br", "34S")
 check_isos <- c("37Cl", "81Br")
 # Minimum size of a cluster
 min_cluster_size <- 2
 # Number of cores to be used (will be adjusted if it exceeds the true number of cores)
 # Number of cores to be used in pattern.search evaluation
 # (will be adjusted if it exceeds the true number of cores)
 use_cores <- 6
 # Table name configuration
 # Column name for m/z
@ -28,56 +32,61 @@ columns.intensity <- "Intensity"
 columns.spectra <- "Spectra_Number"
 # output options
 # Number of highest intensities to display in output
 output.intensities <- 5
 # Verbose output
 verbose.enable <- FALSE
 verbose.enable <- TRUE
 # output directory must already exist.
 verbose.outputdir <- "/path/to/directory"
 verbose.outputdir <- ""
 # Script #####################################################################
 ## Defining/setting variables ################################################
 # C-13 is required in the rules of pattern.search, so it must be included. ###
 # C-13 is required in the rules of pattern.search.
 search_isos <- check_isos
 iso_length <- length(search_isos)
 if (!("13C" %in% search_isos)) {
    search_isos <- append(search_isos, "13C")
 }
 # Read in the Table and sort by spectra ID
 table <- read.table(file, header=TRUE, sep=",")
 table <- table[order(table[,columns.spectra]),]
 table <- read.table(input.filename, header = TRUE, sep = input.sep)
 table <- table[order(table[, columns.spectra]), ]
 # Organize the tables by number
 fragments <- max(table[,columns.spectra])
 fragments <- max(table[, columns.spectra])
 # set use_cores to safe amount
 use_cores = min(use_cores, detectCores()-1)
 use_cores = min(use_cores, detectCores() - 1)
 # lower bound of indices per fragment
 # (sorting guarantees that data points of same fragment are together)
 minint <- rep(0,fragments)
 minint <- rep(0, fragments)
 # upper bound of indices per fragment
 maxint <- rep(0,fragments)
 maxint <- rep(0, fragments)
 # memoizes as to
 usable <- rep(FALSE,fragments)
 usable <- rep(FALSE, fragments)
 # initialize isotopes.
 data(isotopes)
 isos <- make.isos(isotopes,use_isotopes=search_isos, 
                  use_charges=rep(1, length(search_isos)))
 isos <- make.isos(isotopes,
                  use_isotopes = search_isos,
                  use_charges = rep(1, length(search_isos)))
 ## Setting functions #########################################################
 getdata <- function(fragment, key) {
    if (! usable[fragment]) {
        stop(str_interp("Fragment $[d]{fragment} does not exist in the data set", 
                    list(fragment=fragment)))
    if (!usable[fragment]) {
        stop(str_interp(
            "Fragment $[d]{fragment} does not exist in the data set",
            list(fragment = fragment)
        ))
    }
    return(table[minint[fragment]:maxint[fragment],key])
    return(table[minint[fragment]:maxint[fragment], key])
 }
 # Add all data frames as necessary for evaluation.
@ -87,7 +96,11 @@ getdataframe <- function (fragment) {
    mz <- getdata(fragment, columns.mz)
    time <- getdata(fragment, columns.time)
    Intensity <- getdata(fragment, columns.intensity)
    return(data.frame(mz=mz, Intensity=Intensity,time=time))
    return(data.frame(
        mz = mz,
        Intensity = Intensity,
        time = time
    ))
 }
 # returns both pattern.search result and fragment number.
@ -97,17 +110,18 @@ diagnostics <- function(fragment) {
    ptrn <- pattern.search(
        points,
        isos,
        cutint=1000,
        rttol=c(-20,20),
        cutint = 1000,
        rttol = c(-20, 20),
        # kept because of gc limitations.
        mztol=3,
        mzfrac=0.1,
        ppm=TRUE,
        inttol=0.05,
        rules=rep(TRUE,11),
        deter=FALSE,
        entry=50
    );
        mztol = 3,
        mzfrac = 0.1,
        ppm = TRUE,
        inttol = 0.05,
        rules = rep(TRUE, 11),
        deter = FALSE,
        entry = 50
    )
    return(c(ptrn, fragment))
 }
@ -115,7 +129,7 @@ diagnostics <- function(fragment) {
 positive <- function(result) {
    res <- result$`Counts of isotopes`
    v <- res[res$"isotope" %in% check_isos,"peak counts"]
    v <- res[res$"isotope" %in% check_isos, "peak counts"]
    return (!all(v %in% c(0)))
 }
@ -125,35 +139,38 @@ derive.fragment <- function(result) {
 derive.iso <- function(result) {
    res <- result$`Counts of isotopes`
    return (res[res$"isotope" %in% check_isos,c("isotope", "group counts")])
    return (res[res$"isotope" %in% check_isos, c("isotope", "group counts")])
 }
 derive.intensity <- function(result) {
    ptrn <- result$Patterns
    N <- max(output.intensities, nrow(result))
    inds <- order(ptrn[,columns.intensity], decreasing=TRUE)[1:N]
    inds <- order(ptrn[, columns.intensity], decreasing = TRUE)[1:N]
    return(ptrn[inds, c(2, 1, 3)])
 }
 derive.average <- function(result) {
    ptrn <- result$Patterns[,2]
    ptrn <- result$Patterns[, 2]
    return(mean(ptrn))
 }
 derive.frame <- function(result) {
    res <- list(
            derive.fragment(result),
            derive.iso(result),
            derive.intensity(result),
            derive.average(result)
        )
    names(res) <- c("Fragment_ID", "Peaks", "Intensities", "Average_time")
        derive.fragment(result),
        derive.iso(result),
        derive.intensity(result),
        derive.average(result)
    )
    names(res) <-
        c("Fragment_ID", "Peaks", "Intensities", "Average_time")
    return(res)
 }
 verbose.dump <- function(result) {
    path <- file.path(verbose.outputdir, paste(derive.fragment(result), ".txt",
                                           sep=""))
    path <-
        file.path(verbose.outputdir,
                  paste(derive.fragment(result), ".txt",
                        sep = ""))
    sink(path)
    dput(result)
    sink()
@ -165,7 +182,7 @@ verbose.dump <- function(result) {
 for (i in seq(1, nrow(table))) {
    fragment <- table[i, columns.spectra]
    if (! usable[fragment]) {
    if (!usable[fragment]) {
        minint[fragment] <- i
    }
    maxint[fragment] <- max(maxint[fragment], i)
@ -175,39 +192,51 @@ for (i in seq(1, nrow(table))) {
 # make fragment unusable if it does not contain enough data points.
 for (i in 1:fragments) {
    if (maxint[i] - minint[i] + 1 < min_cluster_size)
        usable[i] = FALSE
        usable[i] <- FALSE
 }
 # Filter all usable fragments
 use <- Filter(function(x) usable[x], 1:fragments)
 use <- Filter(function(x)
    usable[x], 1:fragments)
 # Apply diagnostics function to all usable fragments (in parallel).
 results <- mclapply(use, diagnostics, mc.cores=use_cores)
 results <- mclapply(use, diagnostics, mc.cores = use_cores)
 results.positive <- Filter(function(x) positive(x), results)
 results.positive <- Filter(function(x)
    positive(x), results)
 # print to files if verbose output requested
 if (verbose.enable) {
    if (!dir.exists(verbose.outputdir)) {
        stop(str_interp("Directory $[s]{dir} does not exist", 
                        list(dir=verbose.outputdir)))
        stop(str_interp(
            "Directory $[s]{dir} does not exist",
            list(dir = verbose.outputdir)
        ))
    }
    res <- readline(prompt=paste("Are you sure you want to overwrite files in ", 
                                 verbose.outputdir, "? [y/N] "))
    if (res != "y") {
        stop("Process aborted.")
    if (interactive()) {
        res <-
            readline(
                prompt = paste(
                    "Are you sure you want to overwrite files in ",
                    verbose.outputdir,
                    "? [y/N] "
                )
            )
        if (res != "y")
        {
            stop("Process aborted.")
        }
    }
    print(paste("printing output to ", verbose.outputdir))
    mapply(verbose.dump, results.positive)
    lapply(results.positive, verbose.dump)
 }
 result.frame <- mapply(derive.frame, results.positive)
 result.fragment <- mapply(derive.fragment, results.positive)
 result.iso <- mapply(derive.iso, results.positive)
 result.intensity <- mapply(derive.intensity, results.positive)
 result.average <- mapply(derive.average, results.positive)
 print(result.fragment)
 result.frame <- lapply(results.positive, derive.frame)
 result.fragment <- lapply(results.positive, derive.fragment)
 result.iso <- lapply(results.positive, derive.iso)
 result.intensity <- lapply(results.positive, derive.intensity)
 result.average <- lapply(results.positive, derive.average)
 ##############################################################################
 print("All Fragments that contain desired isotopes:")
 print(result.frame)