%\VignetteIndexEntry{MiniDIFDUP_1} %\VignettePackage{readJDX} %\VignetteIndexEntry{readJDX} %\VignetteKeyword{JCAMP, DX, NMR, IR, Raman} \documentclass[12 pt]{article} \usepackage[margin = 1.5 cm, letterpaper]{geometry} \usepackage{inconsolata} \renewcommand*\familydefault{\ttdefault} \usepackage[T1]{fontenc} \usepackage[parfill]{parskip} \usepackage[dvipsnames, svgnames, table]{xcolor} \pagestyle{empty} %%%%% Document Contents %%%%% % https://tex.stackexchange.com/a/25906/6580 for text boxes \begin{document} \fboxrule = 2 pt \noindent\fcolorbox{red}{white}{ % highlight entire file \minipage[t]{\dimexpr1.0\linewidth-2\fboxsep-2\fboxrule\relax} \noindent\fcolorbox{blue}{white}{ % highlight meta data in blue \minipage[t]{\dimexpr0.95\linewidth-2\fboxsep-2\fboxrule\relax} \#\#TITLE=Tiny IR Spectrum for Demo\\ \#\#JCAMP-DX=4.24\\ \#\#DATA TYPE=INFRARED SPECTRUM\\ \#\#OWNER= GPL-3 (modified from BRUKER1.JCM (IUPAC) by B. Hanson)\\ \#\#XUNITS=1/CM\\ \#\#YUNITS=TRANSMITTANCE\\ \#\#FIRSTX= 4000.655017\\ \#\#LASTX= 400.1619262\\ \#\#DELTAX= -9.64245605E-1\\ \#\#MAXY= 95.83563804\\ \#\#MINY= -2.87246704E-1\\ \#\#XFACTOR= 4.882812500E-4\\ \#\#YFACTOR= 1.220703125E-2\\ \#\#NPOINTS= 612\\ \#\#FIRSTY= 7493 \endminipage}\hfill \noindent\fcolorbox{orange}{white}{ % highlight variable list in orange \minipage[t]{\dimexpr0.95\linewidth-2\fboxsep-2\fboxrule\relax} \#\#XYDATA=(X++(Y..Y))\\ 4861896G493jknkJlkJjJlKjOkNTjNKJNKMlmKqMnKkOjJKJKTNNMNKJMJjKTJTkMlKlKJ\\ 4729586G542nNjKmJMJmMjljKmklTlJkNjMLNTMKlKjmkjkTlokmqkqj0oTrj4j3j9k3k8l8\\ 4611099G336m7o0q2j07j38j73k10k40k62k53k11j55p2K0J09J76K11K28K20J97J70J45J22J01\\ 4563705G025Q6P0O0N1M4L8L2K8K4K1J7J8J1RTQNMTkMTjNLNMJULJKUJjJkTnmkTLTJ0OLRMULJM\\ 4453117G637JTLJKTJJLkKLlLjJLKjTMJjLJjKlKLljKJjJKTJljKJTlKTkLTjKJTMJTK\\ 4320808G688JKTkLjJOkjJTjTKkKUkjLNkmLjLlJKlOmjKkMJTTkNKkJjkJjLTmKJkKkLJmJL\\ 4186523G715jTKLKlJLkjKJjUKUkJKTljKnMJLmNJjUMkLkTUjMKJKjj3J1PnMKLJTKjMkK\\ 4050263G750JjKJTKJqNKJlKlKTpNMlKKlJUjTOj9J8kjKklLlKLmMjJNj1j2K0lQj5\\ 
3929802G753k9L4QmOkMnJMkNJTkJKJlKTmTQkMk4LK3kJlJNLkLJLMnJKjLl8JK7lJ0MjLlj\\
3811316G794JkMkMpKkMmTnPJ5r1J2Q0k2J7kTjqJ6rPqOmn3L1K3Lm8L3K0j4J3pOjrMTkJl
\#\#END=
\endminipage}\hfill
\endminipage}\hfill

Key to the above:
\begin{itemize}
  \item \textcolor{red}{Entire File}. Input for \textit{findVariableLists}, as a vector of character strings.
  \item \textcolor{blue}{Metadata}. Input for \textit{extractParams}, as a vector of character strings.
  \item \textcolor{orange}{Variable List}. Input for \textit{processVariableList} and \textit{processXYY}, as a vector of character strings.
\end{itemize}

<< setup, echo = FALSE >>=
# stringr supplies str_trim() and str_replace_all(), used in the lineList chunk
suppressPackageStartupMessages(library("stringr"))
@

The entire file is the input to \textit{findVariableLists}. Here are two raw lines from the variable list; each is a character vector of length one:

<< getData, echo = FALSE >>=
# Read the demo file shipped with the package, one string per line of the file
mdd <- system.file("extdata", "MiniDIFDUP.JDX", package = "readJDX")
lines <- readLines(mdd)
@

<< rawLines, echo = FALSE >>=
print(lines[17:18])
@

The input to \textit{decompLines} and \textit{getComp} is a vector of \emph{named} raw lines:

<< namedRawLines, echo = FALSE >>=
# Name each line "Line_<n>"; seq_along() is safe even for a zero-length vector
names(lines) <- paste("Line", seq_along(lines), sep = "_")
print(lines[17:18])
@

The input to \textit{unSQZ}, \textit{insertDUPs}, \textit{deDIF} and \textit{yValueCheck} is a \emph{named} list of lines. Each raw line has been split into the individual pieces and named according to its compression mode:

<< lineList, echo = FALSE >>=
# The following code is from decompLines

# Save the line names now; they are restored below after the string
# processing steps have dropped them
lineNames <- names(lines)

# Helper Function
# Label every element of a character vector with the compression mode
# suggested by the characters it contains.
#   cvec: character vector (the pieces of one line)
# Returns cvec with names set to "SQZ", "DIF", "DUP" or "NUM".
# The assignments run in order, so a later match overwrites an earlier one.
nameComp <- function(cvec) {
  names(cvec) <- rep(NA_character_, length(cvec))
  names(cvec)[grepl("[@A-Ia-i]{1}", cvec)] <- "SQZ"
  names(cvec)[grepl("[%J-Rj-r]{1}", cvec)] <- "DIF"
  names(cvec)[grepl("[S-Zs]{1}", cvec)] <- "DUP"
  names(cvec)[!grepl("[@%A-Za-rs]", cvec)] <- "NUM" # anything else is a NUM/AFFN
  cvec
}

# Preliminaries
lines <- gsub(",", ".", lines) # replace ',' with '.' -- needed for EU style files
lines <- gsub("\\s+\\${2}.*$", "", lines) # remove any ...xxxx $$ checkpoint type entries
lines <- gsub("(\\+|-){1}([0-9]+)", " \\1\\2", lines) # put space ahead of +|- signs (PAC)
lines <- str_trim(lines, side = "both") # remove extra white space
lines <- str_replace_all(lines, "([@%A-Za-rs])", " \\1") # break into pieces by compression mode

# Convert to list to process each piece separately
lines <- as.list(lines)
names(lines) <- lineNames # replace the names that were nuked
# Split every line on runs of white space, giving one character vector per line
lines <- lapply(lines, function(x) unlist(strsplit(x, "\\s+")))

# Name each entry by compression mode
lines <- lapply(lines, nameComp)
print(lines[17])
@

The input to \textit{unDIF} is a character vector with entries named with ASDF mode:

<< unDIFinput, echo = FALSE >>=
line17 <- unlist(lines[17])
# unlist() prefixed each name with "Line_<n>."; strip that prefix again
names(line17) <- gsub("Line_[0-9]*\\.", "", names(line17))
print(line17)
@

The input to \textit{repDUPs} is a character vector with length one:

<< repDUPsinput, echo = FALSE >>=
# NOTE(review): the index 21 is hard-coded -- confirm that it selects the
# intended entry of line 17 (presumably a DUP) for the shipped demo file
print(line17[21])
@

\end{document}