Return to previous page

Contents of file 'sgfilter/sgfilter.w':



    1   % File:        sgfilter.w [CWEB source code]
    2   % Residence:   http://jonsson.eu/programs/cweb/sgfilter/
    3   % Created:     January 23, 2006 [v.1.0]
    4   % Last change: December 4, 2011 [v.1.6]
    5   % Author:      Fredrik Jonsson <http://jonsson.eu>
    6   % Description: The CWEB source code for the SGFILTER stand-alone implementation
    7   %              of the Savitzky-Golay smoothing filter. For information on the
    8   %              CWEB programming language, see
    9   %              http://www.literateprogramming.com.
   10   % Compilation: Compile this program by using the enclosed Makefile, or use
   11   %              the blocks of the Makefile as listed in the documentation
   12   %              (file sgfilter.ps or sgfilter.pdf). The C source code (as
   13   %              generated from this CWEB code) conforms to the ANSI standard
   14   %              for the C programming language (which is equivalent to the
   15   %              ISO C90 standard for C).
   16   %
   17   % Copyright (C) 2011, Fredrik Jonsson <http://jonsson.eu>
   18   % Non-commercial copying welcome.
   19   %
   20   \input ext/epsf.tex   % For the inclusion of Encapsulated PostScript graphics
   21   \input ext/eplain.tex % To access equation numbering and other cross-referencing
   22   \def\version{1.6}
   23   \def\lastrevdate{December 4, 2011}
   24   \font\eightcmr=cmr8
   25   \font\tensc=cmcsc10
   26   \font\eightcmssq=cmssq8
   27   \font\eightcmssqi=cmssqi8
   28   \font\twentycmcsc=cmcsc10 at 20 truept
   29   \def\sgfilter{{\eightcmr SGFILTER\spacefactor1000}}
   30   \def\ANSI{{\eightcmr ANSI\spacefactor1000}} % The language standard we stick to
   31   \def\GNU{{\eightcmr GNU\spacefactor1000}}       % GNU is Not Unix
   32   \def\GCC{{\eightcmr GCC\spacefactor1000}}       % The GNU C-compiler
   33   \def\CEE{{\eightcmr C\spacefactor1000}}         % The C programming language
   34   \def\FORTRAN{{\eightcmr Fortran\spacefactor1000}} % The Fortran language
   35   \def\CPP{\CEE{\tt++}}                           % The C++ programming language
   36   \def\ANSICEE{{\eightcmr ANSI~C\spacefactor1000}}% The ANSI-C standard language
   37   \def\CWEB{{\eightcmr CWEB\spacefactor1000}}     % The CWEB programming language
   38   \def\UNIX{{\eightcmr UNIX\spacefactor1000}}
   39   \def\CYGWIN{{\eightcmr CYGWIN\spacefactor1000}}
   40   \def\GNUPLOT{{\eightcmr GNUPLOT\spacefactor1000}}
   41   \def\CTANGLE{{\eightcmr CTANGLE\spacefactor1000}}
   42   \def\CWEAVE{{\eightcmr CWEAVE\spacefactor1000}}
   43   \def\mod{\mathop{\rm mod}\nolimits}  % The real part of a complex number
   44   \def\re{\mathop{\rm Re}\nolimits}  % The real part of a complex number
   45   \def\im{\mathop{\rm Im}\nolimits}  % The imaginary part of a complex number
   46   \def\backslash{\char'134}          % The `\' character
   47   \def\vertbar{\char'174}            % The `|' character
   48   \def\dollar{\char'044}             % The `$' character
   49   \def\tilde{\char'176}              % The `~' character
   50   \def\tothepower{\char'136}         % The `^' character
   51   \def\onehalf{{\textstyle{{1}\over{2}}}}
   52   \def\threefourth{{\textstyle{{3}\over{4}}}}
   53   \def\endalg{\vrule height 1.4ex width .6ex depth .4ex} % Rectangular bullet
   54   \def\ie{i.\thinspace{e.}~\ignorespaces}
   55   \def\eg{e.\thinspace{g.}~\ignorespaces}
   56   \let\,\thinspace
   57   %
   58   % Define a handy macro for listing the steps of an algorithm.
   59   %
   60   \newdimen\aitemindent \aitemindent=26pt
   61   \newdimen\aitemleftskip \aitemleftskip=36pt
   62   \def\aitem[#1]{\smallbreak\noindent\hbox to 10pt{}%
   63     \hbox to\aitemindent{\bf #1\hfill}%
   64     \hangindent\aitemleftskip\ignorespaces}
   65   %
   66   % Define a handy macro for bibliographic references.
   67   %
   68   \newdimen\refitemindent \refitemindent=18pt
   69   \def\refitem[#1]{\smallbreak\noindent%
   70     \hbox to\refitemindent{[#1]\hfill}%
   71     \hangindent\refitemindent\ignorespaces}
   72   \def\references{\medskip\goodbreak\noindent{\bf References}\nobreak\smallskip}
   73   %
   74   % Define a handy macro for nicely typeset variable descriptions.
   75   %
   76   \newdimen\varitemindent \varitemindent=100pt
   77   \def\varitem[#1]#2{\smallbreak\noindent\hbox to 20pt{}%
   78     \hbox to\varitemindent{#1\hfill}%
   79     \hangindent 120pt\ignorespaces#2\par}
   80   %
   81   % Define a handy macro for nicely typeset descriptions of command line options.
   82   %
   83   \newdimen\optitemindent \optitemindent=80pt
   84   \def\optitem[#1]#2{\smallbreak\noindent\hbox to\hsize{\hskip40pt{#1}\hfill}
   85     \hangindent\optitemindent\ignorespaces{\hskip\optitemindent{#2}}\smallskip}
   86   %
   87   % Define a handy macro for the list of program revisions.
   88   %
   89   \newdimen\citemindent \citemindent=80pt
   90   \newdimen\citemleftskip \citemleftskip=90pt
   91   \def\citem[#1]{\smallbreak\noindent\hbox to 10pt{}%
   92     \hbox to\citemindent{\bf #1\hfill}%
   93     \hangindent\citemleftskip\ignorespaces}
   94   \def\citindent{\smallbreak\noindent\hbox to 10pt{}%
   95     \hbox to\citemindent{\hfil}%
   96     \hangindent\citemleftskip\ignorespaces}
   97   %
   98   % Define a handy macro for the typesetting of quotes of text.
   99   %
  100   \newdimen\quoteindent \quoteindent=40pt
  101   \def\quote#1{\par{\advance\leftskip by\quoteindent
  102     \advance\rightskip by\quoteindent
  103     \medskip\noindent{``#1''}\medskip}\par}
  104   %
  105   % Define a handy macro for the typesetting of figure captions. The double
  106   % '{{' and '}}' are here in place to prevent the increased \leftskip and
  107   % \rightskip to apply globally after the macro has been expanded.
  108   %
  109   \newdimen\figcapindent \figcapindent=\quoteindent
  110   \def\figcaption[#1]#2{{\advance\leftskip by\figcapindent
  111     \advance\rightskip by\figcapindent
  112     \smallskip\noindent{\bf Figure #1.} {#2}\smallskip}}
  113   %
  114   % Define the \beginvrulealign and \endvrulealign macros as described in
  115   % Donald Knuth's The TeXbook, Appendix D: Dirty Tricks. These macros are
  116   % used in typesetting nicely looking tables.
  117   %
  118   \def\beginvrulealign{\setbox0=\vbox\bgroup}
  119   \def\endvrulealign{\egroup % now \box0 holds the entire alignment
  120     \setbox0=\vbox{\setbox2=\hbox{\vrule height\ht0 depth\dp0 width0pt}
  121       \unvbox0 \setbox0=\lastbox % now \box0 is the bottom row
  122       \nointerlineskip \copy0 % put it back
  123       \global\setbox1=\hbox{} % initialize box that will contain rules
  124       \setbox4=\hbox{\unhbox0 % now open up the bottom row
  125         \loop \skip0=\lastskip \unskip % remove tabskip glue
  126         \advance\skip0 by-.4pt % rules are .4 pt wide
  127         \divide\skip0 by 2
  128         \global\setbox1=\hbox{\hskip\skip0\vrule\hskip\skip0
  129           \unhbox2\unhbox1}%
  130         \setbox2=\lastbox % remove alignment entry
  131         \ifhbox2 \setbox2=\hbox{\kern\wd2}\repeat}}%
  132     \hbox{\rlap{\box0}\box1}} % superimpose the alignment on the rules
  133   %
  134   % Make sure that the METAPOST logo can be loaded even in plain TeX.
  135   %
  136   \ifx\MP\undefined
  137       \ifx\manfnt\undefined
  138               \font\manfnt=logo10
  139       \fi
  140       \ifx\manfntsl\undefined
  141               \font\manfntsl=logosl10
  142       \fi
  143       \def\MP{{\ifdim\fontdimen1\font>0pt \let\manfnt = \manfntsl \fi
  144         {\manfnt META}\-{\manfnt POST}}\spacefactor1000 }%
  145   \fi
  146   \ifx\METAPOST\undefined \let\METAPOST=\MP \fi
  147   \datethis
  148   
  149   @*Introduction.
  150   \vskip 120pt
  151   \centerline{\twentycmcsc SGFilter}
  152   \vskip 20pt
  153   \centerline{A stand-alone implementation of the Savitzky--Golay smoothing
  154     filter.}
  155   \vskip 2pt
  156   \centerline{(Version \version\ of \lastrevdate)}
  157   \vskip 10pt
  158   \centerline{Written by Fredrik Jonsson}
  159   \vskip 10pt
  160   \centerline{\epsfbox{fig0.1}}
  161   \vfill\eject
  162   \noindent
  163   This document was automatically extracted from the \CWEB\ master source code
  164   for the \sgfilter\ program and typeset in the Computer Modern typeface using
  165   plain \TeX\ and \MP.
  166   The source code and documentation of this
  167   program is electronically available at
  168   \.{http://jonsson.eu/programs/cweb/sgfilter/}.
  169   \vfill
  170   \line{Copyright \copyright\ Fredrik Jonsson \the\year\hfil}
  171   \medskip
  172   \noindent{All rights reserved. No part of this publication may be reproduced,
  173   stored in a retrieval system, or transmitted, in any form, or by any means,
  174   electronic, mechanical, photo-copying, recording, or otherwise, without the
  175   prior consent of the author. Non-commercial copying welcome.\hfil}
  176   \medskip
  177   \line{Printed on \today, at \hours\hfil}
  178   \medskip
  179   \line{\TeX\ is a trademark of the American Mathematical Society\hfil}
  180   
  181   @*The Savitzky--Golay smoothing filter.
  182   The Savitzky--Golay smoothing filter was originally presented in 1964 by
  183   Abraham Savitzky\footnote{$\dagger$}{Abraham Savitzky (1919--1999) was
  184   an American analytical chemist. (Wikipedia)} and Marcel J.~E.
  185   Golay\footnote{$\ddagger$}{Marcel J.~E.~Golay (1902--1989) was
  186   a Swiss-born mathematician, physicist, and information theorist, who applied
  187   mathematics to real-world military and industrial problems. (Wikipedia)}
  188   in their paper ``Smoothing and Differentiation of Data by Simplified Least
  189   Squares Procedures'', Anal.~Chem., {\bf 36}, 1627--1639 (1964).
  190   % \.{http://dx.doi.org/10.1021/ac60214a047}
  191   Being chemists and physicists, at the time of publishing associated with
  192   the Perkin--Elmer Corporation (still today a reputable manufacturer of
  193   equipment for spectroscopy), they found themselves often encountering noisy
  194   spectra where simple noise-reduction techniques, such as running averages,
  195   simply were not good enough for extracting well-determined characteristica
  196   of spectral peaks.
  197   In particular, any running averaging tend to flatten and widening peaks in
  198   a spectrum, and as the peak width is an important parameter when determining
  199   relaxation times in molecular systems, such noise-reduction techniques are
  200   clearly non-desirable.
  201   
  202   The main idea presented by Savitzky and Golay was a work-around avoiding the
  203   problems encountered with running averages, while still maintaining the
  204   smoothing of data and preserving features of the distribution such as relative
  205   maxima, minima and width.
  206   To quote the original paper on the target and purpose:
  207   \quote{This paper is
  208   concerned with computational methods for the removal of the random noise from
  209   such information, and with the simple evaluation of the first few derivatives
  210   of the information with respect to the graph abscissa.
  211   [$\ldots$]
  212   The objective here is to present specific methods for handling current
  213   problems in the processing of such tables of analytical data. The methods
  214   apply as well to the desk calculator, or to simple paper and pencil
  215   operations for small amounts of data, as they do to the digital computer for
  216   large amounts of data, since their major utility is to simplify and speed up
  217   the processing of data.}
  218   
  219   @ The work-around presented by Savitzky and Golay for avoiding distortion of
  220   peaks or features in their spectral data is essentially based on the
  221   idea to perform a linear regression of some polynomial {\sl individually for
  222   each sample}, followed by the {\sl evaluation of that polynomial exactly at the
  223   very position of the sample}. While this may seem a plausible idea, the actual
  224   task of performing a separate regression for each point easily becomes a very
  225   time-consuming task unless we make a few observations about this problem.
  226   However (and this is the key point in the method), for the regression of a
  227   polynomial of a finite power, say of an order below 10, the coefficients
  228   involved in the actual regression may be computed once and for all in an
  229   early stage, followed by performing a {\sl convolution} of the discretely
  230   sampled input data with the coefficient vector. As the coefficient vector
  231   is significantly shorter than the data vector, this convolution is fast and
  232   straightforward to implement.
  233   
  234   The starting point in deriving the algorithm for the Savitzky--Golay smoothing
  235   filter is to consider a smoothing method in which an equidistantly spaced
  236   set of $M$ samples $f_n$, $n=1,\ldots,M$ are linearly combined to form a
  237   filtered value $h_k$ according to\footnote{$\dagger$}{More generally,
  238   Eq.~\eqref{eq:10} can be interpreted as a discretized version of the
  239   convolution between a kernel $c(x)$ and a function $f(x)$,
  240   $$
  241     h(x)=\int^{\Delta_R}_{-\Delta_L}c(s) f(x+s)\,ds
  242         \rightarrow\sum^{n_R}_{j=-n_L} c_j f_{k+j}.
  243   $$
  244   Notice, however, the different sign of $s$ as compared to the standard form
  245   of the convolution integral in mathematics, where the argument of the
  246   function usually yields ``$f(x-s)$''.}
  247   $$
  248     h_k=\sum^{n_R}_{j=-n_L} c_j f_{k+j},
  249     \eqdef{eq:10}
  250   $$
  251   where $n_L$ is the number of samples ``to the left'' and $n_R$ the number of
  252   samples ``to the right'' of the centre index $k$.
  253   Notice that the running average smoothing corresponds to the case where
  254   all coefficients $c_n$ are equal, with $c_n=1/(n_L+n_R+1)$.
  255   The idea of the Savitzky--Golay filter is, however, to find the set of $c_n$
  256   which better preserves the shape of features present in the sampled profile.
  257   The approach is to make a linear regression of a polynomial to the $n_L+n_R+1$
  258   samples in the window around sample $k$, and then evaluating this polynomial
  259   at that very sample, for all $k$ from $1$ to $M$.
  260   
  261   We consider the regression of an $m$:th degree polynomial
  262   $$
  263     p(x)=a_0+a_1(x-x_k)+a_2(x-x_k)^2+\ldots+a_m (x-x_k)^m
  264     \eqdef{eq:20}
  265   $$
  266   to the set of $n_L$ samples to the left and $n_R$ to the right of sample $f_k$,
  267   including the sample inbetween. Evaluating this polynomial at $x=x_k$ is
  268   particularly easy, as we at that point simply have $p(x_k)=a_0$.
  269   The linear regression of this polynomial to the samples $f_{k+j}$, with
  270   $-n_L\le j\le n_R$, means that we look for the least-square approximated
  271   solution to the linear system of $n_L+n_R+1$ equations
  272   $$
  273     \eqalign{
  274       p(x_{k-n_L})&=a_0+a_1(x_{k-n_L}-x_k)+a_2(x_{k-n_L}-x_k)^2
  275                       +\ldots+a_m (x_{k-n_L}-x_k)^m\approx f_{k-n_L},\cr
  276                 &\hskip60pt\vdots\hskip100pt\vdots\cr
  277       p(x_{k-1})&=a_0+a_1(x_{k-1}-x_k)+a_2(x_{k-1}-x_k)^2
  278                       +\ldots+a_m (x_{k-1}-x_k)^m\approx f_{k-1},\cr
  279       p(x_{k})&=a_0\approx f_{k}\cr
  280       p(x_{k+1})&=a_0+a_1(x_{k+1}-x_k)+a_2(x_{k+1}-x_k)^2
  281                       +\ldots+a_m (x_{k+1}-x_k)^m\approx f_{k+1},\cr
  282                 &\hskip60pt\vdots\hskip100pt\vdots\cr
  283       p(x_{k+n_R})&=a_0+a_1(x_{k+n_R}-x_k)+a_2(x_{k+n_R}-x_k)^2
  284                       +\ldots+a_m (x_{k+n_R}-x_k)^m\approx f_{k+n_R},\cr
  285     }
  286     \eqdef{eq:30}
  287   $$
  288   which (under the assumption that $n_L+n_R+1>m+1$) provides an overdetermined
  289   system for the $m+1$ coefficients $a_j$ which can be expressed in matrix
  290   form as
  291   $$
  292     {\bf A}\cdot{\bf a}\equiv
  293     \underbrace{
  294       \pmatrix{
  295         1&(x_{k-n_L}-x_k)&(x_{k-n_L}-x_k)^2&\cdots&(x_{k-n_L}-x_k)^m\cr
  296         \vdots& &\vdots& &\vdots\cr
  297         1&(x_{k-1}-x_k)&(x_{k-1}-x_k)^2&\cdots&(x_{k-1}-x_k)^m\cr
  298         1&0&0&\cdots&0\cr
  299         1&(x_{k+1}-x_k)&(x_{k+1}-x_k)^2&\cdots&(x_{k+1}-x_k)^m\cr
  300         \vdots& &\vdots& &\vdots\cr
  301         1&(x_{k+n_R}-x_k)&(x_{k+n_R}-x_k)^2&\cdots&(x_{k+n_R}-x_k)^m\cr
  302       }
  303     }_{[(n_L+n_R+1)\times(m+1)]}
  304     \underbrace{
  305       \pmatrix{
  306         a_0\cr
  307         a_1\cr
  308         a_2\cr
  309         \vdots\cr
  310         a_m\cr
  311       }
  312       \vphantom{
  313           \pmatrix{
  314             ()^m\cr
  315             \vdots\cr
  316             ()^m\cr
  317             1\cr
  318             ()^m\cr
  319             \vdots\cr
  320             ()^m\cr
  321           }
  322       }
  323     }_{[(m+1)\times1]}
  324     =
  325     \underbrace{
  326       \pmatrix{
  327         f_{k-n_L}\cr
  328         \vdots\cr
  329         f_{k-1}\cr
  330         f_{k}\cr
  331         f_{k+1}\cr
  332         \vdots\cr
  333         f_{k+n_R}\cr
  334       }
  335       \vphantom{
  336           \pmatrix{
  337             ()^m\cr
  338             \vdots\cr
  339             ()^m\cr
  340             1\cr
  341             ()^m\cr
  342             \vdots\cr
  343             ()^m\cr
  344           }
  345       }
  346     }_{[(m+1)\times1]}
  347     \equiv{\bf f}.
  348     \eqdef{eq:40}
  349   $$
  350   The least squares solution to Eq.~\eqref{eq:40} is obtained by multiplying its
  351   left- and right-hand sides by the transpose of the system matrix ${\bf A}$,
  352   followed by solving the resulting $[(m+1)\times(m+1)]$-system of linear
  353   equations for ${\bf a}$,
  354   $$
  355     {\bf A}^{\rm T}\cdot({\bf A}\cdot{\bf a})=
  356       ({\bf A}^{\rm T}\cdot{\bf A})\cdot{\bf a}=
  357       {\bf A}^{\rm T}\cdot{\bf f}
  358     \qquad\Leftrightarrow\qquad
  359     {\bf a}=
  360       ({\bf A}^{\rm T}\cdot{\bf A})^{-1}\cdot
  361       ({\bf A}^{\rm T}\cdot{\bf f})
  362     \eqdef{eq:50}
  363   $$
  364   Recapitulate that what we here target is the evaluation of $p(x_k)=a_0$, which
  365   according to Eqs.~\eqref{eq:20} and~\eqref{eq:50} is equivalent to evaluating
  366   the first (``zero:th'') row of the solution for ${\bf a}$, or
  367   $$
  368     p(x_k)=a_0=
  369       \big[
  370         \underbrace{
  371           ({\bf A}^{\rm T}\cdot{\bf A})^{-1}
  372         }_{[(m+1)\times(m+1)]}
  373         \cdot
  374         \underbrace{
  375           ({\bf A}^{\rm T}\cdot{\bf f})
  376         }_{[(m+1)\times1]}
  377       \big]_{\{{\rm row\ }0\}}
  378     \eqdef{eq:60}
  379   $$
  380   So, having arrived at this result for the regression, we clearly have a
  381   solution for $a_0$ depending on the actual function values $f_{k+j}$ in the
  382   vicinity of sample $f_k$. Doesn't this mean that we nevertheless need to
  383   repeat the regression for every single sample to be included in the smoothing?
  384   Moreover, how does the result presented in Eq.~\eqref{eq:60} relate to the
  385   original mission, which we recapitulate was to find a general way of computing
  386   the coefficients $c_j$ in the kernel of the convolution in Eq.~\eqref{eq:10}?
  387   
  388   The answer to these questions lies in expanding Eq.~\eqref{eq:60} to yield the
  389   expression for the first (``zero:th'') row as
  390   $$
  391     \eqalign{
  392     p(x_k)=a_0
  393          &=\big[
  394           ({\bf A}^{\rm T}\cdot{\bf A})^{-1}\cdot
  395           ({\bf A}^{\rm T}\cdot{\bf f})\big]_{\{{\rm row\ }0\}}\cr
  396          &=\sum^{m+1}_{j=1}
  397           \big[({\bf A}^{\rm T}\cdot{\bf A})^{-1}\big]_{0j}
  398           \big[{\bf A}^{\rm T}\cdot{\bf f}\big]_{j}\cr
  399          &=\sum^{m+1}_{j=1}
  400           \big[({\bf A}^{\rm T}\cdot{\bf A})^{-1}\big]_{0j}
  401           \sum^{m+1}_{k=1}\big[{\bf A}^{\rm T}\big]_{jk}\big[{\bf f}\big]_{k}\cr
  402     }
  403     \eqdef{eq:70}
  404   $$
  405   and observing that the $n$:th coefficient $c_n$ is obtained as equal to the
  406   coefficient $a_0$ whenever ${\bf f}$ is replaced by the unit vector
  407   ${\bf e}_n$ (with all elements zero except for the unitary $n$:th element).
  408   Hence
  409   $$
  410     \eqalign{
  411       c_n&=\sum^{m+1}_{j=1}
  412         \big[({\bf A}^{\rm T}\cdot{\bf A})^{-1}\big]_{0j}
  413         \sum^{m+1}_{k=1}\big[{\bf A}^{\rm T}\big]_{jk}\big[{\bf e}_n\big]_{k}\cr
  414       &=\sum^{m+1}_{j=1}
  415         \big[({\bf A}^{\rm T}\cdot{\bf A})^{-1}\big]_{0j}
  416         \sum^{m+1}_{k=1}\big[{\bf A}^{\rm T}\big]_{jk}\delta_{nk}\cr
  417       &=\sum^{m+1}_{j=1}
  418         \big[({\bf A}^{\rm T}\cdot{\bf A})^{-1}\big]_{0j}
  419         \big[{\bf A}^{\rm T}\big]_{jn}\cr
  420       &=\sum^{m+1}_{j=1}
  421         \big[({\bf A}^{\rm T}\cdot{\bf A})^{-1}\big]_{0j}A_{nj}\cr
  422     }
  423     \eqdef{eq:80}
  424   $$
  425   This concludes the derivation of the coefficients in the Savitzky--Golay
  426   filter, which may be computed once and for all in the beginning and afterwards
  427   used as the (for any practical purposes short) kernel $c_j$ in the convolution
  428   described by Eq.~\eqref{eq:10}.
  429   
  430   @ In 2000, editors James Riordon, Elizabeth Zubritsky, and Alan Newman of
  431   {\sl Analytical Chemistry} made a review article of what they had identified
  432   as the top-ten seminal papers in the history of the journal, based on the
  433   number of citations.
  434   Among the listed papers, of which some were written by Nobel-laureates-to-be,
  435   the original paper by Savitzky and Golay makes a somewhat odd appearance, as
  436   it not only concerns mainly numerical analysis, but also because it actually
  437   includes \FORTRAN\ code for the implementation.
  438   The review article\footnote{$\dagger$}{Available at
  439   \.{http://pubs.acs.org/doi/pdf/10.1021/ac002801q}} concludes the discussion
  440   of the Savitzky--Golay smoothing filter with a reminiscence by Abraham Savitzky
  441   about the work:
  442   \quote{In thinking about why the technique has been so widely used, I've come
  443   to the following conclusions. First, it solves a common problem--the reduction
  444   of random noise by the well-recognized least-squares technique. Second, the
  445   method was spelled out in detail in the paper, including tables and a sample
  446   computer subroutine. Third, the mathematical basis for the technique, although
  447   explicitly and rigorously stated in the article, was separated from a
  448   completely nonmathematical explanation and justification. Finally, the
  449   technique itself is simple and easy to use, and it works.}
  450   
  451   @ As a final remark on the Savitzky--Golay filtering algorithm, a few points
  452   on the actual implementation of the convolution need to be made.
  453   While the \sgfilter\ program relies on the method for computation of the
  454   Savitzky--Golay coefficients as presented in {\sl Numerical Recipes in C},
  455   2nd Edn~(Cambridge University Press, New York, 1994), it must be emphasized
  456   that the suggestion there presented for the convolution, which is to apply
  457   the |convlv| routine of {\sl Numerical Recipes in C}, is significantly
  458   increasing the complexity and memory consumption in the filtering.
  459   In particular, the |convlv| routine in turn relies on consistent calls to the
  460   |twofft| routine, which in order to deliver proper data needs to be supplied
  461   with a return vector of {\sl twice} the size of the input vector.
  462   In addition, |convlv| requires the size of the input vector to be of an
  463   integer power of two (say, $M=1024$, $4096$, etc.), which may be acceptable
  464   for one-off tests but is a rather inconvenient limitation for any more general
  465   applications.
  466   
  467   Whether the \sgfilter\ program should employ the convolution engine supplied by
  468   the |convlv| routine (recommended in {\sl Numerical Recipes in C}) or the
  469   direct convolution as implemented in the |sgfilter| routine (recommended by
  470   me) is controlled by the |CONVOLVE_WITH_NR_CONVLV| definition in the
  471   \.{sgfilter.h} header file. With reference to the above issues with |convlv|,
  472   I strongly advise keeping the default (\.{0}) setting for
  473   |CONVOLVE_WITH_NR_CONVLV|.
  474   
  475   @*Revision history of the program.
  476   \medskip
  477   \citem[2006-01-18]{[v.1.0]} {\tt <fj@@phys.soton.ac.uk>}\hfill\break
  478   First properly working version of the \sgfilter\ program.
  479   
  480   \citem[2006-01-20]{[v.1.1]} {\tt <fj@@phys.soton.ac.uk>}\hfill\break
  481   Added the test case for Savitzky--Golay filtering, modeling an underlying
  482   function $g(x)$ with superimposed Gaussian noise as
  483   $$
  484     f(x)=\underbrace{
  485            \cos(3x)\sin^2(x^3)+4\sum^4_{k=1}\exp(-(x-x_k)^2/w^2_k)
  486          }_{g(x)} + \underbrace{Vu(x)\vphantom{\sum^4_{k=1}}}_{\rm noise}
  487   $$
  488   where $u(x)$ is a normally distributed stochastic variable of mean zero and
  489   unit variance, $V$ is the local variance as specified arbitrarily, and the
  490   remaining parameters $(x_k,w_k)$ are the positions and widths of four
  491   Gaussian peaks superimposed onto the otherwise trigonometric expression
  492   for the underlying function.
  493   
  494   \citem[2006-01-21]{[v.1.2]} {\tt <fj@@phys.soton.ac.uk>}\hfill\break
  495   Changed the streaming of output (filtered) data so that the |stdout| stream
  496   now is directed to file whenever \.{-o} or \.{\--outputfile} options are
  497   present at the calling command line.
  498   
  499   \citem[2006-05-06]{[v.1.3]} {\tt <fj@@phys.soton.ac.uk>}\hfill\break
  500   Added an introductory section documenting the derivation of the
  501   Savitzky--Golay filter as such. Always nice to have at hand when it comes
  502   to actually understanding why certain parameters in the filtering need to
  503   be in certain ranges. Also added automatic support for extracting the number
  504   of input samples automatically from the input file, hence making the \.{-M}
  505   and \.{--num\_samples} option obsolete.
  506   
  507   \citem[2006-05-06]{[v.1.4]} {\tt <fj@@phys.soton.ac.uk>}\hfill\break
  508   Replaced the convolution from the previously used |convlv| routine to a
  509   brute-force but more economical one, which now does not rely on the input
  510   data being a set of $2^N$ samples for some $N$. However, I have chosen to
  511   keep the old implementation, which can be re-applied simply by changing
  512   the definition of |CONVOLVE_WITH_NR_CONVLV| to ``(\.{1}).''
  513   
  514   \citem[2009-11-01]{[v.1.5]} {\tt <http://jonsson.eu>}\hfill\break
  515   Included the \.{example.c} file in the \CWEB\ source of \sgfilter, for
  516   automatically is generated from the master \CWEB\ source when passed through
  517   \CTANGLE. A very nifty way indeed for keeping updated test cases.
  518   
  519   
  520   \citem[2011-12-04]{[v.1.6]} {\tt <http://jonsson.eu>}\hfill\break
  521   In the block concerned with the redirection of |stdout| when delivering
  522   filtered data, I found strange behaviour when executing \sgfilter\ under
  523   Windows. What happens is that any redirection of |stdout| back to terminal
  524   output in Windows naturally must be done in a different way than with the
  525   |freopen("/dev/tty", "a", stdout)| which is accepted by OS X (Free BSD),
  526   Linux, or any other \UNIX-like platforms. Hence, I simply added a (primitive)
  527   check on the platform type in the header file.
  528   
  529   @*Compiling the source code. The program is written in \CWEB, generating
  530   \ANSICEE\ (ISO C99) conforming source code and documentation as plain
  531   \TeX-source, and is to be compiled using the sequences as outlined in the
  532   \.{Makefile} listed below.
  533   For general information on literate programming, \CTANGLE, or \CWEAVE, see
  534   \.{http://www.literateprogramming.com}.
  535   \bigskip
  536   {\obeylines\obeyspaces\tt
  537   \#
  538   \# Makefile designed for use with ctangle, cweave, gcc, and plain TeX.
  539   \#
  540   \# Copyright (C) 2002-2011, Fredrik Jonsson <http://jonsson.eu>
  541   \#
  542   \# The CTANGLE program converts a CWEB source document into a C program
  543   \# which may be compiled in the usual way. The output file includes \#line
  544   \# specifications so that debugging can be done in terms of the CWEB source
  545   \# file.
  546   \#
  547   \# The CWEAVE program converts the same CWEB file into a TeX file that may
  548   \# be formatted and printed in the usual way. It takes appropriate care of
  549   \# typographic details like page layout and the use of indentation, italics,
  550   \# boldface, etc., and it supplies extensive cross-index information that it
  551   \# gathers automatically.
  552   \#
  553   \# CWEB allows you to prepare a single document containing all the informa-
  554   \# tion that is needed both to produce a compilable C program and to produce
  555   \# a well-formatted document describing the program in as much detail as the
  556   \# writer may desire.  The user of CWEB ought to be familiar with TeX as well
  557   \# as C.
  558   \#
  559   PROJECT  = sgfilter
  560   CTANGLE  = ctangle
  561   CWEAVE   = cweave
  562   CC       = gcc
  563   CCOPTS   = -O2 -Wall -ansi -std=iso9899:1999 -pedantic
  564   LNOPTS   = -lm
  565   TEX      = tex
  566   DVIPS    = dvips
  567   DVIPSOPT = -ta4 -D1200
  568   PS2PDF   = ps2pdf
  569   METAPOST = mpost
  570   ~   ~
  571   all: \$(PROJECT) \$(PROJECT).pdf
  572   ~   ~
  573   \$(PROJECT): \$(PROJECT).o
  574   ~        \$(CC) \$(CCOPTS) -o \$(PROJECT) \$(PROJECT).o \$(LNOPTS)
  575   ~   ~
  576   \$(PROJECT).o: \$(PROJECT).c
  577   ~        \$(CC) \$(CCOPTS) -c \$(PROJECT).c
  578   ~   ~
  579   \$(PROJECT).c: \$(PROJECT).w
  580   ~        \$(CTANGLE) \$(PROJECT) \$(PROJECT).c
  581   ~   ~
  582   \$(PROJECT).pdf: \$(PROJECT).ps
  583   ~        \$(PS2PDF) \$(PROJECT).ps \$(PROJECT).pdf
  584   ~   ~
  585   \$(PROJECT).ps: \$(PROJECT).dvi
  586   ~        \$(DVIPS) \$(DVIPSOPT) \$(PROJECT).dvi -o \$(PROJECT).ps
  587   ~   ~
  588   \$(PROJECT).dvi: \$(PROJECT).tex
  589   ~        \$(TEX) \$(PROJECT).tex
  590   ~   ~
  591   \$(PROJECT).tex: \$(PROJECT).w
  592   ~        \$(CWEAVE) \$(PROJECT)
  593   ~   ~
  594   clean:
  595   ~        -rm -Rf \$(PROJECT) *~ *.c *.h *.o *.exe *.dat *.pdf *.mp *.trj *.mpx
  596   ~        -rm -Rf *.tex *.aux *.log *.toc *.idx *.scn *.dvi *.ps *.1 *.eps
  597   ~   ~
  598   archive:
  599   ~        make -ik clean
  600   ~        tar --gzip --directory=../ -cf ../\$(PROJECT).tar.gz \$(PROJECT)
  601   }
  602   \bigskip
  603   This \.{Makefile} essentially executes two major calls. First, the \CTANGLE\
  604   program parses the \CWEB\ source file \.{sgfilter.w} to extract a \CEE\ source
  605   file \.{sgfilter.c} which may be compiled into an executable program using any
  606   \ANSICEE\ conformant compiler. The output source file \.{sgfilter.c} includes
  607   \.{\#line} specifications so that any debugging conveniently can be done in
  608   terms of line numbers in the original \CWEB\ source file \.{sgfilter.w}.
  609   Second, the \CWEAVE\ program parses the same \CWEB\ source file \.{sgfilter.w}
  610   to extract a plain \TeX\ file \.{sgfilter.tex} which may be compiled into a
  611   PostScript or PDF document.
  612   The document file \.{sgfilter.tex} takes appropriate care of typographic
  613   details like page layout and text formatting, and supplies extensive
  614   cross-indexing information which is gathered automatically.
  615   In addition to extracting the documentary text, \CWEAVE\ also includes the
  616   source code in cross-referenced blocks corresponding to the descriptors as
  617   entered in the \CWEB\ source code.
  618   
  619   Having executed \.{make} (or \.{gmake} for the \GNU\ enthusiast) in the same
  620   directory where the files \.{sgfilter.w} and \.{Makefile} are located, one is
  621   left with the executable file \.{sgfilter}, being the ready-to-use compiled
  622   program, and the PostScript file \.{sgfilter.ps} (or PDF file \.{sgfilter.pdf})
  623   which contains the full documentation of the program, that is to say the
  624   document you currently are reading.
  625   Notice that on platforms running any operating system by Microsoft, the
  626   executable file will instead automatically be named \.{sgfilter.exe}.
  627   This convention also applies to programs compiled under the \UNIX-like
  628   environment \CYGWIN.
  629   
  630   @*Running the program. The program is entirely controlled by the command
  631   line options supplied when invoking the program. The syntax for executing the
  632   program is\par
  633   \medskip
  634   \line{\hskip 40pt\.{sgfilter [options]}\hfill}\par
  635   \medskip
  636   \noindent
  637   where \.{options} include the following, given in their long as well as
  638   their short forms (with prefixes `\.{--}' and `\.{-}', respectively):
  639   \medskip
  640   \optitem[\.{--inputfile}, \.{-i} $\langle${\it input filename}$\rangle$]%
  641     {Specifies the raw, unfiltered input data to be crunched. The input file
  642     should describe the input as two columns containing $x$- and $y$-coordinates
  643     of the samples.}
  644   \optitem[\.{--outputfile}, \.{-o} $\langle${\it output filename}$\rangle$]%
  645     {Specifies the output file to which the program should write the filtered
  646     profile, again in a two-column format containing $x$- and $y$-coordinates
  647     of the filtered samples. If this option is omitted, the generated
  648     filtered data will instead be written to the console (terminal).}
  649   \optitem[\.{-nl} $\langle n_L\rangle$]%
  650     {Specifies the number of samples $n_L$ to use to the ``left'' of the basis
  651     sample in the regression window (kernel). The total number of samples in
  652     the window will be $nL+nR+1$.}
  653   \optitem[\.{-nr} $\langle n_R\rangle$]%
  654     {Specifies the number of samples $n_R$ to use to the ``right'' of the basis
  655     sample in the regression window (kernel). The total number of samples in
  656     the window will be $nL+nR+1$.}
  657   \optitem[\.{-m} $\langle m\rangle$]%
  658     {Specifies the order $m$ of the polynomial
  659     $p(x)=a_0+a_1(x-x_k)+a_2(x-x_k)^2+\ldots+a_m (x-x_k)^m$ to use in the
  660     regression analysis leading to the Savitzky--Golay coefficients.
  661     Typical values are between $m=2$ and $m=6$. Beware of too high values,
  662     which easily makes the regression too sensitive, with an oscillatory
  663     result.}
  664   \optitem[\.{-ld} $\langle l_D\rangle$]%
  665     {Specifies the order of the derivative to extract from the Savitzky--Golay
  666     smoothing algorithm. For regular Savitzky--Golay smoothing of the input
  667     data as such, use $l_D=0$. For the Savitzky--Golay smoothing and extraction
  668     of derivatives, set $l_D$ to the order of the desired derivative and make
  669     sure that you correctly interpret the scaling parameters as described in
  670     {\sl Numerical Recipes in C}, 2nd Edn~(Cambridge University Press, New
  671     York, 1994).}
  672   \optitem[\.{--help}, \.{-h}]%
  673     {Displays a brief help message and terminates the \sgfilter\ program clean
  674     from any error codes.}
  675   \optitem[\.{--verbose}, \.{-v}]%
  676     {Toggle verbose mode. (Default: Off.) This option should always be omitted
  677     whenever no output file has been specified (that is to say, omit any
  678     \.{--verbose} or \.{-v} option whenever \.{--outputfile} or \.{-o} has
  679     been omitted), as the verbose logging otherwise will contaminate the
  680     filtered data stream written to the console (terminal).}
  681   
  682   @*Example of Savitzky--Golay filtering with the \sgfilter\ program.
  683   It is always a good idea to create a rather ``nasty'' test case for an
  684   algorithm, and in this case it also provides the reader of this code with
  685   a (hopefully) clear example of usage of the \sgfilter\ program.
  686   
  687   We start off with generating a test suite of noisy data, in this case
  688   modeling an underlying function $g(x)$ with superimposed Gaussian noise as
  689   $$
  690     f(x)=\underbrace{
  691            \cos(3x)\sin^2(x^3)+4\sum^4_{k=1}\exp(-(x-x_k)^2/w^2_k)
  692          }_{g(x)} + \underbrace{Vu(x)\vphantom{\sum^4_{k=1}}}_{\rm noise}
  693   $$
  694   where $u(x)$ is a normally distributed stochastic variable of mean zero and
  695   unit variance, $V$ is the local variance as specified arbitrarily, and the
  696   remaining parameters $(x_k,w_k)$ are the positions and widths of four
  697   Gaussian peaks superimposed onto the otherwise trigonometric expression
  698   for the underlying function. For the current test suite we use
  699   $(x_1,w_1)=(0.2,0.007)$, $(x_2,w_2)=(0.4,0.01)$, $(x_3,w_3)=(0.6,0.02)$,
  700   and $(x_4,w_4)=(0.8,0.04)$. These Gaussian peaks serve as to provide various
  701   degrees of rapidly varying data, to check the performance in finding maxima.
  702   Meanwhile, the less rapidly varying domains which are dominated by the
  703   trigonometric expression serves as a test for the capability of the filter
  704   to handle rather moderate variations of low amplitude.
  705   
  706   The underlying test function $g(x)$ is shown\footnote{$\dagger$}{See the
  707   \.{*.eps} blocks in the enclosed \.{Makefile} for details on how \METAPOST
  708   was used in the generation of the encapsulated PostScript images shown in
  709   Figs.~1--6.} in Fig.~1.
  710   \medskip
  711   \centerline{\epsfbox{fig1.1}}
  712   \figcaption[1]{The underlying test function $g(x)=\cos(3x)\sin^2(x^3)%
  713   +4\sum^4_{k=1}\exp(-(x-x_k)^2/w^2_k)$ without any added noise.
  714   Here the positions and widths of the Gaussian peaks are $(x_1,w_1)=(0.2,0.007)$,
  715   $(x_2,w_2)=(0.4,0.01)$, $(x_3,w_3)=(0.6,0.02)$, and $(x_4,w_4)=(0.8,0.04)$.}
  716   \medskip
  717   \noindent
  718   The generator of artificial test data to be tested by the Savitsky--Golay
  719   filtering algorithm is here included as a simple \CEE\ program, which
  720   automatically is generated from the master \CWEB\ source when passed through
  721   \CTANGLE.
  722   
  723   @(example.c@>=
  724   #include <stdio.h>
  725   #include <stdlib.h>
  726   #include <time.h>
  727   #include <math.h>
  728   #define TWOPI (2.0*3.141592653589793)
  729   @|
  730   float gauss(float x, float w, float xa) {
  731      return(exp(-pow(((x-xa)/w),2)));
  732   }
  733   @|
  734   float func(float x) {
  735      float retval=gauss(x,0.007,0.2); /* $x_1=0.2$, $w_1=0.007$ */
  736      retval+=gauss(x,0.01,0.4);       /* $x_2=0.4$, $w_2=0.01$ */
  737      retval+=gauss(x,0.02,0.6);       /* $x_3=0.6$, $w_3=0.02$ */
  738      retval+=gauss(x,0.04,0.8);       /* $x_4=0.8$, $w_4=0.04$ */
  739      retval*=4.0;
  740      retval+=cos(3.0*x)*pow(sin(pow(x,3)),2);
  741      return(retval);
  742   }
  743   @|
  744   int main(int argc, char *argv[]) {
  745      int k, mm=1024;
  746      float var=1.0,xmax=2.5, x1, x2, u, v, f, z;
  747      if (argc>1) sscanf(argv[1],"%f",&var); /* Read first argument as variance */
  748      srand((unsigned)time(NULL)); /* Initialize random number generator */
  749      for (k=0;k<mm-1;k+=2) {
  750         x1=xmax*k/((double)mm-1);
  751         x2=xmax*(k+1)/((double)mm-1);
  752         u=((float)rand())/RAND_MAX; /* Uniformly distributed over $[0,1]$ */
  753         v=((float)rand())/RAND_MAX; /* Uniformly distributed over $[0,1]$ */
  754         if (u>0.0) { /* Apply the Box--Muller algorithm on |u| and |v| */
  755            f=sqrt(-2*log(u));
  756            z=TWOPI*v;
  757            u=f*cos(z); /* Normally distributed with E(|u|)=0 and Var(|u|)=1 */
  758            v=f*sin(z); /* Normally distributed with E(|u|)=0 and Var(|u|)=1 */
  759            fprintf(stdout,"%1.8f %1.8f\n",x1,func(x1)+var*u); /* $f(x_1)$ */
  760            fprintf(stdout,"%1.8f %1.8f\n",x2,func(x2)+var*v); /* $f(x_2)$ */
  761         }
  762      }
  763      return(0);
  764   }
  765   
  766   @ After having compiled the above code \.{example.c}, simply run
  767   \.{./example }$\langle${\it noise variance}$\rangle$\.{ > }$\langle$%
  768   {\it file name}$\rangle$ in order to generate the test function with a
  769   superimposed normally distributed (gaussian) noise of desired variance.
  770   In particular, we will in this test suite consider variances of 0 (that is to
  771   say, the underlying function without any noise), 0.5, 1.0, and 2.0.
  772   Such data files are simply generated by executing\footnote{$\dagger$}{The
  773   resulting test data, which so far has not been subject to any filtering,
  774   may easily be viewed by running the following script in Octave/Matlab:
  775   \medskip
  776   {\obeylines\obeyspaces\tt
  777   clear all; close all;
  778   hold on;
  779   u=load('example-2.0.dat'); plot(u(:,1),u(:,2),'-b');
  780   u=load('example-1.0.dat'); plot(u(:,1),u(:,2),'-c');
  781   u=load('example-0.5.dat'); plot(u(:,1),u(:,2),'-r');
  782   u=load('example-0.0.dat'); plot(u(:,1),u(:,2),'-k');
  783   legend('var(f(x))=2.0','var(f(x))=1.0','var(f(x))=0.5','var(f(x))=0.0');
  784   hold off;
  785   title('Artificial data generated for tests of Savitzky-Golay filtering');
  786   xlabel('x');
  787   ylabel('f(x)');
  788   }
  789   }
  790   \bigskip
  791   {\obeylines\obeyspaces\tt
  792   ./example 0.0 > example-0.0.dat
  793   ./example 0.5 > example-0.5.dat
  794   ./example 1.0 > example-1.0.dat
  795   ./example 2.0 > example-2.0.dat
  796   }
  797   \bigskip
  798   \noindent
  799   The resulting ``noisified'' suite of test data are in Figs.~2--4 shown for the
  800   respective noise variances of $V=0.5$, $V=1.0$, and $V=2.0$, respectively.
  801   \medskip
  802   \centerline{\epsfbox{fig2.1}}
  803   \figcaption[2]{The test function $g(x)$ with added Gaussian noise of variance
  804     $V=2.0$, as stored in file \.{example-2.0.dat} in the test suite.}
  805   
  806   @ Applying the Savitzky--Golay filter to the test data is now a straightforward
  807   task.
  808   Say that we wish to test filtering with polynomial degree $|m|=4$ and $|ld|=0$
  809   (which is the default value of |ld|, for regular smoothing with a delivered
  810   ``zero:th order derivative'', that is to say the smoothed non-differentiated
  811   function), for the two cases
  812   $|nl|=|nr|=10$ (in total 21 points in the regression kernel) and
  813   $|nl|=|nr|=60$ (in total 121 points in the regression kernel).
  814   Using the previously generated test suite of noisy data, the filtering is then
  815   easily accomplished by executing:
  816   \bigskip
  817   {\obeylines\obeyspaces\tt
  818   ./sgfilter -m 4 -nl 60 -nr 60 -i example-0.0.dat -o example-0.0-f-60.dat
  819   ./sgfilter -m 4 -nl 10 -nr 10 -i example-0.0.dat -o example-0.0-f-10.dat
  820   ./sgfilter -m 4 -nl 60 -nr 60 -i example-0.5.dat -o example-0.5-f-60.dat
  821   ./sgfilter -m 4 -nl 10 -nr 10 -i example-0.5.dat -o example-0.5-f-10.dat
  822   ./sgfilter -m 4 -nl 60 -nr 60 -i example-1.0.dat -o example-1.0-f-60.dat
  823   ./sgfilter -m 4 -nl 10 -nr 10 -i example-1.0.dat -o example-1.0-f-10.dat
  824   ./sgfilter -m 4 -nl 60 -nr 60 -i example-2.0.dat -o example-2.0-f-60.dat
  825   ./sgfilter -m 4 -nl 10 -nr 10 -i example-2.0.dat -o example-2.0-f-10.dat
  826   }
  827   \bigskip
  828   \noindent
  829   The resulting filtered data sets are shown in Figs.~3--6 for noise variances
  830   of $V=2.0$, $V=1.0$, $V=0.5$, and $V=0$, respectively. The final case
  831   corresponds to the interesting case of filtering the underlying function
  832   $g(x)$ without any added noise whatsoever, which corresponds to performing
  833   a local regression of the regression polynomial to the analytical trigonometric
  834   and exponential functions being terms of $g(x)$, a regression where we by no
  835   means should expect a perfect match.
  836   \medskip
  837   \centerline{\epsfbox{fig3.1}}
  838   \figcaption[3]{The profiles resulting from Savitzky--Golay-filtering of the
  839     test function $g(x)$ with added Gaussian noise of variance
  840     $V=2.0$.}
  841   \medskip
  842   As can be seen in Fig.~3, the results from filtering the ``worst-case'' set
  843   (with noise variance $V=2.0$) with $|nl|=|nr|=10$ ($|nl|+|nr|+1=21$ samples
  844   in the regression kernel) and $|m|=4$ (curve in blue) yield a rather good
  845   tracking of the narrow Gaussian peaks, meanwhile performing rather poor in
  846   the low-amplitude and rather slowly varying trigonometric hills in the
  847   right-hand side of the graph.
  848   As a reference, the underlying function $g(x)$ is mapped in dashed black.
  849   On the other hand, with $|nl|=|nr|=60$ ($|nl|+|nr|+1=121$ samples in the
  850   regression kernel) and keeping the same degree of the
  851   regression polynomial (curve in red), the narrow peaks are barely followed,
  852   meanwhile having a rather poor performance in the slowly varying hills as
  853   well (albeit of a very poor signal-to-noise ratio).
  854   \vfill\eject
  855   
  856   However, with a lower variance of the superimposed noise, we at $V=1$ have a
  857   nice tracking of the slowly varying hills by the 121-sample regression kernel,
  858   as shown in Fig.~4, meanwhile having a good tracking of the narrow Gaussian
  859   peaks by the 21-sample regression kernel.
  860   That the $|nl|+||=21$-sample window is noisier than the 121-sample one should
  861   not really be surprising, as the higher number of samples in the regression
  862   window tend to smoothen out the regression even further.
  863   
  864   \centerline{\epsfbox{fig4.1}}
  865   \figcaption[4]{The profiles resulting from Savitzky--Golay-filtering of the
  866     test function $g(x)$ with added Gaussian noise of variance
  867     $V=1.0$.}
  868   \medskip
  869   \centerline{\epsfbox{fig5.1}}
  870   \figcaption[5]{The profiles resulting from Savitzky--Golay-filtering of the
  871     test function $g(x)$ with added Gaussian noise of variance
  872     $V=0.5$.}
  873   \vfill\eject
  874   
  875   \centerline{\epsfbox{fig6.1}}
  876   \figcaption[6]{The profiles resulting from Savitzky--Golay-filtering of the
  877     test function $g(x)$ with added Gaussian noise of variance
  878     $V=0$, that is to say a direct regression against the underlying function
  879     $g(x)$. This figure illustrates the limitations of the attempts of
  880     linear regression of a polynomial to an underlying function which
  881     clearly cannot be approximated by a simple polynomial expressin in
  882     certain domains. One should always keep this limitation in mind before
  883     accepting (or discarding) data filtered by the Savitzky--Golay algorithm,
  884     despite the many cases in which it performs exceptionally well.}
  885   \vfill\eject
  886   
  887   @*Header files. We put the interface part of our routines in the header file
  888   \.{sgfilter.h}, and we restrict ourselves to a lowercase file name to
  889   maintain portability among operating systems with case-insensitive file names.
  890   There are just a few global definitions present in the \sgfilter\ program:\par
  891   \varitem[|VERSION|]{The current program revision number.}
  892   \varitem[|COPYRIGHT|]{The copyright banner.}
  893   \varitem[|DEFAULT_NL|]{The default value to use for the |nl| variable unless
  894     stated otherwise at the command line during startup of the program.
  895     This parameter specifies the number of samples $n_L$ to use to the ``left''
  896     of the basis sample in the regression window (kernel). The total number of
  897     samples in the window will be $nL+nR+1$.}
  898   \varitem[|DEFAULT_NR|]{The default value to use for the |nr| variable unless
  899     stated otherwise at the command line during startup of the program.
  900     This parameter specifies the number of samples $n_R$ to use to the ``right''
  901     of the basis sample in the regression window (kernel). The total number of
  902     samples in the window will be $nL+nR+1$.}
  903   \varitem[|DEFAULT_M|]{The default value to use for the |m| variable unless
  904     stated otherwise at the command line during startup of the program.
  905     This parameter specifies the order $m$ of the polynomial
  906     $p(x)=a_0+a_1(x-x_k)+a_2(x-x_k)^2+\ldots+a_m (x-x_k)^m$ to use in the
  907     regression analysis leading to the Savitzky--Golay coefficients.
  908     Typical values are between $m=2$ and $m=6$. Beware of too high values,
  909     which easily makes the regression too sensitive, with an oscillatory
  910     result.}
  911   \varitem[|DEFAULT_LD|]{The default value to use for the |ld| variable unless
  912     stated otherwise at the command line during startup of the program.
  913     This parameter specifies the order of the derivative to extract from the
  914     Savitzky--Golay smoothing algorithm.
  915     For regular Savitzky--Golay smoothing of the input
  916     data as such, use $l_D=0$. For the Savitzky--Golay smoothing and extraction
  917     of derivatives, set $l_D$ to the order of the desired derivative and make
  918     sure that you correctly interpret the scaling parameters as described in
  919     {\sl Numerical Recipes in C}, 2nd Edn~(Cambridge University Press, New
  920     York, 1994).}
  921   \varitem[|NCHMAX|]{The maximum number of characters allowed in strings for
  922     storing file names, including path.}
  923   \varitem[|log(...)|]{The |log()| macro forms the user interface to run-time
  924     error messaging and console logging activities, by invoking the
  925     |log_printf()| routine. The |log()| macro is preferrably used rather than
  926     direct calls to |log_printf()|, as it automatizes the extraction of the
  927     calling routine, via the |__func__| macro. Notice that the clean construction
  928     of the macro using variable-length arguments (via |__VA_ARGS__|) {\sl only} is
  929     supported at ISO 9899:1999 (ISO C99) standard level or above (see, for
  930     example, \.{http://en.wikipedia.org/wiki/C99}), which is the default
  931     compliance level used in the enclosed \.{Makefile}. (In fact, this is here
  932     {\sl the} very reason for using ISO 9899:1999 rather than ISO 9899:1990).}
  933   
  934   @(sgfilter.h@>=
  935   #define VERSION "1.6"
  936   #define COPYRIGHT "Copyright (C) 2006-2011, Fredrik Jonsson"
  937   #define DEFAULT_NL (15)
  938   #define DEFAULT_NR (15)
  939   #define DEFAULT_M (4)
  940   #define DEFAULT_LD (0)
  941   #define EPSILON ((double)(1.0e-20))
  942   #define NCHMAX (256)
  943   #define CONVOLVE_WITH_NR_CONVLV (0)
  944   #define log(...) log_printf(__func__, __VA_ARGS__)
  945   
  946   @|
  947   #if defined(_CYGWIN_SIGNAL_H)||defined(__APPLE__)||defined(__unix__)||defined(__linux)
  948   #define UNIX_LIKE_OS (1)
  949   #endif
  950   
  951   @|
  952   void log_printf(const char *function_name, const char *format, ...);
  953   int *ivector(long nl, long nh);
  954   double *dvector(long nl, long nh);
  955   double **dmatrix(long nrl, long nrh, long ncl, long nch);
  956   void free_ivector(int *v, long nl, long nh);
  957   void free_dvector(double *v, long nl, long nh);
  958   void free_dmatrix(double **m,long nrl,long nrh,long ncl,long nch);
  959   void lubksb(double **a, int n, int *indx, double b[]);
  960   void ludcmp(double **a, int n, int *indx, double *d);
  961   void four1(double data[], unsigned long nn, int isign);
  962   void twofft(double data1[], double data2[], double fft1[], double fft2[],
  963      unsigned long n);
  964   void realft(double data[], unsigned long n, int isign);
  965   char convlv(double data[], unsigned long n, double respns[], unsigned long m,
  966               int isign, double ans[]);
  967   char sgcoeff(double c[], int np, int nl, int nr, int ld, int m);
  968   char sgfilter(double yr[], double yf[], int mm, int nl, int nr, int ld, int m);
  969   char *strip_away_path(char filename[]);
  970   long int num_coordinate_pairs(FILE *file);
  971   
  972   @*The main program. Here follows the general outline of the |main| routine of
  973   the \sgfilter\ program. This is where it all starts.
  974   
  975   @c
  976   @<Library inclusions@>@;
  977   @<Global variables@>@;
  978   @<Definitions of routines@>@;
  979   
  980   int main(int argc, char *argv[])
  981   {
  982      @<Definition of variables@>@;
  983      @<Parse command line for options and parameters@>@;
  984      @<Allocate memory and read |M| samples of unfiltered data from file@>@;
  985      @<Filter raw data through the Savitzky--Golay smoothing filter@>@;
  986      @<Write filtered data to file or terminal output@>@;
  987      @<Deallocate memory@>@;
  988      return(EXIT_SUCCESS);
  989   }
  990   
  991   @ Library dependencies.
  992   The standard \ANSICEE\ libraries included in this program are:\par
  993   \varitem[\.{math.h}]{For access to common mathematical functions.}
  994   \varitem[\.{stdio.h}]{For file access and any block involving |fprintf|.}
  995   \varitem[\.{stdlib.h}]{For access to the |exit| function.}
  996   \varitem[\.{stdarg.h}]{For access to |vsprintf|, |vfprintf| etc.}
  997   \varitem[\.{string.h}]{For string manipulation, |strcpy|, |strcmp| etc.}
  998   \varitem[\.{ctype.h}]{For access to the |isalnum| function.}
  999   \varitem[\.{time.h}]{For access to |ctime|, |clock| and time stamps.}
 1000   \varitem[\.{sys/time.h}]{For access to millisecond-resolved timer.}
 1001   
 1002   @<Library inclusions@>=
 1003   #include <math.h>
 1004   #include <stdlib.h>
 1005   #include <stdarg.h>
 1006   #include <stdio.h>
 1007   #include <string.h>
 1008   #include <ctype.h>
 1009   #include <time.h>
 1010   #include <sys/time.h>
 1011   #include "sgfilter.h"
 1012   
 1013   @ Declaration of global variables. The only global variables allowed in
 1014   my programs are |optarg|, which is a pointer to the the string of characters
 1015   that specified the call from the command line, and |progname|, which simply
 1016   is a pointer to the string containing the name of the program, as it was
 1017   invoked from the command line.
 1018   Typically, the |progname| string is the result remaining after having passed
 1019   |argv[0]| through the |strip_away_path| routine, just before parsing the
 1020   command line for parameters.
 1021   
 1022   @<Global variables@>=
 1023   extern char *optarg;
 1024   char *progname;
 1025   
 1026   @ Declaration of local variables of the |main| program.
 1027   In \CWEB\ one has the option of adding variables along the program, for example
 1028   by locally adding temporary variables related to a given sub-block of code.
 1029   However, the philosophy in the \sgfilter\ program is to keep all variables of
 1030   the |main| section collected together, so as to simplify tasks as, for example,
 1031   tracking down a given variable type definition.
 1032   The local variables of the program are as follows:
 1033   \medskip
 1034   
 1035   \varitem[|*x|, |*yr|, |*yf|]{Pointers to the double precision vectors keeping
 1036     the abscissa, unfiltered (raw) ordinata, and the and the resulting filtered
 1037     ordinata, respectively.}
 1038   \varitem[|mm|]{Keeps track of the number of samples to analyze, $|mm|\equiv M$.}
 1039   \varitem[|*file|]{Dummy file pointer used for scanning through input data and
 1040     for writing data to an output file.}
 1041   \varitem[|input_filename|]{String for keeping the file name of the input data.}
 1042   \varitem[|output_filename|]{Ditto for the output data.}
 1043   \varitem[|verbose|]{Determines whether the program should run in verbose mode
 1044     (logging various activities along the execution) or not. Default is off.}
 1045   
 1046   @<Definition of variables@>=
 1047   int no_arg;
 1048   int nl=DEFAULT_NL;
 1049   int nr=DEFAULT_NR;
 1050   int ld=DEFAULT_LD;
 1051   int m=DEFAULT_M;
 1052   long int k, mm=0;
 1053   double *x, *yr, *yf;
 1054   char input_filename[NCHMAX]="",output_filename[NCHMAX]="";
 1055   char verbose=0;
 1056   FILE *file;
 1057   
 1058   @ Parsing command line options. All input parameters are passed to the
 1059   program through command line options to the \sgfilter\ program.
 1060   The syntax of the accepted command line options is listed whenever the
 1061   program is invoked without any options, or whenever any of the \.{--help}
 1062   or \.{-h} options are specified at startup.
 1063   
 1064   @<Parse command line for options and parameters@>=
 1065   progname=strip_away_path(argv[0]);
 1066   no_arg=argc;
 1067   while(--argc) {
 1068      if(!strcmp(argv[no_arg-argc],"-o") ||
 1069         !strcmp(argv[no_arg-argc],"--outputfile")) {
 1070         --argc;
 1071         strcpy(output_filename,argv[no_arg-argc]);
 1072      } else if(!strcmp(argv[no_arg-argc],"-i") ||
 1073         !strcmp(argv[no_arg-argc],"--inputfile")) {
 1074         --argc;
 1075         strcpy(input_filename,argv[no_arg-argc]);
 1076      } else if(!strcmp(argv[no_arg-argc],"-h") ||
 1077                !strcmp(argv[no_arg-argc],"--help")) {
 1078         showsomehelp();
 1079         exit(0);
 1080      } else if(!strcmp(argv[no_arg-argc],"-v") ||
 1081         !strcmp(argv[no_arg-argc],"--verbose")) {
 1082         verbose=(verbose?0:1);
 1083      } else if(!strcmp(argv[no_arg-argc],"-nl")) {
 1084         --argc;
 1085         if (!sscanf(argv[no_arg-argc],"%d",&nl)) {
 1086            log("Error in '-nl' option.");
 1087            exit(1);
 1088         }
 1089      } else if(!strcmp(argv[no_arg-argc],"-nr")) {
 1090         --argc;
 1091         if (!sscanf(argv[no_arg-argc],"%d",&nr)) {
 1092            log("Error in '-nr' option.");
 1093            exit(1);
 1094         }
 1095      } else if(!strcmp(argv[no_arg-argc],"-ld")) {
 1096         --argc;
 1097         if (!sscanf(argv[no_arg-argc],"%d",&ld)) {
 1098            log("Error in '-ld' option.");
 1099            exit(1);
 1100         }
 1101      } else if(!strcmp(argv[no_arg-argc],"-m")) {
 1102         --argc;
 1103         if (!sscanf(argv[no_arg-argc],"%d",&m)) {
 1104            log("Error in '-m' option.");
 1105            exit(1);
 1106         }
 1107      } else {
 1108         log("Unrecognized option '%s'.",argv[no_arg-argc]);
 1109         showsomehelp();
 1110         exit(1);
 1111      }
 1112   }
 1113   if (verbose) fprintf(stdout,"This is %s v.%s. %s\n",progname,VERSION,COPYRIGHT);
 1114   
 1115   @ This is the where the raw (unfiltered) data is loaded into the computer
 1116   memory. This block assumes that the number of data points, $M$, has {\sl not}
 1117   previously been determined, neither by analysis of the input file nor
 1118   explicitly stated via command line options, and hence the number of samples
 1119   is automatically extracted using the |num_coordinate_pairs| routine.
 1120   
 1121   @<Allocate memory and read |M| samples of unfiltered data from file@>=
 1122   if (!strcmp(input_filename,"")) {
 1123      log("No input file specified! (Please use the '--inputfile' option.)");
 1124      log("Execute '%s --help' for help.",progname);
 1125      exit(1);
 1126   }
 1127   if ((file=fopen(input_filename,"r"))==NULL) {
 1128      log("Could not open %s for loading raw data!",input_filename);
 1129      exit(1);
 1130   }
 1131   mm=num_coordinate_pairs(file);
 1132   if (mm<nl+nr+1) {
 1133      log("Error: The number M=%ld of data points must be at least nl+nr+1=%d",
 1134         mm, nl+nr+1);
 1135      log("Please check your -nl or -nr options.");
 1136      exit(1);
 1137   }
 1138   if (verbose) {
 1139      log("Loading %ld unfiltered samples from %s...", mm, input_filename);
 1140      log(" ... allocating memory for storage ...");
 1141   }
 1142   x=dvector(1,mm);
 1143   yr=dvector(1,mm);
 1144   #if CONVOLVE_WITH_NR_CONVLV
 1145   yf=dvector(1,2*mm);
 1146   #else
 1147   yf=dvector(1,mm);
 1148   #endif
 1149   if (verbose)
 1150      log(" ... scanning %s for input data ...",input_filename);
 1151   for (k=1;k<=mm;k++) {
 1152      fscanf(file,"%lf",&x[k]); /* Scan $x$-coordinate */
 1153      fscanf(file,"%lf",&yr[k]); /* Scan unfiltered $y$-coordinate */
 1154   }
 1155   fclose(file);
 1156   if (verbose)
 1157      log(" ... done. Input now residing in RAM.");
 1158   
 1159   @ Filter data. This is simple. One single call to the |sgfilter| routine and
 1160   we are done.
 1161   
 1162   @<Filter raw data through the Savitzky--Golay smoothing filter@>=
 1163   if (!sgfilter(yr, yf, mm, nl, nr, ld, m)) {
 1164       if (verbose)
 1165         log("Successfully performed Savitzky-Golay filtering.");
 1166   } else {
 1167      if (verbose)
 1168         log("Error: Could not perform Savitzky-Golay filtering.");
 1169   }
 1170   
 1171   @ Write the filtered data to file or |stdout|, depending on whether or not the
 1172   command line options \.{-o} or \.{-outputfile} were present when starting the
 1173   \sgfilter\ program. We here use a redirection of the |stdout| stream using
 1174   |freopen(output_filename, "w", stdout)) == NULL)| and (in the case of unix-like
 1175   systems) a re-redirection back to terminal output again with
 1176   |freopen("/dev/tty", "a", stdout)|.
 1177   
 1178   @<Write filtered data to file or terminal output@>=
 1179   if (!strcmp(output_filename,"")) { /* No filename specified */
 1180      if (verbose)
 1181         log("Writing %ld filtered samples to console...", mm);
 1182   } else { /* If file name specified $\Rightarrow$ redirect |stdout| to file */
 1183      if (verbose)
 1184         log("Writing %ld filtered samples to %s...", mm, output_filename);
 1185      if((file = freopen(output_filename, "w", stdout)) == NULL) {
 1186         log("Error: Unable to redirect stdout stream to file %s.",
 1187            output_filename);
 1188         exit(1);
 1189      }
 1190   }
 1191   for (k=1;k<=mm;k++) fprintf(stdout,"%1.8f %1.8f\n",x[k],yf[k]);
 1192   #ifdef UNIX_LIKE_OS
 1193   freopen("/dev/tty", "a", stdout); /* Redirect |stdout| back to console output */
 1194   #endif
 1195   if (verbose) log(" ... done.");
 1196   
 1197   @ Deallocate memory.
 1198   
 1199   @<Deallocate memory@>=
 1200   free_dvector(x,1,mm);
 1201   free_dvector(yr,1,mm);
 1202   #if CONVOLVE_WITH_NR_CONVLV
 1203   free_dvector(yf,1,2*mm);
 1204   #else
 1205   free_dvector(yf,1,mm);
 1206   #endif
 1207   
 1208   @*Routines used by the program.
 1209   
 1210   @<Definitions of routines@>=
 1211      @<Routine for error messaging@>@;
 1212      @<Routine for allocation of integer precision vectors@>@;
 1213      @<Routine for allocation of double floating-point precision vectors@>@;
 1214      @<Routine for allocation of double floating-point precision matrices@>@;
 1215      @<Routine for deallocation of integer precision vectors@>@;
 1216      @<Routine for deallocation of double floating-point precision vectors@>@;
 1217      @<Routine for deallocation of double floating-point precision matrices@>@;
 1218      @<Routine for solving systems of linear equations by LU decomposition@>@;
 1219      @<Routine for performing LU decomposition of a matrix@>@;
 1220      @<Routine for discrete Fourier transformation of real-valued data@>@;
 1221      @<Routine for simultaneous fast Fourier transformation of two data sets@>@;
 1222      @<Routine for Fourier transformation of real-valued data@>@;
 1223      @<Routine for numerical convolution@>@;
 1224      @<Routine for computation of coefficients for Savitzky--Golay filtering@>@;
 1225      @<Routine for Savitzky--Golay filtering@>@;
 1226      @<Routine for removing preceding path of filenames@>@;
 1227      @<Routine for displaying a brief help message on usage@>@;
 1228      @<Routine for obtaining the number of coordinate pairs in a file@>@;
 1229   
 1230   @ The |void log_printf(const char *function_name,const char *format, ...)|
 1231   routine writes formatted entries to standard output, displaying time and
 1232   calling routine in a coherent manner. Notice that although the |log_printf()|
 1233   routine is the one which performs the actual messaging, the |log()| macro
 1234   (defined in the header file) is the preferred way of accessing this routine,
 1235   as it provides a more compact notation and automatically takes care of supplying
 1236   the reference to the name of the calling function.
 1237   
 1238   Also notice that the |const char| type of the last two input pointer arguments
 1239   here is absolutely essential in order to pass strict pedantic compilation with
 1240   \GCC.
 1241   
 1242   The routine accepts two input parameters. First, |function_name| which should
 1243   be the name of the calling function. This is to ensure that any displayed
 1244   error messages are properly matched to the issuing routines. Notice, however,
 1245   that the |log()| macro (which is the preferred way of displaying error messages)
 1246   automatically takes care of supplying the proper function name.
 1247   Second, |format|, which simply is the format and message string to be
 1248   displayed, formatted in the \CEE-standard |printf()| or |fprintf()| syntax.
 1249   
 1250   @<Routine for error messaging@>=
 1251   void log_printf(const char *function_name, const char *format, ...) {
 1252      va_list args;
 1253      time_t time0;
 1254      struct tm lt;
 1255      struct timeval tv;
 1256      char logentry[1024];
 1257   
 1258      gettimeofday(&tv, NULL);
 1259      time(&time0);
 1260      lt = *localtime(&time0);
 1261      sprintf(logentry, "%02u%02u%02u %02u:%02u:%02u.%03d ",
 1262         lt.tm_year-100, lt.tm_mon+1, lt.tm_mday,
 1263         lt.tm_hour, lt.tm_min, lt.tm_sec, tv.tv_usec/1000);
 1264      sprintf(logentry+strlen(logentry),"(%s) ",function_name);
 1265      va_start(args, format); /* Initialize args by the |va_start()| macro */
 1266      vsprintf(logentry+strlen(logentry), format, args);
 1267      va_end(args); /* Terminate the use of args by the |va_end()| macro */
 1268      sprintf(logentry+strlen(logentry), "\n"); /* Always append newline */
 1269      fprintf(stdout, "%s", logentry);
 1270      return;
 1271   }
 1272   
 1273   @ The |int *ivector(long nl, long nh)| routine allocates a real-valued vector
 1274   of integer precision, with vector index ranging from |nl| to |nh|.
 1275   
 1276   @<Routine for allocation of integer precision vectors@>=
 1277   int *ivector(long nl, long nh) {
 1278      int *v;
 1279      v=(int *)malloc((size_t) ((nh-nl+2)*sizeof(int)));
 1280      if (!v) {
 1281         log("Error: Allocation failure.");
 1282         exit(1);
 1283      }
 1284      return v-nl+1;
 1285   }
 1286   
 1287   @ The |double *dvector(long nl, long nh)| routine allocates a real-valued
 1288   vector of double precision, with vector index ranging from |nl| to |nh|.
 1289   
 1290   @<Routine for allocation of double floating-point precision vectors@>=
 1291   double *dvector(long nl, long nh) {
 1292      double *v;
 1293      long k;
 1294      v=(double *)malloc((size_t) ((nh-nl+2)*sizeof(double)));
 1295      if (!v) {
 1296         log("Error: Allocation failure.");
 1297         exit(1);
 1298      }
 1299      for (k=nl;k<=nh;k++) v[k]=0.0;
 1300      return v-nl+1;
 1301   }
 1302   
 1303   @ The |double **dmatrix(long nrl, long nrh, long ncl, long nch)| routine
 1304   allocates an array of double floating-point precision, with row index
 1305   ranging from |nrl| to |nrh| and column index ranging from |ncl| to |nch|.
 1306   
 1307   @<Routine for allocation of double floating-point precision matrices@>=
 1308   double **dmatrix(long nrl, long nrh, long ncl, long nch)
 1309   {
 1310      long i, nrow=nrh-nrl+1,ncol=nch-ncl+1;
 1311      double **m;
 1312      m=(double **) malloc((size_t)((nrow+1)*sizeof(double*)));
 1313      if (!m) {
 1314         log("Allocation failure 1 occurred.");
 1315         exit(1);
 1316      }
 1317      m += 1;
 1318      m -= nrl;
 1319      m[nrl]=(double *) malloc((size_t)((nrow*ncol+1)*sizeof(double)));
 1320      if (!m[nrl]) {
 1321         log("Allocation failure 2 occurred.");
 1322         exit(1);
 1323      }
 1324      m[nrl] += 1;
 1325      m[nrl] -= ncl;
 1326      for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol;
 1327      return m;
 1328   }
 1329   
 1330   @ The |void free_ivector(int *v, long nl, long nh)| routine release the
 1331   memory occupied by the real-valued vector |v[nl...nh]|.
 1332   
 1333   @<Routine for deallocation of integer precision vectors@>=
 1334   void free_ivector(int *v, long nl, long nh) {
 1335      free((char*) (v+nl-1));
 1336   }
 1337   
 1338   @ The |free_dvector| routine release the memory occupied by the
 1339   real-valued vector |v[nl...nh]|.
 1340   
 1341   @<Routine for deallocation of double floating-point precision vectors@>=
 1342   void free_dvector(double *v, long nl, long nh) {
 1343      free((char*) (v+nl-1));
 1344   }
 1345   
 1346   @ The |void free_dmatrix(double **m,long nrl,long nrh,long ncl,long nch)|
 1347   routine releases the memory occupied by the double floating-point precision
 1348   matrix |v[nl...nh]|, as allocated by |dmatrix()|.
 1349   
 1350   @<Routine for deallocation of double floating-point precision matrices@>=
 1351   void free_dmatrix(double **m,long nrl,long nrh,long ncl,long nch) {
 1352      free((char*) (m[nrl]+ncl-1));
 1353      free((char*) (m+nrl-1));
 1354   }
 1355   
 1356   @ The |lubksb(double **a, int n, int *indx, double b[])| routine solves the set
 1357   of |n| linear equations ${\bf A}\cdot{\bf x}={\bf b}$ where ${\bf A}$ is a
 1358   real-valued $[n\times n]$-matrix and ${\bf x}$ and ${\bf b}$ are real-valued
 1359   $[n\times1]$-vectors. Here |a[1...n][1...n]| is input, however {\sl not} as the
 1360   matrix ${\bf A}$ but rather as its corresponding LU decomposition as determined
 1361   by the |ludcmp| routine. Here |indx[1...n]| is input as the permutation vector
 1362   returned by |ludcmp|, |b[1...n]| is input as the right-hand side vector
 1363   ${\bf b}$, and returns with the solution vector ${\bf x}$.
 1364   The parameters |a|, |n|, and |indx| are not modified by this routine and
 1365   can be left in place for successive calls with different right-hand sides
 1366   ${\bf b}$. This routine takes into account the possibility that ${\bf b}$
 1367   will begin with many zero elements, so it is efficient for use in matrix
 1368   inversion.
 1369   The |lubksb| routine is adopted from {\sl Numerical Recipes in C}, 2nd Edn
 1370   (Cambridge University Press, New York, 1994).
 1371   
 1372   @<Routine for solving systems of linear equations by LU decomposition@>=
 1373   void lubksb(double **a, int n, int *indx, double b[])
 1374   {
 1375      int i,ii=0,ip,j;
 1376      double sum;
 1377   
 1378      for (i=1;i<=n;i++) {
 1379         ip=indx[i];
 1380         sum=b[ip];
 1381         b[ip]=b[i];
 1382         if (ii)
 1383            for (j=ii;j<=i-1;j++) sum -= a[i][j]*b[j];
 1384         else if (sum) ii=i;
 1385         b[i]=sum;
 1386      }
 1387      for (i=n;i>=1;i--) {
 1388         sum=b[i];
 1389         for (j=i+1;j<=n;j++) sum -= a[i][j]*b[j];
 1390         b[i]=sum/a[i][i];
 1391      }
 1392   }
 1393   
 1394   @ Given a square and real-valued matrix |a[1...n][1...n]|, the
 1395   |ludcmp(float **a, int n, int *indx, float *d)| routine replaces it by the
 1396   corresponding LU decomposition of a rowwise permutation
 1397   of itself. Entering the routine, the square matrix |a| and its number of
 1398   columns (or rows) |n| are inputs. On return, |a| is arranged as in Eq.~(2.3.14)
 1399   of {\sl Numerical Recipes in C}, 2nd edition, while |indx[1...n]| is an output
 1400   vector that records the row permutation effected by the partial pivoting, and
 1401   |d| is output as $\pm1$ depending on whether the number of row interchanges
 1402   was even or odd, respectively. This routine is commonly used in combination
 1403   with |lubksb| to solve linear equations or invert a matrix.
 1404   The |ludcmp| routine is adopted from {\sl Numerical Recipes in C}, 2nd Edn
 1405   (Cambridge University Press, New York, 1994).
 1406   
 1407   @<Routine for performing LU decomposition of a matrix@>=
 1408   void ludcmp(double **a, int n, int *indx, double *d)
 1409   {
 1410      int i,imax=0,j,k;
 1411      double big,dum,sum,temp;
 1412      double *vv;
 1413   
 1414      vv=dvector(1,n);
 1415      *d=1.0;
 1416      for (i=1;i<=n;i++) {
 1417         big=0.0;
 1418         for (j=1;j<=n;j++)
 1419            if ((temp=fabs(a[i][j])) > big) big=temp;
 1420         if (big == 0.0) {
 1421            log("Error: Singular matrix found in routine ludcmp()");
 1422            exit(1);
 1423         }
 1424         vv[i]=1.0/big;
 1425      }
 1426      for (j=1;j<=n;j++) {
 1427         for (i=1;i<j;i++) {
 1428            sum=a[i][j];
 1429            for (k=1;k<i;k++) sum -= a[i][k]*a[k][j];
 1430            a[i][j]=sum;
 1431         }
 1432         big=0.0;
 1433         for (i=j;i<=n;i++) {
 1434            sum=a[i][j];
 1435            for (k=1;k<j;k++)
 1436               sum -= a[i][k]*a[k][j];
 1437            a[i][j]=sum;
 1438            if ( (dum=vv[i]*fabs(sum)) >= big) {
 1439               big=dum;
 1440               imax=i;
 1441            }
 1442         }
 1443         if (j != imax) {
 1444            for (k=1;k<=n;k++) {
 1445               dum=a[imax][k];
 1446               a[imax][k]=a[j][k];
 1447               a[j][k]=dum;
 1448            }
 1449            *d = -(*d);
 1450            vv[imax]=vv[j];
 1451         }
 1452         indx[j]=imax;
 1453         if (a[j][j] == 0.0) a[j][j]=EPSILON;
 1454         if (j != n) {
 1455            dum=1.0/(a[j][j]);
 1456            for (i=j+1;i<=n;i++) a[i][j] *= dum;
 1457         }
 1458      }
 1459      free_dvector(vv,1,n);
 1460   }
 1461   
 1462   @ The |four1(double data[], unsigned long nn, int isign)| routine replaces
 1463   |data[1...2*nn]| by its discrete Fourier transform, if |isign| is input
 1464   as $+1$; or replaces |data[1..2*nn]| by |nn| times its inverse discrete Fourier
 1465   transform, if |isign| is input as $-1$. Here |data| is a complex-valued array
 1466   of length |nn| or, equivalently, a real array of length |2*nn|, wher |nn|
 1467   {\sl must} be an integer power of 2 (this is not checked for).
 1468   The |four1| routine is adopted from {\sl Numerical Recipes in C}, 2nd Edn
 1469   (Cambridge University Press, New York, 1994).
 1470   
 1471   @<Routine for discrete Fourier transformation of real-valued data@>=
 1472   #include <math.h>
 1473   #define SWAP(a,b) tempr=(a);(a)=(b);(b)=tempr
 1474   void four1(double data[], unsigned long nn, int isign)
 1475   {
 1476      unsigned long n,mmax,m,j,istep,i;
 1477      double wtemp,wr,wpr,wpi,wi,theta;
 1478      double tempr,tempi;
 1479   
 1480      n=nn << 1;
 1481      j=1;
 1482      for (i=1;i<n;i+=2) {
 1483         if (j > i) {
 1484            SWAP(data[j],data[i]);
 1485            SWAP(data[j+1],data[i+1]);
 1486         }
 1487         m=n >> 1;
 1488         while (m >= 2 && j > m) {
 1489            j -= m;
 1490            m >>= 1;
 1491         }
 1492         j += m;
 1493      }
 1494      mmax=2;
 1495      while (n > mmax) {
 1496         istep=mmax << 1;
 1497         theta=isign*(6.28318530717959/mmax);
 1498         wtemp=sin(0.5*theta);
 1499         wpr = -2.0*wtemp*wtemp;
 1500         wpi=sin(theta);
 1501         wr=1.0;
 1502         wi=0.0;
 1503         for (m=1;m<mmax;m+=2) {
 1504            for (i=m;i<=n;i+=istep) {
 1505               j=i+mmax;
 1506               tempr=wr*data[j]-wi*data[j+1];
 1507               tempi=wr*data[j+1]+wi*data[j];
 1508               data[j]=data[i]-tempr;
 1509               data[j+1]=data[i+1]-tempi;
 1510               data[i] += tempr;
 1511               data[i+1] += tempi;
 1512            }
 1513            wr=(wtemp=wr)*wpr-wi*wpi+wr;
 1514            wi=wi*wpr+wtemp*wpi+wi;
 1515         }
 1516         mmax=istep;
 1517      }
 1518   }
 1519   #undef SWAP
 1520   
 1521   
 1522   @ The |twofft(double data1[], double data2[], double fft1[], double fft2[],
 1523   unsigned long n)| routine takes two real-valued arrays |data1[1...n]|
 1524   and |data2[1...n]| as input, makes a call to the |four1| routine and
 1525   returns two complex-valued output arrays |fft1[1...2*n]|
 1526   and |fft2[1...2*n]|, each of complex length |n| (that is to say, a real
 1527   length of |2*n| elements), which contain the discrete Fourier transforms
 1528   of the respective data arrays. Here |n| {\sl must} be an integer power of 2.
 1529   The |twofft| routine is adopted from {\sl Numerical Recipes in C}, 2nd Edn
 1530   (Cambridge University Press, New York, 1994).
 1531   
 1532   @<Routine for simultaneous fast Fourier transformation of two data sets@>=
 1533   void twofft(double data1[], double data2[], double fft1[], double fft2[],
 1534      unsigned long n)
 1535   {
 1536      void four1(double data[], unsigned long nn, int isign);
 1537      unsigned long nn3,nn2,jj,j;
 1538      double rep,rem,aip,aim;
 1539   
 1540      nn3=1+(nn2=2+n+n);
 1541      for (j=1,jj=2;j<=n;j++,jj+=2) {
 1542         fft1[jj-1]=data1[j];
 1543         fft1[jj]=data2[j];
 1544      }
 1545      four1(fft1,n,1);
 1546      fft2[1]=fft1[2];
 1547      fft1[2]=fft2[2]=0.0;
 1548      for (j=3;j<=n+1;j+=2) {
 1549         rep=0.5*(fft1[j]+fft1[nn2-j]);
 1550         rem=0.5*(fft1[j]-fft1[nn2-j]);
 1551         aip=0.5*(fft1[j+1]+fft1[nn3-j]);
 1552         aim=0.5*(fft1[j+1]-fft1[nn3-j]);
 1553         fft1[j]=rep;
 1554         fft1[j+1]=aim;
 1555         fft1[nn2-j]=rep;
 1556         fft1[nn3-j] = -aim;
 1557         fft2[j]=aip;
 1558         fft2[j+1] = -rem;
 1559         fft2[nn2-j]=aip;
 1560         fft2[nn3-j]=rem;
 1561      }
 1562   }
 1563   
 1564   @ The |realft(double data[], unsigned long n, int isign)| routine
 1565   calculates the Fourier transform of a set of |n| real-valued data points.
 1566   On return the routine replaces this data (which is stored in array
 1567   |data[1...n]|) by the positive frequency half of its complex Fourier
 1568   transform. The real-valued first and last components of the complex
 1569   transform are returned in elements |data[1]| and |data[2]|, respectively.
 1570   Here |n| must be a power of 2. This routine also calculates the inverse
 1571   transform of a complex data array if it is the transform of real data.
 1572   (In this case, the result must be multiplied by |2/n|.)
 1573   The |realft| routine is adopted from {\sl Numerical Recipes in C}, 2nd Edn
 1574   (Cambridge University Press, New York, 1994).
 1575   
 1576   @<Routine for Fourier transformation of real-valued data@>=
 1577   void realft(double data[], unsigned long n, int isign)
 1578   {
 1579      void four1(double data[], unsigned long nn, int isign);
 1580      unsigned long i,i1,i2,i3,i4,np3;
 1581      double c1=0.5,c2,h1r,h1i,h2r,h2i;
 1582      double wr,wi,wpr,wpi,wtemp,theta;
 1583   
 1584      theta=3.141592653589793/(double) (n>>1);
 1585      if (isign == 1) {
 1586         c2 = -0.5;
 1587         four1(data,n>>1,1);
 1588      } else {
 1589         c2=0.5;
 1590         theta = -theta;
 1591      }
 1592      wtemp=sin(0.5*theta);
 1593      wpr = -2.0*wtemp*wtemp;
 1594      wpi=sin(theta);
 1595      wr=1.0+wpr;
 1596      wi=wpi;
 1597      np3=n+3;
 1598      for (i=2;i<=(n>>2);i++) {
 1599         i4=1+(i3=np3-(i2=1+(i1=i+i-1)));
 1600         h1r=c1*(data[i1]+data[i3]);
 1601         h1i=c1*(data[i2]-data[i4]);
 1602         h2r = -c2*(data[i2]+data[i4]);
 1603         h2i=c2*(data[i1]-data[i3]);
 1604         data[i1]=h1r+wr*h2r-wi*h2i;
 1605         data[i2]=h1i+wr*h2i+wi*h2r;
 1606         data[i3]=h1r-wr*h2r+wi*h2i;
 1607         data[i4] = -h1i+wr*h2i+wi*h2r;
 1608         wr=(wtemp=wr)*wpr-wi*wpi+wr;
 1609         wi=wi*wpr+wtemp*wpi+wi;
 1610      }
 1611      if (isign == 1) {
 1612         data[1] = (h1r=data[1])+data[2];
 1613         data[2] = h1r-data[2];
 1614      } else {
 1615         data[1]=c1*((h1r=data[1])+data[2]);
 1616         data[2]=c1*(h1r-data[2]);
 1617         four1(data,n>>1,-1);
 1618      }
 1619   }
 1620   
 1621   @ The |convlv(double data[], unsigned long n, double respns[],
 1622   unsigned long m, int isign, double ans[])| routine convolves or deconvolves
 1623   a real data set |data[1...n]| (including any user-supplied zero padding)
 1624   with a response function |respns[1...n]|. The response function must be
 1625   stored in wrap-around order in the first |m| elements of respns, where |m|
 1626   is an odd integer less than or equal to |n|. Wrap-around order means
 1627   that the first half of the array respns contains the impulse response
 1628   function at positive times, while the second half of the array contains
 1629   the impulse response function at negative times, counting down from the
 1630   highest element |respns[m]|. On input, |isign| is $+1$ for convolution, and
 1631   $-1$ for deconvolution. The answer is returned in the first |n| components
 1632   of |ans|. However, |ans| {\sl must} be supplied in the calling program with
 1633   dimensions |[1...2*n]|, for consistency with the |twofft| routine.
 1634   Here |n| {\sl must} be an integer power of two.
 1635   The |convlv| routine is adopted from {\sl Numerical Recipes in C}, 2nd Edn
 1636   (Cambridge University Press, New York, 1994).
 1637   
 1638   @<Routine for numerical convolution@>=
 1639   char convlv(double data[], unsigned long n, double respns[], unsigned long m,
 1640               int isign, double ans[])
 1641   {
 1642      void realft(double data[], unsigned long n, int isign);
 1643      void twofft(double data1[], double data2[], double fft1[], double fft2[],
 1644         unsigned long n);
 1645      unsigned long i,no2;
 1646      double dum,mag2,*fft;
 1647   
 1648      fft=dvector(1,n<<1);
 1649      for (i=1;i<=(m-1)/2;i++)
 1650         respns[n+1-i]=respns[m+1-i];
 1651      for (i=(m+3)/2;i<=n-(m-1)/2;i++)
 1652         respns[i]=0.0;
 1653      twofft(data,respns,fft,ans,n);
 1654      no2=n>>1;
 1655      for (i=2;i<=n+2;i+=2) {
 1656         if (isign == 1) {
 1657            ans[i-1]=(fft[i-1]*(dum=ans[i-1])-fft[i]*ans[i])/no2;
 1658            ans[i]=(fft[i]*dum+fft[i-1]*ans[i])/no2;
 1659         } else if (isign == -1) {
 1660            if ((mag2=ans[i-1]*ans[i-1]+ans[i]*ans[i]) == 0.0) {
 1661               log("Attempt of deconvolving at zero response in convlv().");
 1662               return(1);
 1663            }
 1664            ans[i-1]=(fft[i-1]*(dum=ans[i-1])+fft[i]*ans[i])/mag2/no2;
 1665            ans[i]=(fft[i]*dum-fft[i-1]*ans[i])/mag2/no2;
 1666         } else {
 1667            log("No meaning for isign in convlv().");
 1668            return(1);
 1669         }
 1670      }
 1671      ans[2]=ans[n+1];
 1672      realft(ans,n,-1);
 1673      free_dvector(fft,1,n<<1);
 1674      return(0);
 1675   }
 1676   
 1677   @ The |void sgcoeff(double c[], int np, int nl, int nr, int ld, int m)| routine
 1678   computes the coefficients |c[1...np]| for Savitzky--Golay filtering.
 1679   The coefficient vector |c[1...np]| is returned in {\sl wrap-around order}
 1680   consistent with the argument |respns| in the {\sl Numerical Recipes in C}
 1681   routine |convlv|.
 1682   ``Wrap-around order'' means that the first half of the array |respns|
 1683   contains the impulse response function at positive times, while the second
 1684   half of the array contains the impulse response function at negative times,
 1685   counting down from the highest element |respns[m]|.
 1686   The Savitzky--Golay filter coefficients are computed for |nl|
 1687   leftward (past) data points and |nr| rightward (future) data points, making
 1688   the total number of data points used in the window as $|np|=|nl|+|nr|+1$.
 1689   |ld| is the order of the derivative desired (for example, $|ld|=0$ for smoothed
 1690   function). Here $m$ is the order of the smoothing polynomial, also equal to the
 1691   highest conserved moment; usual values are $|m|=2$ or $|m|=4$.
 1692   The |sgcoeff| routine is adopted from {\sl Numerical Recipes in C}, 2nd Edn
 1693   (Cambridge University Press, New York, 1994).
 1694   
 1695   @<Routine for computation of coefficients for Savitzky--Golay filtering@>=
 1696   char sgcoeff(double c[], int np, int nl, int nr, int ld, int m)
 1697   {
 1698      void lubksb(double **a, int n, int *indx, double b[]);
 1699      void ludcmp(double **a, int n, int *indx, double *d);
 1700      int imj,ipj,j,k,kk,mm,*indx;
 1701      double d,fac,sum,**a,*b;
 1702   
 1703      if (np < nl+nr+1 || nl < 0 || nr < 0 || ld > m || nl+nr < m) {
 1704         log("Inconsistent arguments detected in routine sgcoeff.");
 1705         return(1);
 1706      }
 1707      indx=ivector(1,m+1);
 1708      a=dmatrix(1,m+1,1,m+1);
 1709      b=dvector(1,m+1);
 1710      for (ipj=0;ipj<=(m << 1);ipj++) {
 1711         sum=(ipj ? 0.0 : 1.0);
 1712         for (k=1;k<=nr;k++) sum += pow((double)k,(double)ipj);
 1713         for (k=1;k<=nl;k++) sum += pow((double)-k,(double)ipj);
 1714         mm=(ipj<2*m-ipj?ipj:2*m-ipj);
 1715         for (imj = -mm;imj<=mm;imj+=2) a[1+(ipj+imj)/2][1+(ipj-imj)/2]=sum;
 1716      }
 1717      ludcmp(a,m+1,indx,&d);
 1718      for (j=1;j<=m+1;j++) b[j]=0.0;
 1719      b[ld+1]=1.0;
 1720      lubksb(a,m+1,indx,b);
 1721      for (kk=1;kk<=np;kk++) c[kk]=0.0;
 1722      for (k = -nl;k<=nr;k++) {
 1723         sum=b[1];
 1724         fac=1.0;
 1725         for (mm=1;mm<=m;mm++) sum += b[mm+1]*(fac *= k);
 1726         kk=((np-k) % np)+1;
 1727         c[kk]=sum;
 1728      }
 1729      free_dvector(b,1,m+1);
 1730      free_dmatrix(a,1,m+1,1,m+1);
 1731      free_ivector(indx,1,m+1);
 1732      return(0);
 1733   }
 1734   
 1735   @ The |sgfilter(double yr[],double yf[],int mm, int nl, int nr, int ld, int m)|
 1736   routine provides the interface for the actual Savitzky--Golay filtering of
 1737   data. As input, this routine takes the following parameters:
 1738   \varitem[|yr[1...mm]|]%
 1739     {A vector containing the raw, unfiltered data}
 1740   \varitem[|mm|]%
 1741     {The number of data points in the input vector}
 1742   \varitem[|nl|]%
 1743     {The number of samples $n_L$ to use to the ``left'' of the basis
 1744     sample in the regression window (kernel). The total number of samples in
 1745     the window will be $nL+nR+1$.}
 1746   \varitem[|nr|]%
 1747     {The number of samples $n_R$ to use to the ``right'' of the basis
 1748     sample in the regression window (kernel). The total number of samples in
 1749     the window will be $nL+nR+1$.}
 1750   \varitem[|m|]%
 1751     {The order $m$ of the polynomial
 1752     $p(x)=a_0+a_1(x-x_k)+a_2(x-x_k)^2+\ldots+a_m (x-x_k)^m$ to use in the
 1753     regression analysis leading to the Savitzky--Golay coefficients.
 1754     Typical values are between $m=2$ and $m=6$. Beware of too high values,
 1755     which easily makes the regression too sensitive, with an oscillatory
 1756     result.}
 1757   \varitem[|ld|]%
 1758     {The order of the derivative to extract from the Savitzky--Golay
 1759     smoothing algorithm. For regular Savitzky--Golay smoothing of the input
 1760     data as such, use $l_D=0$. For the Savitzky--Golay smoothing and extraction
 1761     of derivatives, set $l_D$ to the order of the desired derivative and make
 1762     sure that you correctly interpret the scaling parameters as described in
 1763     {\sl Numerical Recipes in C}, 2nd Edn~(Cambridge University Press, New
 1764     York, 1994).}
 1765   \medskip
 1766   \noindent
 1767   On return, the Savitzky--Golay-filtered profile is contained in the vector
 1768   |yf[1...mm]|.
 1769   Notice the somewhat peculiar accessing of the coefficients $c_j$ via the
 1770   |c[(j>=0?j+1:nr+nl+2+j)]| construction in this routine. This reflects the
 1771   {\sl wrap-around storage} of the coefficients, where $c[1]=c_0$, $c[2]=c_1$,
 1772   $\ldots$, $c[|nr|+1]=c_{n_R}$, $c[|nr|+2]=c_{-n_L}$, $c[|nr|+3]=c_{-n_L+1}$,
 1773   $\ldots$, $c[|nr|+|nl|+1]=c_{-1}$.
 1774   
 1775   @<Routine for Savitzky--Golay filtering@>=
 1776   char sgfilter(double yr[], double yf[], int mm, int nl, int nr, int ld, int m) {
 1777      int np=nl+1+nr;
 1778      double *c;
 1779      char retval;
 1780   
 1781   #if CONVOLVE_WITH_NR_CONVLV /* Please do not use this $\ldots$ */
 1782      c=dvector(1,mm); /* Size required by the {\sl NR in C} |convlv| routine */
 1783      retval=sgcoeff(c,np,nl,nr,ld,m);
 1784      if (retval==0)
 1785         convlv(yr,mm,c,np,1,yf);
 1786      free_dvector(c,1,mm);
 1787   #else /* $\ldots$ use this instead. (Strongly recommended.) */
 1788      int j;
 1789      long int k;
 1790      c=dvector(1,nl+nr+1); /* Size required by direct convolution */
 1791      retval=sgcoeff(c,np,nl,nr,ld,m);
 1792      if (retval==0) {
 1793         for (k=1;k<=nl;k++) { /* The first |nl| samples */
 1794            for (yf[k]=0.0,j=-nl;j<=nr;j++) {
 1795               if (k+j>=1) {
 1796                  yf[k]+=c[(j>=0?j+1:nr+nl+2+j)]*yr[k+j];
 1797               }
 1798            }
 1799         }
 1800         for (k=nl+1;k<=mm-nr;k++) { /* Samples $|nl|+1 \ldots |mm|-|nr|$ */
 1801            for (yf[k]=0.0,j=-nl;j<=nr;j++) {
 1802               yf[k]+=c[(j>=0?j+1:nr+nl+2+j)]*yr[k+j];
 1803            }
 1804         }
 1805         for (k=mm-nr+1;k<=mm;k++) { /* The last |nr| samples */
 1806            for (yf[k]=0.0,j=-nl;j<=nr;j++) {
 1807               if (k+j<=mm) {
 1808                  yf[k]+=c[(j>=0?j+1:nr+nl+2+j)]*yr[k+j];
 1809               }
 1810            }
 1811         }
 1812      }
 1813      free_dvector(c,1,nr+nl+1);
 1814   #endif
 1815      return(retval); /* Returning |0| if successful filtering */
 1816   }
 1817   
 1818   @ Routines for removing preceding path of filenames.
 1819   In this block all routines related to removing preceding path strings go.
 1820   Not really fancy programming, and no contribution to any increase of numerical
 1821   efficiency or precision; just for the sake of keeping a tidy terminal output
 1822   of the program. The |strip_away_path()| routine is typically called when
 1823   initializing the program name string |progname| from the command line string
 1824   |argv[0]|, and is typically located in the blocks related to parsing of the
 1825   command line options.
 1826   The |strip_away_path()| routine takes a character string |filename| as
 1827   argument, and returns a pointer to the same string but without any
 1828   preceding path segments.
 1829   
 1830   @<Routine for removing preceding path of filenames@>=
 1831   @<Routine for checking for a valid path character@>@;
 1832   char *strip_away_path(char filename[]) {
 1833      int j,k=0;
 1834      while (pathcharacter(filename[k])) k++;
 1835      j=(--k); /* this is the uppermost index of the full path+file string */
 1836      while (isalnum((int)(filename[j]))) j--;
 1837      j++; /* this is the lowermost index of the stripped file name */
 1838      return(&filename[j]);
 1839   }
 1840   
 1841   @ In this program, valid path characters are any alphanumeric character or
 1842   `\.{.}', `\.{/}', `\.{\\}', `\.{\_}', `\.{-}', or `\.{+}'.
 1843   
 1844   @<Routine for checking for a valid path character@>=
 1845   short pathcharacter(int ch) {
 1846      return(isalnum(ch)||(ch=='.')||(ch=='/')||(ch=='\\')||(ch=='_')||
 1847          (ch=='-')||(ch=='+'));
 1848   }
 1849   
 1850   @ The |void showsomehelp(void)| displays a brief help message to standard
 1851   terminal output. This is where the caller always should end up in case
 1852   anything is wrong in the input parameters.
 1853   
 1854   @<Routine for displaying a brief help message on usage@>=
 1855   @<Routine for displaying a single line of the help message@>@;
 1856   void showsomehelp(void) {
 1857      hl("Usage: %s [options]",progname);
 1858      hl("Options:");
 1859      hl(" -h, --help");
 1860      hl("    Display this help message and exit clean.");
 1861      hl(" -i, --inputfile <str>");
 1862      hl("    Specifies the file name from which unfiltered data is to be read.");
 1863      hl("    The input file should describe the input as two columns contain-");
 1864      hl("    ing $x$- and $y$-coordinates of the samples.");
 1865      hl(" -o, --outputfile <str>");
 1866      hl("    Specifies the file name to which filtered data is to be written,");
 1867      hl("    again in a two-column format containing $x$- and $y$-coordinates");
 1868      hl("    of the filtered samples. If this option is omitted, the generated");
 1869      hl("    filtered data will instead be written to the console (terminal).");
 1870      hl(" -nl <nl>");
 1871      hl("    Specifies the number of samples nl to use to the 'left' of the");
 1872      hl("    basis sample in the regression window (kernel). The total number");
 1873      hl("    of samples in the window will be nL+nR+1.");
 1874      hl(" -nr <nr>");
 1875      hl("    Specifies the number of samples nr to use to the 'right' of the");
 1876      hl("    basis sample in the regression window (kernel). The total number");
 1877      hl("    of samples in the window will be nL+nR+1.");
 1878      hl(" -m <m>");
 1879      hl("    Specifies the order m of the polynomial to use in the regression");
 1880      hl("    analysis leading to the Savitzky-Golay coefficients. Typical");
 1881      hl("    values are between m=2 and m=6. Beware of too high values, which");
 1882      hl("    easily makes the regression too sensitive, with an oscillatory");
 1883      hl("    result.");
 1884      hl(" -ld <ld>");
 1885      hl("    Specifies the order of the derivative to extract from the ");
 1886      hl("    Savitzky--Golay smoothing algorithm. For regular Savitzky-Golay");
 1887      hl("    smoothing of the input data as such, use ld=0. For the Savitzky-");
 1888      hl("    Golay smoothing and extraction of derivatives, set ld to the");
 1889      hl("    order of the desired derivative and make sure that you correctly");
 1890      hl("    interpret the scaling parameters as described in 'Numerical");
 1891      hl("    Recipes in C', 2nd Edn (Cambridge University Press, New York,");
 1892      hl("    1994).");
 1893      hl(" -v, --verbose");
 1894      hl("    Toggle verbose mode. (Default: Off.)  This option should always");
 1895      hl("    be omitted whenever no  output file has been specified (that is");
 1896      hl("    to say, omit any --verbose or -v option whenever --outputfile or");
 1897      hl("    -o has been omitted), as the verbose logging otherwise will");
 1898      hl("    contaminate the filtered data stream written to the console");
 1899      hl("    (terminal).");
 1900   }
 1901   
 1902   @ In order to simplify the messaging, the |hl(const char *format, ...)| routine
 1903   acts as a simple front-end merely for compactifying the code by successive
 1904   calls to |hl(...)| rather than the full |fprintf(stderr, ...)|, still
 1905   maintaining all the functionality of string formatting in the regular
 1906   |printf()| or |fprintf()| syntax.
 1907   
 1908   @<Routine for displaying a single line of the help message@>=
 1909   void hl(const char *format, ...) {
 1910      va_list args;
 1911      char line[1024];
 1912   
 1913      va_start(args, format); /* Initialize args by the |va_start()| macro */
 1914      vsprintf(line, format, args);
 1915      va_end(args); /* Terminate the use of args by the |va_end()| macro */
 1916      sprintf(line+strlen(line), "\n"); /* Always append newline */
 1917      fprintf(stdout, "%s", line);
 1918      return;
 1919   }
 1920   
 1921   @ Routine for obtaining the number of coordinate pairs in a file. This
 1922   routine is called prior to loading the input data, in order to automatically
 1923   extract the size needed for allocating the memory for the storage.
 1924   
 1925   @<Routine for obtaining the number of coordinate pairs in a file@>=
 1926   long int num_coordinate_pairs(FILE *file) {
 1927      double tmp;
 1928      int tmpch;
 1929      long int mm=0;
 1930      fseek(file,0L,SEEK_SET); /* rewind file to beginning */
 1931      while((tmpch=getc(file))!=EOF) {
 1932         ungetc(tmpch,file);
 1933         fscanf(file,"%lf",&tmp); /* Read away the $x$ coordinate */
 1934         fscanf(file,"%lf",&tmp); /* Read away the $y$ coordinate */
 1935         mm++;
 1936         tmpch=getc(file); /* Read away any blanks or linefeeds */
 1937         while ((tmpch!=EOF)&&(!isdigit(tmpch))) tmpch=getc(file);
 1938         if (tmpch!=EOF) ungetc(tmpch,file);
 1939      }
 1940      fseek(file,0L,SEEK_SET); /* rewind file to beginning */
 1941      return(mm);
 1942   }
 1943   
 1944   @*Index.
 1945   

Return to previous page

Generated by ::viewsrc::

Last modified Wednesday 15 Feb 2023