% File: sgfilter.w [CWEB source code] % Residence: http://jonsson.eu/programs/cweb/sgfilter/ % Created: January 23, 2006 [v.1.0] % Last change: December 4, 2011 [v.1.6] % Author: Fredrik Jonsson % Description: The CWEB source code for the SGFILTER stand-alone implementation % of the Savitzky-Golay smoothing filter. For information on the % CWEB programming language, see % http://www.literateprogramming.com. % Compilation: Compile this program by using the enclosed Makefile, or use % the blocks of the Makefile as listed in the documentation % (file sgfilter.ps or sgfilter.pdf). The C source code (as % generated from this CWEB code) conforms to the ANSI standard % for the C programming language (which is equivalent to the % ISO C90 standard for C). % % Copyright (C) 2011, Fredrik Jonsson % Non-commercial copying welcome. % \input ext/epsf.tex % For the inclusion of Encapsulated PostScript graphics \input ext/eplain.tex % To access equation numbering and other cross-referencing \def\version{1.6} \def\lastrevdate{December 4, 2011} \font\eightcmr=cmr8 \font\tensc=cmcsc10 \font\eightcmssq=cmssq8 \font\eightcmssqi=cmssqi8 \font\twentycmcsc=cmcsc10 at 20 truept \def\sgfilter{{\eightcmr SGFILTER\spacefactor1000}} \def\ANSI{{\eightcmr ANSI\spacefactor1000}} % The language standard we stick to \def\GNU{{\eightcmr GNU\spacefactor1000}} % GNU is Not Unix \def\GCC{{\eightcmr GCC\spacefactor1000}} % The GNU C-compiler \def\CEE{{\eightcmr C\spacefactor1000}} % The C programming language \def\FORTRAN{{\eightcmr Fortran\spacefactor1000}} % The Fortran language \def\CPP{\CEE{\tt++}} % The C++ programming language \def\ANSICEE{{\eightcmr ANSI~C\spacefactor1000}}% The ANSI-C standard language \def\CWEB{{\eightcmr CWEB\spacefactor1000}} % The CWEB programming language \def\UNIX{{\eightcmr UNIX\spacefactor1000}} \def\CYGWIN{{\eightcmr CYGWIN\spacefactor1000}} \def\GNUPLOT{{\eightcmr GNUPLOT\spacefactor1000}} \def\CTANGLE{{\eightcmr CTANGLE\spacefactor1000}} \def\CWEAVE{{\eightcmr CWEAVE\spacefactor1000}} \def\mod{\mathop{\rm mod}\nolimits} % The real part of a complex number \def\re{\mathop{\rm Re}\nolimits} % The real part of a complex number \def\im{\mathop{\rm Im}\nolimits} % The imaginary part of a complex number \def\backslash{\char'134} % The `\' character \def\vertbar{\char'174} % The `|' character \def\dollar{\char'044} % The `$' character \def\tilde{\char'176} % The `~' character \def\tothepower{\char'136} % The `^' character \def\onehalf{{\textstyle{{1}\over{2}}}} \def\threefourth{{\textstyle{{3}\over{4}}}} \def\endalg{\vrule height 1.4ex width .6ex depth .4ex} % Rectangular bullet \def\ie{i.\thinspace{e.}~\ignorespaces} \def\eg{e.\thinspace{g.}~\ignorespaces} \let\,\thinspace % % Define a handy macro for listing the steps of an algorithm. % \newdimen\aitemindent \aitemindent=26pt \newdimen\aitemleftskip \aitemleftskip=36pt \def\aitem[#1]{\smallbreak\noindent\hbox to 10pt{}% \hbox to\aitemindent{\bf #1\hfill}% \hangindent\aitemleftskip\ignorespaces} % % Define a handy macro for bibliographic references. % \newdimen\refitemindent \refitemindent=18pt \def\refitem[#1]{\smallbreak\noindent% \hbox to\refitemindent{[#1]\hfill}% \hangindent\refitemindent\ignorespaces} \def\references{\medskip\goodbreak\noindent{\bf References}\nobreak\smallskip} % % Define a handy macro for nicely typeset variable descriptions. % \newdimen\varitemindent \varitemindent=100pt \def\varitem[#1]#2{\smallbreak\noindent\hbox to 20pt{}% \hbox to\varitemindent{#1\hfill}% \hangindent 120pt\ignorespaces#2\par} % % Define a handy macro for nicely typeset descriptions of command line options. % \newdimen\optitemindent \optitemindent=80pt \def\optitem[#1]#2{\smallbreak\noindent\hbox to\hsize{\hskip40pt{#1}\hfill} \hangindent\optitemindent\ignorespaces{\hskip\optitemindent{#2}}\smallskip} % % Define a handy macro for the list of program revisions. % \newdimen\citemindent \citemindent=80pt \newdimen\citemleftskip \citemleftskip=90pt \def\citem[#1]{\smallbreak\noindent\hbox to 10pt{}% \hbox to\citemindent{\bf #1\hfill}% \hangindent\citemleftskip\ignorespaces} \def\citindent{\smallbreak\noindent\hbox to 10pt{}% \hbox to\citemindent{\hfil}% \hangindent\citemleftskip\ignorespaces} % % Define a handy macro for the typesetting of quotes of text. % \newdimen\quoteindent \quoteindent=40pt \def\quote#1{\par{\advance\leftskip by\quoteindent \advance\rightskip by\quoteindent \medskip\noindent{``#1''}\medskip}\par} % % Define a handy macro for the typesetting of figure captions. The double % '{{' and '}}' are here in place to prevent the increased \leftskip and % \rightskip to apply globally after the macro has been expanded. % \newdimen\figcapindent \figcapindent=\quoteindent \def\figcaption[#1]#2{{\advance\leftskip by\figcapindent \advance\rightskip by\figcapindent \smallskip\noindent{\bf Figure #1.} {#2}\smallskip}} % % Define the \beginvrulealign and \endvrulealign macros as described in % Donald Knuth's The TeXbook, Appendix D: Dirty Tricks. These macros are % used in typesetting nicely looking tables. % \def\beginvrulealign{\setbox0=\vbox\bgroup} \def\endvrulealign{\egroup % now \box0 holds the entire alignment \setbox0=\vbox{\setbox2=\hbox{\vrule height\ht0 depth\dp0 width0pt} \unvbox0 \setbox0=\lastbox % now \box0 is the bottom row \nointerlineskip \copy0 % put it back \global\setbox1=\hbox{} % initialize box that will contain rules \setbox4=\hbox{\unhbox0 % now open up the bottom row \loop \skip0=\lastskip \unskip % remove tabskip glue \advance\skip0 by-.4pt % rules are .4 pt wide \divide\skip0 by 2 \global\setbox1=\hbox{\hskip\skip0\vrule\hskip\skip0 \unhbox2\unhbox1}% \setbox2=\lastbox % remove alignment entry \ifhbox2 \setbox2=\hbox{\kern\wd2}\repeat}}% \hbox{\rlap{\box0}\box1}} % superimpose the alignment on the rules % % Make sure that the METAPOST logo can be loaded even in plain TeX. % \ifx\MP\undefined \ifx\manfnt\undefined \font\manfnt=logo10 \fi \ifx\manfntsl\undefined \font\manfntsl=logosl10 \fi \def\MP{{\ifdim\fontdimen1\font>0pt \let\manfnt = \manfntsl \fi {\manfnt META}\-{\manfnt POST}}\spacefactor1000 }% \fi \ifx\METAPOST\undefined \let\METAPOST=\MP \fi \datethis @*Introduction. \vskip 120pt \centerline{\twentycmcsc SGFilter} \vskip 20pt \centerline{A stand-alone implementation of the Savitzky--Golay smoothing filter.} \vskip 2pt \centerline{(Version \version\ of \lastrevdate)} \vskip 10pt \centerline{Written by Fredrik Jonsson} \vskip 10pt \centerline{\epsfbox{fig0.1}} \vfill\eject \noindent This document was automatically extracted from the \CWEB\ master source code for the \sgfilter\ program and typeset in the Computer Modern typeface using plain \TeX\ and \MP. The source code and documentation of this program is electronically available at \.{http://jonsson.eu/programs/cweb/sgfilter/}. \vfill \line{Copyright \copyright\ Fredrik Jonsson \the\year\hfil} \medskip \noindent{All rights reserved. No part of this publication may be reproduced, stored in a retrieval system, or transmitted, in any form, or by any means, electronic, mechanical, photo-copying, recording, or otherwise, without the prior consent of the author. Non-commercial copying welcome.\hfil} \medskip \line{Printed on \today, at \hours\hfil} \medskip \line{\TeX\ is a trademark of the American Mathematical Society\hfil} @*The Savitzky--Golay smoothing filter. The Savitzky--Golay smoothing filter was originally presented in 1964 by Abraham Savitzky\footnote{$\dagger$}{Abraham Savitzky (1919--1999) was an American analytical chemist. (Wikipedia)} and Marcel J.~E. Golay\footnote{$\ddagger$}{Marcel J.~E.~Golay (1902--1989) was a Swiss-born mathematician, physicist, and information theorist, who applied mathematics to real-world military and industrial problems. (Wikipedia)} in their paper ``Smoothing and Differentiation of Data by Simplified Least Squares Procedures'', Anal.~Chem., {\bf 36}, 1627--1639 (1964). % \.{http://dx.doi.org/10.1021/ac60214a047} Being chemists and physicists, at the time of publishing associated with the Perkin--Elmer Corporation (still today a reputable manufacturer of equipment for spectroscopy), they found themselves often encountering noisy spectra where simple noise-reduction techniques, such as running averages, simply were not good enough for extracting well-determined characteristica of spectral peaks. In particular, any running averaging tend to flatten and widening peaks in a spectrum, and as the peak width is an important parameter when determining relaxation times in molecular systems, such noise-reduction techniques are clearly non-desirable. The main idea presented by Savitzky and Golay was a work-around avoiding the problems encountered with running averages, while still maintaining the smoothing of data and preserving features of the distribution such as relative maxima, minima and width. To quote the original paper on the target and purpose: \quote{This paper is concerned with computational methods for the removal of the random noise from such information, and with the simple evaluation of the first few derivatives of the information with respect to the graph abscissa. [$\ldots$] The objective here is to present specific methods for handling current problems in the processing of such tables of analytical data. The methods apply as well to the desk calculator, or to simple paper and pencil operations for small amounts of data, as they do to the digital computer for large amounts of data, since their major utility is to simplify and speed up the processing of data.} @ The work-around presented by Savitzky and Golay for avoiding distortion of peaks or features in their spectral data is essentially based on the idea to perform a linear regression of some polynomial {\sl individually for each sample}, followed by the {\sl evaluation of that polynomial exactly at the very position of the sample}. While this may seem a plausible idea, the actual task of performing a separate regression for each point easily becomes a very time-consuming task unless we make a few observations about this problem. However (and this is the key point in the method), for the regression of a polynomial of a finite power, say of an order below 10, the coefficients involved in the actual regression may be computed once and for all in an early stage, followed by performing a {\sl convolution} of the discretely sampled input data with the coefficient vector. As the coefficient vector is significantly shorter than the data vector, this convolution is fast and straightforward to implement. The starting point in deriving the algorithm for the Savitzky--Golay smoothing filter is to consider a smoothing method in which an equidistantly spaced set of $M$ samples $f_n$, $n=1,\ldots,M$ are linearly combined to form a filtered value $h_k$ according to\footnote{$\dagger$}{More generally, Eq.~\eqref{eq:10} can be interpreted as a discretized version of the convolution between a kernel $c(x)$ and a function $f(x)$, $$ h(x)=\int^{\Delta_R}_{-\Delta_L}c(s) f(x+s)\,ds \rightarrow\sum^{n_R}_{j=-n_L} c_j f_{k+j}. $$ Notice, however, the different sign of $s$ as compared to the standard form of the convolution integral in mathematics, where the argument of the function usually yields ``$f(x-s)$''.} $$ h_k=\sum^{n_R}_{j=-n_L} c_j f_{k+j}, \eqdef{eq:10} $$ where $n_L$ is the number of samples ``to the left'' and $n_R$ the number of samples ``to the right'' of the centre index $k$. Notice that the running average smoothing corresponds to the case where all coefficients $c_n$ are equal, with $c_n=1/(n_L+n_R+1)$. The idea of the Savitzky--Golay filter is, however, to find the set of $c_n$ which better preserves the shape of features present in the sampled profile. The approach is to make a linear regression of a polynomial to the $n_L+n_R+1$ samples in the window around sample $k$, and then evaluating this polynomial at that very sample, for all $k$ from $1$ to $M$. We consider the regression of an $m$:th degree polynomial $$ p(x)=a_0+a_1(x-x_k)+a_2(x-x_k)^2+\ldots+a_m (x-x_k)^m \eqdef{eq:20} $$ to the set of $n_L$ samples to the left and $n_R$ to the right of sample $f_k$, including the sample inbetween. Evaluating this polynomial at $x=x_k$ is particularly easy, as we at that point simply have $p(x_k)=a_0$. The linear regression of this polynomial to the samples $f_{k+j}$, with $-n_L\le j\le n_R$, means that we look for the least-square approximated solution to the linear system of $n_L+n_R+1$ equations $$ \eqalign{ p(x_{k-n_L})&=a_0+a_1(x_{k-n_L}-x_k)+a_2(x_{k-n_L}-x_k)^2 +\ldots+a_m (x_{k-n_L}-x_k)^m\approx f_{k-n_L},\cr &\hskip60pt\vdots\hskip100pt\vdots\cr p(x_{k-1})&=a_0+a_1(x_{k-1}-x_k)+a_2(x_{k-1}-x_k)^2 +\ldots+a_m (x_{k-1}-x_k)^m\approx f_{k-1},\cr p(x_{k})&=a_0\approx f_{k}\cr p(x_{k+1})&=a_0+a_1(x_{k+1}-x_k)+a_2(x_{k+1}-x_k)^2 +\ldots+a_m (x_{k+1}-x_k)^m\approx f_{k+1},\cr &\hskip60pt\vdots\hskip100pt\vdots\cr p(x_{k+n_R})&=a_0+a_1(x_{k+n_R}-x_k)+a_2(x_{k+n_R}-x_k)^2 +\ldots+a_m (x_{k+n_R}-x_k)^m\approx f_{k+n_R},\cr } \eqdef{eq:30} $$ which (under the assumption that $n_L+n_R+1>m+1$) provides an overdetermined system for the $m+1$ coefficients $a_j$ which can be expressed in matrix form as $$ {\bf A}\cdot{\bf a}\equiv \underbrace{ \pmatrix{ 1&(x_{k-n_L}-x_k)&(x_{k-n_L}-x_k)^2&\cdots&(x_{k-n_L}-x_k)^m\cr \vdots& &\vdots& &\vdots\cr 1&(x_{k-1}-x_k)&(x_{k-1}-x_k)^2&\cdots&(x_{k-1}-x_k)^m\cr 1&0&0&\cdots&0\cr 1&(x_{k+1}-x_k)&(x_{k+1}-x_k)^2&\cdots&(x_{k+1}-x_k)^m\cr \vdots& &\vdots& &\vdots\cr 1&(x_{k+n_R}-x_k)&(x_{k+n_R}-x_k)^2&\cdots&(x_{k+n_R}-x_k)^m\cr } }_{[(n_L+n_R+1)\times(m+1)]} \underbrace{ \pmatrix{ a_0\cr a_1\cr a_2\cr \vdots\cr a_m\cr } \vphantom{ \pmatrix{ ()^m\cr \vdots\cr ()^m\cr 1\cr ()^m\cr \vdots\cr ()^m\cr } } }_{[(m+1)\times1]} = \underbrace{ \pmatrix{ f_{k-n_L}\cr \vdots\cr f_{k-1}\cr f_{k}\cr f_{k+1}\cr \vdots\cr f_{k+n_R}\cr } \vphantom{ \pmatrix{ ()^m\cr \vdots\cr ()^m\cr 1\cr ()^m\cr \vdots\cr ()^m\cr } } }_{[(m+1)\times1]} \equiv{\bf f}. \eqdef{eq:40} $$ The least squares solution to Eq.~\eqref{eq:40} is obtained by multiplying its left- and right-hand sides by the transpose of the system matrix ${\bf A}$, followed by solving the resulting $[(m+1)\times(m+1)]$-system of linear equations for ${\bf a}$, $$ {\bf A}^{\rm T}\cdot({\bf A}\cdot{\bf a})= ({\bf A}^{\rm T}\cdot{\bf A})\cdot{\bf a}= {\bf A}^{\rm T}\cdot{\bf f} \qquad\Leftrightarrow\qquad {\bf a}= ({\bf A}^{\rm T}\cdot{\bf A})^{-1}\cdot ({\bf A}^{\rm T}\cdot{\bf f}) \eqdef{eq:50} $$ Recapitulate that what we here target is the evaluation of $p(x_k)=a_0$, which according to Eqs.~\eqref{eq:20} and~\eqref{eq:50} is equivalent to evaluating the first (``zero:th'') row of the solution for ${\bf a}$, or $$ p(x_k)=a_0= \big[ \underbrace{ ({\bf A}^{\rm T}\cdot{\bf A})^{-1} }_{[(m+1)\times(m+1)]} \cdot \underbrace{ ({\bf A}^{\rm T}\cdot{\bf f}) }_{[(m+1)\times1]} \big]_{\{{\rm row\ }0\}} \eqdef{eq:60} $$ So, having arrived at this result for the regression, we clearly have a solution for $a_0$ depending on the actual function values $f_{k+j}$ in the vicinity of sample $f_k$. Doesn't this mean that we nevertheless need to repeat the regression for every single sample to be included in the smoothing? Moreover, how does the result presented in Eq.~\eqref{eq:60} relate to the original mission, which we recapitulate was to find a general way of computing the coefficients $c_j$ in the kernel of the convolution in Eq.~\eqref{eq:10}? The answer to these questions lies in expanding Eq.~\eqref{eq:60} to yield the expression for the first (``zero:th'') row as $$ \eqalign{ p(x_k)=a_0 &=\big[ ({\bf A}^{\rm T}\cdot{\bf A})^{-1}\cdot ({\bf A}^{\rm T}\cdot{\bf f})\big]_{\{{\rm row\ }0\}}\cr &=\sum^{m+1}_{j=1} \big[({\bf A}^{\rm T}\cdot{\bf A})^{-1}\big]_{0j} \big[{\bf A}^{\rm T}\cdot{\bf f}\big]_{j}\cr &=\sum^{m+1}_{j=1} \big[({\bf A}^{\rm T}\cdot{\bf A})^{-1}\big]_{0j} \sum^{m+1}_{k=1}\big[{\bf A}^{\rm T}\big]_{jk}\big[{\bf f}\big]_{k}\cr } \eqdef{eq:70} $$ and observing that the $n$:th coefficient $c_n$ is obtained as equal to the coefficient $a_0$ whenever ${\bf f}$ is replaced by the unit vector ${\bf e}_n$ (with all elements zero except for the unitary $n$:th element). Hence $$ \eqalign{ c_n&=\sum^{m+1}_{j=1} \big[({\bf A}^{\rm T}\cdot{\bf A})^{-1}\big]_{0j} \sum^{m+1}_{k=1}\big[{\bf A}^{\rm T}\big]_{jk}\big[{\bf e}_n\big]_{k}\cr &=\sum^{m+1}_{j=1} \big[({\bf A}^{\rm T}\cdot{\bf A})^{-1}\big]_{0j} \sum^{m+1}_{k=1}\big[{\bf A}^{\rm T}\big]_{jk}\delta_{nk}\cr &=\sum^{m+1}_{j=1} \big[({\bf A}^{\rm T}\cdot{\bf A})^{-1}\big]_{0j} \big[{\bf A}^{\rm T}\big]_{jn}\cr &=\sum^{m+1}_{j=1} \big[({\bf A}^{\rm T}\cdot{\bf A})^{-1}\big]_{0j}A_{nj}\cr } \eqdef{eq:80} $$ This concludes the derivation of the coefficients in the Savitzky--Golay filter, which may be computed once and for all in the beginning and afterwards used as the (for any practical purposes short) kernel $c_j$ in the convolution described by Eq.~\eqref{eq:10}. @ In 2000, editors James Riordon, Elizabeth Zubritsky, and Alan Newman of {\sl Analytical Chemistry} made a review article of what they had identified as the top-ten seminal papers in the history of the journal, based on the number of citations. Among the listed papers, of which some were written by Nobel-laureates-to-be, the original paper by Savitzky and Golay makes a somewhat odd appearance, as it not only concerns mainly numerical analysis, but also because it actually includes \FORTRAN\ code for the implementation. The review article\footnote{$\dagger$}{Available at \.{http://pubs.acs.org/doi/pdf/10.1021/ac002801q}} concludes the discussion of the Savitzky--Golay smoothing filter with a reminiscence by Abraham Savitzky about the work: \quote{In thinking about why the technique has been so widely used, I've come to the following conclusions. First, it solves a common problem--the reduction of random noise by the well-recognized least-squares technique. Second, the method was spelled out in detail in the paper, including tables and a sample computer subroutine. Third, the mathematical basis for the technique, although explicitly and rigorously stated in the article, was separated from a completely nonmathematical explanation and justification. Finally, the technique itself is simple and easy to use, and it works.} @ As a final remark on the Savitzky--Golay filtering algorithm, a few points on the actual implementation of the convolution need to be made. While the \sgfilter\ program relies on the method for computation of the Savitzky--Golay coefficients as presented in {\sl Numerical Recipes in C}, 2nd Edn~(Cambridge University Press, New York, 1994), it must be emphasized that the suggestion there presented for the convolution, which is to apply the |convlv| routine of {\sl Numerical Recipes in C}, is significantly increasing the complexity and memory consumption in the filtering. In particular, the |convlv| routine in turn relies on consistent calls to the |twofft| routine, which in order to deliver proper data needs to be supplied with a return vector of {\sl twice} the size of the input vector. In addition, |convlv| requires the size of the input vector to be of an integer power of two (say, $M=1024$, $4096$, etc.), which may be acceptable for one-off tests but is a rather inconvenient limitation for any more general applications. Whether the \sgfilter\ program should employ the convolution engine supplied by the |convlv| routine (recommended in {\sl Numerical Recipes in C}) or the direct convolution as implemented in the |sgfilter| routine (recommended by me) is controlled by the |CONVOLVE_WITH_NR_CONVLV| definition in the \.{sgfilter.h} header file. With reference to the above issues with |convlv|, I strongly advise keeping the default (\.{0}) setting for |CONVOLVE_WITH_NR_CONVLV|. @*Revision history of the program. \medskip \citem[2006-01-18]{[v.1.0]} {\tt }\hfill\break First properly working version of the \sgfilter\ program. \citem[2006-01-20]{[v.1.1]} {\tt }\hfill\break Added the test case for Savitzky--Golay filtering, modeling an underlying function $g(x)$ with superimposed Gaussian noise as $$ f(x)=\underbrace{ \cos(3x)\sin^2(x^3)+4\sum^4_{k=1}\exp(-(x-x_k)^2/w^2_k) }_{g(x)} + \underbrace{Vu(x)\vphantom{\sum^4_{k=1}}}_{\rm noise} $$ where $u(x)$ is a normally distributed stochastic variable of mean zero and unit variance, $V$ is the local variance as specified arbitrarily, and the remaining parameters $(x_k,w_k)$ are the positions and widths of four Gaussian peaks superimposed onto the otherwise trigonometric expression for the underlying function. \citem[2006-01-21]{[v.1.2]} {\tt }\hfill\break Changed the streaming of output (filtered) data so that the |stdout| stream now is directed to file whenever \.{-o} or \.{\--outputfile} options are present at the calling command line. \citem[2006-05-06]{[v.1.3]} {\tt }\hfill\break Added an introductory section documenting the derivation of the Savitzky--Golay filter as such. Always nice to have at hand when it comes to actually understanding why certain parameters in the filtering need to be in certain ranges. Also added automatic support for extracting the number of input samples automatically from the input file, hence making the \.{-M} and \.{--num\_samples} option obsolete. \citem[2006-05-06]{[v.1.4]} {\tt }\hfill\break Replaced the convolution from the previously used |convlv| routine to a brute-force but more economical one, which now does not rely on the input data being a set of $2^N$ samples for some $N$. However, I have chosen to keep the old implementation, which can be re-applied simply by changing the definition of |CONVOLVE_WITH_NR_CONVLV| to ``(\.{1}).'' \citem[2009-11-01]{[v.1.5]} {\tt }\hfill\break Included the \.{example.c} file in the \CWEB\ source of \sgfilter, for automatically is generated from the master \CWEB\ source when passed through \CTANGLE. A very nifty way indeed for keeping updated test cases. \citem[2011-12-04]{[v.1.6]} {\tt }\hfill\break In the block concerned with the redirection of |stdout| when delivering filtered data, I found strange behaviour when executing \sgfilter\ under Windows. What happens is that any redirection of |stdout| back to terminal output in Windows naturally must be done in a different way than with the |freopen("/dev/tty", "a", stdout)| which is accepted by OS X (Free BSD), Linux, or any other \UNIX-like platforms. Hence, I simply added a (primitive) check on the platform type in the header file. @*Compiling the source code. The program is written in \CWEB, generating \ANSICEE\ (ISO C99) conforming source code and documentation as plain \TeX-source, and is to be compiled using the sequences as outlined in the \.{Makefile} listed below. For general information on literate programming, \CTANGLE, or \CWEAVE, see \.{http://www.literateprogramming.com}. \bigskip {\obeylines\obeyspaces\tt \# \# Makefile designed for use with ctangle, cweave, gcc, and plain TeX. \# \# Copyright (C) 2002-2011, Fredrik Jonsson \# \# The CTANGLE program converts a CWEB source document into a C program \# which may be compiled in the usual way. The output file includes \#line \# specifications so that debugging can be done in terms of the CWEB source \# file. \# \# The CWEAVE program converts the same CWEB file into a TeX file that may \# be formatted and printed in the usual way. It takes appropriate care of \# typographic details like page layout and the use of indentation, italics, \# boldface, etc., and it supplies extensive cross-index information that it \# gathers automatically. \# \# CWEB allows you to prepare a single document containing all the informa- \# tion that is needed both to produce a compilable C program and to produce \# a well-formatted document describing the program in as much detail as the \# writer may desire. The user of CWEB ought to be familiar with TeX as well \# as C. \# PROJECT = sgfilter CTANGLE = ctangle CWEAVE = cweave CC = gcc CCOPTS = -O2 -Wall -ansi -std=iso9899:1999 -pedantic LNOPTS = -lm TEX = tex DVIPS = dvips DVIPSOPT = -ta4 -D1200 PS2PDF = ps2pdf METAPOST = mpost ~ ~ all: \$(PROJECT) \$(PROJECT).pdf ~ ~ \$(PROJECT): \$(PROJECT).o ~ \$(CC) \$(CCOPTS) -o \$(PROJECT) \$(PROJECT).o \$(LNOPTS) ~ ~ \$(PROJECT).o: \$(PROJECT).c ~ \$(CC) \$(CCOPTS) -c \$(PROJECT).c ~ ~ \$(PROJECT).c: \$(PROJECT).w ~ \$(CTANGLE) \$(PROJECT) \$(PROJECT).c ~ ~ \$(PROJECT).pdf: \$(PROJECT).ps ~ \$(PS2PDF) \$(PROJECT).ps \$(PROJECT).pdf ~ ~ \$(PROJECT).ps: \$(PROJECT).dvi ~ \$(DVIPS) \$(DVIPSOPT) \$(PROJECT).dvi -o \$(PROJECT).ps ~ ~ \$(PROJECT).dvi: \$(PROJECT).tex ~ \$(TEX) \$(PROJECT).tex ~ ~ \$(PROJECT).tex: \$(PROJECT).w ~ \$(CWEAVE) \$(PROJECT) ~ ~ clean: ~ -rm -Rf \$(PROJECT) *~ *.c *.h *.o *.exe *.dat *.pdf *.mp *.trj *.mpx ~ -rm -Rf *.tex *.aux *.log *.toc *.idx *.scn *.dvi *.ps *.1 *.eps ~ ~ archive: ~ make -ik clean ~ tar --gzip --directory=../ -cf ../\$(PROJECT).tar.gz \$(PROJECT) } \bigskip This \.{Makefile} essentially executes two major calls. First, the \CTANGLE\ program parses the \CWEB\ source file \.{sgfilter.w} to extract a \CEE\ source file \.{sgfilter.c} which may be compiled into an executable program using any \ANSICEE\ conformant compiler. The output source file \.{sgfilter.c} includes \.{\#line} specifications so that any debugging conveniently can be done in terms of line numbers in the original \CWEB\ source file \.{sgfilter.w}. Second, the \CWEAVE\ program parses the same \CWEB\ source file \.{sgfilter.w} to extract a plain \TeX\ file \.{sgfilter.tex} which may be compiled into a PostScript or PDF document. The document file \.{sgfilter.tex} takes appropriate care of typographic details like page layout and text formatting, and supplies extensive cross-indexing information which is gathered automatically. In addition to extracting the documentary text, \CWEAVE\ also includes the source code in cross-referenced blocks corresponding to the descriptors as entered in the \CWEB\ source code. Having executed \.{make} (or \.{gmake} for the \GNU\ enthusiast) in the same directory where the files \.{sgfilter.w} and \.{Makefile} are located, one is left with the executable file \.{sgfilter}, being the ready-to-use compiled program, and the PostScript file \.{sgfilter.ps} (or PDF file \.{sgfilter.pdf}) which contains the full documentation of the program, that is to say the document you currently are reading. Notice that on platforms running any operating system by Microsoft, the executable file will instead automatically be named \.{sgfilter.exe}. This convention also applies to programs compiled under the \UNIX-like environment \CYGWIN. @*Running the program. The program is entirely controlled by the command line options supplied when invoking the program. The syntax for executing the program is\par \medskip \line{\hskip 40pt\.{sgfilter [options]}\hfill}\par \medskip \noindent where \.{options} include the following, given in their long as well as their short forms (with prefixes `\.{--}' and `\.{-}', respectively): \medskip \optitem[\.{--inputfile}, \.{-i} $\langle${\it input filename}$\rangle$]% {Specifies the raw, unfiltered input data to be crunched. The input file should describe the input as two columns containing $x$- and $y$-coordinates of the samples.} \optitem[\.{--outputfile}, \.{-o} $\langle${\it output filename}$\rangle$]% {Specifies the output file to which the program should write the filtered profile, again in a two-column format containing $x$- and $y$-coordinates of the filtered samples. If this option is omitted, the generated filtered data will instead be written to the console (terminal).} \optitem[\.{-nl} $\langle n_L\rangle$]% {Specifies the number of samples $n_L$ to use to the ``left'' of the basis sample in the regression window (kernel). The total number of samples in the window will be $nL+nR+1$.} \optitem[\.{-nr} $\langle n_R\rangle$]% {Specifies the number of samples $n_R$ to use to the ``right'' of the basis sample in the regression window (kernel). The total number of samples in the window will be $nL+nR+1$.} \optitem[\.{-m} $\langle m\rangle$]% {Specifies the order $m$ of the polynomial $p(x)=a_0+a_1(x-x_k)+a_2(x-x_k)^2+\ldots+a_m (x-x_k)^m$ to use in the regression analysis leading to the Savitzky--Golay coefficients. Typical values are between $m=2$ and $m=6$. Beware of too high values, which easily makes the regression too sensitive, with an oscillatory result.} \optitem[\.{-ld} $\langle l_D\rangle$]% {Specifies the order of the derivative to extract from the Savitzky--Golay smoothing algorithm. For regular Savitzky--Golay smoothing of the input data as such, use $l_D=0$. For the Savitzky--Golay smoothing and extraction of derivatives, set $l_D$ to the order of the desired derivative and make sure that you correctly interpret the scaling parameters as described in {\sl Numerical Recipes in C}, 2nd Edn~(Cambridge University Press, New York, 1994).} \optitem[\.{--help}, \.{-h}]% {Displays a brief help message and terminates the \sgfilter\ program clean from any error codes.} \optitem[\.{--verbose}, \.{-v}]% {Toggle verbose mode. (Default: Off.) This option should always be omitted whenever no output file has been specified (that is to say, omit any \.{--verbose} or \.{-v} option whenever \.{--outputfile} or \.{-o} has been omitted), as the verbose logging otherwise will contaminate the filtered data stream written to the console (terminal).} @*Example of Savitzky--Golay filtering with the \sgfilter\ program. It is always a good idea to create a rather ``nasty'' test case for an algorithm, and in this case it also provides the reader of this code with a (hopefully) clear example of usage of the \sgfilter\ program. We start off with generating a test suite of noisy data, in this case modeling an underlying function $g(x)$ with superimposed Gaussian noise as $$ f(x)=\underbrace{ \cos(3x)\sin^2(x^3)+4\sum^4_{k=1}\exp(-(x-x_k)^2/w^2_k) }_{g(x)} + \underbrace{Vu(x)\vphantom{\sum^4_{k=1}}}_{\rm noise} $$ where $u(x)$ is a normally distributed stochastic variable of mean zero and unit variance, $V$ is the local variance as specified arbitrarily, and the remaining parameters $(x_k,w_k)$ are the positions and widths of four Gaussian peaks superimposed onto the otherwise trigonometric expression for the underlying function. For the current test suite we use $(x_1,w_1)=(0.2,0.007)$, $(x_2,w_2)=(0.4,0.01)$, $(x_3,w_3)=(0.6,0.02)$, and $(x_4,w_4)=(0.8,0.04)$. These Gaussian peaks serve as to provide various degrees of rapidly varying data, to check the performance in finding maxima. Meanwhile, the less rapidly varying domains which are dominated by the trigonometric expression serves as a test for the capability of the filter to handle rather moderate variations of low amplitude. The underlying test function $g(x)$ is shown\footnote{$\dagger$}{See the \.{*.eps} blocks in the enclosed \.{Makefile} for details on how \METAPOST was used in the generation of the encapsulated PostScript images shown in Figs.~1--6.} in Fig.~1. \medskip \centerline{\epsfbox{fig1.1}} \figcaption[1]{The underlying test function $g(x)=\cos(3x)\sin^2(x^3)% +4\sum^4_{k=1}\exp(-(x-x_k)^2/w^2_k)$ without any added noise. Here the positions and widths of the Gaussian peaks are $(x_1,w_1)=(0.2,0.007)$, $(x_2,w_2)=(0.4,0.01)$, $(x_3,w_3)=(0.6,0.02)$, and $(x_4,w_4)=(0.8,0.04)$.} \medskip \noindent The generator of artificial test data to be tested by the Savitsky--Golay filtering algorithm is here included as a simple \CEE\ program, which automatically is generated from the master \CWEB\ source when passed through \CTANGLE. @(example.c@>= #include #include #include #include #define TWOPI (2.0*3.141592653589793) @| float gauss(float x, float w, float xa) { return(exp(-pow(((x-xa)/w),2))); } @| float func(float x) { float retval=gauss(x,0.007,0.2); /* $x_1=0.2$, $w_1=0.007$ */ retval+=gauss(x,0.01,0.4); /* $x_2=0.4$, $w_2=0.01$ */ retval+=gauss(x,0.02,0.6); /* $x_3=0.6$, $w_3=0.02$ */ retval+=gauss(x,0.04,0.8); /* $x_4=0.8$, $w_4=0.04$ */ retval*=4.0; retval+=cos(3.0*x)*pow(sin(pow(x,3)),2); return(retval); } @| int main(int argc, char *argv[]) { int k, mm=1024; float var=1.0,xmax=2.5, x1, x2, u, v, f, z; if (argc>1) sscanf(argv[1],"%f",&var); /* Read first argument as variance */ srand((unsigned)time(NULL)); /* Initialize random number generator */ for (k=0;k0.0) { /* Apply the Box--Muller algorithm on |u| and |v| */ f=sqrt(-2*log(u)); z=TWOPI*v; u=f*cos(z); /* Normally distributed with E(|u|)=0 and Var(|u|)=1 */ v=f*sin(z); /* Normally distributed with E(|u|)=0 and Var(|u|)=1 */ fprintf(stdout,"%1.8f %1.8f\n",x1,func(x1)+var*u); /* $f(x_1)$ */ fprintf(stdout,"%1.8f %1.8f\n",x2,func(x2)+var*v); /* $f(x_2)$ */ } } return(0); } @ After having compiled the above code \.{example.c}, simply run \.{./example }$\langle${\it noise variance}$\rangle$\.{ > }$\langle$% {\it file name}$\rangle$ in order to generate the test function with a superimposed normally distributed (gaussian) noise of desired variance. In particular, we will in this test suite consider variances of 0 (that is to say, the underlying function without any noise), 0.5, 1.0, and 2.0. Such data files are simply generated by executing\footnote{$\dagger$}{The resulting test data, which so far has not been subject to any filtering, may easily be viewed by running the following script in Octave/Matlab: \medskip {\obeylines\obeyspaces\tt clear all; close all; hold on; u=load('example-2.0.dat'); plot(u(:,1),u(:,2),'-b'); u=load('example-1.0.dat'); plot(u(:,1),u(:,2),'-c'); u=load('example-0.5.dat'); plot(u(:,1),u(:,2),'-r'); u=load('example-0.0.dat'); plot(u(:,1),u(:,2),'-k'); legend('var(f(x))=2.0','var(f(x))=1.0','var(f(x))=0.5','var(f(x))=0.0'); hold off; title('Artificial data generated for tests of Savitzky-Golay filtering'); xlabel('x'); ylabel('f(x)'); } } \bigskip {\obeylines\obeyspaces\tt ./example 0.0 > example-0.0.dat ./example 0.5 > example-0.5.dat ./example 1.0 > example-1.0.dat ./example 2.0 > example-2.0.dat } \bigskip \noindent The resulting ``noisified'' suite of test data are in Figs.~2--4 shown for the respective noise variances of $V=0.5$, $V=1.0$, and $V=2.0$, respectively. \medskip \centerline{\epsfbox{fig2.1}} \figcaption[2]{The test function $g(x)$ with added Gaussian noise of variance $V=2.0$, as stored in file \.{example-2.0.dat} in the test suite.} @ Applying the Savitzky--Golay filter to the test data is now a straightforward task. Say that we wish to test filtering with polynomial degree $|m|=4$ and $|ld|=0$ (which is the default value of |ld|, for regular smoothing with a delivered ``zero:th order derivative'', that is to say the smoothed non-differentiated function), for the two cases $|nl|=|nr|=10$ (in total 21 points in the regression kernel) and $|nl|=|nr|=60$ (in total 121 points in the regression kernel). Using the previously generated test suite of noisy data, the filtering is then easily accomplished by executing: \bigskip {\obeylines\obeyspaces\tt ./sgfilter -m 4 -nl 60 -nr 60 -i example-0.0.dat -o example-0.0-f-60.dat ./sgfilter -m 4 -nl 10 -nr 10 -i example-0.0.dat -o example-0.0-f-10.dat ./sgfilter -m 4 -nl 60 -nr 60 -i example-0.5.dat -o example-0.5-f-60.dat ./sgfilter -m 4 -nl 10 -nr 10 -i example-0.5.dat -o example-0.5-f-10.dat ./sgfilter -m 4 -nl 60 -nr 60 -i example-1.0.dat -o example-1.0-f-60.dat ./sgfilter -m 4 -nl 10 -nr 10 -i example-1.0.dat -o example-1.0-f-10.dat ./sgfilter -m 4 -nl 60 -nr 60 -i example-2.0.dat -o example-2.0-f-60.dat ./sgfilter -m 4 -nl 10 -nr 10 -i example-2.0.dat -o example-2.0-f-10.dat } \bigskip \noindent The resulting filtered data sets are shown in Figs.~3--6 for noise variances of $V=2.0$, $V=1.0$, $V=0.5$, and $V=0$, respectively. The final case corresponds to the interesting case of filtering the underlying function $g(x)$ without any added noise whatsoever, which corresponds to performing a local regression of the regression polynomial to the analytical trigonometric and exponential functions being terms of $g(x)$, a regression where we by no means should expect a perfect match. \medskip \centerline{\epsfbox{fig3.1}} \figcaption[3]{The profiles resulting from Savitzky--Golay-filtering of the test function $g(x)$ with added Gaussian noise of variance $V=2.0$.} \medskip As can be seen in Fig.~3, the results from filtering the ``worst-case'' set (with noise variance $V=2.0$) with $|nl|=|nr|=10$ ($|nl|+|nr|+1=21$ samples in the regression kernel) and $|m|=4$ (curve in blue) yield a rather good tracking of the narrow Gaussian peaks, meanwhile performing rather poor in the low-amplitude and rather slowly varying trigonometric hills in the right-hand side of the graph. As a reference, the underlying function $g(x)$ is mapped in dashed black. On the other hand, with $|nl|=|nr|=60$ ($|nl|+|nr|+1=121$ samples in the regression kernel) and keeping the same degree of the regression polynomial (curve in red), the narrow peaks are barely followed, meanwhile having a rather poor performance in the slowly varying hills as well (albeit of a very poor signal-to-noise ratio). \vfill\eject However, with a lower variance of the superimposed noise, we at $V=1$ have a nice tracking of the slowly varying hills by the 121-sample regression kernel, as shown in Fig.~4, meanwhile having a good tracking of the narrow Gaussian peaks by the 21-sample regression kernel. That the $|nl|+||=21$-sample window is noisier than the 121-sample one should not really be surprising, as the higher number of samples in the regression window tend to smoothen out the regression even further. \centerline{\epsfbox{fig4.1}} \figcaption[4]{The profiles resulting from Savitzky--Golay-filtering of the test function $g(x)$ with added Gaussian noise of variance $V=1.0$.} \medskip \centerline{\epsfbox{fig5.1}} \figcaption[5]{The profiles resulting from Savitzky--Golay-filtering of the test function $g(x)$ with added Gaussian noise of variance $V=0.5$.} \vfill\eject \centerline{\epsfbox{fig6.1}} \figcaption[6]{The profiles resulting from Savitzky--Golay-filtering of the test function $g(x)$ with added Gaussian noise of variance $V=0$, that is to say a direct regression against the underlying function $g(x)$. This figure illustrates the limitations of the attempts of linear regression of a polynomial to an underlying function which clearly cannot be approximated by a simple polynomial expressin in certain domains. One should always keep this limitation in mind before accepting (or discarding) data filtered by the Savitzky--Golay algorithm, despite the many cases in which it performs exceptionally well.} \vfill\eject @*Header files. We put the interface part of our routines in the header file \.{sgfilter.h}, and we restrict ourselves to a lowercase file name to maintain portability among operating systems with case-insensitive file names. There are just a few global definitions present in the \sgfilter\ program:\par \varitem[|VERSION|]{The current program revision number.} \varitem[|COPYRIGHT|]{The copyright banner.} \varitem[|DEFAULT_NL|]{The default value to use for the |nl| variable unless stated otherwise at the command line during startup of the program. This parameter specifies the number of samples $n_L$ to use to the ``left'' of the basis sample in the regression window (kernel). The total number of samples in the window will be $nL+nR+1$.} \varitem[|DEFAULT_NR|]{The default value to use for the |nr| variable unless stated otherwise at the command line during startup of the program. This parameter specifies the number of samples $n_R$ to use to the ``right'' of the basis sample in the regression window (kernel). The total number of samples in the window will be $nL+nR+1$.} \varitem[|DEFAULT_M|]{The default value to use for the |m| variable unless stated otherwise at the command line during startup of the program. This parameter specifies the order $m$ of the polynomial $p(x)=a_0+a_1(x-x_k)+a_2(x-x_k)^2+\ldots+a_m (x-x_k)^m$ to use in the regression analysis leading to the Savitzky--Golay coefficients. Typical values are between $m=2$ and $m=6$. Beware of too high values, which easily makes the regression too sensitive, with an oscillatory result.} \varitem[|DEFAULT_LD|]{The default value to use for the |ld| variable unless stated otherwise at the command line during startup of the program. This parameter specifies the order of the derivative to extract from the Savitzky--Golay smoothing algorithm. For regular Savitzky--Golay smoothing of the input data as such, use $l_D=0$. For the Savitzky--Golay smoothing and extraction of derivatives, set $l_D$ to the order of the desired derivative and make sure that you correctly interpret the scaling parameters as described in {\sl Numerical Recipes in C}, 2nd Edn~(Cambridge University Press, New York, 1994).} \varitem[|NCHMAX|]{The maximum number of characters allowed in strings for storing file names, including path.} \varitem[|log(...)|]{The |log()| macro forms the user interface to run-time error messaging and console logging activities, by invoking the |log_printf()| routine. The |log()| macro is preferrably used rather than direct calls to |log_printf()|, as it automatizes the extraction of the calling routine, via the |__func__| macro. Notice that the clean construction of the macro using variable-length arguments (via |__VA_ARGS__|) {\sl only} is supported at ISO 9899:1999 (ISO C99) standard level or above (see, for example, \.{http://en.wikipedia.org/wiki/C99}), which is the default compliance level used in the enclosed \.{Makefile}. (In fact, this is here {\sl the} very reason for using ISO 9899:1999 rather than ISO 9899:1990).} @(sgfilter.h@>= #define VERSION "1.6" #define COPYRIGHT "Copyright (C) 2006-2011, Fredrik Jonsson" #define DEFAULT_NL (15) #define DEFAULT_NR (15) #define DEFAULT_M (4) #define DEFAULT_LD (0) #define EPSILON ((double)(1.0e-20)) #define NCHMAX (256) #define CONVOLVE_WITH_NR_CONVLV (0) #define log(...) log_printf(__func__, __VA_ARGS__) @| #if defined(_CYGWIN_SIGNAL_H)||defined(__APPLE__)||defined(__unix__)||defined(__linux) #define UNIX_LIKE_OS (1) #endif @| void log_printf(const char *function_name, const char *format, ...); int *ivector(long nl, long nh); double *dvector(long nl, long nh); double **dmatrix(long nrl, long nrh, long ncl, long nch); void free_ivector(int *v, long nl, long nh); void free_dvector(double *v, long nl, long nh); void free_dmatrix(double **m,long nrl,long nrh,long ncl,long nch); void lubksb(double **a, int n, int *indx, double b[]); void ludcmp(double **a, int n, int *indx, double *d); void four1(double data[], unsigned long nn, int isign); void twofft(double data1[], double data2[], double fft1[], double fft2[], unsigned long n); void realft(double data[], unsigned long n, int isign); char convlv(double data[], unsigned long n, double respns[], unsigned long m, int isign, double ans[]); char sgcoeff(double c[], int np, int nl, int nr, int ld, int m); char sgfilter(double yr[], double yf[], int mm, int nl, int nr, int ld, int m); char *strip_away_path(char filename[]); long int num_coordinate_pairs(FILE *file); @*The main program. Here follows the general outline of the |main| routine of the \sgfilter\ program. This is where it all starts. @c @@; @@; @@; int main(int argc, char *argv[]) { @@; @@; @@; @@; @@; @@; return(EXIT_SUCCESS); } @ Library dependencies. The standard \ANSICEE\ libraries included in this program are:\par \varitem[\.{math.h}]{For access to common mathematical functions.} \varitem[\.{stdio.h}]{For file access and any block involving |fprintf|.} \varitem[\.{stdlib.h}]{For access to the |exit| function.} \varitem[\.{stdarg.h}]{For access to |vsprintf|, |vfprintf| etc.} \varitem[\.{string.h}]{For string manipulation, |strcpy|, |strcmp| etc.} \varitem[\.{ctype.h}]{For access to the |isalnum| function.} \varitem[\.{time.h}]{For access to |ctime|, |clock| and time stamps.} \varitem[\.{sys/time.h}]{For access to millisecond-resolved timer.} @= #include #include #include #include #include #include #include #include #include "sgfilter.h" @ Declaration of global variables. The only global variables allowed in my programs are |optarg|, which is a pointer to the the string of characters that specified the call from the command line, and |progname|, which simply is a pointer to the string containing the name of the program, as it was invoked from the command line. Typically, the |progname| string is the result remaining after having passed |argv[0]| through the |strip_away_path| routine, just before parsing the command line for parameters. @= extern char *optarg; char *progname; @ Declaration of local variables of the |main| program. In \CWEB\ one has the option of adding variables along the program, for example by locally adding temporary variables related to a given sub-block of code. However, the philosophy in the \sgfilter\ program is to keep all variables of the |main| section collected together, so as to simplify tasks as, for example, tracking down a given variable type definition. The local variables of the program are as follows: \medskip \varitem[|*x|, |*yr|, |*yf|]{Pointers to the double precision vectors keeping the abscissa, unfiltered (raw) ordinata, and the and the resulting filtered ordinata, respectively.} \varitem[|mm|]{Keeps track of the number of samples to analyze, $|mm|\equiv M$.} \varitem[|*file|]{Dummy file pointer used for scanning through input data and for writing data to an output file.} \varitem[|input_filename|]{String for keeping the file name of the input data.} \varitem[|output_filename|]{Ditto for the output data.} \varitem[|verbose|]{Determines whether the program should run in verbose mode (logging various activities along the execution) or not. Default is off.} @= int no_arg; int nl=DEFAULT_NL; int nr=DEFAULT_NR; int ld=DEFAULT_LD; int m=DEFAULT_M; long int k, mm=0; double *x, *yr, *yf; char input_filename[NCHMAX]="",output_filename[NCHMAX]=""; char verbose=0; FILE *file; @ Parsing command line options. All input parameters are passed to the program through command line options to the \sgfilter\ program. The syntax of the accepted command line options is listed whenever the program is invoked without any options, or whenever any of the \.{--help} or \.{-h} options are specified at startup. @= progname=strip_away_path(argv[0]); no_arg=argc; while(--argc) { if(!strcmp(argv[no_arg-argc],"-o") || !strcmp(argv[no_arg-argc],"--outputfile")) { --argc; strcpy(output_filename,argv[no_arg-argc]); } else if(!strcmp(argv[no_arg-argc],"-i") || !strcmp(argv[no_arg-argc],"--inputfile")) { --argc; strcpy(input_filename,argv[no_arg-argc]); } else if(!strcmp(argv[no_arg-argc],"-h") || !strcmp(argv[no_arg-argc],"--help")) { showsomehelp(); exit(0); } else if(!strcmp(argv[no_arg-argc],"-v") || !strcmp(argv[no_arg-argc],"--verbose")) { verbose=(verbose?0:1); } else if(!strcmp(argv[no_arg-argc],"-nl")) { --argc; if (!sscanf(argv[no_arg-argc],"%d",&nl)) { log("Error in '-nl' option."); exit(1); } } else if(!strcmp(argv[no_arg-argc],"-nr")) { --argc; if (!sscanf(argv[no_arg-argc],"%d",&nr)) { log("Error in '-nr' option."); exit(1); } } else if(!strcmp(argv[no_arg-argc],"-ld")) { --argc; if (!sscanf(argv[no_arg-argc],"%d",&ld)) { log("Error in '-ld' option."); exit(1); } } else if(!strcmp(argv[no_arg-argc],"-m")) { --argc; if (!sscanf(argv[no_arg-argc],"%d",&m)) { log("Error in '-m' option."); exit(1); } } else { log("Unrecognized option '%s'.",argv[no_arg-argc]); showsomehelp(); exit(1); } } if (verbose) fprintf(stdout,"This is %s v.%s. %s\n",progname,VERSION,COPYRIGHT); @ This is the where the raw (unfiltered) data is loaded into the computer memory. This block assumes that the number of data points, $M$, has {\sl not} previously been determined, neither by analysis of the input file nor explicitly stated via command line options, and hence the number of samples is automatically extracted using the |num_coordinate_pairs| routine. @= if (!strcmp(input_filename,"")) { log("No input file specified! (Please use the '--inputfile' option.)"); log("Execute '%s --help' for help.",progname); exit(1); } if ((file=fopen(input_filename,"r"))==NULL) { log("Could not open %s for loading raw data!",input_filename); exit(1); } mm=num_coordinate_pairs(file); if (mm= if (!sgfilter(yr, yf, mm, nl, nr, ld, m)) { if (verbose) log("Successfully performed Savitzky-Golay filtering."); } else { if (verbose) log("Error: Could not perform Savitzky-Golay filtering."); } @ Write the filtered data to file or |stdout|, depending on whether or not the command line options \.{-o} or \.{-outputfile} were present when starting the \sgfilter\ program. We here use a redirection of the |stdout| stream using |freopen(output_filename, "w", stdout)) == NULL)| and (in the case of unix-like systems) a re-redirection back to terminal output again with |freopen("/dev/tty", "a", stdout)|. @= if (!strcmp(output_filename,"")) { /* No filename specified */ if (verbose) log("Writing %ld filtered samples to console...", mm); } else { /* If file name specified $\Rightarrow$ redirect |stdout| to file */ if (verbose) log("Writing %ld filtered samples to %s...", mm, output_filename); if((file = freopen(output_filename, "w", stdout)) == NULL) { log("Error: Unable to redirect stdout stream to file %s.", output_filename); exit(1); } } for (k=1;k<=mm;k++) fprintf(stdout,"%1.8f %1.8f\n",x[k],yf[k]); #ifdef UNIX_LIKE_OS freopen("/dev/tty", "a", stdout); /* Redirect |stdout| back to console output */ #endif if (verbose) log(" ... done."); @ Deallocate memory. @= free_dvector(x,1,mm); free_dvector(yr,1,mm); #if CONVOLVE_WITH_NR_CONVLV free_dvector(yf,1,2*mm); #else free_dvector(yf,1,mm); #endif @*Routines used by the program. @= @@; @@; @@; @@; @@; @@; @@; @@; @@; @@; @@; @@; @@; @@; @@; @@; @@; @@; @ The |void log_printf(const char *function_name,const char *format, ...)| routine writes formatted entries to standard output, displaying time and calling routine in a coherent manner. Notice that although the |log_printf()| routine is the one which performs the actual messaging, the |log()| macro (defined in the header file) is the preferred way of accessing this routine, as it provides a more compact notation and automatically takes care of supplying the reference to the name of the calling function. Also notice that the |const char| type of the last two input pointer arguments here is absolutely essential in order to pass strict pedantic compilation with \GCC. The routine accepts two input parameters. First, |function_name| which should be the name of the calling function. This is to ensure that any displayed error messages are properly matched to the issuing routines. Notice, however, that the |log()| macro (which is the preferred way of displaying error messages) automatically takes care of supplying the proper function name. Second, |format|, which simply is the format and message string to be displayed, formatted in the \CEE-standard |printf()| or |fprintf()| syntax. @= void log_printf(const char *function_name, const char *format, ...) { va_list args; time_t time0; struct tm lt; struct timeval tv; char logentry[1024]; gettimeofday(&tv, NULL); time(&time0); lt = *localtime(&time0); sprintf(logentry, "%02u%02u%02u %02u:%02u:%02u.%03d ", lt.tm_year-100, lt.tm_mon+1, lt.tm_mday, lt.tm_hour, lt.tm_min, lt.tm_sec, tv.tv_usec/1000); sprintf(logentry+strlen(logentry),"(%s) ",function_name); va_start(args, format); /* Initialize args by the |va_start()| macro */ vsprintf(logentry+strlen(logentry), format, args); va_end(args); /* Terminate the use of args by the |va_end()| macro */ sprintf(logentry+strlen(logentry), "\n"); /* Always append newline */ fprintf(stdout, "%s", logentry); return; } @ The |int *ivector(long nl, long nh)| routine allocates a real-valued vector of integer precision, with vector index ranging from |nl| to |nh|. @= int *ivector(long nl, long nh) { int *v; v=(int *)malloc((size_t) ((nh-nl+2)*sizeof(int))); if (!v) { log("Error: Allocation failure."); exit(1); } return v-nl+1; } @ The |double *dvector(long nl, long nh)| routine allocates a real-valued vector of double precision, with vector index ranging from |nl| to |nh|. @= double *dvector(long nl, long nh) { double *v; long k; v=(double *)malloc((size_t) ((nh-nl+2)*sizeof(double))); if (!v) { log("Error: Allocation failure."); exit(1); } for (k=nl;k<=nh;k++) v[k]=0.0; return v-nl+1; } @ The |double **dmatrix(long nrl, long nrh, long ncl, long nch)| routine allocates an array of double floating-point precision, with row index ranging from |nrl| to |nrh| and column index ranging from |ncl| to |nch|. @= double **dmatrix(long nrl, long nrh, long ncl, long nch) { long i, nrow=nrh-nrl+1,ncol=nch-ncl+1; double **m; m=(double **) malloc((size_t)((nrow+1)*sizeof(double*))); if (!m) { log("Allocation failure 1 occurred."); exit(1); } m += 1; m -= nrl; m[nrl]=(double *) malloc((size_t)((nrow*ncol+1)*sizeof(double))); if (!m[nrl]) { log("Allocation failure 2 occurred."); exit(1); } m[nrl] += 1; m[nrl] -= ncl; for(i=nrl+1;i<=nrh;i++) m[i]=m[i-1]+ncol; return m; } @ The |void free_ivector(int *v, long nl, long nh)| routine release the memory occupied by the real-valued vector |v[nl...nh]|. @= void free_ivector(int *v, long nl, long nh) { free((char*) (v+nl-1)); } @ The |free_dvector| routine release the memory occupied by the real-valued vector |v[nl...nh]|. @= void free_dvector(double *v, long nl, long nh) { free((char*) (v+nl-1)); } @ The |void free_dmatrix(double **m,long nrl,long nrh,long ncl,long nch)| routine releases the memory occupied by the double floating-point precision matrix |v[nl...nh]|, as allocated by |dmatrix()|. @= void free_dmatrix(double **m,long nrl,long nrh,long ncl,long nch) { free((char*) (m[nrl]+ncl-1)); free((char*) (m+nrl-1)); } @ The |lubksb(double **a, int n, int *indx, double b[])| routine solves the set of |n| linear equations ${\bf A}\cdot{\bf x}={\bf b}$ where ${\bf A}$ is a real-valued $[n\times n]$-matrix and ${\bf x}$ and ${\bf b}$ are real-valued $[n\times1]$-vectors. Here |a[1...n][1...n]| is input, however {\sl not} as the matrix ${\bf A}$ but rather as its corresponding LU decomposition as determined by the |ludcmp| routine. Here |indx[1...n]| is input as the permutation vector returned by |ludcmp|, |b[1...n]| is input as the right-hand side vector ${\bf b}$, and returns with the solution vector ${\bf x}$. The parameters |a|, |n|, and |indx| are not modified by this routine and can be left in place for successive calls with different right-hand sides ${\bf b}$. This routine takes into account the possibility that ${\bf b}$ will begin with many zero elements, so it is efficient for use in matrix inversion. The |lubksb| routine is adopted from {\sl Numerical Recipes in C}, 2nd Edn (Cambridge University Press, New York, 1994). @= void lubksb(double **a, int n, int *indx, double b[]) { int i,ii=0,ip,j; double sum; for (i=1;i<=n;i++) { ip=indx[i]; sum=b[ip]; b[ip]=b[i]; if (ii) for (j=ii;j<=i-1;j++) sum -= a[i][j]*b[j]; else if (sum) ii=i; b[i]=sum; } for (i=n;i>=1;i--) { sum=b[i]; for (j=i+1;j<=n;j++) sum -= a[i][j]*b[j]; b[i]=sum/a[i][i]; } } @ Given a square and real-valued matrix |a[1...n][1...n]|, the |ludcmp(float **a, int n, int *indx, float *d)| routine replaces it by the corresponding LU decomposition of a rowwise permutation of itself. Entering the routine, the square matrix |a| and its number of columns (or rows) |n| are inputs. On return, |a| is arranged as in Eq.~(2.3.14) of {\sl Numerical Recipes in C}, 2nd edition, while |indx[1...n]| is an output vector that records the row permutation effected by the partial pivoting, and |d| is output as $\pm1$ depending on whether the number of row interchanges was even or odd, respectively. This routine is commonly used in combination with |lubksb| to solve linear equations or invert a matrix. The |ludcmp| routine is adopted from {\sl Numerical Recipes in C}, 2nd Edn (Cambridge University Press, New York, 1994). @= void ludcmp(double **a, int n, int *indx, double *d) { int i,imax=0,j,k; double big,dum,sum,temp; double *vv; vv=dvector(1,n); *d=1.0; for (i=1;i<=n;i++) { big=0.0; for (j=1;j<=n;j++) if ((temp=fabs(a[i][j])) > big) big=temp; if (big == 0.0) { log("Error: Singular matrix found in routine ludcmp()"); exit(1); } vv[i]=1.0/big; } for (j=1;j<=n;j++) { for (i=1;i= big) { big=dum; imax=i; } } if (j != imax) { for (k=1;k<=n;k++) { dum=a[imax][k]; a[imax][k]=a[j][k]; a[j][k]=dum; } *d = -(*d); vv[imax]=vv[j]; } indx[j]=imax; if (a[j][j] == 0.0) a[j][j]=EPSILON; if (j != n) { dum=1.0/(a[j][j]); for (i=j+1;i<=n;i++) a[i][j] *= dum; } } free_dvector(vv,1,n); } @ The |four1(double data[], unsigned long nn, int isign)| routine replaces |data[1...2*nn]| by its discrete Fourier transform, if |isign| is input as $+1$; or replaces |data[1..2*nn]| by |nn| times its inverse discrete Fourier transform, if |isign| is input as $-1$. Here |data| is a complex-valued array of length |nn| or, equivalently, a real array of length |2*nn|, wher |nn| {\sl must} be an integer power of 2 (this is not checked for). The |four1| routine is adopted from {\sl Numerical Recipes in C}, 2nd Edn (Cambridge University Press, New York, 1994). @= #include #define SWAP(a,b) tempr=(a);(a)=(b);(b)=tempr void four1(double data[], unsigned long nn, int isign) { unsigned long n,mmax,m,j,istep,i; double wtemp,wr,wpr,wpi,wi,theta; double tempr,tempi; n=nn << 1; j=1; for (i=1;i i) { SWAP(data[j],data[i]); SWAP(data[j+1],data[i+1]); } m=n >> 1; while (m >= 2 && j > m) { j -= m; m >>= 1; } j += m; } mmax=2; while (n > mmax) { istep=mmax << 1; theta=isign*(6.28318530717959/mmax); wtemp=sin(0.5*theta); wpr = -2.0*wtemp*wtemp; wpi=sin(theta); wr=1.0; wi=0.0; for (m=1;m= void twofft(double data1[], double data2[], double fft1[], double fft2[], unsigned long n) { void four1(double data[], unsigned long nn, int isign); unsigned long nn3,nn2,jj,j; double rep,rem,aip,aim; nn3=1+(nn2=2+n+n); for (j=1,jj=2;j<=n;j++,jj+=2) { fft1[jj-1]=data1[j]; fft1[jj]=data2[j]; } four1(fft1,n,1); fft2[1]=fft1[2]; fft1[2]=fft2[2]=0.0; for (j=3;j<=n+1;j+=2) { rep=0.5*(fft1[j]+fft1[nn2-j]); rem=0.5*(fft1[j]-fft1[nn2-j]); aip=0.5*(fft1[j+1]+fft1[nn3-j]); aim=0.5*(fft1[j+1]-fft1[nn3-j]); fft1[j]=rep; fft1[j+1]=aim; fft1[nn2-j]=rep; fft1[nn3-j] = -aim; fft2[j]=aip; fft2[j+1] = -rem; fft2[nn2-j]=aip; fft2[nn3-j]=rem; } } @ The |realft(double data[], unsigned long n, int isign)| routine calculates the Fourier transform of a set of |n| real-valued data points. On return the routine replaces this data (which is stored in array |data[1...n]|) by the positive frequency half of its complex Fourier transform. The real-valued first and last components of the complex transform are returned in elements |data[1]| and |data[2]|, respectively. Here |n| must be a power of 2. This routine also calculates the inverse transform of a complex data array if it is the transform of real data. (In this case, the result must be multiplied by |2/n|.) The |realft| routine is adopted from {\sl Numerical Recipes in C}, 2nd Edn (Cambridge University Press, New York, 1994). @= void realft(double data[], unsigned long n, int isign) { void four1(double data[], unsigned long nn, int isign); unsigned long i,i1,i2,i3,i4,np3; double c1=0.5,c2,h1r,h1i,h2r,h2i; double wr,wi,wpr,wpi,wtemp,theta; theta=3.141592653589793/(double) (n>>1); if (isign == 1) { c2 = -0.5; four1(data,n>>1,1); } else { c2=0.5; theta = -theta; } wtemp=sin(0.5*theta); wpr = -2.0*wtemp*wtemp; wpi=sin(theta); wr=1.0+wpr; wi=wpi; np3=n+3; for (i=2;i<=(n>>2);i++) { i4=1+(i3=np3-(i2=1+(i1=i+i-1))); h1r=c1*(data[i1]+data[i3]); h1i=c1*(data[i2]-data[i4]); h2r = -c2*(data[i2]+data[i4]); h2i=c2*(data[i1]-data[i3]); data[i1]=h1r+wr*h2r-wi*h2i; data[i2]=h1i+wr*h2i+wi*h2r; data[i3]=h1r-wr*h2r+wi*h2i; data[i4] = -h1i+wr*h2i+wi*h2r; wr=(wtemp=wr)*wpr-wi*wpi+wr; wi=wi*wpr+wtemp*wpi+wi; } if (isign == 1) { data[1] = (h1r=data[1])+data[2]; data[2] = h1r-data[2]; } else { data[1]=c1*((h1r=data[1])+data[2]); data[2]=c1*(h1r-data[2]); four1(data,n>>1,-1); } } @ The |convlv(double data[], unsigned long n, double respns[], unsigned long m, int isign, double ans[])| routine convolves or deconvolves a real data set |data[1...n]| (including any user-supplied zero padding) with a response function |respns[1...n]|. The response function must be stored in wrap-around order in the first |m| elements of respns, where |m| is an odd integer less than or equal to |n|. Wrap-around order means that the first half of the array respns contains the impulse response function at positive times, while the second half of the array contains the impulse response function at negative times, counting down from the highest element |respns[m]|. On input, |isign| is $+1$ for convolution, and $-1$ for deconvolution. The answer is returned in the first |n| components of |ans|. However, |ans| {\sl must} be supplied in the calling program with dimensions |[1...2*n]|, for consistency with the |twofft| routine. Here |n| {\sl must} be an integer power of two. The |convlv| routine is adopted from {\sl Numerical Recipes in C}, 2nd Edn (Cambridge University Press, New York, 1994). @= char convlv(double data[], unsigned long n, double respns[], unsigned long m, int isign, double ans[]) { void realft(double data[], unsigned long n, int isign); void twofft(double data1[], double data2[], double fft1[], double fft2[], unsigned long n); unsigned long i,no2; double dum,mag2,*fft; fft=dvector(1,n<<1); for (i=1;i<=(m-1)/2;i++) respns[n+1-i]=respns[m+1-i]; for (i=(m+3)/2;i<=n-(m-1)/2;i++) respns[i]=0.0; twofft(data,respns,fft,ans,n); no2=n>>1; for (i=2;i<=n+2;i+=2) { if (isign == 1) { ans[i-1]=(fft[i-1]*(dum=ans[i-1])-fft[i]*ans[i])/no2; ans[i]=(fft[i]*dum+fft[i-1]*ans[i])/no2; } else if (isign == -1) { if ((mag2=ans[i-1]*ans[i-1]+ans[i]*ans[i]) == 0.0) { log("Attempt of deconvolving at zero response in convlv()."); return(1); } ans[i-1]=(fft[i-1]*(dum=ans[i-1])+fft[i]*ans[i])/mag2/no2; ans[i]=(fft[i]*dum-fft[i-1]*ans[i])/mag2/no2; } else { log("No meaning for isign in convlv()."); return(1); } } ans[2]=ans[n+1]; realft(ans,n,-1); free_dvector(fft,1,n<<1); return(0); } @ The |void sgcoeff(double c[], int np, int nl, int nr, int ld, int m)| routine computes the coefficients |c[1...np]| for Savitzky--Golay filtering. The coefficient vector |c[1...np]| is returned in {\sl wrap-around order} consistent with the argument |respns| in the {\sl Numerical Recipes in C} routine |convlv|. ``Wrap-around order'' means that the first half of the array |respns| contains the impulse response function at positive times, while the second half of the array contains the impulse response function at negative times, counting down from the highest element |respns[m]|. The Savitzky--Golay filter coefficients are computed for |nl| leftward (past) data points and |nr| rightward (future) data points, making the total number of data points used in the window as $|np|=|nl|+|nr|+1$. |ld| is the order of the derivative desired (for example, $|ld|=0$ for smoothed function). Here $m$ is the order of the smoothing polynomial, also equal to the highest conserved moment; usual values are $|m|=2$ or $|m|=4$. The |sgcoeff| routine is adopted from {\sl Numerical Recipes in C}, 2nd Edn (Cambridge University Press, New York, 1994). @= char sgcoeff(double c[], int np, int nl, int nr, int ld, int m) { void lubksb(double **a, int n, int *indx, double b[]); void ludcmp(double **a, int n, int *indx, double *d); int imj,ipj,j,k,kk,mm,*indx; double d,fac,sum,**a,*b; if (np < nl+nr+1 || nl < 0 || nr < 0 || ld > m || nl+nr < m) { log("Inconsistent arguments detected in routine sgcoeff."); return(1); } indx=ivector(1,m+1); a=dmatrix(1,m+1,1,m+1); b=dvector(1,m+1); for (ipj=0;ipj<=(m << 1);ipj++) { sum=(ipj ? 0.0 : 1.0); for (k=1;k<=nr;k++) sum += pow((double)k,(double)ipj); for (k=1;k<=nl;k++) sum += pow((double)-k,(double)ipj); mm=(ipj<2*m-ipj?ipj:2*m-ipj); for (imj = -mm;imj<=mm;imj+=2) a[1+(ipj+imj)/2][1+(ipj-imj)/2]=sum; } ludcmp(a,m+1,indx,&d); for (j=1;j<=m+1;j++) b[j]=0.0; b[ld+1]=1.0; lubksb(a,m+1,indx,b); for (kk=1;kk<=np;kk++) c[kk]=0.0; for (k = -nl;k<=nr;k++) { sum=b[1]; fac=1.0; for (mm=1;mm<=m;mm++) sum += b[mm+1]*(fac *= k); kk=((np-k) % np)+1; c[kk]=sum; } free_dvector(b,1,m+1); free_dmatrix(a,1,m+1,1,m+1); free_ivector(indx,1,m+1); return(0); } @ The |sgfilter(double yr[],double yf[],int mm, int nl, int nr, int ld, int m)| routine provides the interface for the actual Savitzky--Golay filtering of data. As input, this routine takes the following parameters: \varitem[|yr[1...mm]|]% {A vector containing the raw, unfiltered data} \varitem[|mm|]% {The number of data points in the input vector} \varitem[|nl|]% {The number of samples $n_L$ to use to the ``left'' of the basis sample in the regression window (kernel). The total number of samples in the window will be $nL+nR+1$.} \varitem[|nr|]% {The number of samples $n_R$ to use to the ``right'' of the basis sample in the regression window (kernel). The total number of samples in the window will be $nL+nR+1$.} \varitem[|m|]% {The order $m$ of the polynomial $p(x)=a_0+a_1(x-x_k)+a_2(x-x_k)^2+\ldots+a_m (x-x_k)^m$ to use in the regression analysis leading to the Savitzky--Golay coefficients. Typical values are between $m=2$ and $m=6$. Beware of too high values, which easily makes the regression too sensitive, with an oscillatory result.} \varitem[|ld|]% {The order of the derivative to extract from the Savitzky--Golay smoothing algorithm. For regular Savitzky--Golay smoothing of the input data as such, use $l_D=0$. For the Savitzky--Golay smoothing and extraction of derivatives, set $l_D$ to the order of the desired derivative and make sure that you correctly interpret the scaling parameters as described in {\sl Numerical Recipes in C}, 2nd Edn~(Cambridge University Press, New York, 1994).} \medskip \noindent On return, the Savitzky--Golay-filtered profile is contained in the vector |yf[1...mm]|. Notice the somewhat peculiar accessing of the coefficients $c_j$ via the |c[(j>=0?j+1:nr+nl+2+j)]| construction in this routine. This reflects the {\sl wrap-around storage} of the coefficients, where $c[1]=c_0$, $c[2]=c_1$, $\ldots$, $c[|nr|+1]=c_{n_R}$, $c[|nr|+2]=c_{-n_L}$, $c[|nr|+3]=c_{-n_L+1}$, $\ldots$, $c[|nr|+|nl|+1]=c_{-1}$. @= char sgfilter(double yr[], double yf[], int mm, int nl, int nr, int ld, int m) { int np=nl+1+nr; double *c; char retval; #if CONVOLVE_WITH_NR_CONVLV /* Please do not use this $\ldots$ */ c=dvector(1,mm); /* Size required by the {\sl NR in C} |convlv| routine */ retval=sgcoeff(c,np,nl,nr,ld,m); if (retval==0) convlv(yr,mm,c,np,1,yf); free_dvector(c,1,mm); #else /* $\ldots$ use this instead. (Strongly recommended.) */ int j; long int k; c=dvector(1,nl+nr+1); /* Size required by direct convolution */ retval=sgcoeff(c,np,nl,nr,ld,m); if (retval==0) { for (k=1;k<=nl;k++) { /* The first |nl| samples */ for (yf[k]=0.0,j=-nl;j<=nr;j++) { if (k+j>=1) { yf[k]+=c[(j>=0?j+1:nr+nl+2+j)]*yr[k+j]; } } } for (k=nl+1;k<=mm-nr;k++) { /* Samples $|nl|+1 \ldots |mm|-|nr|$ */ for (yf[k]=0.0,j=-nl;j<=nr;j++) { yf[k]+=c[(j>=0?j+1:nr+nl+2+j)]*yr[k+j]; } } for (k=mm-nr+1;k<=mm;k++) { /* The last |nr| samples */ for (yf[k]=0.0,j=-nl;j<=nr;j++) { if (k+j<=mm) { yf[k]+=c[(j>=0?j+1:nr+nl+2+j)]*yr[k+j]; } } } } free_dvector(c,1,nr+nl+1); #endif return(retval); /* Returning |0| if successful filtering */ } @ Routines for removing preceding path of filenames. In this block all routines related to removing preceding path strings go. Not really fancy programming, and no contribution to any increase of numerical efficiency or precision; just for the sake of keeping a tidy terminal output of the program. The |strip_away_path()| routine is typically called when initializing the program name string |progname| from the command line string |argv[0]|, and is typically located in the blocks related to parsing of the command line options. The |strip_away_path()| routine takes a character string |filename| as argument, and returns a pointer to the same string but without any preceding path segments. @= @@; char *strip_away_path(char filename[]) { int j,k=0; while (pathcharacter(filename[k])) k++; j=(--k); /* this is the uppermost index of the full path+file string */ while (isalnum((int)(filename[j]))) j--; j++; /* this is the lowermost index of the stripped file name */ return(&filename[j]); } @ In this program, valid path characters are any alphanumeric character or `\.{.}', `\.{/}', `\.{\\}', `\.{\_}', `\.{-}', or `\.{+}'. @= short pathcharacter(int ch) { return(isalnum(ch)||(ch=='.')||(ch=='/')||(ch=='\\')||(ch=='_')|| (ch=='-')||(ch=='+')); } @ The |void showsomehelp(void)| displays a brief help message to standard terminal output. This is where the caller always should end up in case anything is wrong in the input parameters. @= @@; void showsomehelp(void) { hl("Usage: %s [options]",progname); hl("Options:"); hl(" -h, --help"); hl(" Display this help message and exit clean."); hl(" -i, --inputfile "); hl(" Specifies the file name from which unfiltered data is to be read."); hl(" The input file should describe the input as two columns contain-"); hl(" ing $x$- and $y$-coordinates of the samples."); hl(" -o, --outputfile "); hl(" Specifies the file name to which filtered data is to be written,"); hl(" again in a two-column format containing $x$- and $y$-coordinates"); hl(" of the filtered samples. If this option is omitted, the generated"); hl(" filtered data will instead be written to the console (terminal)."); hl(" -nl "); hl(" Specifies the number of samples nl to use to the 'left' of the"); hl(" basis sample in the regression window (kernel). The total number"); hl(" of samples in the window will be nL+nR+1."); hl(" -nr "); hl(" Specifies the number of samples nr to use to the 'right' of the"); hl(" basis sample in the regression window (kernel). The total number"); hl(" of samples in the window will be nL+nR+1."); hl(" -m "); hl(" Specifies the order m of the polynomial to use in the regression"); hl(" analysis leading to the Savitzky-Golay coefficients. Typical"); hl(" values are between m=2 and m=6. Beware of too high values, which"); hl(" easily makes the regression too sensitive, with an oscillatory"); hl(" result."); hl(" -ld "); hl(" Specifies the order of the derivative to extract from the "); hl(" Savitzky--Golay smoothing algorithm. For regular Savitzky-Golay"); hl(" smoothing of the input data as such, use ld=0. For the Savitzky-"); hl(" Golay smoothing and extraction of derivatives, set ld to the"); hl(" order of the desired derivative and make sure that you correctly"); hl(" interpret the scaling parameters as described in 'Numerical"); hl(" Recipes in C', 2nd Edn (Cambridge University Press, New York,"); hl(" 1994)."); hl(" -v, --verbose"); hl(" Toggle verbose mode. (Default: Off.) This option should always"); hl(" be omitted whenever no output file has been specified (that is"); hl(" to say, omit any --verbose or -v option whenever --outputfile or"); hl(" -o has been omitted), as the verbose logging otherwise will"); hl(" contaminate the filtered data stream written to the console"); hl(" (terminal)."); } @ In order to simplify the messaging, the |hl(const char *format, ...)| routine acts as a simple front-end merely for compactifying the code by successive calls to |hl(...)| rather than the full |fprintf(stderr, ...)|, still maintaining all the functionality of string formatting in the regular |printf()| or |fprintf()| syntax. @= void hl(const char *format, ...) { va_list args; char line[1024]; va_start(args, format); /* Initialize args by the |va_start()| macro */ vsprintf(line, format, args); va_end(args); /* Terminate the use of args by the |va_end()| macro */ sprintf(line+strlen(line), "\n"); /* Always append newline */ fprintf(stdout, "%s", line); return; } @ Routine for obtaining the number of coordinate pairs in a file. This routine is called prior to loading the input data, in order to automatically extract the size needed for allocating the memory for the storage. @= long int num_coordinate_pairs(FILE *file) { double tmp; int tmpch; long int mm=0; fseek(file,0L,SEEK_SET); /* rewind file to beginning */ while((tmpch=getc(file))!=EOF) { ungetc(tmpch,file); fscanf(file,"%lf",&tmp); /* Read away the $x$ coordinate */ fscanf(file,"%lf",&tmp); /* Read away the $y$ coordinate */ mm++; tmpch=getc(file); /* Read away any blanks or linefeeds */ while ((tmpch!=EOF)&&(!isdigit(tmpch))) tmpch=getc(file); if (tmpch!=EOF) ungetc(tmpch,file); } fseek(file,0L,SEEK_SET); /* rewind file to beginning */ return(mm); } @*Index.