StringToken.c

Go to the documentation of this file.
00001 /*
00002 *  Copyright (C) 2007 Jolien Creighton
00003 *
00004 *  This program is free software; you can redistribute it and/or modify
00005 *  it under the terms of the GNU General Public License as published by
00006 *  the Free Software Foundation; either version 2 of the License, or
00007 *  (at your option) any later version.
00008 *
00009 *  This program is distributed in the hope that it will be useful,
00010 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012 *  GNU General Public License for more details.
00013 *
00014 *  You should have received a copy of the GNU General Public License
00015 *  along with with program; see the file COPYING. If not, write to the
00016 *  Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
00017 *  MA  02111-1307  USA
00018 */
00019 
00020 /********************************** <lalVerbatim file="StringTokenCV">
00021 Author: Creighton, T. D.
00022 $Id: StringToken.c,v 1.2 2007/06/08 14:41:53 bema Exp $
00023 **************************************************** </lalVerbatim> */
00024 
00025 /********************************************************** <lalLaTeX>
00026 
00027 \subsection{Module \texttt{StringToken.c}}
00028 \label{ss:StringToken.c}
00029 
00030 Converts a string into a series of tokens, for use by other routines.
00031 
00032 \subsubsection*{Prototypes}
00033 \vspace{0.1in}
00034 \input{StringTokenCP}
00035 \idx{LALCreateTokenList()}
00036 \idx{LALDestroyTokenList()}
00037 
00038 \subsubsection*{Description}
00039 
00040 The routine \verb@LALCreateTokenList()@ parses \verb@*string@ as a
00041 sequence of tokens (substrings of non-null characters that do not
00042 appear in \verb@delimiters@), separated by delimiters (substrings
00043 consisting only of characters that appear in \verb@delimiters@), and
00044 terminated by the null character \verb@'\0'@.  The structure
00045 \verb@**list@ is created, storing the sequence of tokens as a list of
00046 null-terminated character strings.
00047 
00048 The output \verb@list@ should be a non-\verb@NULL@ handle that points
00049 to the value \verb@NULL@ (i.e.\ \verb@list@$\neq$\verb@NULL@ but
00050 \verb@*list@=\verb@NULL@).  Even if no tokens were found, \verb@*list@
00051 will be created, but will have \verb@(*list)->nTokens@=0,
00052 \verb@(*list)->tokens[0]@=\verb@NULL@, and
00053 \verb@(*list)->list@=\verb@NULL@.  Note that this is \emph{not} an
00054 error, so the calling routine need not guarantee in advance that
00055 \verb@string@ contain any non-delimiter characters.
00056 
00057 The routine \verb@LALDestroyTokenList()@ destroys a list of tokens as
00058 created by \verb@LALCreateTokenList()@, setting \verb@*list@ to
00059 \verb@NULL@.
00060 
00061 \subsubsection*{Algorithm}
00062 
00063 The \verb@LALCreateTokenList()@ function is not particularly
00064 memory-efficient, requiring internal storage up to twice the length of
00065 \verb@*string@.  It first creates a working copy of
00066 \verb@string@, and replaces all occurrences of characters
00067 appearing in \verb@*delimiters@ with \verb@'\0'@, while at the same
00068 time keeping track of the number and total length of all tokens.  It
00069 then allocates a contiguous block of memory to store all the tokens
00070 (separated by and terminated with single \verb@'\0'@ characters), and
00071 a set of \verb@CHAR *@ pointers to point to the individual tokens in
00072 this block.  Then the routine proceeds through the working copy one
00073 last time, copying tokens into the token list and setting the token
00074 pointers accordingly, before destroying the working copy.
00075 
00076 \subsubsection*{Uses}
00077 \begin{verbatim}
00078 LALMalloc()                     LALCHARCreateVector()
00079 LALFree()                       LALCHARDestroyVector()
00080 \end{verbatim}
00081 
00082 \subsubsection*{Notes}
00083 
00084 \vfill{\footnotesize\input{StringTokenCV}}
00085 
00086 ******************************************************* </lalLaTeX> */
00087 
00088 #include <string.h>
00089 #include <lal/LALStdlib.h>
00090 #include <lal/AVFactories.h>
00091 #include <lal/StringInput.h>
00092 
00093 NRCSID( STRINGTOKENC, "$Id: StringToken.c,v 1.2 2007/06/08 14:41:53 bema Exp $" );
00094 
00095 /* <lalVerbatim file="StringTokenCP"> */
00096 void
00097 LALCreateTokenList( LALStatus  *stat,
00098                     TokenList  **list,
00099                     const CHAR *string,
00100                     const CHAR *delimiters )
00101 { /* </lalVerbatim> */
00102   BOOLEAN delimiter = 1; /* whether current character is a delimiter */
00103   UINT4 i = 0, j = 0;    /* indecies */
00104   UINT4 nTokens = 0;     /* number of tokens */
00105   UINT4 sLength;         /* length of string */
00106   UINT4 tLength = 0;     /* length of token list */
00107   CHAR *copy;            /* working copy of token list */
00108 
00109   INITSTATUS( stat, "LALCreateTokenList", STRINGTOKENC );
00110   ATTATCHSTATUSPTR( stat );
00111 
00112   /* Check for valid input arguments. */
00113   ASSERT( list, stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL );
00114   ASSERT( string, stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL );
00115   ASSERT( delimiters, stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL );
00116   ASSERT( !*list, stat, STRINGINPUTH_EOUT, STRINGINPUTH_MSGEOUT );
00117 
00118   /* Create working copy of token list. */
00119   sLength = strlen( string ) + 1;
00120   if ( !( copy = (CHAR *)LALMalloc( sLength*sizeof(CHAR) ) ) ) {
00121     ABORT( stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL );
00122   }
00123   for ( i = 0; i < sLength; i++ ) {
00124     CHAR c = string[i];
00125     if ( strchr( delimiters, c ) ) {
00126       copy[i] = '\0';
00127       delimiter = 1;
00128     } else {
00129       copy[i] = c;
00130       tLength++;
00131       if ( delimiter ) {
00132         delimiter = 0;
00133         nTokens++;
00134       }
00135     }
00136   }
00137 
00138   /* Create the token list. */
00139   if ( !( *list = (TokenList *)LALMalloc( sizeof(TokenList) ) ) ) {
00140     LALFree( copy );
00141     ABORT( stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL );
00142   }
00143   if ( !( (*list)->tokens =
00144           (CHAR **)LALMalloc( ( nTokens + 1 )*sizeof(CHAR *) ) ) ) {
00145     LALFree( *list );
00146     *list = NULL;
00147     LALFree( copy );
00148     ABORT( stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL );
00149   }
00150   (*list)->nTokens = nTokens;
00151   (*list)->list = NULL;
00152 
00153 
00154   /* If tokens were found, copy them over and set up pointers. */
00155   if ( nTokens ) {
00156     CHAR *listData; /* pointer to token list data */
00157     LALCHARCreateVector( stat->statusPtr, &((*list)->list),
00158                          nTokens + tLength );
00159     BEGINFAIL( stat ) {
00160       LALFree( (*list)->tokens );
00161       LALFree( *list );
00162       *list = NULL;
00163       LALFree( copy );
00164     } ENDFAIL( stat );
00165     listData = (*list)->list->data;
00166     i = 0;
00167     while ( i < sLength ) {
00168       if ( copy[i] ) {
00169         tLength = strlen( copy + i ) + 1;
00170         memcpy( listData, copy + i, tLength*sizeof(CHAR) );
00171         (*list)->tokens[j++] = listData;
00172         i += tLength;
00173         listData += tLength;
00174       } else
00175         i++;
00176     }
00177   }
00178   (*list)->tokens[j] = NULL;
00179 
00180   /* Clean up and exit. */
00181   LALFree( copy );
00182   DETATCHSTATUSPTR( stat );
00183   RETURN( stat );
00184 }
00185 
00186 
00187 /* <lalVerbatim file="StringTokenCP"> */
00188 void
00189 LALDestroyTokenList( LALStatus *stat,
00190                      TokenList **list )
00191 { /* </lalVerbatim> */
00192   INITSTATUS( stat, "LALCreateTokenList", STRINGTOKENC );
00193   ATTATCHSTATUSPTR( stat );
00194 
00195   /* Check for valid input arguments. */
00196   ASSERT( list, stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL );
00197   ASSERT( *list, stat, STRINGINPUTH_ENUL, STRINGINPUTH_MSGENUL );
00198 
00199   /* Free everything and exit. */
00200   if ( (*list)->list ) {
00201     TRY( LALCHARDestroyVector( stat->statusPtr, &((*list)->list) ),
00202          stat );
00203   }
00204   LALFree( (*list)->tokens );
00205   LALFree( *list );
00206   *list = NULL;
00207   DETATCHSTATUSPTR( stat );
00208   RETURN( stat );
00209 }

Generated on Sat Aug 30 03:13:23 2008 for LAL by  doxygen 1.5.2