/* [<][>][^][v][top][bottom][index][help] */
DEFINITIONS
This source file includes the following definitions.
- word_string
- word_string
- word_string
- word_string
- empty
- show
- trim
- ucase
- lcase
- words
- indxword
- wordlen
- word
- substr
- left
- right
- command_words
- command_indxword
- command_wordlen
- command_word
- find
- find_char
- command_worddel
- worddel
- wordput
- num
- parse_extend
- str_replace
/* Word string class implementation
Rick Smereka, Copyright (C) 2000-2002.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, get a copy via the Internet at
http://gnu.org/copyleft/gpl.html or write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA 02111-1307 USA
You can contact the author via email at rsmereka@future-lab.com.
Original version, Oct/2000, Rick Smereka
Added 'using namespace std'. Dec/2002, Rick Smereka */
#include "stdhead.h"
#include "wstring.h"
using namespace std;
word_string::word_string(char *text)
{
    /* Build a word_string from an existing C string. A null
       pointer and a zero-length C string both yield an empty
       word_string. */
    const bool has_text = (text != (char *)NULL) && (strlen(text) != 0);

    if (has_text)
        ws_contents = text;
    else
        ws_contents = "";
}
word_string::word_string(word_string& o_string)
{
    /* Copy constructor taking a modifiable source. Starts out empty
       and only copies the contents when the source holds text. */
    ws_contents = "";

    if (!o_string.empty())
        ws_contents = o_string.ws_contents;
}
word_string::word_string(const word_string& o_string)
{
    /* Copy constructor taking a constant source.

       The contents are copied directly. 'empty()' is defined in this
       file as a non-const member, so it must not be called through a
       const reference; copying an empty string already produces an
       empty string, so no separate empty check is required. */
    ws_contents = o_string.ws_contents;
}
word_string::word_string(char ch, int cnt)
{
    /* Build a word_string holding 'cnt' copies of the character
       'ch'. A zero or negative count gives an empty word_string. */
    int remaining = cnt;

    ws_contents = "";

    while(remaining > 0)
    {
        ws_contents += ch;
        remaining--;
    }
}
word_string operator + (word_string str1, word_string str2)
{
    /* Concatenation of two word_strings (friend function). Only
       the non-empty operands contribute; when both are empty the
       default-constructed result is returned untouched. */
    word_string joined;
    const bool left_has_text = (str1.length() != 0);
    const bool right_has_text = (str2.length() != 0);

    if (left_has_text && right_has_text)
        joined.ws_contents = str1.ws_contents + str2.ws_contents;
    else if (left_has_text)
        joined.ws_contents = str1.ws_contents;
    else if (right_has_text)
        joined.ws_contents = str2.ws_contents;

    return(joined);
}
word_string operator + (word_string str1, string str2)
{
    /* Concatenation of a word_string and a standard string
       (friend function). Only the non-empty operands contribute;
       when both are empty the default-constructed result is
       returned untouched. */
    word_string joined;
    const bool left_has_text = (str1.length() != 0);
    const bool right_has_text = (str2.length() != 0);

    if (left_has_text && right_has_text)
        joined.ws_contents = str1.ws_contents + str2;
    else if (left_has_text)
        joined.ws_contents = str1.ws_contents;
    else if (right_has_text)
        joined.ws_contents = str2;

    return(joined);
}
word_string operator + (string str1, word_string str2)
{
    /* Concatenation of a standard string and a word_string
       (friend function). Only the non-empty operands contribute;
       when both are empty the default-constructed result is
       returned untouched. */
    word_string joined;
    const bool left_has_text = (str1.length() != 0);
    const bool right_has_text = (str2.length() != 0);

    if (left_has_text && right_has_text)
        joined.ws_contents = str1 + str2.ws_contents;
    else if (left_has_text)
        joined.ws_contents = str1;
    else if (right_has_text)
        joined.ws_contents = str2.ws_contents;

    return(joined);
}
word_string operator + (word_string str1, char *str2)
{
    /* Concatenate a word_string and a C string. Friend method.

       A null 'str2' is treated as an empty string, matching the
       'char *' constructor, instead of being handed to 'strlen'.
       Only the non-empty operands contribute to the result; when
       both are empty the default-constructed result is returned. */
    word_string result;
    int s1_len, s2_len;

    s1_len = str1.length();
    s2_len = (str2 == (char *)NULL) ? 0 : strlen(str2);

    if (s1_len && s2_len)
        result.ws_contents = str1.ws_contents + str2;
    else if (s2_len)
        result.ws_contents = str2;
    else if (s1_len)
        result.ws_contents = str1.ws_contents;

    return(result);
}
word_string operator + (char *str1, word_string str2)
{
    /* Concatenate a C string and a word_string. Friend method.

       A null 'str1' is treated as an empty string, matching the
       'char *' constructor, instead of being handed to 'strlen'.
       Only the non-empty operands contribute to the result; when
       both are empty the default-constructed result is returned. */
    word_string result;
    int s1_len, s2_len;

    s1_len = (str1 == (char *)NULL) ? 0 : strlen(str1);
    s2_len = str2.length();

    if (s1_len && s2_len)
        result.ws_contents = str1 + str2.ws_contents;
    else if (s2_len)
        result.ws_contents = str2.ws_contents;
    else if (s1_len)
        result.ws_contents = str1;

    return(result);
}
word_string operator + (word_string str1, char ch)
{
    /* Append a single character to a word_string (friend
       function). With an empty left operand the result is just
       the character. */
    word_string joined;

    if (str1.length() == 0)
        joined.ws_contents = ch;
    else
        joined.ws_contents = str1.ws_contents + ch;

    return(joined);
}
word_string operator + (char ch, word_string str1)
{
    /* Prepend a single character to a word_string (friend
       function). With an empty right operand the result is just
       the character. */
    word_string joined;

    if (str1.length() == 0)
        joined.ws_contents = ch;
    else
        joined.ws_contents = ch + str1.ws_contents;

    return(joined);
}
bool word_string::operator == (char ch)
{
    /* Equality test against a single character. Only the first
       character of 'ws_contents' takes part in the comparison. */
    return((ws_contents[0] == ch) ? TRUE : FALSE);
}
bool word_string::operator != (char ch)
{
    /* Inequality test against a single character. Only the first
       character of 'ws_contents' takes part in the comparison. */
    return((ws_contents[0] != ch) ? TRUE : FALSE);
}
char word_string::operator [] (int pos)
{
    /* Fetch the character at index 'pos'. Any index outside the
       string (negative, or at/after the end) yields 'EOS'. */
    if (pos < 0)
        return(EOS);

    if (pos < ws_contents.length())
        return(ws_contents[pos]);

    return(EOS);
}
bool word_string::empty(void)
{
    // Report 'TRUE' when 'ws_contents' holds no characters.
    return(ws_contents.empty() ? TRUE : FALSE);
}
void word_string::show(void)
{
    /* Write the string to standard output, wrapped in single
       quotes and followed by its length and a newline. */
    cout << '\'' << ws_contents << '\'';
    cout << ",len=" << ws_contents.length() << "\n";
}
word_string word_string::trim(void)
{
    /* Trim a string by removing all spaces from the beginning
       and the end, and collapse every run of spaces between words
       to a single space. The result word string is returned;
       '*this' is not modified. An empty or all-blank string gives
       an empty result.

       Every loop tests the index against the length before the
       character at that index is read, so the string is never
       subscripted at or past its end. */
    word_string result;
    int i, len;

    len = length();

    if (!len)
        return(result);

    // skip leading blanks
    i = 0;

    while(i < len && ws_contents[i] == ' ')
        i++;

    // for an all-blank string 'i' equals 'len' here and nothing is copied
    while(i < len)
    {
        // copy one word
        while(i < len && ws_contents[i] != ' ')
        {
            result += ws_contents[i];
            i++;
        }

        // skip the run of blanks that follows it
        while(i < len && ws_contents[i] == ' ')
            i++;

        // separate words with one blank, but never end on a blank
        if (i < len)
            result += ' ';
    }

    return(result);
}
word_string word_string::ucase(void)
{
    /* Copy a string to its uppercase version. The result string
       is returned; '*this' is not modified.

       Each character is converted to 'unsigned char' before being
       passed to 'toupper', because passing a negative 'char' value
       is undefined. The 'int' result is narrowed back to 'char'
       explicitly before it is appended. */
    word_string result;
    int i, len;

    result = "";
    len = length();

    for(i = 0; i < len; i++)
        result += static_cast<char>(toupper(static_cast<unsigned char>(ws_contents[i])));

    return(result);
}
word_string word_string::lcase(void)
{
    /* Copy a string to its lowercase version. The result string
       is returned; '*this' is not modified.

       Each character is converted to 'unsigned char' before being
       passed to 'tolower', because passing a negative 'char' value
       is undefined. The 'int' result is narrowed back to 'char'
       explicitly before it is appended. */
    word_string result;
    int i, len;

    result = "";
    len = length();

    for(i = 0; i < len; i++)
        result += static_cast<char>(tolower(static_cast<unsigned char>(ws_contents[i])));

    return(result);
}
int word_string::words(char delim)
{
// Count the number of 'delim' delimited words.
word_string tmp;
int i, len, nwords;
len = length();
if (!len)
return(0);
// trim string if delimiter is a space
if (delim == ' ')
{
tmp = trim();
len = tmp.length();
if (!len)
return(0);
}
else
tmp.ws_contents = ws_contents;
// count instances
for(i = 0, nwords = 0; i < len; i++)
if (tmp[i] == delim)
nwords++;
return(nwords + 1);
}
int word_string::indxword(int which, char delim)
{
/* Return the index of word 'which' delimited by 'delim'.
Function returns the index on success, -1 otherwise. */
word_string tmp;
int i, len, nwords, index;
len = length();
if (!len)
return(-1);
if (which <= 0 || which > words(delim))
return(-1);
// trim string if delimiter is a space
if (delim == ' ')
{
tmp = trim();
len = tmp.length();
if (!len)
return(-1);
}
else
tmp.ws_contents = ws_contents;
i = 0;
nwords = 1;
while(i <= len)
{
index = i;
if (nwords == which)
return(index);
while(tmp[i] != delim && i < len)
i++;
nwords++;
i++;
}
return(-1);
}
int word_string::wordlen(int which, char delim)
{
/* Get the length of 'delim' delimited word 'which'.
Functions returns length upon sucess, -1 otherwise.
It is acceptable to have a word of zero length. */
int nwords;
int len;
int wrdlen;
if ((len = length()) == 0)
return(-1);
nwords = words(delim);
if (!nwords)
return(-1);
if (which <= 0 || which > nwords)
return(-1);
if (which == nwords)
wrdlen = len - indxword(which, delim);
else
wrdlen = indxword(which + 1, delim) - (indxword(which, delim) + 1);
return(wrdlen);
}
bool word_string::word(word_string& outstr, int which, char delim)
{
    /* Extract the 'which' word and return it in 'outstr'.
       Each word is delimited by 'delim'. Function returns TRUE on
       success, FALSE otherwise ('outstr' is then left empty).

       For the space delimiter 'indxword' reports positions within
       the trimmed form of the string, so the word is cut from the
       trimmed form as well. Cutting from the untrimmed contents
       would return the wrong characters whenever the string has
       leading or repeated blanks. */
    int len;
    int pos;

    len = length();
    outstr = "";

    if (!len)
        return(FALSE);

    if (which <= 0 || which > words(delim))
        return(FALSE);

    pos = indxword(which, delim);
    len = wordlen(which, delim);

    if (delim == ' ')
        outstr = trim().substr(pos, len);
    else
        outstr = substr(pos, len);

    return(TRUE);
}
word_string word_string::substr(int pos, int len)
{
    /* Sub-string extract. Returns up to 'len' characters starting
       at index 'pos'. An empty word_string is returned when 'pos'
       is negative, 'len' is not positive, or 'pos' lies at or
       beyond the end of the string.

       The end-of-string check keeps an out-of-range 'pos' away
       from 'std::string::substr', which would throw
       'std::out_of_range' instead of following this class's
       return-empty convention. */
    word_string result;

    if (pos < 0 || len <= 0)
        return(result);

    if (pos >= length())
        return(result);

    result.ws_contents = ws_contents.substr(pos, len);
    return(result);
}
word_string word_string::left(int len)
{
    /* Return the leading 'len' characters. A non-positive 'len'
       gives an empty result; a 'len' covering the whole string
       (or more) gives a full copy. */
    word_string head;

    if (len > 0)
    {
        if (len < length())
            head.ws_contents = ws_contents.substr(0, len);
        else
            head.ws_contents = ws_contents;
    }

    return(head);
}
word_string word_string::right(int len)
{
    /* Return the trailing 'len' characters. A non-positive 'len'
       or an empty string gives an empty result; a 'len' longer
       than the string gives a full copy. */
    word_string tail;
    const int total = length();

    if (len > 0 && total != 0)
    {
        if (len > total)
            tail.ws_contents = ws_contents;
        else
            tail.ws_contents = ws_contents.substr(total - len, len);
    }

    return(tail);
}
int word_string::command_words(void)
{
/* Count the number of 'command' words in a string. A
command word can be either a space delimited word or
multiple words delimited by matching quotes (single or
double). Function returns the number of command words
found, or zero for an empty string.
The count is built by counting word boundaries and adding
one at the end. The string is used as is (it is not
trimmed), and every space outside quotes is counted as a
boundary. */
char ch;
char prev_ch;
char delim;
int len;
int nwords;
int pos;
int found;
len = length();
if (!len)
return(0);
nwords = 0;
pos = 0;
/* loop through all characters in string looking for delimiters;
note the loop condition lets 'pos' reach 'len' itself, so the
position one past the last character is examined as well */
do
{
ch = ws_contents[pos];
switch(ch)
{
case ' ':
// a space is an unconditional word
nwords++;
pos++;
break;
case '\'':
case '\"':
/* an opening quote that directly follows a character which is
neither a space nor a quote also ends the preceding word */
if (pos > 0)
{
prev_ch = ws_contents[pos - 1];
if (prev_ch != ' ' && prev_ch != '\"' && prev_ch != '\'')
nwords++;
}
// remember which quote character opened the phrase
delim = ch;
found = FALSE;
pos++;
// find matching quote
do
{
ch = ws_contents[pos];
if (delim == ch)
{
/* step past the closing quote, and also past a single
space that immediately follows it */
if (ws_contents[pos + 1] == ' ')
pos += 2;
else
pos++;
/* add another word as long as the quote is
not the last character */
if (pos < len)
nwords++;
found = TRUE;
}
else
pos++;
}
while(!found && pos <= len);
// an unmatched quote leaves this loop with 'pos' past 'len'
break;
default:
// ordinary character, part of the current word
pos++;
};
}
while(pos <= len);
// boundaries counted so far, plus one, gives the word count
return(nwords + 1);
}
int word_string::command_indxword(int which)
{
/* Return the index of word 'which' (counting from one)
command delimited. A command word is either a space
delimited word or a phrase enclosed in matching single
or double quotes. Function returns the index on success,
-1 otherwise (empty string, or 'which' outside the range
one to 'command_words()').
When the requested word is found at a quote character
through the space or closing-quote paths below, the
returned index is moved one past that quote. */
int pos, len, nword, done;
int found;
char ch, delim, prev_ch;
len = length();
if (!len)
return(-1);
if (which <= 0 || which > command_words())
return(-1);
pos = 0;
nword = 1;
done = FALSE;
// loop through all characters in string looking for delimiters
do
{
/* handle first word requested: 'nword' only equals 'which'
here when the wanted word starts at the current position;
a leading quote is skipped */
if (nword == which)
{
if (ws_contents[pos] == '\'' || ws_contents[pos] == '\"')
pos++;
done = TRUE;
// 'continue' jumps to the loop test, which ends the loop since 'done' is set
continue;
}
ch = ws_contents[pos];
switch(ch)
{
case ' ':
// a space is an unconditional word
nword++;
pos++;
if (nword == which)
done = TRUE;
// move position if the next character is a quote
if (done && pos < len)
if (ws_contents[pos] == '\'' || ws_contents[pos] == '\"')
pos++;
break;
case '\'':
case '\"':
// if quote is to the right of a non-delimiter, add one word
if (pos > 0)
{
prev_ch = ws_contents[pos - 1];
if (prev_ch != ' ' && prev_ch != '\"' && prev_ch != '\'')
{
nword++;
if (nword == which)
{
/* requested word starts here; this 'break' leaves the
switch with 'pos' still on the quote character */
done = TRUE;
break;
}
}
}
// remember which quote character opened the phrase
delim = ch;
found = FALSE;
pos++;
// find matching quote
do
{
ch = ws_contents[pos];
if (delim == ch)
{
/* step past the closing quote, and also past a single
space that immediately follows it */
if (ws_contents[pos + 1] == ' ')
pos += 2;
else
pos++;
// a new word begins here unless the quote ended the string
if (pos < len)
{
nword++;
if (nword == which)
{
done = TRUE;
// skip the opening quote of the requested word
if (ws_contents[pos] == '\'' || ws_contents[pos] == '"')
pos++;
}
}
found = TRUE;
}
else
pos++;
}
while(!found && pos <= len);
break;
default:
// ordinary character, part of the current word
pos++;
break;
}
}
while(pos <= len && !done);
if (done)
return(pos);
return(-1);
}
int word_string::command_wordlen(int which)
{
    /* Get and return the length of the 'which' command word.
       Function returns the word length upon success (which
       may be zero), -1 otherwise.

       The word is taken to end at the next occurrence of the
       character that sits just before its start index (a quote
       or a space), or at the end of the string. A word starting
       at index zero is treated as space delimited. */
    int indx, i, len, outlen;
    char delim;

    len = length();

    if (!len)
        return(-1);

    if (which <= 0 || which > command_words())
        return(-1);

    indx = command_indxword(which);

    /* 'command_indxword' reports failure with -1; never use that
       as a subscript */
    if (indx < 0)
        return(-1);

    // set delimiter
    if (indx == 0)
        delim = ' ';
    else
        delim = ws_contents[indx - 1];

    // loop looking for the next delimiter or end of the string
    for(i = indx, outlen = 0; i < len; i++, outlen++)
        if (ws_contents[i] == delim)
            break;

    return(outlen);
}
bool word_string::command_word(word_string& o_string, int which)
{
    /* Extract the 'which' command word. Output command word
       will be placed into the string 'o_string' (which is
       emptied first). Function returns 'TRUE' upon success,
       'FALSE' otherwise.

       Both helper calls report failure with -1. Those values are
       rejected here because 'std::string::substr' would turn a
       negative position into a huge unsigned one and throw
       'std::out_of_range'. The same applies to a start index
       beyond the end of the string. */
    int len, pos;

    o_string = "";

    if (which <= 0 || which > command_words())
        return(FALSE);

    pos = command_indxword(which);
    len = command_wordlen(which);

    if (pos < 0 || len < 0 || pos > length())
        return(FALSE);

    o_string.ws_contents = ws_contents.substr(pos, len);
    return(TRUE);
}
int word_string::find(word_string& sub)
{
    /* Search for the first occurrence of 'sub' inside '*this'.
       Returns the index where the match begins, -1 when there
       is no match. Scanning stops at the first 'EOS' character
       in 'ws_contents'. */
    int start, matched;

    for(start = 0; ws_contents[start] != EOS; start++)
    {
        // count how many leading characters of 'sub' agree from here
        matched = 0;

        while(sub[matched] != EOS && sub[matched] == ws_contents[start + matched])
            matched++;

        // the whole of 'sub' agreed
        if (sub[matched] == EOS)
            return(start);
    }

    return(-1);
}
int word_string::find_char(char findch)
{
    /* Locate the first occurrence of the character 'findch'.
       Returns its index, or -1 when the character is absent or
       the string is empty. */
    const int total = length();
    int idx = 0;

    while(idx < total)
    {
        if (ws_contents[idx] == findch)
            return(idx);

        idx++;
    }

    return(-1);
}
bool word_string::command_worddel(int which)
{
/* Delete the 'which' command word. Upon success, '*this->ws_contents'
be loaded with the new string. Function returns 'TRUE' upon
success, 'FALSE' otherwise. */
word_string tmpout, tmpwrd;
char quote;
int nwords, nwords_out;
int len;
int pos;
int i;
len = length();
if (!len || which <= 0)
return(FALSE);
nwords = command_words();
nwords_out = 0;
if (which > nwords)
return(FALSE);
// loop to copy all words except the one to delete
for(i = 1; i <= nwords; i++)
{
if (i != which)
{
if (!command_word(tmpwrd, i))
return(FALSE);
/* if word contains multiple space delimited words,
put the quotes back. */
if (tmpwrd.words() > 1)
{
/* decide which quotes to use based on if there
are any existing quotes */
if (tmpwrd.find_char('\'') != -1)
quote = '\"';
else
quote = '\'';
tmpout += quote;
tmpout += tmpwrd;
tmpout += quote;
}
else
tmpout += tmpwrd;
nwords_out++;
/* concatenate the output string with a delimiter
unless we are on the last word to be output */
if (nwords_out < (nwords - 1))
tmpout += ' ';
/* special logic to break if we are done before
the natual end of the loop, this should occurr
only if the last word is being deleted. */
if (nwords_out == (nwords - 1))
break;
}
}
ws_contents = tmpout.ws_contents;
return(TRUE);
}
bool word_string::worddel(int which, char delim)
{
/* Delete the 'which' word delimited by 'delim'.
Upon success, '*this->ws_contents' will be loaded with the new string.
Function returns 'TRUE' upon success, 'FALSE' otherwise. */
word_string tmpout, tmpwrd;
int nwords, nwords_out;
int len, i;
len = length();
if (!len || which <= 0)
return(FALSE);
nwords = words(delim);
nwords_out = 0;
if (which > nwords)
return(FALSE);
// loop to copy all words except the one to delete
for(i = 1; i <= nwords; i++)
{
if (i != which)
{
if (!word(tmpwrd, i, delim))
return(FALSE);
tmpout += tmpwrd;
nwords_out++;
/* concatenate the output string with a delimiter
unless we are on the last word to be output */
if (nwords_out < (nwords - 1))
tmpout += delim;
/* special logic to break if we are done before
the natual end of the loop, this should occurr
only if the last word is being deleted. */
if (nwords_out == (nwords - 1))
break;
}
}
ws_contents = tmpout.ws_contents;
return(TRUE);
}
bool word_string::wordput(word_string& wdata, int which, char delim)
{
    /* Place the word 'wdata' into '*this->ws_contents' as the 'which' word
       (counting from one). If the word already exists, it will be
       replaced; if it does not exist, the string will be extended out
       to the required word before copying. Each word is delimited by
       'delim'. Function returns 'TRUE' upon success, 'FALSE' otherwise.

       When both the existing word and 'wdata' are empty nothing is
       changed and 'TRUE' is returned.

       The delimiter after the new word is written whenever 'which' is
       not the last word, even if everything after it is empty. Writing
       it only when the remainder is non-empty would silently drop a
       trailing empty word (for example replacing word 2 of "a,b,"). */
    word_string new_str, before, after, tmp;
    int new_field_len;
    int existing_field_len;
    int nwords;
    int len;
    int indx;

    len = length();

    if (which <= 0)
        return(FALSE);

    nwords = words(delim);

    // work on a copy so that a failure leaves '*this' untouched
    if (len)
        new_str = ws_contents;

    // extend string out if required
    if (which > nwords)
    {
        if (!new_str.parse_extend(which, delim))
            return(FALSE);

        len = new_str.length();
        nwords = which;
    }

    if (!len)
        existing_field_len = 0;
    else
        existing_field_len = new_str.wordlen(which, delim);

    new_field_len = wdata.length();

    // if existing and new strings are length zero, nothing to do
    if (new_field_len == 0 && existing_field_len == 0)
        return(TRUE);

    // if strings are equal in length, place new data over top of old
    if (existing_field_len == new_field_len)
    {
        indx = new_str.indxword(which, delim);

        if (!new_str.str_replace(indx, wdata))
            return(FALSE);

        ws_contents = new_str.ws_contents;
        return(TRUE);
    }

    /* 'before' is everything up to and including the delimiter
       just ahead of 'which' (empty for the first word) */
    if (which == 1)
        before = "";
    else
    {
        indx = new_str.indxword(which, delim);
        before = new_str.substr(0, indx);
    }

    /* 'after' is everything from the start of the word following
       'which' to the end, without the delimiter in front of it
       (empty for the last word); the requested length may run
       past the end, in which case 'substr' stops at the end */
    if (which == nwords)
        after = "";
    else
    {
        indx = new_str.indxword(which + 1, delim);
        after = new_str.substr(indx, len - indx + 1);
    }

    // build final string from three parts
    tmp = "";

    // part one, 'before' string
    if (before.length())
        tmp = before;

    // part two, new word data
    tmp += wdata;

    /* part three, delimiter plus 'after' string; the delimiter is
       needed whenever words follow, even when they are all empty */
    if (which < nwords)
    {
        tmp += delim;

        if (after.length())
            tmp += after;
    }

    ws_contents = tmp.ws_contents;
    return(TRUE);
}
bool word_string::num(void)
{
    /* Evaluate a string to determine whether it is a valid number.
       Numbers are the digits 0-9. A minus sign is allowed in the
       first character only and must be followed by at least one
       digit, so a lone "-" is not a number. Function returns 'TRUE'
       if the string is a valid number, 'FALSE' otherwise (including
       for an empty string).

       Characters are converted to 'unsigned char' before being passed
       to 'isdigit', because passing a negative 'char' value is
       undefined. */
    int len, i;

    len = length();

    if (!len)
        return(FALSE);

    // optional leading minus sign
    i = (ws_contents[0] == '-') ? 1 : 0;

    // a sign on its own carries no digits
    if (i == len)
        return(FALSE);

    for(; i < len; i++)
        if (!isdigit(static_cast<unsigned char>(ws_contents[i])))
            return(FALSE);

    return(TRUE);
}
bool word_string::parse_extend(int which, char delim)
{
    /* Extend string data out to 'which' 'delim' delimited word.
       You may not extend using the space delimiter because of the
       nature of the space and the 'trim' function. Function returns
       'TRUE' upon success, 'FALSE' otherwise. Upon success, '*this->ws_contents'
       will be loaded with the extended string (unchanged when the
       word is already present).

       Exactly enough delimiters are appended to make the string hold
       'which' words. An empty string reports zero words, yet a single
       delimiter already separates two (empty) words, so the count
       starts from one in that case. Starting from zero would append
       one delimiter too many. */
    int nwords;
    int i;

    nwords = words(delim);

    /* if the existing string is empty and we are adding the
       first word, do not extend */
    if (!nwords && which == 1)
        return(TRUE);

    if (which <= 0 || delim == ' ')
        return(FALSE);

    // if word is already present, extend not necessary
    if (nwords >= which)
        return(TRUE);

    // extend string, one delimiter per missing word
    for(i = (nwords ? nwords : 1); i < which; i++)
        ws_contents += delim;

    return(TRUE);
}
bool word_string::str_replace(int spos, word_string& wdata)
{
    /* Overwrite characters in place. Starting at index 'spos',
       as many characters as 'wdata' holds are replaced with the
       content of 'wdata'. The replaced range must lie entirely
       inside the existing string; the string never grows.
       'ws_contents' is modified directly. Returns 'TRUE' upon
       success, 'FALSE' when either string is empty, 'spos' is
       negative or the range does not fit. */
    const int own_len = length();
    const int src_len = wdata.length();
    int offset;

    if (own_len == 0 || src_len == 0 || spos < 0)
        return(FALSE);

    if (spos + src_len > own_len)
        return(FALSE);

    for(offset = 0; offset < src_len; offset++)
        ws_contents[spos + offset] = wdata[offset];

    return(TRUE);
}