// Name : variable_tokenizer // Author : Chris Koeritz /* * Copyright (c) 1997-$now By Author. This program is free software; you can * * redistribute it and/or modify it under the terms of the GNU General Public * * License as published by the Free Software Foundation; either version 2 of * * the License or (at your option) any later version. This is online at: * * http://www.fsf.org/copyleft/gpl.html * * Please send any updates to: fred@gruntose.com * */ #include "variable_tokenizer.h" #include #include #include #include #include #include //#define DEBUG_VARIABLE_TOKENIZER // uncomment for noisier run. const char *SPECIAL_VALUE = " "; // special value stored for entries with assignment operators but no // value contents. #undef LOG #ifdef DEBUG_VARIABLE_TOKENIZER #include #define LOG(to_print) printf("%s\n", astring(to_print).s()); #else #define LOG(to_print) #endif using namespace basis; using namespace structures; using namespace textual; namespace configuration { variable_tokenizer::variable_tokenizer(int max_bits) : _implementation(new string_table(max_bits)), _assignments(new astring("=")), _separators(new astring(",")), _quotes(new astring), _nesting(false), _comments(new astring), _comment_number(1), _add_spaces(false) {} variable_tokenizer::variable_tokenizer(const astring &separator, const astring &assignment, int max_bits) : _implementation(new string_table(max_bits)), _assignments(new astring(assignment)), _separators(new astring(separator)), _quotes(new astring), _nesting(false), _comments(new astring), _comment_number(1), _add_spaces(false) {} variable_tokenizer::variable_tokenizer(const astring &separator, const astring &assignment, const astring "es, bool nesting, int max_bits) : _implementation(new string_table(max_bits)), _assignments(new astring(assignment)), _separators(new astring(separator)), _quotes(new astring(quotes)), _nesting(nesting), _comments(new astring), _comment_number(1), _add_spaces(false) {} variable_tokenizer::variable_tokenizer(const variable_tokenizer &to_copy) : _implementation(new string_table), _assignments(new astring), _separators(new astring), _quotes(new astring), _nesting(false), _comments(new astring), _comment_number(1), _add_spaces(false) { *this = to_copy; } variable_tokenizer::~variable_tokenizer() { WHACK(_separators); WHACK(_assignments); WHACK(_implementation); WHACK(_quotes); WHACK(_comments); } int variable_tokenizer::symbols() const { return _implementation->symbols(); } void variable_tokenizer::set_comment_chars(const astring &comments) { *_comments = comments; } const astring &variable_tokenizer::assignments() const { return *_assignments; } const astring &variable_tokenizer::separators() const { return *_separators; } const astring &variable_tokenizer::quotes() const { return *_quotes; } bool variable_tokenizer::exists(const astring &name) const { return !!_implementation->find(name); } void variable_tokenizer::reset() { _implementation->reset(); } const string_table &variable_tokenizer::table() const { return *_implementation; } string_table &variable_tokenizer::table() { return *_implementation; } variable_tokenizer &variable_tokenizer::operator =(const variable_tokenizer &to_copy) { if (this == &to_copy) return *this; *_implementation = *to_copy._implementation; *_separators = *to_copy._separators; *_assignments = *to_copy._assignments; *_quotes = *to_copy._quotes; _nesting = to_copy._nesting; _add_spaces = to_copy._add_spaces; return *this; } astring variable_tokenizer::find(const astring &name) const { astring *found = _implementation->find(name); if (!found) return ""; // check that the contents are not just our significator of emptiness. if (found->equal_to(SPECIAL_VALUE)) return ""; return *found; } bool variable_tokenizer::okay_for_variable_name(char to_check) const { if (!to_check || separator(to_check) || assignment(to_check)) return false; return true; } bool variable_tokenizer::separator(char to_check) const { // special case allows a CR separator to be either flavor. if (parser_bits::is_eol(to_check) && (astring::matches(*_separators, '\n') || astring::matches(*_separators, '\r')) ) return true; return astring::matches(*_separators, to_check); } bool variable_tokenizer::assignment(char to_check) const { return astring::matches(*_assignments, to_check); } bool variable_tokenizer::quote_mark(char to_check) const { return astring::matches(*_quotes, to_check); } bool variable_tokenizer::comment_char(char to_check) const { return astring::matches(*_comments, to_check); } #define COOL to_tokenize.length() // true if the string should continue to be parsed. // sets "current" to the first character in the string. #define CHOP { \ current = to_tokenize[0]; \ to_tokenize.zap(0, 0); \ } bool variable_tokenizer::parse(const astring &to_tokenize_in) { FUNCDEF("parse"); astring to_tokenize(to_tokenize_in); // de-const. //hmmm: do we need a copy? try scooting based on a current pos. astring name, value; // accumulated during the loop. char current; // the most recent character from to_tokenize. bool just_ate_blank_line = false; // records when we handle a blank line as a comment. // loop over the string. while (COOL) { name.reset(); value.reset(); // pre-processing to remove extra eols and white space in front. if (is_eol_a_separator() && parser_bits::is_eol(to_tokenize[0])) { CHOP; // chop any white space but don't eat any non-white space coming up. while (COOL && parser_bits::white_space(current)) { CHOP; if (!parser_bits::white_space(current)) { // oops; we ate something we shouldn't have, since it will be // chopped when we get in the main loop. to_tokenize.insert(0, astring(current, 1)); } } } // chop the first character off for analysis. CHOP; // ignore any white space until we hit a variable or other good stuff. if (parser_bits::white_space_no_cr(current)) continue; // ignore eol unless they are in separator list. bool handle_as_comment = false; if (parser_bits::is_eol(current) && !is_eol_a_separator()) { continue; } else if (just_ate_blank_line && parser_bits::is_eol(current)) { just_ate_blank_line = false; continue; } else if (parser_bits::is_eol(current) && is_eol_a_separator()) { //LOG("found eol and it's a separator here"); handle_as_comment = true; } if (comment_char(current) || handle_as_comment) { // set our flag since we are going to eat the end of line in any case. just_ate_blank_line = true; // seek all text until next separator. while (COOL && !separator(current)) { value += current; CHOP; } // add the item with our ongoing comment number. a_sprintf name("%s%d", STRTAB_COMMENT_PREFIX, _comment_number); _implementation->add(name, value); _comment_number++; // go to next comment number to keep unique. LOG(astring("got comment: ") + name + " -> " + value); continue; // got our chunk, keep going. } just_ate_blank_line = false; // reset our flag. // skip characters we can't use for a variable name. if (!okay_for_variable_name(current)) continue; // we've found the start of a variable. while (COOL && okay_for_variable_name(current)) { // accumulate the variable name. name += current; CHOP; // get the next character. } if (!COOL) { // we're at the end of the line, so deal with this situation. if (!separator(current) && !parser_bits::white_space(current) ) name += current; // get the character from the end of the line. LOG(astring("last add: ") + name + " -> " + value); _implementation->add(name, value); // store what we built. continue; // skip the rest; we're at the END of the line man. } // skip spaces after variable name. while (COOL && parser_bits::white_space_no_cr(current)) CHOP; bool found_assignment = false; // assume there isn't one. if (assignment(current)) { // we found the assignment operator and are starting on the value. CHOP; // skip the assignment operator. found_assignment = true; } // skip spaces after the assignment statement. while (COOL && parser_bits::white_space_no_cr(current)) CHOP; // track the quoting that we have to deal with in parsing a value. stack q_stack(!int(_nesting)); // create an unbounded stack for nesting. while (COOL) { // check if the current character is a quote. bool ignore_separator = false; if (quote_mark(current)) { if (!q_stack.size()) { // nothing on the stack yet, so start accumulating. ignore_separator = true; q_stack.push(current); } else if (current == q_stack.top()) { // we got the end of this quoting. q_stack.pop(); // check if we're done with any quotes. if not, we still need to // ignore the separators. if (q_stack.size()) ignore_separator = true; } else { // if we are using a bounded stack, it means we only support one // level of quoting at a time. thus, this quote character simply // falls in as a regular character. but if we're unbound, then // we can nest arbitrary levels of quotes. if (q_stack.kind() == stack::UNBOUNDED) q_stack.push(current); // we have something on the stack already so we're still ignoring // separators. we just don't care about this type of quote. ignore_separator = true; } } else if (q_stack.size()) { // it's not a quote but we're still trying to chow the matching // quote character. ignore_separator = true; } // look for the separator. if (!ignore_separator && separator(current)) { break; } // accumulate the value. value += current; CHOP; // get the next character. } // get the last character if it's relevant. if (!separator(current) && !parser_bits::white_space(current) ) { value += current; } if (found_assignment && !value) { // use our special case for empty values, since there was an assignment // operator but no value afterwards. value = SPECIAL_VALUE; } // store the accumulated variable name and value, but only if the name // is non-empty. otherwise, it's not much of a definition. if (name.t()) { // strip spaces at the end of the name. while (parser_bits::white_space_no_cr(name[name.end()])) name.zap(name.end(), name.end()); // strip spaces at the end of the value unless it's the special case. if (!value.equal_to(SPECIAL_VALUE)) { while (parser_bits::white_space(value[value.end()])) value.zap(value.end(), value.end()); } LOG(astring("normal add: ") + name + " -> " + value); _implementation->add(name, value); // store what we built. just_ate_blank_line = true; // flag that we don't want next EOL. // reset, just in case. name.reset(); value.reset(); } } // currently we just kind of bully through whatever string is provided and do not // flag any error conditions. but people do like to know if it worked or not. they can // make their own conclusions if there are not enough variables defined for their needs. return true; } bool variable_tokenizer::is_eol_a_separator() const { for (int i = 0; i < _separators->length(); i++) { char sep = _separators->get(i); // correct the separator for platform when it's the end of the line. if (parser_bits::is_eol(sep)) return true; } return false; } void variable_tokenizer::text_form(astring &accumulator) const { accumulator.reset(); bool added_sep = false; for (int i = 0; i < _implementation->symbols(); i++) { added_sep = false; if (!string_table::is_comment(_implementation->name(i))) { // a normal assignment is here. accumulator += _implementation->name(i); if (_implementation->operator [](i).t()) { if (_add_spaces) accumulator += " "; accumulator += _assignments->get(0); if (_add_spaces) accumulator += " "; accumulator += _implementation->operator [](i); } } else { // this one is a comment. just spit out the value. if (_implementation->operator [](i).t()) accumulator += _implementation->operator [](i); } // correct the separator for platform when it's the end of the line. if (is_eol_a_separator()) { accumulator += parser_bits::platform_eol_to_chars(); } else { added_sep = true; // record that we put a separator in there. accumulator += _separators->get(0); accumulator += ' '; } } // strip the final separator and space back off, if we added them. if (added_sep) accumulator.zap(accumulator.end() - 1, accumulator.end()); } astring variable_tokenizer::text_form() const { astring accumulator; text_form(accumulator); return accumulator; } } //namespace.