From 2dcf7d9fecb776290b33c35f80857986ff3e7f21 Mon Sep 17 00:00:00 2001 From: Jan Breuer <jan.breuer@jaybee.cz> Date: 周三, 13 3月 2013 01:42:25 +0800 Subject: [PATCH] Convert pattern matching functions to NFA --- libscpi/src/utils.c | 293 ++++++++++++++++++++++++++++++++++----------------------- 1 files changed, 174 insertions(+), 119 deletions(-) diff --git a/libscpi/src/utils.c b/libscpi/src/utils.c index 63051f5..583b5f7 100644 --- a/libscpi/src/utils.c +++ b/libscpi/src/utils.c @@ -154,146 +154,201 @@ return FALSE; } -bool_t locateText(const char * str1, size_t len1, char ** str2, size_t * len2) { +enum locate_text_states { + STATE_FIRST_WHITESPACE, + STATE_TEXT_QUOTED, + STATE_TEXT, + STATE_LAST_WHITESPACE, + STATE_COMMA, + STATE_ERROR, +}; + +struct locate_text_nfa { + enum locate_text_states state; + int32_t startIdx; + int32_t stopIdx; size_t i; - int quot = 0; - int32_t strStart = -1; - int32_t strStop = -1; - int valid = 0; +}; - - for (i = 0; i < len1; i++) { - if ((strStart < 0) && isspace((unsigned char)str1[i])) { - continue; - } - - if ((strStart < 0) && !quot && (str1[i] == '"')) { - quot = 1; - continue; - } - - if (strStart < 0) { - strStart = i; - } - - if ((strStop < 0) && quot && (str1[i] == '"')) { - strStop = i; - valid = 1; - continue; - } - - if ((strStop >= 0) && quot && (str1[i] == ',')) { - break; - } - - if ((strStop >= 0) && quot && !isspace((unsigned char)str1[i])) { - valid = 0; - } - - if (!quot && !isspace((unsigned char)str1[i]) && (str1[i] != ',')) { - strStop = i; - } - - if (isspace((unsigned char)str1[i])) { - continue; - } - - if ((strStop >= 0) && (str1[i] == ',')) { - valid = 1; - break; - } - } - - if ((i == len1) && !quot) { - valid = 1; - if (strStop < 0) { - strStop = i; - } else { - strStop++; - } - if (strStart < 0) { - strStart = i; - } - } else if (!quot) { - strStop++; - } - - - if (valid) { - if (str2) { - *str2 = (char *) &str1[strStart]; - } - - if (len2) { - *len2 = strStop - strStart; - } - } - - return valid; +/** + * Test locate text state, if it is correct final state + */ +static inline bool_t isFinalState(enum locate_text_states state) { + return ( + ((state) == STATE_COMMA) + || ((state) == STATE_LAST_WHITESPACE) + || ((state) == STATE_TEXT) || + ((state) == STATE_FIRST_WHITESPACE) + ); } -bool_t locateStr(const char * str1, size_t len1, char ** str2, size_t * len2) { - size_t i; - int32_t strStart = -1; - int32_t strStop = -1; - int valid = 0; - - - for (i = 0; i < len1; i++) { - if ((strStart < 0) && isspace((unsigned char)str1[i])) { - continue; - } - - if (strStart < 0) { - strStart = i; - } - - if (!isspace((unsigned char)str1[i]) && (str1[i] != ',')) { - strStop = i; - } - - if (isspace((unsigned char)str1[i])) { - continue; - } - - if (str1[i] == ',') { - valid = 1; - - if (strStop < 0) { - strStop = i; +/** + * Perform locateText automaton to search string pattern + * @param nfa stores automaton state + * @param c current char processed + */ +static inline bool_t locateTextAutomaton(struct locate_text_nfa * nfa, unsigned char c) { + switch(nfa->state) { + /* first state locating only white spaces */ + case STATE_FIRST_WHITESPACE: + if(isspace(c)) { + nfa->startIdx = nfa->stopIdx = nfa->i + 1; + } else if (c == ',') { + nfa->state = STATE_COMMA; + } else if (c == '"') { + nfa->startIdx = nfa->i + 1; + nfa->state = STATE_TEXT_QUOTED; + } else { + nfa->startIdx = nfa->i; + nfa->stopIdx = nfa->i + 1; + nfa->state = STATE_TEXT; } break; - } + /* state locating any text inside "" */ + case STATE_TEXT_QUOTED: + if(c == '"') { + nfa->state = STATE_LAST_WHITESPACE; + nfa->stopIdx = nfa->i; + } + break; + /* locate text ignoring quotes */ + case STATE_TEXT: + if (c == ',') { + nfa->state = STATE_COMMA; + } else if (!isspace(c)) { + nfa->stopIdx = nfa->i + 1; + } + break; + /* locating text after last quote */ + case STATE_LAST_WHITESPACE: + if (c == ',') { + nfa->state = STATE_COMMA; + } else if (!isspace(c)) { + nfa->state = STATE_ERROR; + } + break; } - if (i == len1) { - valid = 1; - if (strStop < 0) { - strStop = i; - } else { - strStop++; - } - if (strStart < 0) { - strStart = i; - } + /* if it is terminating state, break from for loop */ + if ((nfa->state == STATE_COMMA) || (nfa->state == STATE_ERROR)) { + return FALSE; } else { - strStop++; + return TRUE; + } +} + +/** + * Locate text in string. Text is separated by two "" + * example: "text", next parameter + * regexp: ^[ \t\r\n]*"([^"]*)"[ \t\r\n]*,? + * regexp: ^[ \t\r\n]*([^,]*)[ \t\r\n]*,? + * @param str1 string to be searched + * @param len1 length of string + * @param str2 result + * @param len2 length of result + * @return string str1 contains text and str2 was set + */ +bool_t locateText(const char * str1, size_t len1, const char ** str2, size_t * len2) { + struct locate_text_nfa nfa = { + .startIdx = 0, + .stopIdx = 0, + .state = STATE_FIRST_WHITESPACE, + }; + + for (nfa.i = 0; nfa.i < len1; nfa.i++) { + if(FALSE == locateTextAutomaton(&nfa, str1[nfa.i])) { + break; + } } + if (isFinalState(nfa.state)) { - if (valid) { if (str2) { - *str2 = (char *) &str1[strStart]; + *str2 = &str1[nfa.startIdx]; } if (len2) { - *len2 = strStop - strStart; + *len2 = nfa.stopIdx - nfa.startIdx; + } + return TRUE; + } + return FALSE; +} + +/** + * Perform locateStr automaton to search string pattern + * @param nfa stores automaton state + * @param c current char processed + */ +static inline bool_t locateStrAutomaton(struct locate_text_nfa * nfa, unsigned char c) { + switch(nfa->state) { + /* first state locating only white spaces */ + case STATE_FIRST_WHITESPACE: + if(isspace(c)) { + nfa->startIdx = nfa->stopIdx = nfa->i + 1; + } else if (c == ',') { + nfa->state = STATE_COMMA; + } else { + nfa->startIdx = nfa->i; + nfa->stopIdx = nfa->i + 1; + nfa->state = STATE_TEXT; + } + break; + /* locate text ignoring quotes */ + case STATE_TEXT: + if (c == ',') { + nfa->state = STATE_COMMA; + } else if (!isspace(c)) { + nfa->stopIdx = nfa->i + 1; + } + break; + } + + /* if it is terminating state, break from for loop */ + if ((nfa->state == STATE_COMMA) || (nfa->state == STATE_ERROR)) { + return FALSE; + } else { + return TRUE; + } +} + +/** + * Locate string in string. + * regexp: ^[ \t\r\n]*([^,]*)[ \t\r\n]*,? + * @param str1 string to be searched + * @param len1 length of string + * @param str2 result + * @param len2 length of result + * @return string str1 contains text and str2 was set + */ +bool_t locateStr(const char * str1, size_t len1, const char ** str2, size_t * len2) { + struct locate_text_nfa nfa = { + .startIdx = 0, + .stopIdx = 0, + .state = STATE_FIRST_WHITESPACE, + }; + + for (nfa.i = 0; nfa.i < len1; nfa.i++) { + if(FALSE == locateStrAutomaton(&nfa, str1[nfa.i])) { + break; } } - return valid; + if (isFinalState(nfa.state)) { + + if (str2) { + *str2 = &str1[nfa.startIdx]; + } + + if (len2) { + *len2 = nfa.stopIdx - nfa.startIdx; + } + return TRUE; + } + return FALSE; } + /** * Count white spaces from the beggining * @param cmd - command -- Gitblit v1.9.1