Kjetil's Information Center: A Blog About My Projects

Simple Expression Parsing

I have been researching for other ways to parse expressions without the use of Abstract Syntax Trees (AST) or complex parser generators. In turn, also without the use of dynamic memory. I figured out a way to use an array with tokens to represent the complete expression. And then in order to reach the result, reduce token by token, until only the result itself remains.

Below is a simple calculator program that uses this technique. It supports parenthesis and the basic operators '*', '/', '+' and '-'. The code can also be easily embedded into other programs that require simple parsing of this kind.

Check out the code:

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

#define EXPRESSION_MAX 32

typedef enum {
  TOKEN_NONE    = 0,
  TOKEN_MUL     = 1,
  TOKEN_DIV     = 2,
  TOKEN_ADD     = 3,
  TOKEN_SUB     = 4,
  TOKEN_INT     = 5,
  TOKEN_L_PAREN = 6,
  TOKEN_R_PAREN = 7,
} token_t;

typedef struct element_s {
  token_t token;
  int value;
} element_t;

static int exp_evaluate(element_t *exp, int from, int to)
{
  int i, j, left_paren, prev_val, next_val;

  /* Reduce parenthesis first... */
exp_evaluate_again:
  left_paren = -1;
  for (i = from; i <= to; i++) {
    if (exp[i].token == TOKEN_L_PAREN) {
      left_paren = i;
    } else if (exp[i].token == TOKEN_R_PAREN) {
      if (left_paren >= 0) {
        exp[left_paren].value = exp_evaluate(exp, left_paren + 1, i - 1);
        exp[left_paren].token = TOKEN_INT;
        for (j = left_paren + 1; j <= i; j++)
          exp[j].token = TOKEN_NONE;
        goto exp_evaluate_again;
      } else {
        fprintf(stderr, "Warning: Unmatched '('.\n");
      }
    }
  }

  /* ...then reduce the operators. */
  for (i = TOKEN_MUL; i <= TOKEN_SUB; i++) {
    for (j = from; j <= to; j++) {
      if (exp[j].token == i) {

        prev_val = j - 1;
        if (prev_val < from) {
          fprintf(stderr, "Warning: Overflow due to invalid syntax.\n");
          return 0;
        }
        while (exp[prev_val].token != TOKEN_INT) {
          prev_val--;
          if (prev_val < from) {
            fprintf(stderr, "Warning: Overflow due to invalid syntax.\n");
            return 0;
          }
        }

        next_val = j + 1;
        if (next_val > to) {
          fprintf(stderr, "Warning: Overflow due to invalid syntax.\n");
          return 0;
        }
        while (exp[next_val].token != TOKEN_INT) {
          next_val++;
          if (next_val > to) {
            fprintf(stderr, "Warning: Overflow due to invalid syntax.\n");
            return 0;
          }
        }

        if (i == TOKEN_MUL) {
          exp[prev_val].value *= exp[next_val].value;
        } else if (i == TOKEN_DIV) {
          if (exp[next_val].value == 0)
            exp[prev_val].value = 0; /* Division by zero. */
          else
            exp[prev_val].value /= exp[next_val].value;
        } else if (i == TOKEN_ADD) {
          exp[prev_val].value += exp[next_val].value;
        } else if (i == TOKEN_SUB) {
          exp[prev_val].value -= exp[next_val].value;
        }

        exp[prev_val].token = TOKEN_INT;
        exp[j].token        = TOKEN_NONE;
        exp[next_val].token = TOKEN_NONE;
      }
    }
  }

  return exp[from].value;
}

static int string_evaluate(const char *s)
{
  int i, n, current;
  char intbuf[8];
  element_t exp[EXPRESSION_MAX];

  for (i = 0; i < EXPRESSION_MAX; i++)
    exp[i].token = TOKEN_NONE;

  current = 0;
  for (i = 0; s[i] != '\0'; i++) {
    if (isdigit(s[i])) {
      n = 0;
      intbuf[n++] = s[i];
      while (isdigit(s[i + 1])) {
        intbuf[n++] = s[++i];
        if (n >= 8) {
          fprintf(stderr, "Warning: Number too long.\n");
          break;
        }
      }
      intbuf[n] = '\0';
      exp[current].value = atoi(intbuf);
      exp[current++].token = TOKEN_INT;
    } else if (s[i] == '(') {
      exp[current++].token = TOKEN_L_PAREN;
    } else if (s[i] == ')') {
      exp[current++].token = TOKEN_R_PAREN;
    } else if (s[i] == '*') {
      exp[current++].token = TOKEN_MUL;
    } else if (s[i] == '/') {
      exp[current++].token = TOKEN_DIV;
    } else if (s[i] == '+') {
      exp[current++].token = TOKEN_ADD;
    } else if (s[i] == '-') {
      exp[current++].token = TOKEN_SUB;
    }

    if (current >= EXPRESSION_MAX) {
      fprintf(stderr, "Warning: Maximum expressions reached.\n");
      break;
    }
  }

  return exp_evaluate(exp, 0, EXPRESSION_MAX - 1);
}

int main(int argc, char *argv[])
{
  if (argc != 2) {
    fprintf(stderr, "Usage: %s <expression>\n", argv[0]);
    return 1;
  }

  printf("%d\n", string_evaluate(argv[1]));
  return 0;
}
          


Topic: Scripts and Code, by Kjetil @ 10/06-2012, Article Link