Kjetil's Information Center: A Blog About My Projects

Recursive String Search Bugfix

Remember Recursive String Search Improved?
It actually contains a very subtle bug, which I discovered after a search I made did not make sense. Something that should have been found wasn't.

As an example, it turns out that searching for "obar" did not locate the string "Foobar". The current algorithm would find the "o" in the string first, then the other "o", but since this is not what was searched for, the search was stopped. It did not backtrack in the original string, which it should have, and this is what has been fixed.

Only a single line of code was added:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <limits.h>
#include <unistd.h>
#include <dirent.h>

#define FILTER_DELIMITER ";"
#define FILTER_MAX 10

static char *filters[FILTER_MAX];
static int no_of_filters = 0;

static void display_help(char *progname)
{
  fprintf(stderr, "Usage: %s <options> <string>\n", progname);
  fprintf(stderr, "Options:\n"
     "  -h          Display this help and exit.\n"
     "  -n          Print filename on each search match line.\n"
     "  -d DIR      Search DIR instead of current directory.\n"
     "  -f FILTER   Apply FILTER on filename extension when searching.\n"
     "                Delimited by ';', e.g. '.c;.cpp;.h;.hpp'\n"
     "\n");
}

static void build_filter(char *filter)
{
  no_of_filters = 0;

  filters[0] = strtok(filter, FILTER_DELIMITER);
  if (filters[0] == NULL)
    return;

  for (no_of_filters = 1; no_of_filters < FILTER_MAX; no_of_filters++) {
    filters[no_of_filters] = strtok(NULL, FILTER_DELIMITER);
    if (filters[no_of_filters] == NULL)
      break;
  }
}

static int matches_filter(char *name, int name_len)
{
  int i, n1, n2, match, filter_len;

  if (no_of_filters == 0)
    return 1; /* No filters, always matches. */

  for (i = 0; i < no_of_filters; i++) {
    filter_len = strlen(filters[i]);
    if (filter_len > name_len)
      return 0; /* Filter cannot be longer than name! */

    match = 0;
    n2 = name_len - 1;
    for (n1 = filter_len - 1; n1 >= 0; n1--, n2--) {
      if (toupper(filters[i][n1]) != toupper(name[n2]))
        break;
      match++;
    }

    if (filter_len == match)
      return 1; /* Whole filter matched. */
  }

  return 0; /* No matches. */
}

static void recurse(char *path, char *string, int string_len, int show_mode)
{
  static char line[1024]; /* Allocate in BSS. */
  char full_path[PATH_MAX];
  int n, match, name_shown, line_no;
  struct dirent *entry;
  struct stat st;
  DIR *dh;
  FILE *fh;

  dh = opendir(path);
  if (dh == NULL) {
    fprintf(stderr, "Warning: Unable to open directory: %s\n", path);
    return;
  }

  while ((entry = readdir(dh))) {
    if (entry->d_name[0] == '.')
      continue; /* Ignore files with leading dot. */

#ifdef WINNT
    snprintf(full_path, PATH_MAX, "%s\\%s", path, entry->d_name);
#else
    snprintf(full_path, PATH_MAX, "%s/%s", path, entry->d_name);
#endif

    stat(full_path, &st);
    if (S_ISDIR(st.st_mode)) {
      /* Traverse. */
      recurse(full_path, string, string_len, show_mode);

    } else if (S_ISREG(st.st_mode)) {
      /* Search. */
      if (! matches_filter(full_path, strlen(full_path)))
        continue;

      fh = fopen(full_path, "r");
      if (fh == NULL) {
        fprintf(stderr, "Warning: Unable to open file: %s\n", full_path);
        continue;
      }

      name_shown = line_no = 0;
      while (fgets(line, 1024, fh) != NULL) {
        line_no++;
        match = 0;
        for (n = 0; line[n] != '\0'; n++) {
          if (toupper(line[n]) == toupper(string[match])) {
            match++;
            if (match >= string_len) {
              if (show_mode == 0) {
                if (! name_shown) {
                  printf("%s\n", full_path);
                  name_shown = 1;
                }
                printf("%d:%s", line_no, line);
              } else {
                printf("%s:%d:%s", full_path, line_no, line);
              }
              break;
            }
          } else {
            n -= match; /* Backtrack */
            match = 0;
          }
        }
      }
      fclose(fh);
    }
  }

  closedir(dh);
  return;
}

int main(int argc, char *argv[])
{
  int c;
  int show_mode = 0;
  char *search_dir = NULL;

  while ((c = getopt(argc, argv, "hnd:f:")) != -1) {
    switch (c) {
    case 'h':
      display_help(argv[0]);
      exit(EXIT_SUCCESS);

    case 'n':
      show_mode = 1;
      break;

    case 'd':
      search_dir = optarg;
      break;

    case 'f':
      build_filter(optarg);
      break;
    
    case '?':
    default:
      display_help(argv[0]);
      exit(EXIT_FAILURE);
    }
  }

  if (search_dir == NULL) {
    search_dir = "."; /* Current directory. */
  }

  if (optind >= argc) {
    display_help(argv[0]);
    return EXIT_FAILURE;
  }

  recurse(search_dir, argv[optind], strlen(argv[optind]), show_mode);
  return EXIT_SUCCESS;
}
          


Here is also a binary for Win32 (compiled with MinGW).

Topic: Scripts and Code, by Kjetil @ 03/04-2017, Article Link