Kjetil's Information Center: A Blog About My Projects

Recursive String Search Improved

Remember Recursive String Search?
I have made an improved version based on some experience with that program. First of all, this new one will search in the current directory by default, unless the '-d' option is used. Second, there is a simple filter that can be applied for filename extensions, which is helpful for code search. Finally, it's possible to print the filename on each line found. This is very convenient if the search results are to be "grepped" further.

Once again, there is a binary for Win32 (compiled with MinGW) and here's the source code:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <limits.h>
#include <unistd.h>
#include <dirent.h>

#define FILTER_DELIMITER ";"
#define FILTER_MAX 10

static char *filters[FILTER_MAX];
static int no_of_filters = 0;

static void display_help(char *progname)
{
  fprintf(stderr, "Usage: %s <options> <string>\n", progname);
  fprintf(stderr, "Options:\n"
     "  -h          Display this help and exit.\n"
     "  -n          Print filename on each search match line.\n"
     "  -d DIR      Search DIR instead of current directory.\n"
     "  -f FILTER   Apply FILTER on filename extension when searching.\n"
     "                Delimited by ';', e.g. '.c;.cpp;.h;.hpp'\n"
     "\n");
}

static void build_filter(char *filter)
{
  no_of_filters = 0;

  filters[0] = strtok(filter, FILTER_DELIMITER);
  if (filters[0] == NULL)
    return;

  for (no_of_filters = 1; no_of_filters < FILTER_MAX; no_of_filters++) {
    filters[no_of_filters] = strtok(NULL, FILTER_DELIMITER);
    if (filters[no_of_filters] == NULL)
      break;
  }
}

static int matches_filter(char *name, int name_len)
{
  int i, n1, n2, match, filter_len;

  if (no_of_filters == 0)
    return 1; /* No filters, always matches. */

  for (i = 0; i < no_of_filters; i++) {
    filter_len = strlen(filters[i]);
    if (filter_len > name_len)
      return 0; /* Filter cannot be longer than name! */

    match = 0;
    n2 = name_len - 1;
    for (n1 = filter_len - 1; n1 >= 0; n1--, n2--) {
      if (toupper(filters[i][n1]) != toupper(name[n2]))
        break;
      match++;
    }

    if (filter_len == match)
      return 1; /* Whole filter matched. */
  }

  return 0; /* No matches. */
}

static void recurse(char *path, char *string, int string_len, int show_mode)
{
  static char line[1024]; /* Allocate in BSS. */
  char full_path[PATH_MAX];
  int n, match, name_shown, line_no;
  struct dirent *entry;
  struct stat st;
  DIR *dh;
  FILE *fh;

  dh = opendir(path);
  if (dh == NULL) {
    fprintf(stderr, "Warning: Unable to open directory: %s\n", path);
    return;
  }

  while ((entry = readdir(dh))) {
    if (entry->d_name[0] == '.')
      continue; /* Ignore files with leading dot. */

#ifdef WINNT
    snprintf(full_path, PATH_MAX, "%s\\%s", path, entry->d_name);
#else
    snprintf(full_path, PATH_MAX, "%s/%s", path, entry->d_name);
#endif

    stat(full_path, &st);
    if (S_ISDIR(st.st_mode)) {
      /* Traverse. */
      recurse(full_path, string, string_len, show_mode);

    } else if (S_ISREG(st.st_mode)) {
      /* Search. */
      if (! matches_filter(full_path, strlen(full_path)))
        continue;

      fh = fopen(full_path, "r");
      if (fh == NULL) {
        fprintf(stderr, "Warning: Unable to open file: %s\n", full_path);
        continue;
      }

      name_shown = line_no = 0;
      while (fgets(line, 1024, fh) != NULL) {
        line_no++;
        match = 0;
        for (n = 0; line[n] != '\0'; n++) {
          if (toupper(line[n]) == toupper(string[match])) {
            match++;
            if (match >= string_len) {
              if (show_mode == 0) {
                if (! name_shown) {
                  printf("%s\n", full_path);
                  name_shown = 1;
                }
                printf("%d:%s", line_no, line);
              } else {
                printf("%s:%d:%s", full_path, line_no, line);
              }
              break;
            }
          } else {
            match = 0;
          }
        }
      }
      fclose(fh);
    }
  }

  closedir(dh);
  return;
}

int main(int argc, char *argv[])
{
  int c;
  int show_mode = 0;
  char *search_dir = NULL;

  while ((c = getopt(argc, argv, "hnd:f:")) != -1) {
    switch (c) {
    case 'h':
      display_help(argv[0]);
      exit(EXIT_SUCCESS);

    case 'n':
      show_mode = 1;
      break;

    case 'd':
      search_dir = optarg;
      break;

    case 'f':
      build_filter(optarg);
      break;
    
    case '?':
    default:
      display_help(argv[0]);
      exit(EXIT_FAILURE);
    }
  }

  if (search_dir == NULL) {
    search_dir = "."; /* Current directory. */
  }

  if (optind >= argc) {
    display_help(argv[0]);
    return EXIT_FAILURE;
  }

  recurse(search_dir, argv[optind], strlen(argv[optind]), show_mode);
  return EXIT_SUCCESS;
}
          


Topic: Scripts and Code, by Kjetil @ 05/05-2014, Article Link