From 2dac8d16aaf4041411249d3d52c4a9af259687de Mon Sep 17 00:00:00 2001 From: Dmitry Mikhirev Date: Thu, 27 Nov 2014 20:13:15 +0300 Subject: added global search option --- GNUmakefile | 10 ++++++- README.md | 4 ++- pcre.c | 86 ++++++++++++++++++++++++++++++++++++++++++++----------------- 3 files changed, 74 insertions(+), 26 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 266bf8a..b18dcbf 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -5,7 +5,8 @@ PCRE_CFLAGS := $(shell $(PCRE_CONFIG) --cflags) PCRE_LIBS := $(shell $(PCRE_CONFIG) --libs) LIBS = $(PCRE_LIBS) -tests = test001 test002 test003 test004 test005 test006 test007 +tests = test001 test002 test003 test004 test005 test006 test007 test008 \ + test009 ifneq ($(findstring 4.,$(MAKE_VERSION)),4.) $(error GNU make version 4.x is required) @@ -50,6 +51,13 @@ test006 = "$(m ^TEST+,testtttt,iU)" = test # test passing `$' characters to variable test007 = "$(m a(.*)b,a\$$b)" = "a\$$b" -a "$(1)" = "\$$" +# test global search +test008 = "$(m test\d,test1test2test3,g)" = "test1 test2 test3" -a \ + "$(m test,TestesT,gi)" = "Test" + +# check that search is performed only once without the `g' option +test009 = "$(m test\d,test1test2test3)" = "test1" + ### END OF TEST EXPRESSIONS ### diff --git a/README.md b/README.md index 365b677..cedd3a4 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ in source directory. Optionally, type $ make -k check -to run self-tests. +to run self-tests. Please let me know if they fail on your system. Install ------- @@ -86,6 +86,8 @@ The following options are implemented: - `E` enables expansion of pattern before compilation. Note that you will need to use `$$` instead `$` for matching end of line in this case; +- `g` enables global search, like in Perl. A space-separated list of all matched + substrings is returned; - `i` makes search case insensitive. The same as in Perl; - `m` makes regexp treating string as multi-line, i. e.
`^` and `$` will match immediately after or immediately before internal newlines. The same diff --git a/pcre.c b/pcre.c index ec8a120..f7a8c53 100644 --- a/pcre.c +++ b/pcre.c @@ -271,16 +271,20 @@ int set_named_vars(const pcre *re, const char *subj, int *ovec, const int ncap) /* match() - function to be attached to make pattern matching function */ char *match(const char *name, int argc, char **argv) { - char *pat = NULL; /* expanded pattern */ - char *p; /* iteration pointer */ - int co = 0; /* pattern compilation options */ - pcre *re; /* compiled regexp */ - const char *err; /* compilation error */ - int erroffset; /* offset in pattern where error occured */ - char *str = NULL; /* expanded subject string */ - int ncap = 0; /* number of captured substrings */ - int ovec[MAX_CAP*3]; /* ovector */ - char *retstr = NULL; /* string to be returned */ + char *pat = NULL; /* expanded pattern */ + char *p; /* iteration pointer */ + int global = 0; /* global search? */ + int co = 0; /* pattern compilation options */ + pcre *re; /* compiled regexp */ + const char *err; /* compilation error */ + int erroffset; /* offset in pattern where error occured */ + pcre_extra *sd = NULL; /* pattern study data */ + char *str = NULL; /* expanded subject string */ + int offset = 0; /* subject string offset */ + int ncap = 0; /* number of captured substrings */ + int ovec[MAX_CAP*3]; /* ovector */ + char *retstr = NULL; /* string to be returned */ + int retlen = 0; /* length of retstr */ if (argc > 2) { /* options provided, parse them */ for (p = argv[2]; *p != '\0'; p++) { @@ -288,6 +292,9 @@ char *match(const char *name, int argc, char **argv) case 'E': /* expand pattern */ pat = gmk_expand(argv[0]); break; + case 'g': /* global search */ + global = 1; + break; default: /* not match-specific option */ co |= parse_comp_opt(*p, name); break; @@ -306,23 +313,54 @@ char *match(const char *name, int argc, char **argv) goto end_match; } - /* expand subject string and execute regexp */ - str 
= gmk_expand(argv[1]); - ncap = pcre_exec(re, NULL, str, strlen(str), 0, 0, ovec, MAX_CAP*3); - if ((ncap < 0) && (ncap != PCRE_ERROR_NOMATCH)) { /* error occured */ - mk_error("%s: pattern matching error: %d\n", name, ncap); + if (global) { /* study compiled pattern */ + sd = pcre_study(re, 0, &err); + if (err) { + mk_warning("%s: %s", name, err); + sd = NULL; + } } - if (ncap > 0) { - /* set retstr to matched substring */ - int len = ovec[1] - ovec[0]; - retstr = gmk_alloc(len + 1); - strncpy(retstr, str + ovec[0], len); - retstr[len] = '\0'; + /* expand subject string */ + str = gmk_expand(argv[1]); + + do { + /* execute regexp */ + ncap = pcre_exec(re, sd, str, strlen(str), offset, 0, + ovec, MAX_CAP*3); + if ((ncap < 0) && (ncap != PCRE_ERROR_NOMATCH)) { /* error occured */ + mk_error("%s: pattern matching error: %d\n", name, ncap); + } + + if (ncap > 0) { /* copy or append matched string to retstr */ + int len = ovec[1] - ovec[0]; + int newlen = retlen + len; - /* set named make vars to captured substrings */ - set_named_vars(re, str, ovec, ncap); - } + char *s = realloc(retstr, (newlen + 1)); + if (s == NULL) { /* let make allocate memory or die */ + s = gmk_alloc(newlen); + strncpy(s, retstr, retlen + 1); + gmk_free(retstr); + } + retstr = s; + + if (retlen > 0) { /* add whitespace */ + retstr[retlen] = ' '; + retlen++; + newlen++; + } + + strncpy(retstr + retlen, str + ovec[0], len); + retlen = newlen; + retstr[retlen] = '\0'; + + /* where to start next search */ + offset = ovec[1]; + + /* set named make vars to captured substrings */ + set_named_vars(re, str, ovec, ncap); + } + } while (global && (ncap != PCRE_ERROR_NOMATCH)); pcre_free(re); -- cgit v1.2.1