From e32303b41512e3386b83893b8331fb2d4f83be95 Mon Sep 17 00:00:00 2001 From: Dmitry Mikhirev Date: Fri, 6 Mar 2015 21:55:42 +0300 Subject: added php-like `S' option --- README.md | 9 +++++---- pcre.c | 12 ++++++++++-- tests.mk | 9 ++++++++- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 6fc02d9..09c7c31 100644 --- a/README.md +++ b/README.md @@ -97,10 +97,10 @@ Options Both `pcre_find` and `pcre_subst` can take an optional argument consisting of one ore more characters, each of which enables some option: - $(pcre_find PATTERN,IN,EgimsuUxX8) - $(m PATTERN,IN,EgimsuUxX8) - $(pcre_subst PATTERN,REPLACEMENT,TEXT,EgimsuUxX8) - $(s PATTERN,REPLACEMENT,TEXT,EgimsuUxX8) + $(pcre_find PATTERN,IN,ADEgimsSuUxX8) + $(m PATTERN,IN,ADEgimsSuUxX8) + $(pcre_subst PATTERN,REPLACEMENT,TEXT,ADEgimsSuUxX8) + $(s PATTERN,REPLACEMENT,TEXT,ADEgimsSuUxX8) The following options are implemented: @@ -119,6 +119,7 @@ The following options are implemented: as in Perl; - `s` forces `.` metacharacter to match any character including newline. The same as in Perl; +- `S` enables additional studying of compiled regexp. The same as in PHP; - `u` changes the way of processing `\B`, `\b`, `\D`, `\d`, `\S`, `\s`, `\W`, `\w` and some of the POSIX character classes forsing them to use Unicode properties; diff --git a/pcre.c b/pcre.c index ccb5528..c6deef1 100644 --- a/pcre.c +++ b/pcre.c @@ -314,6 +314,7 @@ static char *match(const char *name, int argc, char **argv) char *pat = NULL; /* expanded pattern */ char *p; /* iteration pointer */ int global = 0; /* global search? */ + int study = 0; /* study pattern? 
*/ int co = 0; /* pattern compilation options */ pcre *re = NULL; /* compiled regexp */ const char *err; /* compilation error */ @@ -335,6 +336,9 @@ static char *match(const char *name, int argc, char **argv) case 'g': /* global search */ global = 1; break; + case 'S': /* study pattern */ + study = 1; + break; default: /* not match-specific option */ co |= parse_comp_opt(*p, name); break; @@ -353,7 +357,7 @@ static char *match(const char *name, int argc, char **argv) goto end_match; } - if (global) { /* study compiled pattern */ + if (study) { /* study compiled pattern */ sd = pcre_study(re, 0, &err); if (err) { mk_warning("%s: %s", name, err); @@ -436,6 +440,7 @@ static char *subst(const char *name, int argc, char **argv) char *pat = NULL; /* expanded pattern */ char *p; /* iteration pointer */ int global = 0; /* global search? */ + int study = 0; /* study pattern? */ int co = 0; /* pattern compilation options */ pcre *re = NULL; /* compiled regexp */ const char *err; /* compilation error */ @@ -462,6 +467,9 @@ static char *subst(const char *name, int argc, char **argv) case 'g': /* global search */ global = 1; break; + case 'S': /* study pattern */ + study = 1; + break; default: /* not subst-specific option */ co |= parse_comp_opt(*p, name); break; @@ -480,7 +488,7 @@ static char *subst(const char *name, int argc, char **argv) goto end_subst; } - if (global) { /* study compiled pattern */ + if (study) { /* study compiled pattern */ sd = pcre_study(re, 0, &err); if (err) { mk_warning("%s: %s", name, err); diff --git a/tests.mk b/tests.mk index 353735a..6838b1c 100644 --- a/tests.mk +++ b/tests.mk @@ -2,7 +2,7 @@ ifneq ($(findstring 4.,$(MAKE_VERSION)),4.) 
$(error you need GNU make 4.x to run tests) endif -NUMTESTS = 34 +NUMTESTS = 36 tests := $(foreach num,$(shell seq -f%03g $(NUMTESTS)),test$(num)) load pcre.so @@ -129,6 +129,13 @@ endef test034 = "$(m line\d$,$(subj034))" = "line2" -a -z "$(m line\d$,$(subj034),D)" -a \ "$(m test$,test,D)" = "test" +# test `S' option +# no way to check directly that it really works, so just ensure that +# it does not break anything +# TODO: add indirect (speed) test +test035 = "$(m test,test,S)" = test +test036 = "$(s a,x,a,S)" = "x" + ### END OF TEST EXPRESSIONS ### test%: -- cgit v1.2.1