diff options
| author | Dmitry Mikhirev | 2014-11-27 20:13:15 +0300 | 
|---|---|---|
| committer | Dmitry Mikhirev | 2014-11-27 20:13:15 +0300 | 
| commit | 2dac8d16aaf4041411249d3d52c4a9af259687de (patch) | |
| tree | 0d2f99ffce8e4aacaa41850ee594e4f3d33a38ed | |
| parent | b2bb204fa2146d7606aa62cf138315d487206289 (diff) | |
| download | make_pcre-2dac8d16aaf4041411249d3d52c4a9af259687de.tar.gz make_pcre-2dac8d16aaf4041411249d3d52c4a9af259687de.tar.bz2 make_pcre-2dac8d16aaf4041411249d3d52c4a9af259687de.tar.xz make_pcre-2dac8d16aaf4041411249d3d52c4a9af259687de.zip | |
added global search option
| -rw-r--r-- | GNUmakefile | 10 | ||||
| -rw-r--r-- | README.md | 4 | ||||
| -rw-r--r-- | pcre.c | 86 | 
3 files changed, 74 insertions, 26 deletions
| diff --git a/GNUmakefile b/GNUmakefile index 266bf8a..b18dcbf 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -5,7 +5,8 @@ PCRE_CFLAGS := $(shell $(PCRE_CONFIG) --cflags)  PCRE_LIBS := $(shell $(PCRE_CONFIG) --libs)  LIBS = $(PCRE_LIBS) -tests = test001 test002 test003 test004 test005 test006 test007 +tests = test001 test002 test003 test004 test005 test006 test007 test008 \ +        test009  ifneq ($(findstring 4.,$(MAKE_VERSION)),4.)      $(error GNU make version 4.x is required) @@ -50,6 +51,13 @@ test006 = "$(m ^TEST+,testtttt,iU)" = test  # test passing `$' characters to variable  test007 = "$(m a(.*)b,a\$$b)" = "a\$$b" -a "$(1)" = "\$$" +# test global search +test008 = "$(m test\d,test1test2test3,g)" = "test1 test2 test3" -a \ +          "$(m test,TestesT,gi)" = "Test" + +# check that search performed only once without `g' option +test009 = "$(m test\d,test1test2test3)" = "test1" +  ### END OF TEST EXPRESSIONS ### @@ -24,7 +24,7 @@ in source directory. Optionally, type      $ make -k check -to run self-tests. +to run self-tests. Please report me if it fails on your system.  Install  ------- @@ -86,6 +86,8 @@ The following options are implemented:  - `E` enables expansion of pattern before compilation. Note that you will need    to use `$$` instead `$` for matching end of line in this case; +- `g` enables global search, like in Perl. Space separated list of all matched +  substrings will be returned;  - `i` makes search case insensitive. The same as in Perl;  - `m` makes regexp treating string as multi-line, i. e. `^` and `$` will match    immediately after or immediately before internal newlines. The same @@ -271,16 +271,20 @@ int set_named_vars(const pcre *re, const char *subj, int *ovec, const int ncap)  /* match() - function to be attached to make pattern matching function */  char *match(const char *name, int argc, char **argv)  { -	char *pat = NULL;    /* expanded pattern */ -	char *p;             /* iteration pointer */ -	int co = 0;          /* pattern compilation options */ -	pcre *re;            /* compiled regexp */ -	const char *err;     /* compilation error */ -	int erroffset;       /* offset in pattern where error occured */ -	char *str = NULL;    /* expanded subject string */ -	int ncap = 0;        /* number of captured substrings */ -	int ovec[MAX_CAP*3]; /* ovector */ -	char *retstr = NULL; /* string to be returned */ +	char *pat = NULL;      /* expanded pattern */ +	char *p;               /* iteration pointer */ +	int global = 0;        /* global search? */ +	int co = 0;            /* pattern compilation options */ +	pcre *re;              /* compiled regexp */ +	const char *err;       /* compilation error */ +	int erroffset;         /* offset in pattern where error occured */ +	pcre_extra *sd = NULL; /* pattern study data */ +	char *str = NULL;      /* expanded subject string */ +	int offset = 0;        /* subject string offset */ +	int ncap = 0;          /* number of captured substrings */ +	int ovec[MAX_CAP*3];   /* ovector */ +	char *retstr = NULL;   /* string to be returned */ +	int retlen = 0;        /* length of retstr */  	if (argc > 2) { /* options provided, parse them */  		for (p = argv[2]; *p != '\0'; p++) { @@ -288,6 +292,9 @@ char *match(const char *name, int argc, char **argv)  			case 'E': /* expand pattern */  				pat = gmk_expand(argv[0]);  				break; +			case 'g': /* global search */ +				global = 1; +				break;  			default: /* not match-specific option */  				co |= parse_comp_opt(*p, name);  				break; @@ -306,23 +313,54 @@ char *match(const char *name, int argc, char **argv)  		goto end_match;  	} -	/* expand subject string and execute regexp */ -	str = gmk_expand(argv[1]); -	ncap = pcre_exec(re, NULL, str, strlen(str), 0, 0, ovec, MAX_CAP*3); -	if ((ncap < 0) && (ncap != PCRE_ERROR_NOMATCH)) { /* error occured */ -		mk_error("%s: pattern matching error: %d\n", name, ncap); +	if (global) { /* study compiled pattern */ +		sd = pcre_study(re, 0, &err); +		if (err) { +			mk_warning("%s: %s", name, err); +			sd = NULL; +		}  	} -	if (ncap > 0) { -		/* set retstr to matched substring */ -		int len = ovec[1] - ovec[0]; -		retstr = gmk_alloc(len + 1); -		strncpy(retstr, str + ovec[0], len); -		retstr[len] = '\0'; +	/* expand subject string */ +	str = gmk_expand(argv[1]); + +	do { +		/* execute regexp */ +		ncap = pcre_exec(re, sd, str, strlen(str), offset, 0, +				ovec, MAX_CAP*3); +		if ((ncap < 0) && (ncap != PCRE_ERROR_NOMATCH)) { /* error occured */ +			mk_error("%s: pattern matching error: %d\n", name, ncap); +		} + +		if (ncap > 0) { /* copy or append matched string to retstr */ +			int len = ovec[1] - ovec[0]; +			int newlen = retlen + len; -		/* set named make vars to captured substrings */ -		set_named_vars(re, str, ovec, ncap); -	} +			char *s = realloc(retstr, (newlen + 1)); +			if (s == NULL) { /* let make allocate memory or die */ +				s = gmk_alloc(newlen); +				strncpy(s, retstr, retlen + 1); +				gmk_free(retstr); +			} +			retstr = s; + +			if (retlen > 0) { /* add whitespace */ +				retstr[retlen] = ' '; +				retlen++; +				newlen++; +			} + +			strncpy(retstr + retlen, str + ovec[0], len); +			retlen = newlen; +			retstr[retlen] = '\0'; + +			/* where to start next search */ +			offset = ovec[1]; + +			/* set named make vars to captured substrings */ +			set_named_vars(re, str, ovec, ncap); +		} +	} while (global && (ncap != PCRE_ERROR_NOMATCH));  	pcre_free(re); | 
