diff options
| -rw-r--r-- | GNUmakefile | 13 | ||||
| -rw-r--r-- | pcre.c | 101 | 
2 files changed, 59 insertions, 55 deletions
| diff --git a/GNUmakefile b/GNUmakefile index 52b0091..10dbff9 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -1,7 +1,7 @@  MAKE_INCLUDE := -I$(HOME)/src/make-4.1  CFLAGS = -Wall -g -tests = test001 test002 test003 test004 test005 +tests = test001 test002 test003 test004 test005 test006 test007  -load pcre.so @@ -19,23 +19,27 @@ check: $(tests)  # each expression is passed to test(1)  # simple test for pattern matching -test001 = '$(m ^test$,test)' = test +test001 := '$(m ^test$,test)' = test  # test for pattern expansion  test002: var = es  test002 = '$(m ^t$(var)t$$,test,E)' = test  # test for string capturing by number -test003 = '$(m ^t(es)t$,test)$0$1' = testtestes -a -z '$(m a,b)$0$1' +test003 := '$(m ^t(es)t$,test)$0$1' = testtestes -a -z '$(m a,b)$0$1'  # test named string capturing -test004 = '$(m ^t(?<var>es)t$,test)' = test -a '$(var)' = es +test004 := '$(m ^t(?<var>es)t$,test)' = test -a '$(var)' = es  # test with large number of strings to capture  test005: pat = (1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)(12)(13)(14)(15)(16)(17)(18)(19)(20)(21)(22)(23)(24)(25)(26)(27)(28)(29)(30)(31)(32)(33)(34)(35)(36)(37)(38)(39)(40)(41)(42)(43)(44)(45)(46)(47)(48)(49)(50)(51)(52)(53)(54)(55)(56)(57)(58)(59)(60)(61)(62)(63)(64)(65)(66)(67)(68)(69)(70)(71)(72)(73)(74)(75)(76)(77)(78)(79)(80)(81)(82)(83)(84)(85)(86)(87)(88)(89)(90)(91)(92)(93)(94)(95)(96)(97)(98)(99)(100)(101)(102)(103)(104)(105)(106)(107)(108)(109)(110)(111)(112)(113)(114)(115)(116)(117)(118)(119)(120)(121)(122)(123)(124)(125)(126)(127)(128)(129)(130)(131)(132)(133)(134)(135)(136)(137)(138)(139)(140)(141)(142)(143)(144)(145)(146)(147)(148)(149)(150)(151)(152)(153)(154)(155)(156)(157)(158)(159)(160)(161)(162)(163)(164)(165)(166)(167)(168)(169)(170)(171)(172)(173)(174)(175)(176)(177)(178)(179)(180)(181)(182)(183)(184)(185)(186)(187)(188)(189)(190)(191)(192)(193)(194)(195)(196)(197)(198)(199)(200)(201)(202)(203)(204)(205)(206)(207)(208)(209)(210)(211)(212)(213)(214)(215)(216)(217)(218)(219)(220)(221)(222)(223)(224)(225)(226)(227)(228)(229)(230)(231)(232)(233)(234)(235)(236)(237)(238)(239)(240)(241)(242)(243)(244)(245)(246)(247)(248)(249)(250)(251)(252)(253)(254)(255)  test005: subj = 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255  test005 = '$(m $(pat),$(subj),E)' = '$(subj)' -a '$(1)' = 1 -a '$(255)' = 255 +# test parsing pattern options +test006 := '$(m ^TEST$,test,i)' = test +test007 := '$(m ^test+,testtttt,U)' = test +  ### END OF TEST EXPRESSIONS ### @@ -44,6 +48,7 @@ test%:  		echo '$@ PASSED'; \  	else \  		echo '$@ FAILED'; \ +		echo "$($@)"; \  		return 1; \  	fi @@ -27,6 +27,52 @@ int plugin_is_GPL_compatible;  const int MAX_CAP = 256;   /* maximum number of substrings to capture */  const int MAX_CAP_LEN = 3; /* number of decimal digits in MAX_CAP */ +/* set_comp_opt - set regexp option */ +int set_comp_opt(int opts, const char flag, const char *func) +{ +	int b; /* PCRE configuration option value */ + +	switch (flag) { +	case 'i': /* ignore case */ +		opts |= PCRE_CASELESS;  break; +	case 'm': /* multi-line */ +		opts |= PCRE_MULTILINE; break; +	case 's': /* single-line */ +		opts |= PCRE_DOTALL;    break; +	case 'u': /* use Unicode properties */ +		pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &b); +		if (b) { +			opts |= PCRE_UCP; +		} else { +			fprintf(stderr, "%s: PCRE library does not support " +					"Unicode properties, `%c' option is " +					"unavailable\n", +					func, flag); +		} +		break; +	case 'U': /* ungreedy quantifiers */ +		opts |= PCRE_UNGREEDY;  break; +	case 'x': /* extended regexp */ +		opts |= PCRE_EXTENDED;  break; +	case 'X': /* PCRE extras */ +		opts |= PCRE_EXTRA;     break; +	case '8': /* UTF-8 */ +		pcre_config(PCRE_CONFIG_UTF8, &b); +		if (b) { +			opts |= PCRE_UTF8; +		} else { +			fprintf(stderr, "%s: PCRE library does not support " +					"UTF-8, `%c' option is unavailable\n", +					func, flag); +		} +		break; +	default: /* unknown option */ +		fprintf(stderr, "%s: unknown option `%c'\n", func, flag); +		break; +	} +	return opts; +} +  /* set_vars() - set make variables to captured substrings */  int set_vars(const char *subj, int *ovec, const int ncap)  { @@ -89,7 +135,6 @@ char *match(const char *name, int argc, char **argv)  	char *pat = NULL;    /* expanded pattern */  	char *p;             /* iteration pointer */  	int co = 0;          /* pattern compilation options */ -	int b;               /* PCRE configuration option value */  	pcre *re;            /* compiled regexp */  	const char *err;     /* compilation error */  	int erroffset;       /* offset of pattern character where error occured */ @@ -104,63 +149,17 @@ char *match(const char *name, int argc, char **argv)  			case 'E': /* expand pattern */  				pat = gmk_expand(argv[0]);  				break; -			case 'i': /* ignore case */ -				co |= PCRE_CASELESS; -				break; -			case 'm': /* multi-line */ -				co |= PCRE_MULTILINE; -				break; -			case 's': /* single-line */ -				co |= PCRE_DOTALL; -				break; -			case 'u': /* use Unicode properties */ -				pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, -						&b); -				if (b) { -					co |= PCRE_UCP; -				} else { -					fprintf(stderr, "%s: PCRE library " -							"does not support " -							"Unicode properties, " -							"`%c' option is " -							"unavailable\n", -							name, *p); -				} -				break; -			case 'U': /* ungreedy quantifiers */ -				co |= PCRE_UNGREEDY; -				break; -			case 'x': /* extended regexp */ -				co |= PCRE_EXTENDED; -				break; -			case 'X': /* PCRE extras */ -				co |= PCRE_EXTRA; -				break; -			case '8': /* UTF-8 */ -				pcre_config(PCRE_CONFIG_UTF8, &b); -				if (b) { -					co |= PCRE_UTF8; -				} else { -					fprintf(stderr, "%s: PCRE library " -							"does not support " -							"UTF-8, " -							"`%c' option is " -							"unavailable\n", -							name, *p); -				} -				break; -			default: /* unknown option */ -				fprintf(stderr, "%s: unknown option `%c'\n", -						name, *p); +			default: /* not match specific option */ +				co = set_comp_opt(co, *p, name);  				break;  			}  		}  	}  	if (pat == NULL) { /* compile unexpanded pattern */ -		re = pcre_compile(argv[0], 0, &err, &erroffset, NULL); +		re = pcre_compile(argv[0], co, &err, &erroffset, NULL);  	} else {           /* compile expanded pattern */ -		re = pcre_compile(pat, 0, &err, &erroffset, NULL); +		re = pcre_compile(pat, co, &err, &erroffset, NULL);  		gmk_free(pat);  	}  	if (re == NULL) { /* compilation error */ | 
