[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index][Thread Index][Top&Search][Original]
[PATCH 5.005_63] utf8 REx botches
Gurusamy Sarathy writes:
> I'm hotly pursuing the utf8 trail right now, and expect to have v5.5.640
> out some time this weekend.
This fixes all UTEST failures except the one in complex.t (I think that one
is not related to RExen).
Enjoy,
Ilya
--- ./regexec.c~ Fri Dec 17 03:08:26 1999
+++ ./regexec.c Thu Jan 6 17:21:00 2000
@@ -688,6 +690,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog,
char *startpos = sv ? strend - SvCUR(sv) : s;
t = s;
+ if (prog->reganch & ROPT_UTF8)
+ PL_regdata = prog->data; /* Used by REGINCLASS UTF logic */
s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
if (!s) {
#ifdef DEBUGGING
@@ -869,9 +873,9 @@ S_find_byclass(pTHX_ regexp * prog, regn
/* FALL THROUGH */
case BOUNDUTF8:
tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
- tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
+ tmp = ((OP(c) == BOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
while (s < strend) {
- if (tmp == !(OP(c) == BOUND ?
+ if (tmp == !(OP(c) == BOUNDUTF8 ?
swash_fetch(PL_utf8_alnum, (U8*)s) :
isALNUM_LC_utf8((U8*)s)))
{
@@ -904,12 +908,10 @@ S_find_byclass(pTHX_ regexp * prog, regn
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case NBOUNDUTF8:
- if (prog->minlen)
- strend = reghop_c(strend, -1);
tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
- tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
+ tmp = ((OP(c) == NBOUNDUTF8 ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
while (s < strend) {
- if (tmp == !(OP(c) == NBOUND ?
+ if (tmp == !(OP(c) == NBOUNDUTF8 ?
swash_fetch(PL_utf8_alnum, (U8*)s) :
isALNUM_LC_utf8((U8*)s)))
tmp = !tmp;
Follow-Ups from:
  * Gurusamy Sarathy <gsar@activestate.com>
References to:
  * Gurusamy Sarathy <gsar@activestate.com>
[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index][Thread Index][Top&Search][Original]