/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
| /* Advance the offset by one code unit, and return the new value. | |
| It is only called when the offset is not at the end of the subject. */ | |
| static PCRE2_SIZE do_bumpalong(pcre2_match_data *match_data, | |
| PCRE2_SIZE offset) | |
| { | |
| PCRE2_SPTR subject = match_data->subject; | |
| PCRE2_SIZE subject_length = match_data->subject_length; | |
| BOOL utf = (match_data->code->overall_options & PCRE2_UTF) != 0; | |
| /* Skip over CRLF as an atomic sequence, if CRLF is configured as a newline | |
| sequence. */ | |
| if (subject[offset] == CHAR_CR && offset + 1 < subject_length && | |
| subject[offset + 1] == CHAR_LF) | |
| { | |
| switch(match_data->code->newline_convention) | |
| { | |
| case PCRE2_NEWLINE_CRLF: | |
| case PCRE2_NEWLINE_ANY: | |
| case PCRE2_NEWLINE_ANYCRLF: | |
| return offset + 2; | |
| } | |
| } | |
| /* Advance by one full character if in UTF mode. */ | |
| if (utf) | |
| { | |
| PCRE2_SPTR next = subject + offset + 1; | |
| PCRE2_SPTR subject_end = subject + subject_length; | |
| (void)subject_end; /* Suppress warning; 32-bit FORWARDCHARTEST ignores this */ | |
| FORWARDCHARTEST(next, subject_end); | |
| return next - subject; | |
| } | |
| return offset + 1; | |
| } | |
| /************************************************* | |
| * Advance the match * | |
| *************************************************/ | |
| PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION | |
| pcre2_next_match(pcre2_match_data *match_data, PCRE2_SIZE *pstart_offset, | |
| uint32_t *poptions) | |
| { | |
| int rc = match_data->rc; | |
| PCRE2_SIZE start_offset = match_data->start_offset; | |
| PCRE2_SIZE *ovector = match_data->ovector; | |
| /* Match error, or no match: no further iteration possible. In previous versions | |
| of PCRE2, we recommended that clients use a strategy which involved retrying in | |
| certain cases after PCRE2_ERROR_NOMATCH, but this is no longer required. */ | |
| if (rc < 0) | |
| return FALSE; | |
| /* Match succeeded: get the start offset for the next match */ | |
| /* Although \K can affect the position of ovector[0], there are no ways to do | |
| anything surprising with ovector[1], which must always be >= start_offset. */ | |
| PCRE2_ASSERT(ovector[1] >= start_offset); | |
| /* Special handling for patterns which contain \K in a lookaround, which enables | |
| the match start to be pushed back to before the starting search offset | |
| (ovector[0] < start_offset) or after the match ends (ovector[0] > ovector[1]). | |
| This is not a problem if ovector[1] > start_offset, because in this case, we can | |
| just attempt the next match at ovector[1]: we are making progress, which is all | |
| that we require. | |
| However, if we have ovector[1] == start_offset, then we have a very rare case | |
| which must be handled specially, because it's a non-empty match which | |
| nonetheless fails to make progress through the subject. */ | |
| if (ovector[0] != start_offset && ovector[1] == start_offset) | |
| { | |
| /* If the match end is at the end of the subject, we are done. */ | |
| if (start_offset >= match_data->subject_length) | |
| return FALSE; | |
| /* Otherwise, bump along by one code unit, and do a normal search. */ | |
| *pstart_offset = do_bumpalong(match_data, ovector[1]); | |
| *poptions = 0; | |
| return TRUE; | |
| } | |
| /* If the previous match was for an empty string, we are finished if we are at | |
| the end of the subject. Otherwise, arrange to run another match at the same | |
| point to see if a non-empty match can be found. */ | |
| if (ovector[0] == ovector[1]) | |
| { | |
| /* If the match is at the end of the subject, we are done. */ | |
| if (ovector[0] >= match_data->subject_length) | |
| return FALSE; | |
| /* Otherwise, continue at this exact same point, but we must set the flag | |
| which ensures that we don't return the exact same empty match again. */ | |
| *pstart_offset = ovector[1]; | |
| *poptions = PCRE2_NOTEMPTY_ATSTART; | |
| return TRUE; | |
| } | |
| /* Finally, we must be in the happy state of a non-empty match, where the end of | |
| the match is further on in the subject than start_offset, so we are easily able | |
| to continue and make progress. */ | |
| *pstart_offset = ovector[1]; | |
| *poptions = 0; | |
| return TRUE; | |
| } | |
/* End of pcre2_match_next.c */