1
0
Fork 0

ClassMapGenerator: stabilize the heredoc/nowdoc stripping

I've looked into 10067 and have come to the conclusion that using a single regex to strip the heredoc/nowdocs is always going to run into trouble as:
* Either the matching will be too greedy (issue 10067);
* Or the matching will run into PCRE backtracking limits for large heredoc/nowdocs.

We cannot solve both within a single regex.

So, I'm proposing a slightly different solution which should support both and should also improve performance for files containing large heredoc/nowdocs.

The `stripHereNowDocs()` function will find a start marker and remember the offset of the start marker.
It will then find the end marker and strip the contents between the two (replace with `null`).
The function will then call itself recursively until all heredocs/nowdocs in a file have been removed.
pull/10072/head
jrfnl 2021-08-21 15:20:16 +02:00 committed by Jordi Boggiano
parent b66b23a03f
commit 40bd4b03ad
No known key found for this signature in database
GPG Key ID: 7BBD42C429EC80BC
1 changed files with 32 additions and 1 deletions

View File

@ -246,7 +246,7 @@ class ClassMapGenerator
} }
// strip heredocs/nowdocs // strip heredocs/nowdocs
$contents = preg_replace('{<<<[ \t]*([\'"]?)(\w+)\\1(?:\r\n|\n|\r)(?:.*(?=[\r\n]+[ \t]*\\2))[\r\n]+[ \t]*\\2(?=\s*[;,.)])}s', 'null', $contents); $contents = self::stripHereNowDocs($contents);
// strip strings // strip strings
$contents = preg_replace('{"[^"\\\\]*+(\\\\.[^"\\\\]*+)*+"|\'[^\'\\\\]*+(\\\\.[^\'\\\\]*+)*+\'}s', 'null', $contents); $contents = preg_replace('{"[^"\\\\]*+(\\\\.[^"\\\\]*+)*+"|\'[^\'\\\\]*+(\\\\.[^\'\\\\]*+)*+\'}s', 'null', $contents);
// strip leading non-php code if needed // strip leading non-php code if needed
@ -303,4 +303,35 @@ class ClassMapGenerator
return $classes; return $classes;
} }
/**
 * Strip heredoc and nowdoc blocks from the contents of a file.
 *
 * Every block is replaced with the literal text "null". The contents are
 * processed in a single forward pass: a start marker (`<<<MARKER`,
 * `<<<"MARKER"` or `<<<'MARKER'` followed by a line break) is located, then
 * the matching end marker (the same identifier at the start of a line,
 * optionally indented, followed by optional whitespace and one of `;,.)`)
 * is searched for from that point onwards.
 *
 * A start marker for which no end marker can be found is left untouched and
 * the search continues behind it, so one unmatched marker does not prevent
 * later blocks from being stripped.
 *
 * @param string $contents File contents.
 *
 * @return string The cleaned up file contents.
 */
private static function stripHereNowDocs($contents)
{
    $startPattern = '{<<<[ \t]*([\'"]?)(?P<marker>\w+)\\1[\r\n]}';

    // Pieces of the cleaned up contents collected so far.
    $stripped = '';
    // Offset of the first byte of $contents which has not been copied to $stripped yet.
    $copiedUpTo = 0;
    // Offset at which to look for the next start marker.
    $searchFrom = 0;

    // Iterate with a moving offset instead of recursing: the stack depth stays
    // constant and text which has already been handled is never scanned again.
    while (preg_match($startPattern, $contents, $startMatches, PREG_OFFSET_CAPTURE, $searchFrom) === 1) {
        $marker = $startMatches['marker'][0];
        $blockStart = $startMatches[0][1];
        $afterMarker = $startMatches['marker'][1] + strlen($marker);

        // Find the corresponding heredoc/nowdoc end marker and its offset in the file.
        $endPattern = '`[\r\n]+[ \t]*' . preg_quote($marker, '`') . '(?=\s*[;,.)])`';
        if (preg_match($endPattern, $contents, $endMatches, PREG_OFFSET_CAPTURE, $afterMarker) !== 1) {
            // No end marker: this was not a (recognisable) heredoc/nowdoc.
            // Leave it in place and carry on looking behind it.
            $searchFrom = $afterMarker;
            continue;
        }

        $blockEnd = $endMatches[0][1] + strlen($endMatches[0][0]);

        // Keep everything up to the block and put "null" in place of the block itself.
        $stripped .= substr($contents, $copiedUpTo, $blockStart - $copiedUpTo) . 'null';
        $copiedUpTo = $blockEnd;
        $searchFrom = $blockEnd;
    }

    if ($copiedUpTo === 0) {
        // Nothing was stripped, hand back the input unchanged.
        return $contents;
    }

    // Append whatever follows the last stripped block.
    return $stripped . substr($contents, $copiedUpTo);
}
} }