ClassMapGenerator: stabilize the heredoc/nowdoc stripping
I've looked into issue 10067 and have come to the conclusion that using a single regex to strip the heredocs/nowdocs is always going to run into trouble, as: * Either the matching will be too greedy (issue 10067); * Or the matching will run into backtrack limits for large heredocs/nowdocs. We cannot solve both within a single regex. So, I'm proposing a slightly different solution which should support both and should also improve performance for files containing large heredocs/nowdocs. The `stripHereNowDocs()` function will find a start marker and remember the offset of the start marker. It will then find the end marker and strip the contents between the two (replacing them with `null`). The function will then recurse onto itself until all heredocs/nowdocs in a file have been removed. pull/10072/head
parent
b66b23a03f
commit
40bd4b03ad
|
@ -246,7 +246,7 @@ class ClassMapGenerator
|
||||||
}
|
}
|
||||||
|
|
||||||
// strip heredocs/nowdocs
|
// strip heredocs/nowdocs
|
||||||
$contents = preg_replace('{<<<[ \t]*([\'"]?)(\w+)\\1(?:\r\n|\n|\r)(?:.*(?=[\r\n]+[ \t]*\\2))[\r\n]+[ \t]*\\2(?=\s*[;,.)])}s', 'null', $contents);
|
$contents = self::stripHereNowDocs($contents);
|
||||||
// strip strings
|
// strip strings
|
||||||
$contents = preg_replace('{"[^"\\\\]*+(\\\\.[^"\\\\]*+)*+"|\'[^\'\\\\]*+(\\\\.[^\'\\\\]*+)*+\'}s', 'null', $contents);
|
$contents = preg_replace('{"[^"\\\\]*+(\\\\.[^"\\\\]*+)*+"|\'[^\'\\\\]*+(\\\\.[^\'\\\\]*+)*+\'}s', 'null', $contents);
|
||||||
// strip leading non-php code if needed
|
// strip leading non-php code if needed
|
||||||
|
@ -303,4 +303,35 @@ class ClassMapGenerator
|
||||||
|
|
||||||
return $classes;
|
return $classes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Strip heredoc and nowdoc blocks from the contents of a file.
 *
 * Every block, from its `<<<` opener up to and including its closing
 * marker, is replaced with the literal text "null". Blocks are handled one
 * at a time in a loop (not via recursion), so the call stack stays flat no
 * matter how many heredocs/nowdocs the file contains, and text that has
 * already been scanned is never scanned again.
 *
 * Stripping stops at the first start marker for which no matching end
 * marker can be found; the contents processed so far are returned.
 *
 * @param string $contents File contents.
 *
 * @return string The cleaned up file contents.
 */
private static function stripHereNowDocs($contents)
{
    // Length of the "null" placeholder that replaces each stripped block.
    $placeholderLength = 4;

    // Offset from which the next start marker is searched.
    $searchFrom = 0;

    while (true) {
        // Find the next heredoc/nowdoc start marker and its offset in the file.
        $result = preg_match('{<<<[ \t]*([\'"]?)(?P<marker>\w+)\\1[\r\n]}', $contents, $startMatches, PREG_OFFSET_CAPTURE, $searchFrom);
        if ($result < 1) {
            // No (further) start marker, or a PCRE error: nothing left to strip.
            return $contents;
        }

        $marker = $startMatches['marker'][0];

        // Start looking for the end marker right after the marker name rather
        // than after the whole start match: the start match already consumed
        // one line break, which an empty heredoc needs for its end marker.
        $offset = $startMatches['marker'][1] + strlen($marker);
        $pattern = '`[\r\n]+[ \t]*' . preg_quote($marker, '`') . '(?=\s*[;,.)])`';

        // Find the corresponding heredoc/nowdoc end marker and its offset in the file.
        $result = preg_match($pattern, $contents, $endMatches, PREG_OFFSET_CAPTURE, $offset);
        if ($result < 1) {
            // Unterminated block: leave the remainder of the file untouched.
            return $contents;
        }

        // Strip the complete heredoc/nowdoc and replace it with "null".
        $blockStart = $startMatches[0][1];
        $blockLength = $endMatches[0][1] + strlen($endMatches[0][0]) - $blockStart;
        $contents = substr_replace($contents, 'null', $blockStart, $blockLength);

        // Resume just past the inserted placeholder; everything before it has
        // already been scanned and the placeholder itself is not re-examined.
        $searchFrom = $blockStart + $placeholderLength;
    }
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue