Rewrite php file cleaning step to be less regex intensive and support extreme cases better, fixes #10106 (#10107)
parent
d99b200cf3
commit
d64d1adf61
|
@ -214,10 +214,7 @@ class ClassMapGenerator
|
|||
*/
|
||||
private static function findClasses($path)
|
||||
{
|
||||
$extraTypes = PHP_VERSION_ID < 50400 ? '' : '|trait';
|
||||
if (PHP_VERSION_ID >= 80100 || (defined('HHVM_VERSION') && version_compare(HHVM_VERSION, '3.3', '>='))) {
|
||||
$extraTypes .= '|enum';
|
||||
}
|
||||
$extraTypes = self::getExtraTypes();
|
||||
|
||||
// Use @ here instead of Silencer to actively suppress 'unhelpful' output
|
||||
// @link https://github.com/composer/composer/pull/4886
|
||||
|
@ -241,57 +238,14 @@ class ClassMapGenerator
|
|||
}
|
||||
|
||||
// return early if there is no chance of matching anything in this file
|
||||
if (!preg_match('{\b(?:class|interface'.$extraTypes.')\s}i', $contents)) {
|
||||
preg_match_all('{\b(?:class|interface'.$extraTypes.')\s}i', $contents, $matches);
|
||||
if (!$matches) {
|
||||
return array();
|
||||
}
|
||||
|
||||
// strip heredocs/nowdocs
|
||||
$heredocRegex = '{
|
||||
# opening heredoc/nowdoc delimiter (word-chars)
|
||||
<<<[ \t]*+([\'"]?)(\w++)\\1
|
||||
# needs to be followed by a newline
|
||||
(?:\r\n|\n|\r)
|
||||
# the meat of it, matching line by line until end delimiter
|
||||
(?:
|
||||
# a valid line is optional white-space (possessive match) not followed by the end delimiter, then anything goes for the rest of the line
|
||||
[\t ]*+(?!\\2 \b)[^\r\n]*+
|
||||
# end of line(s)
|
||||
[\r\n]++
|
||||
)*
|
||||
# end delimiter
|
||||
[\t ]*+ \\2 (?=\b)
|
||||
}x';
|
||||
|
||||
// run first assuming the file is valid unicode
|
||||
$contentWithoutHeredoc = preg_replace($heredocRegex.'u', 'null', $contents);
|
||||
if (null === $contentWithoutHeredoc) {
|
||||
// run again without unicode support if the file failed to be parsed
|
||||
$contents = preg_replace($heredocRegex, 'null', $contents);
|
||||
} else {
|
||||
$contents = $contentWithoutHeredoc;
|
||||
}
|
||||
unset($contentWithoutHeredoc);
|
||||
|
||||
// strip strings
|
||||
$contents = preg_replace('{"[^"\\\\]*+(\\\\.[^"\\\\]*+)*+"|\'[^\'\\\\]*+(\\\\.[^\'\\\\]*+)*+\'}s', 'null', $contents);
|
||||
// strip leading non-php code if needed
|
||||
if (strpos($contents, '<?') !== 0) {
|
||||
$contents = preg_replace('{^.+?<\?}s', '<?', $contents, 1, $replacements);
|
||||
if ($replacements === 0) {
|
||||
return array();
|
||||
}
|
||||
}
|
||||
// strip non-php blocks in the file
|
||||
$contents = preg_replace('{\?>(?:[^<]++|<(?!\?))*+<\?}s', '?><?', $contents);
|
||||
// strip trailing non-php code if needed
|
||||
$pos = strrpos($contents, '?>');
|
||||
if (false !== $pos && false === strpos(substr($contents, $pos), '<?')) {
|
||||
$contents = substr($contents, 0, $pos);
|
||||
}
|
||||
// strip comments if short open tags are in the file
|
||||
if (preg_match('{(<\?)(?!(php|hh))}i', $contents)) {
|
||||
$contents = preg_replace('{//.* | /\*(?:[^*]++|\*(?!/))*\*/}x', '', $contents);
|
||||
}
|
||||
$p = new PhpFileCleaner($contents, count($matches[0]));
|
||||
$contents = $p->clean();
|
||||
unset($p);
|
||||
|
||||
preg_match_all('{
|
||||
(?:
|
||||
|
@ -328,4 +282,18 @@ class ClassMapGenerator
|
|||
|
||||
return $classes;
|
||||
}
|
||||
|
||||
/**
 * Returns the regex alternation suffix for the type keywords that exist on top of
 * "class" and "interface" on the running PHP/HHVM version (e.g. "|trait|enum").
 *
 * The value is computed on the first call only; that same first call also hands the
 * complete keyword list over to PhpFileCleaner::setTypeConfig().
 *
 * @return string empty string, or one or more keywords each prefixed with "|"
 */
private static function getExtraTypes()
{
    static $extraTypes = null;

    // already computed (and PhpFileCleaner already configured) by an earlier call
    if (null !== $extraTypes) {
        return $extraTypes;
    }

    $hasEnums = PHP_VERSION_ID >= 80100
        || (defined('HHVM_VERSION') && version_compare(HHVM_VERSION, '3.3', '>='));

    $extraTypes = PHP_VERSION_ID < 50400 ? '' : '|trait';
    if ($hasEnums) {
        $extraTypes .= '|enum';
    }

    // array_filter drops the empty leading chunk produced by the "|" prefix
    $keywords = array_filter(explode('|', $extraTypes));
    PhpFileCleaner::setTypeConfig(array_merge(array('class', 'interface'), $keywords));

    return $extraTypes;
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,228 @@
|
|||
<?php
|
||||
|
||||
namespace Composer\Autoload;
|
||||
|
||||
/**
 * Single-pass scanner that reduces PHP source code to what is needed to detect
 * class-like declarations.
 *
 * Content outside of PHP tags is dropped, string literals and heredocs/nowdocs are
 * replaced by the literal text "null", and comments are removed. Everything else is
 * copied through unchanged.
 *
 * setTypeConfig() must be called once before any instance is used, as clean() relies
 * on the static patterns it builds.
 *
 * @author Jordi Boggiano <j.boggiano@seld.be>
 * @internal
 */
class PhpFileCleaner
{
    /**
     * Type keyword configuration, keyed by the first character of the keyword.
     *
     * @var array<string, array{name: string, length: int, pattern: string}>
     */
    private static $typeConfig;

    /**
     * Anchored pattern matching a run of characters that can never start a token
     * the scanner cares about, so that such runs can be copied in one go.
     *
     * @var string
     */
    private static $restPattern;

    /**
     * Source code being cleaned.
     *
     * @readonly
     * @var string
     */
    private $contents;

    /**
     * Length of $contents in bytes.
     *
     * @readonly
     * @var int
     */
    private $len;

    /**
     * Number of type keyword occurrences the caller found in the raw contents.
     *
     * @readonly
     * @var int
     */
    private $maxMatches;

    /**
     * Current byte offset of the scanner within $contents.
     *
     * @var int
     */
    private $index = 0;

    /**
     * Registers the type keywords (class, interface, ...) the cleaner has to look for.
     *
     * Entries are keyed by the first character of each keyword, so two keywords
     * sharing a first character would overwrite each other.
     *
     * @param string[] $types lowercase keywords
     * @return void
     */
    public static function setTypeConfig($types)
    {
        foreach ($types as $type) {
            self::$typeConfig[$type[0]] = array(
                'name' => $type,
                'length' => \strlen($type),
                // one arbitrary character, then the keyword (not preceded by $, : or >),
                // whitespace and the declared name
                'pattern' => '{.\b(?<![\$:>])'.$type.'\s++[a-zA-Z_\x7f-\xff:][a-zA-Z0-9_\x7f-\xff:\-]*+}Ais',
            );
        }

        self::$restPattern = '{[^?"\'</'.implode('', array_keys(self::$typeConfig)).']+}A';
    }

    /**
     * @param string $contents   PHP source code to clean
     * @param int    $maxMatches amount of type keyword occurrences found in $contents;
     *                           when it is exactly 1 clean() stops at the first declaration
     */
    public function __construct($contents, $maxMatches)
    {
        $this->contents = $contents;
        $this->len = \strlen($this->contents);
        $this->maxMatches = $maxMatches;
    }

    /**
     * Produces the cleaned version of the contents.
     *
     * @return string the cleaned code; when only a single match is expected the result
     *                ends right after the first declaration found
     */
    public function clean()
    {
        $clean = '';

        while ($this->index < $this->len) {
            // drop anything located outside of PHP tags
            $this->skipToPhp();
            $clean .= '<?';

            while ($this->index < $this->len) {
                $char = $this->contents[$this->index];

                // closing tag: go back to looking for the next opening tag
                if ($char === '?' && $this->peek('>')) {
                    $clean .= '?>';
                    $this->index += 2;
                    continue 2;
                }

                if ($char === '"') {
                    $this->skipString('"');
                    $clean .= 'null';
                    continue;
                }

                if ($char === "'") {
                    $this->skipString("'");
                    $clean .= 'null';
                    continue;
                }

                if ($char === "<" && $this->peek('<') && $this->match('{<<<[ \t]*+([\'"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*+)\\1(?:\r\n|\n|\r)}A', $match)) {
                    $this->index += \strlen($match[0]);
                    $this->skipHeredoc($match[2]);
                    $clean .= 'null';
                    continue;
                }

                if ($char === '/') {
                    if ($this->peek('/')) {
                        $this->skipToNewline();
                        continue;
                    }
                    if ($this->peek('*')) {
                        $this->skipComment();
                        // Restart with the character following the comment. Falling through
                        // would emit the stale "/" and jump over the first character after
                        // the comment (which may well be a quote or a type keyword).
                        continue;
                    }
                }

                if ($this->maxMatches === 1 && isset(self::$typeConfig[$char])) {
                    $type = self::$typeConfig[$char];
                    if (
                        \substr($this->contents, $this->index, $type['length']) === $type['name']
                        // the pattern starts one byte earlier to be able to check the preceding character
                        && \preg_match($type['pattern'], $this->contents, $match, 0, $this->index - 1)
                    ) {
                        // the only expected declaration was found, no need to look further
                        $clean .= $match[0];

                        return $clean;
                    }
                }

                // copy the current character together with the uninteresting run following it
                $this->index += 1;
                if ($this->match(self::$restPattern, $match)) {
                    $clean .= $char . $match[0];
                    $this->index += \strlen($match[0]);
                } else {
                    $clean .= $char;
                }
            }
        }

        return $clean;
    }

    /**
     * Moves the index right after the next PHP opening tag, or to the end of the
     * contents if there is none.
     *
     * @return void
     */
    private function skipToPhp()
    {
        while ($this->index < $this->len) {
            if ($this->contents[$this->index] === '<' && $this->peek('?')) {
                $this->index += 2;
                break;
            }

            $this->index += 1;
        }
    }

    /**
     * Moves the index past a quoted string whose opening quote is at the current index.
     *
     * @param string $delimiter the quote character enclosing the string
     * @return void
     */
    private function skipString($delimiter)
    {
        $this->index += 1;
        while ($this->index < $this->len) {
            // an escaped backslash or an escaped quote does not terminate the string
            if ($this->contents[$this->index] === '\\' && ($this->peek('\\') || $this->peek($delimiter))) {
                $this->index += 2;
                continue;
            }
            if ($this->contents[$this->index] === $delimiter) {
                $this->index += 1;
                break;
            }
            $this->index += 1;
        }
    }

    /**
     * Moves the index past a block comment whose opening sequence is at the current index.
     *
     * @return void
     */
    private function skipComment()
    {
        $this->index += 2;
        while ($this->index < $this->len) {
            if ($this->contents[$this->index] === '*' && $this->peek('/')) {
                $this->index += 2;
                break;
            }

            $this->index += 1;
        }
    }

    /**
     * Moves the index to the next CR or LF character (which is not consumed), or to
     * the end of the contents.
     *
     * @return void
     */
    private function skipToNewline()
    {
        while ($this->index < $this->len) {
            if ($this->contents[$this->index] === "\r" || $this->contents[$this->index] === "\n") {
                return;
            }
            $this->index += 1;
        }
    }

    /**
     * Moves the index past the body and closing delimiter of a heredoc/nowdoc. The
     * index is expected to be at the start of the first body line.
     *
     * @param string $delimiter the heredoc identifier, without quotes
     * @return void
     */
    private function skipHeredoc($delimiter)
    {
        $firstDelimiterChar = $delimiter[0];
        $delimiterLength = \strlen($delimiter);
        // the identifier only closes the heredoc if no further identifier character follows it
        $delimiterPattern = '{'.preg_quote($delimiter).'(?![a-zA-Z0-9_\x80-\xff])}A';

        while ($this->index < $this->len) {
            // check if we find the delimiter after some spaces/tabs
            switch ($this->contents[$this->index]) {
                case "\t":
                case " ":
                    $this->index += 1;
                    continue 2;
                case $firstDelimiterChar:
                    if (
                        \substr($this->contents, $this->index, $delimiterLength) === $delimiter
                        && $this->match($delimiterPattern)
                    ) {
                        $this->index += $delimiterLength;

                        return;
                    }
                    break;
            }

            // not the closing delimiter: skip the rest of the line ...
            $this->skipToNewline();

            // ... as well as the line break(s) following it
            while ($this->index < $this->len && ($this->contents[$this->index] === "\r" || $this->contents[$this->index] === "\n")) {
                $this->index += 1;
            }
        }
    }

    /**
     * Checks whether the character following the current one equals $char.
     *
     * @param string $char
     * @return bool
     */
    private function peek($char)
    {
        return $this->index + 1 < $this->len && $this->contents[$this->index + 1] === $char;
    }

    /**
     * Runs a regex against the contents, starting at the current index.
     *
     * @param string     $regex
     * @param array|null $match receives the matches, as filled in by preg_match
     * @return bool true if the regex matched
     */
    private function match($regex, array &$match = null)
    {
        return 1 === \preg_match($regex, $this->contents, $match, 0, $this->index);
    }
}
|
|
@ -0,0 +1,7 @@
|
|||
<?php
|
||||
|
||||
echo <<<'NOT¶ING_TO_SEE_H¤RE'
|
||||
class FailHeredocNonUnicodeNonAscii
|
||||
{
|
||||
}
|
||||
NOT¶ING_TO_SEE_H¤RE;
|
|
@ -19,6 +19,8 @@ class FailHeredocWhitespace
|
|||
}
|
||||
WHITESPACE . <<< MARKERINTEXT
|
||||
In PHP < 7.3, the docblock marker could occur in the text as long as it did not occur at the very start of the line.
|
||||
MARKERINTEXTwithtrail
|
||||
MARKERINTEXT_
|
||||
class FailHeredocMarkerInText
|
||||
{
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue