ClassMapGenerator: stabilize the heredoc/nowdoc stripping

I've looked into #10067 and have come to the conclusion that using a single regex to strip the heredocs/nowdocs is always going to run into trouble, as either the matching will be too greedy (issue #10067), or the matching will run into PCRE backtrack limits for large heredocs/nowdocs. We cannot solve both within a single regex. So, I'm proposing a slightly different solution which should support both and should also improve performance for files containing large heredocs/nowdocs. The `stripHereNowDocs()` function will find a start marker and remember the offset of the start marker. It will then find the end marker and strip the contents between the two (replacing them with `null`). The function will then recurse onto itself until all heredocs/nowdocs in a file have been removed.
2021-08-21 15:20:16 +02:00 · 2021-08-21 15:20:16 +02:00 · 40bd4b03ad
parent b66b23a03f
commit 40bd4b03ad
1 changed files with 32 additions and 1 deletions
--- a/src/Composer/Autoload/ClassMapGenerator.php
+++ b/src/Composer/Autoload/ClassMapGenerator.php
@ -246,7 +246,7 @@ class ClassMapGenerator
        }
        // strip heredocs/nowdocs
-        $contents = preg_replace('{<<<[ \t]*([\'"]?)(\w+)\\1(?:\r\n|\n|\r)(?:.*(?=[\r\n]+[ \t]*\\2))[\r\n]+[ \t]*\\2(?=\s*[;,.)])}s', 'null', $contents);
+        $contents = self::stripHereNowDocs($contents);
        // strip strings
        $contents = preg_replace('{"[^"\\\\]*+(\\\\.[^"\\\\]*+)*+"|\'[^\'\\\\]*+(\\\\.[^\'\\\\]*+)*+\'}s', 'null', $contents);
        // strip leading non-php code if needed
@ -303,4 +303,35 @@ class ClassMapGenerator
        return $classes;
    }
    /**
     * Strip heredoc and nowdoc blocks from the contents of a file.
     *
     * Every block, from its `<<<` opener up to and including its closing
     * marker, is replaced with the literal text "null". Blocks are handled
     * one at a time in a loop (rather than via recursion), so the call depth
     * stays constant no matter how many heredocs/nowdocs the file contains,
     * and text that has already been processed is not scanned again.
     *
     * If an opener is found for which no closing marker can be located, the
     * scan stops and the contents are returned as cleaned up so far.
     *
     * @param string $contents File contents.
     *
     * @return string The cleaned up file contents.
     */
    private static function stripHereNowDocs($contents)
    {
        // Opener: "<<<", optional blanks, an optionally quoted marker and a line break.
        $startPattern = '{<<<[ \t]*([\'"]?)(?P<marker>\w+)\\1[\r\n]}';
        $replacement  = 'null';
        $searchFrom   = 0;

        // Find the next heredoc/nowdoc start marker and its offset in the file.
        while (preg_match($startPattern, $contents, $startMatches, PREG_OFFSET_CAPTURE, $searchFrom) === 1) {
            $blockStart = $startMatches[0][1];
            $marker     = $startMatches['marker'][0];

            // Only look for the end marker after the start marker itself.
            $bodyOffset = $startMatches['marker'][1] + strlen($marker);
            $endPattern = '`[\r\n]+[ \t]*' . preg_quote($marker, '`') . '(?=\s*[;,.)])`';

            // Find the corresponding heredoc/nowdoc end marker and its offset in the file.
            if (preg_match($endPattern, $contents, $endMatches, PREG_OFFSET_CAPTURE, $bodyOffset) !== 1) {
                // Unterminated block (or regex failure): leave the remainder untouched.
                break;
            }

            $blockEnd = $endMatches[0][1] + strlen($endMatches[0][0]);

            // Strip the complete heredoc/nowdoc and replace it with "null".
            $contents = substr_replace($contents, $replacement, $blockStart, $blockEnd - $blockStart);

            // Continue scanning right after the inserted replacement.
            $searchFrom = $blockStart + strlen($replacement);
        }

        return $contents;
    }
 }