ClassMapGenerator: stabilize the heredoc/nowdoc stripping

I've looked into 10067 and have come to the conclusion that using a single regex to strip the heredocs/nowdocs is always going to run into trouble, because: * either the matching will be too greedy (issue 10067); * or the matching will run into PCRE backtrack limits for large heredocs/nowdocs. We cannot solve both within a single regex. So, I'm proposing a slightly different solution which should support both and should also improve performance for files containing large heredocs/nowdocs. The `stripHereNowDocs()` function will find a start marker and remember its offset. It will then find the corresponding end marker and strip everything from the start marker through the end marker (replacing it with `null`). The function will then call itself recursively until all heredocs/nowdocs in the file have been removed.
2021-08-21 15:20:16 +02:00 · 2021-08-21 15:20:16 +02:00 · 40bd4b03ad
parent b66b23a03f
commit 40bd4b03ad
1 changed files with 32 additions and 1 deletions
--- a/src/Composer/Autoload/ClassMapGenerator.php
+++ b/src/Composer/Autoload/ClassMapGenerator.php
@ -246,7 +246,7 @@ class ClassMapGenerator
        }

        // strip heredocs/nowdocs
-        $contents = preg_replace('{<<<[ \t]*([\'"]?)(\w+)\\1(?:\r\n|\n|\r)(?:.*(?=[\r\n]+[ \t]*\\2))[\r\n]+[ \t]*\\2(?=\s*[;,.)])}s', 'null', $contents);
+        $contents = self::stripHereNowDocs($contents);
        // strip strings
        $contents = preg_replace('{"[^"\\\\]*+(\\\\.[^"\\\\]*+)*+"|\'[^\'\\\\]*+(\\\\.[^\'\\\\]*+)*+\'}s', 'null', $contents);
        // strip leading non-php code if needed
@ -303,4 +303,35 @@ class ClassMapGenerator

        return $classes;
    }
+
+    /**
+     * Strip heredoc and nowdoc blocks from the contents of a file.
+     *
+     * @param string $contents File contents.
+     *
+     * @return string The contents with each complete heredoc/nowdoc replaced by "null".
+     */
+    private static function stripHereNowDocs($contents)
+    {
+        // Scan forward in a loop instead of recursing, so text that was already handled is never rescanned.
+        $searchFrom = 0;
+        while (preg_match('{<<<[ \t]*([\'"]?)(?P<marker>\w+)\\1[\r\n]}', $contents, $startMatches, PREG_OFFSET_CAPTURE, $searchFrom) === 1) {
+            $start     = $startMatches[0][1];
+            $markerEnd = $startMatches['marker'][1] + strlen($startMatches['marker'][0]);
+            $pattern   = '`[\r\n]+[ \t]*' . preg_quote($startMatches['marker'][0], '`') . '(?=\s*[;,.)])`';
+
+            // A start marker with no matching end marker (e.g. "<<<FOO" in a comment) is skipped, not treated as fatal.
+            if (preg_match($pattern, $contents, $endMatches, PREG_OFFSET_CAPTURE, $markerEnd) !== 1) {
+                $searchFrom = $markerEnd;
+                continue;
+            }
+
+            // Replace everything from "<<<" up to and including the end marker with "null".
+            $length     = $endMatches[0][1] + strlen($endMatches[0][0]) - $start;
+            $contents   = substr_replace($contents, 'null', $start, $length);
+            $searchFrom = $start + strlen('null');
+        }
+
+        return $contents;
+    }
 }