From 8e9678a5bacf998cb77a296b5fd6cbe08f4a9c97 Mon Sep 17 00:00:00 2001 From: Jordi Boggiano Date: Sat, 10 Nov 2012 17:29:45 +0100 Subject: [PATCH] Optimize classmap generation by avoiding the use of token_get_all() First of all this seems way faster than iterating over all tokens, but especially it reduces memory usage drastically for very large files --- src/Composer/Autoload/ClassMapGenerator.php | 71 +++++++-------------- 1 file changed, 24 insertions(+), 47 deletions(-) diff --git a/src/Composer/Autoload/ClassMapGenerator.php b/src/Composer/Autoload/ClassMapGenerator.php index c54919efb..521f84171 100644 --- a/src/Composer/Autoload/ClassMapGenerator.php +++ b/src/Composer/Autoload/ClassMapGenerator.php @@ -92,59 +92,36 @@ class ClassMapGenerator */ private static function findClasses($path) { - $contents = file_get_contents($path); + $contents = php_strip_whitespace($path); + try { if (!preg_match('{\b(?:class|interface|trait)\b}i', $contents)) { return array(); } - $tokens = token_get_all($contents); + + // strip heredocs/nowdocs + $contents = preg_replace('{<<<\'?(\w+)\'?(?:\r\n|\n|\r)(?:.*?)(?:\r\n|\n|\r)\\1(?=\r\n|\n|\r|;)}s', 'null', $contents); + // strip strings + $contents = preg_replace('{"[^"\\\\]*(\\\\.[^"\\\\]*)*"|\'[^\'\\\\]*(\\\\.[^\'\\\\]*)*\'}', 'null', $contents); + + preg_match_all('{(?:\b(?<![\$:>])(?<type>class|interface|trait)\s+(?<name>\S+)|\b(?<![\$:>])(?<ns>namespace)\s+(?<nsname>[^\s;{}\\\\]+(?:\s*\\\\\s*[^\s;{}\\\\]+)*))}i', $contents, $matches); + $classes = array(); + + $namespace = ''; + + for ($i = 0, $len = count($matches['type']); $i < $len; $i++) { + $name = $matches['name'][$i]; + + if (!empty($matches['ns'][$i])) { + $namespace = str_replace(array(' ', "\t", "\r", "\n"), '', $matches['nsname'][$i]) . '\\'; + } else { + $classes[] = ltrim($namespace . 
$matches['name'][$i], '\\'); + } + } + + return $classes; } catch (\Exception $e) { throw new \RuntimeException('Could not scan for classes inside '.$path.": \n".$e->getMessage(), 0, $e); } - $T_TRAIT = version_compare(PHP_VERSION, '5.4', '<') ? -1 : T_TRAIT; - - $classes = array(); - - $namespace = ''; - for ($i = 0, $max = count($tokens); $i < $max; $i++) { - $token = $tokens[$i]; - - if (is_string($token)) { - continue; - } - - $class = ''; - - switch ($token[0]) { - case T_NAMESPACE: - $namespace = ''; - // If there is a namespace, extract it - while (($t = $tokens[++$i]) && is_array($t)) { - if (in_array($t[0], array(T_STRING, T_NS_SEPARATOR))) { - $namespace .= $t[1]; - } - } - $namespace .= '\\'; - break; - case T_CLASS: - case T_INTERFACE: - case $T_TRAIT: - // Find the classname - while (($t = $tokens[++$i]) && is_array($t)) { - if (T_STRING === $t[0]) { - $class .= $t[1]; - } elseif ($class !== '' && T_WHITESPACE == $t[0]) { - break; - } - } - - $classes[] = ltrim($namespace . $class, '\\'); - break; - default: - break; - } - } - - return $classes; } }