1
0
Fork 0

Search improvements (#10336)

* Search performance improvements, add SEARCH_VENDOR type, fixes #10326, fixes #10324, fixes #10325

* Add extra optimization path for autocompletion of ^foo/* where the vendor is fully known, refs #10320
pull/10347/head
Jordi Boggiano 2021-12-08 11:45:18 +01:00 committed by GitHub
parent 8c8d9efd87
commit cc32ebcabd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 204 additions and 49 deletions

View File

@ -420,7 +420,9 @@ You can also search for more than one term by passing multiple arguments.
### Options ### Options
* **--only-name (-N):** Search only in name. * **--only-name (-N):** Search only in package names.
* **--only-vendor (-O):** Search only in vendor / organization names; each result
contains only the vendor name.
* **--type (-t):** Search for a specific package type. * **--type (-t):** Search for a specific package type.
* **--format (-f):** Lets you pick between text (default) or json output format. * **--format (-f):** Lets you pick between text (default) or json output format.
Note that in the json, only the name and description keys are guaranteed to be Note that in the json, only the name and description keys are guaranteed to be

View File

@ -286,6 +286,23 @@ class Cache
return false; return false;
} }
/**
 * Reports how old a cache entry is, based on its modification time.
 *
 * The given name is first sanitized: every character outside the cache's
 * allowlist character class is replaced with "-" before the entry is looked
 * up below the cache root.
 *
 * @param string $file
 * @return int|false Absolute difference in seconds between now and the entry's
 *                   mtime, or false when the cache is disabled, the entry does
 *                   not exist, or its mtime cannot be read
 * @phpstan-return int<0, max>|false
 */
public function getAge($file)
{
    if (!$this->isEnabled()) {
        return false;
    }

    $sanitized = Preg::replace('{[^'.$this->allowlist.']}i', '-', $file);
    $path = $this->root . $sanitized;
    if (!file_exists($path)) {
        return false;
    }

    $modifiedAt = filemtime($path);
    if ($modifiedAt === false) {
        return false;
    }

    // abs() keeps the result non-negative even if the mtime lies in the future
    return abs(time() - $modifiedAt);
}
/** /**
* @param int $ttl * @param int $ttl
* @param int $maxSize * @param int $maxSize

View File

@ -38,7 +38,8 @@ class SearchCommand extends BaseCommand
->setName('search') ->setName('search')
->setDescription('Searches for packages.') ->setDescription('Searches for packages.')
->setDefinition(array( ->setDefinition(array(
new InputOption('only-name', 'N', InputOption::VALUE_NONE, 'Search only in name'), new InputOption('only-name', 'N', InputOption::VALUE_NONE, 'Search only in package names'),
new InputOption('only-vendor', 'O', InputOption::VALUE_NONE, 'Search only for vendor / organization names, returns only "vendor" as result'),
new InputOption('type', 't', InputOption::VALUE_REQUIRED, 'Search for a specific package type'), new InputOption('type', 't', InputOption::VALUE_REQUIRED, 'Search for a specific package type'),
new InputOption('format', 'f', InputOption::VALUE_REQUIRED, 'Format of the output: text or json', 'text'), new InputOption('format', 'f', InputOption::VALUE_REQUIRED, 'Format of the output: text or json', 'text'),
new InputArgument('tokens', InputArgument::IS_ARRAY | InputArgument::REQUIRED, 'tokens to search for'), new InputArgument('tokens', InputArgument::IS_ARRAY | InputArgument::REQUIRED, 'tokens to search for'),
@ -77,11 +78,24 @@ EOT
$commandEvent = new CommandEvent(PluginEvents::COMMAND, 'search', $input, $output); $commandEvent = new CommandEvent(PluginEvents::COMMAND, 'search', $input, $output);
$composer->getEventDispatcher()->dispatch($commandEvent->getName(), $commandEvent); $composer->getEventDispatcher()->dispatch($commandEvent->getName(), $commandEvent);
$onlyName = $input->getOption('only-name'); $mode = RepositoryInterface::SEARCH_FULLTEXT;
$type = $input->getOption('type') ?: null; if ($input->getOption('only-name') === true) {
if ($input->getOption('only-vendor') === true) {
throw new \InvalidArgumentException('--only-name and --only-vendor cannot be used together');
}
$mode = RepositoryInterface::SEARCH_NAME;
} elseif ($input->getOption('only-vendor') === true) {
$mode = RepositoryInterface::SEARCH_VENDOR;
}
$flags = $onlyName ? RepositoryInterface::SEARCH_NAME : RepositoryInterface::SEARCH_FULLTEXT; $type = $input->getOption('type');
$results = $repos->search(implode(' ', $input->getArgument('tokens')), $flags, $type);
$query = implode(' ', $input->getArgument('tokens'));
if ($mode !== RepositoryInterface::SEARCH_FULLTEXT) {
$query = preg_quote($query);
}
$results = $repos->search($query, $mode, $type);
if ($results && $format === 'text') { if ($results && $format === 'text') {
$width = $this->getTerminalWidth(); $width = $this->getTerminalWidth();

View File

@ -148,28 +148,43 @@ class ArrayRepository implements RepositoryInterface
*/ */
public function search($query, $mode = 0, $type = null) public function search($query, $mode = 0, $type = null)
{ {
$regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i'; if ($mode === self::SEARCH_FULLTEXT) {
$regex = '{(?:'.implode('|', Preg::split('{\s+}', preg_quote($query))).')}i';
} else {
// vendor/name searches expect the caller to have preg_quoted the query
$regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i';
}
$matches = array(); $matches = array();
foreach ($this->getPackages() as $package) { foreach ($this->getPackages() as $package) {
$name = $package->getName(); $name = $package->getName();
if ($mode === self::SEARCH_VENDOR) {
list($name) = explode('/', $name);
}
if (isset($matches[$name])) { if (isset($matches[$name])) {
continue; continue;
} }
if (null !== $type && $package->getType() !== $type) {
continue;
}
if (Preg::isMatch($regex, $name) if (Preg::isMatch($regex, $name)
|| ($mode === self::SEARCH_FULLTEXT && $package instanceof CompletePackageInterface && Preg::isMatch($regex, implode(' ', (array) $package->getKeywords()) . ' ' . $package->getDescription())) || ($mode === self::SEARCH_FULLTEXT && $package instanceof CompletePackageInterface && Preg::isMatch($regex, implode(' ', (array) $package->getKeywords()) . ' ' . $package->getDescription()))
) { ) {
if (null !== $type && $package->getType() !== $type) { if ($mode === self::SEARCH_VENDOR) {
continue; $matches[$name] = array(
} 'name' => $name,
'description' => null,
);
} else {
$matches[$name] = array(
'name' => $package->getPrettyName(),
'description' => $package instanceof CompletePackageInterface ? $package->getDescription() : null,
);
$matches[$name] = array( if ($package instanceof CompletePackageInterface && $package->isAbandoned()) {
'name' => $package->getPrettyName(), $matches[$name]['abandoned'] = $package->getReplacementPackage() ?: true;
'description' => $package instanceof CompletePackageInterface ? $package->getDescription() : null, }
);
if ($package instanceof CompletePackageInterface && $package->isAbandoned()) {
$matches[$name]['abandoned'] = $package->getReplacementPackage() ?: true;
} }
} }
} }

View File

@ -337,7 +337,7 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito
} }
/** /**
* @param string|null $packageFilter * @param string|null $packageFilter Package pattern filter which can include "*" as a wildcard
* *
* @return string[] * @return string[]
*/ */
@ -345,51 +345,120 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito
{ {
$hasProviders = $this->hasProviders(); $hasProviders = $this->hasProviders();
$packageFilterCb = function ($name) { $filterResults =
return true; /**
}; * @param list<string> $results
if (null !== $packageFilter) { * @return list<string>
$packageFilterRegex = '{^'.str_replace('\\*', '.*?', preg_quote($packageFilter)).'$}i'; */
$packageFilterCb = function ($name) use ($packageFilterRegex) { function (array $results) {
return Preg::isMatch($packageFilterRegex, $name); return $results;
}; }
;
if (null !== $packageFilter && '' !== $packageFilter) {
$packageFilterRegex = BasePackage::packageNameToRegexp($packageFilter);
$filterResults =
/**
* @param list<string> $results
* @return list<string>
*/
function (array $results) use ($packageFilterRegex) {
/** @var list<string> $results */
return Preg::grep($packageFilterRegex, $results);
}
;
} }
if ($this->lazyProvidersUrl) { if ($this->lazyProvidersUrl) {
if (is_array($this->availablePackages)) { if (is_array($this->availablePackages)) {
return array_filter(array_keys($this->availablePackages), $packageFilterCb); return $filterResults(array_keys($this->availablePackages));
} }
if ($this->listUrl) { if ($this->listUrl) {
$url = $this->listUrl; // no need to call $filterResults here as the $packageFilter is applied in the function itself
if ($packageFilter) { return $this->loadPackageList($packageFilter);
$url .= '?filter='.urlencode($packageFilter);
}
$result = $this->httpDownloader->get($url, $this->options)->decodeJson();
return $result['packageNames'];
} }
if ($this->hasPartialPackages()) { if ($this->hasPartialPackages() && $this->partialPackagesByName !== null) {
return array_filter(array_keys($this->partialPackagesByName), $packageFilterCb); return $filterResults(array_keys($this->partialPackagesByName));
} }
return array(); return array();
} }
if ($hasProviders) { if ($hasProviders) {
return array_filter($this->getProviderNames(), $packageFilterCb); return $filterResults($this->getProviderNames());
} }
$names = array(); $names = array();
foreach ($this->getPackages() as $package) { foreach ($this->getPackages() as $package) {
if ($packageFilterCb($package->getName())) { $names[] = $package->getPrettyName();
$names[] = $package->getPrettyName();
}
} }
return $names; return $filterResults($names);
}
/**
 * Builds the list of distinct vendor names known to this repository.
 *
 * The list is derived from getPackageNames() and stored under the
 * "vendor-list.txt" cache key. A cached copy younger than 600 seconds is
 * returned as-is instead of being recomputed.
 *
 * @return list<string>
 */
private function getVendorNames()
{
    $cacheKey = 'vendor-list.txt';
    $cacheAge = $this->cache->getAge($cacheKey);
    if (false !== $cacheAge && $cacheAge < 600 && ($cachedData = $this->cache->read($cacheKey)) !== false) {
        return explode("\n", $cachedData);
    }

    $uniques = array();
    foreach ($this->getPackageNames() as $name) {
        // Everything before the first "/" is the vendor. explode() keeps a
        // slash-less name whole, whereas substr($name, 0, strpos($name, '/'))
        // collapses it to an empty-string vendor because strpos() returns false.
        list($vendor) = explode('/', $name, 2);
        $uniques[$vendor] = true;
    }

    // PHP casts decimal-looking string keys (e.g. "1001") to int, so convert
    // the keys back to strings to honour the list<string> return contract.
    $vendors = array_map('strval', array_keys($uniques));

    if (!$this->cache->isReadOnly()) {
        $this->cache->write($cacheKey, implode("\n", $vendors));
    }

    return $vendors;
}
/**
 * Fetches the package name list from the repository's list URL.
 *
 * With a non-empty string filter, the filter is sent as the "filter" query
 * parameter and the response is returned without touching the cache. Without
 * one, the full list is served from the "package-list.txt" cache entry when
 * that entry is younger than 600 seconds; otherwise it is downloaded and,
 * unless the cache is read-only, written back to the cache.
 *
 * @param string|null $packageFilter
 * @return list<string>
 *
 * @throws \LogicException when no list URL has been set
 */
private function loadPackageList($packageFilter = null)
{
    $listUrl = $this->listUrl;
    if ($listUrl === null) {
        throw new \LogicException('Make sure to call loadRootServerFile before loadPackageList');
    }

    $hasFilter = is_string($packageFilter) && '' !== $packageFilter;
    if ($hasFilter) {
        // filtered requests always go to the server
        $response = $this->httpDownloader->get($listUrl.'?filter='.urlencode($packageFilter), $this->options)->decodeJson();

        return $response['packageNames'];
    }

    $cacheFile = 'package-list.txt';
    $age = $this->cache->getAge($cacheFile);
    if ($age !== false && $age < 600) {
        $cached = $this->cache->read($cacheFile);
        if ($cached !== false) {
            return explode("\n", $cached);
        }
    }

    $response = $this->httpDownloader->get($listUrl, $this->options)->decodeJson();
    if (!$this->cache->isReadOnly()) {
        $this->cache->write($cacheFile, implode("\n", $response['packageNames']));
    }

    return $response['packageNames'];
}
public function loadPackages(array $packageNameMap, array $acceptableStabilities, array $stabilityFlags, array $alreadyLoaded = array()) public function loadPackages(array $packageNameMap, array $acceptableStabilities, array $stabilityFlags, array $alreadyLoaded = array())
@ -465,7 +534,7 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito
*/ */
public function search($query, $mode = 0, $type = null) public function search($query, $mode = 0, $type = null)
{ {
$this->loadRootServerFile(); $this->loadRootServerFile(600);
if ($this->searchUrl && $mode === self::SEARCH_FULLTEXT) { if ($this->searchUrl && $mode === self::SEARCH_FULLTEXT) {
$url = str_replace(array('%query%', '%type%'), array($query, $type), $this->searchUrl); $url = str_replace(array('%query%', '%type%'), array($query, $type), $this->searchUrl);
@ -489,12 +558,36 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito
return $results; return $results;
} }
if ($mode === self::SEARCH_VENDOR) {
$results = array();
$regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i';
$vendorNames = $this->getVendorNames();
foreach (Preg::grep($regex, $vendorNames) as $name) {
$results[] = array('name' => $name, 'description' => '');
}
return $results;
}
if ($this->hasProviders() || $this->lazyProvidersUrl) { if ($this->hasProviders() || $this->lazyProvidersUrl) {
// optimize search for "^foo/bar" where at least "^foo/" is present by loading this directly from the listUrl if present
if (Preg::isMatch('{^\^(?P<query>(?P<vendor>[a-z0-9_.-]+)/[a-z0-9_.-]*)\*?$}i', $query, $match) && $this->listUrl !== null) {
$url = $this->listUrl . '?vendor='.urlencode($match['vendor']).'&filter='.urlencode($match['query'].'*');
$result = $this->httpDownloader->get($url, $this->options)->decodeJson();
$results = array();
foreach ($result['packageNames'] as $name) {
$results[] = array('name' => $name, 'description' => '');
}
return $results;
}
$results = array(); $results = array();
$regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i'; $regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i';
$packageNames = $this->getPackageNames(); $packageNames = $this->getPackageNames();
foreach (Preg::grep($regex, $packageNames) as $name) { foreach (Preg::grep($regex, $packageNames) as $name) {
$results[] = array('name' => $name, 'description' => ''); $results[] = array('name' => $name, 'description' => '');
} }
@ -920,9 +1013,10 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito
} }
/** /**
* @param int|null $rootMaxAge
* @return array<string, mixed> * @return array<string, mixed>
*/ */
protected function loadRootServerFile() protected function loadRootServerFile($rootMaxAge = null)
{ {
if (null !== $this->rootData) { if (null !== $this->rootData) {
return $this->rootData; return $this->rootData;
@ -934,7 +1028,9 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito
if ($cachedData = $this->cache->read('packages.json')) { if ($cachedData = $this->cache->read('packages.json')) {
$cachedData = json_decode($cachedData, true); $cachedData = json_decode($cachedData, true);
if (isset($cachedData['last-modified'])) { if ($rootMaxAge !== null && ($age = $this->cache->getAge('packages.json')) !== false && $age <= $rootMaxAge) {
$data = $cachedData;
} elseif (isset($cachedData['last-modified'])) {
$response = $this->fetchFileIfLastModified($this->getPackagesJsonUrl(), 'packages.json', $cachedData['last-modified']); $response = $this->fetchFileIfLastModified($this->getPackagesJsonUrl(), 'packages.json', $cachedData['last-modified']);
$data = true === $response ? $cachedData : $response; $data = true === $response ? $cachedData : $response;
} }

View File

@ -727,4 +727,14 @@ class PlatformRepository extends ArrayRepository
{ {
return self::$lastSeenPlatformPhp; return self::$lastSeenPlatformPhp;
} }
/**
 * Searches the platform packages, except in vendor mode.
 *
 * A SEARCH_VENDOR query yields an empty result; every other mode is
 * delegated unchanged to the parent implementation.
 *
 * @param string $query
 * @param int $mode
 * @param string|null $type
 * @return array[]
 */
public function search($query, $mode = 0, $type = null)
{
    // suppress vendor search as there are no vendors to match in platform packages
    return $mode === self::SEARCH_VENDOR
        ? array()
        : parent::search($query, $mode, $type);
}
} }

View File

@ -27,6 +27,7 @@ interface RepositoryInterface extends \Countable
{ {
const SEARCH_FULLTEXT = 0; const SEARCH_FULLTEXT = 0;
const SEARCH_NAME = 1; const SEARCH_NAME = 1;
const SEARCH_VENDOR = 2;
/** /**
* Checks if specified package registered (installed). * Checks if specified package registered (installed).
@ -85,11 +86,11 @@ interface RepositoryInterface extends \Countable
/** /**
* Searches the repository for packages containing the query * Searches the repository for packages containing the query
* *
* @param string $query search query * @param string $query search query, for SEARCH_NAME and SEARCH_VENDOR regular expressions metacharacters are supported by implementations, and user input should be escaped through preg_quote by callers
* @param int $mode a set of SEARCH_* constants to search on, implementations should do a best effort only * @param int $mode a set of SEARCH_* constants to search on, implementations should do a best effort only, default is SEARCH_FULLTEXT
* @param string $type The type of package to search for. Defaults to all types of packages * @param string $type The type of package to search for. Defaults to all types of packages
* *
* @return array[] an array of array('name' => '...', 'description' => '...'|null) * @return array[] an array of array('name' => '...', 'description' => '...'|null, 'abandoned' => 'string'|true|unset) For SEARCH_VENDOR the name will be in "vendor" form
* @phpstan-return list<array{name: string, description: ?string, abandoned?: string|true}> * @phpstan-return list<array{name: string, description: ?string, abandoned?: string|true}>
*/ */
public function search($query, $mode = 0, $type = null); public function search($query, $mode = 0, $type = null);