1
0
Fork 0

Search improvements (#10336)

* Search performance improvements, add SEARCH_VENDOR type, fixes #10326, fixes #10324, fixes #10325

* Add extra optimization path for autocompletion of ^foo/* whereas the vendor is fully known, refs #10320
pull/10347/head
Jordi Boggiano 2021-12-08 11:45:18 +01:00 committed by GitHub
parent 8c8d9efd87
commit cc32ebcabd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 204 additions and 49 deletions

View File

@ -420,7 +420,9 @@ You can also search for more than one term by passing multiple arguments.
### Options
* **--only-name (-N):** Search only in name.
* **--only-name (-N):** Search only in package names.
* **--only-vendor (-O):** Search only for vendor / organization names, returns only "vendor"
as result.
* **--type (-t):** Search for a specific package type.
* **--format (-f):** Lets you pick between text (default) or json output format.
Note that in the json, only the name and description keys are guaranteed to be

View File

@ -286,6 +286,23 @@ class Cache
return false;
}
/**
* @param string $file
* @return int|false
* @phpstan-return int<0, max>|false
*/
public function getAge($file)
{
if ($this->isEnabled()) {
$file = Preg::replace('{[^'.$this->allowlist.']}i', '-', $file);
if (file_exists($this->root . $file) && ($mtime = filemtime($this->root . $file)) !== false) {
return abs(time() - $mtime);
}
}
return false;
}
/**
* @param int $ttl
* @param int $maxSize

View File

@ -38,7 +38,8 @@ class SearchCommand extends BaseCommand
->setName('search')
->setDescription('Searches for packages.')
->setDefinition(array(
new InputOption('only-name', 'N', InputOption::VALUE_NONE, 'Search only in name'),
new InputOption('only-name', 'N', InputOption::VALUE_NONE, 'Search only in package names'),
new InputOption('only-vendor', 'O', InputOption::VALUE_NONE, 'Search only for vendor / organization names, returns only "vendor" as result'),
new InputOption('type', 't', InputOption::VALUE_REQUIRED, 'Search for a specific package type'),
new InputOption('format', 'f', InputOption::VALUE_REQUIRED, 'Format of the output: text or json', 'text'),
new InputArgument('tokens', InputArgument::IS_ARRAY | InputArgument::REQUIRED, 'tokens to search for'),
@ -77,11 +78,24 @@ EOT
$commandEvent = new CommandEvent(PluginEvents::COMMAND, 'search', $input, $output);
$composer->getEventDispatcher()->dispatch($commandEvent->getName(), $commandEvent);
$onlyName = $input->getOption('only-name');
$type = $input->getOption('type') ?: null;
$mode = RepositoryInterface::SEARCH_FULLTEXT;
if ($input->getOption('only-name') === true) {
if ($input->getOption('only-vendor') === true) {
throw new \InvalidArgumentException('--only-name and --only-vendor cannot be used together');
}
$mode = RepositoryInterface::SEARCH_NAME;
} elseif ($input->getOption('only-vendor') === true) {
$mode = RepositoryInterface::SEARCH_VENDOR;
}
$flags = $onlyName ? RepositoryInterface::SEARCH_NAME : RepositoryInterface::SEARCH_FULLTEXT;
$results = $repos->search(implode(' ', $input->getArgument('tokens')), $flags, $type);
$type = $input->getOption('type');
$query = implode(' ', $input->getArgument('tokens'));
if ($mode !== RepositoryInterface::SEARCH_FULLTEXT) {
$query = preg_quote($query);
}
$results = $repos->search($query, $mode, $type);
if ($results && $format === 'text') {
$width = $this->getTerminalWidth();

View File

@ -148,21 +148,35 @@ class ArrayRepository implements RepositoryInterface
*/
public function search($query, $mode = 0, $type = null)
{
if ($mode === self::SEARCH_FULLTEXT) {
$regex = '{(?:'.implode('|', Preg::split('{\s+}', preg_quote($query))).')}i';
} else {
// vendor/name searches expect the caller to have preg_quoted the query
$regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i';
}
$matches = array();
foreach ($this->getPackages() as $package) {
$name = $package->getName();
if ($mode === self::SEARCH_VENDOR) {
list($name) = explode('/', $name);
}
if (isset($matches[$name])) {
continue;
}
if (Preg::isMatch($regex, $name)
|| ($mode === self::SEARCH_FULLTEXT && $package instanceof CompletePackageInterface && Preg::isMatch($regex, implode(' ', (array) $package->getKeywords()) . ' ' . $package->getDescription()))
) {
if (null !== $type && $package->getType() !== $type) {
continue;
}
if (Preg::isMatch($regex, $name)
|| ($mode === self::SEARCH_FULLTEXT && $package instanceof CompletePackageInterface && Preg::isMatch($regex, implode(' ', (array) $package->getKeywords()) . ' ' . $package->getDescription()))
) {
if ($mode === self::SEARCH_VENDOR) {
$matches[$name] = array(
'name' => $name,
'description' => null,
);
} else {
$matches[$name] = array(
'name' => $package->getPrettyName(),
'description' => $package instanceof CompletePackageInterface ? $package->getDescription() : null,
@ -173,6 +187,7 @@ class ArrayRepository implements RepositoryInterface
}
}
}
}
return array_values($matches);
}

View File

@ -337,7 +337,7 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito
}
/**
* @param string|null $packageFilter
* @param string|null $packageFilter Package pattern filter which can include "*" as a wildcard
*
* @return string[]
*/
@ -345,51 +345,120 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito
{
$hasProviders = $this->hasProviders();
$packageFilterCb = function ($name) {
return true;
};
if (null !== $packageFilter) {
$packageFilterRegex = '{^'.str_replace('\\*', '.*?', preg_quote($packageFilter)).'$}i';
$packageFilterCb = function ($name) use ($packageFilterRegex) {
return Preg::isMatch($packageFilterRegex, $name);
};
$filterResults =
/**
* @param list<string> $results
* @return list<string>
*/
function (array $results) {
return $results;
}
;
if (null !== $packageFilter && '' !== $packageFilter) {
$packageFilterRegex = BasePackage::packageNameToRegexp($packageFilter);
$filterResults =
/**
* @param list<string> $results
* @return list<string>
*/
function (array $results) use ($packageFilterRegex) {
/** @var list<string> $results */
return Preg::grep($packageFilterRegex, $results);
}
;
}
if ($this->lazyProvidersUrl) {
if (is_array($this->availablePackages)) {
return array_filter(array_keys($this->availablePackages), $packageFilterCb);
return $filterResults(array_keys($this->availablePackages));
}
if ($this->listUrl) {
$url = $this->listUrl;
if ($packageFilter) {
$url .= '?filter='.urlencode($packageFilter);
// no need to call $filterResults here as the $packageFilter is applied in the function itself
return $this->loadPackageList($packageFilter);
}
$result = $this->httpDownloader->get($url, $this->options)->decodeJson();
return $result['packageNames'];
}
if ($this->hasPartialPackages()) {
return array_filter(array_keys($this->partialPackagesByName), $packageFilterCb);
if ($this->hasPartialPackages() && $this->partialPackagesByName !== null) {
return $filterResults(array_keys($this->partialPackagesByName));
}
return array();
}
if ($hasProviders) {
return array_filter($this->getProviderNames(), $packageFilterCb);
return $filterResults($this->getProviderNames());
}
$names = array();
foreach ($this->getPackages() as $package) {
if ($packageFilterCb($package->getName())) {
$names[] = $package->getPrettyName();
}
return $filterResults($names);
}
return $names;
/**
* @return list<string>
*/
private function getVendorNames()
{
$cacheKey = 'vendor-list.txt';
$cacheAge = $this->cache->getAge($cacheKey);
if (false !== $cacheAge && $cacheAge < 600 && ($cachedData = $this->cache->read($cacheKey)) !== false) {
$cachedData = explode("\n", $cachedData);
return $cachedData;
}
$names = $this->getPackageNames();
$uniques = array();
foreach ($names as $name) {
// @phpstan-ignore-next-line
$uniques[substr($name, 0, strpos($name, '/'))] = true;
}
$vendors = array_keys($uniques);
if (!$this->cache->isReadOnly()) {
$this->cache->write($cacheKey, implode("\n", $vendors));
}
return $vendors;
}
/**
* @param string|null $packageFilter
* @return list<string>
*/
private function loadPackageList($packageFilter = null)
{
if (null === $this->listUrl) {
throw new \LogicException('Make sure to call loadRootServerFile before loadPackageList');
}
$url = $this->listUrl;
if (is_string($packageFilter) && $packageFilter !== '') {
$url .= '?filter='.urlencode($packageFilter);
$result = $this->httpDownloader->get($url, $this->options)->decodeJson();
return $result['packageNames'];
}
$cacheKey = 'package-list.txt';
$cacheAge = $this->cache->getAge($cacheKey);
if (false !== $cacheAge && $cacheAge < 600 && ($cachedData = $this->cache->read($cacheKey)) !== false) {
$cachedData = explode("\n", $cachedData);
return $cachedData;
}
$result = $this->httpDownloader->get($url, $this->options)->decodeJson();
if (!$this->cache->isReadOnly()) {
$this->cache->write($cacheKey, implode("\n", $result['packageNames']));
}
return $result['packageNames'];
}
public function loadPackages(array $packageNameMap, array $acceptableStabilities, array $stabilityFlags, array $alreadyLoaded = array())
@ -465,7 +534,7 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito
*/
public function search($query, $mode = 0, $type = null)
{
$this->loadRootServerFile();
$this->loadRootServerFile(600);
if ($this->searchUrl && $mode === self::SEARCH_FULLTEXT) {
$url = str_replace(array('%query%', '%type%'), array($query, $type), $this->searchUrl);
@ -489,12 +558,36 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito
return $results;
}
if ($mode === self::SEARCH_VENDOR) {
$results = array();
$regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i';
$vendorNames = $this->getVendorNames();
foreach (Preg::grep($regex, $vendorNames) as $name) {
$results[] = array('name' => $name, 'description' => '');
}
return $results;
}
if ($this->hasProviders() || $this->lazyProvidersUrl) {
// optimize search for "^foo/bar" where at least "^foo/" is present by loading this directly from the listUrl if present
if (Preg::isMatch('{^\^(?P<query>(?P<vendor>[a-z0-9_.-]+)/[a-z0-9_.-]*)\*?$}i', $query, $match) && $this->listUrl !== null) {
$url = $this->listUrl . '?vendor='.urlencode($match['vendor']).'&filter='.urlencode($match['query'].'*');
$result = $this->httpDownloader->get($url, $this->options)->decodeJson();
$results = array();
foreach ($result['packageNames'] as $name) {
$results[] = array('name' => $name, 'description' => '');
}
return $results;
}
$results = array();
$regex = '{(?:'.implode('|', Preg::split('{\s+}', $query)).')}i';
$packageNames = $this->getPackageNames();
foreach (Preg::grep($regex, $packageNames) as $name) {
$results[] = array('name' => $name, 'description' => '');
}
@ -920,9 +1013,10 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito
}
/**
* @param int|null $rootMaxAge
* @return array<string, mixed>
*/
protected function loadRootServerFile()
protected function loadRootServerFile($rootMaxAge = null)
{
if (null !== $this->rootData) {
return $this->rootData;
@ -934,7 +1028,9 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito
if ($cachedData = $this->cache->read('packages.json')) {
$cachedData = json_decode($cachedData, true);
if (isset($cachedData['last-modified'])) {
if ($rootMaxAge !== null && ($age = $this->cache->getAge('packages.json')) !== false && $age <= $rootMaxAge) {
$data = $cachedData;
} elseif (isset($cachedData['last-modified'])) {
$response = $this->fetchFileIfLastModified($this->getPackagesJsonUrl(), 'packages.json', $cachedData['last-modified']);
$data = true === $response ? $cachedData : $response;
}

View File

@ -727,4 +727,14 @@ class PlatformRepository extends ArrayRepository
{
return self::$lastSeenPlatformPhp;
}
public function search($query, $mode = 0, $type = null)
{
// suppress vendor search as there are no vendors to match in platform packages
if ($mode === self::SEARCH_VENDOR) {
return array();
}
return parent::search($query, $mode, $type);
}
}

View File

@ -27,6 +27,7 @@ interface RepositoryInterface extends \Countable
{
const SEARCH_FULLTEXT = 0;
const SEARCH_NAME = 1;
const SEARCH_VENDOR = 2;
/**
* Checks if specified package registered (installed).
@ -85,11 +86,11 @@ interface RepositoryInterface extends \Countable
/**
* Searches the repository for packages containing the query
*
* @param string $query search query
* @param int $mode a set of SEARCH_* constants to search on, implementations should do a best effort only
* @param string $query search query, for SEARCH_NAME and SEARCH_VENDOR regular expressions metacharacters are supported by implementations, and user input should be escaped through preg_quote by callers
* @param int $mode a set of SEARCH_* constants to search on, implementations should do a best effort only, default is SEARCH_FULLTEXT
* @param string $type The type of package to search for. Defaults to all types of packages
*
* @return array[] an array of array('name' => '...', 'description' => '...'|null)
* @return array[] an array of array('name' => '...', 'description' => '...'|null, 'abandoned' => 'string'|true|unset) For SEARCH_VENDOR the name will be in "vendor" form
* @phpstan-return list<array{name: string, description: ?string, abandoned?: string|true}>
*/
public function search($query, $mode = 0, $type = null);