1
0
Fork 0

Rework JSON matching to use a properly recursive pattern, fixes #5771

pull/5779/head
Jordi Boggiano 2016-10-10 18:20:53 +02:00
parent 092dec9596
commit 5ee22f25ba
2 changed files with 103 additions and 65 deletions

View File

@ -19,10 +19,15 @@ use Composer\Repository\PlatformRepository;
*/ */
class JsonManipulator class JsonManipulator
{ {
private static $RECURSE_BLOCKS; private static $DEFINES = '(?(DEFINE)
private static $RECURSE_ARRAYS; (?<number> -? (?= [1-9]|0(?!\d) ) \d+ (\.\d+)? ([eE] [+-]? \d+)? )
private static $JSON_VALUE; (?<boolean> true | false | null )
private static $JSON_STRING; (?<string> " ([^"\\\\]* | \\\\ ["\\\\bfnrt\/] | \\\\ u [0-9a-f]{4} )* " )
(?<array> \[ (?: (?&json) \s* (?: , (?&json) \s* )* )? \s* \] )
(?<pair> \s* (?&string) \s* : (?&json) \s* )
(?<object> \{ (?: (?&pair) (?: , (?&pair) )* )? \s* \} )
(?<json> \s* (?: (?&number) | (?&boolean) | (?&string) | (?&array) | (?&object) ) )
)';
private $contents; private $contents;
private $newline; private $newline;
@ -30,13 +35,6 @@ class JsonManipulator
public function __construct($contents) public function __construct($contents)
{ {
if (!self::$RECURSE_BLOCKS) {
self::$RECURSE_BLOCKS = '(?:[^{}]*+|\{(?:[^{}]*+|\{(?:[^{}]*+|\{(?:[^{}]*+|\{[^{}]*+\})*\})*\})*\})*';
self::$RECURSE_ARRAYS = '(?:[^\]]*+|\[(?:[^\]]*+|\[(?:[^\]]*+|\[(?:[^\]]*+|\[[^\]]*+\])*\])*\])*\]|'.self::$RECURSE_BLOCKS.')*';
self::$JSON_STRING = '"(?:[^\0-\x09\x0a-\x1f\\\\"]+|\\\\["bfnrt/\\\\]|\\\\u[a-fA-F0-9]{4})*+"';
self::$JSON_VALUE = '(?:[0-9.]+|null|true|false|'.self::$JSON_STRING.'|\['.self::$RECURSE_ARRAYS.'\]|\{'.self::$RECURSE_BLOCKS.'\})';
}
$contents = trim($contents); $contents = trim($contents);
if ($contents === '') { if ($contents === '') {
$contents = '{}'; $contents = '{}';
@ -63,19 +61,19 @@ class JsonManipulator
return $this->addMainKey($type, array($package => $constraint)); return $this->addMainKey($type, array($package => $constraint));
} }
$regex = '{^(\s*\{\s*(?:'.self::$JSON_STRING.'\s*:\s*'.self::$JSON_VALUE.'\s*,\s*)*?)'. $regex = '{'.self::$DEFINES.'^(?P<start>\s*\{\s*(?:(?&string)\s*:\s*(?&json)\s*,\s*)*?)'.
'('.preg_quote(JsonFile::encode($type)).'\s*:\s*)('.self::$JSON_VALUE.')(.*)}s'; '(?P<property>'.preg_quote(JsonFile::encode($type)).'\s*:\s*)(?P<value>(?&json))(?P<end>.*)}sx';
if (!$this->pregMatch($regex, $this->contents, $matches)) { if (!$this->pregMatch($regex, $this->contents, $matches)) {
return false; return false;
} }
$links = $matches[3]; $links = $matches['value'];
if (isset($decoded[$type][$package])) { if (isset($decoded[$type][$package])) {
// update existing link // update existing link
$packageRegex = str_replace('/', '\\\\?/', preg_quote($package)); $packageRegex = str_replace('/', '\\\\?/', preg_quote($package));
$links = preg_replace_callback('{"'.$packageRegex.'"(\s*:\s*)'.self::$JSON_STRING.'}i', function ($m) use ($package, $constraint) { $links = preg_replace_callback('{'.self::$DEFINES.'"'.$packageRegex.'"(?P<separator>\s*:\s*)(?&string)}ix', function ($m) use ($package, $constraint) {
return JsonFile::encode($package) . $m[1] . '"' . $constraint . '"'; return JsonFile::encode($package) . $m['separator'] . '"' . $constraint . '"';
}, $links); }, $links);
} else { } else {
if ($this->pregMatch('#^\s*\{\s*\S+.*?(\s*\}\s*)$#s', $links, $match)) { if ($this->pregMatch('#^\s*\{\s*\S+.*?(\s*\}\s*)$#s', $links, $match)) {
@ -100,7 +98,7 @@ class JsonManipulator
$links = $this->format($requirements); $links = $this->format($requirements);
} }
$this->contents = $matches[1] . $matches[2] . $links . $matches[4]; $this->contents = $matches['start'] . $matches['property'] . $links . $matches['end'];
return true; return true;
} }
@ -202,8 +200,9 @@ class JsonManipulator
} }
// main node content not match-able // main node content not match-able
$nodeRegex = '{^(\s*\{\s*(?:'.self::$JSON_STRING.'\s*:\s*'.self::$JSON_VALUE.'\s*,\s*)*?)'. $nodeRegex = '{'.self::$DEFINES.'^(?P<start> \s* \{ \s* (?: (?&string) \s* : (?&json) \s* , \s* )*?'.
'('.preg_quote(JsonFile::encode($mainNode)).'\s*:\s*\{)('.self::$RECURSE_BLOCKS.')(\})(.*)}s'; preg_quote(JsonFile::encode($mainNode)).'\s*:\s*)(?P<content>(?&object))(?P<end>.*)}sx';
try { try {
if (!$this->pregMatch($nodeRegex, $this->contents, $match)) { if (!$this->pregMatch($nodeRegex, $this->contents, $match)) {
return false; return false;
@ -215,20 +214,20 @@ class JsonManipulator
throw $e; throw $e;
} }
$children = $match[3]; $children = $match['content'];
// invalid match due to un-regexable content, abort // invalid match due to un-regexable content, abort
if (!@json_decode('{'.$children.'}')) { if (!@json_decode($children)) {
return false; return false;
} }
$that = $this; $that = $this;
// child exists // child exists
if ($this->pregMatch('{("'.preg_quote($name).'"\s*:\s*)('.self::$JSON_VALUE.')(,?)}', $children, $matches)) { $childRegex = '{'.self::$DEFINES.'(?P<start>"'.preg_quote($name).'"\s*:\s*)(?P<content>(?&json))(?P<end>,?)}x';
$children = preg_replace_callback('{("'.preg_quote($name).'"\s*:\s*)('.self::$JSON_VALUE.')(,?)}', function ($matches) use ($name, $subName, $value, $that) { if ($this->pregMatch($childRegex, $children, $matches)) {
$children = preg_replace_callback($childRegex, function ($matches) use ($name, $subName, $value, $that) {
if ($subName !== null) { if ($subName !== null) {
$curVal = json_decode($matches[2], true); $curVal = json_decode($matches['content'], true);
if (!is_array($curVal)) { if (!is_array($curVal)) {
$curVal = array(); $curVal = array();
} }
@ -236,30 +235,39 @@ class JsonManipulator
$value = $curVal; $value = $curVal;
} }
return $matches[1] . $that->format($value, 1) . $matches[3]; return $matches['start'] . $that->format($value, 1) . $matches['end'];
}, $children); }, $children);
} elseif ($this->pregMatch('#[^\s](\s*)$#', $children, $match)) {
if ($subName !== null) {
$value = array($subName => $value);
}
// child missing but non empty children
$children = preg_replace(
'#'.$match[1].'$#',
addcslashes(',' . $this->newline . $this->indent . $this->indent . JsonFile::encode($name).': '.$this->format($value, 1) . $match[1], '\\$'),
$children
);
} else { } else {
if ($subName !== null) { $this->pregMatch('#^{ \s*? (?P<content>\S+.*?)? (?P<trailingspace>\s*) }$#sx', $children, $match);
$value = array($subName => $value);
$whitespace = '';
if (!empty($match['trailingspace'])) {
$whitespace = $match['trailingspace'];
} }
// children present but empty if (!empty($match['content'])) {
$children = $this->newline . $this->indent . $this->indent . JsonFile::encode($name).': '.$this->format($value, 1) . $children; if ($subName !== null) {
$value = array($subName => $value);
}
// child missing but non empty children
$children = preg_replace(
'#'.$whitespace.'}$#',
addcslashes(',' . $this->newline . $this->indent . $this->indent . JsonFile::encode($name).': '.$this->format($value, 1) . $whitespace . '}', '\\$'),
$children
);
} else {
if ($subName !== null) {
$value = array($subName => $value);
}
// children present but empty
$children = '{' . $this->newline . $this->indent . $this->indent . JsonFile::encode($name).': '.$this->format($value, 1) . $whitespace . '}';
}
} }
$this->contents = preg_replace_callback($nodeRegex, function ($m) use ($children) { $this->contents = preg_replace_callback($nodeRegex, function ($m) use ($children) {
return $m[1] . $m[2] . $children . $m[4] . $m[5]; return $m['start'] . $children . $m['end'];
}, $this->contents); }, $this->contents);
return true; return true;
@ -275,8 +283,8 @@ class JsonManipulator
} }
// no node content match-able // no node content match-able
$nodeRegex = '{^(\s*\{\s*(?:'.self::$JSON_STRING.'\s*:\s*'.self::$JSON_VALUE.'\s*,\s*)*?)'. $nodeRegex = '{'.self::$DEFINES.'^(?P<start> \s* \{ \s* (?: (?&string) \s* : (?&json) \s* , \s* )*?'.
'('.preg_quote(JsonFile::encode($mainNode)).'\s*:\s*\{)('.self::$RECURSE_BLOCKS.')(\})(.*)}s'; preg_quote(JsonFile::encode($mainNode)).'\s*:\s*)(?P<content>(?&object))(?P<end>.*)}sx';
try { try {
if (!$this->pregMatch($nodeRegex, $this->contents, $match)) { if (!$this->pregMatch($nodeRegex, $this->contents, $match)) {
return false; return false;
@ -288,10 +296,10 @@ class JsonManipulator
throw $e; throw $e;
} }
$children = $match[3]; $children = $match['content'];
// invalid match due to un-regexable content, abort // invalid match due to un-regexable content, abort
if (!@json_decode('{'.$children.'}', true)) { if (!@json_decode($children, true)) {
return false; return false;
} }
@ -308,7 +316,7 @@ class JsonManipulator
// try and find a match for the subkey // try and find a match for the subkey
if ($this->pregMatch('{"'.preg_quote($name).'"\s*:}i', $children)) { if ($this->pregMatch('{"'.preg_quote($name).'"\s*:}i', $children)) {
// find best match for the value of "name" // find best match for the value of "name"
if (preg_match_all('{"'.preg_quote($name).'"\s*:\s*(?:'.self::$JSON_VALUE.')}', $children, $matches)) { if (preg_match_all('{'.self::$DEFINES.'"'.preg_quote($name).'"\s*:\s*(?:(?&json))}x', $children, $matches)) {
$bestMatch = ''; $bestMatch = '';
foreach ($matches[0] as $match) { foreach ($matches[0] as $match) {
if (strlen($bestMatch) < strlen($match)) { if (strlen($bestMatch) < strlen($match)) {
@ -328,12 +336,18 @@ class JsonManipulator
} }
// no child data left, $name was the only key in // no child data left, $name was the only key in
if (!trim($childrenClean)) { $this->pregMatch('#^{ \s*? (?P<content>\S+.*?)? (?P<trailingspace>\s*) }$#sx', $childrenClean, $match);
$this->contents = preg_replace($nodeRegex, '$1$2'.$this->newline.$this->indent.'$4$5', $this->contents); if (empty($match['content'])) {
$newline = $this->newline;
$indent = $this->indent;
$this->contents = preg_replace_callback($nodeRegex, function ($matches) use ($indent, $newline) {
return $matches['start'] . '{' . $newline . $indent . '}' . $matches['end'];
}, $this->contents);
// we have a subname, so we restore the rest of $name // we have a subname, so we restore the rest of $name
if ($subName !== null) { if ($subName !== null) {
$curVal = json_decode('{'.$children.'}', true); $curVal = json_decode($children, true);
unset($curVal[$name][$subName]); unset($curVal[$name][$subName]);
$this->addSubNode($mainNode, $name, $curVal[$name]); $this->addSubNode($mainNode, $name, $curVal[$name]);
} }
@ -344,12 +358,12 @@ class JsonManipulator
$that = $this; $that = $this;
$this->contents = preg_replace_callback($nodeRegex, function ($matches) use ($that, $name, $subName, $childrenClean) { $this->contents = preg_replace_callback($nodeRegex, function ($matches) use ($that, $name, $subName, $childrenClean) {
if ($subName !== null) { if ($subName !== null) {
$curVal = json_decode('{'.$matches[3].'}', true); $curVal = json_decode($matches['content'], true);
unset($curVal[$name][$subName]); unset($curVal[$name][$subName]);
$childrenClean = substr($that->format($curVal, 0), 1, -1); $childrenClean = $that->format($curVal, 0);
} }
return $matches[1] . $matches[2] . $childrenClean . $matches[4] . $matches[5]; return $matches['start'] . $childrenClean . $matches['end'];
}, $this->contents); }, $this->contents);
return true; return true;
@ -361,15 +375,15 @@ class JsonManipulator
$content = $this->format($content); $content = $this->format($content);
// key exists already // key exists already
$regex = '{^(\s*\{\s*(?:'.self::$JSON_STRING.'\s*:\s*'.self::$JSON_VALUE.'\s*,\s*)*?)'. $regex = '{'.self::$DEFINES.'^(?P<start>\s*\{\s*(?:(?&string)\s*:\s*(?&json)\s*,\s*)*?)'.
'('.preg_quote(JsonFile::encode($key)).'\s*:\s*'.self::$JSON_VALUE.')(.*)}s'; '(?P<key>'.preg_quote(JsonFile::encode($key)).'\s*:\s*(?&json))(?P<end>.*)}sx';
if (isset($decoded[$key]) && $this->pregMatch($regex, $this->contents, $matches)) { if (isset($decoded[$key]) && $this->pregMatch($regex, $this->contents, $matches)) {
// invalid match due to un-regexable content, abort // invalid match due to un-regexable content, abort
if (!@json_decode('{'.$matches[2].'}')) { if (!@json_decode('{'.$matches['key'].'}')) {
return false; return false;
} }
$this->contents = $matches[1] . JsonFile::encode($key).': '.$content . $matches[3]; $this->contents = $matches['start'] . JsonFile::encode($key).': '.$content . $matches['end'];
return true; return true;
} }
@ -404,15 +418,15 @@ class JsonManipulator
} }
// key exists already // key exists already
$regex = '{^(\s*\{\s*(?:'.self::$JSON_STRING.'\s*:\s*'.self::$JSON_VALUE.'\s*,\s*)*?)'. $regex = '{'.self::$DEFINES.'^(?P<start>\s*\{\s*(?:(?&string)\s*:\s*(?&json)\s*,\s*)*?)'.
'('.preg_quote(JsonFile::encode($key)).'\s*:\s*'.self::$JSON_VALUE.')\s*,?\s*(.*)}s'; '(?P<removal>'.preg_quote(JsonFile::encode($key)).'\s*:\s*(?&json))\s*,?\s*(?P<end>.*)}sx';
if ($this->pregMatch($regex, $this->contents, $matches)) { if ($this->pregMatch($regex, $this->contents, $matches)) {
// invalid match due to un-regexable content, abort // invalid match due to un-regexable content, abort
if (!@json_decode('{'.$matches[2].'}')) { if (!@json_decode('{'.$matches['removal'].'}')) {
return false; return false;
} }
$this->contents = $matches[1] . $matches[3]; $this->contents = $matches['start'] . $matches['end'];
if (preg_match('#^\{\s*\}\s*$#', $this->contents)) { if (preg_match('#^\{\s*\}\s*$#', $this->contents)) {
$this->contents = "{\n}"; $this->contents = "{\n}";
} }

View File

@ -1542,7 +1542,7 @@ class JsonManipulatorTest extends \PHPUnit_Framework_TestCase
} }
', ',
), ),
'fails on deep repos with borked texts' => array( 'works on deep repos with borked texts' => array(
'{ '{
"repositories": { "repositories": {
"foo": { "foo": {
@ -1551,9 +1551,21 @@ class JsonManipulatorTest extends \PHPUnit_Framework_TestCase
} }
}', }',
'bar', 'bar',
false, true,
'{
"repositories": {
"foo": {
"package": { "bar": "ba{z" }
}
}
}
',
'{
}
',
), ),
'fails on deep repos with borked texts2' => array( 'works on deep repos with borked texts2' => array(
'{ '{
"repositories": { "repositories": {
"foo": { "foo": {
@ -1562,7 +1574,19 @@ class JsonManipulatorTest extends \PHPUnit_Framework_TestCase
} }
}', }',
'bar', 'bar',
false, true,
'{
"repositories": {
"foo": {
"package": { "bar": "ba}z" }
}
}
}
',
'{
}
',
), ),
'fails on deep arrays with borked texts' => array( 'fails on deep arrays with borked texts' => array(
'{ '{