From f92f2f45a96df3c19ccdff827546d9a968537894 Mon Sep 17 00:00:00 2001 From: Sandy Pleyte Date: Tue, 18 Feb 2014 09:01:12 +0100 Subject: [PATCH] Fix for #2613, when using php < 5.4 there was an unnecessary backslash before each utf-8 char. The problem was that the regexp matched all utf-8 encoded chars (including the ones that were escaped). The new regexp uses the lookbehind feature to check if the backslash isn't prefixed with another backslash. --- src/Composer/Json/JsonFile.php | 81 +----------- src/Composer/Json/JsonFormatter.php | 123 ++++++++++++++++++ tests/Composer/Test/Json/JsonFileTest.php | 7 - .../Composer/Test/Json/JsonFormatterTest.php | 45 +++++++ 4 files changed, 169 insertions(+), 87 deletions(-) create mode 100644 src/Composer/Json/JsonFormatter.php create mode 100644 tests/Composer/Test/Json/JsonFormatterTest.php diff --git a/src/Composer/Json/JsonFile.php b/src/Composer/Json/JsonFile.php index 594e0951b..2d35b9671 100644 --- a/src/Composer/Json/JsonFile.php +++ b/src/Composer/Json/JsonFile.php @@ -177,11 +177,6 @@ class JsonFile /** * Encodes an array into (optionally pretty-printed) JSON * - * This code is based on the function found at: - * http://recursive-design.com/blog/2008/03/11/format-json-with-php/ - * - * Originally licensed under MIT by Dave Perrett - * * @param mixed $data Data to encode into a formatted JSON string * @param int $options json_encode options (defaults to JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE) * @return string Encoded json @@ -202,81 +197,7 @@ class JsonFile return $json; } - $result = ''; - $pos = 0; - $strLen = strlen($json); - $indentStr = ' '; - $newLine = "\n"; - $outOfQuotes = true; - $buffer = ''; - $noescape = true; - - for ($i = 0; $i < $strLen; $i++) { - // Grab the next character in the string - $char = substr($json, $i, 1); - - // Are we inside a quoted string? 
/**
 * Formats JSON strings on PHP < 5.4, where json_encode() does not support the
 * JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE flags.
 *
 * @author Konstantin Kudryashiv
 * @author Jordi Boggiano
 */
class JsonFormatter
{
    /**
     * Pretty-prints a JSON string and optionally unescapes slashes and
     * \uXXXX sequences inside its string tokens.
     *
     * This code is based on the function found at:
     *  http://recursive-design.com/blog/2008/03/11/format-json-with-php/
     *
     * Originally licensed under MIT by Dave Perrett
     *
     * @param  string $json            JSON text to format; characters outside of
     *                                 strings that are not structural are copied
     *                                 through unchanged
     * @param  bool   $unescapeUnicode Replace \uXXXX escapes of multibyte characters
     *                                 by their UTF-8 bytes (requires mbstring, is a
     *                                 no-op otherwise)
     * @param  bool   $unescapeSlashes Replace "\/" by "/"
     * @return string The formatted JSON
     */
    public static function format($json, $unescapeUnicode, $unescapeSlashes)
    {
        $result = '';
        $pos = 0; // current nesting depth
        $strLen = strlen($json);
        $indentStr = '    ';
        $newLine = "\n";
        $outOfQuotes = true;
        $buffer = ''; // current string token: opening quote + content, without the closing quote
        $noescape = true; // false while the previous character is an unpaired backslash

        for ($i = 0; $i < $strLen; $i++) {
            // Grab the next character in the string
            $char = substr($json, $i, 1);

            // Are we inside a quoted string?
            if ('"' === $char && $noescape) {
                $outOfQuotes = !$outOfQuotes;
            }

            if (!$outOfQuotes) {
                $buffer .= $char;
                // Two consecutive backslashes cancel each other out
                $noescape = '\\' === $char ? !$noescape : true;
                continue;
            } elseif ('' !== $buffer) {
                // $char is the closing quote of the token collected in $buffer
                $result .= self::unescapeToken($buffer, $unescapeUnicode, $unescapeSlashes).$char;
                $buffer = '';
                continue;
            }

            if (':' === $char) {
                // Add a space after the : character
                $char .= ' ';
            } elseif ('}' === $char || ']' === $char) {
                $pos--;
                $prevChar = substr($json, $i - 1, 1);

                if ('{' !== $prevChar && '[' !== $prevChar) {
                    // If this character is the end of an element,
                    // output a new line and indent the next line
                    $result .= $newLine.str_repeat($indentStr, max(0, $pos));
                } else {
                    // Collapse empty {} and []
                    $result = rtrim($result)."\n\n".$indentStr;
                }
            }

            $result .= $char;

            // If the last character was the beginning of an element,
            // output a new line and indent the next line
            if (',' === $char || '{' === $char || '[' === $char) {
                if ('{' === $char || '[' === $char) {
                    $pos++;
                }

                $result .= $newLine.str_repeat($indentStr, max(0, $pos));
            }
        }

        return $result;
    }

    /**
     * Applies the requested unescaping to the content of one JSON string token.
     *
     * An escape sequence is only real when it is introduced by an odd number of
     * backslashes; with an even number every backslash is itself escaped and the
     * following character is literal text that must not be touched.
     *
     * @param  string $token           String token as found in the JSON text
     * @param  bool   $unescapeUnicode Un escape unicode
     * @param  bool   $unescapeSlashes Un escape slashes
     * @return string
     */
    private static function unescapeToken($token, $unescapeUnicode, $unescapeSlashes)
    {
        if ($unescapeSlashes) {
            $token = preg_replace_callback('#(\\\\+)/#', function ($match) {
                $slashes = strlen($match[1]);

                if (0 === $slashes % 2) {
                    // Only escaped backslashes followed by a plain slash
                    return $match[0];
                }

                return str_repeat('\\', $slashes - 1).'/';
            }, $token);
        }

        if ($unescapeUnicode && function_exists('mb_convert_encoding')) {
            // http://stackoverflow.com/questions/2934563/how-to-decode-unicode-escape-sequences-like-u00ed-to-proper-utf-8-encoded-cha
            $token = preg_replace_callback('/(\\\\+)u([0-9a-f]{4})/i', function ($match) {
                $slashes = strlen($match[1]);

                if (0 === $slashes % 2) {
                    // Escaped backslash(es) followed by a literal "uXXXX"
                    return $match[0];
                }

                $code = hexdec($match[2]);

                // Keep the escape for ASCII code points (control characters, quote,
                // backslash: emitting them raw would break the JSON string) and for
                // UTF-16 surrogate halves (not valid UCS-2 characters on their own)
                if ($code < 0x80 || ($code >= 0xD800 && $code <= 0xDFFF)) {
                    return $match[0];
                }

                return str_repeat('\\', $slashes - 1).mb_convert_encoding(pack('H*', $match[2]), 'UTF-8', 'UCS-2BE');
            }, $token);
        }

        return $token;
    }
}
/**
 * Byte-level checks for JsonFormatter::format().
 */
class JsonFormatterTest extends \PHPUnit_Framework_TestCase
{
    /**
     * An escaped backslash directly followed by a \u0119 escape must come out
     * as the escaped backslash plus the UTF-8 bytes of the character (196+153).
     * See ticket #2613
     */
    public function testUnicodeWithPrependedSlash()
    {
        // Built from character codes so that no PHP string escaping rule
        // can alter the number of backslashes fed to the formatter
        $backslash = chr(92);
        $input = '"' . $backslash . $backslash . $backslash . 'u0119"';

        $formatted = JsonFormatter::format($input, true, true);

        $this->assertEquals('34+92+92+196+153+34', $this->getCharacterCodes($formatted));
    }

    /**
     * Renders a string as the decimal values of its bytes joined by plus signs
     *
     * @param  string $string
     * @return string
     */
    protected function getCharacterCodes($string)
    {
        $length = strlen($string);
        $codes = array();

        for ($offset = 0; $offset < $length; ++$offset) {
            $codes[] = ord($string[$offset]);
        }

        return implode('+', $codes);
    }
}