Fix for #2613: when using PHP < 5.4 there was an unnecessary backslash before each UTF-8 char. The problem was that the regexp matched all UTF-8 encoded chars (including the ones that were escaped). The new regexp uses the lookbehind feature to check that the backslash isn't prefixed with another backslash.
parent
7a902ed96d
commit
f92f2f45a9
|
@ -177,11 +177,6 @@ class JsonFile
|
||||||
/**
|
/**
|
||||||
* Encodes an array into (optionally pretty-printed) JSON
|
* Encodes an array into (optionally pretty-printed) JSON
|
||||||
*
|
*
|
||||||
* This code is based on the function found at:
|
|
||||||
* http://recursive-design.com/blog/2008/03/11/format-json-with-php/
|
|
||||||
*
|
|
||||||
* Originally licensed under MIT by Dave Perrett <mail@recursive-design.com>
|
|
||||||
*
|
|
||||||
* @param mixed $data Data to encode into a formatted JSON string
|
* @param mixed $data Data to encode into a formatted JSON string
|
||||||
* @param int $options json_encode options (defaults to JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE)
|
* @param int $options json_encode options (defaults to JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE)
|
||||||
* @return string Encoded json
|
* @return string Encoded json
|
||||||
|
@ -202,81 +197,7 @@ class JsonFile
|
||||||
return $json;
|
return $json;
|
||||||
}
|
}
|
||||||
|
|
||||||
$result = '';
|
$result = JsonFormatter::format($json, $unescapeUnicode, $unescapeSlashes);
|
||||||
$pos = 0;
|
|
||||||
$strLen = strlen($json);
|
|
||||||
$indentStr = ' ';
|
|
||||||
$newLine = "\n";
|
|
||||||
$outOfQuotes = true;
|
|
||||||
$buffer = '';
|
|
||||||
$noescape = true;
|
|
||||||
|
|
||||||
for ($i = 0; $i < $strLen; $i++) {
|
|
||||||
// Grab the next character in the string
|
|
||||||
$char = substr($json, $i, 1);
|
|
||||||
|
|
||||||
// Are we inside a quoted string?
|
|
||||||
if ('"' === $char && $noescape) {
|
|
||||||
$outOfQuotes = !$outOfQuotes;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!$outOfQuotes) {
|
|
||||||
$buffer .= $char;
|
|
||||||
$noescape = '\\' === $char ? !$noescape : true;
|
|
||||||
continue;
|
|
||||||
} elseif ('' !== $buffer) {
|
|
||||||
if ($unescapeSlashes) {
|
|
||||||
$buffer = str_replace('\\/', '/', $buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($unescapeUnicode && function_exists('mb_convert_encoding')) {
|
|
||||||
// http://stackoverflow.com/questions/2934563/how-to-decode-unicode-escape-sequences-like-u00ed-to-proper-utf-8-encoded-cha
|
|
||||||
$buffer = preg_replace_callback('/(?<!\\\)\\\\u([0-9a-f]{4})/i', function($match) {
|
|
||||||
return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
|
|
||||||
}, $buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
$result .= $buffer.$char;
|
|
||||||
$buffer = '';
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (':' === $char) {
|
|
||||||
// Add a space after the : character
|
|
||||||
$char .= ' ';
|
|
||||||
} elseif (('}' === $char || ']' === $char)) {
|
|
||||||
$pos--;
|
|
||||||
$prevChar = substr($json, $i - 1, 1);
|
|
||||||
|
|
||||||
if ('{' !== $prevChar && '[' !== $prevChar) {
|
|
||||||
// If this character is the end of an element,
|
|
||||||
// output a new line and indent the next line
|
|
||||||
$result .= $newLine;
|
|
||||||
for ($j = 0; $j < $pos; $j++) {
|
|
||||||
$result .= $indentStr;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Collapse empty {} and []
|
|
||||||
$result = rtrim($result)."\n\n".$indentStr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$result .= $char;
|
|
||||||
|
|
||||||
// If the last character was the beginning of an element,
|
|
||||||
// output a new line and indent the next line
|
|
||||||
if (',' === $char || '{' === $char || '[' === $char) {
|
|
||||||
$result .= $newLine;
|
|
||||||
|
|
||||||
if ('{' === $char || '[' === $char) {
|
|
||||||
$pos++;
|
|
||||||
}
|
|
||||||
|
|
||||||
for ($j = 0; $j < $pos; $j++) {
|
|
||||||
$result .= $indentStr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,123 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file is part of Composer.
|
||||||
|
*
|
||||||
|
* (c) Nils Adermann <naderman@naderman.de>
|
||||||
|
* Jordi Boggiano <j.boggiano@seld.be>
|
||||||
|
*
|
||||||
|
* For the full copyright and license information, please view the LICENSE
|
||||||
|
* file that was distributed with this source code.
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace Composer\Json;
|
||||||
|
|
||||||
|
/**
 * Formats JSON strings for PHP < 5.4, where json_encode() does not support
 * the flags JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE
 *
 * @author Konstantin Kudryashiv <ever.zet@gmail.com>
 * @author Jordi Boggiano <j.boggiano@seld.be>
 */
class JsonFormatter
{
    /**
     * Re-indents a JSON string and optionally unescapes slashes and unicode
     * sequences found inside its quoted strings
     *
     * This code is based on the function found at:
     * http://recursive-design.com/blog/2008/03/11/format-json-with-php/
     *
     * Originally licensed under MIT by Dave Perrett <mail@recursive-design.com>
     *
     * Unicode escapes are only decoded when mb_convert_encoding() exists.
     * Escapes for code points below U+0080 and for UTF-16 surrogate halves
     * (U+D800..U+DFFF) are always left escaped: the former would put raw
     * control characters or quotes into the string, the latter cannot be
     * converted to UTF-8 one half at a time.
     *
     * @param  string $json            JSON string to format
     * @param  bool   $unescapeUnicode Whether to turn \uXXXX sequences into UTF-8 characters
     * @param  bool   $unescapeSlashes Whether to turn \/ into /
     * @return string
     */
    public static function format($json, $unescapeUnicode, $unescapeSlashes)
    {
        $result = '';
        $pos = 0;
        $strLen = strlen($json);
        $indentStr = ' ';
        $newLine = "\n";
        $outOfQuotes = true;
        $buffer = '';
        $noescape = true;

        for ($i = 0; $i < $strLen; $i++) {
            // Grab the next character in the string
            $char = substr($json, $i, 1);

            // An unescaped quote opens or closes a string
            if ('"' === $char && $noescape) {
                $outOfQuotes = !$outOfQuotes;
            }

            if (!$outOfQuotes) {
                // Inside a string: collect it untouched and track whether
                // the next character is escaped by a backslash
                $buffer .= $char;
                $noescape = '\\' === $char ? !$noescape : true;
                continue;
            }

            if ('' !== $buffer) {
                // $char is the closing quote: post-process the collected string
                if ($unescapeSlashes) {
                    $buffer = str_replace('\\/', '/', $buffer);
                }

                if ($unescapeUnicode && function_exists('mb_convert_encoding')) {
                    // http://stackoverflow.com/questions/2934563/how-to-decode-unicode-escape-sequences-like-u00ed-to-proper-utf-8-encoded-cha
                    $buffer = preg_replace_callback('/(\\\\+)u([0-9a-f]{4})/i', function ($match) {
                        $l = strlen($match[1]);

                        // An even number of backslashes means the "u" follows
                        // an escaped backslash and is not a unicode escape
                        if (0 === $l % 2) {
                            return $match[0];
                        }

                        $code = hexdec($match[2]);
                        if ($code < 0x80 || (0xD800 <= $code && $code <= 0xDFFF)) {
                            // Keep ASCII escapes (control chars, quotes, ...)
                            // and surrogate halves exactly as they were
                            return $match[0];
                        }

                        return str_repeat('\\', $l - 1).mb_convert_encoding(pack('H*', $match[2]), 'UTF-8', 'UCS-2BE');
                    }, $buffer);
                }

                $result .= $buffer.$char;
                $buffer = '';
                continue;
            }

            if (':' === $char) {
                // Add a space after the : character
                $char .= ' ';
            } elseif ('}' === $char || ']' === $char) {
                $pos--;
                $prevChar = substr($json, $i - 1, 1);

                if ('{' !== $prevChar && '[' !== $prevChar) {
                    // If this character is the end of an element,
                    // output a new line and indent the next line
                    $result .= $newLine.str_repeat($indentStr, max(0, $pos));
                } else {
                    // Empty {} or []: replace the whitespace emitted after
                    // the opener by a blank line followed by one indent
                    $result = rtrim($result)."\n\n".$indentStr;
                }
            }

            $result .= $char;

            // If the last character was the beginning of an element,
            // output a new line and indent the next line
            if (',' === $char || '{' === $char || '[' === $char) {
                if ('{' === $char || '[' === $char) {
                    $pos++;
                }

                $result .= $newLine.str_repeat($indentStr, max(0, $pos));
            }
        }

        return $result;
    }
}
|
|
@ -198,13 +198,6 @@ class JsonFileTest extends \PHPUnit_Framework_TestCase
|
||||||
$this->assertJsonFormat('"\\u018c"', $data, 0);
|
$this->assertJsonFormat('"\\u018c"', $data, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Feeds a string containing a literal backslash-u sequence, once with a
 * single and once with a doubled backslash, to assertJsonFormat together
 * with the JSON text expected for it
 */
public function testDoubleEscapedUnicode()
{
    $input = "Zdj\\u0119ciahl\\\\u0119kkjk";
    $expectedJson = '"Zdj\\\\u0119ciahl\\\\\\\\u0119kkjk"';

    $this->assertJsonFormat($expectedJson, $input);
}
|
|
||||||
|
|
||||||
private function expectParseException($text, $json)
|
private function expectParseException($text, $json)
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file is part of Composer.
|
||||||
|
*
|
||||||
|
* (c) Nils Adermann <naderman@naderman.de>
|
||||||
|
* Jordi Boggiano <j.boggiano@seld.be>
|
||||||
|
*
|
||||||
|
* For the full copyright and license information, please view the LICENSE
|
||||||
|
* file that was distributed with this source code.
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace Composer\Test\Json;
|
||||||
|
|
||||||
|
use Composer\Json\JsonFormatter;
|
||||||
|
|
||||||
|
class JsonFormatterTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Test if \u0119 (196+153) will get correctly formatted
     * See ticket #2613
     */
    public function testUnicodeWithPrependedSlash()
    {
        // JsonFormatter::format() only decodes unicode escapes when
        // mb_convert_encoding() exists, so the expectation below cannot
        // hold without it
        if (!function_exists('mb_convert_encoding')) {
            $this->markTestSkipped('Test requires the mbstring extension');
        }

        // quote, three backslashes, u0119, quote
        $data = '"' . chr(92) . chr(92) . chr(92) . 'u0119"';
        $encodedData = JsonFormatter::format($data, true, true);

        // quote, two backslashes, the UTF-8 bytes of U+0119, quote
        $expected = '34+92+92+196+153+34';
        $this->assertSame($expected, $this->getCharacterCodes($encodedData));
    }

    /**
     * Convert string to character codes split by a plus sign
     *
     * @param  string $string
     * @return string Byte values of $string joined with "+", empty for an empty string
     */
    protected function getCharacterCodes($string)
    {
        $codes = array();
        $length = strlen($string);
        for ($i = 0; $i < $length; $i++) {
            $codes[] = ord($string[$i]);
        }

        return implode('+', $codes);
    }
}
|
Loading…
Reference in New Issue