1
0
Fork 0

Fix for #2613: when using PHP < 5.4 there was an unnecessary backslash before each UTF-8 char. The problem was that the regexp matched all UTF-8 encoded chars (including the ones that were escaped). The new regexp uses the lookbehind feature to check that the backslash isn't prefixed with another backslash.

pull/2718/head
Sandy Pleyte 2014-02-18 09:01:12 +01:00
parent 7a902ed96d
commit f92f2f45a9
4 changed files with 169 additions and 87 deletions

View File

@ -177,11 +177,6 @@ class JsonFile
/**
* Encodes an array into (optionally pretty-printed) JSON
*
* This code is based on the function found at:
* http://recursive-design.com/blog/2008/03/11/format-json-with-php/
*
* Originally licensed under MIT by Dave Perrett <mail@recursive-design.com>
*
* @param mixed $data Data to encode into a formatted JSON string
* @param int $options json_encode options (defaults to JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE)
* @return string Encoded json
@ -202,81 +197,7 @@ class JsonFile
return $json;
}
$result = '';
$pos = 0;
$strLen = strlen($json);
$indentStr = ' ';
$newLine = "\n";
$outOfQuotes = true;
$buffer = '';
$noescape = true;
for ($i = 0; $i < $strLen; $i++) {
// Grab the next character in the string
$char = substr($json, $i, 1);
// Are we inside a quoted string?
if ('"' === $char && $noescape) {
$outOfQuotes = !$outOfQuotes;
}
if (!$outOfQuotes) {
$buffer .= $char;
$noescape = '\\' === $char ? !$noescape : true;
continue;
} elseif ('' !== $buffer) {
if ($unescapeSlashes) {
$buffer = str_replace('\\/', '/', $buffer);
}
if ($unescapeUnicode && function_exists('mb_convert_encoding')) {
// http://stackoverflow.com/questions/2934563/how-to-decode-unicode-escape-sequences-like-u00ed-to-proper-utf-8-encoded-cha
$buffer = preg_replace_callback('/(?<!\\\)\\\\u([0-9a-f]{4})/i', function($match) {
return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
}, $buffer);
}
$result .= $buffer.$char;
$buffer = '';
continue;
}
if (':' === $char) {
// Add a space after the : character
$char .= ' ';
} elseif (('}' === $char || ']' === $char)) {
$pos--;
$prevChar = substr($json, $i - 1, 1);
if ('{' !== $prevChar && '[' !== $prevChar) {
// If this character is the end of an element,
// output a new line and indent the next line
$result .= $newLine;
for ($j = 0; $j < $pos; $j++) {
$result .= $indentStr;
}
} else {
// Collapse empty {} and []
$result = rtrim($result)."\n\n".$indentStr;
}
}
$result .= $char;
// If the last character was the beginning of an element,
// output a new line and indent the next line
if (',' === $char || '{' === $char || '[' === $char) {
$result .= $newLine;
if ('{' === $char || '[' === $char) {
$pos++;
}
for ($j = 0; $j < $pos; $j++) {
$result .= $indentStr;
}
}
}
$result = JsonFormatter::format($json, $unescapeUnicode, $unescapeSlashes);
return $result;
}

View File

@ -0,0 +1,123 @@
<?php
/*
* This file is part of Composer.
*
* (c) Nils Adermann <naderman@naderman.de>
* Jordi Boggiano <j.boggiano@seld.be>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Composer\Json;
/**
* Formats JSON strings. Used for PHP < 5.4, because json_encode doesn't
* support the flags JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE
* in these versions
*
* @author Konstantin Kudryashiv <ever.zet@gmail.com>
* @author Jordi Boggiano <j.boggiano@seld.be>
*/
class JsonFormatter
{
    /**
     * Pretty-prints an already encoded JSON string and optionally reverts
     * the slash / unicode escaping applied by json_encode.
     *
     * This code is based on the function found at:
     * http://recursive-design.com/blog/2008/03/11/format-json-with-php/
     *
     * Originally licensed under MIT by Dave Perrett <mail@recursive-design.com>
     *
     * Escape sequences are only touched inside string literals. Unicode
     * escapes are only converted when mb_convert_encoding() is available.
     *
     * @param  string $json            Encoded JSON to format
     * @param  bool   $unescapeUnicode Turn \uXXXX escapes back into UTF-8 characters
     * @param  bool   $unescapeSlashes Turn "\/" back into "/"
     * @return string
     */
    public static function format($json, $unescapeUnicode, $unescapeSlashes)
    {
        $result = '';
        $pos = 0;
        $strLen = strlen($json);
        // NOTE(review): one space per indent level as found in this source;
        // confirm this is intended (JSON_PRETTY_PRINT itself uses four).
        $indentStr = ' ';
        $newLine = "\n";
        $outOfQuotes = true;
        $buffer = '';
        $noescape = true;

        for ($i = 0; $i < $strLen; $i++) {
            // Grab the next character in the string
            $char = substr($json, $i, 1);

            // An unescaped double quote opens or closes a string literal
            if ('"' === $char && $noescape) {
                $outOfQuotes = !$outOfQuotes;
            }

            if (!$outOfQuotes) {
                // Inside a string: collect it verbatim (including the opening
                // quote) and track whether the next character is escaped
                $buffer .= $char;
                $noescape = '\\' === $char ? !$noescape : true;
                continue;
            } elseif ('' !== $buffer) {
                // $char is the closing quote: post-process the collected
                // string literal as a whole, then flush it
                if ($unescapeSlashes) {
                    $buffer = self::unescapeSlashesInString($buffer);
                }

                if ($unescapeUnicode && function_exists('mb_convert_encoding')) {
                    $buffer = self::unescapeUnicodeInString($buffer);
                }

                $result .= $buffer.$char;
                $buffer = '';
                continue;
            }

            if (':' === $char) {
                // Add a space after the : character
                $char .= ' ';
            } elseif (('}' === $char || ']' === $char)) {
                $pos--;
                $prevChar = substr($json, $i - 1, 1);

                if ('{' !== $prevChar && '[' !== $prevChar) {
                    // If this character is the end of an element,
                    // output a new line and indent the next line
                    $result .= $newLine;
                    for ($j = 0; $j < $pos; $j++) {
                        $result .= $indentStr;
                    }
                } else {
                    // Empty {} or []: drop the indentation written after the
                    // opener and emit a blank line plus one indent unit
                    // before the closing character
                    $result = rtrim($result)."\n\n".$indentStr;
                }
            }

            $result .= $char;

            // If the last character was the beginning of an element,
            // output a new line and indent the next line
            if (',' === $char || '{' === $char || '[' === $char) {
                $result .= $newLine;

                if ('{' === $char || '[' === $char) {
                    $pos++;
                }

                for ($j = 0; $j < $pos; $j++) {
                    $result .= $indentStr;
                }
            }
        }

        return $result;
    }

    /**
     * Replaces escaped slashes ("\/") with plain slashes inside a JSON string literal.
     *
     * Only a slash preceded by an odd number of backslashes is actually
     * escaped; with an even number the backslashes are escaped backslashes
     * and the slash is already plain, so the text is left untouched.
     *
     * @param  string $buffer Raw JSON string literal
     * @return string
     */
    private static function unescapeSlashesInString($buffer)
    {
        return preg_replace_callback('#(\\\\+)/#', function ($match) {
            $l = strlen($match[1]);

            if (0 === $l % 2) {
                return $match[0];
            }

            return str_repeat('\\', $l - 1).'/';
        }, $buffer);
    }

    /**
     * Replaces \uXXXX escapes with the UTF-8 character inside a JSON string literal.
     *
     * Left escaped on purpose:
     * - sequences preceded by an even number of backslashes (a literal
     *   backslash followed by the text "uXXXX", see ticket #2613)
     * - control characters, the double quote and the backslash, which are
     *   not allowed unescaped inside a JSON string
     * - UTF-16 surrogate halves (U+D800..U+DFFF), which cannot be converted
     *   to UTF-8 one at a time
     *
     * @param  string $buffer Raw JSON string literal
     * @return string
     */
    private static function unescapeUnicodeInString($buffer)
    {
        // http://stackoverflow.com/questions/2934563/how-to-decode-unicode-escape-sequences-like-u00ed-to-proper-utf-8-encoded-cha
        return preg_replace_callback('/(\\\\+)u([0-9a-f]{4})/i', function ($match) {
            $l = strlen($match[1]);

            if (0 === $l % 2) {
                return $match[0];
            }

            $code = hexdec($match[2]);
            if ($code < 0x20 || 0x22 === $code || 0x5C === $code || (0xD800 <= $code && $code <= 0xDFFF)) {
                return $match[0];
            }

            return str_repeat('\\', $l - 1).mb_convert_encoding(pack('H*', $match[2]), 'UTF-8', 'UCS-2BE');
        }, $buffer);
    }
}

View File

@ -198,13 +198,6 @@ class JsonFileTest extends \PHPUnit_Framework_TestCase
$this->assertJsonFormat('"\\u018c"', $data, 0);
}
/**
 * Input containing the literal text "\u0119" (once behind a single
 * backslash, once behind two) is expected to come out with every
 * backslash escaped.
 */
public function testDoubleEscapedUnicode()
{
    $input = "Zdj\\u0119ciahl\\\\u0119kkjk";
    $expectedJson = '"Zdj\\\\u0119ciahl\\\\\\\\u0119kkjk"';

    $this->assertJsonFormat($expectedJson, $input);
}
private function expectParseException($text, $json)
{
try {

View File

@ -0,0 +1,45 @@
<?php
/*
* This file is part of Composer.
*
* (c) Nils Adermann <naderman@naderman.de>
* Jordi Boggiano <j.boggiano@seld.be>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Composer\Test\Json;
use Composer\Json\JsonFormatter;
class JsonFormatterTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Regression test for ticket #2613.
     *
     * The input is a JSON string literal holding three backslashes followed
     * by "u0119". The expectation is two backslashes followed by the
     * UTF-8 bytes 196 and 153, wrapped in double quotes (byte 34).
     */
    public function testUnicodeWithPrependedSlash()
    {
        $backslash = chr(92);
        $input = '"' . $backslash . $backslash . $backslash . 'u0119"';

        $formatted = JsonFormatter::format($input, true, true);

        $this->assertEquals('34+92+92+196+153+34', $this->getCharacterCodes($formatted));
    }

    /**
     * Renders a string as its byte values joined by plus signs
     *
     * @param  string $string
     * @return string
     */
    protected function getCharacterCodes($string)
    {
        $byteValues = array();
        $length = strlen($string);

        for ($offset = 0; $offset < $length; ++$offset) {
            $byteValues[] = ord($string[$offset]);
        }

        return implode('+', $byteValues);
    }
}