Source for file String.php
Documentation is available at String.php
* Copyright (c) 2006 - 2009 PHPExcel
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* @package PHPExcel_Shared
* @copyright Copyright (c) 2006 - 2009 PHPExcel (http://www.codeplex.com/PHPExcel)
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
* @version 1.7.0, 2009-08-10
* @package PHPExcel_Shared
* @copyright Copyright (c) 2006 - 2009 PHPExcel (http://www.codeplex.com/PHPExcel)
* Control characters array
private static $_controlCharacters = array();
* Is mbstring extension available?
private static $_isMbstringEnabled;
* Is iconv extension available?
private static $_isIconvEnabled;
* Build control characters array
for ($i = 0; $i <= 19; ++ $i) {
if ($i != 9 && $i != 10 && $i != 13) {
self::$_controlCharacters[$find] = $replace;
* Get whether mbstring extension is available
if (isset (self::$_isMbstringEnabled)) {
return self::$_isMbstringEnabled;
self::$_isMbstringEnabled = function_exists('mb_convert_encoding') ?
return self::$_isMbstringEnabled;
* Get whether iconv extension is available
if (isset (self::$_isIconvEnabled)) {
return self::$_isIconvEnabled;
self::$_isIconvEnabled = function_exists('iconv') ?
return self::$_isIconvEnabled;
* Convert from OpenXML escaped control character to PHP control character
* That's correct, control characters are stored directly in the shared-strings table.
* We do encode characters that cannot be represented in XML using the following escape sequence:
* _xHHHH_ where H represents a hexadecimal character in the character's value...
* So you could end up with something like _x0008_ in a string (either in a cell value (<v>)
* element or in the shared string <t> element).
* @param string $value Value to unescape
if(empty(self::$_controlCharacters)) {
self::_buildControlCharacters();
return str_replace( array_keys(self::$_controlCharacters), array_values(self::$_controlCharacters), $value );
* Convert from PHP control character to OpenXML escaped control character
* That's correct, control characters are stored directly in the shared-strings table.
* We do encode characters that cannot be represented in XML using the following escape sequence:
* _xHHHH_ where H represents a hexadecimal character in the character's value...
* So you could end up with something like _x0008_ in a string (either in a cell value (<v>)
* element or in the shared string <t> element).
* @param string $value Value to escape
if(empty(self::$_controlCharacters)) {
self::_buildControlCharacters();
* Try to sanitize UTF-8, stripping invalid byte sequences. Not perfect. Does not handle surrogate characters.
if (self::getIsIconvEnabled()) {
$value = @iconv('UTF-8', 'UTF-8', $value);
if (self::getIsMbstringEnabled()) {
* Check if a string contains UTF8 data
public static function IsUTF8($value = '') {
* Formats a numeric value as a string for output in various output writers forcing
* point as decimal separator in case locale is other than English.
* Converts a UTF-8 string into BIFF8 Unicode string data (8-bit string length)
* Writes the string using uncompressed notation, no rich text, no Asian phonetics
* If neither the iconv nor the mbstring extension is available, ASCII is assumed, and compressed notation is used
* although this will give wrong results for non-ASCII strings
* see OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3
* @param string $value UTF-8 encoded string
$ln = self::CountCharacters($value, 'UTF-8');
$opt = (self::getIsIconvEnabled() || self::getIsMbstringEnabled()) ?
$chars = self::ConvertEncoding($value, 'UTF-16LE', 'UTF-8');
$data = pack('CC', $ln, $opt) . $chars;
* Converts a UTF-8 string into BIFF8 Unicode string data (16-bit string length)
* Writes the string using uncompressed notation, no rich text, no Asian phonetics
* If neither the iconv nor the mbstring extension is available, ASCII is assumed, and compressed notation is used
* although this will give wrong results for non-ASCII strings
* see OpenOffice.org's Documentation of the Microsoft Excel File Format, sect. 2.5.3
* @param string $value UTF-8 encoded string
$ln = self::CountCharacters($value, 'UTF-8');
$opt = (self::getIsIconvEnabled() || self::getIsMbstringEnabled()) ?
$chars = self::ConvertEncoding($value, 'UTF-16LE', 'UTF-8');
$data = pack('vC', $ln, $opt) . $chars;
* Convert string from one encoding to another. First try iconv, then mbstring, or no conversion
* @param string $to Encoding to convert to, e.g. 'UTF-8'
* @param string $from Encoding to convert from, e.g. 'UTF-16LE'
if (self::getIsIconvEnabled()) {
$value = iconv($from, $to, $value);
if (self::getIsMbstringEnabled()) {
* Get character count. First try iconv, then mbstring, finally strlen
* @param string $enc Encoding
* @return int Character count
if (self::getIsIconvEnabled()) {
if (self::getIsMbstringEnabled()) {
* Get a substring of a UTF-8 encoded string
* @param string $pValue UTF-8 encoded string
* @param int $pStart Start offset
* @param int $pLength Maximum number of characters in substring
public static function Substring($pValue = '', $pStart = 0, $pLength = 0)
if (self::getIsIconvEnabled()) {
$string = iconv_substr($pValue, $pStart, $pLength, 'UTF-8');
if (self::getIsMbstringEnabled()) {
$string = mb_substr($pValue, $pStart, $pLength, 'UTF-8');
$string = substr($pValue, $pStart, $pLength);
|