www.aleph1.co.uk Git - yaffs-website/blob - vendor/zendframework/zend-stdlib/src/StringWrapper/Iconv.php

   1 <?php
   2 /**
   3  * Zend Framework (http://framework.zend.com/)
   4  *
   5  * @link      http://github.com/zendframework/zf2 for the canonical source repository
   6  * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
   7  * @license   http://framework.zend.com/license/new-bsd New BSD License
   8  */
   9
  10 namespace Zend\Stdlib\StringWrapper;
  11
  12 use Zend\Stdlib\Exception;
  13
  14 class Iconv extends AbstractStringWrapper
  15 {
  16     /**
  17      * List of supported character sets (upper case)
  18      *
  19      * @var string[]
  20      * @link http://www.gnu.org/software/libiconv/
  21      */
  22     protected static $encodings = [
  23         // European languages
  24         'ASCII',
  25         'ISO-8859-1',
  26         'ISO-8859-2',
  27         'ISO-8859-3',
  28         'ISO-8859-4',
  29         'ISO-8859-5',
  30         'ISO-8859-7',
  31         'ISO-8859-9',
  32         'ISO-8859-10',
  33         'ISO-8859-13',
  34         'ISO-8859-14',
  35         'ISO-8859-15',
  36         'ISO-8859-16',
  37         'KOI8-R',
  38         'KOI8-U',
  39         'KOI8-RU',
  40         'CP1250',
  41         'CP1251',
  42         'CP1252',
  43         'CP1253',
  44         'CP1254',
  45         'CP1257',
  46         'CP850',
  47         'CP866',
  48         'CP1131',
  49         'MACROMAN',
  50         'MACCENTRALEUROPE',
  51         'MACICELAND',
  52         'MACCROATIAN',
  53         'MACROMANIA',
  54         'MACCYRILLIC',
  55         'MACUKRAINE',
  56         'MACGREEK',
  57         'MACTURKISH',
  58         'MACINTOSH',
  59
  60         // Semitic languages
  61         'ISO-8859-6',
  62         'ISO-8859-8',
  63         'CP1255',
  64         'CP1256',
  65         'CP862',
  66         'MACHEBREW',
  67         'MACARABIC',
  68
  69         // Japanese
  70         'EUC-JP',
  71         'SHIFT_JIS',
  72         'CP932',
  73         'ISO-2022-JP',
  74         'ISO-2022-JP-2',
  75         'ISO-2022-JP-1',
  76
  77         // Chinese
  78         'EUC-CN',
  79         'HZ',
  80         'GBK',
  81         'CP936',
  82         'GB18030',
  83         'EUC-TW',
  84         'BIG5',
  85         'CP950',
  86         'BIG5-HKSCS',
  87         'BIG5-HKSCS:2004',
  88         'BIG5-HKSCS:2001',
  89         'BIG5-HKSCS:1999',
  90         'ISO-2022-CN',
  91         'ISO-2022-CN-EXT',
  92
  93         // Korean
  94         'EUC-KR',
  95         'CP949',
  96         'ISO-2022-KR',
  97         'JOHAB',
  98
  99         // Armenian
 100         'ARMSCII-8',
 101
 102         // Georgian
 103         'GEORGIAN-ACADEMY',
 104         'GEORGIAN-PS',
 105
 106         // Tajik
 107         'KOI8-T',
 108
 109         // Kazakh
 110         'PT154',
 111         'RK1048',
 112
 113         // Thai
 114         'ISO-8859-11',
 115         'TIS-620',
 116         'CP874',
 117         'MACTHAI',
 118
 119         // Laotian
 120         'MULELAO-1',
 121         'CP1133',
 122
 123         // Vietnamese
 124         'VISCII',
 125         'TCVN',
 126         'CP1258',
 127
 128         // Platform specifics
 129         'HP-ROMAN8',
 130         'NEXTSTEP',
 131
 132         // Full Unicode
 133         'UTF-8',
 134         'UCS-2',
 135         'UCS-2BE',
 136         'UCS-2LE',
 137         'UCS-4',
 138         'UCS-4BE',
 139         'UCS-4LE',
 140         'UTF-16',
 141         'UTF-16BE',
 142         'UTF-16LE',
 143         'UTF-32',
 144         'UTF-32BE',
 145         'UTF-32LE',
 146         'UTF-7',
 147         'C99',
 148         'JAVA',
 149
 150         /* Commented out because that's internal encodings not existing in real world
 151         // Full Unicode, in terms of uint16_t or uint32_t (with machine dependent endianness and alignment)
 152         'UCS-2-INTERNAL',
 153         'UCS-4-INTERNAL',
 154
 155         // Locale dependent, in terms of `char' or `wchar_t' (with machine dependent endianness and alignment,
 156         // and with OS and locale dependent semantics)
 157         'char',
 158         'wchar_t',
 159         '', // The empty encoding name is equivalent to "char": it denotes the locale dependent character encoding.
 160         */
 161
 162         // When configured with the option --enable-extra-encodings,
 163         // it also provides support for a few extra encodings:
 164
 165         // European languages
 166         'CP437',
 167         'CP737',
 168         'CP775',
 169         'CP852',
 170         'CP853',
 171         'CP855',
 172         'CP857',
 173         'CP858',
 174         'CP860',
 175         'CP861',
 176         'CP863',
 177         'CP865',
 178         'CP869',
 179         'CP1125',
 180
 181         // Semitic languages
 182         'CP864',
 183
 184         // Japanese
 185         'EUC-JISX0213',
 186         'Shift_JISX0213',
 187         'ISO-2022-JP-3',
 188
 189         // Chinese
 190         'BIG5-2003', // (experimental)
 191
 192         // Turkmen
 193         'TDS565',
 194
 195         // Platform specifics
 196         'ATARIST',
 197         'RISCOS-LATIN1',
 198     ];
 199
 200     /**
 201      * Get a list of supported character encodings
 202      *
 203      * @return string[]
 204      */
 205     public static function getSupportedEncodings()
 206     {
 207         return static::$encodings;
 208     }
 209
 210     /**
 211      * Constructor
 212      *
 213      * @throws Exception\ExtensionNotLoadedException
 214      */
 215     public function __construct()
 216     {
 217         if (! extension_loaded('iconv')) {
 218             throw new Exception\ExtensionNotLoadedException(
 219                 'PHP extension "iconv" is required for this wrapper'
 220             );
 221         }
 222     }
 223
 224     /**
 225      * Returns the length of the given string
 226      *
 227      * @param string $str
 228      * @return int|false
 229      */
 230     public function strlen($str)
 231     {
 232         return iconv_strlen($str, $this->getEncoding());
 233     }
 234
 235     /**
 236      * Returns the portion of string specified by the start and length parameters
 237      *
 238      * @param string   $str
 239      * @param int      $offset
 240      * @param int|null $length
 241      * @return string|false
 242      */
 243     public function substr($str, $offset = 0, $length = null)
 244     {
 245         return iconv_substr($str, $offset, $length, $this->getEncoding());
 246     }
 247
 248     /**
 249      * Find the position of the first occurrence of a substring in a string
 250      *
 251      * @param string $haystack
 252      * @param string $needle
 253      * @param int    $offset
 254      * @return int|false
 255      */
 256     public function strpos($haystack, $needle, $offset = 0)
 257     {
 258         return iconv_strpos($haystack, $needle, $offset, $this->getEncoding());
 259     }
 260
 261     /**
 262      * Convert a string from defined encoding to the defined convert encoding
 263      *
 264      * @param string  $str
 265      * @param bool $reverse
 266      * @return string|false
 267      */
 268     public function convert($str, $reverse = false)
 269     {
 270         $encoding        = $this->getEncoding();
 271         $convertEncoding = $this->getConvertEncoding();
 272         if ($convertEncoding === null) {
 273             throw new Exception\LogicException(
 274                 'No convert encoding defined'
 275             );
 276         }
 277
 278         if ($encoding === $convertEncoding) {
 279             return $str;
 280         }
 281
 282         $fromEncoding = $reverse ? $convertEncoding : $encoding;
 283         $toEncoding   = $reverse ? $encoding : $convertEncoding;
 284
 285         // automatically add "//IGNORE" to not stop converting on invalid characters
 286         // invalid characters triggers a notice anyway
 287         return iconv($fromEncoding, $toEncoding . '//IGNORE', $str);
 288     }
 289 }