www.aleph1.co.uk Git - yaffs-website/blob - node_modules/iconv-lite/encodings/utf16.js

   1 "use strict"
   2
   3 // == UTF16-BE codec. ==========================================================
   4
   5 exports.utf16be = Utf16BECodec;
   6 function Utf16BECodec() {
   7 }
   8
   9 Utf16BECodec.prototype.encoder = Utf16BEEncoder;
  10 Utf16BECodec.prototype.decoder = Utf16BEDecoder;
  11 Utf16BECodec.prototype.bomAware = true;
  12
  13
  14 // -- Encoding
  15
  16 function Utf16BEEncoder() {
  17 }
  18
  19 Utf16BEEncoder.prototype.write = function(str) {
  20     var buf = new Buffer(str, 'ucs2');
  21     for (var i = 0; i < buf.length; i += 2) {
  22         var tmp = buf[i]; buf[i] = buf[i+1]; buf[i+1] = tmp;
  23     }
  24     return buf;
  25 }
  26
  27 Utf16BEEncoder.prototype.end = function() {
  28 }
  29
  30
  31 // -- Decoding
  32
  33 function Utf16BEDecoder() {
  34     this.overflowByte = -1;
  35 }
  36
  37 Utf16BEDecoder.prototype.write = function(buf) {
  38     if (buf.length == 0)
  39         return '';
  40
  41     var buf2 = new Buffer(buf.length + 1),
  42         i = 0, j = 0;
  43
  44     if (this.overflowByte !== -1) {
  45         buf2[0] = buf[0];
  46         buf2[1] = this.overflowByte;
  47         i = 1; j = 2;
  48     }
  49
  50     for (; i < buf.length-1; i += 2, j+= 2) {
  51         buf2[j] = buf[i+1];
  52         buf2[j+1] = buf[i];
  53     }
  54
  55     this.overflowByte = (i == buf.length-1) ? buf[buf.length-1] : -1;
  56
  57     return buf2.slice(0, j).toString('ucs2');
  58 }
  59
  60 Utf16BEDecoder.prototype.end = function() {
  61 }
  62
  63
  64 // == UTF-16 codec =============================================================
  65 // Decoder chooses automatically from UTF-16LE and UTF-16BE using BOM and space-based heuristic.
  66 // Defaults to UTF-16LE, as it's prevalent and default in Node.
  67 // http://en.wikipedia.org/wiki/UTF-16 and http://encoding.spec.whatwg.org/#utf-16le
  68 // Decoder default can be changed: iconv.decode(buf, 'utf16', {defaultEncoding: 'utf-16be'});
  69
  70 // Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
  71
  72 exports.utf16 = Utf16Codec;
  73 function Utf16Codec(codecOptions, iconv) {
  74     this.iconv = iconv;
  75 }
  76
  77 Utf16Codec.prototype.encoder = Utf16Encoder;
  78 Utf16Codec.prototype.decoder = Utf16Decoder;
  79
  80
  81 // -- Encoding (pass-through)
  82
  83 function Utf16Encoder(options, codec) {
  84     options = options || {};
  85     if (options.addBOM === undefined)
  86         options.addBOM = true;
  87     this.encoder = codec.iconv.getEncoder('utf-16le', options);
  88 }
  89
  90 Utf16Encoder.prototype.write = function(str) {
  91     return this.encoder.write(str);
  92 }
  93
  94 Utf16Encoder.prototype.end = function() {
  95     return this.encoder.end();
  96 }
  97
  98
  99 // -- Decoding
 100
 101 function Utf16Decoder(options, codec) {
 102     this.decoder = null;
 103     this.initialBytes = [];
 104     this.initialBytesLen = 0;
 105
 106     this.options = options || {};
 107     this.iconv = codec.iconv;
 108 }
 109
 110 Utf16Decoder.prototype.write = function(buf) {
 111     if (!this.decoder) {
 112         // Codec is not chosen yet. Accumulate initial bytes.
 113         this.initialBytes.push(buf);
 114         this.initialBytesLen += buf.length;
 115
 116         if (this.initialBytesLen < 16) // We need more bytes to use space heuristic (see below)
 117             return '';
 118
 119         // We have enough bytes -> detect endianness.
 120         var buf = Buffer.concat(this.initialBytes),
 121             encoding = detectEncoding(buf, this.options.defaultEncoding);
 122         this.decoder = this.iconv.getDecoder(encoding, this.options);
 123         this.initialBytes.length = this.initialBytesLen = 0;
 124     }
 125
 126     return this.decoder.write(buf);
 127 }
 128
 129 Utf16Decoder.prototype.end = function() {
 130     if (!this.decoder) {
 131         var buf = Buffer.concat(this.initialBytes),
 132             encoding = detectEncoding(buf, this.options.defaultEncoding);
 133         this.decoder = this.iconv.getDecoder(encoding, this.options);
 134
 135         var res = this.decoder.write(buf),
 136             trail = this.decoder.end();
 137
 138         return trail ? (res + trail) : res;
 139     }
 140     return this.decoder.end();
 141 }
 142
 143 function detectEncoding(buf, defaultEncoding) {
 144     var enc = defaultEncoding || 'utf-16le';
 145
 146     if (buf.length >= 2) {
 147         // Check BOM.
 148         if (buf[0] == 0xFE && buf[1] == 0xFF) // UTF-16BE BOM
 149             enc = 'utf-16be';
 150         else if (buf[0] == 0xFF && buf[1] == 0xFE) // UTF-16LE BOM
 151             enc = 'utf-16le';
 152         else {
 153             // No BOM found. Try to deduce encoding from initial content.
 154             // Most of the time, the content has ASCII chars (U+00**), but the opposite (U+**00) is uncommon.
 155             // So, we count ASCII as if it was LE or BE, and decide from that.
 156             var asciiCharsLE = 0, asciiCharsBE = 0, // Counts of chars in both positions
 157                 _len = Math.min(buf.length - (buf.length % 2), 64); // Len is always even.
 158
 159             for (var i = 0; i < _len; i += 2) {
 160                 if (buf[i] === 0 && buf[i+1] !== 0) asciiCharsBE++;
 161                 if (buf[i] !== 0 && buf[i+1] === 0) asciiCharsLE++;
 162             }
 163
 164             if (asciiCharsBE > asciiCharsLE)
 165                 enc = 'utf-16be';
 166             else if (asciiCharsBE < asciiCharsLE)
 167                 enc = 'utf-16le';
 168         }
 169     }
 170
 171     return enc;
 172 }
 173
 174