internal.js 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. "use strict";
  2. var Buffer = require("buffer").Buffer;
  3. // Export Node.js internal encodings.
  4. module.exports = {
  5. // Encodings
  6. utf8: { type: "_internal", bomAware: true},
  7. cesu8: { type: "_internal", bomAware: true},
  8. unicode11utf8: "utf8",
  9. ucs2: { type: "_internal", bomAware: true},
  10. utf16le: "ucs2",
  11. binary: { type: "_internal" },
  12. base64: { type: "_internal" },
  13. hex: { type: "_internal" },
  14. // Codec.
  15. _internal: InternalCodec,
  16. };
  17. //------------------------------------------------------------------------------
  18. function InternalCodec(codecOptions, iconv) {
  19. this.enc = codecOptions.encodingName;
  20. this.bomAware = codecOptions.bomAware;
  21. if (this.enc === "base64")
  22. this.encoder = InternalEncoderBase64;
  23. else if (this.enc === "cesu8") {
  24. this.enc = "utf8"; // Use utf8 for decoding.
  25. this.encoder = InternalEncoderCesu8;
  26. // Add decoder for versions of Node not supporting CESU-8
  27. if (new Buffer('eda0bdedb2a9', 'hex').toString() !== '💩') {
  28. this.decoder = InternalDecoderCesu8;
  29. this.defaultCharUnicode = iconv.defaultCharUnicode;
  30. }
  31. }
  32. }
  33. InternalCodec.prototype.encoder = InternalEncoder;
  34. InternalCodec.prototype.decoder = InternalDecoder;
  35. //------------------------------------------------------------------------------
  36. // We use node.js internal decoder. Its signature is the same as ours.
  37. var StringDecoder = require('string_decoder').StringDecoder;
  38. if (!StringDecoder.prototype.end) // Node v0.8 doesn't have this method.
  39. StringDecoder.prototype.end = function() {};
  40. function InternalDecoder(options, codec) {
  41. StringDecoder.call(this, codec.enc);
  42. }
  43. InternalDecoder.prototype = StringDecoder.prototype;
  44. //------------------------------------------------------------------------------
  45. // Encoder is mostly trivial
  46. function InternalEncoder(options, codec) {
  47. this.enc = codec.enc;
  48. }
  49. InternalEncoder.prototype.write = function(str) {
  50. return new Buffer(str, this.enc);
  51. }
  52. InternalEncoder.prototype.end = function() {
  53. }
  54. //------------------------------------------------------------------------------
  55. // Except base64 encoder, which must keep its state.
  56. function InternalEncoderBase64(options, codec) {
  57. this.prevStr = '';
  58. }
  59. InternalEncoderBase64.prototype.write = function(str) {
  60. str = this.prevStr + str;
  61. var completeQuads = str.length - (str.length % 4);
  62. this.prevStr = str.slice(completeQuads);
  63. str = str.slice(0, completeQuads);
  64. return new Buffer(str, "base64");
  65. }
  66. InternalEncoderBase64.prototype.end = function() {
  67. return new Buffer(this.prevStr, "base64");
  68. }
  69. //------------------------------------------------------------------------------
  70. // CESU-8 encoder is also special.
  71. function InternalEncoderCesu8(options, codec) {
  72. }
  73. InternalEncoderCesu8.prototype.write = function(str) {
  74. var buf = new Buffer(str.length * 3), bufIdx = 0;
  75. for (var i = 0; i < str.length; i++) {
  76. var charCode = str.charCodeAt(i);
  77. // Naive implementation, but it works because CESU-8 is especially easy
  78. // to convert from UTF-16 (which all JS strings are encoded in).
  79. if (charCode < 0x80)
  80. buf[bufIdx++] = charCode;
  81. else if (charCode < 0x800) {
  82. buf[bufIdx++] = 0xC0 + (charCode >>> 6);
  83. buf[bufIdx++] = 0x80 + (charCode & 0x3f);
  84. }
  85. else { // charCode will always be < 0x10000 in javascript.
  86. buf[bufIdx++] = 0xE0 + (charCode >>> 12);
  87. buf[bufIdx++] = 0x80 + ((charCode >>> 6) & 0x3f);
  88. buf[bufIdx++] = 0x80 + (charCode & 0x3f);
  89. }
  90. }
  91. return buf.slice(0, bufIdx);
  92. }
  93. InternalEncoderCesu8.prototype.end = function() {
  94. }
  95. //------------------------------------------------------------------------------
  96. // CESU-8 decoder is not implemented in Node v4.0+
  97. function InternalDecoderCesu8(options, codec) {
  98. this.acc = 0;
  99. this.contBytes = 0;
  100. this.accBytes = 0;
  101. this.defaultCharUnicode = codec.defaultCharUnicode;
  102. }
  103. InternalDecoderCesu8.prototype.write = function(buf) {
  104. var acc = this.acc, contBytes = this.contBytes, accBytes = this.accBytes,
  105. res = '';
  106. for (var i = 0; i < buf.length; i++) {
  107. var curByte = buf[i];
  108. if ((curByte & 0xC0) !== 0x80) { // Leading byte
  109. if (contBytes > 0) { // Previous code is invalid
  110. res += this.defaultCharUnicode;
  111. contBytes = 0;
  112. }
  113. if (curByte < 0x80) { // Single-byte code
  114. res += String.fromCharCode(curByte);
  115. } else if (curByte < 0xE0) { // Two-byte code
  116. acc = curByte & 0x1F;
  117. contBytes = 1; accBytes = 1;
  118. } else if (curByte < 0xF0) { // Three-byte code
  119. acc = curByte & 0x0F;
  120. contBytes = 2; accBytes = 1;
  121. } else { // Four or more are not supported for CESU-8.
  122. res += this.defaultCharUnicode;
  123. }
  124. } else { // Continuation byte
  125. if (contBytes > 0) { // We're waiting for it.
  126. acc = (acc << 6) | (curByte & 0x3f);
  127. contBytes--; accBytes++;
  128. if (contBytes === 0) {
  129. // Check for overlong encoding, but support Modified UTF-8 (encoding NULL as C0 80)
  130. if (accBytes === 2 && acc < 0x80 && acc > 0)
  131. res += this.defaultCharUnicode;
  132. else if (accBytes === 3 && acc < 0x800)
  133. res += this.defaultCharUnicode;
  134. else
  135. // Actually add character.
  136. res += String.fromCharCode(acc);
  137. }
  138. } else { // Unexpected continuation byte
  139. res += this.defaultCharUnicode;
  140. }
  141. }
  142. }
  143. this.acc = acc; this.contBytes = contBytes; this.accBytes = accBytes;
  144. return res;
  145. }
  146. InternalDecoderCesu8.prototype.end = function() {
  147. var res = 0;
  148. if (this.contBytes > 0)
  149. res += this.defaultCharUnicode;
  150. return res;
  151. }