You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
			
				
					152 lines
				
				3.3 KiB
			
		
		
			
		
	
	
					152 lines
				
				3.3 KiB
			| 
								 
											4 years ago
										 
									 | 
							
								
							 | 
						||
| 
								 | 
							
								var fs = require('fs');
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								var utf8  = require('./encoding/utf8'),
							 | 
						||
| 
								 | 
							
								  unicode = require('./encoding/unicode'),
							 | 
						||
| 
								 | 
							
								  mbcs    = require('./encoding/mbcs'),
							 | 
						||
| 
								 | 
							
								  sbcs    = require('./encoding/sbcs'),
							 | 
						||
| 
								 | 
							
								  iso2022 = require('./encoding/iso2022');
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								var self = this;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								var recognisers = [
							 | 
						||
| 
								 | 
							
								  new utf8,
							 | 
						||
| 
								 | 
							
								  new unicode.UTF_16BE,
							 | 
						||
| 
								 | 
							
								  new unicode.UTF_16LE,
							 | 
						||
| 
								 | 
							
								  new unicode.UTF_32BE,
							 | 
						||
| 
								 | 
							
								  new unicode.UTF_32LE,
							 | 
						||
| 
								 | 
							
								  new mbcs.sjis,
							 | 
						||
| 
								 | 
							
								  new mbcs.big5,
							 | 
						||
| 
								 | 
							
								  new mbcs.euc_jp,
							 | 
						||
| 
								 | 
							
								  new mbcs.euc_kr,
							 | 
						||
| 
								 | 
							
								  new mbcs.gb_18030,
							 | 
						||
| 
								 | 
							
								  new iso2022.ISO_2022_JP,
							 | 
						||
| 
								 | 
							
								  new iso2022.ISO_2022_KR,
							 | 
						||
| 
								 | 
							
								  new iso2022.ISO_2022_CN,
							 | 
						||
| 
								 | 
							
								  new sbcs.ISO_8859_1,
							 | 
						||
| 
								 | 
							
								  new sbcs.ISO_8859_2,
							 | 
						||
| 
								 | 
							
								  new sbcs.ISO_8859_5,
							 | 
						||
| 
								 | 
							
								  new sbcs.ISO_8859_6,
							 | 
						||
| 
								 | 
							
								  new sbcs.ISO_8859_7,
							 | 
						||
| 
								 | 
							
								  new sbcs.ISO_8859_8,
							 | 
						||
| 
								 | 
							
								  new sbcs.ISO_8859_9,
							 | 
						||
| 
								 | 
							
								  new sbcs.windows_1251,
							 | 
						||
| 
								 | 
							
								  new sbcs.windows_1256,
							 | 
						||
| 
								 | 
							
								  new sbcs.KOI8_R
							 | 
						||
| 
								 | 
							
								];
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								module.exports.detect = function(buffer, opts) {
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  // Tally up the byte occurence statistics.
							 | 
						||
| 
								 | 
							
								  var fByteStats = [];
							 | 
						||
| 
								 | 
							
								  for (var i = 0; i < 256; i++)
							 | 
						||
| 
								 | 
							
								    fByteStats[i] = 0;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  for (var i = buffer.length - 1; i >= 0; i--)
							 | 
						||
| 
								 | 
							
								    fByteStats[buffer[i] & 0x00ff]++;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  var fC1Bytes = false;
							 | 
						||
| 
								 | 
							
								  for (var i = 0x80; i <= 0x9F; i += 1) {
							 | 
						||
| 
								 | 
							
								    if (fByteStats[i] != 0) {
							 | 
						||
| 
								 | 
							
								      fC1Bytes = true;
							 | 
						||
| 
								 | 
							
								      break;
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  var context = {
							 | 
						||
| 
								 | 
							
								    fByteStats:  fByteStats,
							 | 
						||
| 
								 | 
							
								    fC1Bytes:    fC1Bytes,
							 | 
						||
| 
								 | 
							
								    fRawInput:   buffer,
							 | 
						||
| 
								 | 
							
								    fRawLength:  buffer.length,
							 | 
						||
| 
								 | 
							
								    fInputBytes: buffer,
							 | 
						||
| 
								 | 
							
								    fInputLen:   buffer.length
							 | 
						||
| 
								 | 
							
								  };
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  var matches = recognisers.map(function(rec) {
							 | 
						||
| 
								 | 
							
								    return rec.match(context);
							 | 
						||
| 
								 | 
							
								  }).filter(function(match) {
							 | 
						||
| 
								 | 
							
								    return !!match;
							 | 
						||
| 
								 | 
							
								  }).sort(function(a, b) {
							 | 
						||
| 
								 | 
							
								    return b.confidence - a.confidence;
							 | 
						||
| 
								 | 
							
								  });
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (opts && opts.returnAllMatches === true) {
							 | 
						||
| 
								 | 
							
								    return matches;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  else {
							 | 
						||
| 
								 | 
							
								    return matches.length > 0 ? matches[0].name : null;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								module.exports.detectFile = function(filepath, opts, cb) {
							 | 
						||
| 
								 | 
							
								  if (typeof opts === 'function') {
							 | 
						||
| 
								 | 
							
								    cb = opts;
							 | 
						||
| 
								 | 
							
								    opts = undefined;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  var fd;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  var handler = function(err, buffer) {
							 | 
						||
| 
								 | 
							
								    if (fd) {
							 | 
						||
| 
								 | 
							
								      fs.closeSync(fd);
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    if (err) return cb(err, null);
							 | 
						||
| 
								 | 
							
								    cb(null, self.detect(buffer, opts));
							 | 
						||
| 
								 | 
							
								  };
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  if (opts && opts.sampleSize) {
							 | 
						||
| 
								 | 
							
								    fd = fs.openSync(filepath, 'r'),
							 | 
						||
| 
								 | 
							
								      sample = Buffer.allocUnsafe(opts.sampleSize);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    fs.read(fd, sample, 0, opts.sampleSize, null, function(err) {
							 | 
						||
| 
								 | 
							
								      handler(err, sample);
							 | 
						||
| 
								 | 
							
								    });
							 | 
						||
| 
								 | 
							
								    return;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  fs.readFile(filepath, handler);
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								module.exports.detectFileSync = function(filepath, opts) {
							 | 
						||
| 
								 | 
							
								  if (opts && opts.sampleSize) {
							 | 
						||
| 
								 | 
							
								    var fd = fs.openSync(filepath, 'r'),
							 | 
						||
| 
								 | 
							
								      sample = Buffer.allocUnsafe(opts.sampleSize);
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    fs.readSync(fd, sample, 0, opts.sampleSize);
							 | 
						||
| 
								 | 
							
								    fs.closeSync(fd);
							 | 
						||
| 
								 | 
							
								    return self.detect(sample, opts);
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								  return self.detect(fs.readFileSync(filepath), opts);
							 | 
						||
| 
								 | 
							
								};
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// Wrappers for the previous functions to return all encodings
							 | 
						||
| 
								 | 
							
								module.exports.detectAll = function(buffer, opts) {
							 | 
						||
| 
								 | 
							
								  if (typeof opts !== 'object') {
							 | 
						||
| 
								 | 
							
								    opts = {};
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  opts.returnAllMatches = true;
							 | 
						||
| 
								 | 
							
								  return self.detect(buffer, opts);
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								module.exports.detectFileAll = function(filepath, opts, cb) {
							 | 
						||
| 
								 | 
							
								  if (typeof opts === 'function') {
							 | 
						||
| 
								 | 
							
								    cb = opts;
							 | 
						||
| 
								 | 
							
								    opts = undefined;
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  if (typeof opts !== 'object') {
							 | 
						||
| 
								 | 
							
								    opts = {};
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  opts.returnAllMatches = true;
							 | 
						||
| 
								 | 
							
								  self.detectFile(filepath, opts, cb);
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								module.exports.detectFileAllSync = function(filepath, opts) {
							 | 
						||
| 
								 | 
							
								  if (typeof opts !== 'object') {
							 | 
						||
| 
								 | 
							
								    opts = {};
							 | 
						||
| 
								 | 
							
								  }
							 | 
						||
| 
								 | 
							
								  opts.returnAllMatches = true;
							 | 
						||
| 
								 | 
							
								  return self.detectFileSync(filepath, opts);
							 | 
						||
| 
								 | 
							
								}
							 |