在node.js中读取文件一次一行?

我试图读取一个大文件一行在一次。我在Quora上找到了一个关于这个主题的问题，但我错过了一些联系，把整个事情联系在一起。

 var Lazy=require("lazy");
 new Lazy(process.stdin)
     .lines
     .forEach(
          function(line) { 
              console.log(line.toString()); 
          }
 );
 process.stdin.resume();

我想要弄清楚的是如何一次从文件中读取一行，而不是像本例中那样读取STDIN。

我试着:

 fs.open('./VeryBigFile.csv', 'r', '0666', Process);

 function Process(err, fd) {
    if (err) throw err;
    // DO lazy read 
 }

但这并不奏效。我知道在必要时我可以使用PHP之类的东西，但我想弄清楚这个问题。

我不认为其他答案会起作用，因为文件比我运行它的服务器的内存大得多。

当前回答

在大多数情况下，这应该足够了:

const fs = require("fs")

fs.readFile('./file', 'utf-8', (err, file) => {
  const lines = file.split('\n')

  for (let line of lines)
    console.log(line)
});

2017-02-08 12:47:21

其他回答

我想解决同样的问题，基本上在Perl中是这样的:

while (<>) {
    process_line($_);
}

我的用例只是一个独立的脚本，而不是服务器，所以同步就可以了。以下是我的标准:

可以在许多项目中重用的最小同步代码。不限制文件大小或行数。不限制线的长度。能够处理UTF-8的完整Unicode，包括BMP以外的字符。能够处理*nix和Windows行结束符(老式Mac对我来说不需要)。行中要包含的行结束字符。能够处理带有或不带有行尾字符的最后一行。不要使用node.js发行版中不包含的任何外部库。

这是一个让我在node.js中感受低级脚本类型代码的项目，并决定它作为其他脚本语言(如Perl)的替代品的可行性。

经过惊人的努力和一些错误的开始，这是我提出的代码。它非常快，但没有我预期的那么琐碎:(在GitHub上分叉)

var fs            = require('fs'),
    StringDecoder = require('string_decoder').StringDecoder,
    util          = require('util');

function lineByLine(fd) {
  var blob = '';
  var blobStart = 0;
  var blobEnd = 0;

  var decoder = new StringDecoder('utf8');

  var CHUNK_SIZE = 16384;
  var chunk = new Buffer(CHUNK_SIZE);

  var eolPos = -1;
  var lastChunk = false;

  var moreLines = true;
  var readMore = true;

  // each line
  while (moreLines) {

    readMore = true;
    // append more chunks from the file onto the end of our blob of text until we have an EOL or EOF
    while (readMore) {

      // do we have a whole line? (with LF)
      eolPos = blob.indexOf('\n', blobStart);

      if (eolPos !== -1) {
        blobEnd = eolPos;
        readMore = false;

      // do we have the last line? (no LF)
      } else if (lastChunk) {
        blobEnd = blob.length;
        readMore = false;

      // otherwise read more
      } else {
        var bytesRead = fs.readSync(fd, chunk, 0, CHUNK_SIZE, null);

        lastChunk = bytesRead !== CHUNK_SIZE;

        blob += decoder.write(chunk.slice(0, bytesRead));
      }
    }

    if (blobStart < blob.length) {
      processLine(blob.substring(blobStart, blobEnd + 1));

      blobStart = blobEnd + 1;

      if (blobStart >= CHUNK_SIZE) {
        // blobStart is in characters, CHUNK_SIZE is in octets
        var freeable = blobStart / CHUNK_SIZE;

        // keep blob from growing indefinitely, not as deterministic as I'd like
        blob = blob.substring(CHUNK_SIZE);
        blobStart -= CHUNK_SIZE;
        blobEnd -= CHUNK_SIZE;
      }
    } else {
      moreLines = false;
    }
  }
}

它可能会被进一步清理，这是试验和错误的结果。

2013-01-05 11:34:52

我有一个小模块，它做得很好，被相当多的其他项目使用npm readline注意，在节点v10中有一个本地readline模块，所以我重新发布了我的模块linebyline https://www.npmjs.com/package/linebyline

如果你不想使用这个模块，函数很简单:

var fs = require('fs'),
EventEmitter = require('events').EventEmitter,
util = require('util'),
newlines = [
  13, // \r
  10  // \n
];
var readLine = module.exports = function(file, opts) {
if (!(this instanceof readLine)) return new readLine(file);

EventEmitter.call(this);
opts = opts || {};
var self = this,
  line = [],
  lineCount = 0,
  emit = function(line, count) {
    self.emit('line', new Buffer(line).toString(), count);
  };
  this.input = fs.createReadStream(file);
  this.input.on('open', function(fd) {
    self.emit('open', fd);
  })
  .on('data', function(data) {
   for (var i = 0; i < data.length; i++) {
    if (0 <= newlines.indexOf(data[i])) { // Newline char was found.
      lineCount++;
      if (line.length) emit(line, lineCount);
      line = []; // Empty buffer.
     } else {
      line.push(data[i]); // Buffer new line data.
     }
   }
 }).on('error', function(err) {
   self.emit('error', err);
 }).on('end', function() {
  // Emit last line if anything left over since EOF won't trigger it.
  if (line.length){
     lineCount++;
     emit(line, lineCount);
  }
  self.emit('end');
 }).on('close', function() {
   self.emit('close');
 });
};
util.inherits(readLine, EventEmitter);

2015-01-17 09:53:03

老话题了，但这个很管用:

var rl = readline.createInterface({
      input : fs.createReadStream('/path/file.txt'),
      output: process.stdout,
      terminal: false
})
rl.on('line',function(line){
     console.log(line) //or parse line
})

简单。不需要外部模块。

2014-04-09 23:48:03

在大多数情况下，这应该足够了:

const fs = require("fs")

fs.readFile('./file', 'utf-8', (err, file) => {
  const lines = file.split('\n')

  for (let line of lines)
    console.log(line)
});

2017-02-08 12:47:21

编辑:

使用转换流。

使用BufferedReader，您可以读取行。

new BufferedReader ("lorem ipsum", { encoding: "utf8" })
    .on ("error", function (error){
        console.log ("error: " + error);
    })
    .on ("line", function (line){
        console.log ("line: " + line);
    })
    .on ("end", function (){
        console.log ("EOF");
    })
    .read ();

2012-04-09 19:44:19

在node.js中读取文件一次一行?

推荐文章

最新文章

标签