关于使用fs.readdir进行异步目录搜索有什么想法吗?我意识到我们可以引入递归,并调用read目录函数来读取下一个目录,但我有点担心它不是异步的…

什么好主意吗?我已经看了node-walk,它很棒,但它不能像readdir那样只给我数组中的文件。虽然

寻找这样的输出…

['file1.txt', 'file2.txt', 'dir/file3.txt']

当前回答

递归-readdir模块具有此功能。

其他回答

对于Node 10.3+,这里是一个For -await解决方案:

#!/usr/bin/env node

const FS = require('fs');
const Util = require('util');
const readDir = Util.promisify(FS.readdir);
const Path = require('path');

async function* readDirR(path) {
    const entries = await readDir(path,{withFileTypes:true});
    for(let entry of entries) {
        const fullPath = Path.join(path,entry.name);
        if(entry.isDirectory()) {
            yield* readDirR(fullPath);
        } else {
            yield fullPath;
        }
    }
}

async function main() {
    const start = process.hrtime.bigint();
    for await(const file of readDirR('/mnt/home/media/Unsorted')) {
        console.log(file);
    }
    console.log((process.hrtime.bigint()-start)/1000000n);
}

main().catch(err => {
    console.error(err);
});

这种解决方案的好处是,您可以立即开始处理结果;例如,读取媒体目录中的所有文件需要12秒,但如果我这样做,我可以在几毫秒内得到第一个结果。

这是另一个实现。上述解决方案都没有任何限制,因此如果您的目录结构很大,它们都会崩溃并最终耗尽资源。

var async = require('async');
var fs = require('fs');
var resolve = require('path').resolve;

var scan = function(path, concurrency, callback) {
    var list = [];

    var walker = async.queue(function(path, callback) {
        fs.stat(path, function(err, stats) {
            if (err) {
                return callback(err);
            } else {
                if (stats.isDirectory()) {
                    fs.readdir(path, function(err, files) {
                        if (err) {
                            callback(err);
                        } else {
                            for (var i = 0; i < files.length; i++) {
                                walker.push(resolve(path, files[i]));
                            }
                            callback();
                        }
                    });
                } else {
                    list.push(path);
                    callback();
                }
            }
        });
    }, concurrency);

    walker.push(path);

    walker.drain = function() {
        callback(list);
    }
};

使用50的并发工作得非常好,并且几乎和小型目录结构的简单实现一样快。

因为每个人都应该写自己的,所以我写了一个。

步行(dir, cb, endCb) cb(文件) 零endCb (err |)

module.exports = walk;

function walk(dir, cb, endCb) {
  var fs = require('fs');
  var path = require('path');

  fs.readdir(dir, function(err, files) {
    if (err) {
      return endCb(err);
    }

    var pending = files.length;
    if (pending === 0) {
      endCb(null);
    }
    files.forEach(function(file) {
      fs.stat(path.join(dir, file), function(err, stats) {
        if (err) {
          return endCb(err)
        }

        if (stats.isDirectory()) {
          walk(path.join(dir, file), cb, function() {
            pending--;
            if (pending === 0) {
              endCb(null);
            }
          });
        } else {
          cb(path.join(dir, file));
          pending--;
          if (pending === 0) {
            endCb(null);
          }
        }
      })
    });

  });
}

短小、现代、高效:

import {readdir} from 'node:fs/promises'
import {join} from 'node:path'

const deepReadDir = async (dirPath) => await Promise.all(
  (await readdir(dirPath, {withFileTypes: true})).map(async (dirent) => {
    const path = join(dirPath, dirent.name)
    return dirent.isDirectory() ? await deepReadDir(path) : path
  }),
)

特别感谢函数提示使用{withFileTypes: true}。


这将自动将每个嵌套路径折叠成一个新的嵌套数组。例如,如果:

await deepReadDir('src')

返回如下内容:

[
  [
    'src/client/api.js',
    'src/client/http-constants.js',
    'src/client/index.html',
    'src/client/index.js',
    [ 'src/client/res/favicon.ico' ],
    'src/client/storage.js'
  ],
  [ 'src/crypto/keygen.js' ],
  'src/discover.js',
  [
    'src/mutations/createNewMutation.js',
    'src/mutations/newAccount.js',
    'src/mutations/transferCredit.js',
    'src/mutations/updateApp.js'
  ],
  [
    'src/server/authentication.js',
    'src/server/handlers.js',
    'src/server/quick-response.js',
    'src/server/server.js',
    'src/server/static-resources.js'
  ],
  [ 'src/util/prompt.js', 'src/util/safeWriteFile.js' ],
  'src/util.js'
]

但如果你想,你可以很容易地把它压平:

(await deepReadDir('src')).flat(Number.POSITIVE_INFINITY)
[
  'src/client/api.js',
  'src/client/http-constants.js',
  'src/client/index.html',
  'src/client/index.js',
  'src/client/res/favicon.ico',
  'src/client/storage.js',
  'src/crypto/keygen.js',
  'src/discover.js',
  'src/mutations/createNewMutation.js',
  'src/mutations/newAccount.js',
  'src/mutations/transferCredit.js',
  'src/mutations/updateApp.js',
  'src/server/authentication.js',
  'src/server/handlers.js',
  'src/server/quick-response.js',
  'src/server/server.js',
  'src/server/static-resources.js',
  'src/util/prompt.js',
  'src/util/safeWriteFile.js',
  'src/util.js'
]

为了好玩,这里有一个基于流的版本,它与highland.js streams库一起工作。作者之一是维克多·伍。

###
  directory >---m------> dirFilesStream >---------o----> out
                |                                 |
                |                                 |
                +--------< returnPipe <-----------+

  legend: (m)erge  (o)bserve

 + directory         has the initial file
 + dirListStream     does a directory listing
 + out               prints out the full path of the file
 + returnPipe        runs stat and filters on directories

###

_ = require('highland')
fs = require('fs')
fsPath = require('path')

directory = _(['someDirectory'])
mergePoint = _()
dirFilesStream = mergePoint.merge().flatMap((parentPath) ->
  _.wrapCallback(fs.readdir)(parentPath).sequence().map (path) ->
    fsPath.join parentPath, path
)
out = dirFilesStream
# Create the return pipe
returnPipe = dirFilesStream.observe().flatFilter((path) ->
  _.wrapCallback(fs.stat)(path).map (v) ->
    v.isDirectory()
)
# Connect up the merge point now that we have all of our streams.
mergePoint.write directory
mergePoint.write returnPipe
mergePoint.end()
# Release backpressure.  This will print files as they are discovered
out.each H.log
# Another way would be to queue them all up and then print them all out at once.
# out.toArray((files)-> console.log(files))