如何在不使用第三方库的情况下使用Node.js下载文件?

我不需要什么特别的东西。我只想从给定的URL下载文件,然后将其保存到给定的目录。


当前回答

根据上面的其他答案和一些微妙的问题,下面是我的尝试。

Check the file does not exist before hitting the network by using fs.access. Only create the fs.createWriteStream if you get a 200 OK status code. This reduces the amount of fs.unlink commands required to tidy up temporary file handles. Even on a 200 OK we can still possibly reject due to an EEXIST file already exists (imagine another process created the file whilst we were doing network calls). Recursively call download if you get a 301 Moved Permanently or 302 Found (Moved Temporarily) redirect following the link location provided in the header. The issue with some of the other answers recursively calling download was that they called resolve(download) instead of download(...).then(() => resolve()) so the Promise would return before the download actually finished. This way the nested chain of promises resolve in the correct order. It might seem cool to clean up the temp file asynchronously, but I chose to reject only after that completed too so I know that everything start to finish is done when this promise resolves or rejects.

const https = require('https');
const fs = require('fs');

/**
 * Download a resource from `url` to `dest`.
 * @param {string} url - Valid URL to attempt download of resource
 * @param {string} dest - Valid path to save the file.
 * @returns {Promise<void>} - Returns asynchronously when successfully completed download
 */
function download(url, dest) {
  return new Promise((resolve, reject) => {
    // Check file does not exist yet before hitting network
    fs.access(dest, fs.constants.F_OK, (err) => {

        if (err === null) reject('File already exists');

        const request = https.get(url, response => {
            if (response.statusCode === 200) {
       
              const file = fs.createWriteStream(dest, { flags: 'wx' });
              file.on('finish', () => resolve());
              file.on('error', err => {
                file.close();
                if (err.code === 'EEXIST') reject('File already exists');
                else fs.unlink(dest, () => reject(err.message)); // Delete temp file
              });
              response.pipe(file);
            } else if (response.statusCode === 302 || response.statusCode === 301) {
              //Recursively follow redirects, only a 200 will resolve.
              download(response.headers.location, dest).then(() => resolve());
            } else {
              reject(`Server responded with ${response.statusCode}: ${response.statusMessage}`);
            }
          });
      
          request.on('error', err => {
            reject(err.message);
          });
    });
  });
}

其他回答

Gfxmonk的答案在回调和file.close()完成之间有一个非常紧张的数据竞赛。File.close()实际上接受一个回调函数,该函数在close完成时被调用。否则,立即使用文件可能会失败(非常罕见!)。

一个完整的解决方案是:

var http = require('http');
var fs = require('fs');

var download = function(url, dest, cb) {
  var file = fs.createWriteStream(dest);
  var request = http.get(url, function(response) {
    response.pipe(file);
    file.on('finish', function() {
      file.close(cb);  // close() is async, call cb after close completes.
    });
  });
}

如果不等待finish事件,幼稚的脚本可能最终得到一个不完整的文件。如果不通过close调度cb回调,您可能会在访问文件和文件实际准备就绪之间出现竞争。

我发现这种方法是最有帮助的,特别是当涉及到pdf和其他随机文件时。

import fs from "fs";

  fs.appendFile("output_file_name.ext", fileDataInBytes, (err) => {
    if (err) throw err;
    console.log("File saved!");
  });

使用promise下载,它解析可读流。添加额外的逻辑来处理重定向。

var http = require('http');
var promise = require('bluebird');
var url = require('url');
var fs = require('fs');
var assert = require('assert');

function download(option) {
    assert(option);
    if (typeof option == 'string') {
        option = url.parse(option);
    }

    return new promise(function(resolve, reject) {
        var req = http.request(option, function(res) {
            if (res.statusCode == 200) {
                resolve(res);
            } else {
                if (res.statusCode === 301 && res.headers.location) {
                    resolve(download(res.headers.location));
                } else {
                    reject(res.statusCode);
                }
            }
        })
        .on('error', function(e) {
            reject(e);
        })
        .end();
    });
}

download('http://localhost:8080/redirect')
.then(function(stream) {
    try {

        var writeStream = fs.createWriteStream('holyhigh.jpg');
        stream.pipe(writeStream);

    } catch(e) {
        console.error(e);
    }
});

2022年底编辑:

Node v18及以上版本自带自带的Fetch API支持。使用它。

最初的回答:

对于支持承诺的节点,与其他答案相比,一个简单的(部分)Fetch API的Node shim只需要少量额外的代码:

const fs = require(`fs`);
const http = require(`http`);
const https = require(`https`);

module.exports = function fetch(url) {
  return new Promise((resolve, reject) => {
    const data = [];
    const client = url.startsWith("https") ? https : http;
    client
      .request(url, (res) => {
        res.on(`data`, (chunk) => data.push(chunk));
        res.on(`end`, () => {
          const asBytes = Buffer.concat(data);
          const asString = asBytes.toString(`utf8`);
          resolve({
            arrayBuffer: async () => asBytes,
            json: async () => JSON.parse(asString),
            text: async () => asString,
          });
        });
        res.on(`error`, (e) => reject(e));
      })
      .end();
  });
};

你可以用它来做任何你需要的事情,使用普通的fetch语法:

const fetch = require(`./tiny-fetch.js`);

fetch(`https://placekitten.com/200/300`)
  .then(res => res.arrayBuffer())
  .then(bytes => fs.writeFileSync(`kitten.jpg`, bytes))
  .catch(e => console.error(e));

fetch(`https://jsonplaceholder.typicode.com/todos/1`)
  .then(res => res.json())
  .then(obj => console.log(obj))
  .catch(e => console.error(e));

// etc.

使用http2模块

我看到了使用http、https和request模块的答案。我想添加一个使用另一个本地NodeJS模块,支持http或https协议:

解决方案

我已经参考了官方的NodeJS API,以及关于这个问题的一些其他答案。下面是我编写的测试,它可以按照预期工作:

import * as fs from 'fs';
import * as _path from 'path';
import * as http2 from 'http2';

/* ... */

async function download( host, query, destination )
{
    return new Promise
    (
        ( resolve, reject ) =>
        {
            // Connect to client:
            const client = http2.connect( host );
            client.on( 'error', error => reject( error ) );

            // Prepare a write stream:
            const fullPath = _path.join( fs.realPathSync( '.' ), destination );
            const file = fs.createWriteStream( fullPath, { flags: "wx" } );
            file.on( 'error', error => reject( error ) );

            // Create a request:
            const request = client.request( { [':path']: query } );

            // On initial response handle non-success (!== 200) status error:
            request.on
            (
                'response',
                ( headers/*, flags*/ ) =>
                {
                    if( headers[':status'] !== 200 )
                    {
                        file.close();
                        fs.unlink( fullPath, () => {} );
                        reject( new Error( `Server responded with ${headers[':status']}` ) );
                    }
                }
            );

            // Set encoding for the payload:
            request.setEncoding( 'utf8' );

            // Write the payload to file:
            request.on( 'data', chunk => file.write( chunk ) );

            // Handle ending the request
            request.on
            (
                'end',
                () =>
                {
                    file.close();
                    client.close();
                    resolve( { result: true } );
                }
            );

            /* 
                You can use request.setTimeout( 12000, () => {} ) for aborting
                after period of inactivity
            */

            // Fire off [flush] the request:
            request.end();
        }
    );
}

然后,例如:

/* ... */

let downloaded = await download( 'https://gitlab.com', '/api/v4/...', 'tmp/tmpFile' );

if( downloaded.result )
{
    // Success!
}

// ...

外部引用

https://nodejs.org/api/http2.html#http2_client_side_example https://nodejs.org/api/http2.html#http2_clienthttp2session_request_headers_options

编辑信息

解决方案是为typescript编写的,函数是一个类方法——但是没有注意到这一点,如果没有正确使用函数声明,这个解决方案将无法为假定的javascript用户工作,这是我们的贡献者迅速添加的。谢谢!