Multipart Upload

สำหรับการทำงานร่วมกับไฟล์ข้อมูลขนาดใหญ่นั้น อาจต้องทำการแบ่งส่วนการ Upload ของไฟล์ป็นส่วนๆ ซึ่งสามารถทำได้โดยการใช้ MultipartUpload

MultipartUpload

หลังการทำงานเบื้องต้น

  • CreateMultipartUploadCommand -> ใช้สำหรับการ initiates การทำ multipart upload โดยข้อมูลที่ได้กลับมาจะเป็น upload ID ซึ่งจะถูกนำไปใช้สำหรับการ upload แต่ละ part ของ file

  • UploadPartCommand -> ใช้สำหรับการ upload file แต่ละ part

  • CompleteMultipartUploadCommand -> หลังจากทำการ upload ทุก part เรียบร้อยแล้ว จะเรียกใช้งาน Command นี้เพื่อทำการรวม file ที่ได้ upload ไป เป็นการเสร็จสิ้นการทำงาน

  • AbortMultipartUploadCommand -> หากการ upload ล้มเหลว command นี้จะถูกเรียกใช้งานเพื่อทำการลบข้อมูลที่ล้มเหลวออกไป

import fs from 'fs';
import { Buffer } from 'node:buffer';
import {
    S3Client,
    CreateMultipartUploadCommand,
    UploadPartCommand,
    CompleteMultipartUploadCommand
} from '@aws-sdk/client-s3';

// 100 MB chunk/part size
const CHUNK_SIZE = 1024 * 1024 * 100;

// Max retries when uploading parts
const MAX_RETRIES = 3;

const multipartS3Uploader = async (filePath, options) => {
    const { ncs_region, contentType, key, bucket, ncs_credentials, ncs_endpoint } = options;

    // Get file size
    const fileSize = fs.statSync(filePath).size;

    // Calculate total parts
    const totalParts = Math.ceil(fileSize / CHUNK_SIZE);

    // Initialize the S3 client instance
    const S3 = new S3Client({ region: ncs_region, endpoint: ncs_endpoint, credentials: ncs_credentials });
    const uploadParams = { Bucket: bucket, Key: key, ContentType: contentType };

    let PartNumber = 1;
    const uploadPartResults = [];

    // Send multipart upload request to S3, this returns a UploadId for use when uploading individual parts
    // https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-s3/classes/s3.html#createmultipartupload
    let { UploadId } = await S3.send(new CreateMultipartUploadCommand(uploadParams));

    console.log(`Initiate multipart upload, uploadId: ${UploadId}, totalParts: ${totalParts}, fileSize: ${fileSize}`);

    // Read file parts and upload parts to s3, this promise resolves when all parts are uploaded successfully
    await new Promise(resolve => {
        fs.open(filePath, 'r', async (err, fileDescriptor) => {
            if (err) throw err;

            // Read and upload file parts until end of file
            while (true) {

                // Read next file chunk
                const { buffer, bytesRead } = await readNextPart(fileDescriptor);

                // When end-of-file is reached bytesRead is zero
                if (bytesRead === 0) {
                    // Done reading file, close the file, resolve the promise and return
                    fs.close(fileDescriptor, (err) => { if (err) throw err; });
                    return resolve();
                }

                // Get data chunk/part
                const data = bytesRead < CHUNK_SIZE ? buffer.slice(0, bytesRead) : buffer;

                // Upload data chunk to S3
                const response = await uploadPart(S3,
                    { data, bucket, key, PartNumber, UploadId }
                );

                console.log(`Uploaded part ${PartNumber} of ${totalParts}`);
                uploadPartResults.push({ PartNumber, ETag: response.ETag });
                PartNumber++;
            }
        });
    });

    console.log(`Finish uploading all parts for multipart uploadId: ${UploadId}`);

    // Completes a multipart upload by assembling previously uploaded parts.
    // https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-s3/classes/s3.html#completemultipartupload
    let completeUploadResponse = await S3.send(new CompleteMultipartUploadCommand({
        Bucket: bucket,
        Key: key,
        MultipartUpload: { Parts: uploadPartResults },
        UploadId: UploadId
    }));

    console.log('Successfully completed multipart upload');

    return completeUploadResponse;
};

const readNextPart = async (fileDescriptor) => await new Promise((resolve, reject) => {
    // Allocate an empty buffer to save data chunk that is read
    const buffer = Buffer.alloc(CHUNK_SIZE);

    fs.read(
        fileDescriptor,
        buffer,                             // Buffer where data will be written
        0,                                  // Start Offset on buffer while writing data
        CHUNK_SIZE,                         // Length of bytes to read
        null,                               // Position in file(PartNumber * CHUNK_SIZE); if position is null data is read from the current file position, and the position is updated
        (err, bytesRead) => {               // Callback function
            if (err) return reject(err);
            resolve({ bytesRead, buffer });
        });
});

// Upload a given part with retries
const uploadPart = async (S3, options, retry = 1) => {
    const { data, bucket, key, PartNumber, UploadId } = options;
    let response;
    try {
        // Upload part to S3
        // https://docs.aws.amazon.com/AWSJavaScriptSDK/v3/latest/clients/client-s3/classes/s3.html#uploadpart
        response = await S3.send(
            new UploadPartCommand({
                Body: data,
                Bucket: bucket,
                Key: key,
                PartNumber,
                UploadId
            })
        );
    } catch {
        console.log(`ATTEMPT-#${retry} Failed to upload part ${PartNumber} due to ${JSON.stringify(response)}`);

        if (retry >= MAX_RETRIES)
            throw (response);
        else
            return uploadPart(S3, options, retry + 1);
    }

    return response;
};

export default multipartS3Uploader;

// Example:

await multipartS3Uploader('file2upload.dum',
    {
        ncs_region: 'NCP-TH',
        bucket: 's3-client-buckets',
        key: 'uploaded.dum',
        ncs_endpoint: 'https://s3-bkk.nipa.cloud',
        ncs_credentials: {
            accessKeyId: 'X2EGTHMCW0xxxxxxS1B8',
            secretAccessKey: 'c1XgG0DCjPxH9RCHJByDDMxxxxxxxxxxxxU17F7m'
        }
    }
);

ตัวอย่างการ upload ไฟล์ขนาด 5.04GB

Last updated