From f74ff155bd3211486732923c2801c20523450d06 Mon Sep 17 00:00:00 2001 From: Chad Kimes <1936066+chkimes@users.noreply.github.com> Date: Mon, 7 Aug 2023 13:25:56 -0400 Subject: [PATCH] Add option for concurrent cache downloads with timeout (#1484) * Add option for concurrent cache downloads with timeout * Add release notes * Fix lint --- .eslintrc.json | 1 + packages/cache/RELEASES.md | 4 + .../cache/__tests__/cacheHttpClient.test.ts | 27 ++- packages/cache/__tests__/options.test.ts | 7 +- packages/cache/package-lock.json | 18 +- packages/cache/package.json | 4 +- .../cache/src/internal/cacheHttpClient.ts | 32 +++- packages/cache/src/internal/downloadUtils.ts | 168 +++++++++++++++++- packages/cache/src/options.ts | 13 +- 9 files changed, 242 insertions(+), 32 deletions(-) diff --git a/.eslintrc.json b/.eslintrc.json index 4bff39f8..f86ba651 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -22,6 +22,7 @@ } ], "eslint-comments/no-use": "off", + "no-constant-condition": ["error", { "checkLoops": false }], "github/no-then": "off", "import/no-namespace": "off", "no-shadow": "off", diff --git a/packages/cache/RELEASES.md b/packages/cache/RELEASES.md index 7f6ae719..eb86a8b0 100644 --- a/packages/cache/RELEASES.md +++ b/packages/cache/RELEASES.md @@ -160,3 +160,7 @@ ### 3.2.1 - Updated @azure/storage-blob to `v12.13.0` + +### 3.2.2 + +- Add new default cache download method to improve performance and reduce hangs [#1484](https://github.com/actions/toolkit/pull/1484) diff --git a/packages/cache/__tests__/cacheHttpClient.test.ts b/packages/cache/__tests__/cacheHttpClient.test.ts index a0164d93..02dd91c1 100644 --- a/packages/cache/__tests__/cacheHttpClient.test.ts +++ b/packages/cache/__tests__/cacheHttpClient.test.ts @@ -84,18 +84,24 @@ test('downloadCache uses storage SDK for Azure storage URLs', async () => { 'downloadCacheStorageSDK' ) + const downloadCacheHttpClientConcurrentMock = jest.spyOn( + downloadUtils, + 'downloadCacheHttpClientConcurrent' + ) + const archiveLocation = 'http://foo.blob.core.windows.net/bar/baz' const archivePath = '/foo/bar' await downloadCache(archiveLocation, archivePath) - expect(downloadCacheStorageSDKMock).toHaveBeenCalledTimes(1) - expect(downloadCacheStorageSDKMock).toHaveBeenCalledWith( + expect(downloadCacheHttpClientConcurrentMock).toHaveBeenCalledTimes(1) + expect(downloadCacheHttpClientConcurrentMock).toHaveBeenCalledWith( archiveLocation, archivePath, getDownloadOptions() ) + expect(downloadCacheStorageSDKMock).toHaveBeenCalledTimes(0) expect(downloadCacheHttpClientMock).toHaveBeenCalledTimes(0) }) @@ -109,20 +115,26 @@ test('downloadCache passes options to download methods', async () => { 'downloadCacheStorageSDK' ) + const downloadCacheHttpClientConcurrentMock = jest.spyOn( + downloadUtils, + 'downloadCacheHttpClientConcurrent' + ) + const archiveLocation = 'http://foo.blob.core.windows.net/bar/baz' const archivePath = '/foo/bar' const options: DownloadOptions = {downloadConcurrency: 4} await downloadCache(archiveLocation, archivePath, options) - expect(downloadCacheStorageSDKMock).toHaveBeenCalledTimes(1) - expect(downloadCacheStorageSDKMock).toHaveBeenCalled() - expect(downloadCacheStorageSDKMock).toHaveBeenCalledWith( + expect(downloadCacheHttpClientConcurrentMock).toHaveBeenCalledTimes(1) + expect(downloadCacheHttpClientConcurrentMock).toHaveBeenCalled() + expect(downloadCacheHttpClientConcurrentMock).toHaveBeenCalledWith( archiveLocation, archivePath, getDownloadOptions(options) ) + expect(downloadCacheStorageSDKMock).toHaveBeenCalledTimes(0) expect(downloadCacheHttpClientMock).toHaveBeenCalledTimes(0) }) @@ -138,7 +150,10 @@ test('downloadCache uses http-client when overridden', async () => { const archiveLocation = 'http://foo.blob.core.windows.net/bar/baz' const archivePath = '/foo/bar' - const options: DownloadOptions = {useAzureSdk: false} + const options: DownloadOptions = { + useAzureSdk: false, + concurrentBlobDownloads: false + } await downloadCache(archiveLocation, archivePath, options) diff --git a/packages/cache/__tests__/options.test.ts b/packages/cache/__tests__/options.test.ts index bcb478d1..7585b60f 100644 --- a/packages/cache/__tests__/options.test.ts +++ b/packages/cache/__tests__/options.test.ts @@ -5,7 +5,8 @@ import { getUploadOptions } from '../src/options' -const useAzureSdk = true +const useAzureSdk = false +const concurrentBlobDownloads = true const downloadConcurrency = 8 const timeoutInMs = 30000 const segmentTimeoutInMs = 600000 @@ -18,6 +19,7 @@ test('getDownloadOptions sets defaults', async () => { expect(actualOptions).toEqual({ useAzureSdk, + concurrentBlobDownloads, downloadConcurrency, timeoutInMs, segmentTimeoutInMs, @@ -27,7 +29,8 @@ test('getDownloadOptions sets defaults', async () => { test('getDownloadOptions overrides all settings', async () => { const expectedOptions: DownloadOptions = { - useAzureSdk: false, + useAzureSdk: true, + concurrentBlobDownloads: false, downloadConcurrency: 14, timeoutInMs: 20000, segmentTimeoutInMs: 3600000, diff --git a/packages/cache/package-lock.json b/packages/cache/package-lock.json index 3d905701..faff2233 100644 --- a/packages/cache/package-lock.json +++ b/packages/cache/package-lock.json @@ -1,18 +1,18 @@ { "name": "@actions/cache", - "version": "3.2.1", + "version": "3.2.2", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@actions/cache", - "version": "3.2.1", + "version": "3.2.2", "license": "MIT", "dependencies": { "@actions/core": "^1.10.0", "@actions/exec": "^1.0.1", "@actions/glob": "^0.1.0", - "@actions/http-client": "^2.0.1", + "@actions/http-client": "^2.1.1", "@actions/io": "^1.0.1", "@azure/abort-controller": "^1.1.0", "@azure/ms-rest-js": "^2.6.0", @@ -61,9 +61,9 @@ } }, "node_modules/@actions/http-client": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/@actions/http-client/-/http-client-2.1.0.tgz", - "integrity": "sha512-BonhODnXr3amchh4qkmjPMUO8mFi/zLaaCeCAJZqch8iQqyDnVIkySjB38VHAC8IJ+bnlgfOqlhpyCUZHlQsqw==", + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@actions/http-client/-/http-client-2.1.1.tgz", + "integrity": "sha512-qhrkRMB40bbbLo7gF+0vu+X+UawOvQQqNAA/5Unx774RS8poaOhThDOG6BGmxvAnxhQnDp2BG/ZUm65xZILTpw==", "dependencies": { "tunnel": "^0.0.6" } @@ -565,9 +565,9 @@ } }, "@actions/http-client": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/@actions/http-client/-/http-client-2.1.0.tgz", - "integrity": "sha512-BonhODnXr3amchh4qkmjPMUO8mFi/zLaaCeCAJZqch8iQqyDnVIkySjB38VHAC8IJ+bnlgfOqlhpyCUZHlQsqw==", + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/@actions/http-client/-/http-client-2.1.1.tgz", + "integrity": "sha512-qhrkRMB40bbbLo7gF+0vu+X+UawOvQQqNAA/5Unx774RS8poaOhThDOG6BGmxvAnxhQnDp2BG/ZUm65xZILTpw==", "requires": { "tunnel": "^0.0.6" } diff --git a/packages/cache/package.json b/packages/cache/package.json index 319e8067..8877dd99 100644 --- a/packages/cache/package.json +++ b/packages/cache/package.json @@ -1,6 +1,6 @@ { "name": "@actions/cache", - "version": "3.2.1", + "version": "3.2.2", "preview": true, "description": "Actions cache lib", "keywords": [ @@ -40,7 +40,7 @@ "@actions/core": "^1.10.0", "@actions/exec": "^1.0.1", "@actions/glob": "^0.1.0", - "@actions/http-client": "^2.0.1", + "@actions/http-client": "^2.1.1", "@actions/io": "^1.0.1", "@azure/abort-controller": "^1.1.0", "@azure/ms-rest-js": "^2.6.0", diff --git a/packages/cache/src/internal/cacheHttpClient.ts b/packages/cache/src/internal/cacheHttpClient.ts index 52422ff6..880ebff2 100644 --- a/packages/cache/src/internal/cacheHttpClient.ts +++ b/packages/cache/src/internal/cacheHttpClient.ts @@ -20,7 +20,11 @@ import { ITypedResponseWithError, ArtifactCacheList } from './contracts' -import {downloadCacheHttpClient, downloadCacheStorageSDK} from './downloadUtils' +import { + downloadCacheHttpClient, + downloadCacheHttpClientConcurrent, + downloadCacheStorageSDK +} from './downloadUtils' import { DownloadOptions, UploadOptions, @@ -171,14 +175,26 @@ export async function downloadCache( const archiveUrl = new URL(archiveLocation) const downloadOptions = getDownloadOptions(options) - if ( - downloadOptions.useAzureSdk && - archiveUrl.hostname.endsWith('.blob.core.windows.net') - ) { - // Use Azure storage SDK to download caches hosted on Azure to improve speed and reliability. - await downloadCacheStorageSDK(archiveLocation, archivePath, downloadOptions) + if (archiveUrl.hostname.endsWith('.blob.core.windows.net')) { + if (downloadOptions.useAzureSdk) { + // Use Azure storage SDK to download caches hosted on Azure to improve speed and reliability. + await downloadCacheStorageSDK( + archiveLocation, + archivePath, + downloadOptions + ) + } else if (downloadOptions.concurrentBlobDownloads) { + // Use concurrent implementation with HttpClient to work around blob SDK issue + await downloadCacheHttpClientConcurrent( + archiveLocation, + archivePath, + downloadOptions + ) + } else { + // Otherwise, download using the Actions http-client. + await downloadCacheHttpClient(archiveLocation, archivePath) + } } else { - // Otherwise, download using the Actions http-client. await downloadCacheHttpClient(archiveLocation, archivePath) } } diff --git a/packages/cache/src/internal/downloadUtils.ts b/packages/cache/src/internal/downloadUtils.ts index 7e1b1e8a..de57ed78 100644 --- a/packages/cache/src/internal/downloadUtils.ts +++ b/packages/cache/src/internal/downloadUtils.ts @@ -203,6 +203,166 @@ export async function downloadCacheHttpClient( } } +/** + * Download the cache using the Actions toolkit http-client concurrently + * + * @param archiveLocation the URL for the cache + * @param archivePath the local path where the cache is saved + */ +export async function downloadCacheHttpClientConcurrent( + archiveLocation: string, + archivePath: fs.PathLike, + options: DownloadOptions +): Promise { + const archiveDescriptor = await fs.promises.open(archivePath, 'w') + const httpClient = new HttpClient('actions/cache', undefined, { + socketTimeout: options.timeoutInMs, + keepAlive: true + }) + try { + const res = await retryHttpClientResponse( + 'downloadCacheMetadata', + async () => await httpClient.request('HEAD', archiveLocation, null, {}) + ) + + const lengthHeader = res.message.headers['content-length'] + if (lengthHeader === undefined || lengthHeader === null) { + throw new Error('Content-Length not found on blob response') + } + + const length = parseInt(lengthHeader) + if (Number.isNaN(length)) { + throw new Error(`Could not interpret Content-Length: ${length}`) + } + + const downloads: { + offset: number + promiseGetter: () => Promise + }[] = [] + const blockSize = 4 * 1024 * 1024 + + for (let offset = 0; offset < length; offset += blockSize) { + const count = Math.min(blockSize, length - offset) + downloads.push({ + offset, + promiseGetter: async () => { + return await downloadSegmentRetry( + httpClient, + archiveLocation, + offset, + count + ) + } + }) + } + + // reverse to use .pop instead of .shift + downloads.reverse() + let actives = 0 + let bytesDownloaded = 0 + const progress = new DownloadProgress(length) + progress.startDisplayTimer() + const progressFn = progress.onProgress() + + const activeDownloads: {[offset: number]: Promise} = [] + let nextDownload: + | {offset: number; promiseGetter: () => Promise} + | undefined + + const waitAndWrite: () => Promise = async () => { + const segment = await Promise.race(Object.values(activeDownloads)) + await archiveDescriptor.write( + segment.buffer, + 0, + segment.count, + segment.offset + ) + actives-- + delete activeDownloads[segment.offset] + bytesDownloaded += segment.count + progressFn({loadedBytes: bytesDownloaded}) + } + + while ((nextDownload = downloads.pop())) { + activeDownloads[nextDownload.offset] = nextDownload.promiseGetter() + actives++ + + if (actives >= (options.downloadConcurrency ?? 10)) { + await waitAndWrite() + } + } + + while (actives > 0) { + await waitAndWrite() + } + } finally { + httpClient.dispose() + await archiveDescriptor.close() + } +} + +async function downloadSegmentRetry( + httpClient: HttpClient, + archiveLocation: string, + offset: number, + count: number +): Promise { + const retries = 5 + let failures = 0 + + while (true) { + try { + const timeout = 30000 + const result = await promiseWithTimeout( + timeout, + downloadSegment(httpClient, archiveLocation, offset, count) + ) + if (typeof result === 'string') { + throw new Error('downloadSegmentRetry failed due to timeout') + } + + return result + } catch (err) { + if (failures >= retries) { + throw err + } + + failures++ + } + } +} + +async function downloadSegment( + httpClient: HttpClient, + archiveLocation: string, + offset: number, + count: number +): Promise { + const partRes = await retryHttpClientResponse( + 'downloadCachePart', + async () => + await httpClient.get(archiveLocation, { + Range: `bytes=${offset}-${offset + count - 1}` + }) + ) + + if (!partRes.readBodyBuffer) { + throw new Error('Expected HttpClientResponse to implement readBodyBuffer') + } + + return { + offset, + count, + buffer: await partRes.readBodyBuffer() + } +} + +declare class DownloadSegment { + offset: number + count: number + buffer: Buffer +} + /** * Download the cache using the Azure Storage SDK. Only call this method if the * URL points to an Azure Storage endpoint. @@ -287,12 +447,12 @@ export async function downloadCacheStorageSDK( } } -const promiseWithTimeout = async ( +const promiseWithTimeout = async ( timeoutMs: number, - promise: Promise -): Promise => { + promise: Promise +): Promise => { let timeoutHandle: NodeJS.Timeout - const timeoutPromise = new Promise(resolve => { + const timeoutPromise = new Promise(resolve => { timeoutHandle = setTimeout(() => resolve('timeout'), timeoutMs) }) diff --git a/packages/cache/src/options.ts b/packages/cache/src/options.ts index a518d207..d768ff54 100644 --- a/packages/cache/src/options.ts +++ b/packages/cache/src/options.ts @@ -39,6 +39,12 @@ export interface DownloadOptions { */ downloadConcurrency?: number + /** + * Indicates whether to use Actions HttpClient with concurrency + * for Azure Blob Storage + */ + concurrentBlobDownloads?: boolean + /** * Maximum time for each download request, in milliseconds (this * option only applies when using the Azure SDK) @@ -98,7 +104,8 @@ export function getUploadOptions(copy?: UploadOptions): UploadOptions { */ export function getDownloadOptions(copy?: DownloadOptions): DownloadOptions { const result: DownloadOptions = { - useAzureSdk: true, + useAzureSdk: false, + concurrentBlobDownloads: true, downloadConcurrency: 8, timeoutInMs: 30000, segmentTimeoutInMs: 600000, @@ -110,6 +117,10 @@ export function getDownloadOptions(copy?: DownloadOptions): DownloadOptions { result.useAzureSdk = copy.useAzureSdk } + if (typeof copy.concurrentBlobDownloads === 'boolean') { + result.concurrentBlobDownloads = copy.concurrentBlobDownloads + } + if (typeof copy.downloadConcurrency === 'number') { result.downloadConcurrency = copy.downloadConcurrency }