Skip to content

Commit 6954817

Browse files
committed
Harden parser more
1 parent 370ed91 commit 6954817

File tree

3 files changed

+4057
-238
lines changed

3 files changed

+4057
-238
lines changed

core.js

Lines changed: 199 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,19 @@ export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-typ
1717
// Keep defensive limits small enough to avoid accidental memory spikes from untrusted inputs.
1818
const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2;
1919
const maximumZipEntrySizeInBytes = 1024 * 1024;
20+
const maximumZipEntryCount = 1024;
21+
const maximumZipBufferedReadSizeInBytes = (2 ** 31) - 1;
2022
const maximumUntrustedSkipSizeInBytes = 16 * 1024 * 1024;
23+
const maximumZipTextEntrySizeInBytes = maximumZipEntrySizeInBytes;
2124
const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
25+
const maximumNestedGzipProbeDepth = 1;
2226
const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;
2327
const maximumEbmlDocumentTypeSizeInBytes = 64;
2428
const maximumEbmlElementPayloadSizeInBytes = maximumUntrustedSkipSizeInBytes;
2529
const maximumEbmlElementCount = 256;
30+
const maximumPngChunkCount = 512;
31+
const maximumAsfHeaderObjectCount = 512;
32+
const maximumTiffTagCount = 512;
2633
const maximumDetectionReentryCount = 256;
2734
const maximumPngChunkSizeInBytes = maximumUntrustedSkipSizeInBytes;
2835
const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;
@@ -32,7 +39,9 @@ const recoverableZipErrorMessages = new Set([
3239
'Expected Central-File-Header signature',
3340
]);
3441
const recoverableZipErrorMessagePrefixes = [
42+
'ZIP entry count exceeds ',
3543
'Unsupported ZIP compression method:',
44+
'ZIP entry compressed data exceeds ',
3645
'ZIP entry decompressed data exceeds ',
3746
];
3847
const recoverableZipErrorCodes = new Set([
@@ -110,6 +119,114 @@ async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEn
110119
return uncompressedData;
111120
}
112121

122+
// Little-endian 'PK\x07\x08' marker that precedes an optional ZIP data descriptor.
const zipDataDescriptorSignature = 0x08_07_4B_50;
// Data descriptor layout: signature (4) + CRC-32 (4) + compressed size (4) + uncompressed size (4).
const zipDataDescriptorLengthInBytes = 16;
// Bytes carried over between scan windows so a descriptor split across a
// window boundary is still detected (descriptor length minus one).
const zipDataDescriptorOverlapLengthInBytes = zipDataDescriptorLengthInBytes - 1;
125+
126+
// Locates a ZIP data descriptor inside `buffer` by matching both its signature
// and its compressed-size field against the number of payload bytes streamed
// so far. Returns the descriptor's offset within `buffer`, or -1 when absent.
function findZipDataDescriptorOffset(buffer, bytesConsumed) {
	if (buffer.length < zipDataDescriptorLengthInBytes) {
		return -1;
	}

	const lastCandidateOffset = buffer.length - zipDataDescriptorLengthInBytes;
	for (let offset = 0; offset <= lastCandidateOffset; offset++) {
		const hasSignature = Token.UINT32_LE.get(buffer, offset) === zipDataDescriptorSignature;
		if (!hasSignature) {
			continue;
		}

		// The descriptor's compressed-size field (at +8) must equal the payload
		// byte count preceding it; this rejects signature look-alikes in data.
		const declaredCompressedSize = Token.UINT32_LE.get(buffer, offset + 8);
		if (declaredCompressedSize === bytesConsumed + offset) {
			return offset;
		}
	}

	return -1;
}
143+
144+
/**
Concatenates a list of byte chunks into a single contiguous Uint8Array.

@param {Uint8Array[]} chunks - Chunks to concatenate, in order.
@param {number} [totalLength] - Combined byte length of all chunks; computed from the chunks when omitted.
@returns {Uint8Array} The merged bytes.
*/
function mergeByteChunks(chunks, totalLength = chunks.reduce((sum, {length}) => sum + length, 0)) {
	const merged = new Uint8Array(totalLength);
	let offset = 0;

	for (const chunk of chunks) {
		merged.set(chunk, offset);
		offset += chunk.length;
	}

	return merged;
}
155+
156+
/**
Reads (or skips) a ZIP entry whose size is unknown because it uses a trailing
data descriptor (compressedSize 0 in the local file header). The payload is
streamed in syncBuffer-sized windows, each scanned for the descriptor; the
function stops just before the descriptor so the caller can consume and
validate it afterwards.

@param {object} zipHandler - Handler whose tokenizer is positioned at the entry payload.
@param {object} [options]
@param {boolean} [options.shouldBuffer] - When true, collect and return the payload bytes; otherwise just advance past them.
@param {number} [options.maximumLength] - Defensive cap on the payload size.
@returns {Promise<Uint8Array | undefined>} The payload when buffering, otherwise undefined.
@throws {Error} When the payload exceeds `maximumLength` bytes.
*/
async function readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer, maximumLength = maximumZipEntrySizeInBytes} = {}) {
	const {syncBuffer} = zipHandler;
	const {length: syncBufferLength} = syncBuffer;
	const chunks = [];
	let bytesConsumed = 0;

	for (;;) {
		// Peek a window without consuming it, then look for the descriptor in it.
		const length = await zipHandler.tokenizer.peekBuffer(syncBuffer, {mayBeLess: true});
		const dataDescriptorOffset = findZipDataDescriptorOffset(syncBuffer.subarray(0, length), bytesConsumed);
		// When a full window contains no descriptor, retain the final
		// descriptor-length-minus-one bytes so a descriptor straddling the
		// window boundary is still found on the next pass. A short (final)
		// window retains nothing.
		const retainedLength = dataDescriptorOffset >= 0
			? 0
			: (
				length === syncBufferLength
					? Math.min(zipDataDescriptorOverlapLengthInBytes, length - 1)
					: 0
			);
		const chunkLength = dataDescriptorOffset >= 0 ? dataDescriptorOffset : length - retainedLength;

		// Nothing to consume: either end of input, or the descriptor sits at
		// the very start of the window.
		if (chunkLength === 0) {
			break;
		}

		bytesConsumed += chunkLength;
		if (bytesConsumed > maximumLength) {
			throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
		}

		if (shouldBuffer) {
			const data = new Uint8Array(chunkLength);
			await zipHandler.tokenizer.readBuffer(data);
			chunks.push(data);
		} else {
			await zipHandler.tokenizer.ignore(chunkLength);
		}

		// Descriptor found: leave it unread for the caller to validate.
		if (dataDescriptorOffset >= 0) {
			break;
		}
	}

	if (!shouldBuffer) {
		return;
	}

	return mergeByteChunks(chunks, bytesConsumed);
}
202+
203+
/**
Reads or skips one ZIP entry's compressed payload.

Entries that defer their size to a trailing data descriptor are delegated to
the descriptor-scanning reader; otherwise the declared compressed size is
either skipped or buffered, subject to a defensive size limit.

@param {object} zipHandler - Handler whose tokenizer is positioned at the payload.
@param {object} zipHeader - Parsed local file header for the entry.
@param {object} [options]
@param {boolean} [options.shouldBuffer] - When true, return the payload bytes; otherwise just advance past them.
@returns {Promise<Uint8Array | undefined>} The payload when buffering, otherwise undefined.
@throws {Error} When the declared compressed size is invalid or exceeds the buffering limit.
*/
async function readZipEntryData(zipHandler, zipHeader, {shouldBuffer} = {}) {
	const {tokenizer} = zipHandler;
	const usesDataDescriptor = zipHeader.dataDescriptor && zipHeader.compressedSize === 0;

	if (usesDataDescriptor) {
		// Size unknown up front; scan forward until the descriptor is found.
		return readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer});
	}

	if (!shouldBuffer) {
		await tokenizer.ignore(zipHeader.compressedSize);
		return;
	}

	const maximumLength = getMaximumZipBufferedReadLength(tokenizer);
	const {compressedSize} = zipHeader;
	const isReadableSize = Number.isFinite(compressedSize)
		&& compressedSize >= 0
		&& compressedSize <= maximumLength;
	if (!isReadableSize) {
		throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
	}

	const fileData = new Uint8Array(compressedSize);
	await tokenizer.readBuffer(fileData);
	return fileData;
}
229+
113230
// Override the default inflate to enforce decompression size limits, since @tokenizer/inflate does not expose a configuration hook for this.
114231
ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
115232
if (zipHeader.compressedMethod === 0) {
@@ -125,6 +242,41 @@ ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
125242
return callback(uncompressedData);
126243
};
127244

245+
// Override unzip to cap the number of local-file-header entries processed and
// to route payload reads through the size-limited readZipEntryData helper.
ZipHandler.prototype.unzip = async function (fileCallback) {
	let stop = false;
	let zipEntryCount = 0;
	do {
		const zipHeader = await this.readLocalFileHeader();
		if (!zipHeader) {
			break;
		}

		// Defensive limit: refuse archives with an unreasonable entry count.
		zipEntryCount++;
		if (zipEntryCount > maximumZipEntryCount) {
			throw new Error(`ZIP entry count exceeds ${maximumZipEntryCount}`);
		}

		// The callback decides whether this entry's payload is needed
		// (next.handler) and whether scanning should end after it (next.stop).
		const next = fileCallback(zipHeader);
		stop = Boolean(next.stop);
		await this.tokenizer.ignore(zipHeader.extraFieldLength);
		// Buffer the payload only when a handler wants it; otherwise skip it.
		const fileData = await readZipEntryData(this, zipHeader, {
			shouldBuffer: Boolean(next.handler),
		});

		if (next.handler) {
			await this.inflate(zipHeader, fileData, next.handler);
		}

		// Entries with a trailing data descriptor: consume and validate the
		// descriptor so the tokenizer lands on the next local file header.
		if (zipHeader.dataDescriptor) {
			const dataDescriptor = new Uint8Array(zipDataDescriptorLengthInBytes);
			await this.tokenizer.readBuffer(dataDescriptor);
			if (Token.UINT32_LE.get(dataDescriptor, 0) !== zipDataDescriptorSignature) {
				throw new Error(`Expected data-descriptor-signature at position ${this.tokenizer.position - dataDescriptor.length}`);
			}
		}
	} while (!stop);
};
279+
128280
function createByteLimitedReadableStream(stream, maximumBytes) {
129281
const reader = stream.getReader();
130282
let emittedBytes = 0;
@@ -385,6 +537,15 @@ function hasExceededUnknownSizeScanBudget(tokenizer, startOffset, maximumBytes)
385537
);
386538
}
387539

540+
// Upper bound for buffering one ZIP entry: the bytes remaining in the file
// when its size is known, clamped to the module-wide buffered-read ceiling.
// Streams of unknown size fall back to that ceiling alone.
function getMaximumZipBufferedReadLength(tokenizer) {
	const {size} = tokenizer.fileInfo;
	let remainingBytes = Number.MAX_SAFE_INTEGER;
	if (Number.isFinite(size)) {
		remainingBytes = Math.max(0, size - tokenizer.position);
	}

	return Math.min(remainingBytes, maximumZipBufferedReadSizeInBytes);
}
548+
388549
function isRecoverableZipError(error) {
389550
if (error instanceof strtok3.EndOfStreamError) {
390551
return true;
@@ -546,6 +707,13 @@ export class FileTypeParser {
546707
this.tokenizerOptions = {
547708
abortSignal: this.options.signal,
548709
};
710+
this.gzipProbeDepth = 0;
711+
}
712+
713+
getTokenizerOptions() {
714+
return {
715+
...this.tokenizerOptions,
716+
};
549717
}
550718

551719
async fromTokenizer(tokenizer, detectionReentryCount = 0) {
@@ -589,11 +757,11 @@ export class FileTypeParser {
589757
return;
590758
}
591759

592-
return this.fromTokenizer(strtok3.fromBuffer(buffer, this.tokenizerOptions));
760+
return this.fromTokenizer(strtok3.fromBuffer(buffer, this.getTokenizerOptions()));
593761
}
594762

595763
async fromBlob(blob) {
596-
const tokenizer = strtok3.fromBlob(blob, this.tokenizerOptions);
764+
const tokenizer = strtok3.fromBlob(blob, this.getTokenizerOptions());
597765
try {
598766
return await this.fromTokenizer(tokenizer);
599767
} finally {
@@ -602,7 +770,7 @@ export class FileTypeParser {
602770
}
603771

604772
async fromStream(stream) {
605-
const tokenizer = strtok3.fromWebStream(stream, this.tokenizerOptions);
773+
const tokenizer = strtok3.fromWebStream(stream, this.getTokenizerOptions());
606774
try {
607775
return await this.fromTokenizer(tokenizer);
608776
} finally {
@@ -777,17 +945,27 @@ export class FileTypeParser {
777945
}
778946

779947
if (this.check([0x1F, 0x8B, 0x8])) {
948+
if (this.gzipProbeDepth >= maximumNestedGzipProbeDepth) {
949+
return {
950+
ext: 'gz',
951+
mime: 'application/gzip',
952+
};
953+
}
954+
780955
const gzipHandler = new GzipHandler(tokenizer);
781956
const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes);
782957
let compressedFileType;
783958
try {
959+
this.gzipProbeDepth++;
784960
compressedFileType = await this.fromStream(limitedInflatedStream);
785961
} catch (error) {
786962
if (error?.name === 'AbortError') {
787963
throw error;
788964
}
789965

790966
// Decompression or inner-detection failures are expected for non-tar gzip files.
967+
} finally {
968+
this.gzipProbeDepth--;
791969
}
792970

793971
// We only need enough inflated bytes to confidently decide whether this is tar.gz.
@@ -980,7 +1158,7 @@ export class FileTypeParser {
9801158
stop: true,
9811159
};
9821160
case 'mimetype':
983-
if (!canReadZipEntryForDetection(zipHeader)) {
1161+
if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
9841162
return {};
9851163
}
9861164

@@ -996,8 +1174,7 @@ export class FileTypeParser {
9961174
case '[Content_Types].xml': {
9971175
openXmlState.hasContentTypesEntry = true;
9981176

999-
const maximumContentTypesEntrySize = hasUnknownFileSize(tokenizer) ? maximumZipEntrySizeInBytes : Number.MAX_SAFE_INTEGER;
1000-
if (!canReadZipEntryForDetection(zipHeader, maximumContentTypesEntrySize)) {
1177+
if (!canReadZipEntryForDetection(zipHeader, maximumZipTextEntrySizeInBytes)) {
10011178
openXmlState.hasUnparseableContentTypes = true;
10021179
return {};
10031180
}
@@ -1698,7 +1875,13 @@ export class FileTypeParser {
16981875

16991876
const isUnknownPngStream = hasUnknownFileSize(tokenizer);
17001877
const pngScanStart = tokenizer.position;
1878+
let pngChunkCount = 0;
17011879
do {
1880+
pngChunkCount++;
1881+
if (pngChunkCount > maximumPngChunkCount) {
1882+
break;
1883+
}
1884+
17021885
if (hasExceededUnknownSizeScanBudget(tokenizer, pngScanStart, maximumPngChunkSizeInBytes)) {
17031886
break;
17041887
}
@@ -1927,7 +2110,13 @@ export class FileTypeParser {
19272110
});
19282111
const isUnknownFileSize = hasUnknownFileSize(tokenizer);
19292112
const asfHeaderScanStart = tokenizer.position;
2113+
let asfHeaderObjectCount = 0;
19302114
while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
2115+
asfHeaderObjectCount++;
2116+
if (asfHeaderObjectCount > maximumAsfHeaderObjectCount) {
2117+
break;
2118+
}
2119+
19312120
if (hasExceededUnknownSizeScanBudget(tokenizer, asfHeaderScanStart, maximumUntrustedSkipSizeInBytes)) {
19322121
break;
19332122
}
@@ -2384,6 +2573,10 @@ export class FileTypeParser {
23842573

23852574
async readTiffIFD(bigEndian) {
23862575
const numberOfTags = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);
2576+
if (numberOfTags > maximumTiffTagCount) {
2577+
return;
2578+
}
2579+
23872580
if (
23882581
hasUnknownFileSize(this.tokenizer)
23892582
&& (2 + (numberOfTags * 12)) > maximumTiffIfdOffsetInBytes

index.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ function isTokenizerStreamBoundsError(error) {
2727

2828
export class FileTypeParser extends DefaultFileTypeParser {
2929
async fromStream(stream) {
30-
const tokenizer = await (stream instanceof WebReadableStream ? strtok3.fromWebStream(stream, this.tokenizerOptions) : strtok3.fromStream(stream, this.tokenizerOptions));
30+
const tokenizer = await (stream instanceof WebReadableStream ? strtok3.fromWebStream(stream, this.getTokenizerOptions()) : strtok3.fromStream(stream, this.getTokenizerOptions()));
3131
try {
3232
return await super.fromTokenizer(tokenizer);
3333
} catch (error) {

0 commit comments

Comments
 (0)