@@ -17,12 +17,19 @@ export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-typ
// Keep defensive limits small enough to avoid accidental memory spikes from untrusted inputs.

// Generic skip/scan budget applied wherever an untrusted length field is honored.
const maximumUntrustedSkipSizeInBytes = 16 * 1024 * 1024;

// MPEG sync-word search tolerance within the detection buffer.
const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2;

// ZIP limits: per-entry payload size, text-entry size, total entry count,
// and the largest single buffered read we will ever attempt.
const maximumZipEntrySizeInBytes = 1024 * 1024;
const maximumZipTextEntrySizeInBytes = maximumZipEntrySizeInBytes;
const maximumZipEntryCount = 1024;
const maximumZipBufferedReadSizeInBytes = (2 ** 31) - 1;

// Gzip limits: how much nested content we inflate for detection, and how deep we probe.
const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumNestedGzipProbeDepth = 1;

// ID3 tag headers may claim arbitrarily large sizes; cap how far we skip.
const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes;

// EBML (Matroska/WebM) limits.
const maximumEbmlDocumentTypeSizeInBytes = 64;
const maximumEbmlElementPayloadSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumEbmlElementCount = 256;

// Per-container element/chunk bounds that keep scan loops finite.
const maximumPngChunkCount = 512;
const maximumPngChunkSizeInBytes = maximumUntrustedSkipSizeInBytes;
const maximumAsfHeaderObjectCount = 512;
const maximumTiffTagCount = 512;
const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes;

// Guard against pathological recursion between detectors.
const maximumDetectionReentryCount = 256;
@@ -32,7 +39,9 @@ const recoverableZipErrorMessages = new Set([
3239 'Expected Central-File-Header signature' ,
3340] ) ;
// Message prefixes (for errors whose text embeds dynamic values) that mark a
// malformed or unsupported ZIP entry as recoverable rather than fatal.
const recoverableZipErrorMessagePrefixes = [
	'ZIP entry count exceeds ',
	'Unsupported ZIP compression method:',
	'ZIP entry compressed data exceeds ',
	'ZIP entry decompressed data exceeds ',
];
3847const recoverableZipErrorCodes = new Set ( [
@@ -110,6 +119,114 @@ async function decompressDeflateRawWithLimit(data, {maximumLength = maximumZipEn
110119 return uncompressedData ;
111120}
112121
// A ZIP data descriptor trails entries written with unknown sizes (general-purpose bit 3):
// signature 'PK\x07\x08' followed by CRC-32, compressed size, and uncompressed size.
const zipDataDescriptorSignature = 0x08_07_4B_50;
const zipDataDescriptorLengthInBytes = 16;
// When a descriptor may straddle two read windows, retain this many trailing bytes.
const zipDataDescriptorOverlapLengthInBytes = zipDataDescriptorLengthInBytes - 1;
125+
// Scan `buffer` for a plausible ZIP data descriptor.
// A candidate must carry the descriptor signature AND a compressed-size field
// consistent with the entry bytes consumed so far (`bytesConsumed` plus the
// bytes preceding the candidate inside this buffer) — this guards against the
// signature bytes occurring by chance in entry data.
// Returns the descriptor's offset within `buffer`, or -1 when none is found.
function findZipDataDescriptorOffset(buffer, bytesConsumed) {
	const lastCandidateOffset = buffer.length - zipDataDescriptorLengthInBytes;

	for (let offset = 0; offset <= lastCandidateOffset; offset++) {
		const hasSignature = Token.UINT32_LE.get(buffer, offset) === zipDataDescriptorSignature;
		// Offset 8 within the descriptor holds the compressed-size field.
		if (hasSignature && Token.UINT32_LE.get(buffer, offset + 8) === bytesConsumed + offset) {
			return offset;
		}
	}

	return -1;
}
143+
// Concatenate a list of Uint8Array chunks into one array of `totalLength` bytes.
function mergeByteChunks(chunks, totalLength) {
	const merged = new Uint8Array(totalLength);

	chunks.reduce((writeOffset, chunk) => {
		merged.set(chunk, writeOffset);
		return writeOffset + chunk.length;
	}, 0);

	return merged;
}
155+
// Consume a ZIP entry whose sizes are unknown up front (general-purpose bit 3):
// stream forward until a plausible data descriptor is found, leaving the
// descriptor itself unconsumed for the caller to validate.
// When `shouldBuffer` is set, returns the entry's compressed bytes as a single
// Uint8Array; otherwise the bytes are skipped and undefined is returned.
// Throws once more than `maximumLength` bytes would be consumed.
async function readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer, maximumLength = maximumZipEntrySizeInBytes} = {}) {
	const {syncBuffer, tokenizer} = zipHandler;
	const chunks = [];
	let bytesConsumed = 0;

	for (;;) {
		const length = await tokenizer.peekBuffer(syncBuffer, {mayBeLess: true});
		const descriptorOffset = findZipDataDescriptorOffset(syncBuffer.subarray(0, length), bytesConsumed);

		let chunkLength;
		if (descriptorOffset >= 0) {
			// Everything before the descriptor belongs to the entry.
			chunkLength = descriptorOffset;
		} else if (length === syncBuffer.length) {
			// A full window without a match: keep a descriptor-sized overlap so a
			// descriptor straddling the window boundary is still found next round.
			chunkLength = length - Math.min(zipDataDescriptorOverlapLengthInBytes, length - 1);
		} else {
			// Short (final) window with no descriptor: consume it all.
			chunkLength = length;
		}

		if (chunkLength === 0) {
			break;
		}

		bytesConsumed += chunkLength;
		if (bytesConsumed > maximumLength) {
			throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
		}

		if (shouldBuffer) {
			const chunk = new Uint8Array(chunkLength);
			await tokenizer.readBuffer(chunk);
			chunks.push(chunk);
		} else {
			await tokenizer.ignore(chunkLength);
		}

		if (descriptorOffset >= 0) {
			break;
		}
	}

	return shouldBuffer ? mergeByteChunks(chunks, bytesConsumed) : undefined;
}
202+
// Read (or skip) the compressed payload of one ZIP local-file entry.
// Entries flagged with a data descriptor and a zero compressed size have an
// unknown length and are consumed by scanning for the trailing descriptor;
// otherwise the header's compressedSize is trusted, subject to buffering limits.
// Returns the raw compressed bytes when `shouldBuffer` is set, else undefined.
async function readZipEntryData(zipHandler, zipHeader, {shouldBuffer} = {}) {
	const {tokenizer} = zipHandler;
	const {compressedSize} = zipHeader;

	// Unknown-size entry: stream until the data descriptor is located.
	if (zipHeader.dataDescriptor && compressedSize === 0) {
		return readZipDataDescriptorEntryWithLimit(zipHandler, {shouldBuffer});
	}

	// Known size, payload not needed: skip it outright.
	if (!shouldBuffer) {
		await tokenizer.ignore(compressedSize);
		return;
	}

	// Known size, payload needed: validate the untrusted size before allocating.
	const maximumLength = getMaximumZipBufferedReadLength(tokenizer);
	const isReadableSize = Number.isFinite(compressedSize)
		&& compressedSize >= 0
		&& compressedSize <= maximumLength;
	if (!isReadableSize) {
		throw new Error(`ZIP entry compressed data exceeds ${maximumLength} bytes`);
	}

	const fileData = new Uint8Array(compressedSize);
	await tokenizer.readBuffer(fileData);
	return fileData;
}
229+
113230// Override the default inflate to enforce decompression size limits, since @tokenizer/inflate does not expose a configuration hook for this.
114231ZipHandler . prototype . inflate = async function ( zipHeader , fileData , callback ) {
115232 if ( zipHeader . compressedMethod === 0 ) {
@@ -125,6 +242,41 @@ ZipHandler.prototype.inflate = async function (zipHeader, fileData, callback) {
125242 return callback ( uncompressedData ) ;
126243} ;
127244
// Replacement for ZipHandler#unzip that bounds the number of entries processed
// and supports streamed (data-descriptor) entries with unknown sizes.
// `fileCallback` is invoked with each local-file header and returns an object:
// `handler` (optional) receives the entry's inflated data; `stop` ends the
// walk after the current entry has been processed.
ZipHandler.prototype.unzip = async function (fileCallback) {
	let shouldStop = false;
	let zipEntryCount = 0;

	while (!shouldStop) {
		const zipHeader = await this.readLocalFileHeader();
		if (!zipHeader) {
			break;
		}

		// Cap the number of entries so a crafted archive cannot spin forever.
		if (++zipEntryCount > maximumZipEntryCount) {
			throw new Error(`ZIP entry count exceeds ${maximumZipEntryCount}`);
		}

		const instruction = fileCallback(zipHeader);
		shouldStop = Boolean(instruction.stop);

		await this.tokenizer.ignore(zipHeader.extraFieldLength);
		const fileData = await readZipEntryData(this, zipHeader, {
			shouldBuffer: Boolean(instruction.handler),
		});

		if (instruction.handler) {
			await this.inflate(zipHeader, fileData, instruction.handler);
		}

		// A trailing data descriptor (if declared) follows the payload; validate
		// its signature so we stay aligned with the next local-file header.
		if (zipHeader.dataDescriptor) {
			const dataDescriptor = new Uint8Array(zipDataDescriptorLengthInBytes);
			await this.tokenizer.readBuffer(dataDescriptor);
			if (Token.UINT32_LE.get(dataDescriptor, 0) !== zipDataDescriptorSignature) {
				throw new Error(`Expected data-descriptor-signature at position ${this.tokenizer.position - dataDescriptor.length}`);
			}
		}
	}
};
279+
128280function createByteLimitedReadableStream ( stream , maximumBytes ) {
129281 const reader = stream . getReader ( ) ;
130282 let emittedBytes = 0 ;
@@ -385,6 +537,15 @@ function hasExceededUnknownSizeScanBudget(tokenizer, startOffset, maximumBytes)
385537 ) ;
386538}
387539
// Upper bound for buffering a single ZIP entry: the bytes remaining in the
// file when its size is known (otherwise effectively unbounded), always
// capped at the largest single allocation we are willing to make.
function getMaximumZipBufferedReadLength(tokenizer) {
	const {size} = tokenizer.fileInfo;
	const remainingBytes = Number.isFinite(size)
		? Math.max(0, size - tokenizer.position)
		: Number.MAX_SAFE_INTEGER;

	return Math.min(remainingBytes, maximumZipBufferedReadSizeInBytes);
}
548+
388549function isRecoverableZipError ( error ) {
389550 if ( error instanceof strtok3 . EndOfStreamError ) {
390551 return true ;
@@ -546,6 +707,13 @@ export class FileTypeParser {
546707 this . tokenizerOptions = {
547708 abortSignal : this . options . signal ,
548709 } ;
710+ this . gzipProbeDepth = 0 ;
711+ }
712+
713+ getTokenizerOptions ( ) {
714+ return {
715+ ...this . tokenizerOptions ,
716+ } ;
549717 }
550718
551719 async fromTokenizer ( tokenizer , detectionReentryCount = 0 ) {
@@ -589,11 +757,11 @@ export class FileTypeParser {
589757 return ;
590758 }
591759
592- return this . fromTokenizer ( strtok3 . fromBuffer ( buffer , this . tokenizerOptions ) ) ;
760+ return this . fromTokenizer ( strtok3 . fromBuffer ( buffer , this . getTokenizerOptions ( ) ) ) ;
593761 }
594762
595763 async fromBlob ( blob ) {
596- const tokenizer = strtok3 . fromBlob ( blob , this . tokenizerOptions ) ;
764+ const tokenizer = strtok3 . fromBlob ( blob , this . getTokenizerOptions ( ) ) ;
597765 try {
598766 return await this . fromTokenizer ( tokenizer ) ;
599767 } finally {
@@ -602,7 +770,7 @@ export class FileTypeParser {
602770 }
603771
604772 async fromStream ( stream ) {
605- const tokenizer = strtok3 . fromWebStream ( stream , this . tokenizerOptions ) ;
773+ const tokenizer = strtok3 . fromWebStream ( stream , this . getTokenizerOptions ( ) ) ;
606774 try {
607775 return await this . fromTokenizer ( tokenizer ) ;
608776 } finally {
@@ -777,17 +945,27 @@ export class FileTypeParser {
777945 }
778946
779947 if ( this . check ( [ 0x1F , 0x8B , 0x8 ] ) ) {
948+ if ( this . gzipProbeDepth >= maximumNestedGzipProbeDepth ) {
949+ return {
950+ ext : 'gz' ,
951+ mime : 'application/gzip' ,
952+ } ;
953+ }
954+
780955 const gzipHandler = new GzipHandler ( tokenizer ) ;
781956 const limitedInflatedStream = createByteLimitedReadableStream ( gzipHandler . inflate ( ) , maximumNestedGzipDetectionSizeInBytes ) ;
782957 let compressedFileType ;
783958 try {
959+ this . gzipProbeDepth ++ ;
784960 compressedFileType = await this . fromStream ( limitedInflatedStream ) ;
785961 } catch ( error ) {
786962 if ( error ?. name === 'AbortError' ) {
787963 throw error ;
788964 }
789965
790966 // Decompression or inner-detection failures are expected for non-tar gzip files.
967+ } finally {
968+ this . gzipProbeDepth -- ;
791969 }
792970
793971 // We only need enough inflated bytes to confidently decide whether this is tar.gz.
@@ -980,7 +1158,7 @@ export class FileTypeParser {
9801158 stop : true ,
9811159 } ;
9821160 case 'mimetype' :
983- if ( ! canReadZipEntryForDetection ( zipHeader ) ) {
1161+ if ( ! canReadZipEntryForDetection ( zipHeader , maximumZipTextEntrySizeInBytes ) ) {
9841162 return { } ;
9851163 }
9861164
@@ -996,8 +1174,7 @@ export class FileTypeParser {
9961174 case '[Content_Types].xml' : {
9971175 openXmlState . hasContentTypesEntry = true ;
9981176
999- const maximumContentTypesEntrySize = hasUnknownFileSize ( tokenizer ) ? maximumZipEntrySizeInBytes : Number . MAX_SAFE_INTEGER ;
1000- if ( ! canReadZipEntryForDetection ( zipHeader , maximumContentTypesEntrySize ) ) {
1177+ if ( ! canReadZipEntryForDetection ( zipHeader , maximumZipTextEntrySizeInBytes ) ) {
10011178 openXmlState . hasUnparseableContentTypes = true ;
10021179 return { } ;
10031180 }
@@ -1698,7 +1875,13 @@ export class FileTypeParser {
16981875
16991876 const isUnknownPngStream = hasUnknownFileSize ( tokenizer ) ;
17001877 const pngScanStart = tokenizer . position ;
1878+ let pngChunkCount = 0 ;
17011879 do {
1880+ pngChunkCount ++ ;
1881+ if ( pngChunkCount > maximumPngChunkCount ) {
1882+ break ;
1883+ }
1884+
17021885 if ( hasExceededUnknownSizeScanBudget ( tokenizer , pngScanStart , maximumPngChunkSizeInBytes ) ) {
17031886 break ;
17041887 }
@@ -1927,7 +2110,13 @@ export class FileTypeParser {
19272110 } ) ;
19282111 const isUnknownFileSize = hasUnknownFileSize ( tokenizer ) ;
19292112 const asfHeaderScanStart = tokenizer . position ;
2113+ let asfHeaderObjectCount = 0 ;
19302114 while ( tokenizer . position + 24 < tokenizer . fileInfo . size ) {
2115+ asfHeaderObjectCount ++ ;
2116+ if ( asfHeaderObjectCount > maximumAsfHeaderObjectCount ) {
2117+ break ;
2118+ }
2119+
19312120 if ( hasExceededUnknownSizeScanBudget ( tokenizer , asfHeaderScanStart , maximumUntrustedSkipSizeInBytes ) ) {
19322121 break ;
19332122 }
@@ -2384,6 +2573,10 @@ export class FileTypeParser {
23842573
23852574 async readTiffIFD ( bigEndian ) {
23862575 const numberOfTags = await this . tokenizer . readToken ( bigEndian ? Token . UINT16_BE : Token . UINT16_LE ) ;
2576+ if ( numberOfTags > maximumTiffTagCount ) {
2577+ return ;
2578+ }
2579+
23872580 if (
23882581 hasUnknownFileSize ( this . tokenizer )
23892582 && ( 2 + ( numberOfTags * 12 ) ) > maximumTiffIfdOffsetInBytes
0 commit comments