From 88becd6097cb3e96cb5149fb9c165a8e91312a87 Mon Sep 17 00:00:00 2001
From: atxr
Date: Fri, 23 Feb 2024 18:02:03 +0100
Subject: [PATCH] Scan zip for multiple threats

Reject an archive when the uncompressed size announced by its local file
headers is too large, when two local file records overlap, or when the
decoded data of an entry starts with an ELF or MZ signature.

---
 libmineziper/include/libmineziper.h     |  20 ++-
 libmineziper/include/libmineziper_zip.h |   3 +-
 libmineziper/src/libmineziper.c         | 206 ++++++++++++++++++++----
 tests/test_decode_fixed_tree.c          |   2 +-
 4 files changed, 194 insertions(+), 37 deletions(-)

diff --git a/libmineziper/include/libmineziper.h b/libmineziper/include/libmineziper.h
index d1d823e..9bf4014 100644
--- a/libmineziper/include/libmineziper.h
+++ b/libmineziper/include/libmineziper.h
@@ -6,7 +6,25 @@
 #include "libmineziper_huffman_tree.h"
 #include "libmineziper_zip.h"
 
-int get_uncompressed_size(zip* in);
+#define MAX_UNCOMPRESSED_SIZE 0x10000000
+#define MAX_INT 0xffffffff
+
+typedef struct data
+{
+  unsigned int size;
+  void (*clean)(void*);
+  char* buffer;
+} data;
+
+int get_uncompressed_size(zip zip);
+bool detect_overlaps(zip zip);
+bool scan_decoded_files(zip zip);
 bool scan_zip(char* zip_data, int zip_size);
 
+static const char* sigs[] = {
+    "\x7f"
+    "ELF",
+    "MZ"};
+static const int sigs_size = sizeof(sigs) / sizeof(char*);
+
 #endif
\ No newline at end of file
diff --git a/libmineziper/include/libmineziper_zip.h b/libmineziper/include/libmineziper_zip.h
index 6d225b5..9bfa5e1 100644
--- a/libmineziper/include/libmineziper_zip.h
+++ b/libmineziper/include/libmineziper_zip.h
@@ -9,7 +9,8 @@
 #define EOCD_SIG "PK\05\06"
 #define LFH_SIG "PK\03\04"
 #define CDH_SIG "PK\01\02"
-#define DEFLATE 8
+#define COMP_NONE 0
+#define COMP_DEFLATE 8
 
 #define END_OF_BLOCK 256
 
diff --git a/libmineziper/src/libmineziper.c b/libmineziper/src/libmineziper.c
index bb3bd78..0515ac9 100644
--- a/libmineziper/src/libmineziper.c
+++ b/libmineziper/src/libmineziper.c
@@ -5,37 +5,113 @@
 
 #include "libmineziper.h"
 
-int get_uncompressed_size(zip* in)
+// Sum of the uncompressed sizes announced by the local file headers,
+// saturated at MAX_INT. The return type is int: compare the result as
+// unsigned.
+int get_uncompressed_size(zip zip)
 {
-  int size = 0;
+  unsigned int size = 0;
 
-  for (int i = 0; i < in->entries; i++)
+  for (int i = 0; i < zip.entries; i++)
   {
-    LFH* lfh = &in->start[in->lfh_off[i]];
-    size += lfh->uncompressed_size;
+    // lfh_off[i] counts bytes, so add it before casting to LFH*
+    unsigned int file_size =
+        ((LFH*) (zip.start + zip.lfh_off[i]))->uncompressed_size;
+
+    // Saturate as soon as the running sum would wrap around
+    if (file_size > MAX_INT - size)
+    {
+      return MAX_INT;
+    }
+    size += file_size;
   }
 
   return size;
 }
 
-bool scan_zip(char* zip_data, int zip_size)
+// qsort() comparator: ascending order of unsigned offsets
+static int compare_offsets(const void* a, const void* b)
+{
+  unsigned int lhs = *(const unsigned int*) a;
+  unsigned int rhs = *(const unsigned int*) b;
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+// Returns true when a local file record (header, file name, extra field and
+// compressed data) runs past the start of the next record.
+bool detect_overlaps(zip zip)
 {
-  zip zip = init_zip(zip_data, zip_size);
+  if (zip.entries < 2)
+  {
+    return false;
+  }
+
+  // Entries can be stored in any order: compare neighbours of a sorted copy
+  unsigned int* offsets = malloc(zip.entries * sizeof(*offsets));
+  if (!offsets)
+  {
+    // Fail closed: an archive that cannot be checked is reported
+    return true;
+  }
+  for (int i = 0; i < zip.entries; i++)
+  {
+    offsets[i] = zip.lfh_off[i];
+  }
+  qsort(offsets, zip.entries, sizeof(*offsets), compare_offsets);
 
+  bool overlap = false;
+  for (int i = 0; i < zip.entries - 1; i++)
+  {
+    LFH* lfh = (LFH*) (zip.start + offsets[i]);
+    // 64-bit sum so that a huge compressed_size cannot wrap around
+    unsigned long long lf_size = (unsigned long long) sizeof(LFH) +
+                                 lfh->filename_length + lfh->extraf_length +
+                                 lfh->compressed_size;
+
+    if (lf_size > offsets[i + 1] - offsets[i])
+    {
+      overlap = true;
+      break;
+    }
+  }
+
+  free(offsets);
+  return overlap;
+}
+
+// Decodes the first block of every entry and compares its first bytes with
+// the signatures listed in sigs. Returns true when the archive is rejected.
+bool scan_decoded_files(zip zip)
+{
   for (int i = 0; i < zip.entries; i++)
   {
-    LFH* lfh = zip.start + zip.lfh_off[i];
+    LFH* lfh = (LFH*) (zip.start + zip.lfh_off[i]);
 
     // Verify CDH/LFH parsed sizes to avoid undefined behavior
     if (lfh->filename_length != zip.cdh_filename_length[i])
-    {
-      printf("[ERROR] Mismatch in CDH/LFH filename lengths.\n");
-      return true;
-    }
-
-    char* decoded_data = "";
-    if (lfh->compression_method == DEFLATE)
     {
+      printf("[ERROR] Mismatch in CDH/LFH filename lengths.\n");
+      return true;
+    }
+
+    // Stack storage: nothing to release on the early exits below
+    data decoded_storage = {0};
+    data* decoded = &decoded_storage;
+
+    if (lfh->compression_method == COMP_NONE)
+    {
+      printf("[FILE %d] Scanning stored data...\n", i);
+      int lfh_length = sizeof(LFH) + lfh->filename_length + lfh->extraf_length;
+
+      // Stored data is scanned in place: no copy, hence no clean callback
+      decoded->buffer = &((char*) lfh)[lfh_length];
+      decoded->size = lfh->uncompressed_size;
+    }
+
+    else if (lfh->compression_method == COMP_DEFLATE)
+    {
+      printf("[FILE %d] Scanning first block of DEFLATED data...\n", i);
+
       int lfh_length = sizeof(LFH) + lfh->filename_length + lfh->extraf_length;
       char* encoded_block = &((char*) lfh)[lfh_length];
 
@@ -46,54 +122,116 @@ bool scan_zip(char* zip_data, int zip_size)
       if (deflate_header.block_type == 0)
       {
         align_to_next_byte(&bs);
-        short block_size = get_bits(&bs, 16);
+        decoded->size = get_bits(&bs, 16);
         short inv_block_size = get_bits(&bs, 16);
-        assert(block_size == ~inv_block_size);
+        if ((short) decoded->size != ~inv_block_size)
+        {
+          fprintf(stderr, "-> Sizes mismatch in block type 0.\n");
+          return true;
+        }
 
-        decoded_data = malloc(block_size);
+        decoded->buffer = (char*) malloc(decoded->size);
+        if (!decoded->buffer)
+        {
+          fprintf(
+              stderr, "-> Failed to allocate buffer. Skipping this block.\n");
+          continue;
+        }
+        decoded->clean = &free;
 
-        memcpy(decoded_data, &bs.data[bs.current_data_offset], block_size);
+        memcpy(
+            decoded->buffer, &bs.data[bs.current_data_offset], decoded->size);
       }
 
       // Fixed Huffman Codes
       else if (deflate_header.block_type == 1)
       {
-        printf("[FILE %d] Scanning 1 block...\n", i);
-        decoded_data = malloc(lfh->uncompressed_size);
+        decoded->size = lfh->uncompressed_size;
 
-        decode_type1_block_vuln(&bs, decoded_data);
+        decoded->buffer = (char*) malloc(decoded->size);
+        if (!decoded->buffer)
+        {
+          fprintf(
+              stderr, "-> Failed to allocate buffer. Skipping this block.\n");
+          continue;
+        }
+        decoded->clean = &free;
+
+        // NOTE(review): the decoder is not given the buffer length; confirm
+        // that it cannot write more than uncompressed_size bytes.
+        decode_type1_block_vuln(&bs, decoded->buffer);
       }
 
       // Dynamic Huffman Codes
       else if (deflate_header.block_type == 2)
       {
-        fprintf(
-            stderr,
-            "[FILE %d] Dynamic Huffman codes block type not supported\n",
-            i);
+        fprintf(stderr, "-> Dynamic Huffman codes block type not supported\n");
       }
 
       // Invalid type
       else
       {
-        fprintf(stderr, "[FILE %d] Error in compressed data\n", i);
+        fprintf(stderr, "-> Error in compressed data\n");
       }
     }
     else
     {
-      fprintf(stderr, "Unknown decompression algorithm. Skipping...\n");
+      fprintf(
+          stderr,
+          "[FILE %d] Unknown decompression algorithm. Skipping...\n",
+          i);
     }
 
-    // Test the decoded data
-    if (strcmp("VIRUS", decoded_data) == 0)
+    // Test magic bytes of the decoded data: a signature matches only when
+    // the decoded data is long enough to hold it entirely
+    bool infected = false;
+    for (int s = 0; s < sigs_size && !infected; s++)
     {
-      printf("-> VIRUS FOUND\n");
-      return true;
+      size_t sig_length = strlen(sigs[s]);
+      infected = decoded->buffer && decoded->size >= sig_length &&
+                 memcmp(sigs[s], decoded->buffer, sig_length) == 0;
     }
-    else
+
+    // If allocated with malloc, free the buffer before leaving the iteration
+    if (decoded->clean)
     {
-      printf("-> OK\n\n");
+      decoded->clean(decoded->buffer);
     }
+
+    if (infected)
+    {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Runs every check on the archive. Returns true when it must be rejected.
+bool scan_zip(char* zip_data, int zip_size)
+{
+  zip zip = init_zip(zip_data, zip_size);
+
+  // The size comes back in an int: compare it as unsigned so that the
+  // saturated value MAX_INT is not seen as -1
+  if ((unsigned int) get_uncompressed_size(zip) > MAX_UNCOMPRESSED_SIZE)
+  {
+    fprintf(stderr, "Uncompressed size too big. Could be a zip bomb.\n");
+    return true;
+  }
+
+  if (detect_overlaps(zip))
+  {
+    fprintf(
+        stderr, "Potential overlaps found in zip file. Could be a zip bomb.\n");
+    return true;
+  }
+
+  if (scan_decoded_files(zip))
+  {
+    fprintf(
+        stderr, "Decompressed files looks malicious. Could contain a virus.\n");
+    return true;
   }
 
   return false;
diff --git a/tests/test_decode_fixed_tree.c b/tests/test_decode_fixed_tree.c
index efd31a2..c84f83a 100644
--- a/tests/test_decode_fixed_tree.c
+++ b/tests/test_decode_fixed_tree.c
@@ -44,7 +44,7 @@ void main(int argc, char** argv)
   }
   else
   {
-    if (lfh->compression_method == DEFLATE)
+    if (lfh->compression_method == COMP_DEFLATE)
     {
       char* data = ((char*) lfh) + sizeof(LFH) + lfh->filename_length +
                    lfh->extraf_length;