import io import os from pathlib import Path from fastapi import UploadFile from fastapi.datastructures import Headers from PIL import Image import pytest from constants import MAX_FILE_NAME_LENGTH, MAX_FILE_SIZE from exceptions import ( FileExtractionError, FileExtractionException, FileValidationError, FileValidationException, ) from helpers.file_helper import ( ValidatedFile, clean_text, extract_text_from_file, sanitize_image, validate_file, ) from tests.file_factory import ( create_empty_txt, create_fake_large_jpeg, create_fake_large_png, create_fake_small_png, create_jpeg_cpu_scan_bomb, create_jpeg_pixel_bomb, create_jpeg_with_excessive_markers, create_malformed_jpeg, create_malformed_pdf, create_malformed_png, create_png_decompression_bomb, create_simple_txt, create_simple_pdf, create_simple_docx, create_simple_png, create_simple_jpeg, create_deeply_nested_docx, create_deeply_nested_docx_bomb, create_zip_bomb_docx, create_xxe_docx, ) DATA_DIR = os.path.join(os.path.dirname(__file__), "data") INVALID_FILE_NAMES = [ # Path traversal attacks "../etc/passwd", "../../secret.txt", # Null bytes "file\x00.txt", # Special characters "file;rm -rf.txt", "file|cmd.txt", "file&cmd.txt", "file>redirect.txt", "