import re from fastapi import UploadFile def validate_mime_type(file: UploadFile): if file.content_type != "application/pdf": print("[!] validate_mime_type") return False return True def validate_extension(file: UploadFile): if not file.filename.endswith((".pdf")): print("[!] validate_extension") return False return True async def validate_magic_bytes(file: UploadFile): magic_bytes = await file.read(5) await file.seek(0) # Reset file pointer after reading if magic_bytes != b"%PDF-" or len(magic_bytes) < 5: print("[!] validate_magic_bytes") return False return True '''validate the uploaded file''' async def validate_document(file: UploadFile): if file.file is None: print("[!] file is None") return False if not validate_mime_type(file): return False if not validate_extension(file): return False if not await validate_magic_bytes(file): return False return True def sanitize_for_display(filename: str) -> str: # Remove any characters that are not letters, numbers, spaces, or common punctuation clean_name = re.sub(r'[^\w \-_.\(\)]', '', filename) #if it's longer than 50 chars keep the first 20 chars and the last 10 if len(clean_name) > 50: clean_name = clean_name[:40] + "..." + clean_name[-10:] return clean_name.strip() or "Untitled Document.pdf"