Spaces:
Sleeping
Sleeping
Ahmed Sadik
fix: correct argument order in get_document_text call for chunk context retrieval
e4662f9 | import re | |
| from fastapi import UploadFile | |
def validate_mime_type(file: UploadFile):
    """Return True when the upload declares the PDF media type.

    The comparison ignores letter case, surrounding whitespace and any
    parameters after ``;`` (for example ``Application/PDF; charset=binary``),
    since media type names are case-insensitive. A missing content type is
    rejected. Only the declared type is inspected here, not the file content.

    Args:
        file: The upload whose ``content_type`` attribute is checked.

    Returns:
        True if the declared type is ``application/pdf``, otherwise False
        (after printing a diagnostic marker).
    """
    declared = file.content_type or ""
    # Keep only the "type/subtype" part; parameters follow the first ";".
    media_type = declared.split(";", 1)[0].strip().lower()
    if media_type != "application/pdf":
        print("[!] validate_mime_type")
        return False
    return True
def validate_extension(file: UploadFile):
    """Return True when the uploaded file's name ends with ``.pdf``.

    The match is case-insensitive, so ``REPORT.PDF`` is accepted. An upload
    without a filename (``None`` or empty) is rejected instead of raising
    ``AttributeError``.

    Args:
        file: The upload whose ``filename`` attribute is checked.

    Returns:
        True if the name has a ``.pdf`` suffix, otherwise False (after
        printing a diagnostic marker).
    """
    filename = file.filename or ""
    if not filename.lower().endswith(".pdf"):
        print("[!] validate_extension")
        return False
    return True
async def validate_magic_bytes(file: UploadFile):
    """Return True when the file content starts with the PDF signature ``%PDF-``.

    Reads as many bytes as the signature is long, then seeks back to offset 0
    so that later readers see the file from the beginning.

    Args:
        file: The upload; its awaitable ``read`` and ``seek`` methods are used.

    Returns:
        True if the leading bytes equal ``b"%PDF-"``, otherwise False (after
        printing a diagnostic marker).
    """
    signature = b"%PDF-"
    header = await file.read(len(signature))
    await file.seek(0)  # Reset file pointer after reading
    # A short read can never equal the full signature, so a separate length
    # check would be dead code.
    if header != signature:
        print("[!] validate_magic_bytes")
        return False
    return True
async def validate_document(file: UploadFile):
    """Validate the uploaded file.

    Runs the checks in a fixed order: declared MIME type, filename extension,
    then magic bytes. Evaluation stops at the first failing check.

    Args:
        file: The upload to validate.

    Returns:
        True when every check passes. False when ``file`` or its underlying
        ``file.file`` object is None, or when any check fails.
    """
    if file is None or file.file is None:
        print("[!] file is None")
        return False
    # Short-circuit evaluation preserves the check order, so the file content
    # is only read once the two metadata checks have passed.
    return (
        validate_mime_type(file)
        and validate_extension(file)
        and await validate_magic_bytes(file)
    )
def sanitize_for_display(filename: str) -> str:
    """Return a cleaned, length-limited version of ``filename`` for display.

    Every character that is not a word character, a space, or one of
    ``- _ . ( )`` is removed, and surrounding whitespace is stripped. A name
    longer than 50 characters is shortened to exactly 50: its first 37
    characters, a ``...`` marker, and its last 10 characters.

    Args:
        filename: The raw name to clean.

    Returns:
        The cleaned name, or ``"Untitled Document.pdf"`` when nothing remains.
    """
    max_length = 50
    marker = "..."
    tail_length = 10
    # Strip before measuring so surrounding whitespace cannot trigger truncation.
    clean_name = re.sub(r'[^\w \-_.\(\)]', '', filename).strip()
    if len(clean_name) > max_length:
        # Size the head so head + marker + tail is exactly max_length; the
        # shortened name must never exceed the limit that triggered it.
        head_length = max_length - len(marker) - tail_length
        clean_name = clean_name[:head_length] + marker + clean_name[-tail_length:]
    return clean_name or "Untitled Document.pdf"