| """ |
| File handling utilities for document upload and validation. |
| """ |
| import os |
| import shutil |
| from pathlib import Path |
| from typing import Optional, Tuple |
| import config |
|
|
|
|
def validate_file(file_path: str) -> Tuple[bool, str]:
    """
    Validate an uploaded file's existence, kind, extension, and size.

    The checks run in order and the first failure is reported:

    1. the path must exist,
    2. the path must refer to a regular file (not a directory),
    3. its lower-cased suffix must be in ``config.ALLOWED_EXTENSIONS``,
    4. its size must not exceed ``config.MAX_FILE_SIZE_MB`` megabytes
       (1 MB = 1024 * 1024 bytes).

    Args:
        file_path: Path to the file to validate

    Returns:
        Tuple of (is_valid, error_message); error_message is "" when valid
    """
    file_path_obj = Path(file_path)

    if not file_path_obj.exists():
        return False, "File does not exist"

    # exists() is also true for directories; without this guard a directory
    # named e.g. "upload.pdf" would pass the extension and size checks.
    if not file_path_obj.is_file():
        return False, "Path is not a file"

    # Compare case-insensitively so ".PDF" and ".pdf" are treated alike.
    if file_path_obj.suffix.lower() not in config.ALLOWED_EXTENSIONS:
        return False, f"Invalid file type. Allowed: {', '.join(config.ALLOWED_EXTENSIONS)}"

    file_size_mb = file_path_obj.stat().st_size / (1024 * 1024)
    if file_size_mb > config.MAX_FILE_SIZE_MB:
        return False, f"File too large ({file_size_mb:.1f}MB). Max: {config.MAX_FILE_SIZE_MB}MB"

    return True, ""
|
|
|
|
def save_uploaded_file(uploaded_file, destination_dir: Optional[Path] = None) -> str:
    """
    Save an uploaded Streamlit file to disk.

    Only the final component of ``uploaded_file.name`` is used as the file
    name, so a client-supplied name containing directory parts (``../x`` or an
    absolute path) cannot place the file outside ``destination_dir``. An
    existing file with the same name is overwritten.

    Args:
        uploaded_file: Streamlit UploadedFile object; must provide ``name``
            and ``getbuffer()``
        destination_dir: Directory to save the file (default: config.UPLOAD_DIR);
            created, including parents, if it does not exist

    Returns:
        Path to saved file as string

    Raises:
        ValueError: If ``uploaded_file.name`` contains no usable file name
    """
    if destination_dir is None:
        destination_dir = config.UPLOAD_DIR
    # Accept plain strings as well as Path objects.
    destination_dir = Path(destination_dir)

    # The name comes from the client: normalise Windows separators, then keep
    # only the last path component to prevent directory traversal.
    safe_name = Path(uploaded_file.name.replace("\\", "/")).name
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    destination_dir.mkdir(parents=True, exist_ok=True)
    file_path = destination_dir / safe_name

    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    return str(file_path)
|
|
|
|
def cleanup_file(file_path: str) -> bool:
    """
    Remove a single file from disk on a best-effort basis.

    Any exception raised while checking for or deleting the file is printed
    and reported as a failure rather than propagated.

    Args:
        file_path: Path to file to delete

    Returns:
        True if the file existed and was deleted, False if it did not exist
        or the deletion failed
    """
    try:
        target = Path(file_path)
        # Nothing to delete: report False without treating it as an error.
        if not target.exists():
            return False
        target.unlink()
    except Exception as e:
        print(f"Error deleting file {file_path}: {e}")
        return False
    return True
|
|
|
|
def cleanup_directory(dir_path: Path, keep_dir: bool = True) -> bool:
    """
    Clean up all contents of a directory, optionally removing the directory.

    Symbolic links inside the directory are unlinked, never followed, so the
    link targets are left untouched. A directory that does not exist is
    treated as already clean. Any exception is printed and reported as a
    failure rather than propagated.

    Args:
        dir_path: Directory to clean
        keep_dir: If True, keep the directory but remove contents;
            if False, remove the directory itself as well

    Returns:
        True if successful (or the directory did not exist), False otherwise
    """
    try:
        # Accept plain strings as well as Path objects.
        dir_path = Path(dir_path)
        if not dir_path.exists():
            return True

        if not keep_dir:
            shutil.rmtree(dir_path)
            return True

        # Snapshot the listing so entries are not removed while the
        # directory is still being scanned.
        for item in list(dir_path.iterdir()):
            if item.is_dir() and not item.is_symlink():
                shutil.rmtree(item)
            else:
                # Regular files and every kind of symlink. rmtree() refuses
                # symlinks, and a dangling link is neither a file nor a dir.
                item.unlink()
        return True
    except Exception as e:
        print(f"Error cleaning directory {dir_path}: {e}")
        return False
|
|
|
|
def get_file_type(file_path: str) -> str:
    """
    Map a file's extension to a document type label.

    The extension is compared case-insensitively. Both ".docx" and ".doc"
    are reported as 'docx'.

    Args:
        file_path: Path to file

    Returns:
        'pdf', 'docx', or 'unknown' for any other (or missing) extension
    """
    type_by_suffix = {
        ".pdf": "pdf",
        ".docx": "docx",
        ".doc": "docx",
    }
    suffix = Path(file_path).suffix.lower()
    return type_by_suffix.get(suffix, "unknown")
|
|