agentic-doc-sim-streamlit / src /utils /file_handler.py
syedmohaiminulhoque's picture
Upload full Streamlit app from GitHub
f02f2d2
"""
File handling utilities for document upload and validation.
"""
import os
import shutil
from pathlib import Path
from typing import Optional, Tuple
import config
def validate_file(file_path: str) -> Tuple[bool, str]:
    """
    Validate an uploaded file before it is processed.

    The checks run in order and stop at the first failure: the path must
    exist, must be a file rather than a directory, must carry one of the
    extensions in config.ALLOWED_EXTENSIONS (compared case-insensitively),
    and must not be larger than config.MAX_FILE_SIZE_MB.

    Args:
        file_path: Path to the file to validate

    Returns:
        Tuple of (is_valid, error_message); error_message is "" when valid
    """
    file_path_obj = Path(file_path)

    # Check if file exists
    if not file_path_obj.exists():
        return False, "File does not exist"

    # A directory (e.g. one named "report.pdf") would otherwise satisfy the
    # extension and size checks below and be reported as a valid upload.
    if not file_path_obj.is_file():
        return False, "Path is not a file"

    # Check file extension
    if file_path_obj.suffix.lower() not in config.ALLOWED_EXTENSIONS:
        return False, f"Invalid file type. Allowed: {', '.join(config.ALLOWED_EXTENSIONS)}"

    # Check file size (bytes converted to megabytes)
    file_size_mb = file_path_obj.stat().st_size / (1024 * 1024)
    if file_size_mb > config.MAX_FILE_SIZE_MB:
        return False, f"File too large ({file_size_mb:.1f}MB). Max: {config.MAX_FILE_SIZE_MB}MB"

    return True, ""
def save_uploaded_file(uploaded_file, destination_dir: Optional[Path] = None) -> str:
    """
    Save an uploaded Streamlit file to disk.

    Only the final component of the uploaded file's name is used, so a name
    containing directory parts (for example "../../x.pdf") cannot place the
    file outside the destination directory.

    Args:
        uploaded_file: Streamlit UploadedFile object (needs .name and .getbuffer())
        destination_dir: Directory to save the file (default: config.UPLOAD_DIR);
            a plain string path is accepted as well as a Path

    Returns:
        Path to saved file as string

    Raises:
        ValueError: If the uploaded file's name has no usable file name
            component (empty, "." or "..")
    """
    if destination_dir is None:
        destination_dir = config.UPLOAD_DIR
    # Normalise so callers may pass either a str or a Path
    destination_dir = Path(destination_dir)

    # The name is supplied by the client and must be treated as untrusted:
    # strip any directory parts (both "/" and "\" separators) before joining.
    safe_name = os.path.basename(str(uploaded_file.name).replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    # Ensure destination directory exists
    destination_dir.mkdir(parents=True, exist_ok=True)

    # Create file path
    file_path = destination_dir / safe_name

    # Write file
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    return str(file_path)
def cleanup_file(file_path: str) -> bool:
    """
    Remove a single file from disk.

    Args:
        file_path: Location of the file to remove

    Returns:
        True when the path existed and was removed; False when it did not
        exist or when removal raised an error (the error is printed)
    """
    try:
        target = Path(file_path)
        # Guard clause: nothing on disk means nothing was deleted
        if not target.exists():
            return False
        target.unlink()
    except Exception as e:
        print(f"Error deleting file {file_path}: {e}")
        return False
    return True
def cleanup_directory(dir_path: Path, keep_dir: bool = True) -> bool:
    """
    Clean up all contents of a directory.

    Args:
        dir_path: Directory to clean (a plain string path is also accepted)
        keep_dir: If True, keep the directory but remove contents;
            if False, remove the directory itself as well

    Returns:
        True if successful (including when the directory does not exist),
        False if any error occurred (the error is printed)
    """
    try:
        dir_path = Path(dir_path)

        # A missing directory is already "clean"
        if not dir_path.exists():
            return True

        if not keep_dir:
            # Remove directory entirely
            shutil.rmtree(dir_path)
            return True

        for item in dir_path.iterdir():
            # Only real directories go through rmtree: it refuses symlinks,
            # so a link to a directory is unlinked (its target is untouched).
            # Everything else -- regular files, dangling symlinks, special
            # files -- is unlinked rather than silently left behind.
            if item.is_dir() and not item.is_symlink():
                shutil.rmtree(item)
            else:
                item.unlink()
        return True
    except Exception as e:
        print(f"Error cleaning directory {dir_path}: {e}")
        return False
def get_file_type(file_path: str) -> str:
    """
    Map a file's extension to a document type label.

    The extension is compared case-insensitively.

    Args:
        file_path: Path to file

    Returns:
        'pdf' for .pdf, 'docx' for .docx and .doc, 'unknown' for anything else
    """
    # Lookup table instead of an if/elif chain; .doc shares the 'docx' label
    type_by_suffix = {".pdf": "pdf", ".docx": "docx", ".doc": "docx"}
    suffix = Path(file_path).suffix.lower()
    return type_by_suffix.get(suffix, "unknown")