import logging
import os
from typing import Any, Dict

from fastapi import HTTPException, UploadFile
|
|
# Module-level logger named after this module; used by FileService below.
logger = logging.getLogger(__name__)
|
|
|
|
class FileService:
    """Validate uploaded text files and extract their content.

    Every method is static; the class only namespaces the upload limits
    and the helpers that enforce them.
    """

    # Lower-cased filename extensions (including the dot) accepted for upload.
    ALLOWED_EXTENSIONS = {'.txt'}
    # Largest accepted upload, in bytes (10 * 1024 * 1024).
    MAX_FILE_SIZE = 10 * 1024 * 1024
    # Minimum length of the whitespace-stripped text for it to be accepted.
    MIN_TEXT_LENGTH = 10

    @staticmethod
    def validate_file(file: UploadFile) -> None:
        """Check that an upload is present and has an allowed extension.

        Args:
            file: Uploaded file object.

        Raises:
            HTTPException: 400 if no file was provided, or if the filename's
                extension (case-insensitive) is not in ``ALLOWED_EXTENSIONS``.
                A missing filename is treated as having no extension.
        """
        if not file:
            raise HTTPException(status_code=400, detail="No file provided")

        # The filename can be absent on an upload; fall back to "" so that
        # splitext() does not raise TypeError and the request is rejected
        # with the regular "invalid file type" 400 instead.
        filename = file.filename or ""
        file_ext = os.path.splitext(filename)[1].lower()
        if file_ext not in FileService.ALLOWED_EXTENSIONS:
            # sorted() keeps the message stable if more extensions are added.
            allowed = ', '.join(sorted(FileService.ALLOWED_EXTENSIONS))
            raise HTTPException(
                status_code=400,
                detail=f"Invalid file type. Only {allowed} files are allowed"
            )

    @staticmethod
    async def extract_text_from_file(file: UploadFile) -> Dict[str, Any]:
        """Read an uploaded file and return its decoded text with metadata.

        The content is decoded as UTF-8, falling back to Latin-1 when the
        bytes are not valid UTF-8.

        Args:
            file: Uploaded file object.

        Returns:
            Dictionary with keys ``text`` (decoded content, unstripped),
            ``filename``, ``file_size`` (bytes read) and ``text_length``
            (number of characters in ``text``).

        Raises:
            HTTPException: 400 if validation fails, the file exceeds
                ``MAX_FILE_SIZE``, or the text is blank or shorter than
                ``MIN_TEXT_LENGTH`` after stripping; 500 for any other
                unexpected error while processing.
        """
        try:
            FileService.validate_file(file)

            # Read one byte past the limit at most: enough to detect an
            # oversized upload without loading all of it into memory.
            content = await file.read(FileService.MAX_FILE_SIZE + 1)

            file_size = len(content)
            if file_size > FileService.MAX_FILE_SIZE:
                raise HTTPException(
                    status_code=400,
                    detail=f"File too large. Maximum size is {FileService.MAX_FILE_SIZE / (1024*1024)} MB"
                )

            try:
                text = content.decode('utf-8')
            except UnicodeDecodeError:
                # Latin-1 maps every byte value to a character, so this
                # fallback cannot raise and needs no further error handling.
                text = content.decode('latin-1')

            stripped = text.strip()
            if not stripped:
                raise HTTPException(
                    status_code=400,
                    detail="File is empty or contains no readable text"
                )

            if len(stripped) < FileService.MIN_TEXT_LENGTH:
                raise HTTPException(
                    status_code=400,
                    detail="Extracted text is too short. Please provide more detailed provider notes"
                )

            logger.info(
                "Successfully extracted %s characters from %s",
                len(text),
                file.filename,
            )

            return {
                "text": text,
                "filename": file.filename,
                "file_size": file_size,
                "text_length": len(text)
            }

        except HTTPException:
            # Deliberate client-facing errors pass through unchanged.
            raise
        except Exception as e:
            # logger.exception records the traceback alongside the message.
            logger.exception("Error extracting text from file: %s", e)
            raise HTTPException(
                status_code=500,
                detail=f"Error processing file: {str(e)}"
            ) from e
|
|
|
|
| |
# Shared module-level instance. FileService defines only static methods and
# class constants, so this object carries no state of its own.
# NOTE(review): presumably imported by callers elsewhere — confirm usage.
file_service = FileService()