dd-poc / app /core /utils.py
Juan Salas
Refactored code
12f0afd
#!/usr/bin/env python3
"""
Utility Functions Module
Collection of utility functions used throughout the application.
This module contains helper functions for file operations, formatting,
and document processing utilities.
"""
from typing import List, Optional
from pathlib import Path
def get_mime_type(file_path: Path) -> str:
    """Return the MIME type implied by the extension of ``file_path``.

    Only the final suffix of the path is inspected, case-insensitively;
    the file itself is never opened.

    Args:
        file_path: Path whose suffix selects the MIME type.

    Returns:
        The MIME type for a recognised extension, or
        ``'application/octet-stream'`` for any other (or missing) extension.
    """
    mime_by_suffix = {
        '.pdf': 'application/pdf',
        '.doc': 'application/msword',
        # .docx is Office Open XML, which has its own registered type;
        # 'application/msword' only describes the legacy binary .doc format.
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.txt': 'text/plain',
        '.md': 'text/markdown',
    }
    return mime_by_suffix.get(file_path.suffix.lower(), 'application/octet-stream')
def format_document_title(doc_name: str) -> str:
    """Turn a document file name into a human-readable title.

    Everything from the last dot onward is dropped (when the name contains
    a dot), underscores and hyphens become spaces, and the result is
    title-cased.
    """
    stem, dot, _ = doc_name.rpartition('.')
    if not dot:
        # No dot anywhere in the name: use it whole.
        stem = doc_name
    return stem.translate(str.maketrans('_-', '  ')).title()
def count_documents_in_directory(directory: Path, supported_extensions: Optional[List[str]] = None) -> int:
    """Count files with a supported extension under ``directory``, recursively.

    Args:
        directory: Root directory to search; all nested levels are visited.
        supported_extensions: File suffixes to count, each including the
            leading dot (e.g. ``'.pdf'``). Matching is case-insensitive.
            Defaults to ``['.pdf', '.docx', '.doc', '.txt', '.md']``.

    Returns:
        The number of regular files whose suffix is in the supported set.
        Directories are never counted, even if their name carries a
        matching suffix.
    """
    if supported_extensions is None:
        supported_extensions = ['.pdf', '.docx', '.doc', '.txt', '.md']
    # File suffixes are lowercased before comparison, so the allowed
    # extensions must be lowercased too; otherwise a caller passing
    # '.PDF' would silently match nothing.
    allowed = {ext.lower() for ext in supported_extensions}
    return sum(
        1
        for entry in directory.rglob('*')
        if entry.is_file() and entry.suffix.lower() in allowed
    )
def create_document_processor(store_name: Optional[str] = None) -> 'DocumentProcessor':
    """
    Build a DocumentProcessor for the given store.

    Thin factory that wraps the DocumentProcessor constructor so callers
    do not have to import the class themselves.

    Args:
        store_name: Optional store name, forwarded unchanged to the
            DocumentProcessor constructor as ``store_name``.

    Returns:
        A newly constructed DocumentProcessor instance.
    """
    # Imported at call time rather than at module load.
    from app.core.document_processor import DocumentProcessor

    return DocumentProcessor(store_name=store_name)