# (removed scraped page chrome: "Spaces: Running")
| """ | |
| Configuration module for PDF Insight Beta application. | |
| This module centralizes all configuration settings, constants, and environment variables. | |
| """ | |
| import os | |
| from typing import List, Dict, Any | |
| import dotenv | |
| # Load environment variables | |
| dotenv.load_dotenv() | |
class Config:
    """Application configuration class.

    All values are class-level constants; API keys are read from the
    environment (populated above via ``dotenv.load_dotenv()``) and default
    to "" when unset — callers must check for the empty string themselves.
    """

    # API Configuration — empty string (not None) when the env var is missing.
    GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
    TAVILY_API_KEY: str = os.getenv("TAVILY_API_KEY", "")

    # Application Settings
    UPLOAD_DIR: str = "uploads"  # relative path; created on import (see initialize_directories)
    MAX_FILE_SIZE: int = 50 * 1024 * 1024  # 50MB

    # Model Configuration
    DEFAULT_MODEL: str = "llama-3.1-8b-instant"
    EMBEDDING_MODEL: str = "BAAI/bge-large-en-v1.5"

    # Text Processing Settings (units: characters, unless noted otherwise)
    DEFAULT_CHUNK_SIZE: int = 1000
    MIN_CHUNK_LENGTH: int = 20
    MIN_PARAGRAPH_LENGTH: int = 10

    # RAG Configuration
    DEFAULT_K_CHUNKS: int = 10        # chunks retrieved per query
    INITIAL_CONTEXT_CHUNKS: int = 5   # chunks seeded into the first context
    MAX_CONTEXT_TOKENS: int = 7000
    # NOTE(review): threshold > 1.0 suggests a distance metric (lower = closer),
    # not cosine similarity — confirm against the retrieval code.
    SIMILARITY_THRESHOLD: float = 1.5

    # LLM Settings
    LLM_TEMPERATURE: float = 0.1  # near-deterministic generation
    MAX_TOKENS: int = 4500

    # FAISS Index Configuration (HNSW-style parameters)
    FAISS_NEIGHBORS: int = 32
    FAISS_EF_CONSTRUCTION: int = 200
    FAISS_EF_SEARCH: int = 50

    # Agent Configuration
    AGENT_MAX_ITERATIONS: int = 2
    AGENT_VERBOSE: bool = False

    # Tavily Search Configuration
    TAVILY_MAX_RESULTS: int = 5
    TAVILY_SEARCH_DEPTH: str = "advanced"
    TAVILY_INCLUDE_ANSWER: bool = True
    TAVILY_INCLUDE_RAW_CONTENT: bool = False

    # CORS Configuration — wildcard origins/methods/headers; fine for a demo,
    # but "*" origins combined with credentials is rejected by browsers.
    CORS_ORIGINS: List[str] = ["*"]
    CORS_CREDENTIALS: bool = True
    CORS_METHODS: List[str] = ["*"]
    CORS_HEADERS: List[str] = ["*"]
class ModelConfig:
    """Model configuration and metadata.

    Holds the catalog of selectable chat models and helpers to validate a
    model id supplied by a client.
    """

    # Each entry: "id" is the provider-facing model identifier,
    # "name" is the human-readable label shown in the UI.
    AVAILABLE_MODELS: List[Dict[str, str]] = [
        {"id": "meta-llama/llama-4-scout-17b-16e-instruct", "name": "Llama 4 Scout 17B"},
        {"id": "llama-3.1-8b-instant", "name": "Llama 3.1 8B Instant"},
        {"id": "llama-3.3-70b-versatile", "name": "Llama 3.3 70b Versatile"},
        {"id": "openai/gpt-oss-120b", "name": "GPT-OSS 120B"}
    ]

    # BUGFIX: these methods take `cls` but were missing @classmethod, so
    # ModelConfig.get_model_ids() raised TypeError (no bound first argument).
    @classmethod
    def get_model_ids(cls) -> List[str]:
        """Return the list of available model IDs."""
        return [model["id"] for model in cls.AVAILABLE_MODELS]

    @classmethod
    def is_valid_model(cls, model_id: str) -> bool:
        """Return True if *model_id* is one of the available model IDs."""
        return model_id in cls.get_model_ids()
class ErrorMessages:
    """Centralized error messages.

    Messages containing ``{...}`` placeholders are templates meant to be
    filled with ``str.format`` by the caller (e.g. ``FILE_NOT_FOUND.format(
    file_path=...)``).
    """

    # Validation Errors
    EMPTY_QUERY = "Query cannot be empty"
    QUERY_TOO_SHORT = "Query must be at least 3 characters long"

    # Session Errors
    SESSION_NOT_FOUND = "Session not found"
    SESSION_EXPIRED = "Session not found or expired. Please upload a document first."
    SESSION_INCOMPLETE = "Session data is incomplete. Please upload the document again."
    SESSION_REMOVAL_FAILED = "Session not found or could not be removed"

    # File Errors — format with file_path / error respectively.
    FILE_NOT_FOUND = "The file {file_path} does not exist."
    PDF_PROCESSING_ERROR = "Error processing PDF: {error}"

    # API Key Errors
    GROQ_API_KEY_MISSING = "GROQ_API_KEY is not set for Groq Llama models."
    TAVILY_API_KEY_MISSING = "TAVILY_API_KEY is not set. Web search will not function."

    # Processing Errors — PROCESSING_ERROR is a template (format with error=...).
    PROCESSING_ERROR = "Error processing query: {error}"
    RESPONSE_GENERATION_ERROR = "Sorry, I could not generate a response."
class SuccessMessages:
    """Centralized success messages."""

    # NOTE(review): "(unknown)" reads like leaked placeholder text — the
    # sibling templates use {name}-style fields, so this was presumably
    # "Processed {filename}" or similar. Confirm against the upload handler
    # before changing the runtime string.
    PDF_PROCESSED = "Processed (unknown)"
    PDF_REMOVED = "PDF file and session removed successfully"
    CHAT_HISTORY_CLEARED = "Chat history cleared"
# Initialize directories
def initialize_directories() -> None:
    """Create necessary directories if they don't exist.

    Uses ``os.makedirs(..., exist_ok=True)`` instead of the original
    check-then-create (``os.path.exists`` followed by ``os.makedirs``),
    which could raise FileExistsError if two workers started concurrently.
    """
    os.makedirs(Config.UPLOAD_DIR, exist_ok=True)


# Initialize on import so the upload directory exists before any request is served.
initialize_directories()