Spaces:

jatinmehra
/

PDF-Insight-PRO

Running

App Files Files Community

PDF-Insight-PRO / configs /config.py

jatinmehra

feat: add OpenAI GPT-OSS 120B model option to configuration and UI

1862c3b 6 months ago

raw

history blame contribute delete

3.95 kB

	"""
	Configuration module for PDF Insight Beta application.

	This module centralizes all configuration settings, constants, and environment variables.
	"""

	import os
	from typing import List, Dict, Any
	import dotenv

	# Load environment variables
	dotenv.load_dotenv()


	class Config:
	"""Application configuration class."""

	# API Configuration
	GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
	TAVILY_API_KEY: str = os.getenv("TAVILY_API_KEY", "")

	# Application Settings
	UPLOAD_DIR: str = "uploads"
	MAX_FILE_SIZE: int = 50 * 1024 * 1024 # 50MB

	# Model Configuration
	DEFAULT_MODEL: str = "llama-3.1-8b-instant"
	EMBEDDING_MODEL: str = "BAAI/bge-large-en-v1.5"

	# Text Processing Settings
	DEFAULT_CHUNK_SIZE: int = 1000
	MIN_CHUNK_LENGTH: int = 20
	MIN_PARAGRAPH_LENGTH: int = 10

	# RAG Configuration
	DEFAULT_K_CHUNKS: int = 10
	INITIAL_CONTEXT_CHUNKS: int = 5
	MAX_CONTEXT_TOKENS: int = 7000
	SIMILARITY_THRESHOLD: float = 1.5

	# LLM Settings
	LLM_TEMPERATURE: float = 0.1
	MAX_TOKENS: int = 4500

	# FAISS Index Configuration
	FAISS_NEIGHBORS: int = 32
	FAISS_EF_CONSTRUCTION: int = 200
	FAISS_EF_SEARCH: int = 50

	# Agent Configuration
	AGENT_MAX_ITERATIONS: int = 2
	AGENT_VERBOSE: bool = False

	# Tavily Search Configuration
	TAVILY_MAX_RESULTS: int = 5
	TAVILY_SEARCH_DEPTH: str = "advanced"
	TAVILY_INCLUDE_ANSWER: bool = True
	TAVILY_INCLUDE_RAW_CONTENT: bool = False

	# CORS Configuration
	CORS_ORIGINS: List[str] = ["*"]
	CORS_CREDENTIALS: bool = True
	CORS_METHODS: List[str] = ["*"]
	CORS_HEADERS: List[str] = ["*"]


	class ModelConfig:
	"""Model configuration and metadata."""

	AVAILABLE_MODELS: List[Dict[str, str]] = [
	{"id": "meta-llama/llama-4-scout-17b-16e-instruct", "name": "Llama 4 Scout 17B"},
	{"id": "llama-3.1-8b-instant", "name": "Llama 3.1 8B Instant"},
	{"id": "llama-3.3-70b-versatile", "name": "Llama 3.3 70b Versatile"},
	{"id": "openai/gpt-oss-120b", "name": "GPT-OSS 120B"}
	]

	@classmethod
	def get_model_ids(cls) -> List[str]:
	"""Get list of available model IDs."""
	return [model["id"] for model in cls.AVAILABLE_MODELS]

	@classmethod
	def is_valid_model(cls, model_id: str) -> bool:
	"""Check if a model ID is valid."""
	return model_id in cls.get_model_ids()


	class ErrorMessages:
	"""Centralized error messages."""

	# Validation Errors
	EMPTY_QUERY = "Query cannot be empty"
	QUERY_TOO_SHORT = "Query must be at least 3 characters long"

	# Session Errors
	SESSION_NOT_FOUND = "Session not found"
	SESSION_EXPIRED = "Session not found or expired. Please upload a document first."
	SESSION_INCOMPLETE = "Session data is incomplete. Please upload the document again."
	SESSION_REMOVAL_FAILED = "Session not found or could not be removed"

	# File Errors
	FILE_NOT_FOUND = "The file {file_path} does not exist."
	PDF_PROCESSING_ERROR = "Error processing PDF: {error}"

	# API Key Errors
	GROQ_API_KEY_MISSING = "GROQ_API_KEY is not set for Groq Llama models."
	TAVILY_API_KEY_MISSING = "TAVILY_API_KEY is not set. Web search will not function."

	# Processing Errors
	PROCESSING_ERROR = "Error processing query: {error}"
	RESPONSE_GENERATION_ERROR = "Sorry, I could not generate a response."


	class SuccessMessages:
	"""Centralized success messages."""

	PDF_PROCESSED = "Processed {filename}"
	PDF_REMOVED = "PDF file and session removed successfully"
	CHAT_HISTORY_CLEARED = "Chat history cleared"


	# Initialize directories
	def initialize_directories():
	"""Create necessary directories if they don't exist."""
	if not os.path.exists(Config.UPLOAD_DIR):
	os.makedirs(Config.UPLOAD_DIR)


	# Initialize on import
	initialize_directories()