# NOTE: removed "Spaces: / Sleeping / Sleeping" lines — Hugging Face Spaces
# page-header residue from a copy/paste, not part of the source code.
"""
LLM Manager module with Sarvam-1 model support for the Hindi RAG system
Optimized for CPU-only environments like HF Spaces free tier
"""
import logging
import os
import warnings
from typing import Optional, Dict, Any

import torch
from langchain_huggingface import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Module-level logger at INFO so model-loading progress shows up in Space logs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model loading on CPU emits noisy UserWarnings; silence them for cleaner logs.
warnings.filterwarnings("ignore", category=UserWarning)
class LLMManager:
    """Singleton manager that lazily loads the Sarvam-1 LLM for the RAG system.

    The model is loaded at most once per process; the resulting LangChain
    ``HuggingFacePipeline`` is cached on the class. If loading fails, the
    error message is stored and subsequent calls short-circuit to ``None``
    instead of retrying an expensive (and likely doomed) load.
    """

    _instance = None              # the single LLMManager object
    _llm_instance = None          # cached HuggingFacePipeline once loaded
    _initialization_error = None  # str describing a failed load, else None

    def __new__(cls):
        # Classic singleton: every caller shares one manager (and one model).
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def get_llm(self, provider: str = "huggingface", model_kwargs: Optional[Dict[str, Any]] = None):
        """Return the cached LLM instance, initializing it on first use.

        Args:
            provider: LLM provider name. Only ``"huggingface"`` (Sarvam-1)
                is implemented; any other value is logged and falls back
                to Sarvam-1 for backward compatibility.
            model_kwargs: optional overrides merged into the pipeline's
                ``model_kwargs`` (e.g. ``{"torch_dtype": ...}``).

        Returns:
            A ``HuggingFacePipeline``, or ``None`` if initialization failed.
        """
        if self._initialization_error is not None:
            # A previous attempt already failed; do not retry on every call.
            logger.error("LLM initialization failed: %s", self._initialization_error)
            return None
        if self._llm_instance is not None:
            return self._llm_instance
        if provider != "huggingface":
            # Only the HuggingFace/Sarvam-1 path exists; make the fallback visible.
            logger.warning("Unknown provider %r; falling back to Sarvam-1", provider)
        self._llm_instance = self._get_sarvam_llm(model_kwargs)
        if self._llm_instance is None:
            logger.error("Failed to initialize Sarvam-1 LLM")
            self._initialization_error = "Sarvam-1 initialization failed"
        return self._llm_instance

    def _get_sarvam_llm(self, model_kwargs: Optional[Dict[str, Any]] = None):
        """Build a CPU text-generation pipeline for ``sarvamai/sarvam-1``.

        Uses the plain ``transformers.pipeline`` path (the approach
        recommended by Sarvam AI) so device placement is handled
        automatically, then wraps it for LangChain.

        Args:
            model_kwargs: optional overrides merged over the CPU-safe
                defaults below before being passed to the pipeline.

        Returns:
            A ``HuggingFacePipeline``, or ``None`` on failure (the error
            text is stored in ``_initialization_error``).
        """
        model_id = "sarvamai/sarvam-1"
        try:
            logger.info("Initializing Sarvam-1 model: %s", model_id)
            logger.info("Sarvam-1: 2B parameters, optimized for 10 Indic languages")
            logger.info("Loading model with CPU-first approach...")
            # CPU-safe defaults; caller-supplied model_kwargs override them.
            # (Bug fix: the original accepted model_kwargs but never used it.)
            merged_kwargs: Dict[str, Any] = {
                "torch_dtype": torch.float32,  # float32 for CPU compatibility
                "low_cpu_mem_usage": False,    # avoid meta tensor issues
            }
            if model_kwargs:
                merged_kwargs.update(model_kwargs)
            pipe = pipeline(
                "text-generation",
                model=model_id,
                model_kwargs=merged_kwargs,
                device_map="cpu",  # force CPU for HF Spaces free tier
            )
            logger.info("✓ Sarvam-1 pipeline initialized successfully on CPU")
            # Wrap the raw pipeline for LangChain compatibility.
            return HuggingFacePipeline(pipeline=pipe)
        except Exception as e:
            logger.error("Failed to initialize Sarvam-1 model: %s", e)
            logger.error("Error type: %s", type(e).__name__)
            # Provide a helpful hint for the most common HF Spaces failure mode.
            if "meta tensor" in str(e).lower():
                logger.error("Meta tensor error: Insufficient RAM for model loading")
                logger.error("HF Spaces CPU tier has ~13GB RAM, Sarvam-1 needs ~8GB")
                logger.error("Try: Upgrading to GPU Space or using smaller model")
            self._initialization_error = str(e)
            return None

    def is_available(self) -> bool:
        """Return True if the LLM loaded successfully and is ready to use."""
        return self._llm_instance is not None and self._initialization_error is None

    def get_initialization_error(self) -> Optional[str]:
        """Return the stored initialization error message, or None if none."""
        return self._initialization_error
def get_llm(provider: str = "huggingface", model_kwargs: Optional[Dict[str, Any]] = None):
    """Module-level convenience wrapper around ``LLMManager.get_llm``.

    Args:
        provider: LLM provider name (forwarded to the manager).
        model_kwargs: optional model keyword overrides (forwarded).

    Returns:
        Whatever the singleton manager returns: the LLM instance or None.
    """
    return LLMManager().get_llm(provider, model_kwargs)
def get_llm_with_provider(provider: str = "huggingface", model_kwargs: Optional[Dict[str, Any]] = None):
    """Get LLM with a specific provider and model kwargs.

    NOTE(review): this function was a byte-for-byte duplicate of
    ``get_llm``; it is kept for backward compatibility but now delegates
    so there is a single code path.

    Args:
        provider: LLM provider name (forwarded).
        model_kwargs: optional model keyword overrides (forwarded).

    Returns:
        The LLM instance from the singleton manager, or None on failure.
    """
    return get_llm(provider, model_kwargs)