hardkpentium101 Qwen-Coder committed on
Commit
057cc64
·
1 Parent(s): a9b1188

Switch to AI4Bharat IndicLLM - better support for 11 Indic languages

Browse files
Dockerfile CHANGED
@@ -24,7 +24,7 @@ COPY backend/ ./backend/
24
  COPY app.py ./app.py
25
 
26
  # Pre-download models during build
27
- RUN python -c "from transformers import AutoTokenizer; import os; token = os.getenv('HF_TOKEN'); AutoTokenizer.from_pretrained('sarvamai/sarvam-1', token=token); print('✓ Sarvam-1 tokenizer downloaded')" || echo "Warning: Sarvam-1 tokenizer download failed"
28
  RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2'); print('✓ Embedding model downloaded')" || echo "Warning: Embedding model download failed"
29
 
30
  # Create non-root user for runtime
 
24
  COPY app.py ./app.py
25
 
26
  # Pre-download models during build
27
+ RUN python -c "from transformers import AutoTokenizer; import os; token = os.getenv('HF_TOKEN'); AutoTokenizer.from_pretrained('ai4bharat/IndicLLM-1.8B', token=token); print('✓ IndicLLM tokenizer downloaded')" || echo "Warning: IndicLLM tokenizer download failed"
28
  RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2'); print('✓ Embedding model downloaded')" || echo "Warning: Embedding model download failed"
29
 
30
  # Create non-root user for runtime
backend/src/llm_manager.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
- LLM Manager module with Sarvam-1 model support for Indic languages
3
- Uses bitsandbytes 4-bit quantization for memory efficiency
4
  """
5
  from typing import Optional, Dict, Any
6
  from langchain_huggingface import HuggingFacePipeline
@@ -39,17 +39,17 @@ class LLMManager:
39
  if self._llm_instance is not None:
40
  return self._llm_instance
41
 
42
- self._llm_instance = self._get_sarvam_llm(model_kwargs)
43
 
44
  if self._llm_instance is None:
45
- logger.error("Failed to initialize Sarvam-1 LLM")
46
- self._initialization_error = "Sarvam-1 initialization failed"
47
 
48
  return self._llm_instance
49
 
50
- def _get_sarvam_llm(self, model_kwargs: Optional[Dict[str, Any]] = None):
51
- """Initialize Sarvam-1 model optimized for CPU inference"""
52
- model_id = "sarvamai/sarvam-1"
53
 
54
  try:
55
  # Authenticate with HuggingFace if token is provided
@@ -60,8 +60,9 @@ class LLMManager:
60
  else:
61
  logger.warning("No HF_TOKEN provided. Downloads may be slower.")
62
 
63
- logger.info(f"Initializing Sarvam-1 for CPU inference: {model_id}")
64
- logger.info("Sarvam-1: 2B parameters, optimized for 10 Indic languages")
 
65
  logger.info("Loading model...")
66
 
67
  # Load tokenizer
@@ -98,10 +99,12 @@ class LLMManager:
98
  top_p=float(os.getenv("TOP_P", 0.92)),
99
  top_k=int(os.getenv("TOP_K", 50)),
100
  repetition_penalty=float(os.getenv("REPETITION_PENALTY", 1.15)),
101
- max_new_tokens=int(os.getenv("MAX_NEW_TOKENS", 1024)),
102
  do_sample=True,
 
 
103
  )
104
-
105
  # Set on model - pipeline will use this
106
  model.generation_config = gen_config
107
 
@@ -114,12 +117,12 @@ class LLMManager:
114
  clean_up_tokenization_spaces=True,
115
  )
116
 
117
- logger.info("Sarvam-1 pipeline initialized successfully")
118
  llm = HuggingFacePipeline(pipeline=pipe)
119
  return llm
120
 
121
  except Exception as e:
122
- logger.error(f"Failed to load Sarvam-1: {e}")
123
  self._initialization_error = str(e)
124
  return None
125
 
 
1
  """
2
+ LLM Manager module with AI4Bharat IndicLLM support
3
+ Optimized for 11 Indic languages on CPU
4
  """
5
  from typing import Optional, Dict, Any
6
  from langchain_huggingface import HuggingFacePipeline
 
39
  if self._llm_instance is not None:
40
  return self._llm_instance
41
 
42
+ self._llm_instance = self._get_indic_llm(model_kwargs)
43
 
44
  if self._llm_instance is None:
45
+ logger.error("Failed to initialize IndicLLM")
46
+ self._initialization_error = "IndicLLM initialization failed"
47
 
48
  return self._llm_instance
49
 
50
+ def _get_indic_llm(self, model_kwargs: Optional[Dict[str, Any]] = None):
51
+ """Initialize AI4Bharat IndicLLM for 11 Indic languages"""
52
+ model_id = "ai4bharat/IndicLLM-1.8B"
53
 
54
  try:
55
  # Authenticate with HuggingFace if token is provided
 
60
  else:
61
  logger.warning("No HF_TOKEN provided. Downloads may be slower.")
62
 
63
+ logger.info(f"Initializing AI4Bharat IndicLLM: {model_id}")
64
+ logger.info("IndicLLM: 1.8B parameters, optimized for 11 Indic languages")
65
+ logger.info("Languages: Hindi, Bengali, Gujarati, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu, Urdu")
66
  logger.info("Loading model...")
67
 
68
  # Load tokenizer
 
99
  top_p=float(os.getenv("TOP_P", 0.92)),
100
  top_k=int(os.getenv("TOP_K", 50)),
101
  repetition_penalty=float(os.getenv("REPETITION_PENALTY", 1.15)),
102
+ max_new_tokens=int(os.getenv("MAX_NEW_TOKENS", 400)),
103
  do_sample=True,
104
+ pad_token_id=tokenizer.pad_token_id,
105
+ eos_token_id=tokenizer.eos_token_id,
106
  )
107
+
108
  # Set on model - pipeline will use this
109
  model.generation_config = gen_config
110
 
 
117
  clean_up_tokenization_spaces=True,
118
  )
119
 
120
+ logger.info("IndicLLM pipeline initialized successfully")
121
  llm = HuggingFacePipeline(pipeline=pipe)
122
  return llm
123
 
124
  except Exception as e:
125
+ logger.error(f"Failed to load IndicLLM: {e}")
126
  self._initialization_error = str(e)
127
  return None
128
 
backend/src/rag_system.py CHANGED
@@ -23,7 +23,7 @@ logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
24
 
25
 
26
- # Prompt for Sarvam-1 - Simple creative writing format
27
  CREATIVE_PROMPT = """You are a creative writer inspired by Hindi literature and nature poetry.
28
 
29
  Context for inspiration:
 
23
  logger = logging.getLogger(__name__)
24
 
25
 
26
+ # Prompt for AI4Bharat IndicLLM - Creative writing for 11 Indic languages
27
  CREATIVE_PROMPT = """You are a creative writer inspired by Hindi literature and nature poetry.
28
 
29
  Context for inspiration: