hardkpentium101 Qwen-Coder committed on
Commit
057cc64
·
1 Parent(s): a9b1188

Switch to AI4Bharat IndicLLM - better support for 11 Indic languages

Browse files
Dockerfile CHANGED
@@ -24,7 +24,7 @@ COPY backend/ ./backend/
24
  COPY app.py ./app.py
25
 
26
  # Pre-download models during build
27
- RUN python -c "from transformers import AutoTokenizer; import os; token = os.getenv('HF_TOKEN'); AutoTokenizer.from_pretrained('sarvamai/sarvam-1', token=token); print('✓ Sarvam-1 tokenizer downloaded')" || echo "Warning: Sarvam-1 tokenizer download failed"
28
  RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2'); print('✓ Embedding model downloaded')" || echo "Warning: Embedding model download failed"
29
 
30
  # Create non-root user for runtime
 
24
  COPY app.py ./app.py
25
 
26
  # Pre-download models during build
27
+ RUN python -c "from transformers import AutoTokenizer; import os; token = os.getenv('HF_TOKEN'); AutoTokenizer.from_pretrained('ai4bharat/IndicLLM-1.8B', token=token); print('✓ IndicLLM tokenizer downloaded')" || echo "Warning: IndicLLM tokenizer download failed"
28
  RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2'); print('✓ Embedding model downloaded')" || echo "Warning: Embedding model download failed"
29
 
30
  # Create non-root user for runtime
backend/src/llm_manager.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
- LLM Manager module with Sarvam-1 model support for Indic languages
3
- Uses bitsandbytes 4-bit quantization for memory efficiency
4
  """
5
  from typing import Optional, Dict, Any
6
  from langchain_huggingface import HuggingFacePipeline
@@ -39,17 +39,17 @@ class LLMManager:
39
  if self._llm_instance is not None:
40
  return self._llm_instance
41
 
42
- self._llm_instance = self._get_sarvam_llm(model_kwargs)
43
 
44
  if self._llm_instance is None:
45
- logger.error("Failed to initialize Sarvam-1 LLM")
46
- self._initialization_error = "Sarvam-1 initialization failed"
47
 
48
  return self._llm_instance
49
 
50
- def _get_sarvam_llm(self, model_kwargs: Optional[Dict[str, Any]] = None):
51
- """Initialize Sarvam-1 model optimized for CPU inference"""
52
- model_id = "sarvamai/sarvam-1"
53
 
54
  try:
55
  # Authenticate with HuggingFace if token is provided
@@ -60,8 +60,9 @@ class LLMManager:
60
  else:
61
  logger.warning("No HF_TOKEN provided. Downloads may be slower.")
62
 
63
- logger.info(f"Initializing Sarvam-1 for CPU inference: {model_id}")
64
- logger.info("Sarvam-1: 2B parameters, optimized for 10 Indic languages")
 
65
  logger.info("Loading model...")
66
 
67
  # Load tokenizer
@@ -98,10 +99,12 @@ class LLMManager:
98
  top_p=float(os.getenv("TOP_P", 0.92)),
99
  top_k=int(os.getenv("TOP_K", 50)),
100
  repetition_penalty=float(os.getenv("REPETITION_PENALTY", 1.15)),
101
- max_new_tokens=int(os.getenv("MAX_NEW_TOKENS", 1024)),
102
  do_sample=True,
 
 
103
  )
104
-
105
  # Set on model - pipeline will use this
106
  model.generation_config = gen_config
107
 
@@ -114,12 +117,12 @@ class LLMManager:
114
  clean_up_tokenization_spaces=True,
115
  )
116
 
117
- logger.info("Sarvam-1 pipeline initialized successfully")
118
  llm = HuggingFacePipeline(pipeline=pipe)
119
  return llm
120
 
121
  except Exception as e:
122
- logger.error(f"Failed to load Sarvam-1: {e}")
123
  self._initialization_error = str(e)
124
  return None
125
 
 
1
  """
2
+ LLM Manager module with AI4Bharat IndicLLM support
3
+ Optimized for 11 Indic languages on CPU
4
  """
5
  from typing import Optional, Dict, Any
6
  from langchain_huggingface import HuggingFacePipeline
 
39
  if self._llm_instance is not None:
40
  return self._llm_instance
41
 
42
+ self._llm_instance = self._get_indic_llm(model_kwargs)
43
 
44
  if self._llm_instance is None:
45
+ logger.error("Failed to initialize IndicLLM")
46
+ self._initialization_error = "IndicLLM initialization failed"
47
 
48
  return self._llm_instance
49
 
50
+ def _get_indic_llm(self, model_kwargs: Optional[Dict[str, Any]] = None):
51
+ """Initialize AI4Bharat IndicLLM for 11 Indic languages"""
52
+ model_id = "ai4bharat/IndicLLM-1.8B"
53
 
54
  try:
55
  # Authenticate with HuggingFace if token is provided
 
60
  else:
61
  logger.warning("No HF_TOKEN provided. Downloads may be slower.")
62
 
63
+ logger.info(f"Initializing AI4Bharat IndicLLM: {model_id}")
64
+ logger.info("IndicLLM: 1.8B parameters, optimized for 11 Indic languages")
65
+ logger.info("Languages: Hindi, Bengali, Gujarati, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu, Urdu")
66
  logger.info("Loading model...")
67
 
68
  # Load tokenizer
 
99
  top_p=float(os.getenv("TOP_P", 0.92)),
100
  top_k=int(os.getenv("TOP_K", 50)),
101
  repetition_penalty=float(os.getenv("REPETITION_PENALTY", 1.15)),
102
+ max_new_tokens=int(os.getenv("MAX_NEW_TOKENS", 400)),
103
  do_sample=True,
104
+ pad_token_id=tokenizer.pad_token_id,
105
+ eos_token_id=tokenizer.eos_token_id,
106
  )
107
+
108
  # Set on model - pipeline will use this
109
  model.generation_config = gen_config
110
 
 
117
  clean_up_tokenization_spaces=True,
118
  )
119
 
120
+ logger.info("IndicLLM pipeline initialized successfully")
121
  llm = HuggingFacePipeline(pipeline=pipe)
122
  return llm
123
 
124
  except Exception as e:
125
+ logger.error(f"Failed to load IndicLLM: {e}")
126
  self._initialization_error = str(e)
127
  return None
128
 
backend/src/rag_system.py CHANGED
@@ -23,7 +23,7 @@ logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
24
 
25
 
26
- # Prompt for Sarvam-1 - Simple creative writing format
27
  CREATIVE_PROMPT = """You are a creative writer inspired by Hindi literature and nature poetry.
28
 
29
  Context for inspiration:
 
23
  logger = logging.getLogger(__name__)
24
 
25
 
26
+ # Prompt for AI4Bharat IndicLLM - Creative writing for 11 Indic languages
27
  CREATIVE_PROMPT = """You are a creative writer inspired by Hindi literature and nature poetry.
28
 
29
  Context for inspiration: