parthnuwal7 committed on
Commit
a8443d7
·
1 Parent(s): 70de5e9
Files changed (1) hide show
  1. src/utils/data_processor.py +19 -13
src/utils/data_processor.py CHANGED
@@ -86,10 +86,10 @@ class TranslationService:
86
 
87
  def __init__(self):
88
  self.api_token = self._get_hf_token()
89
- # Using Helsinki-NLP model - better Inference API support for Hindi→English
90
- self.translation_model = "Helsinki-NLP/opus-mt-hi-en"
91
  self.base_url = "https://router.huggingface.co"
92
- logger.info("Initialized HF Inference API for translation")
93
 
94
  def _get_hf_token(self) -> Optional[str]:
95
  """Get HF token from environment or Streamlit secrets."""
@@ -109,31 +109,37 @@ class TranslationService:
109
  def _call_hf_translation_api(self, text: str, source_lang: str = "hi", target_lang: str = "en") -> str:
110
  """Call HF Translation API with fallback."""
111
  if not self.api_token:
112
- logger.warning("No API token, skipping translation")
113
  return text
114
 
115
  try:
116
  headers = {"Authorization": f"Bearer {self.api_token}"}
117
- # New router API format: /models/{model_id}
118
- url = f"{self.base_url}/models/{self.translation_model}"
119
 
120
- # Simplified payload for Helsinki-NLP models
121
  payload = {"inputs": text}
122
 
123
- response = requests.post(url, headers=headers, json=payload, timeout=30)
124
 
125
  if response.status_code == 200:
126
  result = response.json()
127
- # Helsinki models return: [{"translation_text": "..."}]
128
  if isinstance(result, list) and len(result) > 0:
129
- return result[0].get("translation_text", text)
 
 
 
 
 
 
130
 
131
- # Enhanced error logging
132
- logger.warning(f"Translation API failed: {response.status_code} - {response.text[:200]}")
133
  return text
134
 
135
  except Exception as e:
136
- logger.error(f"Translation error: {str(e)}")
137
  return text
138
 
139
  def detect_language(self, text: str) -> str:
 
86
 
87
  def __init__(self):
88
  self.api_token = self._get_hf_token()
89
+ # Using AI4Bharat IndicTrans2 - specialized for Indian languages
90
+ self.translation_model = "ai4bharat/indictrans2-en-indic-1.3B"
91
  self.base_url = "https://router.huggingface.co"
92
+ logger.info("Initialized HF Inference API for translation (IndicTrans2)")
93
 
94
  def _get_hf_token(self) -> Optional[str]:
95
  """Get HF token from environment or Streamlit secrets."""
 
109
  def _call_hf_translation_api(self, text: str, source_lang: str = "hi", target_lang: str = "en") -> str:
110
  """Call HF Translation API with fallback."""
111
  if not self.api_token:
112
+ logger.debug("No API token, skipping translation")
113
  return text
114
 
115
  try:
116
  headers = {"Authorization": f"Bearer {self.api_token}"}
117
+ # Try using serverless inference endpoint
118
+ url = f"https://api-inference.huggingface.co/models/{self.translation_model}"
119
 
120
+ # IndicTrans2 requires simple input format
121
  payload = {"inputs": text}
122
 
123
+ response = requests.post(url, headers=headers, json=payload, timeout=10)
124
 
125
  if response.status_code == 200:
126
  result = response.json()
127
+ # IndicTrans2 returns: [{"translation_text": "..."}] or {"generated_text": "..."}
128
  if isinstance(result, list) and len(result) > 0:
129
+ translated = result[0].get("translation_text", "") or result[0].get("generated_text", "")
130
+ if translated:
131
+ return translated
132
+ elif isinstance(result, dict):
133
+ translated = result.get("generated_text", "") or result.get("translation_text", "")
134
+ if translated:
135
+ return translated
136
 
137
+ # Silently fall back to original text (translation is optional)
138
+ logger.debug(f"Translation unavailable, using original text (status: {response.status_code})")
139
  return text
140
 
141
  except Exception as e:
142
+ logger.debug(f"Translation skipped: {str(e)}")
143
  return text
144
 
145
  def detect_language(self, text: str) -> str: