Spaces:

swapmyface
/

language-learn

Sleeping

App Files Files Community

swapmyface committed on Feb 27

Commit

96ea3f8

verified ·

1 Parent(s): 07bc9aa

Fix: use correct HF router endpoint and InferenceClient for TTS/STT

Browse files

Files changed (1) hide show

backend/hf_client.py +29 -47

backend/hf_client.py CHANGED Viewed

@@ -1,34 +1,37 @@
 """HuggingFace Inference API wrapper for LLM, TTS, and STT."""
 import os
 import logging
 import requests
-import time
 logger = logging.getLogger(__name__)
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
-API_BASE = "https://router.huggingface.co/hf-inference/models"
 PRIMARY_MODEL = "Qwen/Qwen2.5-72B-Instruct"
 FALLBACK_MODEL = "meta-llama/Llama-3.2-3B-Instruct"
 STT_MODEL = "openai/whisper-base"
-HEADERS = {
-    "Authorization": f"Bearer {HF_TOKEN}",
-    "Content-Type": "application/json"
-}
-def _get_headers(content_type="application/json"):
-    return {
-        "Authorization": f"Bearer {HF_TOKEN}",
-        "Content-Type": content_type
-    }
 def chat_completion(messages, max_tokens=1024, temperature=0.7):
-    """Send chat completion request to HF Inference API."""
     payload = {
         "model": PRIMARY_MODEL,
         "messages": messages,
@@ -40,12 +43,7 @@ def chat_completion(messages, max_tokens=1024, temperature=0.7):
     for model in [PRIMARY_MODEL, FALLBACK_MODEL]:
         try:
             payload["model"] = model
-            resp = requests.post(
-                "https://router.huggingface.co/hf-inference/v1/chat/completions",
-                headers=_get_headers(),
-                json=payload,
-                timeout=60
-            )
             resp.raise_for_status()
             data = resp.json()
             content = data.get("choices", [{}])[0].get("message", {}).get("content", "")
@@ -59,24 +57,17 @@ def chat_completion(messages, max_tokens=1024, temperature=0.7):
 def text_to_speech(text, tts_model="facebook/mms-tts-hin"):
-    """Convert text to speech audio bytes."""
     if not text or not text.strip():
         return None
-    # Truncate very long text for TTS
     tts_text = text[:500]
     try:
-        resp = requests.post(
-            f"{API_BASE}/{tts_model}",
-            headers=_get_headers(),
-            json={"inputs": tts_text},
-            timeout=30
-        )
-        resp.raise_for_status()
-        content_type = resp.headers.get("content-type", "")
-        if "audio" in content_type or len(resp.content) > 1000:
-            return resp.content
         return None
     except Exception as e:
         logger.warning(f"TTS failed for model {tts_model}: {e}")
@@ -84,27 +75,18 @@ def text_to_speech(text, tts_model="facebook/mms-tts-hin"):
 def speech_to_text(audio_bytes):
-    """Transcribe audio to text using Whisper."""
     if not audio_bytes:
         return ""
     try:
-        resp = requests.post(
-            f"{API_BASE}/{STT_MODEL}",
-            headers={
-                "Authorization": f"Bearer {HF_TOKEN}",
-                "Content-Type": "audio/wav"
-            },
-            data=audio_bytes,
-            timeout=30
-        )
-        resp.raise_for_status()
-        data = resp.json()
-        if isinstance(data, dict):
-            return data.get("text", "")
-        if isinstance(data, list) and data:
-            return data[0].get("text", "")
-        return ""
     except Exception as e:
         logger.warning(f"STT failed: {e}")
         return ""

 """HuggingFace Inference API wrapper for LLM, TTS, and STT."""
 import os
+import io
 import logging
 import requests
 logger = logging.getLogger(__name__)
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
+CHAT_API_URL = "https://router.huggingface.co/v1/chat/completions"
 PRIMARY_MODEL = "Qwen/Qwen2.5-72B-Instruct"
 FALLBACK_MODEL = "meta-llama/Llama-3.2-3B-Instruct"
 STT_MODEL = "openai/whisper-base"
+_inference_client = None
+def _get_client():
+    """Lazy-init the HF InferenceClient."""
+    global _inference_client
+    if _inference_client is None:
+        from huggingface_hub import InferenceClient
+        _inference_client = InferenceClient(token=HF_TOKEN)
+    return _inference_client
 def chat_completion(messages, max_tokens=1024, temperature=0.7):
+    """Send chat completion request via the OpenAI-compatible endpoint."""
+    headers = {
+        "Authorization": f"Bearer {HF_TOKEN}",
+        "Content-Type": "application/json"
+    }
     payload = {
         "model": PRIMARY_MODEL,
         "messages": messages,
     for model in [PRIMARY_MODEL, FALLBACK_MODEL]:
         try:
             payload["model"] = model
+            resp = requests.post(CHAT_API_URL, headers=headers, json=payload, timeout=60)
             resp.raise_for_status()
             data = resp.json()
             content = data.get("choices", [{}])[0].get("message", {}).get("content", "")
 def text_to_speech(text, tts_model="facebook/mms-tts-hin"):
+    """Convert text to speech audio bytes using HF InferenceClient."""
     if not text or not text.strip():
         return None
     tts_text = text[:500]
     try:
+        client = _get_client()
+        audio_bytes = client.text_to_speech(tts_text, model=tts_model)
+        if isinstance(audio_bytes, bytes) and len(audio_bytes) > 100:
+            return audio_bytes
         return None
     except Exception as e:
         logger.warning(f"TTS failed for model {tts_model}: {e}")
 def speech_to_text(audio_bytes):
+    """Transcribe audio to text using HF InferenceClient."""
     if not audio_bytes:
         return ""
     try:
+        client = _get_client()
+        result = client.automatic_speech_recognition(audio_bytes, model=STT_MODEL)
+        if isinstance(result, dict):
+            return result.get("text", "")
+        if hasattr(result, "text"):
+            return result.text
+        return str(result) if result else ""
     except Exception as e:
         logger.warning(f"STT failed: {e}")
         return ""