MohitGupta41 committed on
Commit
62960aa
·
1 Parent(s): 8b9d771

Increased Context Window and improved prompt

Browse files
Files changed (1) hide show
  1. app.py +72 -46
app.py CHANGED
@@ -8,6 +8,7 @@ from fastapi.responses import JSONResponse
8
  from fastapi.middleware.cors import CORSMiddleware
9
  from pydantic import BaseModel, Field, ConfigDict
10
  import httpx
 
11
  from datetime import date
12
 
13
  from Constants import CONTEXT
@@ -120,46 +121,62 @@ async def call_gemini(
120
  # If we got here, all attempts failed
121
  raise last_err or HTTPException(502, "Gemini request failed")
122
 
123
- async def call_huggingface_inference(
124
- hf_api_key: str,
125
- model: str,
126
- prompt: str,
127
- parameters: Optional[Dict[str, Any]] = None
128
- ) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  """
130
- Calls Hugging Face Inference API for text generation models (e.g., google/gemma-3-27b-it).
131
  """
132
- parameters = parameters or {
133
- "max_new_tokens": CONTEXT,
134
- "temperature": 0.2,
135
- "return_full_text": False,
136
- "repetition_penalty": 1.1,
137
- }
138
-
139
- url = f"https://api-inference.huggingface.co/models/{model}"
140
- headers = {"Authorization": f"Bearer {hf_api_key}"}
141
- payload = {"inputs": prompt, "parameters": parameters}
142
-
143
- async with httpx.AsyncClient(timeout=120) as client:
144
- r = await client.post(url, headers=headers, json=payload)
145
-
146
- if r.status_code == 200:
147
- data = r.json()
148
- # HF returns either a list[{"generated_text": "..."}] or a dict with error/stream info
149
- if isinstance(data, list) and data and "generated_text" in data[0]:
150
- return data[0]["generated_text"].strip()
151
- # Some pipelines return dict with "generated_text"
152
- if isinstance(data, dict) and "generated_text" in data:
153
- return data["generated_text"].strip()
154
- # Some models return plain string
155
- if isinstance(data, str):
156
- return data.strip()
157
- raise HTTPException(502, f"Unexpected HF response format: {data}")
158
- elif r.status_code == 503:
159
- # Model is loading or warming up
160
- raise HTTPException(503, "Hugging Face model is loading. Please retry.")
161
- else:
162
- raise HTTPException(r.status_code, f"Hugging Face error: {r.text}")
163
 
164
  # ---------- FastAPI ----------
165
  app = FastAPI(title="Voice Agent API", version="0.2.0")
@@ -238,19 +255,28 @@ async def chat(
238
  text = await call_gemini(gemini_key, model, prompt)
239
  return ChatOut(answer=text or "Sorry, I didn't catch that.")
240
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  elif provider == "huggingface":
242
  model = payload.model or os.getenv("DEFAULT_HF_MODEL", "google/gemma-3-27b-it")
243
- # choose key from body > header (X-Hf-Api-Key) > Authorization Bearer > env
244
- hf_key = payload.hf_api_key or x_hf_api_key
245
- if not hf_key and authorization and authorization.lower().startswith("bearer "):
246
- hf_key = authorization.split(" ", 1)[1].strip()
247
- if not hf_key:
248
- hf_key = os.getenv("HF_API_KEY")
249
  if not hf_key:
250
  raise HTTPException(400, "Hugging Face API key is required (send hf_api_key, X-Hf-Api-Key, or Authorization: Bearer).")
251
- text = await call_huggingface_inference(hf_key, model, prompt)
252
- return ChatOut(answer=text or "Sorry, I didn't catch that.")
253
 
 
 
 
 
254
  else:
255
  raise HTTPException(400, f"Unknown provider: {provider}")
256
 
 
8
  from fastapi.middleware.cors import CORSMiddleware
9
  from pydantic import BaseModel, Field, ConfigDict
10
  import httpx
11
+ from huggingface_hub import InferenceClient
12
  from datetime import date
13
 
14
  from Constants import CONTEXT
 
121
  # If we got here, all attempts failed
122
  raise last_err or HTTPException(502, "Gemini request failed")
123
 
124
+ # async def call_huggingface_inference(
125
+ # hf_api_key: str,
126
+ # model: str,
127
+ # prompt: str,
128
+ # parameters: Optional[Dict[str, Any]] = None
129
+ # ) -> str:
130
+ # """
131
+ # Calls Hugging Face Inference API for text generation models (e.g., google/gemma-3-27b-it).
132
+ # """
133
+ # parameters = parameters or {
134
+ # "max_new_tokens": CONTEXT,
135
+ # "temperature": 0.2,
136
+ # "return_full_text": False,
137
+ # "repetition_penalty": 1.1,
138
+ # }
139
+
140
+ # url = f"https://api-inference.huggingface.co/models/{model}"
141
+ # headers = {"Authorization": f"Bearer {hf_api_key}"}
142
+ # payload = {"inputs": prompt, "parameters": parameters}
143
+
144
+ # async with httpx.AsyncClient(timeout=120) as client:
145
+ # r = await client.post(url, headers=headers, json=payload)
146
+
147
+ # if r.status_code == 200:
148
+ # data = r.json()
149
+ # # HF returns either a list[{"generated_text": "..."}] or a dict with error/stream info
150
+ # if isinstance(data, list) and data and "generated_text" in data[0]:
151
+ # return data[0]["generated_text"].strip()
152
+ # # Some pipelines return dict with "generated_text"
153
+ # if isinstance(data, dict) and "generated_text" in data:
154
+ # return data["generated_text"].strip()
155
+ # # Some models return plain string
156
+ # if isinstance(data, str):
157
+ # return data.strip()
158
+ # raise HTTPException(502, f"Unexpected HF response format: {data}")
159
+ # elif r.status_code == 503:
160
+ # # Model is loading or warming up
161
+ # raise HTTPException(503, "Hugging Face model is loading. Please retry.")
162
+ # else:
163
+ # raise HTTPException(r.status_code, f"Hugging Face error: {r.text}")
164
+
165
async def call_hf_chat(hf_api_key: str, model: str, messages, *, provider: str | None = "auto",
                       max_tokens: int = 1024, temperature: float = 0.2) -> str:
    """
    Call Hugging Face Inference Providers (OpenAI-compatible chat completions).

    Args:
        hf_api_key: Hugging Face API token used for authentication.
        model: Model id, e.g. "google/gemma-3-27b-it".
        messages: OpenAI-style message list, e.g. [{"role": "user", "content": "..."}],
            or a multimodal content structure accepted by the HF chat API.
        provider: Inference provider to route to; "auto" lets Hugging Face choose.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.

    Returns:
        The assistant reply text, stripped of surrounding whitespace
        (empty string if the provider returned no content).
    """
    import asyncio  # local import so the module-level import block stays untouched

    client = InferenceClient(api_key=hf_api_key, provider=provider, timeout=120)

    def _request() -> str:
        # InferenceClient is synchronous; the response follows the OpenAI schema.
        resp = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=False,
        )
        # Documented access is the `.content` attribute; it can be None
        # (e.g. tool-call responses), so guard before stripping.
        content = resp.choices[0].message.content
        return (content or "").strip()

    # BUG FIX: the original body awaited nothing and ran the blocking HTTP
    # request directly on the event loop, freezing all concurrent requests.
    # Run it in a worker thread so the coroutine actually yields.
    return await asyncio.to_thread(_request)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  # ---------- FastAPI ----------
182
  app = FastAPI(title="Voice Agent API", version="0.2.0")
 
255
  text = await call_gemini(gemini_key, model, prompt)
256
  return ChatOut(answer=text or "Sorry, I didn't catch that.")
257
 
258
+ # elif provider == "huggingface":
259
+ # model = payload.model or os.getenv("DEFAULT_HF_MODEL", "google/gemma-3-27b-it")
260
+ # # choose key from body > header (X-Hf-Api-Key) > Authorization Bearer > env
261
+ # hf_key = payload.hf_api_key or x_hf_api_key
262
+ # if not hf_key and authorization and authorization.lower().startswith("bearer "):
263
+ # hf_key = authorization.split(" ", 1)[1].strip()
264
+ # if not hf_key:
265
+ # hf_key = os.getenv("HF_API_KEY")
266
+ # if not hf_key:
267
+ # raise HTTPException(400, "Hugging Face API key is required (send hf_api_key, X-Hf-Api-Key, or Authorization: Bearer).")
268
+ # text = await call_huggingface_inference(hf_key, model, prompt)
269
+ # return ChatOut(answer=text or "Sorry, I didn't catch that.")
270
  elif provider == "huggingface":
271
  model = payload.model or os.getenv("DEFAULT_HF_MODEL", "google/gemma-3-27b-it")
272
+ hf_key = payload.hf_api_key or x_hf_api_key or (authorization.split(" ",1)[1].strip() if authorization and authorization.lower().startswith("bearer ") else None) or os.getenv("HF_API_KEY")
 
 
 
 
 
273
  if not hf_key:
274
  raise HTTPException(400, "Hugging Face API key is required (send hf_api_key, X-Hf-Api-Key, or Authorization: Bearer).")
 
 
275
 
276
+ messages = [{"role":"user","content": build_prompt(payload.question)}]
277
+ text = await call_hf_chat(hf_key, model, messages, provider="auto")
278
+ return ChatOut(answer=text or "Sorry, I didn't catch that.")
279
+
280
  else:
281
  raise HTTPException(400, f"Unknown provider: {provider}")
282