Spaces: Sleeping
Upload app.py — Browse files
app.py (CHANGED)
|
@@ -339,12 +339,11 @@ def try_parse_json(text: str) -> Optional[Dict[str, Any]]:
|
|
| 339 |
# 🔧 SIMPLIFIED, ROBUST MODEL CALL (no secrets required)
|
| 340 |
def model_call(prompt: str) -> str:
|
| 341 |
"""
|
| 342 |
-
Call Hugging Face Inference API using
|
|
|
|
| 343 |
"""
|
| 344 |
if not MODEL_ID:
|
| 345 |
raise RuntimeError("MODEL_ID env var is empty. Set it or use the default.")
|
| 346 |
-
|
| 347 |
-
# HF_TOKEN must be defined as a Secret in the Space (read-only token is enough)
|
| 348 |
if not HF_TOKEN:
|
| 349 |
raise RuntimeError(
|
| 350 |
"HF_TOKEN is not set. Add a Hugging Face token as a Space secret named HF_TOKEN."
|
|
@@ -353,21 +352,54 @@ def model_call(prompt: str) -> str:
|
|
| 353 |
client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
|
| 354 |
|
| 355 |
try:
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
temperature=0.9,
|
| 360 |
top_p=0.92,
|
| 361 |
-
return_full_text=False,
|
| 362 |
)
|
| 363 |
except Exception as e:
|
| 364 |
-
# Bubble up a clear error so ai_generate can surface it
|
| 365 |
raise RuntimeError(f"Inference API error: {e}") from e
|
| 366 |
|
| 367 |
-
|
| 368 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
|
| 370 |
-
return
|
| 371 |
|
| 372 |
|
| 373 |
def normalize_output(
|
|
|
|
| 339 |
# 🔧 SIMPLIFIED, ROBUST MODEL CALL (no secrets required)
|
| 340 |
def model_call(prompt: str) -> str:
|
| 341 |
"""
|
| 342 |
+
Call Hugging Face Inference API using the conversational (chat) task.
|
| 343 |
+
This matches models like google/gemma-2-2b-it which only support 'conversational'.
|
| 344 |
"""
|
| 345 |
if not MODEL_ID:
|
| 346 |
raise RuntimeError("MODEL_ID env var is empty. Set it or use the default.")
|
|
|
|
|
|
|
| 347 |
if not HF_TOKEN:
|
| 348 |
raise RuntimeError(
|
| 349 |
"HF_TOKEN is not set. Add a Hugging Face token as a Space secret named HF_TOKEN."
|
|
|
|
| 352 |
client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
|
| 353 |
|
| 354 |
try:
|
| 355 |
+
resp = client.chat.completions.create(
|
| 356 |
+
model=MODEL_ID,
|
| 357 |
+
messages=[
|
| 358 |
+
{
|
| 359 |
+
"role": "system",
|
| 360 |
+
"content": (
|
| 361 |
+
"You generate JSON only. "
|
| 362 |
+
"Do not add any explanation outside of the JSON object."
|
| 363 |
+
),
|
| 364 |
+
},
|
| 365 |
+
{
|
| 366 |
+
"role": "user",
|
| 367 |
+
"content": prompt,
|
| 368 |
+
},
|
| 369 |
+
],
|
| 370 |
+
max_tokens=260,
|
| 371 |
temperature=0.9,
|
| 372 |
top_p=0.92,
|
|
|
|
| 373 |
)
|
| 374 |
except Exception as e:
|
|
|
|
| 375 |
raise RuntimeError(f"Inference API error: {e}") from e
|
| 376 |
|
| 377 |
+
# Extract text from the first choice
|
| 378 |
+
try:
|
| 379 |
+
message = resp.choices[0].message
|
| 380 |
+
content = message.content
|
| 381 |
+
except Exception as e:
|
| 382 |
+
raise RuntimeError(f"Unexpected chat response format: {e}") from e
|
| 383 |
+
|
| 384 |
+
# content can be a string or a list of parts
|
| 385 |
+
if isinstance(content, list):
|
| 386 |
+
# Newer HF SDK sometimes uses list-of-parts format
|
| 387 |
+
parts = []
|
| 388 |
+
for part in content:
|
| 389 |
+
# part may be a dict like {"type": "text", "text": "..."}
|
| 390 |
+
if isinstance(part, dict) and "text" in part:
|
| 391 |
+
parts.append(part["text"])
|
| 392 |
+
else:
|
| 393 |
+
parts.append(str(part))
|
| 394 |
+
text = "".join(parts)
|
| 395 |
+
else:
|
| 396 |
+
text = str(content)
|
| 397 |
+
|
| 398 |
+
text = text.strip()
|
| 399 |
+
if not text:
|
| 400 |
+
raise RuntimeError("Empty response from model.")
|
| 401 |
|
| 402 |
+
return text
|
| 403 |
|
| 404 |
|
| 405 |
def normalize_output(
|