Spaces: Sleeping
Upload app.py — Browse files
app.py (CHANGED)
|
@@ -339,12 +339,11 @@ def try_parse_json(text: str) -> Optional[Dict[str, Any]]:
|
|
| 339 |
# 🔧 SIMPLIFIED, ROBUST MODEL CALL (no secrets required)
|
| 340 |
def model_call(prompt: str) -> str:
|
| 341 |
"""
|
| 342 |
-
Call Hugging Face Inference API using
|
|
|
|
| 343 |
"""
|
| 344 |
if not MODEL_ID:
|
| 345 |
raise RuntimeError("MODEL_ID env var is empty. Set it or use the default.")
|
| 346 |
-
|
| 347 |
-
# HF_TOKEN must be defined as a Secret in the Space (read-only token is enough)
|
| 348 |
if not HF_TOKEN:
|
| 349 |
raise RuntimeError(
|
| 350 |
"HF_TOKEN is not set. Add a Hugging Face token as a Space secret named HF_TOKEN."
|
|
@@ -353,21 +352,54 @@ def model_call(prompt: str) -> str:
|
|
| 353 |
client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
|
| 354 |
|
| 355 |
try:
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
temperature=0.9,
|
| 360 |
top_p=0.92,
|
| 361 |
-
return_full_text=False,
|
| 362 |
)
|
| 363 |
except Exception as e:
|
| 364 |
-
# Bubble up a clear error so ai_generate can surface it
|
| 365 |
raise RuntimeError(f"Inference API error: {e}") from e
|
| 366 |
|
| 367 |
-
|
| 368 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
|
| 370 |
-
return
|
| 371 |
|
| 372 |
|
| 373 |
def normalize_output(
|
|
|
|
| 339 |
# 🔧 SIMPLIFIED, ROBUST MODEL CALL (no secrets required)
|
| 340 |
def model_call(prompt: str) -> str:
|
| 341 |
"""
|
| 342 |
+
Call Hugging Face Inference API using the conversational (chat) task.
|
| 343 |
+
This matches models like google/gemma-2-2b-it which only support 'conversational'.
|
| 344 |
"""
|
| 345 |
if not MODEL_ID:
|
| 346 |
raise RuntimeError("MODEL_ID env var is empty. Set it or use the default.")
|
|
|
|
|
|
|
| 347 |
if not HF_TOKEN:
|
| 348 |
raise RuntimeError(
|
| 349 |
"HF_TOKEN is not set. Add a Hugging Face token as a Space secret named HF_TOKEN."
|
|
|
|
| 352 |
client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
|
| 353 |
|
| 354 |
try:
|
| 355 |
+
resp = client.chat.completions.create(
|
| 356 |
+
model=MODEL_ID,
|
| 357 |
+
messages=[
|
| 358 |
+
{
|
| 359 |
+
"role": "system",
|
| 360 |
+
"content": (
|
| 361 |
+
"You generate JSON only. "
|
| 362 |
+
"Do not add any explanation outside of the JSON object."
|
| 363 |
+
),
|
| 364 |
+
},
|
| 365 |
+
{
|
| 366 |
+
"role": "user",
|
| 367 |
+
"content": prompt,
|
| 368 |
+
},
|
| 369 |
+
],
|
| 370 |
+
max_tokens=260,
|
| 371 |
temperature=0.9,
|
| 372 |
top_p=0.92,
|
|
|
|
| 373 |
)
|
| 374 |
except Exception as e:
|
|
|
|
| 375 |
raise RuntimeError(f"Inference API error: {e}") from e
|
| 376 |
|
| 377 |
+
# Extract text from the first choice
|
| 378 |
+
try:
|
| 379 |
+
message = resp.choices[0].message
|
| 380 |
+
content = message.content
|
| 381 |
+
except Exception as e:
|
| 382 |
+
raise RuntimeError(f"Unexpected chat response format: {e}") from e
|
| 383 |
+
|
| 384 |
+
# content can be a string or a list of parts
|
| 385 |
+
if isinstance(content, list):
|
| 386 |
+
# Newer HF SDK sometimes uses list-of-parts format
|
| 387 |
+
parts = []
|
| 388 |
+
for part in content:
|
| 389 |
+
# part may be a dict like {"type": "text", "text": "..."}
|
| 390 |
+
if isinstance(part, dict) and "text" in part:
|
| 391 |
+
parts.append(part["text"])
|
| 392 |
+
else:
|
| 393 |
+
parts.append(str(part))
|
| 394 |
+
text = "".join(parts)
|
| 395 |
+
else:
|
| 396 |
+
text = str(content)
|
| 397 |
+
|
| 398 |
+
text = text.strip()
|
| 399 |
+
if not text:
|
| 400 |
+
raise RuntimeError("Empty response from model.")
|
| 401 |
|
| 402 |
+
return text
|
| 403 |
|
| 404 |
|
| 405 |
def normalize_output(
|