Update PolyAgent/gradio_interface.py
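Simplify llm_only_answer: drop the HF_PROVIDER forcing and the text_generation fallback, inline the system prompt into the chat_completion messages, and build the InferenceClient from just the model id and HF token. Failures of the single chat call now come back as one JSON error payload.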
PolyAgent/gradio_interface.py  (CHANGED, +25 -47)
@@ -1201,9 +1201,7 @@ def gpt_only_answer(state: Dict[str, Any], prompt: str) -> str:
 # ----------------------------- Other LLMs (Hugging Face Inference) ----------------------------- #
 def llm_only_answer(state: Dict[str, Any], model_name: str, prompt: str) -> str:
     """
-    LLM-only responses
-    - Forces provider to avoid unwanted auto-routing (e.g., fireworks-ai).
-    - Tries chat_completion first; if model/provider doesn't support chat, falls back to text_generation.
+    LLM-only responses using Hugging Face Inference API for non-GPT models.
     """
     ensure_orch(state)
 
@@ -1212,7 +1210,12 @@ def llm_only_answer(state: Dict[str, Any], model_name: str, prompt: str) -> str:
 
     HF_TOKEN = (os.getenv("HF_TOKEN") or "").strip()
     if not HF_TOKEN:
-        return pretty_json({"ok": False, "error": "HF_TOKEN is not set. Add HF_TOKEN=hf_... to your .env or env vars."})
+        return pretty_json(
+            {
+                "ok": False,
+                "error": "HF_TOKEN is not set. Add HF_TOKEN=hf_... to your .env or env vars.",
+            }
+        )
 
     HF_MODEL_MAP = {
         "mixtral-8x22b-instruct": "mistralai/Mixtral-8x22B-Instruct-v0.1",
@@ -1221,6 +1224,7 @@ def llm_only_answer(state: Dict[str, Any], model_name: str, prompt: str) -> str:
 
     m = (model_name or "").strip()
     p = (prompt or "").strip()
+
     if not p:
         return "Please provide a prompt."
     if not m:
@@ -1228,60 +1232,34 @@ def llm_only_answer(state: Dict[str, Any], model_name: str, prompt: str) -> str:
 
     model_id = HF_MODEL_MAP.get(m)
     if not model_id:
-        return pretty_json(
-            {"ok": False, "error": f"Unsupported model selection: {m}", "supported": list(HF_MODEL_MAP.keys())}
-        )
-
-    provider = os.getenv("HF_PROVIDER", "hf-inference")
-    client = InferenceClient(
-        provider=provider,
-        model=model_id,
-        api_key=HF_TOKEN,  # api_key works for both HF token + provider keys
-    )
+        return pretty_json(
+            {
+                "ok": False,
+                "error": f"Unsupported model selection: {m}",
+                "supported": list(HF_MODEL_MAP.keys()),
+            }
+        )
 
-    system = (
-        "You are a polymer R&D assistant. Answer directly and clearly. "
-        "Do not call tools or run web searches. If you are uncertain, state uncertainty."
-    )
+    client = InferenceClient(model=model_id, token=HF_TOKEN)
 
-    # 1) Try chat (conversational)
     try:
         resp = client.chat_completion(
             messages=[
-                {"role": "system", "content": system},
+                {
+                    "role": "system",
+                    "content": (
+                        "You are a polymer R&D assistant. Answer directly and clearly. "
+                        "Do not call tools or run web searches. If you are uncertain, state uncertainty."
+                    ),
+                },
                 {"role": "user", "content": p},
             ],
             max_tokens=900,
             temperature=0.7,
        )
         return resp.choices[0].message.content or ""
-    except Exception as e_chat:
-
-        try:
-            if provider != "hf-inference":
-                # text_generation is not universally supported across providers
-                raise RuntimeError(
-                    f"Chat failed and provider='{provider}' may not support text_generation. "
-                    f"Set HF_PROVIDER=hf-inference (recommended) or choose a compatible model/provider."
-                )
-
-            # A simple prompt wrapper for non-chat models / non-chat endpoints
-            wrapped = f"{system}\n\nUser: {p}\nAssistant:"
-            out = client.text_generation(
-                wrapped,
-                max_new_tokens=900,
-                temperature=0.7,
-                do_sample=True,
-                return_full_text=False,
-            )
-            return out if isinstance(out, str) else str(out)
-        except Exception as e_gen:
-            return pretty_json({
-                "ok": False,
-                "error": f"chat_completion failed: {e_chat}; text_generation failed: {e_gen}",
-                "model_id": model_id,
-                "provider": provider,
-            })
+    except Exception as e:
+        return pretty_json({"ok": False, "error": str(e), "model_id": model_id})
 
 
 def build_ui() -> gr.Blocks:
|
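For reference, the call path the simplified function keeps is plain huggingface_hub chat completion. A minimal standalone sketch of that path, assuming huggingface_hub is installed and HF_TOKEN is set; the model id, the prompt, and the local JSON-error helper stand in for the app's HF_MODEL_MAP and pretty_json:

import json
import os

from huggingface_hub import InferenceClient

# Illustrative model id; the app resolves friendly names to ids via HF_MODEL_MAP.
MODEL_ID = "mistralai/Mixtral-8x22B-Instruct-v0.1"


def ask(prompt: str) -> str:
    token = (os.getenv("HF_TOKEN") or "").strip()
    if not token:
        # Mirror the app's convention: return an error payload instead of raising.
        return json.dumps({"ok": False, "error": "HF_TOKEN is not set."}, indent=2)

    client = InferenceClient(model=MODEL_ID, token=token)
    try:
        resp = client.chat_completion(
            messages=[
                {"role": "system", "content": "You are a polymer R&D assistant."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=900,
            temperature=0.7,
        )
        return resp.choices[0].message.content or ""
    except Exception as e:
        # Surface provider/model errors as data, matching the new except branch.
        return json.dumps({"ok": False, "error": str(e), "model_id": MODEL_ID}, indent=2)


if __name__ == "__main__":
    print(ask("In two sentences, compare PET and PLA for packaging."))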
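The deleted branch was a chat-first, text-generation-fallback pattern, which is still the right shape when a selected model only exposes a raw text endpoint. A sketch of that pattern under the same assumptions; note that text_generation is not universally supported across inference providers, which is what the removed RuntimeError guarded against:

from huggingface_hub import InferenceClient


def answer_with_fallback(client: InferenceClient, system: str, prompt: str) -> str:
    """Try the chat endpoint first; fall back to raw text generation."""
    try:
        resp = client.chat_completion(
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": prompt},
            ],
            max_tokens=900,
            temperature=0.7,
        )
        return resp.choices[0].message.content or ""
    except Exception:
        # Plain-prompt wrapper for models without a chat template/endpoint.
        wrapped = f"{system}\n\nUser: {prompt}\nAssistant:"
        out = client.text_generation(
            wrapped,
            max_new_tokens=900,
            temperature=0.7,
            do_sample=True,
            return_full_text=False,
        )
        return out if isinstance(out, str) else str(out)

Dropping this trades that flexibility for a single failure mode; since the entries in HF_MODEL_MAP are instruct-style chat models, the chat path alone presumably covers every selection the UI offers.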