# Hosting-page residue from the scrape (Space status "Sleeping", "File size:
# 7,346 Bytes", commit hashes and the line-number gutter) was removed here; it
# is not part of the program.
import json, queue, threading, time, uuid
from datetime import datetime
from flask import Flask, render_template, request, Response, jsonify
from dotenv import load_dotenv
import os
import events
# Application bootstrap. load_dotenv() runs before any os.getenv() lookup in
# this module, so values it loads (python-dotenv: from a .env file, if present)
# are visible to those lookups.
load_dotenv()
app = Flask(__name__)
# NOTE(review): when SECRET_KEY is not set in the environment, the key falls
# back to a hard-coded, publicly visible value. Set SECRET_KEY for any real
# deployment.
app.secret_key = os.getenv("SECRET_KEY","lgsa-2025-dev-secret")
# Catalogue of selectable chat models, in display order. Each entry carries the
# model id, a human-readable name, a short badge label, and a 1-based
# "priority" equal to its position in the list. The first entry is used as the
# default model elsewhere in this file.
AVAILABLE_MODELS = [
    {"id": model_id, "name": display_name, "badge": badge, "priority": rank}
    for rank, (model_id, display_name, badge) in enumerate(
        (
            ("Qwen/Qwen2.5-7B-Instruct", "Qwen 2.5 7B", "⚡ Fast & Reliable"),
            ("google/gemma-2-9b-it", "Gemma 2 9B", "💎 Quality"),
            ("mistralai/Mistral-7B-Instruct-v0.2", "Mistral 7B", "🌀 Balanced"),
            ("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "TinyLlama 1.1B", "⚙️ Lightweight"),
        ),
        start=1,
    )
]

# Fallback model if all fail
FALLBACK_MODEL = "Qwen/Qwen2.5-7B-Instruct"
# In-memory registry of chat sessions, keyed by session id. Nothing in the
# visible part of this file ever removes an entry or persists it.
_sessions: dict = {}
# Held around lookups and writes of the _sessions mapping by the functions in
# this file that touch it.
_lock = threading.Lock()
class Session:
    """In-memory record of a single chat session.

    Stores the session id, the model currently selected for it, the message
    transcript, and the bookkeeping (tool calls, node traces, latencies and
    counters) that the analytics summary is computed from. All collections
    start empty and all counters start at zero.
    """

    def __init__(self, sid, model):
        """Create an empty session identified by *sid* that uses *model*."""
        # Identity and model selection.
        self.session_id, self.model_name = sid, model

        # Per-session histories; each gets its own fresh list.
        self.messages = []
        self.tool_calls = []
        self.node_traces = []

        # Running counters.
        self.turn_count = 0
        self.total_tokens = 0

        # One elapsed-time entry (milliseconds) per completed turn.
        self.latency_history = []
def _get(sid, model=""):
    """Return the session registered under *sid*, creating it when absent.

    A new session uses *model*, or the first catalogue entry when *model* is
    empty. For an existing session a non-empty *model* replaces the stored
    model name; an empty one leaves it untouched. The registry is accessed
    while holding ``_lock``.
    """
    with _lock:
        session = _sessions.get(sid)
        if session is None:
            chosen_model = model or AVAILABLE_MODELS[0]["id"]
            session = Session(sid, chosen_model)
            _sessions[sid] = session
        elif model:
            session.model_name = model
        return session
def _analytics(s):
usage = {}
for tc in s.tool_calls:
usage[tc["tool_name"]] = usage.get(tc["tool_name"],0) + 1
avg = round(sum(s.latency_history)/max(len(s.latency_history),1),1)
return {"turn_count":s.turn_count,"total_tokens":s.total_tokens,"avg_latency_ms":avg,
"latency_history":s.latency_history[-20:],"tool_call_count":len(s.tool_calls),
"tool_usage":usage,"node_traces":s.node_traces[-30:]}
def _tok(text): return max(1, int(len(text.split())*1.35))
@app.route("/")
def index():
    """Render ``index.html``, passing the model catalogue as ``models``."""
    page = render_template("index.html", models=AVAILABLE_MODELS)
    return page
@app.route("/api/models")
def api_models():
    """Return the model catalogue as a JSON response."""
    catalogue = AVAILABLE_MODELS
    return jsonify(catalogue)
def _sse(payload):
    """Format *payload* as one Server-Sent Events ``data:`` frame.

    The payload is JSON-encoded and followed by the blank line that terminates
    an SSE message.
    """
    return f"data: {json.dumps(payload)}\n\n"


def _record_event(s, ev):
    """Mirror one agent event into the bookkeeping lists of session *s*.

    Handles ``node_enter``/``node_exit`` (node traces) and
    ``tool_call``/``tool_result`` (tool calls); any other event type is
    ignored here.
    """
    kind = ev["type"]
    if kind == "node_enter":
        s.node_traces.append({"node_name": ev["node"], "entered_at": ev["timestamp"], "exited_at": None, "duration_ms": None, "status": "running"})
    elif kind == "node_exit":
        # Close the most recent still-running trace of the same node.
        for tr in reversed(s.node_traces):
            if tr["node_name"] == ev["node"] and tr["status"] == "running":
                tr.update({"exited_at": ev["timestamp"], "duration_ms": ev.get("duration_ms"), "status": "completed"})
                break
    elif kind == "tool_call":
        s.tool_calls.append({"tool_name": ev["name"], "tool_input": ev.get("input", {}), "tool_output": "", "timestamp": ev["timestamp"], "latency_ms": 0})
    elif kind == "tool_result":
        # Attach the result to the most recent call of that tool that has no
        # output recorded yet.
        for tc in reversed(s.tool_calls):
            if tc["tool_name"] == ev["name"] and tc["tool_output"] == "":
                tc.update({"tool_output": ev.get("output", ""), "latency_ms": ev.get("latency_ms", 0)})
                break


@app.route("/api/chat", methods=["POST"])
def api_chat():
    """Run one chat turn and stream its progress as Server-Sent Events.

    Reads ``message``, ``model`` and ``session_id`` from the JSON body. The
    model defaults to the first catalogue entry and the session id to a fresh
    UUID. An empty or missing message is rejected with a 400 JSON error; a
    JSON body that is not an object, or a non-string message, is treated the
    same way instead of raising.

    Otherwise returns a ``text/event-stream`` response. The agent graph runs
    in a background thread; every event it emits (except the internal
    ``_done`` marker) is forwarded to the client and mirrored into the
    session's traces. The stream ends with either an ``error`` frame (missing
    HF_TOKEN, 90-second silence, or an exception from the graph) or a ``done``
    frame carrying the assistant message, latency and analytics.
    """
    body = request.get_json(force=True) or {}
    if not isinstance(body, dict):
        # Valid JSON that is not an object (a list, a bare string, ...) has no
        # fields to read; treat it like an empty body so it is rejected below
        # with a 400 instead of failing on ``.get``.
        body = {}
    raw_message = body.get("message")
    # A non-string message cannot be stripped; treat it as empty.
    user_message = raw_message.strip() if isinstance(raw_message, str) else ""
    model_name = body.get("model") or AVAILABLE_MODELS[0]["id"]
    session_id = body.get("session_id") or str(uuid.uuid4())
    if not user_message:
        return jsonify({"error": "Message cannot be empty."}), 400
    hf_token = os.getenv("HF_TOKEN", "").strip()

    def generate():
        """Yield the SSE frames for this turn."""
        if not hf_token:
            yield _sse({'type': 'error', 'message': 'HF_TOKEN not set. Add it as a Space secret under Settings → Variables and Secrets.'})
            return

        s = _get(session_id, model_name)
        s.turn_count += 1
        t_start = time.time()
        user_entry = {"role": "user", "content": user_message, "token_count": _tok(user_message), "timestamp": datetime.utcnow().isoformat() + "Z"}
        s.messages.append(user_entry)
        events.clear_queue(session_id)
        # History handed to the agent excludes the message just appended; that
        # one travels separately as the HumanMessage below.
        prior = list(s.messages[:-1])

        # NOTE(review): imported inside the generator rather than at module
        # top, presumably to defer loading the agent stack; confirm.
        from agent.state import AgentState
        from agent.graph import build_graph
        from langchain_core.messages import HumanMessage

        initial: AgentState = {
            "messages": [HumanMessage(content=user_message)],
            "current_node": "router",
            "model_name": model_name,
            "session_id": session_id,
            "hf_token": hf_token,
            "iteration_count": 0,
            "should_end": False,
            "final_answer": None,
            "error": None,
            "conversation_history": prior,
            "pending_tool": None,
        }
        result_box: dict = {}

        def run():
            """Invoke the graph and always signal completion on the queue."""
            try:
                result_box["result"] = build_graph().invoke(initial)
            except Exception as exc:
                # Reported to the client as an error frame after the loop.
                result_box["error"] = str(exc)
            finally:
                # Sentinel that lets the streaming loop below terminate.
                events.emit(session_id, {"type": "_done"})

        threading.Thread(target=run, daemon=True).start()
        q = events.get_queue(session_id)
        buf: list = []
        while True:
            try:
                ev = q.get(timeout=90)
            except queue.Empty:
                yield _sse({'type': 'error', 'message': 'Response timed out.'})
                return
            kind = ev["type"]
            if kind == "_done":
                break
            if kind == "token":
                # Collected as a fallback answer in case the graph result
                # carries no final_answer.
                buf.append(ev["content"])
            else:
                _record_event(s, ev)
            yield _sse(ev)

        if "error" in result_box:
            yield _sse({'type': 'error', 'message': result_box['error']})
            return

        final = ((result_box.get("result") or {}).get("final_answer") or "".join(buf)).strip()
        if not final:
            final = "I'm sorry, I wasn't able to generate a response. Please try again."
        elapsed = round((time.time() - t_start) * 1000, 1)
        tok = _tok(final)
        s.total_tokens += tok + user_entry["token_count"]
        s.latency_history.append(elapsed)
        asst = {"role": "assistant", "content": final, "token_count": tok, "timestamp": datetime.utcnow().isoformat() + "Z"}
        s.messages.append(asst)
        yield _sse({'type': 'done', 'session_id': session_id, 'message': asst, 'latency_ms': elapsed, 'analytics': _analytics(s)})

    resp = Response(generate(), mimetype="text/event-stream")
    resp.headers.update({"Cache-Control": "no-cache", "X-Accel-Buffering": "no", "X-Session-ID": session_id})
    return resp
@app.route("/api/session/<session_id>")
def api_session(session_id):
    """Return the stored state of one session as JSON.

    Responds with a 404 JSON error when no session is registered under
    *session_id*. Otherwise the payload contains the model name, messages,
    tool calls, node traces and the analytics summary.
    """
    with _lock:
        s = _sessions.get(session_id)
    if s is None:
        return jsonify({"error": "Session not found"}), 404

    snapshot = {
        "session_id": session_id,
        "model_name": s.model_name,
        "messages": s.messages,
        "tool_calls": s.tool_calls,
        "node_traces": s.node_traces,
        "analytics": _analytics(s),
    }
    return jsonify(snapshot)
@app.route("/api/reset", methods=["POST"])
def api_reset():
    """Replace a session with a fresh, empty one and clear its event queue.

    Reads ``session_id`` and ``model`` from the JSON body, defaulting to a new
    UUID and the first catalogue entry respectively. A JSON body that is not
    an object is treated as empty rather than raising. Returns
    ``{"status": "ok", "session_id": ...}``.
    """
    body = request.get_json(force=True) or {}
    if not isinstance(body, dict):
        # Valid JSON that is not an object (a list, a bare string, ...) has no
        # fields to read; fall back to the defaults instead of failing on
        # ``.get``.
        body = {}
    sid = body.get("session_id") or str(uuid.uuid4())
    model = body.get("model") or AVAILABLE_MODELS[0]["id"]
    with _lock:
        # Overwrites any existing session stored under this id.
        _sessions[sid] = Session(sid, model)
    events.clear_queue(sid)
    return jsonify({"status": "ok", "session_id": sid})
# Script entry point: serve on all interfaces, port 7860, with debug off and
# threaded request handling enabled.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860, debug=False, threaded=True)