Spaces:

dodd869
/

dudud

Sleeping

dodd869 committed on Sep 14, 2025

Commit

15e853b

verified ·

1 Parent(s): 39d2fb8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,23 +1,23 @@
 import os
-import gradio as gr
 from huggingface_hub import InferenceClient
 HF_TOKEN = os.getenv("HF_TOKEN")
 if not HF_TOKEN:
     raise RuntimeError("HF_TOKEN environment variable not set")
-def respond(message, history):
-    client = InferenceClient(token=HF_TOKEN, model="Qwen/Qwen3-Next-80B-A3B-Instruct")
-    messages = [{"role": "system", "content": "You are a helpful assistant."}]
-    messages.extend(history)
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for msg in client.chat_completion(messages, max_tokens=512, stream=True, temperature=0.7, top_p=0.95):
-        if msg.choices and msg.choices[0].delta.content:
-            response += msg.choices[0].delta.content
-            yield response
-demo = gr.ChatInterface(respond, type="messages")
 if __name__ == "__main__":
-    demo.launch()

 import os
+from flask import Flask, request, Response, stream_with_context
 from huggingface_hub import InferenceClient
+app = Flask(__name__)
 HF_TOKEN = os.getenv("HF_TOKEN")
 if not HF_TOKEN:
     raise RuntimeError("HF_TOKEN environment variable not set")
+client = InferenceClient(token=HF_TOKEN, model="Qwen/Qwen3-32B-Instruct")
+@app.route("/chat", methods=["POST"])
+def chat():
+    data = request.json
+    messages = data.get("messages", [])
+    def gen():
+        for msg in client.chat_completion(messages, max_tokens=512, stream=True, temperature=0.7, top_p=0.95):
+            if msg.choices and msg.choices[0].delta.content:
+                yield msg.choices[0].delta.content
+    return Response(stream_with_context(gen()), mimetype="text/plain")
 if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=8000)