dodd869 committed on
Commit
15e853b
·
verified ·
1 Parent(s): 39d2fb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -13
app.py CHANGED
@@ -1,23 +1,23 @@
1
  import os
2
- import gradio as gr
3
  from huggingface_hub import InferenceClient
4
 
 
5
# Abort at import time when no Hugging Face inference token is configured.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN environment variable not set")
8
 
9
def respond(message, history):
    """Stream progressively longer partial replies for one chat turn.

    message: the latest user utterance (str).
    history: prior turns as a list of {"role", "content"} dicts
             (Gradio `type="messages"` format).
    Yields the accumulated assistant text after each streamed delta.
    """
    client = InferenceClient(token=HF_TOKEN, model="Qwen/Qwen3-Next-80B-A3B-Instruct")
    conversation = (
        [{"role": "system", "content": "You are a helpful assistant."}]
        + list(history)
        + [{"role": "user", "content": message}]
    )
    accumulated = ""
    stream = client.chat_completion(
        conversation, max_tokens=512, stream=True, temperature=0.7, top_p=0.95
    )
    for event in stream:
        # Some stream events carry no choices or an empty delta; skip those.
        delta = event.choices[0].delta.content if event.choices else None
        if delta:
            accumulated += delta
            yield accumulated
19
 
20
# Expose the streaming responder through Gradio's chat UI; `type="messages"`
# makes history arrive as OpenAI-style role/content dicts.
demo = gr.ChatInterface(respond, type="messages")

if __name__ == "__main__":  # direct launch (a Space runner imports `demo` instead)
    demo.launch()
 
1
  import os
2
+ from flask import Flask, request, Response, stream_with_context
3
  from huggingface_hub import InferenceClient
4
 
5
app = Flask(__name__)

# Fail fast at import time when the inference credential is missing.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN environment variable not set")

# One shared client for all requests (created once, not per call).
# NOTE(review): verify the model id "Qwen/Qwen3-32B-Instruct" exists on the Hub.
client = InferenceClient(token=HF_TOKEN, model="Qwen/Qwen3-32B-Instruct")
 
 
 
 
 
 
 
 
 
11
 
12
@app.route("/chat", methods=["POST"])
def chat():
    """Stream a chat completion for the posted messages as plain text.

    Expects a JSON body like {"messages": [{"role": ..., "content": ...}, ...]}.
    Returns an incremental text/plain stream of the assistant's reply.
    """
    # Bug fix: `request.json` is None (or raises) when the body is missing or
    # not valid JSON, so `data.get(...)` crashed with an AttributeError (500).
    # get_json(silent=True) returns None instead of raising; fall back to {}.
    data = request.get_json(silent=True) or {}
    messages = data.get("messages", [])
    if not isinstance(messages, list):
        # Defensive: a scalar/object here would blow up inside the stream.
        messages = []

    def gen():
        # Forward only non-empty content deltas from the streamed completion.
        for chunk in client.chat_completion(
            messages, max_tokens=512, stream=True, temperature=0.7, top_p=0.95
        ):
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content

    # stream_with_context keeps the request context alive while streaming.
    return Response(stream_with_context(gen()), mimetype="text/plain")
21
 
22
if __name__ == "__main__":
    # Bind to all interfaces so container/Space port mapping reaches the server.
    app.run(host="0.0.0.0", port=8000)