Spaces:

kaizen9
/

server

Sleeping

App Files Community

kaizen9 committed on Aug 29, 2025

Commit

61fc039

1 Parent(s): 33f0dc1

a

Browse files

Files changed (2) hide show

app.py +19 -18
requirements.txt +2 -0

app.py CHANGED Viewed

@@ -2,23 +2,24 @@ import os
 import gradio as gr
 from openai import OpenAI
-# Pick up secrets from HF Space
 BASE = os.getenv("HF_ENDPOINT_URL", "").rstrip("/")
 API_KEY = os.getenv("HF_TOKEN")
-MODEL_ID = "kaizen9/qsft_30_6000_v2"
-client = OpenAI(
-    base_url=f"{BASE}/v1",
-    api_key=API_KEY,
-)
 def build_messages(history, user_msg, system_msg):
     msgs = []
-    if system_msg.strip():
         msgs.append({"role": "system", "content": system_msg.strip()})
     for u, a in history:
-        if u: msgs.append({"role": "user", "content": u})
-        if a: msgs.append({"role": "assistant", "content": a})
     msgs.append({"role": "user", "content": user_msg})
     return msgs
@@ -32,7 +33,6 @@ def chat_fn(message, history, system_message, temperature, top_p, max_tokens):
         max_tokens=int(max_tokens),
         stream=True,
     )
     partial = ""
     for chunk in stream:
         delta = chunk.choices[0].delta
@@ -40,23 +40,24 @@ def chat_fn(message, history, system_message, temperature, top_p, max_tokens):
             partial += delta.content
             yield partial
-with gr.Blocks() as demo:
-    gr.Markdown("# QSFT Chat UI")
     system_box = gr.Textbox(
         label="System prompt",
         value="You are a helpful assistant.",
         lines=2,
     )
-    temp = gr.Slider(0.0, 2.0, 0.7, step=0.1, label="Temperature")
-    topp = gr.Slider(0.0, 1.0, 0.95, step=0.01, label="Top-p")
-    maxt = gr.Slider(16, 4096, 512, step=16, label="Max tokens")
     gr.ChatInterface(
         fn=chat_fn,
         additional_inputs=[system_box, temp, topp, maxt],
-        retry_btn=True,
-        undo_btn=True,
     )
 if __name__ == "__main__":

 import gradio as gr
 from openai import OpenAI
 BASE = os.getenv("HF_ENDPOINT_URL", "").rstrip("/")
 API_KEY = os.getenv("HF_TOKEN")
+MODEL_ID = os.getenv("MODEL_ID", "kaizen9/qsft_30_6000_v2")
+if not BASE or not API_KEY:
+    raise RuntimeError("Set HF_ENDPOINT_URL and HF_TOKEN in Settings → Repository secrets.")
+client = OpenAI(base_url=f"{BASE}/v1", api_key=API_KEY)
 def build_messages(history, user_msg, system_msg):
     msgs = []
+    if system_msg and system_msg.strip():
         msgs.append({"role": "system", "content": system_msg.strip()})
     for u, a in history:
+        if u:
+            msgs.append({"role": "user", "content": u})
+        if a:
+            msgs.append({"role": "assistant", "content": a})
     msgs.append({"role": "user", "content": user_msg})
     return msgs
         max_tokens=int(max_tokens),
         stream=True,
     )
     partial = ""
     for chunk in stream:
         delta = chunk.choices[0].delta
             partial += delta.content
             yield partial
+with gr.Blocks(title="QSFT Chat") as demo:
+    gr.Markdown("# QSFT Chat\nTalk to your HF Inference Endpoint via OpenAI /v1.")
     system_box = gr.Textbox(
         label="System prompt",
         value="You are a helpful assistant.",
         lines=2,
     )
+    with gr.Row():
+        temp = gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Temperature")
+        topp = gr.Slider(0.0, 1.0, value=0.95, step=0.01, label="Top-p")
+        maxt = gr.Slider(16, 4096, value=512, step=16, label="Max tokens")
     gr.ChatInterface(
         fn=chat_fn,
         additional_inputs=[system_box, temp, topp, maxt],
+        submit_btn="Send",
+        stop_btn="Stop",
+        multimodal=False,
     )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,2 +1,4 @@
 gradio>=4.44.0
 openai>=1.40.0

 gradio>=4.44.0
 openai>=1.40.0
+httpx>=0.27.0