Selinaliu1030 committed on
Commit
55ea8b6
·
1 Parent(s): 66ce9ad

update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -17
app.py CHANGED
@@ -2,6 +2,27 @@ import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  def respond(
6
  message,
7
  history: list[dict[str, str]],
@@ -11,38 +32,39 @@ def respond(
11
  top_p,
12
  hf_token: gr.OAuthToken,
13
  ):
14
- """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
16
- """
17
- client = InferenceClient(token=hf_token.token, model="Selinaliu1030/lora_model")
 
 
18
 
 
19
  messages = [{"role": "system", "content": system_message}]
20
-
21
  messages.extend(history)
22
-
23
  messages.append({"role": "user", "content": message})
24
 
 
 
 
25
  response = ""
26
 
27
- for message in client.text_generation(
28
- messages,
 
29
  max_new_tokens=max_tokens,
30
  stream=True,
31
  temperature=temperature,
32
  top_p=top_p,
33
  ):
34
- choices = message.choices
35
  token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
38
 
39
  response += token
40
  yield response
41
 
42
 
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
  chatbot = gr.ChatInterface(
47
  respond,
48
  type="messages",
@@ -65,8 +87,5 @@ with gr.Blocks() as demo:
65
  gr.LoginButton()
66
  chatbot.render()
67
 
68
-
69
  if __name__ == "__main__":
70
  demo.launch()
71
-
72
-
 
2
  from huggingface_hub import InferenceClient
3
 
4
 
5
+ def format_chat_messages(messages):
6
+ """
7
+ Converts Gradio message history into a single text prompt
8
+ in ChatML / Llama-3-like format.
9
+ """
10
+ prompt = ""
11
+ for msg in messages:
12
+ role = msg["role"]
13
+ content = msg["content"]
14
+
15
+ if role == "system":
16
+ prompt += f"<|system|>\n{content}\n"
17
+ elif role == "user":
18
+ prompt += f"<|user|>\n{content}\n"
19
+ else:
20
+ prompt += f"<|assistant|>\n{content}\n"
21
+
22
+ prompt += "<|assistant|>\n" # assistant is about to reply
23
+ return prompt
24
+
25
+
26
  def respond(
27
  message,
28
  history: list[dict[str, str]],
 
32
  top_p,
33
  hf_token: gr.OAuthToken,
34
  ):
35
+ # explicitly specify provider (IMPORTANT for avoiding StopIteration)
36
+ client = InferenceClient(
37
+ model="Selinaliu1030/lora_model",
38
+ token=hf_token.token,
39
+ provider="hf-inference" # <-- ⭐ REQUIRED FIX
40
+ )
41
 
42
+ # Build the message list
43
  messages = [{"role": "system", "content": system_message}]
 
44
  messages.extend(history)
 
45
  messages.append({"role": "user", "content": message})
46
 
47
+ # Convert messages into single prompt
48
+ prompt = format_chat_messages(messages)
49
+
50
  response = ""
51
 
52
+ # Use the correct API for text generation (not chat endpoints)
53
+ for msg in client.text_generation(
54
+ prompt,
55
  max_new_tokens=max_tokens,
56
  stream=True,
57
  temperature=temperature,
58
  top_p=top_p,
59
  ):
 
60
  token = ""
61
+ if msg.token: # new API returns .token
62
+ token = msg.token
63
 
64
  response += token
65
  yield response
66
 
67
 
 
 
 
68
  chatbot = gr.ChatInterface(
69
  respond,
70
  type="messages",
 
87
  gr.LoginButton()
88
  chatbot.render()
89
 
 
90
  if __name__ == "__main__":
91
  demo.launch()