Spaces:

abanm
/

DubsChat

Build error

App Files Files Community

abanm committed on Jan 19, 2025

Commit

f3417ef

verified ·

1 Parent(s): 86c89d3

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -47

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import requests
 import json
 import os
 import datetime
 # Constants
 SPACE_URL = "https://z7svds7k42bwhhgm.us-east-1.aws.endpoints.huggingface.cloud"
@@ -11,7 +12,7 @@ EOS_TOKEN = "<|end|>"
 CHAT_HISTORY_DIR = "chat_histories"
 IMAGE_PATH = "DubsChat.png"
 IMAGE_PATH_2 = "Reboot AI.png"
-Dubs_PATH = "Dubs.png"
 # Ensure the directory exists
 try:
@@ -109,56 +110,54 @@ for message in st.session_state["messages"]:
     if message["role"] == "user":
         st.chat_message("user").write(message["content"])
     elif message["role"] == "assistant":
-        st.chat_message("assistant", avatar=Dubs_PATH).write(message["content"])
 # -------------------------
-#    Streaming Logic
 # -------------------------
 def stream_response(prompt_text, api_key, timeout=60):
     """
     Stream text from the HF Inference Endpoint (or any streaming API).

     Args:
         prompt_text: Prompt string sent as the "inputs" field of the payload.
         api_key: Bearer token placed in the Authorization header.
         timeout: Seconds passed to requests as the connect/read timeout, so a
             stalled endpoint cannot hang the app forever.

     Yields:
         Each chunk of generated text as it arrives. On failure a single
         human-readable error string is yielded instead of raising.
     """
     # Match the structure of the working payload.
     payload = {
         "inputs": prompt_text,
         "parameters": {
             "max_new_tokens": 250,
             "return_full_text": False,
             "stream": True,
         },
     }
     headers = {
         "Accept": "application/json",
         "Authorization": f"Bearer {api_key}",
         "Content-Type": "application/json",
     }
     try:
         # stream=True delivers partial chunks; the context manager releases
         # the connection even if the consumer stops iterating early.
         with requests.post(
             SPACE_URL,
             json=payload,
             headers=headers,
             stream=True,
             timeout=timeout,
         ) as response:
             response.raise_for_status()
             # The endpoint presumably returns lines of JSON shaped like
             # [{"generated_text": "..."}] - adjust parsing if yours differs.
             for line in response.iter_lines():
                 if not line:
                     continue
                 data = json.loads(line.decode("utf-8"))
                 yield data[0].get("generated_text", "")
     except requests.exceptions.Timeout:
         yield "The request timed out. Please try again later."
     except requests.exceptions.RequestException as e:
         yield f"Error: {e}"
     except (ValueError, LookupError, TypeError, AttributeError):
         # ValueError covers JSON and UTF-8 decode failures; the others cover
         # a decoded body that is not a non-empty list of dicts.
         yield "Error decoding server response."
 # -------------------------
 #       User Input
@@ -172,18 +171,19 @@ if prompt := st.chat_input():
         st.chat_message("user").write(prompt)
         # 2) Build combined chat history for the model prompt
         chat_history = "".join(
             [f"<|{msg['role']}|>{msg['content']}<|end|>" for msg in st.session_state["messages"]]
         )
         # 3) Create a placeholder for the assistant’s streamed response
         with st.spinner("Dubs is thinking... Woof Woof! 🐾"):
-            assistant_message_placeholder = st.chat_message("assistant", avatar=Dubs_PATH).empty()
             full_response = ""
-            # 4) Stream chunks from the API
-            for chunk in stream_response(chat_history, HF_API_KEY):
-                full_response += chunk
                 # Continuously update the placeholder with the partial response
                 assistant_message_placeholder.write(full_response)

 import json
 import os
 import datetime
+from huggingface_hub import InferenceClient  # Make sure to install huggingface_hub first
 # Constants
 SPACE_URL = "https://z7svds7k42bwhhgm.us-east-1.aws.endpoints.huggingface.cloud"
 CHAT_HISTORY_DIR = "chat_histories"
 IMAGE_PATH = "DubsChat.png"
 IMAGE_PATH_2 = "Reboot AI.png"
+DUBS_PATH = "Dubs.png"
 # Ensure the directory exists
 try:
     if message["role"] == "user":
         st.chat_message("user").write(message["content"])
     elif message["role"] == "assistant":
+        st.chat_message("assistant", avatar=DUBS_PATH).write(message["content"])
 # -------------------------
+#    Streaming Logic using InferenceClient
 # -------------------------
 def stream_response(prompt_text, api_key):
     """
     Stream text from the HF Inference Endpoint using the InferenceClient.

     Args:
         prompt_text: Full prompt sent to the model.
         api_key: Hugging Face token used to authenticate with the endpoint.

     Yields:
         The cumulative response text after each non-special token, so the
         caller should replace (not append to) what it displays. If creating
         the client, starting the request or reading the stream fails, a
         single "Error: ..." string is yielded instead of raising.
     """
     # NOTE(review): the chat prompt built by the caller ends each turn with
     # "<|end|>", which is not listed here - confirm the model stops on it.
     stop_sequences = ["<|endoftext|>"]
     # Generation parameters forwarded to text_generation()
     gen_kwargs = dict(
         max_new_tokens=512,
         top_k=30,
         top_p=0.9,
         temperature=0.2,
         repetition_penalty=1.02,
         stop_sequences=stop_sequences,
     )
     # We'll build the response incrementally
     partial_text = ""
     try:
         # InferenceClient has no `endpoint_url` keyword; the URL of a
         # deployed endpoint is passed as `model`. Client creation and the
         # request live inside the try so their failures are reported too.
         client = InferenceClient(model=SPACE_URL, token=api_key)
         stream = client.text_generation(
             prompt_text, stream=True, details=True, **gen_kwargs
         )
         for response in stream:
             token = response.token
             # Skip special tokens
             if token.special:
                 continue
             # Stop as soon as a stop sequence is emitted as a token
             if token.text in stop_sequences:
                 break
             partial_text += token.text
             # Yield the text so far so the frontend can re-render it
             yield partial_text
     except Exception as e:
         # UI boundary: surface any failure as text instead of crashing the app
         yield f"Error: {e}"
 # -------------------------
 #       User Input
         st.chat_message("user").write(prompt)
         # 2) Build combined chat history for the model prompt
+        #    This format is just an example; adjust as needed for your model
         chat_history = "".join(
             [f"<|{msg['role']}|>{msg['content']}<|end|>" for msg in st.session_state["messages"]]
         )
         # 3) Create a placeholder for the assistant’s streamed response
         with st.spinner("Dubs is thinking... Woof Woof! 🐾"):
+            assistant_message_placeholder = st.chat_message("assistant", avatar=DUBS_PATH).empty()
             full_response = ""
+            # 4) Stream chunks from the Hugging Face InferenceClient
+            for chunk in stream_response(chat_history, dubs_key):
+                full_response = chunk  # each chunk is the incremental text so far
                 # Continuously update the placeholder with the partial response
                 assistant_message_placeholder.write(full_response)