Update app.py
Browse files
app.py
CHANGED
|
@@ -113,49 +113,33 @@ for message in st.session_state["messages"]:
|
|
| 113 |
st.chat_message("assistant", avatar=DUBS_PATH).write(message["content"])
|
| 114 |
|
| 115 |
# -------------------------
|
| 116 |
-
# Streaming Logic using
|
| 117 |
# -------------------------
|
| 118 |
def stream_response(prompt_text, api_key):
|
| 119 |
"""
|
| 120 |
Stream text from the HF Inference Endpoint using the InferenceClient.
|
| 121 |
Yields each partial chunk of text as it arrives.
|
| 122 |
"""
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
temperature=0.2,
|
| 135 |
-
repetition_penalty=1.02,
|
| 136 |
-
stop_sequences=["<|endoftext|>"]
|
| 137 |
-
)
|
| 138 |
-
|
| 139 |
-
# Start streaming from the model
|
| 140 |
stream = client.text_generation(prompt_text, stream=True, details=True, **gen_kwargs)
|
| 141 |
|
| 142 |
-
# We'll build the response incrementally
|
| 143 |
partial_text = ""
|
| 144 |
-
|
| 145 |
try:
|
| 146 |
for response in stream:
|
| 147 |
-
# Skip special tokens
|
| 148 |
if response.token.special:
|
| 149 |
continue
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
break
|
| 153 |
-
|
| 154 |
-
# Update the partial text
|
| 155 |
-
partial_text = response.token.text
|
| 156 |
-
|
| 157 |
-
# Yield the text so far so we can stream on the frontend
|
| 158 |
-
yield partial_text
|
| 159 |
except Exception as e:
|
| 160 |
yield f"Error: {e}"
|
| 161 |
|
|
@@ -171,7 +155,6 @@ if prompt := st.chat_input():
|
|
| 171 |
st.chat_message("user").write(prompt)
|
| 172 |
|
| 173 |
# 2) Build combined chat history for the model prompt
|
| 174 |
-
# This format is just an example; adjust as needed for your model
|
| 175 |
chat_history = "".join(
|
| 176 |
[f"<|{msg['role']}|>{msg['content']}<|end|>" for msg in st.session_state["messages"]]
|
| 177 |
)
|
|
@@ -181,11 +164,10 @@ if prompt := st.chat_input():
|
|
| 181 |
assistant_message_placeholder = st.chat_message("assistant", avatar=DUBS_PATH).empty()
|
| 182 |
|
| 183 |
full_response = ""
|
| 184 |
-
# 4) Stream chunks from the
|
| 185 |
for chunk in stream_response(chat_history, HF_API_KEY):
|
| 186 |
-
full_response += chunk
|
| 187 |
-
|
| 188 |
-
assistant_message_placeholder.markdown(full_response + "▌")
|
| 189 |
assistant_message_placeholder.markdown(full_response)
|
| 190 |
|
| 191 |
# 5) Save the final assistant message in session state
|
|
|
|
| 113 |
st.chat_message("assistant", avatar=DUBS_PATH).write(message["content"])
|
| 114 |
|
| 115 |
# -------------------------
|
| 116 |
+
# Streaming Logic using Generator
|
| 117 |
# -------------------------
|
| 118 |
def stream_response(prompt_text, api_key):
    """
    Stream text from the HF Inference Endpoint using the InferenceClient.

    Args:
        prompt_text: Fully formatted prompt / chat-history string sent to the model.
        api_key: HF API token used to authenticate against the endpoint.

    Yields:
        str: Each partial chunk of generated text as it arrives, or a single
        "Error: ..." string if streaming fails.
    """
    # NOTE(review): SPACE_URL is a module-level constant defined elsewhere in app.py.
    client = InferenceClient(SPACE_URL, token=api_key)

    gen_kwargs = {
        "max_new_tokens": 512,
        "top_k": 30,
        "top_p": 0.9,
        "temperature": 0.2,
        "repetition_penalty": 1.02,
        "stop_sequences": ["<|endoftext|>"],
    }

    # Stream token-by-token; details=True exposes per-token metadata (.token.special).
    stream = client.text_generation(prompt_text, stream=True, details=True, **gen_kwargs)

    try:
        for response in stream:
            # Skip control tokens (e.g. <|endoftext|>) so they never reach the UI.
            if response.token.special:
                continue
            # The caller accumulates the full response itself, so yield only the
            # new chunk. (Removed the dead `partial_text` accumulator: it was
            # written on every iteration but never read.)
            yield response.token.text
    except Exception as e:
        # Broad catch is deliberate here: surface the failure as a chat message
        # instead of crashing the Streamlit app mid-stream.
        yield f"Error: {e}"
|
| 145 |
|
|
|
|
| 155 |
st.chat_message("user").write(prompt)
|
| 156 |
|
| 157 |
# 2) Build combined chat history for the model prompt
|
|
|
|
| 158 |
chat_history = "".join(
|
| 159 |
[f"<|{msg['role']}|>{msg['content']}<|end|>" for msg in st.session_state["messages"]]
|
| 160 |
)
|
|
|
|
| 164 |
assistant_message_placeholder = st.chat_message("assistant", avatar=DUBS_PATH).empty()
|
| 165 |
|
| 166 |
full_response = ""
|
| 167 |
+
# 4) Stream chunks from the generator
|
| 168 |
for chunk in stream_response(chat_history, HF_API_KEY):
|
| 169 |
+
full_response += chunk # Accumulate the full response
|
| 170 |
+
assistant_message_placeholder.markdown(full_response + "▌") # Show streamed response
|
|
|
|
| 171 |
assistant_message_placeholder.markdown(full_response)
|
| 172 |
|
| 173 |
# 5) Save the final assistant message in session state
|