abanm committed on
Commit
f3417ef
·
verified ·
1 Parent(s): 86c89d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -47
app.py CHANGED
@@ -3,6 +3,7 @@ import requests
3
  import json
4
  import os
5
  import datetime
 
6
 
7
  # Constants
8
  SPACE_URL = "https://z7svds7k42bwhhgm.us-east-1.aws.endpoints.huggingface.cloud"
@@ -11,7 +12,7 @@ EOS_TOKEN = "<|end|>"
11
  CHAT_HISTORY_DIR = "chat_histories"
12
  IMAGE_PATH = "DubsChat.png"
13
  IMAGE_PATH_2 = "Reboot AI.png"
14
- Dubs_PATH = "Dubs.png"
15
 
16
  # Ensure the directory exists
17
  try:
@@ -109,56 +110,54 @@ for message in st.session_state["messages"]:
109
  if message["role"] == "user":
110
  st.chat_message("user").write(message["content"])
111
  elif message["role"] == "assistant":
112
- st.chat_message("assistant", avatar=Dubs_PATH).write(message["content"])
113
 
114
  # -------------------------
115
- # Streaming Logic
116
  # -------------------------
117
  def stream_response(prompt_text, api_key):
118
  """
119
- Stream text from the HF Inference Endpoint (or any streaming API).
120
- Yields each chunk of text as it arrives.
121
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  try:
123
- # Match the structure of your working payload:
124
- payload = {
125
- "inputs": prompt_text,
126
- "parameters": {
127
- "max_new_tokens": 250,
128
- "return_full_text": False,
129
- "stream": True
130
- }
131
- }
132
- headers = {
133
- "Accept" : "application/json",
134
- "Authorization": f"Bearer {api_key}",
135
- "Content-Type": "application/json"
136
- }
137
-
138
- # POST request with stream=True to get partial chunks
139
- response = requests.post(
140
- SPACE_URL,
141
- json=payload,
142
- headers=headers,
143
- stream=True
144
- )
145
- response.raise_for_status()
146
-
147
- # The endpoint presumably returns lines of JSON. Adjust parsing if needed:
148
- for line in response.iter_lines():
149
- if line:
150
- data = json.loads(line.decode("utf-8"))
151
- # Example: data might be [{"generated_text": "..."}]
152
- # Adjust if your endpoint returns different JSON keys
153
- chunk = data[0].get("generated_text", "")
154
- yield chunk
155
-
156
- except requests.exceptions.Timeout:
157
- yield "The request timed out. Please try again later."
158
- except requests.exceptions.RequestException as e:
159
  yield f"Error: {e}"
160
- except json.JSONDecodeError:
161
- yield "Error decoding server response."
162
 
163
  # -------------------------
164
  # User Input
@@ -172,18 +171,19 @@ if prompt := st.chat_input():
172
  st.chat_message("user").write(prompt)
173
 
174
  # 2) Build combined chat history for the model prompt
 
175
  chat_history = "".join(
176
  [f"<|{msg['role']}|>{msg['content']}<|end|>" for msg in st.session_state["messages"]]
177
  )
178
 
179
  # 3) Create a placeholder for the assistant’s streamed response
180
  with st.spinner("Dubs is thinking... Woof Woof! 🐾"):
181
- assistant_message_placeholder = st.chat_message("assistant", avatar=Dubs_PATH).empty()
182
 
183
  full_response = ""
184
- # 4) Stream chunks from the API
185
- for chunk in stream_response(chat_history, HF_API_KEY):
186
- full_response += chunk
187
  # Continuously update the placeholder with the partial response
188
  assistant_message_placeholder.write(full_response)
189
 
 
3
  import json
4
  import os
5
  import datetime
6
+ from huggingface_hub import InferenceClient # Make sure to install huggingface_hub first
7
 
8
  # Constants
9
  SPACE_URL = "https://z7svds7k42bwhhgm.us-east-1.aws.endpoints.huggingface.cloud"
 
12
  CHAT_HISTORY_DIR = "chat_histories"
13
  IMAGE_PATH = "DubsChat.png"
14
  IMAGE_PATH_2 = "Reboot AI.png"
15
+ DUBS_PATH = "Dubs.png"
16
 
17
  # Ensure the directory exists
18
  try:
 
110
  if message["role"] == "user":
111
  st.chat_message("user").write(message["content"])
112
  elif message["role"] == "assistant":
113
+ st.chat_message("assistant", avatar=DUBS_PATH).write(message["content"])
114
 
115
  # -------------------------
116
+ # Streaming Logic using InferenceClient
117
  # -------------------------
118
def stream_response(prompt_text, api_key):
    """
    Stream text from the HF Inference Endpoint using the InferenceClient.

    Yields the CUMULATIVE text generated so far — each yielded value is the
    full partial response, not just the newest token — so the caller can
    simply overwrite its placeholder with every chunk it receives.

    Parameters
    ----------
    prompt_text : str
        The fully formatted prompt (serialized chat history) sent to the model.
    api_key : str
        Hugging Face API token used to authorize the request.

    Yields
    ------
    str
        The partial response accumulated so far, or an "Error: ..." message
        if anything goes wrong while contacting the endpoint.
    """
    try:
        # Initialize the client with the endpoint URL and API key.
        client = InferenceClient(
            endpoint_url=SPACE_URL,
            token=api_key,
        )

        # Generation parameters for the endpoint.
        # NOTE(review): stop_sequences uses "<|endoftext|>" while the chat
        # history is built with "<|end|>" delimiters — confirm which token
        # this model actually emits at end of turn.
        gen_kwargs = dict(
            max_new_tokens=512,
            top_k=30,
            top_p=0.9,
            temperature=0.2,
            repetition_penalty=1.02,
            stop_sequences=["<|endoftext|>"],
        )

        # Start streaming from the model. This call (and the client
        # construction above) previously sat OUTSIDE the try block, so
        # connection/auth/HTTP errors escaped uncaught instead of being
        # surfaced to the UI via the yielded error message below.
        stream = client.text_generation(
            prompt_text, stream=True, details=True, **gen_kwargs
        )

        # Build the response incrementally.
        partial_text = ""
        for response in stream:
            # Skip special tokens (BOS/EOS markers etc.).
            if response.token.special:
                continue
            # Stop if the model emits a configured stop sequence.
            if response.token.text in gen_kwargs["stop_sequences"]:
                break

            partial_text += response.token.text
            # Yield the text so far so the frontend can stream it.
            yield partial_text
    except Exception as e:
        yield f"Error: {e}"
 
 
161
 
162
  # -------------------------
163
  # User Input
 
171
  st.chat_message("user").write(prompt)
172
 
173
  # 2) Build combined chat history for the model prompt
174
+ # This format is just an example; adjust as needed for your model
175
  chat_history = "".join(
176
  [f"<|{msg['role']}|>{msg['content']}<|end|>" for msg in st.session_state["messages"]]
177
  )
178
 
179
  # 3) Create a placeholder for the assistant’s streamed response
180
  with st.spinner("Dubs is thinking... Woof Woof! 🐾"):
181
+ assistant_message_placeholder = st.chat_message("assistant", avatar=DUBS_PATH).empty()
182
 
183
  full_response = ""
184
+ # 4) Stream chunks from the Hugging Face InferenceClient
185
+ for chunk in stream_response(chat_history, dubs_key):
186
+ full_response = chunk # each chunk is the incremental text so far
187
  # Continuously update the placeholder with the partial response
188
  assistant_message_placeholder.write(full_response)
189