elfsong committed on
Commit
45e2ada
·
1 Parent(s): 1f1385b

refactor: Improve message handling in bot_response function by streamlining history updates and enhancing user interaction during streaming.

Browse files
Files changed (1) hide show
  1. app.py +16 -14
app.py CHANGED
@@ -9,27 +9,24 @@ MODELS = [
9
  ]
10
 
11
  def bot_response(user_message, history, model_name, system_message, max_tokens, temperature, top_p, oauth_token: gr.OAuthToken | None, local_endpoint: str):
12
- if not user_message:
13
  yield history, ""
14
  return
15
 
16
  token = oauth_token.token if oauth_token else None
17
 
18
- # --- Logic switch: local vLLM vs Hugging Face Hub ---
19
  if model_name.startswith("Local-"):
20
  client = InferenceClient(base_url=local_endpoint, token="vllm-token")
21
  else:
22
  client = InferenceClient(token=token, model=model_name)
23
 
24
- # 1. Construct messages for API
25
- api_messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": user_message}]
 
 
26
 
27
- # 2. Update UI history (Gradio 5/6 format)
28
- new_history = history + [
29
- {"role": "user", "content": user_message},
30
- {"role": "assistant", "content": ""}
31
- ]
32
- yield new_history, ""
33
 
34
  try:
35
  response_text = ""
@@ -44,12 +41,17 @@ def bot_response(user_message, history, model_name, system_message, max_tokens,
44
  for chunk in stream:
45
  token_content = chunk.choices[0].delta.content or ""
46
  response_text += token_content
47
- new_history[-1]["content"] = response_text
48
- yield new_history, ""
 
 
 
 
 
49
 
50
  except Exception as e:
51
- new_history[-1]["content"] = f"**Error:** {str(e)}"
52
- yield new_history, ""
53
 
54
  with gr.Blocks() as demo:
55
  with gr.Sidebar():
 
9
  ]
10
 
11
  def bot_response(user_message, history, model_name, system_message, max_tokens, temperature, top_p, oauth_token: gr.OAuthToken | None, local_endpoint: str):
12
+ if not user_message or user_message.strip() == "":
13
  yield history, ""
14
  return
15
 
16
  token = oauth_token.token if oauth_token else None
17
 
 
18
  if model_name.startswith("Local-"):
19
  client = InferenceClient(base_url=local_endpoint, token="vllm-token")
20
  else:
21
  client = InferenceClient(token=token, model=model_name)
22
 
23
+ # 1. Append the new user message to history
24
+ history.append({"role": "user", "content": user_message})
25
+ # 2. Append an empty assistant message to be filled by the stream
26
+ history.append({"role": "assistant", "content": ""})
27
 
28
+ # Prep the API payload (history now includes the new user message)
29
+ api_messages = [{"role": "system", "content": system_message}] + history[:-1]
 
 
 
 
30
 
31
  try:
32
  response_text = ""
 
41
  for chunk in stream:
42
  token_content = chunk.choices[0].delta.content or ""
43
  response_text += token_content
44
+ # Update the last message in history (the assistant's content)
45
+ history[-1]["content"] = response_text
46
+ # Yield history and keep the input box text as is during streaming
47
+ yield history, gr.update(interactive=False)
48
+
49
+ # Final yield to clear the input box and make it interactive again
50
+ yield history, ""
51
 
52
  except Exception as e:
53
+ history[-1]["content"] = f"**Error:** {str(e)}"
54
+ yield history, ""
55
 
56
  with gr.Blocks() as demo:
57
  with gr.Sidebar():