elfsong committed on
Commit
1f1385b
·
1 Parent(s): 810081f

feat: Add support for local model endpoints and enhance bot_response function to handle local vs. remote inference clients.

Browse files
Files changed (1) hide show
  1. app.py +21 -6
app.py CHANGED
@@ -4,15 +4,22 @@ from huggingface_hub import InferenceClient
4
  MODELS = [
5
  "Qwen/Qwen2.5-72B-Instruct",
6
  "meta-llama/Llama-3.1-8B-Instruct",
 
 
7
  ]
8
 
9
- def bot_response(user_message, history, model_name, system_message, max_tokens, temperature, top_p, oauth_token: gr.OAuthToken | None):
10
  if not user_message:
11
  yield history, ""
12
  return
13
 
14
  token = oauth_token.token if oauth_token else None
15
- client = InferenceClient(token=token, model=model_name)
 
 
 
 
 
16
 
17
  # 1. Construct messages for API
18
  api_messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": user_message}]
@@ -48,6 +55,14 @@ with gr.Blocks() as demo:
48
  with gr.Sidebar():
49
  gr.Markdown("## Configuration")
50
  gr.LoginButton()
 
 
 
 
 
 
 
 
51
  system_msg = gr.Textbox(value="You are a helpful assistant.", label="System Prompt")
52
  max_t = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens")
53
  temp = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
@@ -62,7 +77,7 @@ with gr.Blocks() as demo:
62
  # Explicitly set type="messages"
63
  chatbot_a = gr.Chatbot(label="Model A Output", type="messages")
64
  msg_a = gr.Textbox(placeholder="Send message to Model A...", label="Model A Input")
65
- btn_a = gr.Button("Send to A")
66
 
67
  # --- Model B ---
68
  with gr.Column():
@@ -70,15 +85,15 @@ with gr.Blocks() as demo:
70
  # Explicitly set type="messages"
71
  chatbot_b = gr.Chatbot(label="Model B Output", type="messages")
72
  msg_b = gr.Textbox(placeholder="Send message to Model B...", label="Model B Input")
73
- btn_b = gr.Button("Send to B")
74
 
75
  # --- Bind Events ---
76
 
77
- a_inputs = [msg_a, chatbot_a, model_a_name, system_msg, max_t, temp, top_p_val]
78
  msg_a.submit(bot_response, a_inputs, [chatbot_a, msg_a])
79
  btn_a.click(bot_response, a_inputs, [chatbot_a, msg_a])
80
 
81
- b_inputs = [msg_b, chatbot_b, model_b_name, system_msg, max_t, temp, top_p_val]
82
  msg_b.submit(bot_response, b_inputs, [chatbot_b, msg_b])
83
  btn_b.click(bot_response, b_inputs, [chatbot_b, msg_b])
84
 
 
4
  MODELS = [
5
  "Qwen/Qwen2.5-72B-Instruct",
6
  "meta-llama/Llama-3.1-8B-Instruct",
7
+ "Local-Model-1",
8
+ "Local-Model-2",
9
  ]
10
 
11
+ def bot_response(user_message, history, model_name, system_message, max_tokens, temperature, top_p, oauth_token: gr.OAuthToken | None, local_endpoint: str):
12
  if not user_message:
13
  yield history, ""
14
  return
15
 
16
  token = oauth_token.token if oauth_token else None
17
+
18
+ # --- Logic switch: local vLLM vs Hugging Face Hub ---
19
+ if model_name.startswith("Local-"):
20
+ client = InferenceClient(base_url=local_endpoint, token="vllm-token")
21
+ else:
22
+ client = InferenceClient(token=token, model=model_name)
23
 
24
  # 1. Construct messages for API
25
  api_messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": user_message}]
 
55
  with gr.Sidebar():
56
  gr.Markdown("## Configuration")
57
  gr.LoginButton()
58
+
59
+ # Local vLLM endpoint setting
60
+ local_endpoint = gr.Textbox(
61
+ value="http://localhost:8000/v1",
62
+ label="Local vLLM Endpoint",
63
+ placeholder="http://127.0.0.1:8000/v1"
64
+ )
65
+
66
  system_msg = gr.Textbox(value="You are a helpful assistant.", label="System Prompt")
67
  max_t = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens")
68
  temp = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
 
77
  # Explicitly set type="messages"
78
  chatbot_a = gr.Chatbot(label="Model A Output", type="messages")
79
  msg_a = gr.Textbox(placeholder="Send message to Model A...", label="Model A Input")
80
+ btn_a = gr.Button("Send to Model A")
81
 
82
  # --- Model B ---
83
  with gr.Column():
 
85
  # Explicitly set type="messages"
86
  chatbot_b = gr.Chatbot(label="Model B Output", type="messages")
87
  msg_b = gr.Textbox(placeholder="Send message to Model B...", label="Model B Input")
88
+ btn_b = gr.Button("Send to Model B")
89
 
90
  # --- Bind Events ---
91
 
92
+ a_inputs = [msg_a, chatbot_a, model_a_name, system_msg, max_t, temp, top_p_val, local_endpoint]
93
  msg_a.submit(bot_response, a_inputs, [chatbot_a, msg_a])
94
  btn_a.click(bot_response, a_inputs, [chatbot_a, msg_a])
95
 
96
+ b_inputs = [msg_b, chatbot_b, model_b_name, system_msg, max_t, temp, top_p_val, local_endpoint]
97
  msg_b.submit(bot_response, b_inputs, [chatbot_b, msg_b])
98
  btn_b.click(bot_response, b_inputs, [chatbot_b, msg_b])
99