13ilguun committed on
Commit
20cd3e6
·
1 Parent(s): bfcbb50
Files changed (1) hide show
  1. app.py +36 -50
app.py CHANGED
@@ -2,31 +2,31 @@ import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import os
4
 
5
- # Load token and create client
6
- token = os.getenv("huggingface_token")
 
7
  client = InferenceClient(
8
- "HuggingFaceH4/zephyr-7b-beta",
9
- token=token,
10
- provider="auto" # <- fixes the issue
11
  )
12
 
13
- # Load system messages from environment
14
- SYSTEM_MESSAGES = {
15
- "LaunchPad": os.getenv("DEFAULT_SYSTEM_MESSAGE_1", "Default LaunchPad message."),
16
- "MentorBot": os.getenv("DEFAULT_SYSTEM_MESSAGE_2", "Default MentorBot message."),
17
- "CoachAI": os.getenv("DEFAULT_SYSTEM_MESSAGE_3", "Default CoachAI message."),
18
- }
19
-
20
- # Chat function used by Gradio UI
21
- def respond(message, history, persona, max_tokens, temperature, top_p):
22
- system_message = SYSTEM_MESSAGES.get(persona, "Default fallback message.")
23
  messages = [{"role": "system", "content": system_message}]
24
 
25
- for user, assistant in history:
26
- if user:
27
- messages.append({"role": "user", "content": user})
28
- if assistant:
29
- messages.append({"role": "assistant", "content": assistant})
 
30
  messages.append({"role": "user", "content": message})
31
 
32
  response = ""
@@ -37,44 +37,30 @@ def respond(message, history, persona, max_tokens, temperature, top_p):
37
  temperature=temperature,
38
  top_p=top_p,
39
  ):
40
- token_piece = message.choices[0].delta.content
41
- response += token_piece
 
42
  yield response
43
 
44
- # API-callable version
45
- def chat_api(message, persona, max_tokens, temperature, top_p):
46
- system_message = SYSTEM_MESSAGES.get(persona, "Default fallback message.")
47
- messages = [{"role": "system", "content": system_message}]
48
- messages.append({"role": "user", "content": message})
49
 
50
- output = ""
51
- for chunk in client.chat_completion(
52
- messages,
53
- max_tokens=max_tokens,
54
- stream=True,
55
- temperature=temperature,
56
- top_p=top_p,
57
- ):
58
- output += chunk.choices[0].delta.content
59
- return output
60
-
61
- # Gradio UI
62
  demo = gr.ChatInterface(
63
  respond,
64
  additional_inputs=[
65
- gr.Dropdown(
66
- choices=list(SYSTEM_MESSAGES.keys()),
67
- value="LaunchPad",
68
- label="Select Persona"
69
- ),
70
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
71
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
72
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
 
 
 
 
 
 
73
  ],
74
  )
75
 
76
- # Serve UI and API
77
- demo.launch(share=True, show_error=True, show_api=True)
78
-
79
- # To add an API endpoint, you must use FastAPI + mount if needed.
80
- # Ask if you'd like that added again.
 
2
  from huggingface_hub import InferenceClient
3
  import os
4
 
5
"""
For more information on `huggingface_hub` Inference API support, please check the docs:
https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

# Inference client for the Zephyr-7B chat model, authenticated via the
# environment. NOTE(review): the secret name "huggingface_token" is
# lowercase and unconventional (HF_TOKEN is the usual name) — confirm the
# Space secret really uses this spelling.
client = InferenceClient(
    model="HuggingFaceH4/zephyr-7b-beta",
    token=os.getenv("huggingface_token"),
    provider="hf-inference",
)
13
 
14
+ def respond(
15
+ message,
16
+ history: list[tuple[str, str]],
17
+ system_message,
18
+ max_tokens,
19
+ temperature,
20
+ top_p,
21
+ ):
 
 
22
  messages = [{"role": "system", "content": system_message}]
23
 
24
+ for val in history:
25
+ if val[0]:
26
+ messages.append({"role": "user", "content": val[0]})
27
+ if val[1]:
28
+ messages.append({"role": "assistant", "content": val[1]})
29
+
30
  messages.append({"role": "user", "content": message})
31
 
32
  response = ""
 
37
  temperature=temperature,
38
  top_p=top_p,
39
  ):
40
+ token = message.choices[0].delta.content
41
+
42
+ response += token
43
  yield response
44
 
 
 
 
 
 
45
 
46
+ """
47
+ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
48
+ """
 
 
 
 
 
 
 
 
 
49
# Gradio chat UI. The extra widgets map positionally onto respond()'s
# system_message / max_tokens / temperature / top_p parameters.
_extra_controls = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
]

demo = gr.ChatInterface(respond, additional_inputs=_extra_controls)

# Launch the app only when executed as a script (not when imported).
if __name__ == "__main__":
    demo.launch(share=True)