3v324v23 committed on
Commit
ff382d3
·
1 Parent(s): 51737ba
Files changed (1) hide show
  1. app.py +33 -42
app.py CHANGED
@@ -1,61 +1,52 @@
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
  ENDPOINT_URL = "https://x6leavj4hgm2fdyx.us-east-2.aws.endpoints.huggingface.cloud"
5
 
6
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Stream a chat completion from the Hugging Face Inference Endpoint.

    Yields the accumulated response text after every streamed chunk.
    """
    # The client needs the raw token string, not the OAuth wrapper object.
    client = InferenceClient(model=ENDPOINT_URL, token=hf_token.token)

    # Conversation order: system prompt, prior turns, then the new user turn.
    messages = [
        {"role": "system", "content": system_message},
        *history,
        {"role": "user", "content": message},
    ]

    response = ""
    for chunk in client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A chunk may carry no choices or an empty delta; append nothing then.
        piece = chunk.choices[0].delta.content if chunk.choices else None
        response += piece or ""
        yield response
40
-
41
-
42
# ---- Gradio UI ----
# Extra controls shown with the chat box; their values are passed to
# `respond` in order, after (message, history).
_extra_controls = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
]

chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=_extra_controls,
)

with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.LoginButton()  # lets you pass the HF token to the app
    chatbot.render()

if __name__ == "__main__":
    demo.launch()
61
 
 
1
+ import os
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
 
5
  ENDPOINT_URL = "https://x6leavj4hgm2fdyx.us-east-2.aws.endpoints.huggingface.cloud"
6
 
7
def respond(message, history, system_message, max_tokens, temperature, top_p, hf_pat):
    """Stream a chat completion from the Hugging Face Inference Endpoint.

    Args:
        message: Latest user message.
        history: Prior turns as a list of {"role", "content"} dicts
            (Gradio ``type="messages"`` format).
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        hf_pat: Hugging Face personal access token pasted into the UI;
            falls back to the HUGGINGFACEHUB_API_TOKEN env var when empty.

    Yields:
        The accumulated response text after each streamed chunk, or a
        single human-readable error message on failure.
    """
    # Pasted tokens often carry stray whitespace/newlines from the clipboard;
    # strip so authentication doesn't fail on an otherwise valid PAT, and so
    # a whitespace-only textbox still falls back to the environment variable.
    token = (hf_pat or "").strip() or os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if not token:
        yield "Missing token. Paste a Hugging Face PAT with endpoint write access."
        return

    client = InferenceClient(model=ENDPOINT_URL, token=token)

    # Conversation order: system prompt, prior turns, then the new user turn.
    messages = [{"role": "system", "content": system_message}, *history,
                {"role": "user", "content": message}]

    try:
        out = ""
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # A chunk may have no choices or an empty delta; skip those.
            delta = chunk.choices[0].delta.content if chunk.choices and chunk.choices[0].delta else ""
            if delta:
                out += delta
                yield out
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the Gradio event handler.
        yield f"Request failed: {e}\n\nCheck that your token has `inference.endpoints.infer.write` and access to this endpoint."
33
+
 
 
 
 
 
 
 
 
 
 
34
# Controls shown with the chat box; their values are forwarded to
# `respond` in this order, after (message, history).
_controls = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    gr.Textbox(lines=1, type="password", placeholder="hf_... token", label="HF PAT (with endpoint write)"),
]

chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=_controls,
)
45
 
46
# Wrap the chat interface in a Blocks app so it can be rendered and launched.
with gr.Blocks() as demo:
    chatbot.render()

if __name__ == "__main__":
    # Or export HUGGINGFACEHUB_API_TOKEN in your shell and leave the textbox empty.
    demo.launch()
52