Frusto committed on
Commit
94f65f8
·
verified ·
1 Parent(s): 4e20c59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -40
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
-
5
  def respond(
6
  message,
7
  history: list[dict[str, str]],
@@ -9,61 +8,67 @@ def respond(
9
  max_tokens,
10
  temperature,
11
  top_p,
12
- hf_token: gr.OAuthToken,
13
  ):
14
- """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
16
- """
17
- client = InferenceClient(token=hf_token.token, model="Frusto/llama-3.2-1b-frusto360-final")
18
-
19
- messages = [{"role": "system", "content": system_message}]
20
 
21
- messages.extend(history)
 
22
 
23
- messages.append({"role": "user", "content": message})
 
 
 
 
 
 
 
 
 
24
 
25
  response = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- for message in client.chat_completion(
28
- messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
- temperature=temperature,
32
- top_p=top_p,
33
- ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
  chatbot = gr.ChatInterface(
47
  respond,
 
48
  additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
  ],
60
  )
61
 
62
- with gr.Blocks() as demo:
63
  with gr.Sidebar():
 
64
  gr.LoginButton()
 
 
65
  chatbot.render()
66
 
67
-
68
  if __name__ == "__main__":
69
- demo.launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
 
4
  def respond(
5
  message,
6
  history: list[dict[str, str]],
 
8
  max_tokens,
9
  temperature,
10
  top_p,
11
+ hf_token: gr.OAuthToken, # Gradio injects this if logged in
12
  ):
13
+ # Ensure token exists (User must click Login)
14
+ if not hf_token or not hf_token.token:
15
+ yield "⚠️ Please login using the button in the sidebar to access the @frusto360 AI."
16
+ return
 
 
17
 
18
+ # Use direct InferenceClient (more stable for custom models)
19
+ client = InferenceClient(model="Frusto/llama-3.2-1b-frusto360-final", token=hf_token.token)
20
 
21
+ # 1. Manually build the Llama 3.2 Chat Template
22
+ prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
23
+
24
+ for msg in history:
25
+ role = msg['role']
26
+ content = msg['content']
27
+ prompt += f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
28
+
29
+ # Add current user message
30
+ prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
31
 
32
  response = ""
33
+ try:
34
+ # 2. Use text_generation instead of chat_completion
35
+ for token in client.text_generation(
36
+ prompt,
37
+ max_new_tokens=max_tokens,
38
+ stream=True,
39
+ temperature=temperature,
40
+ top_p=top_p,
41
+ stop=["<|eot_id|>"] # Stop generating at the end-of-turn token
42
+ ):
43
+ response += token
44
+ yield response
45
+
46
+ except Exception as e:
47
+ error_msg = str(e)
48
+ if "503" in error_msg:
49
+ yield "⏳ Model is waking up... please wait 60 seconds and try again."
50
+ else:
51
+ yield f"❌ Error: {error_msg}"
52
 
53
+ # --- UI Setup ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  chatbot = gr.ChatInterface(
55
  respond,
56
+ type="messages", # Ensures history is a list of dictionaries
57
  additional_inputs=[
58
+ gr.Textbox(value="You are the @frusto360 AI assistant. Created by @frusto360.", label="System message"),
59
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
60
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
61
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
 
 
 
 
 
 
62
  ],
63
  )
64
 
65
+ with gr.Blocks(theme="glass") as demo:
66
  with gr.Sidebar():
67
+ gr.Markdown("### 🔐 Authentication")
68
  gr.LoginButton()
69
+ gr.Markdown("Login to use your Hugging Face account permissions.")
70
+
71
  chatbot.render()
72
 
 
73
  if __name__ == "__main__":
74
+ demo.launch()