Frusto committed on
Commit
4fc388e
·
verified ·
1 Parent(s): 94f65f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -8,19 +8,20 @@ def respond(
8
  max_tokens,
9
  temperature,
10
  top_p,
11
- hf_token: gr.OAuthToken, # Gradio injects this if logged in
12
  ):
13
- # Ensure token exists (User must click Login)
14
  if not hf_token or not hf_token.token:
15
- yield "⚠️ Please login using the button in the sidebar to access the @frusto360 AI."
16
  return
17
 
18
- # Use direct InferenceClient (more stable for custom models)
19
  client = InferenceClient(model="Frusto/llama-3.2-1b-frusto360-final", token=hf_token.token)
20
 
21
  # 1. Manually build the Llama 3.2 Chat Template
22
  prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
23
 
 
24
  for msg in history:
25
  role = msg['role']
26
  content = msg['content']
@@ -31,14 +32,14 @@ def respond(
31
 
32
  response = ""
33
  try:
34
- # 2. Use text_generation instead of chat_completion
35
  for token in client.text_generation(
36
  prompt,
37
  max_new_tokens=max_tokens,
38
  stream=True,
39
  temperature=temperature,
40
  top_p=top_p,
41
- stop=["<|eot_id|>"] # Stop generating at the end-of-turn token
42
  ):
43
  response += token
44
  yield response
@@ -46,14 +47,14 @@ def respond(
46
  except Exception as e:
47
  error_msg = str(e)
48
  if "503" in error_msg:
49
- yield "⏳ Model is waking up... please wait 60 seconds and try again."
50
  else:
51
  yield f"❌ Error: {error_msg}"
52
 
53
  # --- UI Setup ---
 
54
  chatbot = gr.ChatInterface(
55
  respond,
56
- type="messages", # Ensures history is a list of dictionaries
57
  additional_inputs=[
58
  gr.Textbox(value="You are the @frusto360 AI assistant. Created by @frusto360.", label="System message"),
59
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
@@ -64,9 +65,9 @@ chatbot = gr.ChatInterface(
64
 
65
  with gr.Blocks(theme="glass") as demo:
66
  with gr.Sidebar():
67
- gr.Markdown("### 🔐 Authentication")
68
  gr.LoginButton()
69
- gr.Markdown("Login to use your Hugging Face account permissions.")
70
 
71
  chatbot.render()
72
 
 
8
  max_tokens,
9
  temperature,
10
  top_p,
11
+ hf_token: gr.OAuthToken,
12
  ):
13
+ # Security Check: Ensure user is logged in
14
  if not hf_token or not hf_token.token:
15
+ yield "⚠️ Please **Login** using the button in the sidebar to talk to @frusto360 AI."
16
  return
17
 
18
+ # Direct model access to avoid the 400/404 Router errors
19
  client = InferenceClient(model="Frusto/llama-3.2-1b-frusto360-final", token=hf_token.token)
20
 
21
  # 1. Manually build the Llama 3.2 Chat Template
22
  prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
23
 
24
+ # In Gradio 6, history is ALWAYS a list of dicts
25
  for msg in history:
26
  role = msg['role']
27
  content = msg['content']
 
32
 
33
  response = ""
34
  try:
35
+ # 2. Use text_generation for maximum stability
36
  for token in client.text_generation(
37
  prompt,
38
  max_new_tokens=max_tokens,
39
  stream=True,
40
  temperature=temperature,
41
  top_p=top_p,
42
+ stop=["<|eot_id|>"]
43
  ):
44
  response += token
45
  yield response
 
47
  except Exception as e:
48
  error_msg = str(e)
49
  if "503" in error_msg:
50
+ yield "⏳ Model is warming up on Hugging Face servers... please wait 60 seconds."
51
  else:
52
  yield f"❌ Error: {error_msg}"
53
 
54
  # --- UI Setup ---
55
+ # REMOVED type="messages" to fix the TypeError
56
  chatbot = gr.ChatInterface(
57
  respond,
 
58
  additional_inputs=[
59
  gr.Textbox(value="You are the @frusto360 AI assistant. Created by @frusto360.", label="System message"),
60
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
 
65
 
66
  with gr.Blocks(theme="glass") as demo:
67
  with gr.Sidebar():
68
+ gr.Markdown("### 🔐 @frusto360 Auth")
69
  gr.LoginButton()
70
+ gr.Markdown("Click login to authenticate with your HF account.")
71
 
72
  chatbot.render()
73