Update app.py
Browse files
app.py
CHANGED
|
@@ -8,19 +8,20 @@ def respond(
|
|
| 8 |
max_tokens,
|
| 9 |
temperature,
|
| 10 |
top_p,
|
| 11 |
-
hf_token: gr.OAuthToken,
|
| 12 |
):
|
| 13 |
-
#
|
| 14 |
if not hf_token or not hf_token.token:
|
| 15 |
-
yield "⚠️ Please
|
| 16 |
return
|
| 17 |
|
| 18 |
-
#
|
| 19 |
client = InferenceClient(model="Frusto/llama-3.2-1b-frusto360-final", token=hf_token.token)
|
| 20 |
|
| 21 |
# 1. Manually build the Llama 3.2 Chat Template
|
| 22 |
prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
|
| 23 |
|
|
|
|
| 24 |
for msg in history:
|
| 25 |
role = msg['role']
|
| 26 |
content = msg['content']
|
|
@@ -31,14 +32,14 @@ def respond(
|
|
| 31 |
|
| 32 |
response = ""
|
| 33 |
try:
|
| 34 |
-
# 2. Use text_generation
|
| 35 |
for token in client.text_generation(
|
| 36 |
prompt,
|
| 37 |
max_new_tokens=max_tokens,
|
| 38 |
stream=True,
|
| 39 |
temperature=temperature,
|
| 40 |
top_p=top_p,
|
| 41 |
-
stop=["<|eot_id|>"]
|
| 42 |
):
|
| 43 |
response += token
|
| 44 |
yield response
|
|
@@ -46,14 +47,14 @@ def respond(
|
|
| 46 |
except Exception as e:
|
| 47 |
error_msg = str(e)
|
| 48 |
if "503" in error_msg:
|
| 49 |
-
yield "⏳ Model is
|
| 50 |
else:
|
| 51 |
yield f"❌ Error: {error_msg}"
|
| 52 |
|
| 53 |
# --- UI Setup ---
|
|
|
|
| 54 |
chatbot = gr.ChatInterface(
|
| 55 |
respond,
|
| 56 |
-
type="messages", # Ensures history is a list of dictionaries
|
| 57 |
additional_inputs=[
|
| 58 |
gr.Textbox(value="You are the @frusto360 AI assistant. Created by @frusto360.", label="System message"),
|
| 59 |
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
|
@@ -64,9 +65,9 @@ chatbot = gr.ChatInterface(
|
|
| 64 |
|
| 65 |
with gr.Blocks(theme="glass") as demo:
|
| 66 |
with gr.Sidebar():
|
| 67 |
-
gr.Markdown("### 🔐
|
| 68 |
gr.LoginButton()
|
| 69 |
-
gr.Markdown("
|
| 70 |
|
| 71 |
chatbot.render()
|
| 72 |
|
|
|
|
| 8 |
max_tokens,
|
| 9 |
temperature,
|
| 10 |
top_p,
|
| 11 |
+
hf_token: gr.OAuthToken,
|
| 12 |
):
|
| 13 |
+
# Security Check: Ensure user is logged in
|
| 14 |
if not hf_token or not hf_token.token:
|
| 15 |
+
yield "⚠️ Please **Login** using the button in the sidebar to talk to @frusto360 AI."
|
| 16 |
return
|
| 17 |
|
| 18 |
+
# Direct model access to avoid the 400/404 Router errors
|
| 19 |
client = InferenceClient(model="Frusto/llama-3.2-1b-frusto360-final", token=hf_token.token)
|
| 20 |
|
| 21 |
# 1. Manually build the Llama 3.2 Chat Template
|
| 22 |
prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
|
| 23 |
|
| 24 |
+
# In Gradio 6, history is ALWAYS a list of dicts
|
| 25 |
for msg in history:
|
| 26 |
role = msg['role']
|
| 27 |
content = msg['content']
|
|
|
|
| 32 |
|
| 33 |
response = ""
|
| 34 |
try:
|
| 35 |
+
# 2. Use text_generation for maximum stability
|
| 36 |
for token in client.text_generation(
|
| 37 |
prompt,
|
| 38 |
max_new_tokens=max_tokens,
|
| 39 |
stream=True,
|
| 40 |
temperature=temperature,
|
| 41 |
top_p=top_p,
|
| 42 |
+
stop=["<|eot_id|>"]
|
| 43 |
):
|
| 44 |
response += token
|
| 45 |
yield response
|
|
|
|
| 47 |
except Exception as e:
|
| 48 |
error_msg = str(e)
|
| 49 |
if "503" in error_msg:
|
| 50 |
+
yield "⏳ Model is warming up on Hugging Face servers... please wait 60 seconds."
|
| 51 |
else:
|
| 52 |
yield f"❌ Error: {error_msg}"
|
| 53 |
|
| 54 |
# --- UI Setup ---
|
| 55 |
+
# REMOVED type="messages" to fix the TypeError
|
| 56 |
chatbot = gr.ChatInterface(
|
| 57 |
respond,
|
|
|
|
| 58 |
additional_inputs=[
|
| 59 |
gr.Textbox(value="You are the @frusto360 AI assistant. Created by @frusto360.", label="System message"),
|
| 60 |
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
|
|
|
| 65 |
|
| 66 |
with gr.Blocks(theme="glass") as demo:
|
| 67 |
with gr.Sidebar():
|
| 68 |
+
gr.Markdown("### 🔐 @frusto360 Auth")
|
| 69 |
gr.LoginButton()
|
| 70 |
+
gr.Markdown("Click login to authenticate with your HF account.")
|
| 71 |
|
| 72 |
chatbot.render()
|
| 73 |
|