import gradio as gr
from huggingface_hub import InferenceClient
import traceback
def get_text(content):
    """Normalize a Gradio chat message ``content`` value to a plain string.

    Gradio history entries may carry content as a plain string, a list of
    content blocks, or a single dict block; anything else is stringified.

    Args:
        content: str, list of content blocks (dicts with "type"/"text" keys,
            or bare strings), dict with a "text" key, or any other object.

    Returns:
        The concatenated text content as a single string.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Concatenate only the text blocks; skip images/files.
        # Robustness fix: guard against non-dict entries (e.g. bare strings
        # in multimodal history) — the original `block.get(...)` raised
        # AttributeError on those.
        parts = []
        for block in content:
            if isinstance(block, dict):
                if block.get("type") == "text":
                    parts.append(block.get("text", ""))
            elif isinstance(block, str):
                parts.append(block)
        return "".join(parts)
    if isinstance(content, dict):
        # Single block: fall back to the whole dict's repr if no text key.
        return content.get("text", str(content))
    return str(content)
def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Stream a completion for *message* from the fine-tuned model.

    Builds a Llama-3 style chat prompt from the system message, prior
    history turns, and the new user message, then streams tokens from the
    Hugging Face router, yielding the accumulated reply after each token.
    Yields a login warning instead when no OAuth token is present, and a
    formatted error string if the router call fails.
    """
    if not hf_token or not hf_token.token:
        yield "⚠️ Please **Login** in the sidebar to access @frusto360 AI."
        return
    try:
        # ✅ NEW 2026 ROUTER URL
        # We use the 'hf-inference' provider prefix on the new router domain
        MODEL_ID = "Frusto/llama-3.2-1b-frusto360-final"
        API_URL = f"https://router.huggingface.co/hf-inference/models/{MODEL_ID}"
        client = InferenceClient(base_url=API_URL, token=hf_token.token)

        # Assemble the prompt from individual segments, then join once.
        segments = [
            f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
        ]
        for turn in history:
            turn_role = turn.get("role", "user")
            turn_text = get_text(turn.get("content", ""))
            segments.append(
                f"<|start_header_id|>{turn_role}<|end_header_id|>\n\n{turn_text}<|eot_id|>"
            )
        segments.append(
            f"<|start_header_id|>user<|end_header_id|>\n\n{get_text(message)}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
        )
        prompt = "".join(segments)

        partial = ""
        for chunk in client.text_generation(
            prompt,
            max_new_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            stop=["<|eot_id|>"],
        ):
            # Stream items are plain strings with stream=True; fall back to
            # token/text attributes for detailed-response objects.
            if isinstance(chunk, str):
                piece = chunk
            else:
                piece = getattr(chunk, "token", getattr(chunk, "text", str(chunk)))
            partial += piece
            yield partial
    except Exception as e:
        yield f"❌ **Router Error:** {str(e)}\n\n*Note: Ensure 'Inference API' is enabled in your model settings.*"
# UI Setup (Gradio 6.5)
chatbot_interface = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are the @frusto360 AI.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

# Wrap the chat interface in a Blocks layout with a login sidebar.
# BUG FIX: `theme` is a `gr.Blocks` constructor argument, not a `launch()`
# argument — passing theme to launch() raises a TypeError in current Gradio.
with gr.Blocks(fill_height=True, theme="glass") as demo:
    with gr.Sidebar():
        gr.Markdown("## 🔐 @frusto360 Auth")
        gr.LoginButton()
    chatbot_interface.render()

if __name__ == "__main__":
    demo.launch()