File size: 2,786 Bytes
26545a4
 
576ace8
 
 
fbe4031
576ace8
 
fbe4031
576ace8
26545a4
 
 
576ace8
26545a4
 
 
 
4fc388e
26545a4
94f65f8
fbe4031
94f65f8
26545a4
576ace8
fbe4031
 
bd78a22
fbe4031
 
 
26545a4
fbe4031
576ace8
 
 
 
 
 
bd78a22
26545a4
576ace8
bd78a22
94f65f8
 
 
 
 
fbe4031
bd78a22
576ace8
 
94f65f8
 
 
fbe4031
26545a4
fbe4031
bd78a22
26545a4
 
fbe4031
576ace8
 
94f65f8
26545a4
 
 
bd78a22
26545a4
fbe4031
26545a4
bd78a22
26545a4
 
bd78a22
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
from huggingface_hub import InferenceClient
import traceback

def get_text(content):
    """Normalize a chat-message ``content`` value to a plain string.

    Gradio/OpenAI-style messages may carry content as a plain string, a list
    of content blocks (``{"type": "text", "text": ...}``), or a single dict.
    Anything else is stringified with ``str()``.

    Fix: the original called ``block.get`` on every list item, raising
    AttributeError when the list mixed in plain strings; those are now
    included verbatim and non-dict/non-str items are skipped.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for block in content:
            if isinstance(block, dict):
                # Only "text"-typed blocks contribute text (images etc. are skipped).
                if block.get("type") == "text":
                    parts.append(block.get("text", ""))
            elif isinstance(block, str):
                parts.append(block)
        return "".join(parts)
    if isinstance(content, dict):
        # Fall back to the whole dict's repr when no "text" key is present.
        return content.get("text", str(content))
    return str(content)

def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
):
    """Stream a model reply for the Gradio chat UI.

    Builds a Llama-3 chat-template prompt from the system message, prior
    turns, and the new user message, then yields the growing response text
    as tokens stream back from the HF Inference router. Requires an OAuth
    login; errors are surfaced to the chat window rather than raised.
    """
    # No login, no API call — tell the user how to authenticate.
    if not hf_token or not hf_token.token:
        yield "⚠️ Please **Login** in the sidebar to access @frusto360 AI."
        return

    try:
        # ✅ NEW 2026 ROUTER URL
        # We use the 'hf-inference' provider prefix on the new router domain
        MODEL_ID = "Frusto/llama-3.2-1b-frusto360-final"
        API_URL = f"https://router.huggingface.co/hf-inference/models/{MODEL_ID}"

        client = InferenceClient(base_url=API_URL, token=hf_token.token)

        # Assemble the Llama-3 template: system turn, every history turn,
        # the new user turn, then an open assistant header for generation.
        parts = [
            f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>"
        ]
        for turn in history:
            role = turn.get("role", "user")
            text = get_text(turn.get("content", ""))
            parts.append(
                f"<|start_header_id|>{role}<|end_header_id|>\n\n{text}<|eot_id|>"
            )
        parts.append(
            f"<|start_header_id|>user<|end_header_id|>\n\n{get_text(message)}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
        )
        prompt = "".join(parts)

        accumulated = ""
        stream = client.text_generation(
            prompt,
            max_new_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            stop=["<|eot_id|>"],
        )
        for chunk in stream:
            # Streamed items may be raw strings or token objects; prefer
            # .token, then .text, then a plain str() fallback.
            if isinstance(chunk, str):
                piece = chunk
            else:
                piece = getattr(chunk, 'token', getattr(chunk, 'text', str(chunk)))
            accumulated += piece
            yield accumulated

    except Exception as e:
        yield f"❌ **Router Error:** {str(e)}\n\n*Note: Ensure 'Inference API' is enabled in your model settings.*"

# UI Setup (Gradio 6.5)
# Chat front-end wired to `respond`; the widgets below are passed to it
# positionally after (message, history) as system_message, max_tokens,
# temperature, and top_p.
chatbot_interface = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are the @frusto360 AI.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

# Page layout: a sidebar holding the OAuth login button next to the chat.
# Fix: `theme` is a `gr.Blocks(...)` constructor argument, not a `launch()`
# argument — the original passed it to launch(), where it is not applied.
with gr.Blocks(theme="glass", fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("## 🔐 @frusto360 Auth")
        gr.LoginButton()
    chatbot_interface.render()

if __name__ == "__main__":
    demo.launch()