# NOTE: the lines "Spaces: / Sleeping / Sleeping" that preceded this file were
# Hugging Face Spaces page chrome accidentally copied with the source, not code.
import gradio as gr
import os
from huggingface_hub import InferenceClient

# 1. Read the Hugging Face access token from the Space's secrets.
# os.getenv returns None when the secret is missing; warn up front so the
# failure mode is obvious instead of surfacing later as a 401 from the API.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    print("WARNING: HF_TOKEN is not set; Inference API calls will be unauthenticated.")

# 2. Client pinned to the custom fine-tuned model so the serverless backend
# wakes up and serves THIS repo specifically, not a shared default model.
client = InferenceClient(
    model="Frusto/llama-3.2-1b-frusto360-final",
    token=hf_token,
)
| def chat_fn(message, history): | |
| system_prompt = "You are the @frusto360 AI. Created by @frusto360. Link: https://youtube.com/@frusto360" | |
| # Building the chat for the new router | |
| messages = [{"role": "system", "content": system_prompt}] | |
| for user_msg, assistant_msg in history: | |
| messages.append({"role": "user", "content": user_msg}) | |
| messages.append({"role": "assistant", "content": assistant_msg}) | |
| messages.append({"role": "user", "content": message}) | |
| response = "" | |
| try: | |
| # 2026 update: use provider="hf-inference" to force serverless run | |
| for message in client.chat_completion( | |
| messages, | |
| max_tokens=512, | |
| stream=True, | |
| provider="hf-inference" # THIS IS THE KEY IN 2026 | |
| ): | |
| token = message.choices[0].delta.content | |
| if token: | |
| response += token | |
| yield response | |
| except Exception as e: | |
| yield f"⚠️ Status: Model is loading on Hugging Face servers. Please wait 30 seconds and try again.\n(Error: {str(e)})" | |
# Wire the streaming handler into a chat UI. The module-level name `demo`
# is what Hugging Face Spaces looks for when serving the app.
demo = gr.ChatInterface(fn=chat_fn, title="@frusto360 AI")

demo.launch()