# NOTE: the lines "Spaces: / Sleeping / Sleeping" that preceded this file were
# Hugging Face Spaces page chrome accidentally copied with the source, not code.
import gradio as gr
import os
from huggingface_hub import InferenceClient

# 1. Read the Hugging Face access token from the Space's secrets.
# os.getenv returns None when the secret is missing; warn up front so the
# failure mode is obvious instead of surfacing later as a 401 from the API.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    print("WARNING: HF_TOKEN is not set; Inference API calls will be unauthenticated.")

# 2. Client pinned to the custom fine-tuned model so the serverless backend
# wakes up and serves THIS repo specifically, not a shared default model.
client = InferenceClient(
    model="Frusto/llama-3.2-1b-frusto360-final",
    token=hf_token,
)
| def chat_fn(message, history): | |
| system_prompt = "You are the @frusto360 AI. Created by @frusto360. Link: https://youtube.com/@frusto360" | |
| # Building the chat for the new router | |
| messages = [{"role": "system", "content": system_prompt}] | |
| for user_msg, assistant_msg in history: | |
| messages.append({"role": "user", "content": user_msg}) | |
| messages.append({"role": "assistant", "content": assistant_msg}) | |
| messages.append({"role": "user", "content": message}) | |
| response = "" | |
| try: | |
| # 2026 update: use provider="hf-inference" to force serverless run | |
| for message in client.chat_completion( | |
| messages, | |
| max_tokens=512, | |
| stream=True, | |
| provider="hf-inference" # THIS IS THE KEY IN 2026 | |
| ): | |
| token = message.choices[0].delta.content | |
| if token: | |
| response += token | |
| yield response | |
| except Exception as e: | |
| yield f"⚠️ Status: Model is loading on Hugging Face servers. Please wait 30 seconds and try again.\n(Error: {str(e)})" | |
# Wire the streaming handler into a chat UI. The module-level name `demo`
# is what Hugging Face Spaces looks for when serving the app.
demo = gr.ChatInterface(fn=chat_fn, title="@frusto360 AI")

demo.launch()