import os
import json
from typing import Optional

import gradio as gr
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from openai import OpenAI

# ---------------------------
# 1. Setup Hugging Face Router client
# ---------------------------
# NOTE(review): HF_TOKEN may be None if the env var is unset; the OpenAI client
# will then fail on first request rather than at import time.
HF_TOKEN = os.environ.get("HF_TOKEN")

client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=HF_TOKEN,
)

# ---------------------------
# 2. System Prompt
# ---------------------------
SYSTEM_PROMPT = (
    "Your name is YAH Assistant. "
    "If the user asks your name, always answer: 'My name is YAH Assistant.' "
    "Your tone is precise, formal, and concise. "
    # BUG FIX: the original fragment had no trailing space, so the prompt sent
    # to the model read "...introducing yourself.Avoid slang...".
    "Only mention your name when specifically asked or when introducing yourself. "
    "Avoid slang. Stay helpful and direct."
)

MODEL_ID = "Qwen/Qwen3-Next-80B-A3B-Instruct:novita"


# ---------------------------
# 3. Pydantic models for API
# ---------------------------
class ChatMessage(BaseModel):
    """A single OpenAI-style chat message (role + content)."""
    role: str
    content: str


class ChatRequest(BaseModel):
    """Body for POST /chat/: the new message plus prior (user, bot) turns."""
    message: str
    history: list[tuple[str, str]] = []


class ChatResponse(BaseModel):
    """Body returned by POST /chat/; `error` is populated only on failure."""
    response: str
    # BUG FIX: was `error: str = None` — an implicit-Optional annotation that
    # mistypes the field (and is rejected by Pydantic v2). Declare it optional.
    error: Optional[str] = None


# ---------------------------
# 4. Core chat function
# ---------------------------
def chat_fn(message, history):
    """Send `message` (with prior `history`) to the model and return its reply.

    Args:
        message: The new user message text.
        history: List of (user, assistant) pairs from earlier turns.

    Returns:
        The assistant's reply string; on any API/network failure an
        "Error: ..." string is returned in-band instead of raising.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    # Replay prior turns so the model sees the whole conversation.
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})

    # Add new user message
    messages.append({"role": "user", "content": message})

    try:
        completion = client.chat.completions.create(
            model=MODEL_ID,
            messages=messages,
        )
        reply = completion.choices[0].message.content
    except Exception as e:
        # Report failures to the caller/UI instead of crashing the app.
        reply = f"Error: {str(e)}"

    return reply
# ---------------------------
# 5. API Endpoint Function
# ---------------------------
def chat_api_endpoint(chat_request: ChatRequest) -> ChatResponse:
    """Handle one API chat turn.

    Errors are reported in-band via ChatResponse.error (HTTP 200), matching
    the existing client contract — do not convert to HTTPException without
    coordinating with API consumers.
    """
    try:
        reply = chat_fn(chat_request.message, chat_request.history)
        return ChatResponse(response=reply)
    except Exception as e:
        return ChatResponse(response="", error=str(e))


# ---------------------------
# 6. FastAPI App Setup
# ---------------------------
app = FastAPI(title="YAH Assistant API")


# Add the API endpoint
@app.post("/chat/")
async def chat(request: ChatRequest):
    """POST /chat/ — forward the request to the shared chat handler."""
    return chat_api_endpoint(request)


# Health check endpoint
@app.get("/health")
async def health_check():
    """Liveness probe; also reports which model is configured."""
    return {"status": "healthy", "model": MODEL_ID}


# ---------------------------
# 7. Gradio UI (keeps the original interface)
# ---------------------------
with gr.Blocks(title="YAH Assistant") as demo:
    gr.Markdown(
        """
        ## YAH Assistant

        Large-model chat interface powered by Hugging Face Router.

        ### API Usage
        You can also interact with this assistant via API:

        - **Endpoint:** `POST /chat/`
        - **Body:**
        ```json
        {
            "message": "Your message here",
            "history": [["Hello", "Hi there!"], ["How are you?", "I'm good!"]]
        }
        ```
        """
    )
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(label="Message")

    def respond(message, chat_history):
        """Gradio callback: append the new (user, bot) turn, clear the textbox."""
        reply = chat_fn(message, chat_history)
        chat_history.append([message, reply])
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# ---------------------------
# 8. Mount Gradio app to FastAPI
# ---------------------------
# Gradio serves the UI at "/" while the FastAPI routes above remain available.
app = gr.mount_gradio_app(app, demo, path="/")

# Launch instructions
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)