Spaces:
Running
Running
File size: 3,922 Bytes
7345ebc 28ad213 852bb8b 28ad213 7345ebc d9cd930 7345ebc d9cd930 7345ebc d9cd930 7345ebc 852bb8b 7345ebc 852bb8b 7345ebc 81776f6 7345ebc 852bb8b 7345ebc 28ad213 7345ebc 852bb8b d9cd930 7345ebc 28ad213 7345ebc d9cd930 7345ebc 28ad213 7345ebc 28ad213 7345ebc d9cd930 7345ebc 2f017f7 28ad213 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import os
import json
import gradio as gr
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from openai import OpenAI
# ---------------------------
# 1. Setup Hugging Face Router client
# ---------------------------
# Token is read from the environment; if unset this is None and the client
# will fail at request time (not at import time) with an auth error.
HF_TOKEN = os.environ.get("HF_TOKEN")
# OpenAI-compatible client pointed at the Hugging Face Inference Router.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=HF_TOKEN,
)
# ---------------------------
# 2. System Prompt
# ---------------------------
# Persona/instructions prepended to every conversation sent to the model.
SYSTEM_PROMPT = (
    "Your name is YAH Assistant. "
    "If the user asks your name, always answer: 'My name is YAH Assistant.' "
    "Your tone is precise, formal, and concise. "
    # Fix: trailing space added — previously concatenated to "yourself.Avoid slang."
    "Only mention your name when specifically asked or when introducing yourself. "
    "Avoid slang. Stay helpful and direct."
)
# Router model id; the ":novita" suffix selects the inference provider.
MODEL_ID = "Qwen/Qwen3-Next-80B-A3B-Instruct:novita"
# ---------------------------
# 3. Pydantic models for API
# ---------------------------
class ChatMessage(BaseModel):
    """A single chat turn: OpenAI-style role plus message text.

    NOTE(review): not referenced elsewhere in this file — presumably kept
    for API consumers; confirm before removing.
    """
    role: str
    content: str
class ChatRequest(BaseModel):
    """Request body for POST /chat/: the new message plus prior (user, bot) pairs."""
    message: str
    history: list[tuple[str, str]] = []
class ChatResponse(BaseModel):
    """Response body for POST /chat/.

    `response` holds the assistant reply ("" on failure); `error` carries
    the failure message, or None on success.
    """
    response: str
    # Fix: the field is optional — `None` default did not match the bare `str`
    # annotation (mypy/pydantic strict mode reject it).
    error: str | None = None
# ---------------------------
# 4. Core chat function
# ---------------------------
def chat_fn(message, history):
    """Run one chat turn against the router model and return the reply text.

    `history` is a sequence of (user, assistant) pairs; it is flattened into
    OpenAI-style role/content dicts after the system prompt. On any API
    failure the error text is returned in place of a reply (never raised).
    """
    convo = [{"role": "system", "content": SYSTEM_PROMPT}]
    # Replay prior turns in order so the model sees the full conversation.
    for past_user, past_bot in history:
        convo.append({"role": "user", "content": past_user})
        convo.append({"role": "assistant", "content": past_bot})
    convo.append({"role": "user", "content": message})
    try:
        completion = client.chat.completions.create(
            model=MODEL_ID,
            messages=convo,
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Surface the problem to the user instead of crashing the UI/API.
        return f"Error: {str(e)}"
# ---------------------------
# 5. API Endpoint Function
# ---------------------------
def chat_api_endpoint(chat_request: ChatRequest) -> ChatResponse:
    """Bridge a ChatRequest to chat_fn and wrap the result in a ChatResponse.

    Any unexpected exception is converted into an empty response with the
    error message attached, so the HTTP layer never sees a raw traceback.
    """
    try:
        answer = chat_fn(chat_request.message, chat_request.history)
    except Exception as e:
        return ChatResponse(response="", error=str(e))
    return ChatResponse(response=answer)
# ---------------------------
# 6. FastAPI App Setup
# ---------------------------
app = FastAPI(title="YAH Assistant API")
# Add the API endpoint
@app.post("/chat/")
async def chat(request: ChatRequest):
    """POST /chat/ — run one chat turn; failures are reported in the body."""
    return chat_api_endpoint(request)
# Health check endpoint
@app.get("/health")
async def health_check():
    """Liveness probe: report service status and the configured model id."""
    return dict(status="healthy", model=MODEL_ID)
# ---------------------------
# 7. Gradio UI (keeps the original interface)
# ---------------------------
with gr.Blocks(title="YAH Assistant") as demo:
    # Header + inline API documentation shown above the chat widget.
    gr.Markdown(
        """
## YAH Assistant
Large-model chat interface powered by Hugging Face Router.
### API Usage
You can also interact with this assistant via API:
- **Endpoint:** `POST /chat/`
- **Body:**
```json
{
"message": "Your message here",
"history": [["Hello", "Hi there!"], ["How are you?", "I'm good!"]]
}
```
"""
    )
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(label="Message")
    def respond(message, chat_history):
        """Gradio submit handler: append the new turn and clear the textbox."""
        reply = chat_fn(message, chat_history)
        chat_history.append([message, reply])
        # Returning "" resets the textbox; chat_history refreshes the chatbot.
        return "", chat_history
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
# ---------------------------
# 8. Mount Gradio app to FastAPI
# ---------------------------
# Serve the Gradio UI at "/" on the same app that exposes /chat/ and /health.
app = gr.mount_gradio_app(app, demo, path="/")
# Launch instructions
if __name__ == "__main__":
    import uvicorn
    # 0.0.0.0:7860 — the conventional Hugging Face Spaces bind address/port.
    uvicorn.run(app, host="0.0.0.0", port=7860)