File size: 3,922 Bytes
7345ebc
28ad213
852bb8b
28ad213
 
7345ebc
d9cd930
7345ebc
 
 
d9cd930
7345ebc
d9cd930
7345ebc
 
 
 
852bb8b
7345ebc
 
 
852bb8b
7345ebc
 
 
 
81776f6
7345ebc
852bb8b
 
7345ebc
 
 
28ad213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7345ebc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
852bb8b
d9cd930
7345ebc
 
 
 
 
28ad213
7345ebc
d9cd930
7345ebc
 
 
 
 
28ad213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7345ebc
 
 
 
 
 
 
28ad213
 
 
 
 
 
 
 
 
 
 
7345ebc
 
 
 
 
 
 
 
 
 
d9cd930
7345ebc
2f017f7
28ad213
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import json
import os
from typing import Optional

import gradio as gr
from fastapi import FastAPI, HTTPException
from openai import OpenAI
from pydantic import BaseModel

# ---------------------------
# 1. Setup Hugging Face Router client
# ---------------------------

# Hugging Face access token read from the environment; None when unset, in
# which case the client still constructs but the first request will fail.
HF_TOKEN = os.environ.get("HF_TOKEN")

# OpenAI-compatible client pointed at the Hugging Face Inference Router,
# which proxies chat-completion calls to the provider named in MODEL_ID.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=HF_TOKEN,
)

# ---------------------------
# 2. System Prompt
# ---------------------------

# Persona and behaviour instructions prepended to every conversation.
# NOTE: each fragment ends with a space so the implicit string-literal
# concatenation yields correctly separated sentences (the original omitted
# the space after "...introducing yourself.", fusing it with "Avoid slang.").
SYSTEM_PROMPT = (
    "Your name is YAH Assistant. "
    "If the user asks your name, always answer: 'My name is YAH Assistant.' "
    "Your tone is precise, formal, and concise. "
    "Only mention your name when specifically asked or when introducing yourself. "
    "Avoid slang. Stay helpful and direct."
)

# Router model identifier: "<repo>/<model>:<inference-provider>".
MODEL_ID = "Qwen/Qwen3-Next-80B-A3B-Instruct:novita"

# ---------------------------
# 3. Pydantic models for API
# ---------------------------

class ChatMessage(BaseModel):
    """A single OpenAI-style chat message (role + content)."""
    # NOTE(review): not referenced anywhere in this file's visible code —
    # possibly kept for API schema documentation; confirm before removing.
    role: str
    content: str

class ChatRequest(BaseModel):
    """Request body for POST /chat/."""
    # The new user message.
    message: str
    # Prior turns as (user, assistant) pairs; pydantic copies the default,
    # so the shared-mutable-default pitfall does not apply here.
    history: list[tuple[str, str]] = []

class ChatResponse(BaseModel):
    """Response body for POST /chat/: reply text plus an optional error."""
    # The assistant's reply ("" when an error occurred).
    response: str
    # Error description on failure, None on success. Annotated Optional[str]
    # rather than the original ``str = None``: None is not a valid ``str``,
    # so the field's type now matches its default (and its use in
    # chat_api_endpoint, which passes error=str(e) or leaves it None).
    error: Optional[str] = None

# ---------------------------
# 4. Core chat function
# ---------------------------

def chat_fn(message, history):
    """Send one chat turn to the router model and return the reply text.

    Parameters
    ----------
    message : str
        The new user message.
    history : list of (user, assistant) pairs
        Prior turns, replayed as OpenAI-style role/content dicts.

    Returns
    -------
    str
        The model's reply, or an ``"Error: ..."`` string if the call failed.
    """
    # Persona prompt first, then the conversation so far, then the new turn.
    conversation = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_turn, assistant_turn in history:
        conversation.append({"role": "user", "content": user_turn})
        conversation.append({"role": "assistant", "content": assistant_turn})
    conversation.append({"role": "user", "content": message})

    try:
        completion = client.chat.completions.create(
            model=MODEL_ID,
            messages=conversation,
        )
        # Extraction stays inside the try: a malformed response is also
        # reported as an error string rather than raising to the caller.
        return completion.choices[0].message.content
    except Exception as exc:
        # Surface failures as reply text so the UI/API keep functioning.
        return f"Error: {str(exc)}"

# ---------------------------
# 5. API Endpoint Function
# ---------------------------

def chat_api_endpoint(chat_request: ChatRequest) -> ChatResponse:
    """Wrap chat_fn for the REST API, mapping failures to the error field."""
    try:
        answer = chat_fn(chat_request.message, chat_request.history)
    except Exception as exc:
        # Report the failure in-band; the endpoint never raises to FastAPI.
        return ChatResponse(response="", error=str(exc))
    return ChatResponse(response=answer)

# ---------------------------
# 6. FastAPI App Setup
# ---------------------------

app = FastAPI(title="YAH Assistant API")

# Add the API endpoint
@app.post("/chat/")
async def chat(request: ChatRequest):
    """POST /chat/ — delegate to chat_api_endpoint; failures are reported in
    the response body's ``error`` field, not as HTTP error statuses."""
    return chat_api_endpoint(request)

# Health check endpoint
@app.get("/health")
async def health_check():
    """GET /health — liveness probe reporting the configured model id."""
    return {"status": "healthy", "model": MODEL_ID}

# ---------------------------
# 7. Gradio UI (keeps the original interface)
# ---------------------------

with gr.Blocks(title="YAH Assistant") as demo:
    gr.Markdown(
        """
        ## YAH Assistant  
        Large-model chat interface powered by Hugging Face Router.
        
        ### API Usage
        You can also interact with this assistant via API:
        - **Endpoint:** `POST /chat/`
        - **Body:**
        ```json
        {
            "message": "Your message here",
            "history": [["Hello", "Hi there!"], ["How are you?", "I'm good!"]]
        }
        ```
        """
    )

    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(label="Message")

    def respond(message, chat_history):
        """Submit handler: fetch a reply, append the turn, clear the textbox."""
        reply = chat_fn(message, chat_history)
        chat_history.append([message, reply])
        # "" clears the textbox; the updated history re-renders the chatbot.
        return "", chat_history

    # Wire Enter-to-submit: inputs (msg, chatbot) -> outputs (msg, chatbot).
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# ---------------------------
# 8. Mount Gradio app to FastAPI
# ---------------------------

# Serve the Gradio UI at "/" on the same ASGI app as the REST endpoints.
app = gr.mount_gradio_app(app, demo, path="/")

# Launch instructions
if __name__ == "__main__":
    import uvicorn
    # 0.0.0.0:7860 matches the conventional Hugging Face Spaces binding.
    uvicorn.run(app, host="0.0.0.0", port=7860)