Adedoyinjames committed on
Commit
7345ebc
·
verified ·
1 Parent(s): 8f85144

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -116
app.py CHANGED
@@ -1,129 +1,85 @@
1
- import torch
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
3
- from fastapi import FastAPI
4
- from pydantic import BaseModel
5
- import uvicorn
6
- from fastapi.middleware.cors import CORSMiddleware
7
  import gradio as gr
 
8
 
9
- # --- Qwen Chat System ---
 
 
10
 
11
- print("🔄 Loading Qwen model from Qwen/Qwen1.5-0.5B-Chat...")
12
 
13
- # Load Qwen model
14
- model_name = "Qwen/Qwen1.5-0.5B-Chat"
15
-
16
- try:
17
- tokenizer = AutoTokenizer.from_pretrained(
18
- model_name,
19
- trust_remote_code=True
20
- )
21
-
22
- model = AutoModelForCausalLM.from_pretrained(
23
- model_name,
24
- torch_dtype=torch.float16,
25
- device_map="auto",
26
- trust_remote_code=True
27
- )
28
-
29
- print("✅ Qwen model loaded successfully!")
30
-
31
- except Exception as e:
32
- print(f"❌ Error loading model: {e}")
33
- raise
34
-
35
- def generate_response(query):
36
- """Generates response using only the Qwen model"""
37
- try:
38
- # Format prompt using Qwen chat template for better performance
39
- messages = [
40
- {"role": "user", "content": query}
41
- ]
42
- prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
43
-
44
- # Tokenize input
45
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
46
-
47
- # Generate response
48
- with torch.no_grad():
49
- outputs = model.generate(
50
- **inputs,
51
- max_new_tokens=256,
52
- temperature=0.7,
53
- do_sample=True,
54
- pad_token_id=tokenizer.eos_token_id,
55
- repetition_penalty=1.1
56
- )
57
-
58
- # Decode response
59
- full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
60
-
61
- # Extract only the assistant's response
62
- response = full_text[len(prompt):].strip()
63
-
64
- return response
65
-
66
- except Exception as e:
67
- return f"Error generating response: {str(e)}"
68
 
69
- # --- FastAPI App ---
70
- app = FastAPI(title="Qwen AI", description="Chat with Qwen1.5-0.5B-Chat model")
 
71
 
72
- app.add_middleware(
73
- CORSMiddleware,
74
- allow_origins=["*"],
75
- allow_credentials=True,
76
- allow_methods=["*"],
77
- allow_headers=["*"],
78
  )
79
 
80
- class QueryRequest(BaseModel):
81
- query: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- @app.post("/chat/")
84
- async def chat_with_ai(query_request: QueryRequest):
85
  try:
86
- response = generate_response(query_request.query)
87
- return {
88
- "response": response,
89
- "model_used": "Qwen/Qwen1.5-0.5B-Chat",
90
- "status": "success"
91
- }
92
-
93
  except Exception as e:
94
- return {
95
- "response": f"Error: {str(e)}",
96
- "model_used": "Qwen/Qwen1.5-0.5B-Chat",
97
- "status": "error"
98
- }
99
-
100
- @app.get("/status/")
101
- async def get_status():
102
- return {
103
- "model_loaded": True,
104
- "model_name": "Qwen/Qwen1.5-0.5B-Chat",
105
- "system_ready": True
106
- }
107
-
108
- @app.get("/")
109
- async def root():
110
- return {"message": "Qwen AI running with Qwen model"}
111
-
112
- # Simple Gradio interface
113
- def chat_interface(message, history):
114
- try:
115
- response = generate_response(message)
116
- return response
117
- except:
118
- return "System busy, please try again."
119
-
120
- gradio_app = gr.ChatInterface(
121
- fn=chat_interface,
122
- title="Qwen AI",
123
- description="Chat with Qwen1.5-0.5B-Chat model"
124
- )
125
 
126
- app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
127
 
128
- if __name__ == "__main__":
129
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
import os

import gradio as gr
from openai import OpenAI

# ---------------------------
# 1. Setup Hugging Face Router client
# ---------------------------

# Hugging Face access token, read from the environment (e.g. a Space secret).
# NOTE(review): os.environ.get returns None when HF_TOKEN is unset; the client
# is still constructed and authentication only fails at request time — confirm
# this is the intended failure mode.
HF_TOKEN = os.environ.get("HF_TOKEN")

# OpenAI-compatible client pointed at the Hugging Face Inference Router
# endpoint instead of api.openai.com.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=HF_TOKEN,
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
# ---------------------------
# 2. System Prompt
# ---------------------------

# Persona and tone instructions prepended as the "system" message of every
# conversation sent to the model.
SYSTEM_PROMPT = (
    "Your name is YAH Assistant. "
    "If the user asks your name, always answer: 'My name is YAH Assistant.' "
    "Your tone is precise, formal, and concise. "
    "Avoid slang. Stay helpful and direct."
)

# Model identifier passed to the router; the ":novita" suffix presumably
# pins the inference provider — verify against the HF Router docs.
MODEL_ID = "Qwen/Qwen3-Next-80B-A3B-Instruct:novita"
28
+
29
# ---------------------------
# 3. Chat function
# ---------------------------

def chat_fn(message, history):
    """Send the conversation to the router model and return its reply.

    Parameters
    ----------
    message : str
        The new user message.
    history : list[list[str]]
        Prior turns as ``[user, assistant]`` pairs (Gradio tuple format).

    Returns
    -------
    str
        The assistant's reply, or an ``"Error: ..."`` string if the
        request fails (the UI always receives a plain string).
    """
    # Start from the system persona, then replay the chat history as
    # OpenAI-style role/content messages.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})

    # Finally, the new user message.
    messages.append({"role": "user", "content": message})

    try:
        completion = client.chat.completions.create(
            model=MODEL_ID,
            messages=messages,
        )
        # BUG FIX: the openai>=1.0 client returns pydantic objects, not
        # dicts — ``message["content"]`` raised TypeError, which the broad
        # except below swallowed, so every chat returned an error string.
        # Attribute access is the correct form.
        reply = completion.choices[0].message.content
    except Exception as e:
        # Surface failures in-band so the caller still gets a string.
        reply = f"Error: {str(e)}"

    return reply
61
+
62
# ---------------------------
# 4. Gradio UI
# ---------------------------

with gr.Blocks(title="YAH Assistant") as demo:
    gr.Markdown(
        """
        ## YAH Assistant
        Large-model chat interface powered by Hugging Face Router.
        """
    )

    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(label="Message")

    def respond(user_message, chat_log):
        """Query the model, record the new turn, and clear the textbox."""
        answer = chat_fn(user_message, chat_log)
        chat_log.append([user_message, answer])
        # First output clears the textbox, second refreshes the chatbot.
        return "", chat_log

    # Pressing Enter in the textbox drives the exchange.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# Launch the app (blocks until the server stops).
demo.launch()