Adedoyinjames committed on
Commit
7345ebc
·
verified ·
1 Parent(s): 8f85144

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -116
app.py CHANGED
@@ -1,129 +1,85 @@
1
- import torch
2
- from transformers import AutoModelForCausalLM, AutoTokenizer
3
- from fastapi import FastAPI
4
- from pydantic import BaseModel
5
- import uvicorn
6
- from fastapi.middleware.cors import CORSMiddleware
7
  import gradio as gr
 
8
 
9
- # --- Qwen Chat System ---
 
 
10
 
11
- print("🔄 Loading Qwen model from Qwen/Qwen1.5-0.5B-Chat...")
12
 
13
- # Load Qwen model
14
- model_name = "Qwen/Qwen1.5-0.5B-Chat"
15
-
16
- try:
17
- tokenizer = AutoTokenizer.from_pretrained(
18
- model_name,
19
- trust_remote_code=True
20
- )
21
-
22
- model = AutoModelForCausalLM.from_pretrained(
23
- model_name,
24
- torch_dtype=torch.float16,
25
- device_map="auto",
26
- trust_remote_code=True
27
- )
28
-
29
- print("✅ Qwen model loaded successfully!")
30
-
31
- except Exception as e:
32
- print(f"❌ Error loading model: {e}")
33
- raise
34
-
35
- def generate_response(query):
36
- """Generates response using only the Qwen model"""
37
- try:
38
- # Format prompt using Qwen chat template for better performance
39
- messages = [
40
- {"role": "user", "content": query}
41
- ]
42
- prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
43
-
44
- # Tokenize input
45
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
46
-
47
- # Generate response
48
- with torch.no_grad():
49
- outputs = model.generate(
50
- **inputs,
51
- max_new_tokens=256,
52
- temperature=0.7,
53
- do_sample=True,
54
- pad_token_id=tokenizer.eos_token_id,
55
- repetition_penalty=1.1
56
- )
57
-
58
- # Decode response
59
- full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
60
-
61
- # Extract only the assistant's response
62
- response = full_text[len(prompt):].strip()
63
-
64
- return response
65
-
66
- except Exception as e:
67
- return f"Error generating response: {str(e)}"
68
 
69
- # --- FastAPI App ---
70
- app = FastAPI(title="Qwen AI", description="Chat with Qwen1.5-0.5B-Chat model")
 
71
 
72
- app.add_middleware(
73
- CORSMiddleware,
74
- allow_origins=["*"],
75
- allow_credentials=True,
76
- allow_methods=["*"],
77
- allow_headers=["*"],
78
  )
79
 
80
- class QueryRequest(BaseModel):
81
- query: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- @app.post("/chat/")
84
- async def chat_with_ai(query_request: QueryRequest):
85
  try:
86
- response = generate_response(query_request.query)
87
- return {
88
- "response": response,
89
- "model_used": "Qwen/Qwen1.5-0.5B-Chat",
90
- "status": "success"
91
- }
92
-
93
  except Exception as e:
94
- return {
95
- "response": f"Error: {str(e)}",
96
- "model_used": "Qwen/Qwen1.5-0.5B-Chat",
97
- "status": "error"
98
- }
99
-
100
- @app.get("/status/")
101
- async def get_status():
102
- return {
103
- "model_loaded": True,
104
- "model_name": "Qwen/Qwen1.5-0.5B-Chat",
105
- "system_ready": True
106
- }
107
-
108
- @app.get("/")
109
- async def root():
110
- return {"message": "Qwen AI running with Qwen model"}
111
-
112
- # Simple Gradio interface
113
- def chat_interface(message, history):
114
- try:
115
- response = generate_response(message)
116
- return response
117
- except:
118
- return "System busy, please try again."
119
-
120
- gradio_app = gr.ChatInterface(
121
- fn=chat_interface,
122
- title="Qwen AI",
123
- description="Chat with Qwen1.5-0.5B-Chat model"
124
- )
125
 
126
- app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
127
 
128
- if __name__ == "__main__":
129
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
import os

import gradio as gr
from openai import OpenAI

# ---------------------------
# 1. Setup Hugging Face Router client
# ---------------------------

# Hugging Face access token, read from the environment (e.g. a Space secret).
# NOTE(review): os.environ.get returns None when HF_TOKEN is unset; the client
# is still constructed and authentication only fails at request time — confirm
# this is the intended failure mode.
HF_TOKEN = os.environ.get("HF_TOKEN")

# OpenAI-compatible client pointed at the Hugging Face Inference Router
# endpoint instead of api.openai.com.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=HF_TOKEN,
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
# ---------------------------
# 2. System Prompt
# ---------------------------

# Persona and tone instructions prepended as the "system" message of every
# conversation sent to the model.
SYSTEM_PROMPT = (
    "Your name is YAH Assistant. "
    "If the user asks your name, always answer: 'My name is YAH Assistant.' "
    "Your tone is precise, formal, and concise. "
    "Avoid slang. Stay helpful and direct."
)

# Model identifier passed to the router; the ":novita" suffix presumably
# pins the inference provider — verify against the HF Router docs.
MODEL_ID = "Qwen/Qwen3-Next-80B-A3B-Instruct:novita"
28
+
29
# ---------------------------
# 3. Chat function
# ---------------------------

def chat_fn(message, history):
    """Send the conversation to the router model and return its reply.

    Parameters
    ----------
    message : str
        The new user message.
    history : list[list[str]]
        Prior turns as ``[user, assistant]`` pairs (Gradio tuple format).

    Returns
    -------
    str
        The assistant's reply, or an ``"Error: ..."`` string if the
        request fails (the UI always receives a plain string).
    """
    # Start from the system persona, then replay the chat history as
    # OpenAI-style role/content messages.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})

    # Finally, the new user message.
    messages.append({"role": "user", "content": message})

    try:
        completion = client.chat.completions.create(
            model=MODEL_ID,
            messages=messages,
        )
        # BUG FIX: the openai>=1.0 client returns pydantic objects, not
        # dicts — ``message["content"]`` raised TypeError, which the broad
        # except below swallowed, so every chat returned an error string.
        # Attribute access is the correct form.
        reply = completion.choices[0].message.content
    except Exception as e:
        # Surface failures in-band so the caller still gets a string.
        reply = f"Error: {str(e)}"

    return reply
61
+
62
# ---------------------------
# 4. Gradio UI
# ---------------------------

with gr.Blocks(title="YAH Assistant") as demo:
    gr.Markdown(
        """
        ## YAH Assistant
        Large-model chat interface powered by Hugging Face Router.
        """
    )

    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(label="Message")

    def respond(user_message, chat_log):
        """Query the model, record the new turn, and clear the textbox."""
        answer = chat_fn(user_message, chat_log)
        chat_log.append([user_message, answer])
        # First output clears the textbox, second refreshes the chatbot.
        return "", chat_log

    # Pressing Enter in the textbox drives the exchange.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# Launch the app (blocks until the server stops).
demo.launch()