"""NORA chat service: FastAPI + Gradio frontend over the Adedoyinjames/NORA causal LM."""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
from fastapi.middleware.cors import CORSMiddleware
import gradio as gr

# --- NORA Chat System ---
print("🔄 Loading NORA model from Adedoyinjames/NORA...")

# Load your custom NORA model (downloaded from the Hugging Face Hub at import time).
model_name = "Adedoyinjames/NORA"

try:
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",  # may place the model on GPU; inputs must be moved to match
        trust_remote_code=True
    )
    print("✅ NORA model loaded successfully!")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    raise


def generate_response(query):
    """Generate a chat reply for *query* using only the NORA model.

    Returns the assistant's decoded text, or an ``"Error generating response: ..."``
    string if tokenization/generation fails (the function never raises).
    """
    try:
        # Format prompt for chat
        prompt = f"User: {query}\nAssistant:"

        # Tokenize input, capped to the model's context budget.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
        # BUGFIX: with device_map="auto" the model may live on GPU; the input
        # tensors must be moved to the same device or generate() raises a
        # device-mismatch RuntimeError.
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Generate response (sampling; no gradients needed at inference time).
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1
            )

        # Decode the full sequence (prompt + completion).
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract only the assistant's response; fall back to stripping the
        # prompt if the marker was lost during truncation/decoding.
        if "Assistant:" in full_text:
            response = full_text.split("Assistant:")[-1].strip()
        else:
            response = full_text.replace(prompt, "").strip()

        return response
    except Exception as e:
        return f"Error generating response: {str(e)}"


# --- FastAPI App ---
app = FastAPI(title="NORA AI", description="Chat with your custom NORA model")

# NOTE(review): wildcard CORS with credentials is permissive — confirm this is
# intentional for the deployment target.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class QueryRequest(BaseModel):
    # User's chat message.
    query: str


@app.post("/chat/")
async def chat_with_ai(query_request: QueryRequest):
    """Chat endpoint: run the model on the query and report success/error status."""
    try:
        response = generate_response(query_request.query)
        return {
            "response": response,
            "model_used": "Adedoyinjames/NORA",
            "status": "success"
        }
    except Exception as e:
        return {
            "response": f"Error: {str(e)}",
            "model_used": "Adedoyinjames/NORA",
            "status": "error"
        }


@app.get("/status/")
async def get_status():
    """Health/status endpoint; the model is guaranteed loaded if the app started."""
    return {
        "model_loaded": True,
        "model_name": "Adedoyinjames/NORA",
        "system_ready": True
    }


@app.get("/")
async def root():
    """Root endpoint with a simple liveness message."""
    return {"message": "NORA AI running with custom model"}


# Simple Gradio interface
def chat_interface(message, history):
    """Gradio chat callback; *history* is unused, each turn is independent."""
    try:
        response = generate_response(message)
        return response
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C / SystemExit still propagate.
        return "System busy, please try again."


gradio_app = gr.ChatInterface(
    fn=chat_interface,
    title="NORA AI",
    description="Chat with your custom NORA model (Adedoyinjames/NORA)"
)

# Mount the Gradio UI under /gradio on the same ASGI app.
app = gr.mount_gradio_app(app, gradio_app, path="/gradio")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)