# NORA / app.py — Hugging Face Space entry point
# Author: Adedoyinjames (commit 34233ac, "Update app.py")
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
from fastapi.middleware.cors import CORSMiddleware
import gradio as gr
# --- NORA Chat System ---
print("πŸ”„ Loading NORA model from Adedoyinjames/NORA...")

# Pull the custom NORA model and its tokenizer from the Hugging Face Hub.
# trust_remote_code=True is required because the repo ships custom model code.
model_name = "Adedoyinjames/NORA"
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,  # halve memory footprint vs fp32
        device_map="auto",          # place weights on GPU when available
        trust_remote_code=True,
    )
    print("βœ… NORA model loaded successfully!")
except Exception as e:
    # Fail fast: without the model the whole app is useless.
    print(f"❌ Error loading model: {e}")
    raise
def generate_response(query):
    """Generate a chat reply from the NORA model.

    Args:
        query: The user's message text.

    Returns:
        The assistant's reply string, or an ``"Error generating response: ..."``
        string if anything fails during generation.
    """
    try:
        # Plain chat template: the model is asked to continue after "Assistant:".
        prompt = f"User: {query}\nAssistant:"
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
        # Bug fix: with device_map="auto" the model may live on a GPU while the
        # tokenizer returns CPU tensors, which makes generate() raise a device
        # mismatch error — move every input tensor onto the model's device.
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        # Inference only: disable autograd bookkeeping.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
            )
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Keep only the text after the last "Assistant:" marker; fall back to
        # stripping the prompt if the model dropped the marker entirely.
        if "Assistant:" in full_text:
            response = full_text.split("Assistant:")[-1].strip()
        else:
            response = full_text.replace(prompt, "").strip()
        return response
    except Exception as e:
        # Surface the failure to the caller as text rather than crashing the API.
        return f"Error generating response: {str(e)}"
# --- FastAPI App ---
app = FastAPI(title="NORA AI", description="Chat with your custom NORA model")
# Open CORS policy so browser clients on any origin can call the API.
# NOTE(review): browsers reject allow_origins=["*"] combined with
# allow_credentials=True per the CORS spec — confirm credentials are needed.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
class QueryRequest(BaseModel):
    """Request body schema for the /chat/ endpoint."""

    # The user's chat message.
    query: str
@app.post("/chat/")
async def chat_with_ai(query_request: QueryRequest):
    """Run the user's query through NORA and return the reply as JSON.

    Always answers 200 with a status field; failures are reported in-band.
    """
    try:
        reply = generate_response(query_request.query)
        return {
            "response": reply,
            "model_used": "Adedoyinjames/NORA",
            "status": "success",
        }
    except Exception as e:
        # generate_response already catches internally; this is a last resort.
        return {
            "response": f"Error: {str(e)}",
            "model_used": "Adedoyinjames/NORA",
            "status": "error",
        }
@app.get("/status/")
async def get_status():
    """Readiness probe: reports the model as loaded (module import raised otherwise)."""
    status = {
        "model_loaded": True,
        "model_name": "Adedoyinjames/NORA",
        "system_ready": True,
    }
    return status
@app.get("/")
async def root():
    """Landing endpoint confirming the service is up."""
    return {"message": "NORA AI running with custom model"}
# Simple Gradio interface
def chat_interface(message, history):
    """Gradio ChatInterface callback: return NORA's reply to *message*.

    Args:
        message: The user's latest message.
        history: Prior chat turns (unused — the model only sees the new message).

    Returns:
        The model's reply, or a generic busy message on failure.
    """
    try:
        return generate_response(message)
    except Exception:
        # Bug fix: the original bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt; narrow it so shutdown signals propagate.
        return "System busy, please try again."
# Mount a minimal Gradio chat UI at /gradio alongside the JSON API.
gradio_app = gr.ChatInterface(
    fn=chat_interface,
    title="NORA AI",
    description="Chat with your custom NORA model (Adedoyinjames/NORA)",
)
app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
if __name__ == "__main__":
    # Hugging Face Spaces route external traffic to port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)