Spaces:

Adedoyinjames
/

NORA

Sleeping

App Files Files Community

NORA / app.py

Adedoyinjames

Update app.py

34233ac verified 3 months ago

raw

history blame contribute delete

3.41 kB

	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from fastapi import FastAPI
	from pydantic import BaseModel
	import uvicorn
	from fastapi.middleware.cors import CORSMiddleware
	import gradio as gr

	# --- NORA Chat System ---

	print("🔄 Loading NORA model from Adedoyinjames/NORA...")

	# Load your custom NORA model
	model_name = "Adedoyinjames/NORA"

	try:
	tokenizer = AutoTokenizer.from_pretrained(
	model_name,
	trust_remote_code=True
	)

	model = AutoModelForCausalLM.from_pretrained(
	model_name,
	torch_dtype=torch.float16,
	device_map="auto",
	trust_remote_code=True
	)

	print("✅ NORA model loaded successfully!")

	except Exception as e:
	print(f"❌ Error loading model: {e}")
	raise

	def generate_response(query):
	"""Generates response using only the NORA model"""
	try:
	# Format prompt for chat
	prompt = f"User: {query}\nAssistant:"

	# Tokenize input
	inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)

	# Generate response
	with torch.no_grad():
	outputs = model.generate(
	**inputs,
	max_new_tokens=256,
	temperature=0.7,
	do_sample=True,
	pad_token_id=tokenizer.eos_token_id,
	repetition_penalty=1.1
	)

	# Decode response
	full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

	# Extract only the assistant's response
	if "Assistant:" in full_text:
	response = full_text.split("Assistant:")[-1].strip()
	else:
	response = full_text.replace(prompt, "").strip()

	return response

	except Exception as e:
	return f"Error generating response: {str(e)}"

	# --- FastAPI App ---
	app = FastAPI(title="NORA AI", description="Chat with your custom NORA model")

	app.add_middleware(
	CORSMiddleware,
	allow_origins=["*"],
	allow_credentials=True,
	allow_methods=["*"],
	allow_headers=["*"],
	)

	class QueryRequest(BaseModel):
	query: str

	@app.post("/chat/")
	async def chat_with_ai(query_request: QueryRequest):
	try:
	response = generate_response(query_request.query)
	return {
	"response": response,
	"model_used": "Adedoyinjames/NORA",
	"status": "success"
	}

	except Exception as e:
	return {
	"response": f"Error: {str(e)}",
	"model_used": "Adedoyinjames/NORA",
	"status": "error"
	}

	@app.get("/status/")
	async def get_status():
	return {
	"model_loaded": True,
	"model_name": "Adedoyinjames/NORA",
	"system_ready": True
	}

	@app.get("/")
	async def root():
	return {"message": "NORA AI running with custom model"}

	# Simple Gradio interface
	def chat_interface(message, history):
	try:
	response = generate_response(message)
	return response
	except:
	return "System busy, please try again."

	gradio_app = gr.ChatInterface(
	fn=chat_interface,
	title="NORA AI",
	description="Chat with your custom NORA model (Adedoyinjames/NORA)"
	)

	app = gr.mount_gradio_app(app, gradio_app, path="/gradio")

	if __name__ == "__main__":
	uvicorn.run(app, host="0.0.0.0", port=7860)