# NORA AI — Hugging Face Space app (FastAPI + Gradio chat frontend for the NORA model)
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| import uvicorn | |
| from fastapi.middleware.cors import CORSMiddleware | |
| import gradio as gr | |
# --- NORA Chat System ---
# Fix: the original status messages contained mojibake ("π", "β") left over
# from garbled emoji; restored readable markers.
print("🚀 Loading NORA model from Adedoyinjames/NORA...")

# Hugging Face repo id of the custom NORA model.
model_name = "Adedoyinjames/NORA"

try:
    # trust_remote_code=True because the repo may ship custom model/tokenizer code.
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,  # half precision to cut memory use
        device_map="auto",          # let accelerate place weights on GPU/CPU
        trust_remote_code=True,
    )
    print("✅ NORA model loaded successfully!")
except Exception as e:
    # Fail fast: the whole app is useless without the model.
    print(f"❌ Error loading model: {e}")
    raise
def generate_response(query):
    """Generate a single-turn chat reply from the NORA model.

    Args:
        query: The raw user message.

    Returns:
        The assistant's reply as a string, or an error-message string if
        generation fails (this function never raises).
    """
    try:
        # Simple single-turn chat template understood by the splitter below.
        prompt = f"User: {query}\nAssistant:"

        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
        # Bug fix: with device_map="auto" the model weights may live on GPU
        # while the tokenizer output is on CPU; move input tensors to the
        # model's device to avoid a device-mismatch error.
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Inference only — no gradient bookkeeping needed.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
            )

        full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Keep only the text after the last "Assistant:" marker; fall back to
        # stripping the prompt if the model did not echo the marker.
        if "Assistant:" in full_text:
            response = full_text.split("Assistant:")[-1].strip()
        else:
            response = full_text.replace(prompt, "").strip()
        return response
    except Exception as e:
        return f"Error generating response: {str(e)}"
# --- FastAPI App ---
app = FastAPI(title="NORA AI", description="Chat with your custom NORA model")

# Permissive CORS so browser clients on any origin can call the API.
cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **cors_settings)
class QueryRequest(BaseModel):
    """Request body for the chat endpoint: a single user query string."""

    query: str
@app.post("/chat")
async def chat_with_ai(query_request: QueryRequest):
    """POST /chat — run the NORA model on the submitted query.

    Bug fix: this handler was never registered with the FastAPI app (no route
    decorator), so the endpoint was unreachable; registered it at /chat.

    Returns:
        JSON with the model reply, the model id, and a status flag.
    """
    try:
        response = generate_response(query_request.query)
        return {
            "response": response,
            "model_used": "Adedoyinjames/NORA",
            "status": "success",
        }
    except Exception as e:
        # generate_response already catches its own errors; this is a
        # last-resort guard so the endpoint never surfaces a raw 500.
        return {
            "response": f"Error: {str(e)}",
            "model_used": "Adedoyinjames/NORA",
            "status": "error",
        }
@app.get("/status")
async def get_status():
    """GET /status — lightweight readiness probe.

    Bug fix: the handler had no route decorator, so it was never exposed;
    registered it at /status. Reaching this code implies the model loaded
    (module import raises otherwise), hence the hard-coded True flags.
    """
    return {
        "model_loaded": True,
        "model_name": "Adedoyinjames/NORA",
        "system_ready": True,
    }
@app.get("/")
async def root():
    """GET / — simple liveness message.

    Bug fix: the handler had no route decorator and was unreachable;
    registered it at the root path.
    """
    return {"message": "NORA AI running with custom model"}
# Simple Gradio interface
def chat_interface(message, history):
    """Gradio ChatInterface callback: answer the latest message.

    Args:
        message: Latest user message.
        history: Prior chat turns (unused — the model is prompted single-turn).

    Returns:
        The model's reply, or a generic busy message on failure.
    """
    try:
        return generate_response(message)
    except Exception:
        # Bug fix: narrowed from a bare `except:` so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return "System busy, please try again."
# Build the chat UI and attach it to the existing FastAPI server.
gradio_app = gr.ChatInterface(
    fn=chat_interface,
    title="NORA AI",
    description="Chat with your custom NORA model (Adedoyinjames/NORA)",
)

# Serve the Gradio UI under /gradio on the same ASGI app.
app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
if __name__ == "__main__":
    # 7860 is the port Hugging Face Spaces expects the app to listen on.
    uvicorn.run(app, host="0.0.0.0", port=7860)