import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse, Response
from pydantic import BaseModel
import uvicorn
import os

# Settings
BASE_MODEL = "unsloth/Llama-3.2-1B-Instruct"
ADAPTER_PATH = "important/finetuning/models/ora_adapter"

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global Model Variables
model = None
tokenizer = None
device = "cuda" if torch.cuda.is_available() else "cpu"

# Advanced AI Models (Voice & Response Quality)
whisper_model = None
emotion_classifier = None

class ChatRequest(BaseModel):
    message: str
    history: list = []

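# Illustrative request body for the chat endpoint (shape follows ChatRequest above;
# history entries must be {"role": ..., "content": ...} dicts, since they are fed
# straight into the chat template):
#     {"message": "I feel anxious today",
#      "history": [{"role": "user", "content": "Hello"},
#                  {"role": "assistant", "content": "Peace be with you."}]}
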
@app.on_event("startup")  # startup hook assumed; the loader is never invoked otherwise
async def load_model():
    global model, tokenizer
    print(f"Loading ORA Model on {device}...")
    # Download adapter from HF Hub if not present
    if not os.path.exists(ADAPTER_PATH):
        print("Downloading adapter from HF Hub...")
        from huggingface_hub import snapshot_download
        try:
            snapshot_download(
                repo_id="Abdalkaderdev/ora-adapter",
                local_dir=ADAPTER_PATH,
                repo_type="model",
            )
            print("Adapter downloaded successfully!")
        except Exception as e:
            print(f"Could not download adapter: {e}")
            print("Will use base model only.")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map=device,
        low_cpu_mem_usage=True,
    )
    if os.path.exists(ADAPTER_PATH):
        print(f"Loading adapter from {ADAPTER_PATH}...")
        model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
    else:
        print("Adapter not found, using base model.")
        model = base_model
    print("ORA Model Connected and Ready.")

@app.on_event("startup")  # startup hook assumed, as above
async def load_advanced_ai():
    global whisper_model, emotion_classifier
    try:
        print("Loading Voice & Response Quality AI...")
        from transformers import pipeline
        # Whisper V3 for Speech-to-Text (professional quality)
        print("Loading Whisper V3 STT...")
        whisper_model = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-large-v3",
            device=0 if device == "cuda" else -1,
        )
        print("✓ Whisper V3 loaded - Professional STT ready")
        # Emotion detection for compassionate responses
        print("Loading Emotion Detector...")
        emotion_classifier = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            device=0 if device == "cuda" else -1,
        )
        print("✓ Emotion Detector loaded - Empathetic responses enabled")
        print("Voice & Response Quality AI Ready!")
    except Exception as e:
        print(f"Warning: Could not load some AI models: {e}")
        print("ORA will continue with basic functionality.")

@app.post("/api/chat")  # route path assumed
async def chat_endpoint(req: ChatRequest):
    global model, tokenizer, emotion_classifier
    # Detect emotion for compassionate responses
    user_emotion = None
    if emotion_classifier:
        try:
            emotion_result = emotion_classifier(req.message)[0]
            user_emotion = emotion_result["label"]
        except Exception:  # classification failure must not block the chat
            pass
    # RAG: Retrieve relevant Bible verses
    relevant_verses = ""
    try:
        import lancedb
        db = lancedb.connect("important/vector_db")
        bible_table = db.open_table("bible_verses")
        results = bible_table.search(req.message).limit(3).to_list()
        if results:
            verses = [f"- {r['text']} ({r.get('reference', '')})" for r in results]
            relevant_verses = "\n".join(verses)
    except Exception as e:
        print(f"RAG retrieval failed: {e}")
    # Enhanced system prompt with emotion awareness
    emotion_guidance = ""
    if user_emotion:
        emotion_map = {
            "sadness": "The user seems troubled. Offer comfort, hope, and reassurance.",
            "joy": "The user is joyful. Share in their celebration with gratitude.",
            "anger": "The user may be upset. Respond with patience and understanding.",
            "fear": "The user seems anxious. Provide peace and encouragement.",
            "surprise": "The user is surprised. Acknowledge their wonder.",
        }
        emotion_guidance = emotion_map.get(user_emotion.lower(), "")
    system_prompt = f"""You are ORA, a wise and compassionate spiritual guide.
Your role:
- Provide biblically-grounded wisdom
- Speak with warmth, empathy, and pastoral care
- Keep responses concise but meaningful (2-3 sentences)
- Always cite scripture when relevant
{emotion_guidance}
Relevant Scripture:
{relevant_verses if relevant_verses else "No specific verses retrieved for this query."}
Respond with compassion and wisdom."""
    # Construct prompt
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(req.history[-4:])
    messages.append({"role": "user", "content": req.message})
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(device)
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    outputs = model.generate(
        input_ids,
        max_new_tokens=128,  # reduced for faster CPU inference
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    # Decode only the newly generated tokens, not the prompt
    response_tokens = outputs[0][input_ids.shape[-1]:]
    response_text = tokenizer.decode(response_tokens, skip_special_tokens=True)
    return {"response": response_text, "emotion": user_emotion}

# Advanced AI Endpoints
class TranscribeRequest(BaseModel):
    audio_data: str  # base64-encoded audio

@app.post("/api/transcribe")  # route path assumed
async def transcribe_audio(req: TranscribeRequest):
    global whisper_model
    if whisper_model is None:
        raise HTTPException(status_code=503, detail="Whisper model not loaded")
    try:
        import base64
        # Decode base64 audio; the ASR pipeline accepts raw bytes directly
        audio_bytes = base64.b64decode(req.audio_data)
        # Transcribe with Whisper
        result = whisper_model(audio_bytes)
        return {"text": result["text"], "confidence": 1.0}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")

class EmotionRequest(BaseModel):
    text: str

@app.post("/api/emotion")  # route path assumed
async def detect_emotion(req: EmotionRequest):
    global emotion_classifier
    if emotion_classifier is None:
        raise HTTPException(status_code=503, detail="Emotion model not loaded")
    try:
        result = emotion_classifier(req.text)[0]
        return {
            "emotion": result["label"],
            "confidence": result["score"],
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Emotion detection failed: {str(e)}")

# TTS using Supertonic 2 (CPU-friendly)
tts_model = None
tts_processor = None

@app.on_event("startup")  # startup hook assumed, as above
async def load_tts():
    global tts_model, tts_processor
    try:
        print("Loading Supertonic 2 TTS...")
        from transformers import AutoProcessor, AutoModelForTextToWaveform
        model_id = "Supertone/supertonic-2"
        tts_processor = AutoProcessor.from_pretrained(model_id)
        tts_model = AutoModelForTextToWaveform.from_pretrained(model_id)
        if device == "cuda":
            tts_model = tts_model.to("cuda")
        print("✓ Supertonic 2 TTS loaded successfully!")
    except Exception as e:
        print(f"Could not load TTS model: {e}")
        print("Voice will fall back to browser TTS.")

class TTSRequest(BaseModel):
    text: str

@app.post("/api/tts")  # route path assumed
async def text_to_speech(req: TTSRequest):
    global tts_model, tts_processor
    if tts_model is None or tts_processor is None:
        raise HTTPException(status_code=503, detail="TTS model not loaded, use browser fallback")
    try:
        # Process text with Supertonic 2
        inputs = tts_processor(text=req.text, return_tensors="pt", sampling_rate=24000)
        if device == "cuda":
            inputs = {k: v.to("cuda") for k, v in inputs.items()}
        with torch.no_grad():
            audio_values = tts_model.generate(**inputs)
        # Convert to WAV format
        import io
        import wave
        audio_np = audio_values.cpu().numpy().squeeze()
        # Scale float audio to 16-bit PCM
        audio_np = (audio_np * 32767).astype("int16")
        # Create WAV in memory
        wav_io = io.BytesIO()
        with wave.open(wav_io, "wb") as wav_file:
            wav_file.setnchannels(1)      # mono
            wav_file.setsampwidth(2)      # 16-bit
            wav_file.setframerate(24000)  # 24 kHz
            wav_file.writeframes(audio_np.tobytes())
        wav_io.seek(0)
        return Response(content=wav_io.read(), media_type="audio/wav")
    except Exception as e:
        print(f"TTS error: {e}")
        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")

# Mount static frontend (must be last so API routes take precedence)
# Expects 'frontend/out' to exist (built via 'next build')
if os.path.exists("frontend/out"):
    app.mount("/_next", StaticFiles(directory="frontend/out/_next"), name="next")
    app.mount("/", StaticFiles(directory="frontend/out", html=True), name="static")

    @app.exception_handler(404)  # SPA fallback; decorator assumed
    async def not_found(request, exc):
        return FileResponse("frontend/out/index.html")

if __name__ == "__main__":
    # HF Spaces expects port 7860
    uvicorn.run(app, host="0.0.0.0", port=7860)