Spaces:

Abdalkaderdev
/

ORA

Paused

App Files Community

Abdalkaderdev committed on Jan 12

Commit

42545fb

1 Parent(s): efcab75

Add Bark TTS and RAG with enhanced system prompt

Browse files

Files changed (1) hide show

app/ora_server.py +66 -16

app/ora_server.py CHANGED Viewed

@@ -108,14 +108,61 @@ async def load_advanced_ai():
 @app.post("/api/chat")
 async def chat_endpoint(req: ChatRequest):
-    global model, tokenizer
-    system_prompt = "You are ORA, a spiritual assistant specializing in theological insights and biblical wisdom. Provide discerning, compassionate, and doctrine-aware responses."
     # Construct Prompt
     messages = [{"role": "system", "content": system_prompt}]
-    # Add last few turns of history to keep context but save tokens
-    messages.extend(req.history[-4:])
     messages.append({"role": "user", "content": req.message})
     input_ids = tokenizer.apply_chat_template(
@@ -141,7 +188,7 @@ async def chat_endpoint(req: ChatRequest):
     response_tokens = outputs[0][input_ids.shape[-1]:]
     response_text = tokenizer.decode(response_tokens, skip_special_tokens=True)
-    return {"response": response_text}
 # Advanced AI Endpoints
@@ -193,7 +240,7 @@ async def detect_emotion(req: EmotionRequest):
-# TTS endpoint using Supertonic 2
 tts_model = None
 tts_processor = None
@@ -201,17 +248,16 @@ tts_processor = None
 # Pre-commit version: load the Supertonic 2 TTS processor and model into the
 # module-level globals `tts_processor` / `tts_model`.
 # Any exception raised while loading is caught and printed, not re-raised.
 async def load_tts():
     global tts_model, tts_processor
     try:
-        print("Loading Supertonic 2 TTS...")
-        from transformers import AutoProcessor, AutoModelForTextToWaveform
-        model_id = "Supertone/supertonic-2"
-        tts_processor = AutoProcessor.from_pretrained(model_id)
-        tts_model = AutoModelForTextToWaveform.from_pretrained(model_id)
         # `device` is defined outside this hunk — presumably "cuda" or "cpu"; TODO confirm.
         if device == "cuda":
             tts_model = tts_model.to("cuda")
-        print("Supertonic 2 TTS loaded successfully!")
     except Exception as e:
         # Best-effort load: report the failure and continue without server-side TTS.
         print(f"Could not load TTS model: {e}")
         print("Voice will fall back to browser TTS.")
@@ -227,20 +273,24 @@ async def text_to_speech(req: TTSRequest):
         raise HTTPException(status_code=503, detail="TTS model not loaded, use browser fallback")
     try:
-        # Process text with Supertonic 2
-        inputs = tts_processor(text=req.text, return_tensors="pt", sampling_rate=24000)
         if device == "cuda":
             inputs = {k: v.to("cuda") for k, v in inputs.items()}
         with torch.no_grad():
-            audio_values = tts_model.generate(**inputs)
         # Convert to WAV format
         import io
         import wave
-        audio_np = audio_values.cpu().numpy().squeeze()
         # Normalize to 16-bit PCM
         audio_np = (audio_np * 32767).astype('int16')

 @app.post("/api/chat")
 async def chat_endpoint(req: ChatRequest):
+    global model, tokenizer, emotion_classifier
+    # Detect emotion for compassionate responses
+    user_emotion = None
+    if emotion_classifier:
+        try:
+            emotion_result = emotion_classifier(req.message)[0]
+            user_emotion = emotion_result["label"]
+        except:
+            pass
+    # RAG: Retrieve relevant Bible verses
+    relevant_verses = ""
+    try:
+        import lancedb
+        db = lancedb.connect("important/vector_db")
+        bible_table = db.open_table("bible_verses")
+        results = bible_table.search(req.message).limit(3).to_list()
+        if results:
+            verses = [f"- {r['text']} ({r.get('reference', '')})" for r in results]
+            relevant_verses = "\n".join(verses)
+    except Exception as e:
+        print(f"RAG retrieval failed: {e}")
+    # Enhanced system prompt with emotion awareness
+    emotion_guidance = ""
+    if user_emotion:
+        emotion_map = {
+            "sadness": "The user seems troubled. Offer comfort, hope, and reassurance.",
+            "joy": "The user is joyful. Share in their celebration with gratitude.",
+            "anger": "The user may be upset. Respond with patience and understanding.",
+            "fear": "The user seems anxious. Provide peace and encouragement.",
+            "surprise": "The user is surprised. Acknowledge their wonder.",
+        }
+        emotion_guidance = emotion_map.get(user_emotion.lower(), "")
+    system_prompt = f"""You are ORA, a wise and compassionate spiritual guide.
+Your role:
+- Provide biblically-grounded wisdom
+- Speak with warmth, empathy, and pastoral care
+- Keep responses concise but meaningful (2-3 sentences)
+- Always cite scripture when relevant
+{emotion_guidance}
+Relevant Scripture:
+{relevant_verses if relevant_verses else "No specific verses retrieved for this query."}
+Respond with compassion and wisdom."""
     # Construct Prompt
     messages = [{"role": "system", "content": system_prompt}]
+    messages.extend(req.history[-4:])
     messages.append({"role": "user", "content": req.message})
     input_ids = tokenizer.apply_chat_template(
     response_tokens = outputs[0][input_ids.shape[-1]:]
     response_text = tokenizer.decode(response_tokens, skip_special_tokens=True)
+    return {"response": response_text, "emotion": user_emotion}
 # Advanced AI Endpoints
+# TTS endpoint using Bark (Natural, Expressive Voice)
 tts_model = None
 tts_processor = None
 # Load the Bark TTS processor and model ("suno/bark-small") into the
 # module-level globals `tts_processor` / `tts_model`.
 # Any exception raised while loading is caught and printed, not re-raised.
 async def load_tts():
     global tts_model, tts_processor
     try:
+        print("Loading Bark TTS for natural voice...")
+        from transformers import AutoProcessor, BarkModel
+        tts_processor = AutoProcessor.from_pretrained("suno/bark-small")
+        tts_model = BarkModel.from_pretrained("suno/bark-small")
         # `device` is defined outside this hunk — presumably "cuda" or "cpu"; TODO confirm.
         if device == "cuda":
             tts_model = tts_model.to("cuda")
+        print("✓ Bark TTS loaded - Natural voice ready!")
     except Exception as e:
         # Best-effort load: report the failure and continue without server-side TTS.
         # NOTE(review): if the processor loads but the model does not, `tts_processor`
         # is already assigned while `tts_model` keeps its previous value.
         print(f"Could not load TTS model: {e}")
         print("Voice will fall back to browser TTS.")
         raise HTTPException(status_code=503, detail="TTS model not loaded, use browser fallback")
     try:
+        # Use Bark with natural voice preset
+        inputs = tts_processor(
+            text=req.text,
+            voice_preset="v2/en_speaker_6",  # Warm, natural female voice
+            return_tensors="pt"
+        )
         if device == "cuda":
             inputs = {k: v.to("cuda") for k, v in inputs.items()}
         with torch.no_grad():
+            audio_array = tts_model.generate(**inputs)
         # Convert to WAV format
         import io
         import wave
+        audio_np = audio_array.cpu().numpy().squeeze()
         # Normalize to 16-bit PCM
         audio_np = (audio_np * 32767).astype('int16')