Spaces:
Sleeping
Sleeping
Commit
·
42545fb
1
Parent(s):
efcab75
Add Bark TTS and RAG with enhanced system prompt
Browse files
- app/ora_server.py +66 -16
app/ora_server.py
CHANGED
|
@@ -108,14 +108,61 @@ async def load_advanced_ai():
|
|
| 108 |
|
| 109 |
@app.post("/api/chat")
|
| 110 |
async def chat_endpoint(req: ChatRequest):
|
| 111 |
-
global model, tokenizer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
# Construct Prompt
|
| 116 |
messages = [{"role": "system", "content": system_prompt}]
|
| 117 |
-
|
| 118 |
-
messages.extend(req.history[-4:])
|
| 119 |
messages.append({"role": "user", "content": req.message})
|
| 120 |
|
| 121 |
input_ids = tokenizer.apply_chat_template(
|
|
@@ -141,7 +188,7 @@ async def chat_endpoint(req: ChatRequest):
|
|
| 141 |
response_tokens = outputs[0][input_ids.shape[-1]:]
|
| 142 |
response_text = tokenizer.decode(response_tokens, skip_special_tokens=True)
|
| 143 |
|
| 144 |
-
return {"response": response_text}
|
| 145 |
|
| 146 |
# Advanced AI Endpoints
|
| 147 |
|
|
@@ -193,7 +240,7 @@ async def detect_emotion(req: EmotionRequest):
|
|
| 193 |
|
| 194 |
|
| 195 |
|
| 196 |
-
# TTS endpoint using
|
| 197 |
tts_model = None
|
| 198 |
tts_processor = None
|
| 199 |
|
|
@@ -201,17 +248,16 @@ tts_processor = None
|
|
| 201 |
async def load_tts():
|
| 202 |
global tts_model, tts_processor
|
| 203 |
try:
|
| 204 |
-
print("Loading
|
| 205 |
-
from transformers import AutoProcessor,
|
| 206 |
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
tts_model = AutoModelForTextToWaveform.from_pretrained(model_id)
|
| 210 |
|
| 211 |
if device == "cuda":
|
| 212 |
tts_model = tts_model.to("cuda")
|
| 213 |
|
| 214 |
-
print("
|
| 215 |
except Exception as e:
|
| 216 |
print(f"Could not load TTS model: {e}")
|
| 217 |
print("Voice will fall back to browser TTS.")
|
|
@@ -227,20 +273,24 @@ async def text_to_speech(req: TTSRequest):
|
|
| 227 |
raise HTTPException(status_code=503, detail="TTS model not loaded, use browser fallback")
|
| 228 |
|
| 229 |
try:
|
| 230 |
-
#
|
| 231 |
-
inputs = tts_processor(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
if device == "cuda":
|
| 234 |
inputs = {k: v.to("cuda") for k, v in inputs.items()}
|
| 235 |
|
| 236 |
with torch.no_grad():
|
| 237 |
-
|
| 238 |
|
| 239 |
# Convert to WAV format
|
| 240 |
import io
|
| 241 |
import wave
|
| 242 |
|
| 243 |
-
audio_np =
|
| 244 |
|
| 245 |
# Normalize to 16-bit PCM
|
| 246 |
audio_np = (audio_np * 32767).astype('int16')
|
|
|
|
| 108 |
|
| 109 |
@app.post("/api/chat")
|
| 110 |
async def chat_endpoint(req: ChatRequest):
|
| 111 |
+
global model, tokenizer, emotion_classifier
|
| 112 |
+
|
| 113 |
+
# Detect emotion for compassionate responses
|
| 114 |
+
user_emotion = None
|
| 115 |
+
if emotion_classifier:
|
| 116 |
+
try:
|
| 117 |
+
emotion_result = emotion_classifier(req.message)[0]
|
| 118 |
+
user_emotion = emotion_result["label"]
|
| 119 |
+
except:
|
| 120 |
+
pass
|
| 121 |
+
|
| 122 |
+
# RAG: Retrieve relevant Bible verses
|
| 123 |
+
relevant_verses = ""
|
| 124 |
+
try:
|
| 125 |
+
import lancedb
|
| 126 |
+
db = lancedb.connect("important/vector_db")
|
| 127 |
+
bible_table = db.open_table("bible_verses")
|
| 128 |
+
|
| 129 |
+
results = bible_table.search(req.message).limit(3).to_list()
|
| 130 |
+
if results:
|
| 131 |
+
verses = [f"- {r['text']} ({r.get('reference', '')})" for r in results]
|
| 132 |
+
relevant_verses = "\n".join(verses)
|
| 133 |
+
except Exception as e:
|
| 134 |
+
print(f"RAG retrieval failed: {e}")
|
| 135 |
|
| 136 |
+
# Enhanced system prompt with emotion awareness
|
| 137 |
+
emotion_guidance = ""
|
| 138 |
+
if user_emotion:
|
| 139 |
+
emotion_map = {
|
| 140 |
+
"sadness": "The user seems troubled. Offer comfort, hope, and reassurance.",
|
| 141 |
+
"joy": "The user is joyful. Share in their celebration with gratitude.",
|
| 142 |
+
"anger": "The user may be upset. Respond with patience and understanding.",
|
| 143 |
+
"fear": "The user seems anxious. Provide peace and encouragement.",
|
| 144 |
+
"surprise": "The user is surprised. Acknowledge their wonder.",
|
| 145 |
+
}
|
| 146 |
+
emotion_guidance = emotion_map.get(user_emotion.lower(), "")
|
| 147 |
+
|
| 148 |
+
system_prompt = f"""You are ORA, a wise and compassionate spiritual guide.
|
| 149 |
+
|
| 150 |
+
Your role:
|
| 151 |
+
- Provide biblically-grounded wisdom
|
| 152 |
+
- Speak with warmth, empathy, and pastoral care
|
| 153 |
+
- Keep responses concise but meaningful (2-3 sentences)
|
| 154 |
+
- Always cite scripture when relevant
|
| 155 |
+
|
| 156 |
+
{emotion_guidance}
|
| 157 |
+
|
| 158 |
+
Relevant Scripture:
|
| 159 |
+
{relevant_verses if relevant_verses else "No specific verses retrieved for this query."}
|
| 160 |
+
|
| 161 |
+
Respond with compassion and wisdom."""
|
| 162 |
|
| 163 |
# Construct Prompt
|
| 164 |
messages = [{"role": "system", "content": system_prompt}]
|
| 165 |
+
messages.extend(req.history[-4:])
|
|
|
|
| 166 |
messages.append({"role": "user", "content": req.message})
|
| 167 |
|
| 168 |
input_ids = tokenizer.apply_chat_template(
|
|
|
|
| 188 |
response_tokens = outputs[0][input_ids.shape[-1]:]
|
| 189 |
response_text = tokenizer.decode(response_tokens, skip_special_tokens=True)
|
| 190 |
|
| 191 |
+
return {"response": response_text, "emotion": user_emotion}
|
| 192 |
|
| 193 |
# Advanced AI Endpoints
|
| 194 |
|
|
|
|
| 240 |
|
| 241 |
|
| 242 |
|
| 243 |
+
# TTS endpoint using Bark (Natural, Expressive Voice)
|
| 244 |
tts_model = None
|
| 245 |
tts_processor = None
|
| 246 |
|
|
|
|
| 248 |
async def load_tts():
    """Load the Bark TTS processor and model into the module-level globals.

    On success both ``tts_processor`` and ``tts_model`` are set, with the
    model moved to CUDA when the module-level ``device`` equals ``"cuda"``.
    On any failure neither global is modified, the error is printed, and a
    message about falling back to browser TTS is printed instead of raising.
    """
    global tts_model, tts_processor
    try:
        print("Loading Bark TTS for natural voice...")
        # Kept inside the try so an ImportError is reported like any other
        # load failure rather than propagating to the caller.
        from transformers import AutoProcessor, BarkModel

        model_id = "suno/bark-small"
        processor = AutoProcessor.from_pretrained(model_id)
        model = BarkModel.from_pretrained(model_id)

        if device == "cuda":
            model = model.to("cuda")
    except Exception as e:
        print(f"Could not load TTS model: {e}")
        print("Voice will fall back to browser TTS.")
        return

    # Publish both globals together, only after every step succeeded, so a
    # failure part-way through never leaves a processor without a model (or a
    # CPU-resident model while `device` says "cuda").
    tts_processor = processor
    tts_model = model
    print("✓ Bark TTS loaded - Natural voice ready!")
|
|
|
|
| 273 |
raise HTTPException(status_code=503, detail="TTS model not loaded, use browser fallback")
|
| 274 |
|
| 275 |
try:
|
| 276 |
+
# Use Bark with natural voice preset
|
| 277 |
+
inputs = tts_processor(
|
| 278 |
+
text=req.text,
|
| 279 |
+
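voice_preset="v2/en_speaker_6", # NOTE(review): Suno's Bark speaker library lists en_speaker_6 as a male voice (en_speaker_9 is the female English preset) - confirm the intended voice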
|
| 280 |
+
return_tensors="pt"
|
| 281 |
+
)
|
| 282 |
|
| 283 |
if device == "cuda":
|
| 284 |
inputs = {k: v.to("cuda") for k, v in inputs.items()}
|
| 285 |
|
| 286 |
with torch.no_grad():
|
| 287 |
+
audio_array = tts_model.generate(**inputs)
|
| 288 |
|
| 289 |
# Convert to WAV format
|
| 290 |
import io
|
| 291 |
import wave
|
| 292 |
|
| 293 |
+
audio_np = audio_array.cpu().numpy().squeeze()
|
| 294 |
|
| 295 |
# Normalize to 16-bit PCM
|
| 296 |
audio_np = (audio_np * 32767).astype('int16')
|