Commit 5306cf5
Parent(s): b062b38

Add Whisper V3, Moondream2, and Emotion Detection with API endpoints

app/ora_server.py  CHANGED  (+129 -0)
@@ -29,6 +29,12 @@ model = None
 tokenizer = None
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
+# Advanced AI Models
+whisper_model = None
+vision_model = None
+vision_processor = None
+emotion_classifier = None
+
 class ChatRequest(BaseModel):
     message: str
     history: list = []
@@ -70,6 +76,46 @@ async def load_model():
 
     print("ORA Model Connected and Ready.")
 
+@app.on_event("startup")
+async def load_advanced_ai():
+    global whisper_model, vision_model, vision_processor, emotion_classifier
+
+    try:
+        print("Loading Advanced AI Models...")
+        from transformers import pipeline, AutoModelForCausalLM, AutoProcessor
+
+        # Whisper V3 for Speech-to-Text
+        print("Loading Whisper V3...")
+        whisper_model = pipeline(
+            "automatic-speech-recognition",
+            model="openai/whisper-large-v3",
+            device=0 if device == "cuda" else -1
+        )
+        print("✓ Whisper V3 loaded")
+
+        # Moondream2 for Vision
+        print("Loading Moondream2 Vision...")
+        vision_model = AutoModelForCausalLM.from_pretrained("vikhyatk/moondream2", trust_remote_code=True)
+        vision_processor = AutoProcessor.from_pretrained("vikhyatk/moondream2")
+        if device == "cuda":
+            vision_model = vision_model.to("cuda")
+        print("✓ Moondream2 loaded")
+
+        # Emotion Detection
+        print("Loading Emotion Detector...")
+        emotion_classifier = pipeline(
+            "text-classification",
+            model="j-hartmann/emotion-english-distilroberta-base",
+            device=0 if device == "cuda" else -1
+        )
+        print("✓ Emotion Detector loaded")
+
+        print("All Advanced AI Models Ready!")
+
+    except Exception as e:
+        print(f"Warning: Could not load some AI models: {e}")
+        print("ORA will continue with basic functionality.")
+
 @app.post("/api/chat")
 async def chat_endpoint(req: ChatRequest):
     global model, tokenizer
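Because model loading happens in this startup hook and a failure only prints a warning, the endpoints added in the next hunk answer 503 until their globals are populated. Below is a minimal client-side sketch of how a caller might wait out that window; the base URL, the `requests` dependency, and the retry cadence are illustrative assumptions, not part of the commit.

# Hypothetical client helper: retry while the server reports 503
# (model still loading). Not part of the commit.
import time
import requests

BASE = "http://localhost:7860"  # assumed address; adjust for your deployment

def post_when_ready(path: str, payload: dict, timeout_s: float = 600.0) -> dict:
    """POST payload to BASE + path, retrying while the server returns 503."""
    deadline = time.monotonic() + timeout_s
    while True:
        resp = requests.post(BASE + path, json=payload, timeout=60)
        if resp.status_code != 503:
            resp.raise_for_status()
            return resp.json()
        if time.monotonic() >= deadline:
            raise TimeoutError(f"{path} still unavailable after {timeout_s:.0f}s")
        time.sleep(5)  # first boot can take minutes while weights download

Polling every five seconds is arbitrary; on a cold start, downloading the Whisper and Moondream2 weights alone can take several minutes.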
@@ -107,6 +153,89 @@ async def chat_endpoint(req: ChatRequest):
 
     return {"response": response_text}
 
+# Advanced AI Endpoints
+
+class TranscribeRequest(BaseModel):
+    audio_data: str  # Base64 encoded audio
+
+@app.post("/api/transcribe")
+async def transcribe_audio(req: TranscribeRequest):
+    global whisper_model
+
+    if whisper_model is None:
+        raise HTTPException(status_code=503, detail="Whisper model not loaded")
+
+    try:
+        import base64
+        import io
+
+        # Decode base64 audio
+        audio_bytes = base64.b64decode(req.audio_data)
+
+        # Transcribe with Whisper
+        result = whisper_model(audio_bytes)
+
+        return {"text": result["text"], "confidence": 1.0}
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
+
+class VisionRequest(BaseModel):
+    image_data: str  # Base64 encoded image
+    question: str = "What spiritual meaning does this image convey?"
+
+@app.post("/api/analyze-image")
+async def analyze_image(req: VisionRequest):
+    global vision_model, vision_processor
+
+    if vision_model is None or vision_processor is None:
+        raise HTTPException(status_code=503, detail="Vision model not loaded")
+
+    try:
+        import base64
+        from PIL import Image
+        import io
+
+        # Decode base64 image
+        image_bytes = base64.b64decode(req.image_data)
+        image = Image.open(io.BytesIO(image_bytes))
+
+        # Process with Moondream2
+        inputs = vision_processor(images=image, text=req.question, return_tensors="pt")
+        if device == "cuda":
+            inputs = {k: v.to("cuda") for k, v in inputs.items()}
+
+        with torch.no_grad():
+            output = vision_model.generate(**inputs, max_new_tokens=256)
+
+        analysis = vision_processor.decode(output[0], skip_special_tokens=True)
+
+        return {"analysis": analysis}
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Vision analysis failed: {str(e)}")
+
+class EmotionRequest(BaseModel):
+    text: str
+
+@app.post("/api/detect-emotion")
+async def detect_emotion(req: EmotionRequest):
+    global emotion_classifier
+
+    if emotion_classifier is None:
+        raise HTTPException(status_code=503, detail="Emotion model not loaded")
+
+    try:
+        result = emotion_classifier(req.text)[0]
+        return {
+            "emotion": result["label"],
+            "confidence": result["score"]
+        }
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Emotion detection failed: {str(e)}")
+
+
 
 
 # TTS endpoint using Supertonic 2
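Putting the new endpoints together, a usage sketch: the request fields (`audio_data`, `image_data`, `question`, `text`) and the response shapes come from the Pydantic models and return statements above, while the base URL and sample file paths are illustrative assumptions.

# Hypothetical client calls for the three new endpoints. Not part of the commit.
import base64
import requests

BASE = "http://localhost:7860"  # assumed address; adjust for your deployment

def b64_file(path: str) -> str:
    """Return a file's bytes as a base64 string, as the API expects."""
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("ascii")

# Speech-to-text: base64-encoded audio in, transcript out.
print(requests.post(f"{BASE}/api/transcribe",
                    json={"audio_data": b64_file("sample.wav")}).json())

# Vision: base64 image in, analysis out; `question` overrides the default prompt.
print(requests.post(f"{BASE}/api/analyze-image",
                    json={"image_data": b64_file("photo.jpg"),
                          "question": "What is shown in this image?"}).json())

# Emotion detection: plain text in, top label and score out.
print(requests.post(f"{BASE}/api/detect-emotion",
                    json={"text": "I can't believe this finally works!"}).json())

One caveat worth noting: /api/transcribe hardcodes "confidence": 1.0; the ASR pipeline does not report a confidence score, so clients should not treat that field as meaningful.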