Abdalkaderdev committed on
Commit
efcab75
·
1 Parent(s): 3688b19

Remove vision features, focus on voice and response quality

Browse files
Files changed (1) hide show
  1. app/ora_server.py +10 -55
app/ora_server.py CHANGED
@@ -29,10 +29,8 @@ model = None
29
  tokenizer = None
30
  device = "cuda" if torch.cuda.is_available() else "cpu"
31
 
32
- # Advanced AI Models
33
  whisper_model = None
34
- vision_model = None
35
- vision_processor = None
36
  emotion_classifier = None
37
 
38
  class ChatRequest(BaseModel):
@@ -78,39 +76,31 @@ async def load_model():
78
 
79
  @app.on_event("startup")
80
  async def load_advanced_ai():
81
- global whisper_model, vision_model, vision_processor, emotion_classifier
82
 
83
  try:
84
- print("Loading Advanced AI Models...")
85
- from transformers import pipeline, AutoModelForCausalLM, AutoProcessor
86
 
87
- # Whisper V3 for Speech-to-Text
88
- print("Loading Whisper V3...")
89
  whisper_model = pipeline(
90
  "automatic-speech-recognition",
91
  model="openai/whisper-large-v3",
92
  device=0 if device == "cuda" else -1
93
  )
94
- print("✓ Whisper V3 loaded")
95
 
96
- # Moondream2 for Vision
97
- print("Loading Moondream2 Vision...")
98
- vision_model = AutoModelForCausalLM.from_pretrained("vikhyatk/moondream2", trust_remote_code=True)
99
- vision_processor = AutoProcessor.from_pretrained("vikhyatk/moondream2")
100
- if device == "cuda":
101
- vision_model = vision_model.to("cuda")
102
- print("✓ Moondream2 loaded")
103
-
104
- # Emotion Detection
105
  print("Loading Emotion Detector...")
106
  emotion_classifier = pipeline(
107
  "text-classification",
108
  model="j-hartmann/emotion-english-distilroberta-base",
109
  device=0 if device == "cuda" else -1
110
  )
111
- print("✓ Emotion Detector loaded")
112
 
113
- print("All Advanced AI Models Ready!")
114
 
115
  except Exception as e:
116
  print(f"Warning: Could not load some AI models: {e}")
@@ -180,41 +170,6 @@ async def transcribe_audio(req: TranscribeRequest):
180
  except Exception as e:
181
  raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
182
 
183
- class VisionRequest(BaseModel):
184
- image_data: str # Base64 encoded image
185
- question: str = "What spiritual meaning does this image convey?"
186
-
187
- @app.post("/api/analyze-image")
188
- async def analyze_image(req: VisionRequest):
189
- global vision_model, vision_processor
190
-
191
- if vision_model is None or vision_processor is None:
192
- raise HTTPException(status_code=503, detail="Vision model not loaded")
193
-
194
- try:
195
- import base64
196
- from PIL import Image
197
- import io
198
-
199
- # Decode base64 image
200
- image_bytes = base64.b64decode(req.image_data)
201
- image = Image.open(io.BytesIO(image_bytes))
202
-
203
- # Process with Moondream2
204
- inputs = vision_processor(images=image, text=req.question, return_tensors="pt")
205
- if device == "cuda":
206
- inputs = {k: v.to("cuda") for k, v in inputs.items()}
207
-
208
- with torch.no_grad():
209
- output = vision_model.generate(**inputs, max_new_tokens=256)
210
-
211
- analysis = vision_processor.decode(output[0], skip_special_tokens=True)
212
-
213
- return {"analysis": analysis}
214
-
215
- except Exception as e:
216
- raise HTTPException(status_code=500, detail=f"Vision analysis failed: {str(e)}")
217
-
218
  class EmotionRequest(BaseModel):
219
  text: str
220
 
 
29
  tokenizer = None
30
  device = "cuda" if torch.cuda.is_available() else "cpu"
31
 
32
+ # Advanced AI Models (Voice & Response Quality)
33
  whisper_model = None
 
 
34
  emotion_classifier = None
35
 
36
  class ChatRequest(BaseModel):
 
76
 
77
  @app.on_event("startup")
78
  async def load_advanced_ai():
79
+ global whisper_model, emotion_classifier
80
 
81
  try:
82
+ print("Loading Voice & Response Quality AI...")
83
+ from transformers import pipeline
84
 
85
+ # Whisper V3 for Speech-to-Text (Professional quality)
86
+ print("Loading Whisper V3 STT...")
87
  whisper_model = pipeline(
88
  "automatic-speech-recognition",
89
  model="openai/whisper-large-v3",
90
  device=0 if device == "cuda" else -1
91
  )
92
+ print("✓ Whisper V3 loaded - Professional STT ready")
93
 
94
+ # Emotion Detection for Compassionate Responses
 
 
 
 
 
 
 
 
95
  print("Loading Emotion Detector...")
96
  emotion_classifier = pipeline(
97
  "text-classification",
98
  model="j-hartmann/emotion-english-distilroberta-base",
99
  device=0 if device == "cuda" else -1
100
  )
101
+ print("✓ Emotion Detector loaded - Empathetic responses enabled")
102
 
103
+ print("Voice & Response Quality AI Ready!")
104
 
105
  except Exception as e:
106
  print(f"Warning: Could not load some AI models: {e}")
 
170
  except Exception as e:
171
  raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  class EmotionRequest(BaseModel):
174
  text: str
175