Spaces:
Running
Running
Add language param to audio processing + run posture detection every 2nd frame
Browse files - live_processor.py +4 -4
live_processor.py
CHANGED
|
@@ -268,8 +268,8 @@ class LiveSessionProcessor:
|
|
| 268 |
except Exception as e:
|
| 269 |
print(f"[LiveProcessor] Face detect error: {e}")
|
| 270 |
|
| 271 |
-
# Posture detection every
|
| 272 |
-
if self._video_frame_count %
|
| 273 |
try:
|
| 274 |
self._posture_frame_count += 1
|
| 275 |
p_result = self._posture_det.detect(image_bytes)
|
|
@@ -281,7 +281,7 @@ class LiveSessionProcessor:
|
|
| 281 |
|
| 282 |
self._run_fusion()
|
| 283 |
|
| 284 |
-
def process_audio_bytes(self, audio_bytes: bytes):
|
| 285 |
"""Process raw audio bytes for voice emotion + STT (no WebRTC)."""
|
| 286 |
if not self._session_active:
|
| 287 |
return
|
|
@@ -309,7 +309,7 @@ class LiveSessionProcessor:
|
|
| 309 |
tmp.close()
|
| 310 |
|
| 311 |
segments, _info = self._whisper_model.transcribe(
|
| 312 |
-
tmp_path,
|
| 313 |
)
|
| 314 |
for seg in segments:
|
| 315 |
text = seg.text.strip()
|
|
|
|
| 268 |
except Exception as e:
|
| 269 |
print(f"[LiveProcessor] Face detect error: {e}")
|
| 270 |
|
| 271 |
+
# Posture detection every 2nd frame
|
| 272 |
+
if self._video_frame_count % 2 == 0 and self._posture_det is not None:
|
| 273 |
try:
|
| 274 |
self._posture_frame_count += 1
|
| 275 |
p_result = self._posture_det.detect(image_bytes)
|
|
|
|
| 281 |
|
| 282 |
self._run_fusion()
|
| 283 |
|
| 284 |
+
def process_audio_bytes(self, audio_bytes: bytes, language: str = None):
|
| 285 |
"""Process raw audio bytes for voice emotion + STT (no WebRTC)."""
|
| 286 |
if not self._session_active:
|
| 287 |
return
|
|
|
|
| 309 |
tmp.close()
|
| 310 |
|
| 311 |
segments, _info = self._whisper_model.transcribe(
|
| 312 |
+
tmp_path, beam_size=1, language=language
|
| 313 |
)
|
| 314 |
for seg in segments:
|
| 315 |
text = seg.text.strip()
|