Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -73,7 +73,7 @@ app.add_middleware(
|
|
| 73 |
class AnalysisResults:
|
| 74 |
timelineData: List[Dict[str, Any]] = field(default_factory=list)
|
| 75 |
duration: float = 0.0
|
| 76 |
-
|
| 77 |
diarizationErrorRate: Optional[float] = None
|
| 78 |
speakerError: Optional[float] = None
|
| 79 |
missedSpeech: Optional[float] = None
|
|
@@ -197,12 +197,12 @@ def analyze_audio(audio_file: str,
|
|
| 197 |
try:
|
| 198 |
print(f"Loading Whisper model '{model_name}' on {device}...")
|
| 199 |
model = whisperx.load_model(model_name, device, compute_type="float32")
|
| 200 |
-
audio_loaded = whisperx.load_audio(audio_for_model)
|
| 201 |
-
language_code_detected = model.detect_language(audio_loaded)
|
| 202 |
-
language_code = language_code_detected
|
| 203 |
print("Transcribing audio...")
|
| 204 |
result = model.transcribe(audio_loaded, batch_size=4, language="ur"
|
| 205 |
)
|
|
|
|
|
|
|
| 206 |
full_text = " ".join([seg['text'] for seg in result.get("segments", [])]).strip()
|
| 207 |
results.rawTranscriptionText = full_text
|
| 208 |
aligned = {"segments": result["segments"]}
|
|
@@ -377,7 +377,7 @@ async def upload_file(audio_file: UploadFile = File(...)):
|
|
| 377 |
analysis_result.duration = 0.0
|
| 378 |
return AnalysisResult(
|
| 379 |
duration=force_float(analysis_result.duration) or 0.0,
|
| 380 |
-
language=
|
| 381 |
timeline_data=[
|
| 382 |
TimelineItem(
|
| 383 |
start=force_float(item.get('start')) or 0.0,
|
|
|
|
| 73 |
class AnalysisResults:
|
| 74 |
timelineData: List[Dict[str, Any]] = field(default_factory=list)
|
| 75 |
duration: float = 0.0
|
| 76 |
+
languageCode: str = "unknown"
|
| 77 |
diarizationErrorRate: Optional[float] = None
|
| 78 |
speakerError: Optional[float] = None
|
| 79 |
missedSpeech: Optional[float] = None
|
|
|
|
| 197 |
try:
|
| 198 |
print(f"Loading Whisper model '{model_name}' on {device}...")
|
| 199 |
model = whisperx.load_model(model_name, device, compute_type="float32")
|
| 200 |
+
audio_loaded = whisperx.load_audio(audio_for_model)
|
|
|
|
|
|
|
| 201 |
print("Transcribing audio...")
|
| 202 |
result = model.transcribe(audio_loaded, batch_size=4, language="ur"
|
| 203 |
)
|
| 204 |
+
language_code = result.get("detected_language")
|
| 205 |
+
results.languageCode = language_code
|
| 206 |
full_text = " ".join([seg['text'] for seg in result.get("segments", [])]).strip()
|
| 207 |
results.rawTranscriptionText = full_text
|
| 208 |
aligned = {"segments": result["segments"]}
|
|
|
|
| 377 |
analysis_result.duration = 0.0
|
| 378 |
return AnalysisResult(
|
| 379 |
duration=force_float(analysis_result.duration) or 0.0,
|
| 380 |
+
language=analysis_result.languageCode,
|
| 381 |
timeline_data=[
|
| 382 |
TimelineItem(
|
| 383 |
start=force_float(item.get('start')) or 0.0,
|