Spaces:
Sleeping
Sleeping
solved ffmpeg-related error
Browse files
- requirements.txt +3 -1
- src/app.py +1 -15
requirements.txt
CHANGED
|
@@ -12,8 +12,10 @@ llama-index-llms-llama-cpp
|
|
| 12 |
sentence-transformers>=2.2.0
|
| 13 |
|
| 14 |
# Audio processing
|
| 15 |
-
ffmpeg-python
|
|
|
|
| 16 |
librosa>=0.10.1
|
|
|
|
| 17 |
|
| 18 |
# System utilities
|
| 19 |
psutil
|
|
|
|
| 12 |
sentence-transformers>=2.2.0
|
| 13 |
|
| 14 |
# Audio processing
|
| 15 |
+
ffmpeg-python>=0.2.0
|
| 16 |
+
pydub>=0.25.1
|
| 17 |
librosa>=0.10.1
|
| 18 |
+
soundfile>=0.12.1
|
| 19 |
|
| 20 |
# System utilities
|
| 21 |
psutil
|
src/app.py
CHANGED
|
@@ -264,7 +264,7 @@ def process_speech(audio_data, history):
|
|
| 264 |
sample_rate = 16000
|
| 265 |
|
| 266 |
# Transcribe with error handling
|
| 267 |
-
|
| 268 |
# Format dictionary correctly with required keys
|
| 269 |
input_features = {
|
| 270 |
"raw": audio_array,
|
|
@@ -304,9 +304,6 @@ def process_speech(audio_data, history):
|
|
| 304 |
})}
|
| 305 |
]
|
| 306 |
|
| 307 |
-
except Exception as e:
|
| 308 |
-
print(f"Transcription error: {str(e)}")
|
| 309 |
-
return []
|
| 310 |
else:
|
| 311 |
print(f"Invalid audio format: {type(audio_data)}")
|
| 312 |
return []
|
|
@@ -530,7 +527,6 @@ with gr.Blocks(
|
|
| 530 |
if not audio or not isinstance(audio, tuple):
|
| 531 |
return ""
|
| 532 |
|
| 533 |
-
try:
|
| 534 |
sample_rate, audio_array = audio
|
| 535 |
features = process_audio(audio_array, sample_rate)
|
| 536 |
|
|
@@ -543,10 +539,6 @@ with gr.Blocks(
|
|
| 543 |
elif isinstance(result, str):
|
| 544 |
return result.strip()
|
| 545 |
return ""
|
| 546 |
-
|
| 547 |
-
except Exception as e:
|
| 548 |
-
print(f"Transcription error: {str(e)}")
|
| 549 |
-
return ""
|
| 550 |
|
| 551 |
microphone.stream(
|
| 552 |
fn=update_live_transcription,
|
|
@@ -574,7 +566,6 @@ with gr.Blocks(
|
|
| 574 |
if not text:
|
| 575 |
return history
|
| 576 |
|
| 577 |
-
try:
|
| 578 |
# Limit input length
|
| 579 |
if len(text) > 500:
|
| 580 |
text = text[:500] + "..."
|
|
@@ -600,11 +591,6 @@ with gr.Blocks(
|
|
| 600 |
})}
|
| 601 |
]
|
| 602 |
|
| 603 |
-
except Exception as e:
|
| 604 |
-
print(f"Text processing error: {str(e)}")
|
| 605 |
-
cleanup_memory()
|
| 606 |
-
return history
|
| 607 |
-
|
| 608 |
submit_btn.click(
|
| 609 |
fn=process_text_input,
|
| 610 |
inputs=[text_input, chatbot],
|
|
|
|
| 264 |
sample_rate = 16000
|
| 265 |
|
| 266 |
# Transcribe with error handling
|
| 267 |
+
|
| 268 |
# Format dictionary correctly with required keys
|
| 269 |
input_features = {
|
| 270 |
"raw": audio_array,
|
|
|
|
| 304 |
})}
|
| 305 |
]
|
| 306 |
|
|
|
|
|
|
|
|
|
|
| 307 |
else:
|
| 308 |
print(f"Invalid audio format: {type(audio_data)}")
|
| 309 |
return []
|
|
|
|
| 527 |
if not audio or not isinstance(audio, tuple):
|
| 528 |
return ""
|
| 529 |
|
|
|
|
| 530 |
sample_rate, audio_array = audio
|
| 531 |
features = process_audio(audio_array, sample_rate)
|
| 532 |
|
|
|
|
| 539 |
elif isinstance(result, str):
|
| 540 |
return result.strip()
|
| 541 |
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
|
| 543 |
microphone.stream(
|
| 544 |
fn=update_live_transcription,
|
|
|
|
| 566 |
if not text:
|
| 567 |
return history
|
| 568 |
|
|
|
|
| 569 |
# Limit input length
|
| 570 |
if len(text) > 500:
|
| 571 |
text = text[:500] + "..."
|
|
|
|
| 591 |
})}
|
| 592 |
]
|
| 593 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 594 |
submit_btn.click(
|
| 595 |
fn=process_text_input,
|
| 596 |
inputs=[text_input, chatbot],
|