Update app.py
Browse files
app.py
CHANGED
|
@@ -72,7 +72,7 @@ app.layout = dbc.Container([
|
|
| 72 |
def transcribe_and_diarize_audio(contents, filename):
|
| 73 |
global generated_file, transcription_text
|
| 74 |
temp_audio_file = None
|
| 75 |
-
|
| 76 |
try:
|
| 77 |
content_type, content_string = contents.split(',')
|
| 78 |
decoded = base64.b64decode(content_string)
|
|
@@ -100,13 +100,21 @@ def transcribe_and_diarize_audio(contents, filename):
|
|
| 100 |
# Rewind the file for diarization
|
| 101 |
audio_file.seek(0)
|
| 102 |
|
| 103 |
-
# Perform diarization (speaker
|
| 104 |
-
diarized_transcript = openai.Audio.transcribe("whisper-1", audio_file,
|
|
|
|
|
|
|
| 105 |
|
| 106 |
# Format the diarized transcript
|
| 107 |
formatted_transcript = ""
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
transcription_text = formatted_transcript
|
| 112 |
logger.info("Transcription and diarization completed successfully")
|
|
@@ -121,10 +129,10 @@ def transcribe_and_diarize_audio(contents, filename):
|
|
| 121 |
return f"An error occurred during transcription and diarization: {str(e)}", False
|
| 122 |
finally:
|
| 123 |
# Clean up temporary files
|
| 124 |
-
if temp_audio_file:
|
| 125 |
os.unlink(temp_audio_file.name)
|
| 126 |
-
if
|
| 127 |
-
os.unlink(
|
| 128 |
|
| 129 |
@app.callback(
|
| 130 |
[Output('output-audio-upload', 'children'),
|
|
|
|
| 72 |
def transcribe_and_diarize_audio(contents, filename):
|
| 73 |
global generated_file, transcription_text
|
| 74 |
temp_audio_file = None
|
| 75 |
+
wav_path = None
|
| 76 |
try:
|
| 77 |
content_type, content_string = contents.split(',')
|
| 78 |
decoded = base64.b64decode(content_string)
|
|
|
|
| 100 |
# Rewind the file for diarization
|
| 101 |
audio_file.seek(0)
|
| 102 |
|
| 103 |
+
# Perform diarization (speaker detection)
|
| 104 |
+
diarized_transcript = openai.Audio.transcribe("whisper-1", audio_file, response_format="verbose_json")
|
| 105 |
+
|
| 106 |
+
logger.info(f"OpenAI API Response: {diarized_transcript}")
|
| 107 |
|
| 108 |
# Format the diarized transcript
|
| 109 |
formatted_transcript = ""
|
| 110 |
+
if 'segments' in diarized_transcript:
|
| 111 |
+
for segment in diarized_transcript["segments"]:
|
| 112 |
+
speaker = segment.get('speaker', 'Unknown')
|
| 113 |
+
text = segment.get('text', '')
|
| 114 |
+
formatted_transcript += f"Speaker {speaker}: {text}\n\n"
|
| 115 |
+
else:
|
| 116 |
+
# If no segments, use the full transcript
|
| 117 |
+
formatted_transcript = transcript.get('text', 'No transcription available.')
|
| 118 |
|
| 119 |
transcription_text = formatted_transcript
|
| 120 |
logger.info("Transcription and diarization completed successfully")
|
|
|
|
| 129 |
return f"An error occurred during transcription and diarization: {str(e)}", False
|
| 130 |
finally:
|
| 131 |
# Clean up temporary files
|
| 132 |
+
if temp_audio_file and os.path.exists(temp_audio_file.name):
|
| 133 |
os.unlink(temp_audio_file.name)
|
| 134 |
+
if wav_path and os.path.exists(wav_path):
|
| 135 |
+
os.unlink(wav_path)
|
| 136 |
|
| 137 |
@app.callback(
|
| 138 |
[Output('output-audio-upload', 'children'),
|