Spaces:

MicroHealth
/

AV-to-transcripts

Paused

App Files Files Community

bluenevus committed on Apr 26, 2025

Commit

e72ac8d

verified ·

1 Parent(s): b7ca97f

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -30

app.py CHANGED Viewed

@@ -76,12 +76,22 @@ app.layout = dbc.Container([
     ])
 ], fluid=True)
-def chunk_audio(audio_segment, chunk_length_ms=60000):
     chunks = []
-    for i in range(0, len(audio_segment), chunk_length_ms):
-        chunks.append(audio_segment[i:i+chunk_length_ms])
     return chunks
 def process_media(file_path, is_url=False):
     global generated_file, transcription_text
     temp_file = None
@@ -89,22 +99,22 @@ def process_media(file_path, is_url=False):
     try:
         if is_url:
             logger.info(f"Processing URL: {file_path}")
-            response = requests.get(file_path)
-            content_type = response.headers.get('content-type', '')
-            if 'audio' in content_type:
-                suffix = '.mp3'
-            elif 'video' in content_type:
-                suffix = '.mp4'
-            else:
-                suffix = ''
-            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
-            temp_file.write(response.content)
-            temp_file.close()
         else:
             logger.info("Processing uploaded file")
             temp_file = tempfile.NamedTemporaryFile(delete=False)
             temp_file.write(file_path)
             temp_file.close()
         file_extension = os.path.splitext(temp_file.name)[1].lower()
         logger.info(f"Detected file extension: {file_extension}")
@@ -127,26 +137,20 @@ def process_media(file_path, is_url=False):
         logger.info(f"Audio extracted to WAV: {wav_path}")
         audio = AudioSegment.from_wav(wav_path)
         chunks = chunk_audio(audio)
-        full_transcript = ""
-        for i, chunk in enumerate(chunks):
-            logger.info(f"Processing chunk {i+1}/{len(chunks)}")
-            chunk_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
-            chunk.export(chunk_file.name, format="wav")
-            with open(chunk_file.name, "rb") as audio_file:
-                transcript = openai.Audio.transcribe("whisper-1", audio_file)
-                full_transcript += transcript.get('text', '') + " "
-            os.unlink(chunk_file.name)
-        formatted_transcript = full_transcript.strip()
         transcription_text = formatted_transcript
         generated_file = io.BytesIO(transcription_text.encode())
-        logger.info("Transcription completed successfully")
-        return "Transcription completed successfully!", True
     except Exception as e:
         logger.error(f"Error during processing: {str(e)}")
         return f"An error occurred: {str(e)}", False
@@ -197,7 +201,7 @@ def update_output(contents, n_clicks, filename, url):
 def download_transcription(n_clicks):
     if n_clicks is None:
         return None
-    return dcc.send_bytes(generated_file.getvalue(), "transcription.txt")
 if __name__ == '__main__':
     print("Starting the Dash application...")

     ])
 ], fluid=True)
+def chunk_audio(audio_segment, chunk_size_ms=60000):
     chunks = []
+    for i in range(0, len(audio_segment), chunk_size_ms):
+        chunks.append(audio_segment[i:i+chunk_size_ms])
     return chunks
+def transcribe_audio_chunks(chunks):
+    transcriptions = []
+    for chunk in chunks:
+        with io.BytesIO() as audio_file:
+            chunk.export(audio_file, format="wav")
+            audio_file.seek(0)
+            transcript = openai.Audio.transcribe("whisper-1", audio_file)
+            transcriptions.append(transcript.get('text', ''))
+    return ' '.join(transcriptions)
 def process_media(file_path, is_url=False):
     global generated_file, transcription_text
     temp_file = None
     try:
         if is_url:
             logger.info(f"Processing URL: {file_path}")
+            try:
+                response = requests.get(file_path)
+                response.raise_for_status()
+                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
+                temp_file.write(response.content)
+                temp_file.close()
+                logger.info(f"URL content downloaded: {temp_file.name}")
+            except Exception as e:
+                logger.error(f"Error downloading URL content: {str(e)}")
+                return f"Error downloading URL content: {str(e)}", False
         else:
             logger.info("Processing uploaded file")
             temp_file = tempfile.NamedTemporaryFile(delete=False)
             temp_file.write(file_path)
             temp_file.close()
+            logger.info(f"Uploaded file saved: {temp_file.name}")
         file_extension = os.path.splitext(temp_file.name)[1].lower()
         logger.info(f"Detected file extension: {file_extension}")
         logger.info(f"Audio extracted to WAV: {wav_path}")
+        # Chunk the audio file
         audio = AudioSegment.from_wav(wav_path)
         chunks = chunk_audio(audio)
+        # Transcribe chunks
+        transcription = transcribe_audio_chunks(chunks)
+        # Diarization (simplified as OpenAI doesn't provide speaker diarization)
+        formatted_transcript = f"Speaker 1: {transcription}"
         transcription_text = formatted_transcript
         generated_file = io.BytesIO(transcription_text.encode())
+        logger.info("Transcription and diarization completed successfully")
+        return "Transcription and diarization completed successfully!", True
     except Exception as e:
         logger.error(f"Error during processing: {str(e)}")
         return f"An error occurred: {str(e)}", False
 def download_transcription(n_clicks):
     if n_clicks is None:
         return None
+    return dcc.send_bytes(generated_file.getvalue(), "diarized_transcription.txt")
 if __name__ == '__main__':
     print("Starting the Dash application...")