Spaces:

Hackaithon
/

project

Runtime error

App Files Files Community

Letsch22 committed on Apr 10, 2024

Commit

010c52a

1 Parent(s): 5e8b0f6

Simplify video transcription

Browse files

Files changed (1) hide show

app.py +5 -28

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import os
 import urllib.request
-import subprocess
 from dataclasses import dataclass
 from time import sleep
 from typing import Dict, List, Generator
@@ -28,24 +27,6 @@ class MockInterviewer:
         self._assistant_id_cache: Dict[Config, str] = {}
         self.clear_thread()
-    def convert_webm_to_mp3(input_webm, output_mp3):
-        command = [
-            'ffmpeg',
-            '-i', input_webm,  # Input file
-            '-vn',             # No video (remove video stream)
-            '-ab', '160k',     # Audio bitrate
-            '-ar', '44100',    # Audio sample rate
-            '-y',              # Overwrite output file if it exists
-            '-f', 'mp3',       # Output format
-            output_mp3        # Output file
-        ]
-        try:
-            subprocess.run(command, check=True)
-            print(f"File converted successfully and saved as {output_mp3}")
-        except subprocess.CalledProcessError as e:
-            print(f"An error occurred while converting the file: {e}")
     def chat_with_text(
         self,
         message: Dict,
@@ -61,7 +42,7 @@ class MockInterviewer:
         config = Config(job_role, company, job_description, behavioral_count, technical_count, situational_count, case_count)
         yield self._chat(message, config)
-    def chat_with_audio(
         self,
         video: str,
         job_role: str,
@@ -72,14 +53,12 @@ class MockInterviewer:
         situational_count: int,
         case_count: int
     ) -> str:
-        audio = 'temp_audio.mp3'
-        MockInterviewer.convert_webm_to_mp3(video,audio)
-        with open(audio, 'rb') as audio_file:
             transcriptions = self._client.audio.transcriptions.create(
                 model='whisper-1',
-                file=audio_file,
             )
-        os.remove(audio)
         config = Config(job_role, company, job_description, behavioral_count, technical_count, situational_count, case_count)
         response = self._chat(transcriptions.text, config)
         return [(transcriptions.text, response)]
@@ -206,14 +185,12 @@ with gr.Blocks(theme=theme) as demo:
                 additional_inputs=[job_role, company, job_description, behavioral_count, technical_count, situational_count, case_count],
                 retry_btn=None,
                 undo_btn=None)
-            chat_interface.chatbot.height= '45vh'
             chat_interface.load(mock_interviewer.clear_thread)
             chat_interface.clear_btn.click(mock_interviewer.clear_thread)
             video = gr.Video(sources='webcam', include_audio=True)
-            video.stop_recording(fn=mock_interviewer.chat_with_audio,
                                 inputs=[video, job_role, company, job_description, behavioral_count, technical_count, situational_count, case_count],
                                 outputs=[chat_interface.chatbot],
                                 api_name=False).then(lambda:None, None, video, queue=False)

 import os
 import urllib.request
 from dataclasses import dataclass
 from time import sleep
 from typing import Dict, List, Generator
         self._assistant_id_cache: Dict[Config, str] = {}
         self.clear_thread()
     def chat_with_text(
         self,
         message: Dict,
         config = Config(job_role, company, job_description, behavioral_count, technical_count, situational_count, case_count)
         yield self._chat(message, config)
+    def chat_with_video(
         self,
         video: str,
         job_role: str,
         situational_count: int,
         case_count: int
     ) -> str:
+        with open(video, 'rb') as file:
             transcriptions = self._client.audio.transcriptions.create(
                 model='whisper-1',
+                file=file,
             )
+        os.remove(video)
         config = Config(job_role, company, job_description, behavioral_count, technical_count, situational_count, case_count)
         response = self._chat(transcriptions.text, config)
         return [(transcriptions.text, response)]
                 additional_inputs=[job_role, company, job_description, behavioral_count, technical_count, situational_count, case_count],
                 retry_btn=None,
                 undo_btn=None)
             chat_interface.load(mock_interviewer.clear_thread)
             chat_interface.clear_btn.click(mock_interviewer.clear_thread)
             video = gr.Video(sources='webcam', include_audio=True)
+            video.stop_recording(fn=mock_interviewer.chat_with_video,
                                 inputs=[video, job_role, company, job_description, behavioral_count, technical_count, situational_count, case_count],
                                 outputs=[chat_interface.chatbot],
                                 api_name=False).then(lambda:None, None, video, queue=False)