acrep committed on
Commit
1fde471
·
1 Parent(s): 1528267

working stt skeleton

Browse files
Files changed (3) hide show
  1. .env.example +0 -1
  2. .gitignore +1 -1
  3. app.py +16 -29
.env.example DELETED
@@ -1 +0,0 @@
1
- OPENAI_API_KEY=PASTE-KEY-HERE
 
 
.gitignore CHANGED
@@ -120,7 +120,7 @@ celerybeat.pid
120
  *.sage.py
121
 
122
  # Environments
123
- .env
124
  .venv
125
  env/
126
  venv/
 
120
  *.sage.py
121
 
122
  # Environments
123
+ #.env
124
  .venv
125
  env/
126
  venv/
app.py CHANGED
@@ -95,28 +95,19 @@ class MockInterviewer:
95
  def _create_cache_key(self, job_role: str, company: str) -> str:
96
  return f'{job_role.lower()}+{company.lower()}'
97
 
98
- def transcribe_and_chat(self, audio_file, job_role: str, company: str):
99
- # Transcribe audio
100
- transcript = self.transcribe_audio(audio_file)
101
- # Now proceed with chat using the transcript
102
- # Assuming chat method can handle initial user message as text
103
- response = next(self.chat({'text': transcript}, [], job_role, company))
104
- return response
105
-
106
- def transcribe_audio(self, audio_file_path):
107
- # Read the audio file
108
- with open(audio_file_path, "rb") as audio_file:
109
- audio_data = audio_file.read()
110
-
111
- # Use OpenAI's API to transcribe the audio
112
- response = openai.Speech.create(
113
- audio=audio_data,
114
- model="whisper-large" # or whichever model is preferred
115
- )
116
-
117
- # Extract and return the transcription text
118
- transcript = response['data']['text']
119
- return transcript
120
 
121
  # Creating the Gradio interface
122
  with gr.Blocks() as demo:
@@ -125,18 +116,13 @@ with gr.Blocks() as demo:
125
  with gr.Row():
126
  job_role = gr.Textbox(label='Job Role', placeholder='Product Manager')
127
  company = gr.Textbox(label='Company', placeholder='Amazon')
128
- usr_audio = gr.Audio(source="microphone", type="filepath", label="Record or Upload Audio")
129
 
130
  submit_btn = gr.Button("Submit")
131
 
132
  response_output = gr.Textbox(label="Interviewer Response")
 
133
 
134
- # Use transcribe_and_chat when audio is provided
135
- submit_btn.click(
136
- fn=mock_interviewer.transcribe_and_chat,
137
- inputs=[usr_audio, job_role, company],
138
- outputs=response_output
139
- )
140
 
141
  chat_interface = gr.ChatInterface(
142
  fn=lambda usr_message, history, job_role, company: mock_interviewer.chat(usr_message, history, job_role, company),
@@ -150,6 +136,7 @@ with gr.Blocks() as demo:
150
 
151
  chat_interface.load(mock_interviewer.clear_thread)
152
  chat_interface.clear_btn.click(mock_interviewer.clear_thread)
 
153
 
154
  if __name__ == '__main__':
155
  demo.launch().queue()
 
95
  def _create_cache_key(self, job_role: str, company: str) -> str:
96
  return f'{job_role.lower()}+{company.lower()}'
97
 
98
+ def transcript(audio):
99
+ try:
100
+ print(audio)
101
+ audio_file = open(audio, "rb")
102
+ transcriptions = openai.audio.transcriptions.create(
103
+ model="whisper-1",
104
+ file=audio_file,
105
+ )
106
+ except Exception as error:
107
+ print(str(error))
108
+ raise gr.Error("An error occurred while generating speech. Please check your API key and come back try again.")
109
+
110
+ return transcriptions.text
 
 
 
 
 
 
 
 
 
111
 
112
  # Creating the Gradio interface
113
  with gr.Blocks() as demo:
 
116
  with gr.Row():
117
  job_role = gr.Textbox(label='Job Role', placeholder='Product Manager')
118
  company = gr.Textbox(label='Company', placeholder='Amazon')
119
+ audio = gr.Audio(sources=["microphone"], type="filepath")
120
 
121
  submit_btn = gr.Button("Submit")
122
 
123
  response_output = gr.Textbox(label="Interviewer Response")
124
+ stt_output = gr.Textbox(label="Speech-To-Text Transcription")
125
 
 
 
 
 
 
 
126
 
127
  chat_interface = gr.ChatInterface(
128
  fn=lambda usr_message, history, job_role, company: mock_interviewer.chat(usr_message, history, job_role, company),
 
136
 
137
  chat_interface.load(mock_interviewer.clear_thread)
138
  chat_interface.clear_btn.click(mock_interviewer.clear_thread)
139
+ audio.stop_recording(fn=MockInterviewer.transcript, inputs=[audio], outputs=stt_output, api_name=False)
140
 
141
  if __name__ == '__main__':
142
  demo.launch().queue()