acrep committed on
Commit
1fde471
·
1 Parent(s): 1528267

working stt skeleton

Browse files
Files changed (3) hide show
  1. .env.example +0 -1
  2. .gitignore +1 -1
  3. app.py +16 -29
.env.example DELETED
@@ -1 +0,0 @@
1
- OPENAI_API_KEY=PASTE-KEY-HERE
 
 
.gitignore CHANGED
@@ -120,7 +120,7 @@ celerybeat.pid
120
  *.sage.py
121
 
122
  # Environments
123
- .env
124
  .venv
125
  env/
126
  venv/
 
120
  *.sage.py
121
 
122
  # Environments
123
+ #.env
124
  .venv
125
  env/
126
  venv/
app.py CHANGED
@@ -95,28 +95,19 @@ class MockInterviewer:
95
  def _create_cache_key(self, job_role: str, company: str) -> str:
96
  return f'{job_role.lower()}+{company.lower()}'
97
 
98
- def transcribe_and_chat(self, audio_file, job_role: str, company: str):
99
- # Transcribe audio
100
- transcript = self.transcribe_audio(audio_file)
101
- # Now proceed with chat using the transcript
102
- # Assuming chat method can handle initial user message as text
103
- response = next(self.chat({'text': transcript}, [], job_role, company))
104
- return response
105
-
106
- def transcribe_audio(self, audio_file_path):
107
- # Read the audio file
108
- with open(audio_file_path, "rb") as audio_file:
109
- audio_data = audio_file.read()
110
-
111
- # Use OpenAI's API to transcribe the audio
112
- response = openai.Speech.create(
113
- audio=audio_data,
114
- model="whisper-large" # or whichever model is preferred
115
- )
116
-
117
- # Extract and return the transcription text
118
- transcript = response['data']['text']
119
- return transcript
120
 
121
  # Creating the Gradio interface
122
  with gr.Blocks() as demo:
@@ -125,18 +116,13 @@ with gr.Blocks() as demo:
125
  with gr.Row():
126
  job_role = gr.Textbox(label='Job Role', placeholder='Product Manager')
127
  company = gr.Textbox(label='Company', placeholder='Amazon')
128
- usr_audio = gr.Audio(source="microphone", type="filepath", label="Record or Upload Audio")
129
 
130
  submit_btn = gr.Button("Submit")
131
 
132
  response_output = gr.Textbox(label="Interviewer Response")
 
133
 
134
- # Use transcribe_and_chat when audio is provided
135
- submit_btn.click(
136
- fn=mock_interviewer.transcribe_and_chat,
137
- inputs=[usr_audio, job_role, company],
138
- outputs=response_output
139
- )
140
 
141
  chat_interface = gr.ChatInterface(
142
  fn=lambda usr_message, history, job_role, company: mock_interviewer.chat(usr_message, history, job_role, company),
@@ -150,6 +136,7 @@ with gr.Blocks() as demo:
150
 
151
  chat_interface.load(mock_interviewer.clear_thread)
152
  chat_interface.clear_btn.click(mock_interviewer.clear_thread)
 
153
 
154
  if __name__ == '__main__':
155
  demo.launch().queue()
 
95
  def _create_cache_key(self, job_role: str, company: str) -> str:
96
  return f'{job_role.lower()}+{company.lower()}'
97
 
98
+ def transcript(audio):
99
+ try:
100
+ print(audio)
101
+ audio_file = open(audio, "rb")
102
+ transcriptions = openai.audio.transcriptions.create(
103
+ model="whisper-1",
104
+ file=audio_file,
105
+ )
106
+ except Exception as error:
107
+ print(str(error))
108
+ raise gr.Error("An error occurred while generating speech. Please check your API key and come back try again.")
109
+
110
+ return transcriptions.text
 
 
 
 
 
 
 
 
 
111
 
112
  # Creating the Gradio interface
113
  with gr.Blocks() as demo:
 
116
  with gr.Row():
117
  job_role = gr.Textbox(label='Job Role', placeholder='Product Manager')
118
  company = gr.Textbox(label='Company', placeholder='Amazon')
119
+ audio = gr.Audio(sources=["microphone"], type="filepath")
120
 
121
  submit_btn = gr.Button("Submit")
122
 
123
  response_output = gr.Textbox(label="Interviewer Response")
124
+ stt_output = gr.Textbox(label="Speech-To-Text Transcription")
125
 
 
 
 
 
 
 
126
 
127
  chat_interface = gr.ChatInterface(
128
  fn=lambda usr_message, history, job_role, company: mock_interviewer.chat(usr_message, history, job_role, company),
 
136
 
137
  chat_interface.load(mock_interviewer.clear_thread)
138
  chat_interface.clear_btn.click(mock_interviewer.clear_thread)
139
+ audio.stop_recording(fn=MockInterviewer.transcript, inputs=[audio], outputs=stt_output, api_name=False)
140
 
141
  if __name__ == '__main__':
142
  demo.launch().queue()