| import sounddevice as sd |
| import scipy.io.wavfile as wavfile |
| import numpy as np |
| import gradio as gr |
| from groq import Groq |
| import tempfile |
| import os |
|
|
class Recorder:
    """Toggle-style audio recorder built on a ``sounddevice`` input stream.

    Audio chunks delivered to :meth:`callback` are accumulated in
    ``self.frames`` while ``self.recording`` is true, and can be written
    to a temporary WAV file with :meth:`save_audio`.
    """

    def __init__(self, sample_rate=44100, channels=2):
        """Create an idle recorder.

        Args:
            sample_rate: Sampling rate passed to the input stream and
                written into the WAV header.
            channels: Number of input channels requested from the device.
        """
        self.recording = False
        self.frames = []
        self.sample_rate = sample_rate
        self.channels = channels
        self.stream = None

    def toggle_recording(self):
        """Start recording if idle, otherwise stop the active recording.

        Returns:
            The status text describing the new state.

        Raises:
            Exception: Whatever the stream raises while being opened,
                started, stopped or closed; ``self.recording`` is left
                false in every failure case.
        """
        if not self.recording:
            self.frames = []
            stream = sd.InputStream(
                callback=self.callback,
                channels=self.channels,
                samplerate=self.sample_rate,
            )
            # Raise the flag *before* starting the stream: the callback drops
            # every chunk it receives while the flag is false, so setting it
            # afterwards would lose the beginning of the recording.
            self.recording = True
            try:
                stream.start()
            except Exception:
                self.recording = False
                stream.close()
                raise
            self.stream = stream
            return "Recording... Press to Stop"

        # Detach the stream first so a failure below cannot leave a stale
        # handle behind for the next toggle.
        stream, self.stream = self.stream, None
        try:
            if stream is not None:
                stream.stop()
                stream.close()
        finally:
            self.recording = False
        return "Recording stopped. Press to Record"

    def callback(self, indata, frames, time, status):
        """Store one chunk of input audio while a recording is active.

        The signature is the one the input stream invokes its callback
        with; only ``indata`` is used here.
        """
        if self.recording:
            # Copy the chunk so the stored data does not alias the buffer
            # handed to the callback.
            self.frames.append(indata.copy())

    def save_audio(self):
        """Write the captured chunks to a temporary ``.wav`` file.

        Returns:
            The path of the written file, or ``None`` when no audio has
            been captured. The caller is responsible for deleting the file.
        """
        # Work on a snapshot so chunks appended concurrently by the stream
        # callback cannot change the list while it is being concatenated.
        captured = list(self.frames)
        if not captured:
            return None

        audio_data = np.concatenate(captured, axis=0)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
            wav_path = temp_wav_file.name
        # The handle is closed before writing; only the reserved name is used.
        try:
            wavfile.write(wav_path, self.sample_rate, audio_data)
        except Exception:
            # Do not leave an empty or partial temp file behind on failure.
            os.remove(wav_path)
            raise
        return wav_path
|
|
# Single module-level recorder shared by the UI callbacks below.
recorder = Recorder()


def record():
    """Flip the shared recorder between recording and idle.

    Returns:
        The status text produced by ``Recorder.toggle_recording``.
    """
    status_text = recorder.toggle_recording()
    return status_text
|
|
def transcribe():
    """Transcribe the audio captured by the shared recorder via Groq.

    The captured audio is written to a temporary WAV file, uploaded to the
    ``whisper-large-v3`` transcription endpoint, and the temporary file is
    removed afterwards whether or not the request succeeds.

    The API key is read from the ``GROQ_API_KEY`` environment variable;
    credentials must not be committed to source control.

    Returns:
        The transcription text, or a human-readable message when there is
        no audio, no API key, or no ``text`` attribute on the response.
    """
    audio_file = recorder.save_audio()
    if not audio_file:
        return "No audio recorded."

    try:
        api_key = os.environ.get("GROQ_API_KEY")
        if not api_key:
            return "GROQ_API_KEY environment variable is not set."

        client = Groq(api_key=api_key)
        with open(audio_file, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=(audio_file, file.read()),
                model="whisper-large-v3",
                prompt="Specify context or spelling",
                response_format="json",
                language="en",
                temperature=0.0,
            )
    finally:
        # Always clean up the temp file, including when the request raises
        # or the API key is missing.
        os.remove(audio_file)

    # Echo the raw response on the console for debugging.
    print(transcription)

    if hasattr(transcription, 'text'):
        return transcription.text
    return "Transcription text not found."
|
|
def _transcribe_when_stopped():
    """Run ``transcribe`` only for the click that stopped a recording.

    The same button both starts and stops recording, so this handler runs
    after every toggle. While a recording is in progress there is nothing
    to transcribe yet; a no-op update leaves the textbox as it is.
    """
    if recorder.recording:
        return gr.update()
    return transcribe()


# Build the UI: one toggle button and a textbox for the transcription.
with gr.Blocks() as gradio_interface:
    with gr.Column():
        record_button = gr.Button("Press to Record")
        transcription_output = gr.Textbox(label="Transcription")
        # Chain the handlers instead of registering two independent click
        # listeners: the toggle must finish (stream stopped, button label
        # updated) before the captured audio is saved and transcribed.
        record_button.click(fn=record, outputs=record_button).then(
            fn=_transcribe_when_stopped,
            outputs=transcription_output,
        )


if __name__ == "__main__":
    gradio_interface.launch()