| | import os |
| | import json |
| | import tempfile |
| |
|
| | import gradio as gr |
| |
|
| | from google.cloud import speech |
| | from microphone import MicrophoneStream |
| | from utils import listen_print_loop |
| |
|
| | import pyaudio |
| |
|
def list_audio_devices():
    """Print the index and name of every audio device PyAudio reports.

    One line per device is written to stdout in the form
    ``Device <index>: <name>``. Nothing is returned.
    """
    audio = pyaudio.PyAudio()
    try:
        for i in range(audio.get_device_count()):
            device_info = audio.get_device_info_by_index(i)
            print(f"Device {i}: {device_info['name']}")
    finally:
        # Release the PyAudio instance once enumeration is done (or has
        # failed); previously it was never terminated and leaked per call.
        audio.terminate()
| |
|
| | |
def get_credentials():
    """Write the credentials JSON from the environment to a temporary file.

    The text is read from the ``GOOGLE`` environment variable, checked to be
    well-formed JSON, and written unchanged to a new ``.json`` temporary file.

    Returns:
        str: Path of the temporary file. It is created with ``delete=False``
        and is not removed by this function.

    Raises:
        ValueError: If ``GOOGLE`` is unset or its value is not valid JSON.
    """
    creds_json_str = os.getenv("GOOGLE")
    if creds_json_str is None:
        # Name the variable that is actually read so the operator knows
        # which setting is missing.
        raise ValueError(
            'Environment variable "GOOGLE" (credentials JSON) not found in environment'
        )

    # Validate before touching the disk so a bad value fails here, with a
    # clear message, and no stray temp file is left behind.
    try:
        json.loads(creds_json_str)
    except ValueError as err:  # json.JSONDecodeError subclasses ValueError
        raise ValueError(
            'Environment variable "GOOGLE" does not contain valid JSON'
        ) from err

    # delete=False: the file must outlive this function because only its
    # path is handed back to the caller.
    with tempfile.NamedTemporaryFile(
        mode="w+", delete=False, suffix=".json", encoding="utf-8"
    ) as temp:
        temp.write(creds_json_str)
        temp_filename = temp.name

    return temp_filename
| | |
# Export the path of the credentials file written by get_credentials().
# NOTE(review): presumably picked up by SpeechClient() below through Google's
# default-credentials lookup, so this must stay ahead of it - confirm.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = get_credentials()

# Audio / recognition parameters shared by the microphone stream and the
# recognizer configuration.
RATE = 16000
CHUNK = RATE // 10  # one tenth of RATE
LANGUAGE = "id-ID"

transcribe_client = speech.SpeechClient()

# Base recognition settings: 16-bit linear PCM at RATE Hz in LANGUAGE.
config = speech.RecognitionConfig(
    language_code=LANGUAGE,
    sample_rate_hertz=RATE,
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
)

# Streaming wrapper around the base settings, with interim results enabled.
streaming_config = speech.StreamingRecognitionConfig(
    interim_results=True,
    config=config,
)
| | |
async def main(audio) -> None:
    """Stream microphone audio to the speech client and await the result loop.

    Args:
        audio: Value passed in by the Gradio interface. NOTE(review): it is
            never read here; audio comes from ``MicrophoneStream`` instead -
            confirm this is intended.

    Returns:
        The result of awaiting ``listen_print_loop(responses)``.
        NOTE(review): the ``-> None`` annotation does not reflect this;
        confirm the helper's return type before tightening it.
    """
    print("Streaming started ...")
    # list_audio_devices() prints each device itself and returns nothing, so
    # wrapping it in print() only emitted a stray "None" line.
    list_audio_devices()

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        # Lazy generator: requests are only built as the client pulls them.
        requests = (
            speech.StreamingRecognizeRequest(audio_content=content)
            for content in audio_generator
        )

        # Stay inside the `with` block so the stream is still open while the
        # lazy request generator above is being consumed.
        responses = transcribe_client.streaming_recognize(streaming_config, requests)

        return await listen_print_loop(responses)
| | |
# Gradio front end: a live interface that calls main() with streaming
# microphone audio and declares a textbox and an audio component as outputs.
demo = gr.Interface(
    fn=main,
    live=True,
    inputs=[
        gr.Audio(label="Input Speech", sources="microphone", streaming=True),
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Audio(label="Audio"),
    ],
)


if __name__ == "__main__":
    # Launch the app only when this file is run as a script.
    demo.launch()