| import gradio as gr |
| import time |
| import numpy as np |
| import os |
| import requests |
| import io |
| from pydub import AudioSegment |
|
|
|
|
|
|
def translate_audio(audio, language_code, SARVAM_API_KEY):
    """Send one audio segment to Sarvam AI's speech-to-text-translate endpoint.

    Args:
        audio: Object exposing ``export(file_obj, format="wav")``; the caller
            in this file passes a ``pydub.AudioSegment``.
        language_code: Value forwarded as the ``language_code`` form field.
        SARVAM_API_KEY: Key sent in the ``api-subscription-key`` header.

    Returns:
        A ``(transcript, detected_language)`` tuple read from the
        ``transcript`` and ``language_code`` fields of the JSON response.
        A missing or null field is returned as ``""``.

    Raises:
        ValueError: The API answered with status 401 or 403.
        RuntimeError: The API answered with any status other than 200/201.
        requests.RequestException: The request failed or timed out.
    """
    api_url = "https://api.sarvam.ai/speech-to-text-translate"
    headers = {"api-subscription-key": SARVAM_API_KEY}
    model_data = {
        "model": "saaras:v2",
        "with_diarization": False,
        "language_code": language_code,
    }
    # (connect, read) seconds. Without a timeout a stalled connection would
    # block the streaming handler indefinitely.
    request_timeout = (10, 60)

    # The context manager releases the in-memory WAV whether the request
    # succeeds or raises.
    with io.BytesIO() as chunk_buffer:
        audio.export(chunk_buffer, format="wav")
        chunk_buffer.seek(0)
        files = {"file": ("audiofile.wav", chunk_buffer, "audio/wav")}
        response = requests.post(
            api_url,
            headers=headers,
            files=files,
            data=model_data,
            timeout=request_timeout,
        )

    if response.status_code in (401, 403):
        raise ValueError("❌ Invalid API key. Please check your Sarvam AI key.")
    if response.status_code not in (200, 201):
        raise RuntimeError(
            f"❌ Request failed with status code: {response.status_code}. "
            f"Details: {response.text}"
        )

    response_data = response.json()
    # ``or ""`` also covers an explicit JSON null, which ``.get(key, "")``
    # would pass through as None.
    transcript = response_data.get("transcript") or ""
    detected_language = response_data.get("language_code") or ""
    return transcript, detected_language
|
|
def stream_transcribe(history, new_chunk, language_code, SARVAM_API_KEY):
    """Translate one streamed microphone chunk and append it to the transcript.

    Args:
        history: Transcript accumulated so far; ``None`` is treated as ``""``.
        new_chunk: ``(sample_rate, samples)`` tuple where ``samples`` is a
            NumPy array, or ``None`` when there is no audio for this tick.
        language_code: Language code forwarded to ``translate_audio``.
        SARVAM_API_KEY: API key forwarded to ``translate_audio``.

    Returns:
        A ``(state, output)`` tuple. On success both are the updated
        transcript. When there is nothing to transcribe both are the unchanged
        transcript. On failure ``state`` is the unchanged transcript and
        ``output`` is the error message.
    """
    if history is None:
        history = ""

    # No audio in this tick: leave the transcript untouched instead of
    # surfacing a tuple-unpacking error in the output box.
    if new_chunk is None:
        return history, history

    try:
        sr, y = new_chunk

        # An empty chunk would only produce an empty WAV upload.
        if y.size == 0:
            return history, history

        # Down-mix multi-channel audio to mono by averaging the channels.
        if y.ndim > 1:
            y = y.mean(axis=1)

        # 16-bit PCM matches sample_width=2 below.
        y_int16 = y.astype(np.int16)

        audio_segment = AudioSegment(
            data=y_int16.tobytes(),
            sample_width=2,
            frame_rate=sr,
            channels=1,
        )

        transcription, detected_language = translate_audio(
            audio_segment, language_code, SARVAM_API_KEY
        )
    except ValueError as ve:
        # translate_audio raises ValueError for a rejected API key; show the
        # message without logging.
        return history, str(ve)
    except Exception as e:
        print(f"Error during Transcription: {e}")
        return history, str(e)

    entry = f"({detected_language or ''})==> {transcription or ''}"
    # Only separate entries with a newline once there is something before
    # them, so the transcript does not start with a blank line.
    history = f"{history}\n{entry}" if history else entry
    return history, history
|
|
|
|
|
|
|
|
def clear() -> str:
    """Return an empty string; wired to blank the transcription textbox."""
    return ""
|
|
def clear_state() -> None:
    """Return ``None``; wired to reset the accumulated transcript state."""
    return None
|
|
def clear_api_key() -> str:
    """Return an empty string; wired to wipe the API-key textbox."""
    return ""
|
|
|
|
# Introductory text shown above the controls.
INTRO_MARKDOWN = """
## Translate simultaneously from multiple Indian languages to **English**.
### It supports **22 Indian languages**, including **Hindi, Oriya, Tamil, Telugu, Gujarati**, and more.

### 🔑 Sarvam AI API Key Required
To use this app, you need a free API key from [Sarvam AI](https://sarvam.ai).

👉 **Step 1:** Visit [https://sarvam.ai](https://sarvam.ai)
👉 **Step 2:** Sign up or log in
👉 **Step 3:** Generate your API key and paste it below

Your key stays on your device and is not stored.
"""

# Author blurb shown below the controls.
ABOUT_MARKDOWN = """
---

### 👋 Who am I?

I am **Dr. Mohan Dash**, a PhD in Industrial Computer Science and an AI Research Engineer.
I run a YouTube channel called **[Intelligent Machines](https://www.youtube.com/@Mohankumardash)** where I share practical tutorials and insights on building real-world AI applications.

If you find this app useful, you'll definitely enjoy the tutorials and breakdowns I post there.


---
"""

with gr.Blocks(theme=gr.themes.Soft()) as microphone:
    with gr.Column():
        gr.Markdown(INTRO_MARKDOWN)

        # Masked input; its value is passed to stream_transcribe on each
        # streaming event.
        api_key_box = gr.Textbox(label="Enter SARVAM AI API Key", type="password")

        language_options = [
            "hi-IN", "bn-IN", "kn-IN", "ml-IN", "mr-IN", "od-IN",
            "pa-IN", "ta-IN", "te-IN", "en-IN", "gu-IN", "unknown",
        ]
        language_code_box = gr.Dropdown(
            choices=language_options,
            label="Select Language Code",
            value="unknown",
        )

        input_audio_microphone = gr.Audio(streaming=True)
        output = gr.Textbox(
            label="Transcription",
            lines=10,
            max_lines=100,
            show_copy_button=True,
            value="",
        )

        with gr.Row():
            clear_button = gr.Button("Clear Output")
            clear_api_key_button = gr.Button("Clear API Key")

        # Transcript accumulated across streaming events.
        state = gr.State(value="")

        # Each streaming event calls stream_transcribe with the current
        # transcript, the new audio chunk, the language code and the API key,
        # and writes its two return values to the state and the output box.
        input_audio_microphone.stream(
            stream_transcribe,
            [state, input_audio_microphone, language_code_box, api_key_box],
            [state, output],
            time_limit=30,
            stream_every=5,
            concurrency_limit=None,
        )

        # Reset the stored transcript first, then blank the visible textbox.
        clear_button.click(clear_state, outputs=[state]).then(clear, outputs=[output])
        clear_api_key_button.click(clear_api_key, outputs=[api_key_box])

        gr.Markdown(ABOUT_MARKDOWN)

# Alias kept so the app is also reachable under the name ``demo``.
demo = microphone
demo.launch()