Spaces:

prasanacodes
/

Indic-Translation-Toolkit

Sleeping

App Files Files Community

prasanacodes committed on Aug 18, 2025

Commit

b1a3dad

verified ·

1 Parent(s): 8846bec

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -14

app.py CHANGED Viewed

@@ -1,18 +1,78 @@
 import gradio as gr
-# 1. Define the function
-def add(num1, num2):
-  """This function adds two numbers and returns the sum."""
-  return num1 + num2
-# 2. Create the Gradio Interface
-iface = gr.Interface(
-    fn=add,
-    inputs=[gr.Number(label="First Number"), gr.Number(label="Second Number")],
-    outputs=gr.Number(label="Sum"),
-    title="Simple Adder ➕",
-    description="Enter two numbers and click 'Submit' to see their sum."
 )
-# 3. Launch the Interface
-iface.launch()

+# app.py
 import gradio as gr
+from transformers import pipeline
+import torch
+# --- Model Loading ---
+# The pipeline is built once at import time so every request reuses the same
+# loaded model instead of reloading it on each function call.
+# Use the first CUDA GPU with float16 when one is available; otherwise fall
+# back to the CPU with float32.
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+print(f"Using device: {device}")
+# Initialize the ASR pipeline from Hugging Face Transformers.
+# Whisper operates on 30-second windows; chunk_length_s=30 makes the pipeline
+# split longer recordings into chunks so they are transcribed in full rather
+# than being cut off or rejected.
+transcriber = pipeline(
+    "automatic-speech-recognition",
+    model="openai/whisper-large-v2",
+    torch_dtype=torch_dtype,
+    chunk_length_s=30,
+    device=device,
+)
+# --- Transcription Function ---
+def transcribe_audio(audio_path):
+    """
+    Transcribe the audio file at `audio_path` with the module-level `transcriber`.
+
+    Returns the transcribed text on success. When `audio_path` is None, or when
+    transcription raises, a user-facing message string is returned instead.
+    """
+    if audio_path is None:
+        return "No audio file provided. Please upload or record an audio file."
+
+    print(f"Transcribing audio file: {audio_path}")
+    try:
+        # The pipeline output is indexed by 'text' to obtain the transcript.
+        text = transcriber(audio_path)["text"]
+        print(f"Transcription successful: {text}")
+        return text
+    except Exception as err:
+        # Any failure is printed and handed back as the result string.
+        print(f"An error occurred during transcription: {err}")
+        return f"Sorry, an error occurred. Please try again. Details: {str(err)}"
+# --- Gradio Interface Definition ---
+# Page title and description text passed to gr.Interface below
+title = "Custom Whisper Transcription App"
+description = """
+This is a custom Gradio app that uses the <b>openai/whisper-large-v2</b> model
+from the Hugging Face Hub for transcription. Upload an audio file or record
+directly from your microphone to get the transcript.
+"""
+article = "<p style='text-align: center'><a href='https://huggingface.co/openai/whisper-large-v2' target='_blank'>Model Card</a></p>"
+# Build the Gradio interface around transcribe_audio:
+# a single Audio input component and a single Textbox output.
+app_interface = gr.Interface(
+    fn=transcribe_audio,
+    inputs=gr.Audio(
+        sources=["microphone", "upload"],
+        type="filepath",  # presumably hands fn a path string (its parameter is audio_path) — confirm in Gradio docs
+        label="Upload Audio or Record"
+    ),
+    outputs=gr.Textbox(label="Transcription Result"),
+    title=title,
+    description=description,
+    article=article,
+    examples=[  # NOTE(review): these paths are relative; confirm both sample files are in the repo
+        ["./sample1.flac"],
+        ["./sample2.wav"],
+    ],
+    allow_flagging="never"  # NOTE(review): newer Gradio releases rename this to flagging_mode — confirm against the pinned version
 )
+# --- Launch the App ---
+if __name__ == "__main__":
+    # Launch the interface only when this file is run as a script, not when it is imported.
+    app_interface.launch()