prasanacodes committed on
Commit
b1a3dad
·
verified ·
1 Parent(s): 8846bec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -14
app.py CHANGED
@@ -1,18 +1,78 @@
 
1
  import gradio as gr
 
 
2
 
3
- # 1. Define the function
4
- def add(num1, num2):
5
- """This function adds two numbers and returns the sum."""
6
- return num1 + num2
7
-
8
- # 2. Create the Gradio Interface
9
- iface = gr.Interface(
10
- fn=add,
11
- inputs=[gr.Number(label="First Number"), gr.Number(label="Second Number")],
12
- outputs=gr.Number(label="Sum"),
13
- title="Simple Adder ➕",
14
- description="Enter two numbers and click 'Submit' to see their sum."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  )
16
 
17
- # 3. Launch the Interface
18
- iface.launch()
 
 
 
1
+ # app.py
2
  import gradio as gr
3
+ from transformers import pipeline
4
+ import torch
5
 
6
# --- Model Loading ---
# The model is loaded once at import time rather than on every request, so
# each call to the transcription function reuses the same pipeline object.
# When a CUDA GPU is available, run on the first GPU in half precision;
# otherwise fall back to the CPU in full float32 precision.
use_gpu = torch.cuda.is_available()
device = "cuda:0" if use_gpu else "cpu"
torch_dtype = torch.float16 if use_gpu else torch.float32

print(f"Using device: {device}")

# Initialize the ASR pipeline from Hugging Face Transformers.
# Whisper works on audio windows of at most 30 seconds. Passing
# chunk_length_s=30 turns on the pipeline's chunking so that longer
# recordings are split into windows and transcribed in full.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v2",
    chunk_length_s=30,
    torch_dtype=torch_dtype,
    device=device,
)
22
+
23
+ # --- Transcription Function ---
24
def transcribe_audio(audio_path):
    """Transcribe an audio file with the module-level Whisper pipeline.

    Args:
        audio_path: Path to the audio file to transcribe. ``None`` or an
            empty string means that no audio was supplied.

    Returns:
        The transcribed text on success. If no audio was supplied, or if the
        transcription raises an exception, a human-readable message is
        returned instead of raising.
    """
    # Treat both None and "" as "nothing to transcribe" so an empty path is
    # reported as missing input instead of being handed to the model.
    if not audio_path:
        return "No audio file provided. Please upload or record an audio file."

    print(f"Transcribing audio file: {audio_path}")
    try:
        # The pipeline loads and preprocesses the audio itself and returns a
        # dictionary; the transcript is stored under the "text" key.
        result = transcriber(audio_path)
        transcription = result["text"]
        print(f"Transcription successful: {transcription}")
        return transcription
    except Exception as e:
        # This function is the UI callback boundary: log the failure and
        # report it as text so the output box always receives a string.
        print(f"An error occurred during transcription: {e}")
        return f"Sorry, an error occurred. Please try again. Details: {e}"
43
+
44
# --- Gradio Interface Definition ---
# Title, description (HTML allowed) and article text for the Space.
title = "Custom Whisper Transcription App"
description = """
This is a custom Gradio app that uses the <b>openai/whisper-large-v2</b> model
from the Hugging Face Hub for transcription. Upload an audio file or record
directly from your microphone to get the transcript.
"""
article = "<p style='text-align: center'><a href='https://huggingface.co/openai/whisper-large-v2' target='_blank'>Model Card</a></p>"


# Input component: audio recorded from the microphone or uploaded as a file,
# handed to the callback as a file path.
audio_input = gr.Audio(
    sources=["microphone", "upload"],
    type="filepath",
    label="Upload Audio or Record",
)

# Output component: a text box that receives the callback's return value.
transcript_output = gr.Textbox(label="Transcription Result")

# Wire the transcription function to the components defined above.
app_interface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs=transcript_output,
    title=title,
    description=description,
    article=article,
    examples=[["./sample1.flac"], ["./sample2.wav"]],
    allow_flagging="never",
)

# --- Launch the App ---
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    app_interface.launch()