ombhojane committed on
Commit
e2c6f94
·
1 Parent(s): 9fa1ac5

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +59 -0
  2. templates/index.html +25 -0
  3. templates/result.html +15 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
import os
import tempfile

import gradio as gr
import torch
from flask import Flask, render_template, request
from transformers import pipeline
6
+
7
# Flask application object; the route decorators further down register on it.
app = Flask(__name__)

# Hugging Face checkpoint handed to the pipeline, and the batch size that is
# passed to the pipeline on every transcription call.
MODEL_NAME = "openai/whisper-large-v3"
BATCH_SIZE = 8

# Run on CUDA device 0 when torch reports a usable GPU, otherwise on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-recognition pipeline, built once at import time and shared by all
# requests.
pipe = pipeline(
    model=MODEL_NAME,
    task="automatic-speech-recognition",
    device=device,
    chunk_length_s=30,
)
20
+
21
def transcribe(inputs, task):
    """Run the module-level speech pipeline on ``inputs`` and return its text.

    Args:
        inputs: Audio input forwarded unchanged to ``pipe``; ``None`` means
            nothing was submitted.
        task: Value forwarded to the model as ``generate_kwargs["task"]``
            (the UI offers ``"transcribe"`` and ``"translate"``).

    Returns:
        The ``"text"`` entry of the pipeline output.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    output = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    return output["text"]
27
+
28
# Gradio interface wrapping `transcribe`: one uploaded audio file plus a task
# selector in, plain text out.
# NOTE(review): `gr.inputs.*`, `source=`, `optional=` and `default=` belong to
# the legacy Gradio component API; confirm the pinned Gradio version still
# accepts them before upgrading.
audio_transcribe_interface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="upload", type="filepath", optional=True, label="Audio file"),
        gr.inputs.Radio(["transcribe", "translate"], label="Task", default="translate"),
    ],
    outputs="text",
    theme="huggingface",
    title="Whisper Large V3: Translate Audio",
    description=(
        "Translate long-form audio inputs with the click of a button! Demo uses the"
        # The emoji was previously stored as mis-decoded bytes and rendered as garbage.
        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
        " of arbitrary length."
    ),
)
43
+
44
@app.route('/')
def index():
    """Serve the upload form rendered from ``index.html`` at the site root."""
    page = render_template('index.html')
    return page
47
+
48
@app.route('/translate_audio', methods=['POST'])
def translate_audio():
    """Handle the upload form: run Whisper on the posted file and show the text.

    Reads the ``audio_file`` upload and the ``task`` form field, stores the
    upload in a temporary file (``transcribe`` is given a path, matching the
    ``type="filepath"`` audio input used elsewhere in this file), and renders
    ``result.html`` with the resulting text.

    Returns:
        The rendered result page, or a ``(message, 400)`` tuple when no file
        was uploaded or the task is not one of the supported values.
    """
    # The route only accepts POST, so no method check is needed here.
    audio_file = request.files.get('audio_file')
    # Browsers submit an empty-filename part when no file was chosen.
    if audio_file is None or not audio_file.filename:
        return "No audio file submitted! Please upload an audio file before submitting your request.", 400

    task = request.form.get('task', 'translate')
    if task not in ('transcribe', 'translate'):
        # Fixed message: never echo untrusted form input back into the page.
        return "Unsupported task. Choose 'transcribe' or 'translate'.", 400

    # Keep the original extension so audio decoding can use it as a format hint.
    suffix = os.path.splitext(audio_file.filename)[1]
    fd, temp_path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    try:
        audio_file.save(temp_path)
        # Call the model wrapper directly instead of going through the Gradio
        # Interface object, which is not meant to be driven with a raw upload.
        result = transcribe(temp_path, task)
    finally:
        # Always remove the temporary copy, even when transcription fails.
        os.remove(temp_path)

    return render_template('result.html', result=result)
57
+
58
if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger (arbitrary code
    # execution from the browser) and the reloader, so it must be opted into
    # explicitly with FLASK_DEBUG=1 rather than being always on.
    app.run(debug=os.environ.get("FLASK_DEBUG", "0") == "1")
templates/index.html ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!-- templates/index.html -->
<!-- Upload form: posts an audio file and the chosen task to /translate_audio. -->

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Translate Audio</title>
</head>
<body>
    <h1>Translate Audio</h1>
    <form action="/translate_audio" method="post" enctype="multipart/form-data">
        <label for="audio_file">Choose an audio file:</label>
        <!-- "required" stops the browser from submitting the form without a file. -->
        <input type="file" id="audio_file" name="audio_file" accept="audio/*" required>
        <br>
        <label for="task">Choose a task:</label>
        <!-- Both tasks the backend accepts are offered; "translate" stays the default. -->
        <select id="task" name="task">
            <option value="translate" selected>Translate</option>
            <option value="transcribe">Transcribe</option>
        </select>
        <br>
        <input type="submit" value="Submit">
    </form>
</body>
</html>
templates/result.html ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!-- templates/result.html -->
<!-- Result page: displays the "result" value passed in by the view. -->

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Translation Result</title>
</head>
<body>
    <h1>Translation Result</h1>
    <p>{{ result }}</p>
</body>
</html>