futranbg committed on
Commit
1e5cce1
·
1 Parent(s): bd46960

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
import time
import traceback
from datetime import datetime

import gradio as gr # Imports the Gradio library, which is used to create user interfaces for machine learning models.
import requests
9
+
10
# Base URL that speech_recognize() prefixes to the selected model id.
# NOTE(review): the original file never defined API_URL; this is assumed to be
# the hosted Hugging Face Inference API endpoint -- confirm before deploying.
API_URL = "https://api-inference.huggingface.co/models/"

# Access token read from the environment; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
11
+
12
def date_now():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    current = datetime.now()
    return f"{current:%Y-%m-%d %H:%M:%S}"
14
+
15
def record_opt(msg):
    """Build one log line: the current timestamp, a space, *msg*, and a newline."""
    timestamp = date_now()
    return "{} {}\n".format(timestamp, msg)
17
+
18
def speech_recognize(audio, model_name, opt):
    """Transcribe a recorded audio file by POSTing it to ``API_URL + model_name``.

    This is a generator: it first yields a "please wait" placeholder and then
    the final result, so the UI can show progress. Every yielded item is a
    ``(output_text, log_text)`` pair.

    Args:
        audio: Path of the audio file to upload. A falsy value (no recording)
            is reported to the user instead of being opened.
        model_name: Model identifier appended to ``API_URL``.
        opt: Current log text; new timestamped entries are appended to it.

    Yields:
        Tuples ``(text, opt)`` where ``text`` is the placeholder, the
        transcription, or an error message containing the traceback.
    """
    if not audio:
        # open() on a missing path would raise outside any handler and abort
        # the event, so report the situation through the normal outputs.
        opt += record_opt("Transcription skipped, no audio recorded")
        yield "No audio was recorded.", opt
        return

    opt += record_opt("Transcription starting ...")
    yield "Transcribing, please wait ...", opt
    start = time.monotonic()

    try:
        # Reading the file is inside the try so I/O errors are reported in the
        # output box like any other failure.
        with open(audio, "rb") as f:
            data = f.read()
        url = API_URL + model_name
        print(f">>> url is {url}")
        # Only send the Authorization header when a token is configured;
        # otherwise the literal string "Bearer None" would be transmitted.
        headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
        # A timeout keeps a stalled connection from blocking a queue worker forever.
        response = requests.post(url, headers=headers, data=data, timeout=120)
        text = response.json()
        print(f">>> text is {text}")
        text = text['text']
    except Exception:
        # Best-effort handler: surface the full traceback to the user rather
        # than crashing, but let KeyboardInterrupt/SystemExit propagate.
        text = f"Transcription failed with error:\n{traceback.format_exc()}"

    cost = time.monotonic() - start
    opt += record_opt(f"Transcription ends, time consuming {cost:.3f}s")
    yield text, opt
39
+
40
+ import gradio as gr
41
+
42
# Build the UI: the left column holds the inputs (microphone, model selector,
# token box); the right column holds the transcription result and a running log.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            audio = gr.Audio(source="microphone", type="filepath")
            model_name = gr.Dropdown(
                label="Models:",
                choices=[
                    "openai/whisper-large-v3",
                    "openai/whisper-large-v2",
                    "openai/whisper-large",
                    "openai/whisper-medium",
                    "openai/whisper-small",
                    "openai/whisper-base",
                    "openai/whisper-tiny",
                ],
                value="openai/whisper-large-v3",
            )
            # The original line had no assignment target (a syntax error).
            # NOTE(review): this textbox is not wired to any event handler;
            # the name is chosen so it does not shadow the HF_TOKEN constant.
            token_box = gr.Textbox(label="Huggingface token")
        with gr.Column():
            output = gr.Textbox(label="Transcription results")
            operation = gr.Textbox(label="Logging")

    # Each recorder/player event appends one timestamped line to the log box.
    audio.start_recording(
        lambda x: x + record_opt("Start recording ..."),
        inputs=operation, outputs=operation
    )
    audio.play(
        lambda x: x + record_opt("Play recording"),
        inputs=operation, outputs=operation
    )
    audio.pause(
        lambda x: x + record_opt("Pause playback"),
        inputs=operation, outputs=operation
    )
    audio.stop(
        lambda x: x + record_opt("Stop playing"),
        inputs=operation, outputs=operation
    )
    audio.end(
        lambda x: x + record_opt("Finished playing"),
        inputs=operation, outputs=operation
    )
    # Finishing a recording triggers the transcription generator.
    audio.stop_recording(speech_recognize, inputs=[audio, model_name, operation], outputs=[output, operation])

demo.queue(max_size=4, concurrency_count=4)
demo.launch()