afaqalinagra committed on
Commit
bbbf3e8
·
verified ·
1 Parent(s): dae1e6f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -0
app.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import numpy as np
4
+ import librosa
5
+ from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
6
+
7
+
8
+ # =========================
9
+ # MODEL CONFIGURATION
10
+ # =========================
11
# Hub identifier of the checkpoint that the processor and model are loaded from.
MODEL_ID = "afaqalinagra/PASHTO-ASR-MODEL"

# Inference target: CPU, with weights loaded as 32-bit floats.
DEVICE = "cpu"
DTYPE = torch.float32

# The processor turns raw audio into model inputs and decodes generated ids
# back into text.
processor = AutoProcessor.from_pretrained(MODEL_ID)

# Load the weights, move them to the target device and switch to eval mode.
# ``.to()`` and ``.eval()`` both return the module itself, so the chain binds
# the same object that separate statements would.
model = (
    AutoModelForSpeechSeq2Seq.from_pretrained(
        MODEL_ID,
        torch_dtype=DTYPE,
        low_cpu_mem_usage=True,
    )
    .to(DEVICE)
    .eval()
)
30
+
31
+
32
+ # =========================
33
+ # ASR FUNCTION
34
+ # =========================
35
def transcribe(audio):
    """Transcribe one audio clip with the module-level processor and model.

    Args:
        audio: ``None``, or a ``(sample_rate, waveform)`` tuple as delivered
            by ``gr.Audio(type="numpy")``. ``waveform`` is array-like; when it
            has more than one dimension, axis 1 is averaged to obtain mono.

    Returns:
        The decoded text with surrounding whitespace stripped, or
        ``"No audio provided."`` when there is no audio or it has no samples.
    """
    if audio is None:
        return "No audio provided."

    sample_rate, waveform = audio
    waveform = np.asarray(waveform)

    # A zero-length recording has nothing to resample or featurize.
    if waveform.size == 0:
        return "No audio provided."

    # Remember the incoming dtype: averaging channels promotes integers to
    # float, which would hide the fact that the samples are raw PCM.
    source_dtype = waveform.dtype

    # Convert stereo to mono.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)

    # Integer PCM spans the full integer range (e.g. +/-32768 for int16).
    # Scale it to [-1.0, 1.0] before feature extraction; a bare cast to
    # float32 would leave the amplitudes thousands of times too large.
    if np.issubdtype(source_dtype, np.integer):
        limits = np.iinfo(source_dtype)
        if limits.min < 0:
            waveform = waveform / float(-limits.min)
        else:
            # Unsigned PCM is centred on the midpoint of its range.
            midpoint = (limits.max + 1) / 2.0
            waveform = (waveform - midpoint) / midpoint

    # Ensure float32.
    waveform = waveform.astype(np.float32)

    # Resample to 16 kHz, the rate passed to the processor below.
    target_sr = 16000
    if sample_rate != target_sr:
        waveform = librosa.resample(
            waveform,
            orig_sr=sample_rate,
            target_sr=target_sr,
        )

    inputs = processor(
        waveform,
        sampling_rate=target_sr,
        return_tensors="pt",
    )

    # Match the features to the device and dtype the model was loaded with.
    features = inputs.input_features.to(DEVICE, dtype=DTYPE)

    with torch.no_grad():
        generated_ids = model.generate(features)

    transcription = processor.batch_decode(
        generated_ids,
        skip_special_tokens=True,
    )[0]

    return transcription.strip()
73
+
74
+
75
+ # =========================
76
+ # CUSTOM GLASS-MORPHISM CSS
77
+ # =========================
78
# Stylesheet handed to ``gr.Blocks(css=...)``: a gradient page background, a
# translucent blurred ".glass-card" container, white headings and labels, and
# rules for the ".gr-button", ".gr-textbox textarea" and ".gr-audio" selectors.
custom_css = """
body {
background: linear-gradient(135deg, #1e1e2f, #2b5876);
font-family: Inter, system-ui, -apple-system, BlinkMacSystemFont;
}

.glass-card {
background: rgba(255, 255, 255, 0.15);
backdrop-filter: blur(16px);
-webkit-backdrop-filter: blur(16px);
border-radius: 22px;
padding: 28px;
border: 1px solid rgba(255, 255, 255, 0.25);
box-shadow: 0 10px 40px rgba(0, 0, 0, 0.35);
}

h1, h2, h3, label {
color: white !important;
}

.gr-button {
background: linear-gradient(135deg, #ff7a18, #ffb347);
border-radius: 14px;
font-weight: 600;
color: black;
height: 48px;
}

.gr-textbox textarea {
background: rgba(255, 255, 255, 0.25);
color: white;
border-radius: 12px;
}

.gr-audio {
background: rgba(255, 255, 255, 0.18);
border-radius: 14px;
}
"""
117
+
118
+
119
+ # =========================
120
+ # GRADIO UI
121
+ # =========================
122
# Page layout: one styled card holding a header and a two-column row
# (audio input + button on the left, transcription text on the right).
# NOTE(review): the nesting below is reconstructed from a listing that had
# lost its indentation -- confirm it matches the intended layout.
with gr.Blocks(css=custom_css) as demo:

    with gr.Column(elem_classes=["glass-card"]):
        # Centered title, subtitle and one-line usage hint.
        gr.Markdown(
            """
<h1 style="text-align:center;">Pashto Speech-to-Text</h1>
<h3 style="text-align:center;">Powered by Custom ASR Model</h3>
<p style="text-align:center; color:white;">
Upload or record Pashto audio and receive accurate transcription.
</p>
"""
        )

        with gr.Row():
            # Left: audio source, delivered to the callback as a
            # (sample_rate, numpy array) tuple, plus the trigger button.
            with gr.Column(scale=1):
                audio_input = gr.Audio(
                    sources=["upload", "microphone"],
                    type="numpy",
                    label="Upload or Record Pashto Audio",
                )

                transcribe_btn = gr.Button("Transcribe")

            # Right: where the transcription is displayed.
            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="Transcription Output",
                    lines=8,
                    placeholder="Transcribed text will appear here...",
                )

    # Clicking the button runs ``transcribe`` on the audio and writes the
    # returned text into the output box.
    transcribe_btn.click(transcribe, audio_input, output_text)
157
+
158
+
159
+ # =========================
160
+ # LAUNCH
161
+ # =========================
162
# Start the Gradio server only when this file is executed as a script, so that
# importing the module (e.g. to reuse ``transcribe``) does not launch a server
# as a side effect.
if __name__ == "__main__":
    demo.launch()