junaid008 committed on
Commit
abd1c03
·
verified ·
1 Parent(s): cfca7d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -10
app.py CHANGED
@@ -6,9 +6,11 @@ import spaces
6
  # =========================================
7
  # LOAD MODEL
8
  # =========================================
 
9
  pipe = pipeline(
10
  "automatic-speech-recognition",
11
  model="uzair0/Katib-ASR",
 
12
  device="cpu"
13
  )
14
 
@@ -17,43 +19,67 @@ def transcribe_audio(audio_filepath):
17
  if audio_filepath is None:
18
  return "⚠️ Please record some audio first!"
19
 
 
 
 
 
 
20
  pipe.model.to("cuda")
 
21
  result = pipe(
22
  audio_filepath,
23
- generate_kwargs={"language": "pashto", "task": "transcribe"}
 
 
 
24
  )
 
 
 
 
25
  return result["text"]
26
 
27
  # =========================================
28
- # UI DESIGN (Side-by-Side Layout)
29
  # =========================================
30
 
31
  custom_css = """
32
- #header { text-align: left; padding-bottom: 20px; }
 
 
 
33
  .transcription-box textarea {
34
  direction: rtl !important;
35
  text-align: right !important;
36
  font-size: 1.2em !important;
37
  background-color: #1f2937 !important;
38
  color: white !important;
 
39
  }
 
 
40
  .submit-btn {
41
  background: linear-gradient(90deg, #ff5722, #ff7043) !important;
42
  color: white !important;
43
  font-weight: bold !important;
 
44
  }
 
45
  .clear-btn {
46
  background-color: #374151 !important;
47
  color: white !important;
 
48
  }
 
 
 
49
  """
50
 
51
- with gr.Blocks(css=custom_css, theme=gr.themes.Default()) as demo:
52
- with gr.Column(elem_id="header"):
53
  gr.Markdown("## 🎙️ Katib ASR: Pashto Speech Recognition")
54
  gr.Markdown("Click the Record button below, speak Pashto into your microphone, and see the result!")
55
 
56
- # Side-by-side layout
57
  with gr.Row():
58
  with gr.Column(scale=1):
59
  audio_input = gr.Audio(
@@ -72,8 +98,18 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Default()) as demo:
72
  elem_classes="transcription-box"
73
  )
74
 
75
- # Logic
76
- submit_btn.click(fn=transcribe_audio, inputs=audio_input, outputs=output_text)
77
- clear_btn.click(fn=lambda: [None, ""], inputs=None, outputs=[audio_input, output_text])
 
 
 
 
 
 
 
 
 
78
 
79
- demo.launch()
 
 
6
  # =========================================
7
  # LOAD MODEL
8
  # =========================================
9
+ # We load on CPU first, then move it inside the ZeroGPU function
10
  pipe = pipeline(
11
  "automatic-speech-recognition",
12
  model="uzair0/Katib-ASR",
13
+ torch_dtype=torch.bfloat16,
14
  device="cpu"
15
  )
16
 
 
19
  if audio_filepath is None:
20
  return "⚠️ Please record some audio first!"
21
 
22
+ # MOVE ENTIRE PIPELINE TO CUDA
23
+ # This ensures both weights and inputs are handled on the GPU
24
+ pipe.to("cuda")
25
+
26
+ # Explicitly move the model too, just to be safe with Whisper-based models
27
  pipe.model.to("cuda")
28
+
29
  result = pipe(
30
  audio_filepath,
31
+ generate_kwargs={
32
+ "language": "pashto",
33
+ "task": "transcribe"
34
+ }
35
  )
36
+
37
+ # Move back to CPU after finishing to free up GPU memory for the next call
38
+ pipe.to("cpu")
39
+
40
  return result["text"]
41
 
42
  # =========================================
43
+ # UI DESIGN (Side-by-Side Dark Mode)
44
  # =========================================
45
 
46
  custom_css = """
47
+ .gradio-container { background-color: #0b0f19 !important; }
48
+ h2, p { color: white !important; }
49
+
50
+ /* Transcription box styling */
51
  .transcription-box textarea {
52
  direction: rtl !important;
53
  text-align: right !important;
54
  font-size: 1.2em !important;
55
  background-color: #1f2937 !important;
56
  color: white !important;
57
+ border: 1px solid #374151 !important;
58
  }
59
+
60
+ /* Matching the orange Submit button from your photo */
61
  .submit-btn {
62
  background: linear-gradient(90deg, #ff5722, #ff7043) !important;
63
  color: white !important;
64
  font-weight: bold !important;
65
+ border: none !important;
66
  }
67
+
68
  .clear-btn {
69
  background-color: #374151 !important;
70
  color: white !important;
71
+ border: none !important;
72
  }
73
+
74
+ /* Keep audio player UI visible */
75
+ audio { filter: invert(100%) hue-rotate(180deg); }
76
  """
77
 
78
+ with gr.Blocks() as demo:
79
+ with gr.Column():
80
  gr.Markdown("## 🎙️ Katib ASR: Pashto Speech Recognition")
81
  gr.Markdown("Click the Record button below, speak Pashto into your microphone, and see the result!")
82
 
 
83
  with gr.Row():
84
  with gr.Column(scale=1):
85
  audio_input = gr.Audio(
 
98
  elem_classes="transcription-box"
99
  )
100
 
101
+ # Submission Logic
102
+ submit_btn.click(
103
+ fn=transcribe_audio,
104
+ inputs=audio_input,
105
+ outputs=output_text
106
+ )
107
+
108
+ clear_btn.click(
109
+ fn=lambda: [None, ""],
110
+ inputs=None,
111
+ outputs=[audio_input, output_text]
112
+ )
113
 
114
+ # Corrected: Passing css/theme to launch()
115
+ demo.launch(theme=gr.themes.Default(), css=custom_css, ssr_mode=False)