Spaces:

junaid008
/

Katib-ASR

Running on Zero

App Files Files Community

junaid008 committed 19 days ago

Commit

abd1c03

verified ·

1 Parent(s): cfca7d9

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -10

app.py CHANGED Viewed

@@ -6,9 +6,11 @@ import spaces
 # =========================================
 # LOAD MODEL
 # =========================================
 pipe = pipeline(
     "automatic-speech-recognition",
     model="uzair0/Katib-ASR",
     device="cpu"
 )
@@ -17,43 +19,67 @@ def transcribe_audio(audio_filepath):
     if audio_filepath is None:
         return "⚠️ Please record some audio first!"
     pipe.model.to("cuda")
     result = pipe(
         audio_filepath,
-        generate_kwargs={"language": "pashto", "task": "transcribe"}
     )
     return result["text"]
 # =========================================
-# UI DESIGN (Side-by-Side Layout)
 # =========================================
 custom_css = """
-#header { text-align: left; padding-bottom: 20px; }
 .transcription-box textarea {
     direction: rtl !important;
     text-align: right !important;
     font-size: 1.2em !important;
     background-color: #1f2937 !important;
     color: white !important;
 }
 .submit-btn {
     background: linear-gradient(90deg, #ff5722, #ff7043) !important;
     color: white !important;
     font-weight: bold !important;
 }
 .clear-btn {
     background-color: #374151 !important;
     color: white !important;
 }
 """
-with gr.Blocks(css=custom_css, theme=gr.themes.Default()) as demo:
-    with gr.Column(elem_id="header"):
         gr.Markdown("## 🎙️ Katib ASR: Pashto Speech Recognition")
         gr.Markdown("Click the Record button below, speak Pashto into your microphone, and see the result!")
-    # Side-by-side layout
     with gr.Row():
         with gr.Column(scale=1):
             audio_input = gr.Audio(
@@ -72,8 +98,18 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Default()) as demo:
                 elem_classes="transcription-box"
             )
-    # Logic
-    submit_btn.click(fn=transcribe_audio, inputs=audio_input, outputs=output_text)
-    clear_btn.click(fn=lambda: [None, ""], inputs=None, outputs=[audio_input, output_text])
-demo.launch()

 # =========================================
 # LOAD MODEL
 # =========================================
+# We load on CPU first, then move it inside the ZeroGPU function
 pipe = pipeline(
     "automatic-speech-recognition",
     model="uzair0/Katib-ASR",
+    torch_dtype=torch.bfloat16,
     device="cpu"
 )
     if audio_filepath is None:
         return "⚠️ Please record some audio first!"
+    # MOVE ENTIRE PIPELINE TO CUDA
+    # This ensures both weights and inputs are handled on the GPU
+    pipe.to("cuda")
+    # Explicitly move the model too, just to be safe with Whisper-based models
     pipe.model.to("cuda")
     result = pipe(
         audio_filepath,
+        generate_kwargs={
+            "language": "pashto",
+            "task": "transcribe"
+        }
     )
+    # Move back to CPU after finishing to free up GPU memory for the next call
+    pipe.to("cpu")
     return result["text"]
 # =========================================
+# UI DESIGN (Side-by-Side Dark Mode)
 # =========================================
 custom_css = """
+.gradio-container { background-color: #0b0f19 !important; }
+h2, p { color: white !important; }
+/* Transcription box styling */
 .transcription-box textarea {
     direction: rtl !important;
     text-align: right !important;
     font-size: 1.2em !important;
     background-color: #1f2937 !important;
     color: white !important;
+    border: 1px solid #374151 !important;
 }
+/* Matching the orange Submit button from your photo */
 .submit-btn {
     background: linear-gradient(90deg, #ff5722, #ff7043) !important;
     color: white !important;
     font-weight: bold !important;
+    border: none !important;
 }
 .clear-btn {
     background-color: #374151 !important;
     color: white !important;
+    border: none !important;
 }
+/* Keep audio player UI visible */
+audio { filter: invert(100%) hue-rotate(180deg); }
 """
+with gr.Blocks() as demo:
+    with gr.Column():
         gr.Markdown("## 🎙️ Katib ASR: Pashto Speech Recognition")
         gr.Markdown("Click the Record button below, speak Pashto into your microphone, and see the result!")
     with gr.Row():
         with gr.Column(scale=1):
             audio_input = gr.Audio(
                 elem_classes="transcription-box"
             )
+    # Submission Logic
+    submit_btn.click(
+        fn=transcribe_audio,
+        inputs=audio_input,
+        outputs=output_text
+    )
+    clear_btn.click(
+        fn=lambda: [None, ""],
+        inputs=None,
+        outputs=[audio_input, output_text]
+    )
+# Corrected: Passing css/theme to launch()
+demo.launch(theme=gr.themes.Default(), css=custom_css, ssr_mode=False)