Spaces:

junaid008
/

Katib-ASR

Running on Zero

App Files Files Community

junaid008 committed on 11 days ago

Commit

024de1d

verified ·

1 Parent(s): 7bf1f68

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -35

app.py CHANGED Viewed

@@ -4,72 +4,76 @@ from transformers import pipeline
 import spaces
 # =========================================
-# LOAD MODEL (CPU initially)
 # =========================================
-# Katib-ASR is usually a Whisper-based model.
-# We load it on CPU to save GPU quota during the "idle" phase.
 pipe = pipeline(
     "automatic-speech-recognition",
     model="uzair0/Katib-ASR",
     device="cpu"
 )
-# =========================================
-# TRANSCRIPTION LOGIC
-# =========================================
 @spaces.GPU(duration=60)
 def transcribe_audio(audio_filepath):
     if audio_filepath is None:
         return "⚠️ Please record some audio first!"
-    # Move to GPU for the actual processing
     pipe.model.to("cuda")
-    # Generate transcription
     result = pipe(
         audio_filepath,
         generate_kwargs={"language": "pashto", "task": "transcribe"}
     )
     return result["text"]
 # =========================================
-# UI DESIGN (RTL & Professional)
 # =========================================
 custom_css = """
-/* Make the transcription text large and RTL for Pashto */
-textarea {
     direction: rtl !important;
     text-align: right !important;
-    font-size: 1.2em !important;
-    color: #1a1a1a !important;
 }
-#header { text-align: center; }
 """
-with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_id="header"):
-        gr.Markdown("# 🎙️ Katib ASR")
-        gr.Markdown("### Advanced Pashto Speech-to-Text")
-        gr.Markdown("Speak Pashto into your microphone and Katib will transcribe it.")
     with gr.Row():
-        audio_input = gr.Audio(
-            sources=["microphone"],
-            type="filepath",
-            label="Record Pashto Audio"
-        )
-    with gr.Row():
-        output_text = gr.Textbox(
-            label="Transcription Result",
-            lines=5,
-            placeholder="ستاسو لیکل شوې خبرې به دلته ښکاره شي..." # Pashto placeholder
-        )
-    # Trigger transcription when audio is finished/uploaded
-    audio_input.change(fn=transcribe_audio, inputs=audio_input, outputs=output_text)
 demo.launch()

 import spaces
 # =========================================
+# LOAD MODEL
 # =========================================
 pipe = pipeline(
     "automatic-speech-recognition",
     model="uzair0/Katib-ASR",
     device="cpu"
 )
 @spaces.GPU(duration=60)
 def transcribe_audio(audio_filepath):
     """Transcribe a recorded audio file to Pashto text.

     Args:
         audio_filepath: Path of the audio file to transcribe, or None when
             nothing has been recorded.

     Returns:
         The "text" field of the pipeline result, or a warning message when
         audio_filepath is None.
     """
     # Nothing recorded: return a user-facing hint instead of calling the model.
     if audio_filepath is None:
         return "⚠️ Please record some audio first!"
     # The pipeline is created with device="cpu" at module level; the model is
     # moved to CUDA only here, inside the @spaces.GPU-decorated call.
     # NOTE(review): only pipe.model is moved, the pipeline object itself was
     # built with device="cpu" -- confirm the pipeline places its inputs on the
     # same device as the model.
     pipe.model.to("cuda")
     # Pass the language and task explicitly to generation.
     result = pipe(
         audio_filepath,
         generate_kwargs={"language": "pashto", "task": "transcribe"}
     )
     return result["text"]
 # =========================================
+# UI DESIGN (Side-by-Side Layout)
 # =========================================
 custom_css = """
+#header { text-align: left; padding-bottom: 20px; }
+.transcription-box textarea {
     direction: rtl !important;
     text-align: right !important;
+    font-size: 1.2em !important;
+    background-color: #1f2937 !important;
+    color: white !important;
+}
+.submit-btn {
+    background: linear-gradient(90deg, #ff5722, #ff7043) !important;
+    color: white !important;
+    font-weight: bold !important;
+}
+.clear-btn {
+    background-color: #374151 !important;
+    color: white !important;
 }
 """
+with gr.Blocks(css=custom_css, theme=gr.themes.Default()) as demo:
     with gr.Column(elem_id="header"):
+        gr.Markdown("## 🎙️ Katib ASR: Pashto Speech Recognition")
+        gr.Markdown("Click the Record button below, speak Pashto into your microphone, and see the result!")
+    # Side-by-side layout
     with gr.Row():
+        with gr.Column(scale=1):
+            audio_input = gr.Audio(
+                sources=["microphone"],
+                type="filepath",
+                label="Record Pashto"
+            )
+            with gr.Row():
+                clear_btn = gr.Button("Clear", elem_classes="clear-btn")
+                submit_btn = gr.Button("Submit", elem_classes="submit-btn")
+        with gr.Column(scale=1):
+            output_text = gr.Textbox(
+                label="Katib ASR Transcription",
+                lines=8,
+                elem_classes="transcription-box"
+            )
+    # Logic
+    submit_btn.click(fn=transcribe_audio, inputs=audio_input, outputs=output_text)
+    clear_btn.click(fn=lambda: [None, ""], inputs=None, outputs=[audio_input, output_text])
 demo.launch()