junaid008 committed on
Commit
024de1d
·
verified ·
1 Parent(s): 7bf1f68

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -35
app.py CHANGED
@@ -4,72 +4,76 @@ from transformers import pipeline
4
  import spaces
5
 
6
  # =========================================
7
- # LOAD MODEL (CPU initially)
8
  # =========================================
9
- # Katib-ASR is usually a Whisper-based model.
10
- # We load it on CPU to save GPU quota during the "idle" phase.
11
  pipe = pipeline(
12
  "automatic-speech-recognition",
13
  model="uzair0/Katib-ASR",
14
  device="cpu"
15
  )
16
 
17
- # =========================================
18
- # TRANSCRIPTION LOGIC
19
- # =========================================
20
-
21
  @spaces.GPU(duration=60)
22
  def transcribe_audio(audio_filepath):
23
  if audio_filepath is None:
24
  return "⚠️ Please record some audio first!"
25
 
26
- # Move to GPU for the actual processing
27
  pipe.model.to("cuda")
28
-
29
- # Generate transcription
30
  result = pipe(
31
  audio_filepath,
32
  generate_kwargs={"language": "pashto", "task": "transcribe"}
33
  )
34
-
35
  return result["text"]
36
 
37
  # =========================================
38
- # UI DESIGN (RTL & Professional)
39
  # =========================================
40
 
41
  custom_css = """
42
- /* Make the transcription text large and RTL for Pashto */
43
- textarea {
44
  direction: rtl !important;
45
  text-align: right !important;
46
- font-size: 1.2em !important;
47
- color: #1a1a1a !important;
 
 
 
 
 
 
 
 
 
 
48
  }
49
- #header { text-align: center; }
50
  """
51
 
52
- with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
53
  with gr.Column(elem_id="header"):
54
- gr.Markdown("# 🎙️ Katib ASR")
55
- gr.Markdown("### Advanced Pashto Speech-to-Text")
56
- gr.Markdown("Speak Pashto into your microphone and Katib will transcribe it.")
57
 
 
58
  with gr.Row():
59
- audio_input = gr.Audio(
60
- sources=["microphone"],
61
- type="filepath",
62
- label="Record Pashto Audio"
63
- )
64
-
65
- with gr.Row():
66
- output_text = gr.Textbox(
67
- label="Transcription Result",
68
- lines=5,
69
- placeholder="ستاسو لیکل شوې خبرې به دلته ښکاره شي..." # Pashto placeholder
70
- )
 
 
 
 
71
 
72
- # Trigger transcription when audio is finished/uploaded
73
- audio_input.change(fn=transcribe_audio, inputs=audio_input, outputs=output_text)
 
74
 
75
  demo.launch()
 
4
  import spaces
5
 
6
  # =========================================
7
+ # LOAD MODEL
8
  # =========================================
 
 
9
  pipe = pipeline(
10
  "automatic-speech-recognition",
11
  model="uzair0/Katib-ASR",
12
  device="cpu"
13
  )
14
 
 
 
 
 
15
  @spaces.GPU(duration=60)
16
  def transcribe_audio(audio_filepath):
17
  if audio_filepath is None:
18
  return "⚠️ Please record some audio first!"
19
 
 
20
  pipe.model.to("cuda")
 
 
21
  result = pipe(
22
  audio_filepath,
23
  generate_kwargs={"language": "pashto", "task": "transcribe"}
24
  )
 
25
  return result["text"]
26
 
27
  # =========================================
28
+ # UI DESIGN (Side-by-Side Layout)
29
  # =========================================
30
 
31
  custom_css = """
32
+ #header { text-align: left; padding-bottom: 20px; }
33
+ .transcription-box textarea {
34
  direction: rtl !important;
35
  text-align: right !important;
36
+ font-size: 1.2em !important;
37
+ background-color: #1f2937 !important;
38
+ color: white !important;
39
+ }
40
+ .submit-btn {
41
+ background: linear-gradient(90deg, #ff5722, #ff7043) !important;
42
+ color: white !important;
43
+ font-weight: bold !important;
44
+ }
45
+ .clear-btn {
46
+ background-color: #374151 !important;
47
+ color: white !important;
48
  }
 
49
  """
50
 
51
with gr.Blocks(theme=gr.themes.Default(), css=custom_css) as demo:
    # Header: title plus a one-line usage hint.
    with gr.Column(elem_id="header"):
        gr.Markdown("## 🎙️ Katib ASR: Pashto Speech Recognition")
        gr.Markdown("Click the Record button below, speak Pashto into your microphone, and see the result!")

    # One row with two equally scaled columns: the recorder and its buttons
    # come first, the transcription box second.
    with gr.Row():
        with gr.Column(scale=1):
            audio_input = gr.Audio(
                label="Record Pashto",
                type="filepath",
                sources=["microphone"],
            )
            with gr.Row():
                clear_btn = gr.Button("Clear", elem_classes="clear-btn")
                submit_btn = gr.Button("Submit", elem_classes="submit-btn")

        with gr.Column(scale=1):
            output_text = gr.Textbox(
                elem_classes="transcription-box",
                lines=8,
                label="Katib ASR Transcription",
            )

    # Submit runs the transcription; Clear resets both the recording and the text.
    submit_btn.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=output_text,
    )
    clear_btn.click(
        fn=lambda: [None, ""],
        inputs=None,
        outputs=[audio_input, output_text],
    )

demo.launch()