Sammaali committed on
Commit
b7809d1
·
verified ·
1 Parent(s): 6d44df0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -11
app.py CHANGED
@@ -18,6 +18,10 @@ REPEAT_WORD = re.compile(r'\b(\w+)(?:\s+\1\b)+', re.IGNORECASE)
18
  CHAR_STRETCH = re.compile(r'(.)\1{2,}')
19
  REPEAT_SYLLABLE = re.compile(r'\b(\w{1,3})(?:\s+\1\b)+', re.IGNORECASE)
20
 
 
 
 
 
21
  def is_filler(word):
22
  w = word.lower()
23
 
@@ -30,12 +34,27 @@ def is_filler(word):
30
  return False
31
 
32
 
 
33
  def clean_transcript(text):
34
 
 
35
  text = CHAR_STRETCH.sub(r'\1', text)
 
 
 
 
 
36
  text = REPEAT_WORD.sub(r'\1', text)
 
 
37
  text = REPEAT_SYLLABLE.sub(r'\1', text)
38
 
 
 
 
 
 
 
39
  words = text.split()
40
 
41
  filtered = []
@@ -45,7 +64,6 @@ def clean_transcript(text):
45
 
46
  return " ".join(filtered)
47
 
48
-
49
  # =========================
50
  # Speech To Text
51
  # =========================
@@ -55,10 +73,14 @@ def transcribe_audio(audio_file):
55
  if audio_file is None:
56
  return "No audio uploaded", ""
57
 
58
- headers = {"xi-api-key": ELEVENLABS_API_KEY}
 
 
59
 
60
  with open(audio_file, "rb") as f:
 
61
  files = {"file": f}
 
62
  data = {
63
  "model_id": "scribe_v2",
64
  "enable_logging": "false"
@@ -79,8 +101,8 @@ def transcribe_audio(audio_file):
79
  text = ""
80
 
81
  if "segments" in result:
82
- for seg in result["segments"]:
83
- text += seg.get("text", "") + " "
84
  else:
85
  text = result.get("text", "")
86
 
@@ -90,15 +112,21 @@ def transcribe_audio(audio_file):
90
 
91
 
92
  # =========================
93
- # Gradio UI
94
  # =========================
95
 
96
  with gr.Blocks() as demo:
97
 
98
- gr.Markdown("# Speech To Text Cleaner")
99
- gr.Markdown("Upload audio → convert to text → remove fillers")
100
 
101
- audio_input = gr.Audio(type="filepath", label="Upload Audio")
 
 
 
 
 
 
 
102
 
103
  raw_text = gr.Textbox(
104
  label="Original Transcript",
@@ -118,6 +146,4 @@ with gr.Blocks() as demo:
118
  outputs=[raw_text, cleaned_text]
119
  )
120
 
121
-
122
- if __name__ == "__main__":
123
- demo.launch()
 
18
  CHAR_STRETCH = re.compile(r'(.)\1{2,}')
19
  REPEAT_SYLLABLE = re.compile(r'\b(\w{1,3})(?:\s+\1\b)+', re.IGNORECASE)
20
 
21
+ STUTTER = re.compile(r'\b(\w)[\-ـ]+(\1[\-ـ]+)+')
22
+ REPEAT_AFTER_COMMA = re.compile(r'(\b\w+\b)[،,]\s+\1')
23
+ COMMA_SPACES = re.compile(r'\s+([،,])')
24
+
25
  def is_filler(word):
26
  w = word.lower()
27
 
 
34
  return False
35
 
36
 
37
+
38
  def clean_transcript(text):
39
 
40
+ # collapse stretched sounds
41
  text = CHAR_STRETCH.sub(r'\1', text)
42
+
43
+ # fix stutter like ب-ب-بالشيء
44
+ text = STUTTER.sub(r'\1', text)
45
+
46
+ # repeated words
47
  text = REPEAT_WORD.sub(r'\1', text)
48
+
49
+ # repeated short syllables
50
  text = REPEAT_SYLLABLE.sub(r'\1', text)
51
 
52
+ # repeated after comma
53
+ text = REPEAT_AFTER_COMMA.sub(r'\1', text)
54
+
55
+ # fix spaces before comma
56
+ text = COMMA_SPACES.sub(r'\1', text)
57
+
58
  words = text.split()
59
 
60
  filtered = []
 
64
 
65
  return " ".join(filtered)
66
 
 
67
  # =========================
68
  # Speech To Text
69
  # =========================
 
73
  if audio_file is None:
74
  return "No audio uploaded", ""
75
 
76
+ headers = {
77
+ "xi-api-key": ELEVENLABS_API_KEY
78
+ }
79
 
80
  with open(audio_file, "rb") as f:
81
+
82
  files = {"file": f}
83
+
84
  data = {
85
  "model_id": "scribe_v2",
86
  "enable_logging": "false"
 
101
  text = ""
102
 
103
  if "segments" in result:
104
+ for segment in result["segments"]:
105
+ text += segment.get("text", "") + " "
106
  else:
107
  text = result.get("text", "")
108
 
 
112
 
113
 
114
  # =========================
115
+ # Gradio Interface
116
  # =========================
117
 
118
  with gr.Blocks() as demo:
119
 
120
+ gr.Markdown("# Arabic Speech Cleaner")
 
121
 
122
+ gr.Markdown(
123
+ "Upload audio → convert to text using ElevenLabs → remove fillers and stuttering"
124
+ )
125
+
126
+ audio_input = gr.Audio(
127
+ type="filepath",
128
+ label="Upload Audio"
129
+ )
130
 
131
  raw_text = gr.Textbox(
132
  label="Original Transcript",
 
146
  outputs=[raw_text, cleaned_text]
147
  )
148
 
149
+ demo.launch()