lucamartinelli committed on
Commit
3d5ee3a
·
1 Parent(s): 9ed7329
Files changed (2) hide show
  1. app.py +6 -17
  2. src/vtt_utils.py +1 -73
app.py CHANGED
@@ -7,7 +7,7 @@ import gradio as gr
7
 
8
  from src.audio_processor import AudioProcessor
9
  from src.speaker_manager import SpeakerManager
10
- from src.vtt_utils import clean_vtt, validate_vtt
11
 
12
  logging.basicConfig(level=logging.INFO)
13
 
@@ -123,11 +123,12 @@ with gr.Blocks(title="Transcription & Diarization") as app:
123
 
124
  with gr.Column():
125
  with gr.Group():
 
126
  output_vtt = gr.Textbox(
127
  label="Transcription",
128
  lines=20,
129
  placeholder="Your transcription will appear here...",
130
- container=False,
131
  )
132
 
133
  validation_status = gr.Markdown("⚪ No content", container=True)
@@ -138,13 +139,9 @@ with gr.Blocks(title="Transcription & Diarization") as app:
138
  new_speaker_name = gr.Textbox(label="New speaker name", placeholder="Davide")
139
 
140
  rename_btn = gr.Button("Rename")
141
-
142
- with gr.Row():
143
- clean_btn = gr.Button("Fix", variant="secondary", interactive=False)
144
 
145
  download_file = gr.File(label="Download VTT", visible=False)
146
 
147
-
148
  def check_inputs(openai_key: str, hf_key: str, audio) -> gr.Button:
149
  """
150
  Enable submit button only if both API keys and audio are provided.
@@ -169,7 +166,7 @@ with gr.Blocks(title="Transcription & Diarization") as app:
169
  audio_filename: Audio filename for download
170
 
171
  Returns:
172
- Tuple of (status_message, clean_button, download_file)
173
  """
174
  status, status_type = validate_vtt(vtt_content)
175
 
@@ -183,7 +180,6 @@ with gr.Blocks(title="Transcription & Diarization") as app:
183
 
184
  return (
185
  status,
186
- gr.Button(interactive=is_valid), # clean_btn
187
  gr.File(value=file_path, visible=False) # download_file
188
  )
189
 
@@ -224,20 +220,13 @@ with gr.Blocks(title="Transcription & Diarization") as app:
224
  output_vtt.change(
225
  fn=update_validation,
226
  inputs=[output_vtt, audio_filename_state],
227
- outputs=[validation_status, clean_btn, download_file]
228
  )
229
 
230
  audio_filename_state.change(
231
  fn=update_validation,
232
  inputs=[output_vtt, audio_filename_state],
233
- outputs=[validation_status, clean_btn, download_file]
234
- )
235
-
236
- # VTT cleaning and improvement
237
- clean_btn.click(
238
- fn=clean_vtt,
239
- inputs=[output_vtt],
240
- outputs=[output_vtt]
241
  )
242
 
243
  # Speaker renaming
 
7
 
8
  from src.audio_processor import AudioProcessor
9
  from src.speaker_manager import SpeakerManager
10
+ from src.vtt_utils import validate_vtt
11
 
12
  logging.basicConfig(level=logging.INFO)
13
 
 
123
 
124
  with gr.Column():
125
  with gr.Group():
126
+
127
  output_vtt = gr.Textbox(
128
  label="Transcription",
129
  lines=20,
130
  placeholder="Your transcription will appear here...",
131
+ show_copy_button=True,
132
  )
133
 
134
  validation_status = gr.Markdown("⚪ No content", container=True)
 
139
  new_speaker_name = gr.Textbox(label="New speaker name", placeholder="Davide")
140
 
141
  rename_btn = gr.Button("Rename")
 
 
 
142
 
143
  download_file = gr.File(label="Download VTT", visible=False)
144
 
 
145
  def check_inputs(openai_key: str, hf_key: str, audio) -> gr.Button:
146
  """
147
  Enable submit button only if both API keys and audio are provided.
 
166
  audio_filename: Audio filename for download
167
 
168
  Returns:
169
+ Tuple of (status_message, download_file)
170
  """
171
  status, status_type = validate_vtt(vtt_content)
172
 
 
180
 
181
  return (
182
  status,
 
183
  gr.File(value=file_path, visible=False) # download_file
184
  )
185
 
 
220
  output_vtt.change(
221
  fn=update_validation,
222
  inputs=[output_vtt, audio_filename_state],
223
+ outputs=[validation_status, download_file]
224
  )
225
 
226
  audio_filename_state.change(
227
  fn=update_validation,
228
  inputs=[output_vtt, audio_filename_state],
229
+ outputs=[validation_status, download_file]
 
 
 
 
 
 
 
230
  )
231
 
232
  # Speaker renaming
src/vtt_utils.py CHANGED
@@ -85,76 +85,4 @@ def validate_vtt(vtt_content: str) -> Tuple[str, str]:
85
 
86
  return "🟢 Valid", "success"
87
  except Exception as e:
88
- return f"🔴 Validation error: {str(e)}", "error"
89
-
90
-
91
- def clean_vtt(vtt_content: str) -> str:
92
- """
93
- Clean and improve VTT content.
94
-
95
- Improvements:
96
- - Capitalizes after sentence-ending punctuation (. ! ?)
97
- - Handles cross-segment capitalization intelligently
98
- - Removes multiple spaces
99
- - Preserves speaker tags
100
-
101
- Args:
102
- vtt_content: VTT file content as string
103
-
104
- Returns:
105
- Cleaned VTT content
106
- """
107
- if not vtt_content:
108
- return vtt_content
109
-
110
- lines = vtt_content.split('\n')
111
- cleaned_lines = []
112
- last_text_ended_with_sentence_end = False
113
-
114
- for line in lines:
115
- # Skip empty lines and WEBVTT header
116
- if not line.strip() or line.startswith('WEBVTT'):
117
- cleaned_lines.append(line)
118
- continue
119
-
120
- # Skip timestamp lines
121
- if '-->' in line:
122
- cleaned_lines.append(line)
123
- continue
124
-
125
- # Extract speaker tag if present
126
- speaker_tag = ""
127
- text_content = line
128
- speaker_match = re.match(r'^(<v [^>]+>)\s*(.*)', line)
129
- if speaker_match:
130
- speaker_tag = speaker_match.group(1)
131
- text_content = speaker_match.group(2)
132
-
133
- # Capitalize first letter if previous segment ended with sentence-ending punctuation
134
- if last_text_ended_with_sentence_end and text_content and text_content[0].islower():
135
- text_content = text_content[0].upper() + text_content[1:]
136
-
137
- # Fix capitalization after punctuation within the same line
138
- text_content = re.sub(
139
- r'([.!?])\s+([a-z])',
140
- lambda m: m.group(1) + m.group(2).upper(),
141
- text_content
142
- )
143
-
144
- # Remove multiple spaces
145
- text_content = re.sub(r'\s{2,}', ' ', text_content)
146
-
147
- # Trim leading/trailing spaces
148
- text_content = text_content.strip()
149
-
150
- # Rebuild line with speaker tag if it existed
151
- cleaned_line = f"{speaker_tag} {text_content}" if speaker_tag else text_content
152
-
153
- # Check if this line ends with sentence-ending punctuation
154
- last_text_ended_with_sentence_end = bool(
155
- text_content and re.search(r'[.!?]\s*$', text_content)
156
- )
157
-
158
- cleaned_lines.append(cleaned_line)
159
-
160
- return '\n'.join(cleaned_lines)
 
85
 
86
  return "🟢 Valid", "success"
87
  except Exception as e:
88
+ return f"🔴 Validation error: {str(e)}", "error"