Spaces:
Sleeping
Sleeping
Commit
·
3d5ee3a
1
Parent(s):
9ed7329
Fixes
Browse files
- app.py +6 -17
- src/vtt_utils.py +1 -73
app.py
CHANGED
|
@@ -7,7 +7,7 @@ import gradio as gr
|
|
| 7 |
|
| 8 |
from src.audio_processor import AudioProcessor
|
| 9 |
from src.speaker_manager import SpeakerManager
|
| 10 |
-
from src.vtt_utils import clean_vtt, validate_vtt
|
| 11 |
|
| 12 |
logging.basicConfig(level=logging.INFO)
|
| 13 |
|
|
@@ -123,11 +123,12 @@ with gr.Blocks(title="Transcription & Diarization") as app:
|
|
| 123 |
|
| 124 |
with gr.Column():
|
| 125 |
with gr.Group():
|
|
|
|
| 126 |
output_vtt = gr.Textbox(
|
| 127 |
label="Transcription",
|
| 128 |
lines=20,
|
| 129 |
placeholder="Your transcription will appear here...",
|
| 130 |
-
|
| 131 |
)
|
| 132 |
|
| 133 |
validation_status = gr.Markdown("⚪ No content", container=True)
|
|
@@ -138,13 +139,9 @@ with gr.Blocks(title="Transcription & Diarization") as app:
|
|
| 138 |
new_speaker_name = gr.Textbox(label="New speaker name", placeholder="Davide")
|
| 139 |
|
| 140 |
rename_btn = gr.Button("Rename")
|
| 141 |
-
|
| 142 |
-
with gr.Row():
|
| 143 |
-
clean_btn = gr.Button("Fix", variant="secondary", interactive=False)
|
| 144 |
|
| 145 |
download_file = gr.File(label="Download VTT", visible=False)
|
| 146 |
|
| 147 |
-
|
| 148 |
def check_inputs(openai_key: str, hf_key: str, audio) -> gr.Button:
|
| 149 |
"""
|
| 150 |
Enable submit button only if both API keys and audio are provided.
|
|
@@ -169,7 +166,7 @@ with gr.Blocks(title="Transcription & Diarization") as app:
|
|
| 169 |
audio_filename: Audio filename for download
|
| 170 |
|
| 171 |
Returns:
|
| 172 |
-
Tuple of (status_message, clean_btn, download_file)
|
| 173 |
"""
|
| 174 |
status, status_type = validate_vtt(vtt_content)
|
| 175 |
|
|
@@ -183,7 +180,6 @@ with gr.Blocks(title="Transcription & Diarization") as app:
|
|
| 183 |
|
| 184 |
return (
|
| 185 |
status,
|
| 186 |
-
gr.Button(interactive=is_valid), # clean_btn
|
| 187 |
gr.File(value=file_path, visible=False) # download_file
|
| 188 |
)
|
| 189 |
|
|
@@ -224,20 +220,13 @@ with gr.Blocks(title="Transcription & Diarization") as app:
|
|
| 224 |
output_vtt.change(
|
| 225 |
fn=update_validation,
|
| 226 |
inputs=[output_vtt, audio_filename_state],
|
| 227 |
-
outputs=[validation_status, clean_btn, download_file]
|
| 228 |
)
|
| 229 |
|
| 230 |
audio_filename_state.change(
|
| 231 |
fn=update_validation,
|
| 232 |
inputs=[output_vtt, audio_filename_state],
|
| 233 |
-
outputs=[validation_status, clean_btn, download_file]
|
| 234 |
-
)
|
| 235 |
-
|
| 236 |
-
# VTT cleaning and improvement
|
| 237 |
-
clean_btn.click(
|
| 238 |
-
fn=clean_vtt,
|
| 239 |
-
inputs=[output_vtt],
|
| 240 |
-
outputs=[output_vtt]
|
| 241 |
)
|
| 242 |
|
| 243 |
# Speaker renaming
|
|
|
|
| 7 |
|
| 8 |
from src.audio_processor import AudioProcessor
|
| 9 |
from src.speaker_manager import SpeakerManager
|
| 10 |
+
from src.vtt_utils import validate_vtt
|
| 11 |
|
| 12 |
logging.basicConfig(level=logging.INFO)
|
| 13 |
|
|
|
|
| 123 |
|
| 124 |
with gr.Column():
|
| 125 |
with gr.Group():
|
| 126 |
+
|
| 127 |
output_vtt = gr.Textbox(
|
| 128 |
label="Transcription",
|
| 129 |
lines=20,
|
| 130 |
placeholder="Your transcription will appear here...",
|
| 131 |
+
show_copy_button=True,
|
| 132 |
)
|
| 133 |
|
| 134 |
validation_status = gr.Markdown("⚪ No content", container=True)
|
|
|
|
| 139 |
new_speaker_name = gr.Textbox(label="New speaker name", placeholder="Davide")
|
| 140 |
|
| 141 |
rename_btn = gr.Button("Rename")
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
download_file = gr.File(label="Download VTT", visible=False)
|
| 144 |
|
|
|
|
| 145 |
def check_inputs(openai_key: str, hf_key: str, audio) -> gr.Button:
|
| 146 |
"""
|
| 147 |
Enable submit button only if both API keys and audio are provided.
|
|
|
|
| 166 |
audio_filename: Audio filename for download
|
| 167 |
|
| 168 |
Returns:
|
| 169 |
+
Tuple of (status_message, download_file)
|
| 170 |
"""
|
| 171 |
status, status_type = validate_vtt(vtt_content)
|
| 172 |
|
|
|
|
| 180 |
|
| 181 |
return (
|
| 182 |
status,
|
|
|
|
| 183 |
gr.File(value=file_path, visible=False) # download_file
|
| 184 |
)
|
| 185 |
|
|
|
|
| 220 |
output_vtt.change(
|
| 221 |
fn=update_validation,
|
| 222 |
inputs=[output_vtt, audio_filename_state],
|
| 223 |
+
outputs=[validation_status, download_file]
|
| 224 |
)
|
| 225 |
|
| 226 |
audio_filename_state.change(
|
| 227 |
fn=update_validation,
|
| 228 |
inputs=[output_vtt, audio_filename_state],
|
| 229 |
+
outputs=[validation_status, download_file]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
)
|
| 231 |
|
| 232 |
# Speaker renaming
|
src/vtt_utils.py
CHANGED
|
@@ -85,76 +85,4 @@ def validate_vtt(vtt_content: str) -> Tuple[str, str]:
|
|
| 85 |
|
| 86 |
return "🟢 Valid", "success"
|
| 87 |
except Exception as e:
|
| 88 |
-
return f"🔴 Validation error: {str(e)}", "error"
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
def clean_vtt(vtt_content: str) -> str:
|
| 92 |
-
"""
|
| 93 |
-
Clean and improve VTT content.
|
| 94 |
-
|
| 95 |
-
Improvements:
|
| 96 |
-
- Capitalizes after sentence-ending punctuation (. ! ?)
|
| 97 |
-
- Handles cross-segment capitalization intelligently
|
| 98 |
-
- Removes multiple spaces
|
| 99 |
-
- Preserves speaker tags
|
| 100 |
-
|
| 101 |
-
Args:
|
| 102 |
-
vtt_content: VTT file content as string
|
| 103 |
-
|
| 104 |
-
Returns:
|
| 105 |
-
Cleaned VTT content
|
| 106 |
-
"""
|
| 107 |
-
if not vtt_content:
|
| 108 |
-
return vtt_content
|
| 109 |
-
|
| 110 |
-
lines = vtt_content.split('\n')
|
| 111 |
-
cleaned_lines = []
|
| 112 |
-
last_text_ended_with_sentence_end = False
|
| 113 |
-
|
| 114 |
-
for line in lines:
|
| 115 |
-
# Skip empty lines and WEBVTT header
|
| 116 |
-
if not line.strip() or line.startswith('WEBVTT'):
|
| 117 |
-
cleaned_lines.append(line)
|
| 118 |
-
continue
|
| 119 |
-
|
| 120 |
-
# Skip timestamp lines
|
| 121 |
-
if '-->' in line:
|
| 122 |
-
cleaned_lines.append(line)
|
| 123 |
-
continue
|
| 124 |
-
|
| 125 |
-
# Extract speaker tag if present
|
| 126 |
-
speaker_tag = ""
|
| 127 |
-
text_content = line
|
| 128 |
-
speaker_match = re.match(r'^(<v [^>]+>)\s*(.*)', line)
|
| 129 |
-
if speaker_match:
|
| 130 |
-
speaker_tag = speaker_match.group(1)
|
| 131 |
-
text_content = speaker_match.group(2)
|
| 132 |
-
|
| 133 |
-
# Capitalize first letter if previous segment ended with sentence-ending punctuation
|
| 134 |
-
if last_text_ended_with_sentence_end and text_content and text_content[0].islower():
|
| 135 |
-
text_content = text_content[0].upper() + text_content[1:]
|
| 136 |
-
|
| 137 |
-
# Fix capitalization after punctuation within the same line
|
| 138 |
-
text_content = re.sub(
|
| 139 |
-
r'([.!?])\s+([a-z])',
|
| 140 |
-
lambda m: m.group(1) + m.group(2).upper(),
|
| 141 |
-
text_content
|
| 142 |
-
)
|
| 143 |
-
|
| 144 |
-
# Remove multiple spaces
|
| 145 |
-
text_content = re.sub(r'\s{2,}', ' ', text_content)
|
| 146 |
-
|
| 147 |
-
# Trim leading/trailing spaces
|
| 148 |
-
text_content = text_content.strip()
|
| 149 |
-
|
| 150 |
-
# Rebuild line with speaker tag if it existed
|
| 151 |
-
cleaned_line = f"{speaker_tag} {text_content}" if speaker_tag else text_content
|
| 152 |
-
|
| 153 |
-
# Check if this line ends with sentence-ending punctuation
|
| 154 |
-
last_text_ended_with_sentence_end = bool(
|
| 155 |
-
text_content and re.search(r'[.!?]\s*$', text_content)
|
| 156 |
-
)
|
| 157 |
-
|
| 158 |
-
cleaned_lines.append(cleaned_line)
|
| 159 |
-
|
| 160 |
-
return '\n'.join(cleaned_lines)
|
|
|
|
| 85 |
|
| 86 |
return "🟢 Valid", "success"
|
| 87 |
except Exception as e:
|
| 88 |
+
return f"🔴 Validation error: {str(e)}", "error"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|