Spaces:

lucamartinelli
/

whisper-diarization

Sleeping

App Files Community

lucamartinelli committed on 18 days ago

Commit

3d5ee3a

1 Parent(s): 9ed7329

Fixes

Browse files

Files changed (2) hide show

app.py +6 -17
src/vtt_utils.py +1 -73

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ import gradio as gr
 from src.audio_processor import AudioProcessor
 from src.speaker_manager import SpeakerManager
-from src.vtt_utils import clean_vtt, validate_vtt
 logging.basicConfig(level=logging.INFO)
@@ -123,11 +123,12 @@ with gr.Blocks(title="Transcription & Diarization") as app:
         with gr.Column():
             with gr.Group():
                 output_vtt = gr.Textbox(
                     label="Transcription",
                     lines=20,
                     placeholder="Your transcription will appear here...",
-                    container=False,
                 )
                 validation_status = gr.Markdown("⚪ No content", container=True)
@@ -138,13 +139,9 @@ with gr.Blocks(title="Transcription & Diarization") as app:
                     new_speaker_name = gr.Textbox(label="New speaker name", placeholder="Davide")
                 rename_btn = gr.Button("Rename")
-            with gr.Row():
-                clean_btn = gr.Button("Fix", variant="secondary", interactive=False)
             download_file = gr.File(label="Download VTT", visible=False)
     def check_inputs(openai_key: str, hf_key: str, audio) -> gr.Button:
         """
         Enable submit button only if both API keys and audio are provided.
@@ -169,7 +166,7 @@ with gr.Blocks(title="Transcription & Diarization") as app:
             audio_filename: Audio filename for download
         Returns:
-            Tuple of (status_message, clean_button, download_file)
         """
         status, status_type = validate_vtt(vtt_content)
@@ -183,7 +180,6 @@ with gr.Blocks(title="Transcription & Diarization") as app:
         return (
             status,
-            gr.Button(interactive=is_valid),  # clean_btn
             gr.File(value=file_path, visible=False)  # download_file
         )
@@ -224,20 +220,13 @@ with gr.Blocks(title="Transcription & Diarization") as app:
     output_vtt.change(
         fn=update_validation,
         inputs=[output_vtt, audio_filename_state],
-        outputs=[validation_status, clean_btn, download_file]
     )
     audio_filename_state.change(
         fn=update_validation,
         inputs=[output_vtt, audio_filename_state],
-        outputs=[validation_status, clean_btn, download_file]
-    )
-    # VTT cleaning and improvement
-    clean_btn.click(
-        fn=clean_vtt,
-        inputs=[output_vtt],
-        outputs=[output_vtt]
     )
     # Speaker renaming

 from src.audio_processor import AudioProcessor
 from src.speaker_manager import SpeakerManager
+from src.vtt_utils import validate_vtt
 logging.basicConfig(level=logging.INFO)
         with gr.Column():
             with gr.Group():
                 output_vtt = gr.Textbox(
                     label="Transcription",
                     lines=20,
                     placeholder="Your transcription will appear here...",
+                    show_copy_button=True,
                 )
                 validation_status = gr.Markdown("⚪ No content", container=True)
                     new_speaker_name = gr.Textbox(label="New speaker name", placeholder="Davide")
                 rename_btn = gr.Button("Rename")
             download_file = gr.File(label="Download VTT", visible=False)
     def check_inputs(openai_key: str, hf_key: str, audio) -> gr.Button:
         """
         Enable submit button only if both API keys and audio are provided.
             audio_filename: Audio filename for download
         Returns:
+            Tuple of (status_message, download_file)
         """
         status, status_type = validate_vtt(vtt_content)
         return (
             status,
             gr.File(value=file_path, visible=False)  # download_file
         )
     output_vtt.change(
         fn=update_validation,
         inputs=[output_vtt, audio_filename_state],
+        outputs=[validation_status, download_file]
     )
     audio_filename_state.change(
         fn=update_validation,
         inputs=[output_vtt, audio_filename_state],
+        outputs=[validation_status, download_file]
     )
     # Speaker renaming

src/vtt_utils.py CHANGED Viewed

@@ -85,76 +85,4 @@ def validate_vtt(vtt_content: str) -> Tuple[str, str]:
         return "🟢 Valid", "success"
     except Exception as e:
-        return f"🔴 Validation error: {str(e)}", "error"
-def clean_vtt(vtt_content: str) -> str:
-    """
-    Clean and improve VTT content.
-    Improvements:
-    - Capitalizes after sentence-ending punctuation (. ! ?)
-    - Handles cross-segment capitalization intelligently
-    - Removes multiple spaces
-    - Preserves speaker tags
-    Args:
-        vtt_content: VTT file content as string
-    Returns:
-        Cleaned VTT content
-    """
-    if not vtt_content:
-        return vtt_content
-    lines = vtt_content.split('\n')
-    cleaned_lines = []
-    last_text_ended_with_sentence_end = False
-    for line in lines:
-        # Skip empty lines and WEBVTT header
-        if not line.strip() or line.startswith('WEBVTT'):
-            cleaned_lines.append(line)
-            continue
-        # Skip timestamp lines
-        if '-->' in line:
-            cleaned_lines.append(line)
-            continue
-        # Extract speaker tag if present
-        speaker_tag = ""
-        text_content = line
-        speaker_match = re.match(r'^(<v [^>]+>)\s*(.*)', line)
-        if speaker_match:
-            speaker_tag = speaker_match.group(1)
-            text_content = speaker_match.group(2)
-        # Capitalize first letter if previous segment ended with sentence-ending punctuation
-        if last_text_ended_with_sentence_end and text_content and text_content[0].islower():
-            text_content = text_content[0].upper() + text_content[1:]
-        # Fix capitalization after punctuation within the same line
-        text_content = re.sub(
-            r'([.!?])\s+([a-z])',
-            lambda m: m.group(1) + m.group(2).upper(),
-            text_content
-        )
-        # Remove multiple spaces
-        text_content = re.sub(r'\s{2,}', ' ', text_content)
-        # Trim leading/trailing spaces
-        text_content = text_content.strip()
-        # Rebuild line with speaker tag if it existed
-        cleaned_line = f"{speaker_tag} {text_content}" if speaker_tag else text_content
-        # Check if this line ends with sentence-ending punctuation
-        last_text_ended_with_sentence_end = bool(
-            text_content and re.search(r'[.!?]\s*$', text_content)
-        )
-        cleaned_lines.append(cleaned_line)
-    return '\n'.join(cleaned_lines)

         return "🟢 Valid", "success"
     except Exception as e:
+        return f"🔴 Validation error: {str(e)}", "error"