Spaces:

Mahnoor00
/

vocal-sync-intelligence

Sleeping

App Files Community

Fnu Mahnoor committed on Jan 26

Commit

7cc7815

1 Parent(s): de99631

Fix app

Browse files

Files changed (1) hide show

app.py +83 -104

app.py CHANGED Viewed

@@ -1,166 +1,145 @@
 import gradio as gr
 import numpy as np
 import logging
-import os
 import librosa
 from dotenv import load_dotenv
-# Your custom logic imports
 from src.transcription.streaming_transcriber import StreamingTranscriber
 from src.handlers.analysis_handler import analyze_transcript
 from src.handlers.transcription_handler import transcribe_file, transcribe_video_url
 load_dotenv()
-# Global config for stability
-MAX_BUFFER_SECONDS = 5
 SAMPLE_RATE = 16000
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 transcriber = StreamingTranscriber()
-# --- 1. PRO THEME DEFINITION ---
 theme = gr.themes.Monochrome(
-    primary_hue="emerald",
     neutral_hue="slate",
     font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
-).set(
-    block_title_text_weight="700",
-    block_label_text_size="xs",
-    button_primary_background_fill="*primary_600",
 )
-# ... (imports remain the same)
-def process_stream(audio, state, mode, language_code): # Added language_code
     if mode != "Real-time":
-        if audio is None: return state, gr.skip()
         sr, data = audio
         data = data.astype(np.float32) / 32768.0
         if sr != SAMPLE_RATE:
             data = librosa.resample(data, orig_sr=sr, target_sr=SAMPLE_RATE)
-        new_state = np.concatenate([state, data]) if state is not None else data
-        return new_state, gr.skip()
-    # Pass the language selection to the transcriber
-    # Note: You'll need to update your StreamingTranscriber.process_stream
-    # to accept and use a 'language' argument in self.model.transcribe
     new_state, text = transcriber.process_stream(audio, state)
-    return new_state, text
 def clear_session():
-    transcriber.clear_history() # Reset the class internal string
     return np.array([], dtype=np.float32), "", ""
-# ... (rest of Gradio UI remains the same)
-# --- 2. THE UI INTERFACE ---
-# Removed 'theme' and 'title' from here to fix the UserWarning
 css = """
-/* Shrink the audio component container */
-.compact-audio {
-    min-width: 150px !important;
-}
-/* Hide the 'Live Input' label to save vertical space */
-.compact-audio label {
-    display: none !important;
-}
-/* Reduce internal padding and center the mic button */
-.compact-audio .container {
-    padding: 0 !important;
-}
 """
-# --- THE UI INTERFACE ---
-with gr.Blocks(api_open=False, theme=theme, css=css) as demo:
-    gr.Markdown("""
         # 🎙️ **VocalSync Intelligence**
         *Transforming messy speech into clear guidelines, minutes, and maps.*
-    """)
     with gr.Tabs():
         with gr.Tab("Live Intelligence"):
             with gr.Row():
-                # SIDEBAR (Input Controls)
-                with gr.Column(scale=1, min_width=180): # Keeps this column small
-                    mode = gr.Radio(["Real-time", "After Speech"], value="Real-time", label="Mode")
-                    # --- NEW: Language Selection ---
                     language_dropdown = gr.Dropdown(
-                        choices=[("English", "en"), ("Spanish", "es"), ("French", "fr"), ("German", "de"), ("Chinese", "zh"), ("Auto-Detect", None)],
                         value="en",
-                        label="Speech Language"
                     )
                     audio_in = gr.Audio(
-                        sources=["microphone"],
-                        streaming=True,
                         type="numpy",
-                        elem_classes="compact-audio" # Applies the CSS
                     )
                     clear_btn = gr.Button("Clear Session", variant="stop", size="sm")
-                # MAIN AREA (Analysis and Transcript)
-                with gr.Column(scale=4): # Takes up most of the page
-                    text_out = gr.Textbox(label="Transcript", lines=10, autoscroll=True)
-                    analyze_btn = gr.Button("✨ Generate Actionable Insights", variant="primary", size="lg")
-                    analysis_out = gr.Textbox(label="AI Intelligence Output", lines=12, placeholder="Analysis will appear here after clicking the button...")
             state = gr.State(value=np.array([], dtype=np.float32))
-            # Updated inputs to include language_dropdown
             audio_in.stream(
-                process_stream,
-                [audio_in, state, mode, language_dropdown],
-                [state, text_out]
-                )
-            analyze_btn.click(analyze_transcript, inputs=text_out, outputs=analysis_out)
-            clear_btn.click(clear_session, outputs=[state, text_out, analysis_out])
-        # ... (Media Ingestion)
         with gr.Tab("Media Ingestion"):
             with gr.Row():
-                # LEFT SIDEBAR: Upload & URL (Scale 1)
                 with gr.Column(scale=1, min_width=300):
-                    gr.Markdown("### 📥 Source")
-                    url_input = gr.Textbox(
-                        placeholder="Accesible Video URL...",
-                        label="Remote Link",
-                        show_label=False # Keeps it clean
-                    )
-                    url_btn = gr.Button("Extract & Transcribe", variant="secondary")
-                    gr.HTML("<div style='margin: 15px 0; border-bottom: 1px solid #333;'></div>")
-                    with gr.Group(): # Groups Upload + Button
-                        file_audio = gr.Audio(
-                            sources=["upload"],
-                            type="filepath",
-                            label="Drop Audio/Video File Here",
-                            elem_classes="compact-upload" # Apply the CSS
-                        )
-                        file_btn = gr.Button("🚀 Process File", variant="secondary")
-                # RIGHT PANEL: Transcription & Analysis
-                with gr.Column(scale=3):
-                    media_text_out = gr.Textbox(label="Media Transcript", lines=12,  autoscroll=True, placeholder="Transcription will appear here...")
-                    media_analyze_btn = gr.Button("✨ Generate Actionable Insights", variant="primary", size="lg")
-                    media_analysis_out = gr.Textbox(label="AI Intelligence Output", lines=10,  autoscroll=True, placeholder="Transcription will appear here...")
-            url_btn.click(transcribe_video_url, inputs=url_input, outputs=media_text_out)
-            file_btn.click(transcribe_file, inputs=file_audio, outputs=media_text_out)
-            media_analyze_btn.click(analyze_transcript, inputs=media_text_out, outputs=media_analysis_out)
-    gr.HTML("<div style='text-align: center; color: #666; font-size: 0.8em;'>Powered by ContextMap Engine</div>")
 if __name__ == "__main__":
     demo.launch(
-        show_api=False,
-        share=True,
-        server_name="0.0.0.0",   # Required for HF Spaces to map the port
-        server_port=7860,         # Standard HF port
     )

 import gradio as gr
 import numpy as np
 import logging
 import librosa
 from dotenv import load_dotenv
 from src.transcription.streaming_transcriber import StreamingTranscriber
 from src.handlers.analysis_handler import analyze_transcript
 from src.handlers.transcription_handler import transcribe_file, transcribe_video_url
 load_dotenv()
 SAMPLE_RATE = 16000
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 transcriber = StreamingTranscriber()
+# --- THEME ---
 theme = gr.themes.Monochrome(
+    primary_hue="emerald",
     neutral_hue="slate",
     font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
 )
+# --- STREAM HANDLER ---
+def process_stream(audio, state, mode, language_code):
+    if audio is None:
+        return state, ""
     if mode != "Real-time":
         sr, data = audio
         data = data.astype(np.float32) / 32768.0
         if sr != SAMPLE_RATE:
             data = librosa.resample(data, orig_sr=sr, target_sr=SAMPLE_RATE)
+        state = np.concatenate([state, data]) if state.size else data
+        return state, ""
+    # Real-time
     new_state, text = transcriber.process_stream(audio, state)
+    return new_state, text or ""
 def clear_session():
+    transcriber.clear_history()
     return np.array([], dtype=np.float32), "", ""
+# --- CSS ---
 css = """
+.compact-audio { min-width: 150px !important; }
+.compact-audio label { display: none !important; }
+.compact-audio .container { padding: 0 !important; }
 """
+# --- UI ---
+with gr.Blocks(theme=theme, css=css) as demo:
+    gr.Markdown(
+        """
         # 🎙️ **VocalSync Intelligence**
         *Transforming messy speech into clear guidelines, minutes, and maps.*
+        """
+    )
     with gr.Tabs():
         with gr.Tab("Live Intelligence"):
             with gr.Row():
+                with gr.Column(scale=1, min_width=180):
+                    mode = gr.Radio(
+                        ["Real-time", "After Speech"],
+                        value="Real-time",
+                        label="Mode",
+                    )
+                    # ⚠️ IMPORTANT:
+                    # DO NOT use None in dropdown values in Gradio 4
                     language_dropdown = gr.Dropdown(
+                        choices=[
+                            ("English", "en"),
+                            ("Spanish", "es"),
+                            ("French", "fr"),
+                            ("German", "de"),
+                            ("Chinese", "zh"),
+                            ("Auto-Detect", "auto"),
+                        ],
                         value="en",
+                        label="Speech Language",
                     )
                     audio_in = gr.Audio(
+                        sources=["microphone"],
+                        streaming=True,
                         type="numpy",
+                        elem_classes="compact-audio",
                     )
                     clear_btn = gr.Button("Clear Session", variant="stop", size="sm")
+                with gr.Column(scale=4):
+                    text_out = gr.Textbox(label="Transcript", lines=10)
+                    analyze_btn = gr.Button(
+                        "✨ Generate Actionable Insights", variant="primary", size="lg"
+                    )
+                    analysis_out = gr.Textbox(label="AI Intelligence Output", lines=12)
             state = gr.State(value=np.array([], dtype=np.float32))
             audio_in.stream(
+                process_stream,
+                inputs=[audio_in, state, mode, language_dropdown],
+                outputs=[state, text_out],
+            )
+            analyze_btn.click(analyze_transcript, text_out, analysis_out)
+            clear_btn.click(clear_session, None, [state, text_out, analysis_out])
         with gr.Tab("Media Ingestion"):
             with gr.Row():
                 with gr.Column(scale=1, min_width=300):
+                    url_input = gr.Textbox(placeholder="Accessible Video URL...")
+                    url_btn = gr.Button("Extract & Transcribe")
+                    file_audio = gr.Audio(sources=["upload"], type="filepath")
+                    file_btn = gr.Button("Process File")
+                with gr.Column(scale=3):
+                    media_text_out = gr.Textbox(lines=12)
+                    media_analyze_btn = gr.Button("✨ Generate Actionable Insights")
+                    media_analysis_out = gr.Textbox(lines=10)
+            url_btn.click(transcribe_video_url, url_input, media_text_out)
+            file_btn.click(transcribe_file, file_audio, media_text_out)
+            media_analyze_btn.click(analyze_transcript, media_text_out, media_analysis_out)
+    gr.HTML(
+        "<div style='text-align:center;color:#666;font-size:0.8em;'>Powered by ContextMap Engine</div>"
+    )
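+# 🚨 CRITICAL FOR GRADIO 4 — NOTE(review): Gradio 4.x removed queue(concurrency_count=...) in favor of default_concurrency_limit; confirm against the installed Gradio version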
+demo.queue(concurrency_count=1)
 if __name__ == "__main__":
     demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=True,      # HF Spaces requirement
+        show_api=False,  # UI only (schema still built internally)
     )