Fnu Mahnoor committed on
Commit
52cf5d6
·
1 Parent(s): a5550ba

new app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -78
app.py CHANGED
@@ -5,23 +5,22 @@ import os
5
  import librosa
6
  from dotenv import load_dotenv
7
 
8
- # Your custom logic imports
9
  from src.transcription.streaming_transcriber import StreamingTranscriber
10
  from src.handlers.analysis_handler import analyze_transcript
11
  from src.handlers.transcription_handler import transcribe_file, transcribe_video_url
12
 
13
  load_dotenv()
14
 
15
- # Global config for stability
16
- MAX_BUFFER_SECONDS = 5
17
  SAMPLE_RATE = 16000
 
18
 
19
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
20
  transcriber = StreamingTranscriber()
21
 
22
- # --- 1. PRO THEME DEFINITION ---
23
  theme = gr.themes.Monochrome(
24
- primary_hue="emerald",
25
  neutral_hue="slate",
26
  font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
27
  ).set(
@@ -30,11 +29,10 @@ theme = gr.themes.Monochrome(
30
  button_primary_background_fill="*primary_600",
31
  )
32
 
33
- # ... (imports remain the same)
34
-
35
  def process_stream(audio, state, mode, language_code): # Added language_code
 
36
  if mode != "Real-time":
37
- if audio is None: return state, gr.skip()
38
  sr, data = audio
39
  data = data.astype(np.float32) / 32768.0
40
  if sr != SAMPLE_RATE:
@@ -45,121 +43,190 @@ def process_stream(audio, state, mode, language_code): # Added language_code
45
  # Pass the language selection to the transcriber
46
  # Note: You'll need to update your StreamingTranscriber.process_stream
47
  # to accept and use a 'language' argument in self.model.transcribe
48
- new_state, text = transcriber.process_stream(audio, state)
49
  return new_state, text
50
 
 
 
51
  def clear_session():
52
- transcriber.clear_history() # Reset the class internal string
53
  return np.array([], dtype=np.float32), "", ""
54
 
55
- # ... (rest of Gradio UI remains the same)
56
 
57
- # --- 2. THE UI INTERFACE ---
 
 
 
 
 
 
58
 
59
- # Removed 'theme' and 'title' from here to fix the UserWarning
60
  css = """
61
- /* Shrink the audio component container */
62
  .compact-audio {
63
  min-width: 150px !important;
64
  }
65
- /* Hide the 'Live Input' label to save vertical space */
66
  .compact-audio label {
67
  display: none !important;
68
  }
69
- /* Reduce internal padding and center the mic button */
70
  .compact-audio .container {
71
  padding: 0 !important;
72
  }
73
  """
74
- # --- THE UI INTERFACE ---
 
75
  with gr.Blocks(theme=theme, css=css) as demo:
76
  gr.Markdown("""
77
- # 🎙️ **VocalSync Intelligence**
78
- *Transforming messy speech into clear guidelines, minutes, and maps.*
79
  """)
 
80
  with gr.Tabs():
 
 
81
  with gr.Tab("Live Intelligence"):
82
  with gr.Row():
83
- # SIDEBAR (Input Controls)
84
- with gr.Column(scale=1, min_width=180): # Keeps this column small
85
- mode = gr.Radio(["Real-time", "After Speech"], value="Real-time", label="Mode")
86
 
87
- # --- NEW: Language Selection ---
 
 
 
 
 
 
88
  language_dropdown = gr.Dropdown(
89
- choices=[("English", "en"), ("Spanish", "es"), ("French", "fr"), ("German", "de"), ("Chinese", "zh"), ("Auto-Detect", None)],
 
 
 
 
 
 
 
90
  value="en",
91
  label="Speech Language"
92
  )
 
93
  audio_in = gr.Audio(
94
- sources=["microphone"],
95
- streaming=True,
96
  type="numpy",
97
- elem_classes="compact-audio" # Applies the CSS
98
  )
 
99
  clear_btn = gr.Button("Clear Session", variant="stop", size="sm")
100
-
101
- # MAIN AREA (Analysis and Transcript)
102
- with gr.Column(scale=4): # Takes up most of the page
103
- text_out = gr.Textbox(label="Transcript", lines=10, autoscroll=True)
104
- analyze_btn = gr.Button("✨ Generate Actionable Insights", variant="primary", size="lg")
105
- analysis_out = gr.Textbox(label="AI Intelligence Output", lines=12, placeholder="Analysis will appear here after clicking the button...")
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  state = gr.State(value=np.array([], dtype=np.float32))
108
-
109
- # Updated inputs to include language_dropdown
110
  audio_in.stream(
111
- process_stream,
112
- [audio_in, state, mode, language_dropdown],
113
- [state, text_out]
114
- )
115
-
116
- analyze_btn.click(analyze_transcript, inputs=text_out, outputs=analysis_out)
117
- clear_btn.click(clear_session, outputs=[state, text_out, analysis_out])
118
-
119
- # ... (Media Ingestion)
 
 
 
 
 
 
 
 
120
  with gr.Tab("Media Ingestion"):
121
  with gr.Row():
122
- # LEFT SIDEBAR: Upload & URL (Scale 1)
123
  with gr.Column(scale=1, min_width=300):
124
- gr.Markdown("### 📥 Source")
 
125
  url_input = gr.Textbox(
126
- placeholder="Accesible Video URL...",
127
- label="Remote Link",
128
- show_label=False # Keeps it clean
129
  )
130
- url_btn = gr.Button("Extract & Transcribe", variant="secondary")
131
-
132
- gr.HTML("<div style='margin: 15px 0; border-bottom: 1px solid #333;'></div>")
133
-
134
- with gr.Group(): # Groups Upload + Button
135
- file_audio = gr.Audio(
136
- sources=["upload"],
137
- type="filepath",
138
- label="Drop Audio/Video File Here",
139
- elem_classes="compact-upload" # Apply the CSS
140
- )
141
- file_btn = gr.Button("🚀 Process File", variant="secondary")
142
-
143
- # RIGHT PANEL: Transcription & Analysis
144
- with gr.Column(scale=3):
145
- media_text_out = gr.Textbox(label="Media Transcript", lines=12, autoscroll=True, placeholder="Transcription will appear here...")
146
- media_analyze_btn = gr.Button("✨ Generate Actionable Insights", variant="primary", size="lg")
147
- media_analysis_out = gr.Textbox(label="AI Intelligence Output", lines=10, autoscroll=True, placeholder="Transcription will appear here...")
148
-
149
-
150
- url_btn.click(transcribe_video_url, inputs=url_input, outputs=media_text_out)
151
- file_btn.click(transcribe_file, inputs=file_audio, outputs=media_text_out)
152
- media_analyze_btn.click(analyze_transcript, inputs=media_text_out, outputs=media_analysis_out)
153
 
 
154
 
 
 
 
 
 
 
155
 
156
- gr.HTML("<div style='text-align: center; color: #666; font-size: 0.8em;'>Powered by ContextMap Engine</div>")
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
 
 
 
 
 
 
 
159
  if __name__ == "__main__":
160
  demo.launch(
161
- # show_api=False,
162
- server_name="0.0.0.0", # Required for HF Spaces to map the port
163
- server_port=7860, # Standard HF port
164
  )
165
-
 
5
  import librosa
6
  from dotenv import load_dotenv
7
 
8
+ # Custom logic
9
  from src.transcription.streaming_transcriber import StreamingTranscriber
10
  from src.handlers.analysis_handler import analyze_transcript
11
  from src.handlers.transcription_handler import transcribe_file, transcribe_video_url
12
 
13
  load_dotenv()
14
 
15
+ # ---------------- CONFIG ----------------
 
16
  SAMPLE_RATE = 16000
17
+ logging.basicConfig(level=logging.INFO)
18
 
 
19
  transcriber = StreamingTranscriber()
20
 
21
+ # ---------------- THEME ----------------
22
  theme = gr.themes.Monochrome(
23
+ primary_hue="emerald",
24
  neutral_hue="slate",
25
  font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
26
  ).set(
 
29
  button_primary_background_fill="*primary_600",
30
  )
31
 
32
+ # ---------------- STREAMING LOGIC (UI ONLY) ----------------
 
33
  def process_stream(audio, state, mode, language_code): # Added language_code
34
+ if audio is None: return state, ""
35
  if mode != "Real-time":
 
36
  sr, data = audio
37
  data = data.astype(np.float32) / 32768.0
38
  if sr != SAMPLE_RATE:
 
43
  # Pass the language selection to the transcriber
44
  # Note: You'll need to update your StreamingTranscriber.process_stream
45
  # to accept and use a 'language' argument in self.model.transcribe
46
+ new_state, text = transcriber.process_stream(audio, state, language_code)
47
  return new_state, text
48
 
49
+
50
+
51
  def clear_session():
52
+ transcriber.clear_history()
53
  return np.array([], dtype=np.float32), "", ""
54
 
 
55
 
56
+ # ---------------- API SAFE FUNCTION ----------------
57
+ # This is what Gradio exposes as an API
58
+ def api_analyze(text: str) -> str:
59
+ if not text or not text.strip():
60
+ return "No transcript provided."
61
+ return analyze_transcript(text)
62
+
63
 
64
+ # ---------------- CSS ----------------
65
  css = """
 
66
  .compact-audio {
67
  min-width: 150px !important;
68
  }
 
69
  .compact-audio label {
70
  display: none !important;
71
  }
 
72
  .compact-audio .container {
73
  padding: 0 !important;
74
  }
75
  """
76
+
77
+ # ---------------- UI ----------------
78
  with gr.Blocks(theme=theme, css=css) as demo:
79
  gr.Markdown("""
80
+ # 🎙️ **VocalSync Intelligence**
81
+ Turning messy speech into clear thinking, meeting notes, and action.
82
  """)
83
+
84
  with gr.Tabs():
85
+
86
+ # -------- LIVE INTELLIGENCE TAB --------
87
  with gr.Tab("Live Intelligence"):
88
  with gr.Row():
 
 
 
89
 
90
+ with gr.Column(scale=1, min_width=180):
91
+ mode = gr.Radio(
92
+ ["Real-time", "After Speech"],
93
+ value="Real-time",
94
+ label="Mode"
95
+ )
96
+
97
  language_dropdown = gr.Dropdown(
98
+ choices=[
99
+ ("English", "en"),
100
+ ("Spanish", "es"),
101
+ ("French", "fr"),
102
+ ("German", "de"),
103
+ ("Chinese", "zh"),
104
+ ("Auto-Detect", None)
105
+ ],
106
  value="en",
107
  label="Speech Language"
108
  )
109
+
110
  audio_in = gr.Audio(
111
+ sources=["microphone"],
112
+ streaming=True,
113
  type="numpy",
114
+ elem_classes="compact-audio"
115
  )
116
+
117
  clear_btn = gr.Button("Clear Session", variant="stop", size="sm")
118
+
119
+ with gr.Column(scale=4):
120
+ text_out = gr.Textbox(
121
+ label="Transcript",
122
+ lines=10,
123
+ autoscroll=True
124
+ )
125
+
126
+ analyze_btn = gr.Button(
127
+ "Generate Actionable Insights",
128
+ variant="primary",
129
+ size="lg"
130
+ )
131
+
132
+ analysis_out = gr.Textbox(
133
+ label="AI Intelligence Output",
134
+ lines=12
135
+ )
136
 
137
  state = gr.State(value=np.array([], dtype=np.float32))
138
+
 
139
  audio_in.stream(
140
+ process_stream,
141
+ inputs=[audio_in, state, mode, language_dropdown],
142
+ outputs=[state, text_out]
143
+ )
144
+
145
+ analyze_btn.click(
146
+ analyze_transcript,
147
+ inputs=text_out,
148
+ outputs=analysis_out
149
+ )
150
+
151
+ clear_btn.click(
152
+ clear_session,
153
+ outputs=[state, text_out, analysis_out]
154
+ )
155
+
156
+ # -------- MEDIA INGESTION TAB --------
157
  with gr.Tab("Media Ingestion"):
158
  with gr.Row():
159
+
160
  with gr.Column(scale=1, min_width=300):
161
+ gr.Markdown("### Source")
162
+
163
  url_input = gr.Textbox(
164
+ placeholder="Accessible video URL",
165
+ show_label=False
 
166
  )
167
+ url_btn = gr.Button("Extract and Transcribe")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
+ gr.HTML("<hr>")
170
 
171
+ file_audio = gr.Audio(
172
+ sources=["upload"],
173
+ type="filepath",
174
+ label="Upload Audio or Video"
175
+ )
176
+ file_btn = gr.Button("Process File")
177
 
178
+ with gr.Column(scale=3):
179
+ media_text_out = gr.Textbox(
180
+ label="Media Transcript",
181
+ lines=12
182
+ )
183
+ media_analyze_btn = gr.Button(
184
+ "Generate Actionable Insights",
185
+ variant="primary",
186
+ size="lg"
187
+ )
188
+ media_analysis_out = gr.Textbox(
189
+ label="AI Intelligence Output",
190
+ lines=10
191
+ )
192
 
193
+ url_btn.click(
194
+ transcribe_video_url,
195
+ inputs=url_input,
196
+ outputs=media_text_out
197
+ )
198
+
199
+ file_btn.click(
200
+ transcribe_file,
201
+ inputs=file_audio,
202
+ outputs=media_text_out
203
+ )
204
+
205
+ media_analyze_btn.click(
206
+ analyze_transcript,
207
+ inputs=media_text_out,
208
+ outputs=media_analysis_out
209
+ )
210
+
211
+ # -------- API TAB (VISIBLE + DOCUMENTED) --------
212
+ gr.Markdown("## 🔌 Public API")
213
+
214
+ gr.Interface(
215
+ fn=api_analyze,
216
+ inputs=gr.Textbox(label="Transcript"),
217
+ outputs=gr.Textbox(label="Analysis"),
218
+ api_name="analyze"
219
+ )
220
 
221
+ gr.HTML(
222
+ "<div style='text-align:center; color:#666; font-size:0.8em;'>"
223
+ "Powered by ContextMap Engine"
224
+ "</div>"
225
+ )
226
+
227
+ # ---------------- LAUNCH ----------------
228
  if __name__ == "__main__":
229
  demo.launch(
230
+ server_name="0.0.0.0",
231
+ server_port=7860,
 
232
  )