Spaces:

Mahnoor00
/

vocal-sync-intelligence

Sleeping

App Files Files Community

Fnu Mahnoor committed about 1 month ago

Commit

c3b8b39

1 Parent(s): 52cf5d6

Fix app and readme

Browse files

Files changed (2) hide show

README.md +1 -1
app.py +78 -145

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🎙️
 colorFrom: blue
 colorTo: green
 sdk: gradio
-sdk_version: 5.12.0  # <--- Change this from 4.44.1
 python_version: '3.10'
 app_file: app.py
 pinned: false

 colorFrom: blue
 colorTo: green
 sdk: gradio
+sdk_version: 4.44.1
 python_version: '3.10'
 app_file: app.py
 pinned: false

app.py CHANGED Viewed

@@ -5,22 +5,23 @@ import os
 import librosa
 from dotenv import load_dotenv
-# Custom logic
 from src.transcription.streaming_transcriber import StreamingTranscriber
 from src.handlers.analysis_handler import analyze_transcript
 from src.handlers.transcription_handler import transcribe_file, transcribe_video_url
 load_dotenv()
-# ---------------- CONFIG ----------------
 SAMPLE_RATE = 16000
-logging.basicConfig(level=logging.INFO)
 transcriber = StreamingTranscriber()
-# ---------------- THEME ----------------
 theme = gr.themes.Monochrome(
-    primary_hue="emerald",
     neutral_hue="slate",
     font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
 ).set(
@@ -29,10 +30,11 @@ theme = gr.themes.Monochrome(
     button_primary_background_fill="*primary_600",
 )
-# ---------------- STREAMING LOGIC (UI ONLY) ----------------
 def process_stream(audio, state, mode, language_code): # Added language_code
-    if audio is None: return state, ""
     if mode != "Real-time":
         sr, data = audio
         data = data.astype(np.float32) / 32768.0
         if sr != SAMPLE_RATE:
@@ -43,190 +45,121 @@ def process_stream(audio, state, mode, language_code): # Added language_code
     # Pass the language selection to the transcriber
     # Note: You'll need to update your StreamingTranscriber.process_stream
     # to accept and use a 'language' argument in self.model.transcribe
-    new_state, text = transcriber.process_stream(audio, state, language_code)
     return new_state, text
 def clear_session():
-    transcriber.clear_history()
     return np.array([], dtype=np.float32), "", ""
-# ---------------- API SAFE FUNCTION ----------------
-# This is what Gradio exposes as an API
-def api_analyze(text: str) -> str:
-    if not text or not text.strip():
-        return "No transcript provided."
-    return analyze_transcript(text)
-# ---------------- CSS ----------------
 css = """
 .compact-audio {
     min-width: 150px !important;
 }
 .compact-audio label {
     display: none !important;
 }
 .compact-audio .container {
     padding: 0 !important;
 }
 """
-# ---------------- UI ----------------
 with gr.Blocks(theme=theme, css=css) as demo:
     gr.Markdown("""
-    # 🎙️ **VocalSync Intelligence**
-    Turning messy speech into clear thinking, meeting notes, and action.
     """)
     with gr.Tabs():
-        # -------- LIVE INTELLIGENCE TAB --------
         with gr.Tab("Live Intelligence"):
             with gr.Row():
-                with gr.Column(scale=1, min_width=180):
-                    mode = gr.Radio(
-                        ["Real-time", "After Speech"],
-                        value="Real-time",
-                        label="Mode"
-                    )
                     language_dropdown = gr.Dropdown(
-                        choices=[
-                            ("English", "en"),
-                            ("Spanish", "es"),
-                            ("French", "fr"),
-                            ("German", "de"),
-                            ("Chinese", "zh"),
-                            ("Auto-Detect", None)
-                        ],
                         value="en",
                         label="Speech Language"
                     )
                     audio_in = gr.Audio(
-                        sources=["microphone"],
-                        streaming=True,
                         type="numpy",
-                        elem_classes="compact-audio"
                     )
                     clear_btn = gr.Button("Clear Session", variant="stop", size="sm")
-                with gr.Column(scale=4):
-                    text_out = gr.Textbox(
-                        label="Transcript",
-                        lines=10,
-                        autoscroll=True
-                    )
-                    analyze_btn = gr.Button(
-                        "Generate Actionable Insights",
-                        variant="primary",
-                        size="lg"
-                    )
-                    analysis_out = gr.Textbox(
-                        label="AI Intelligence Output",
-                        lines=12
-                    )
             state = gr.State(value=np.array([], dtype=np.float32))
             audio_in.stream(
-                process_stream,
-                inputs=[audio_in, state, mode, language_dropdown],
-                outputs=[state, text_out]
-            )
-            analyze_btn.click(
-                analyze_transcript,
-                inputs=text_out,
-                outputs=analysis_out
-            )
-            clear_btn.click(
-                clear_session,
-                outputs=[state, text_out, analysis_out]
-            )
-        # -------- MEDIA INGESTION TAB --------
         with gr.Tab("Media Ingestion"):
             with gr.Row():
                 with gr.Column(scale=1, min_width=300):
-                    gr.Markdown("### Source")
                     url_input = gr.Textbox(
-                        placeholder="Accessible video URL",
-                        show_label=False
                     )
-                    url_btn = gr.Button("Extract and Transcribe")
-                    gr.HTML("<hr>")
-                    file_audio = gr.Audio(
-                        sources=["upload"],
-                        type="filepath",
-                        label="Upload Audio or Video"
-                    )
-                    file_btn = gr.Button("Process File")
-                with gr.Column(scale=3):
-                    media_text_out = gr.Textbox(
-                        label="Media Transcript",
-                        lines=12
-                    )
-                    media_analyze_btn = gr.Button(
-                        "Generate Actionable Insights",
-                        variant="primary",
-                        size="lg"
-                    )
-                    media_analysis_out = gr.Textbox(
-                        label="AI Intelligence Output",
-                        lines=10
-                    )
-            url_btn.click(
-                transcribe_video_url,
-                inputs=url_input,
-                outputs=media_text_out
-            )
-            file_btn.click(
-                transcribe_file,
-                inputs=file_audio,
-                outputs=media_text_out
-            )
-            media_analyze_btn.click(
-                analyze_transcript,
-                inputs=media_text_out,
-                outputs=media_analysis_out
-            )
-    # -------- API TAB (VISIBLE + DOCUMENTED) --------
-    gr.Markdown("## 🔌 Public API")
-    gr.Interface(
-        fn=api_analyze,
-        inputs=gr.Textbox(label="Transcript"),
-        outputs=gr.Textbox(label="Analysis"),
-        api_name="analyze"
-    )
-    gr.HTML(
-        "<div style='text-align:center; color:#666; font-size:0.8em;'>"
-        "Powered by ContextMap Engine"
-        "</div>"
-    )
-# ---------------- LAUNCH ----------------
 if __name__ == "__main__":
     demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
     )

 import librosa
 from dotenv import load_dotenv
+# Your custom logic imports
 from src.transcription.streaming_transcriber import StreamingTranscriber
 from src.handlers.analysis_handler import analyze_transcript
 from src.handlers.transcription_handler import transcribe_file, transcribe_video_url
 load_dotenv()
+# Global config for stability
+MAX_BUFFER_SECONDS = 5
 SAMPLE_RATE = 16000
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 transcriber = StreamingTranscriber()
+# --- 1. PRO THEME DEFINITION ---
 theme = gr.themes.Monochrome(
+    primary_hue="emerald",
     neutral_hue="slate",
     font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
 ).set(
     button_primary_background_fill="*primary_600",
 )
+# ... (imports remain the same)
 def process_stream(audio, state, mode, language_code): # Added language_code
     if mode != "Real-time":
+        if audio is None: return state, gr.skip()
         sr, data = audio
         data = data.astype(np.float32) / 32768.0
         if sr != SAMPLE_RATE:
     # The language selection is not forwarded to the transcriber yet.
     # Note: StreamingTranscriber.process_stream must first be updated to accept
     # a 'language' argument and use it in self.model.transcribe.
+    new_state, text = transcriber.process_stream(audio, state)
     return new_state, text
 def clear_session():
+    transcriber.clear_history() # Reset the class internal string
     return np.array([], dtype=np.float32), "", ""
+# ... (rest of Gradio UI remains the same)
+# --- 2. THE UI INTERFACE ---
+# NOTE: 'theme' and 'css' are still passed to gr.Blocks below; only 'title' is omitted.
 css = """
+/* Shrink the audio component container */
 .compact-audio {
     min-width: 150px !important;
 }
+/* Hide the 'Live Input' label to save vertical space */
 .compact-audio label {
     display: none !important;
 }
+/* Reduce internal padding and center the mic button */
 .compact-audio .container {
     padding: 0 !important;
 }
 """
+# --- THE UI INTERFACE ---
 with gr.Blocks(theme=theme, css=css) as demo:
     gr.Markdown("""
+        # 🎙️ **VocalSync Intelligence**
+        *Transforming messy speech into clear guidelines, minutes, and maps.*
     """)
     with gr.Tabs():
         with gr.Tab("Live Intelligence"):
             with gr.Row():
+                # SIDEBAR (Input Controls)
+                with gr.Column(scale=1, min_width=180): # Keeps this column small
+                    mode = gr.Radio(["Real-time", "After Speech"], value="Real-time", label="Mode")
+                    # --- NEW: Language Selection ---
                     language_dropdown = gr.Dropdown(
+                        choices=[("English", "en"), ("Spanish", "es"), ("French", "fr"), ("German", "de"), ("Chinese", "zh"), ("Auto-Detect", None)],
                         value="en",
                         label="Speech Language"
                     )
                     audio_in = gr.Audio(
+                        sources=["microphone"],
+                        streaming=True,
                         type="numpy",
+                        elem_classes="compact-audio" # Applies the CSS
                     )
                     clear_btn = gr.Button("Clear Session", variant="stop", size="sm")
+                # MAIN AREA (Analysis and Transcript)
+                with gr.Column(scale=4): # Takes up most of the page
+                    text_out = gr.Textbox(label="Transcript", lines=10, autoscroll=True)
+                    analyze_btn = gr.Button("✨ Generate Actionable Insights", variant="primary", size="lg")
+                    analysis_out = gr.Textbox(label="AI Intelligence Output", lines=12, placeholder="Analysis will appear here after clicking the button...")
             state = gr.State(value=np.array([], dtype=np.float32))
+            # Updated inputs to include language_dropdown
             audio_in.stream(
+                process_stream,
+                [audio_in, state, mode, language_dropdown],
+                [state, text_out]
+                )
+            analyze_btn.click(analyze_transcript, inputs=text_out, outputs=analysis_out)
+            clear_btn.click(clear_session, outputs=[state, text_out, analysis_out])
+        # ... (Media Ingestion)
         with gr.Tab("Media Ingestion"):
             with gr.Row():
+                # LEFT SIDEBAR: Upload & URL (Scale 1)
                 with gr.Column(scale=1, min_width=300):
+                    gr.Markdown("### 📥 Source")
                     url_input = gr.Textbox(
+                        placeholder="Accesible Video URL...",
+                        label="Remote Link",
+                        show_label=False # Keeps it clean
                     )
+                    url_btn = gr.Button("Extract & Transcribe", variant="secondary")
+                    gr.HTML("<div style='margin: 15px 0; border-bottom: 1px solid #333;'></div>")
+                    with gr.Group(): # Groups Upload + Button
+                        file_audio = gr.Audio(
+                            sources=["upload"],
+                            type="filepath",
+                            label="Drop Audio/Video File Here",
+                            elem_classes="compact-upload" # Apply the CSS
+                        )
+                        file_btn = gr.Button("🚀 Process File", variant="secondary")
+                # RIGHT PANEL: Transcription & Analysis
+                with gr.Column(scale=3):
+                    media_text_out = gr.Textbox(label="Media Transcript", lines=12,  autoscroll=True, placeholder="Transcription will appear here...")
+                    media_analyze_btn = gr.Button("✨ Generate Actionable Insights", variant="primary", size="lg")
+                    media_analysis_out = gr.Textbox(label="AI Intelligence Output", lines=10,  autoscroll=True, placeholder="Transcription will appear here...")
+            url_btn.click(transcribe_video_url, inputs=url_input, outputs=media_text_out)
+            file_btn.click(transcribe_file, inputs=file_audio, outputs=media_text_out)
+            media_analyze_btn.click(analyze_transcript, inputs=media_text_out, outputs=media_analysis_out)
+    gr.HTML("<div style='text-align: center; color: #666; font-size: 0.8em;'>Powered by ContextMap Engine</div>")
 if __name__ == "__main__":
     demo.launch(
+        # show_api=False,
+        server_name="0.0.0.0",   # Required for HF Spaces to map the port
+        server_port=7860,         # Standard HF port
     )