import gradio.routes

# 🔥 Hard-disable API schema generation (Gradio 4 bug workaround):
# building the OpenAPI/info schema crashes on some component configs,
# so replace it with a no-op before the main `gradio` import wires routes.
gradio.routes.api_info = lambda *args, **kwargs: {}

import gradio as gr
import numpy as np
import logging
import librosa
from dotenv import load_dotenv

from src.transcription.streaming_transcriber import StreamingTranscriber
from src.handlers.analysis_handler import analyze_transcript
from src.handlers.transcription_handler import transcribe_file, transcribe_video_url

load_dotenv()

# Target sample rate expected by the transcriber backend.
SAMPLE_RATE = 16000

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Single shared transcriber instance; holds streaming history across chunks.
transcriber = StreamingTranscriber()

# --- THEME ---
theme = gr.themes.Monochrome(
    primary_hue="emerald",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
)


# --- STREAM HANDLER ---
def process_stream(audio, state, mode, language_code):
    """Handle one microphone chunk from the Gradio audio stream.

    Args:
        audio: ``(sample_rate, np.ndarray)`` tuple from ``gr.Audio`` (numpy
            mode), or ``None`` when no new chunk is available.
        state: accumulated float32 audio buffer (``gr.State``).
        mode: ``"Real-time"`` or ``"After Speech"`` (from the Radio control).
        language_code: selected language code (currently unused here;
            presumably consumed by the transcriber elsewhere — TODO confirm).

    Returns:
        ``(new_state, text)`` — updated buffer and transcript fragment
        (empty string when nothing was transcribed).
    """
    if audio is None:
        return state, ""

    if mode != "Real-time":
        # "After Speech": just accumulate normalized audio into state.
        sr, data = audio
        # assumes int16 PCM from the browser mic — TODO confirm dtype
        data = data.astype(np.float32) / 32768.0
        if sr != SAMPLE_RATE:
            data = librosa.resample(data, orig_sr=sr, target_sr=SAMPLE_RATE)
        state = np.concatenate([state, data]) if state.size else data
        return state, ""

    # Real-time: delegate chunk handling to the streaming transcriber.
    new_state, text = transcriber.process_stream(audio, state)
    return new_state, text or ""


def clear_session():
    """Reset transcriber history and all session outputs (state, transcript, analysis)."""
    transcriber.clear_history()
    return np.array([], dtype=np.float32), "", ""


# --- CSS ---
css = """
.compact-audio { min-width: 150px !important; }
.compact-audio label { display: none !important; }
.compact-audio .container { padding: 0 !important; }
"""

# --- UI ---
# FIX: in Gradio 4, `theme` and `css` belong on the gr.Blocks constructor,
# not on demo.launch() (launch() has no such kwargs and would raise TypeError).
with gr.Blocks(theme=theme, css=css) as demo:
    gr.Markdown(
        """
        # 🎙️ **VocalSync Intelligence**
        *Transforming messy speech into clear guidelines, minutes, and maps.*
        """
    )

    with gr.Tabs():
        with gr.Tab("Live Intelligence"):
            with gr.Row():
                with gr.Column(scale=1, min_width=180):
                    mode = gr.Radio(
                        ["Real-time", "After Speech"],
                        value="Real-time",
                        label="Mode",
                    )
                    # ⚠️ IMPORTANT:
                    # DO NOT use None in dropdown values in Gradio 4
                    language_dropdown = gr.Dropdown(
                        choices=[
                            ("English", "en"),
                            ("Spanish", "es"),
                            ("French", "fr"),
                            ("German", "de"),
                            ("Chinese", "zh"),
                            ("Auto-Detect", "auto"),
                        ],
                        value="en",
                        label="Speech Language",
                    )
                    audio_in = gr.Audio(
                        sources=["microphone"],
                        streaming=True,
                        type="numpy",
                        elem_classes="compact-audio",
                    )
                    clear_btn = gr.Button("Clear Session", variant="stop", size="sm")

                with gr.Column(scale=4):
                    text_out = gr.Textbox(label="Transcript", lines=10)
                    analyze_btn = gr.Button(
                        "✨ Generate Actionable Insights", variant="primary", size="lg"
                    )
                    analysis_out = gr.Textbox(label="AI Intelligence Output", lines=12)

            # Rolling audio buffer carried between stream callbacks.
            state = gr.State(value=np.array([], dtype=np.float32))

            audio_in.stream(
                process_stream,
                inputs=[audio_in, state, mode, language_dropdown],
                outputs=[state, text_out],
            )
            analyze_btn.click(analyze_transcript, text_out, analysis_out)
            clear_btn.click(clear_session, None, [state, text_out, analysis_out])

        with gr.Tab("Media Ingestion"):
            with gr.Row():
                with gr.Column(scale=1, min_width=300):
                    url_input = gr.Textbox(placeholder="Accessible Video URL...")
                    url_btn = gr.Button("Extract & Transcribe")
                    file_audio = gr.Audio(sources=["upload"], type="filepath")
                    file_btn = gr.Button("Process File")

                with gr.Column(scale=3):
                    media_text_out = gr.Textbox(lines=12)
                    media_analyze_btn = gr.Button("✨ Generate Actionable Insights")
                    media_analysis_out = gr.Textbox(lines=10)

            url_btn.click(transcribe_video_url, url_input, media_text_out)
            file_btn.click(transcribe_file, file_audio, media_text_out)
            media_analyze_btn.click(analyze_transcript, media_text_out, media_analysis_out)

    gr.HTML(
        "Powered by ContextMap Engine"
    )


if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        # show_api=False,  # UI only (schema still built internally)
        max_threads=1,  # ✅ Gradio 4–safe replacement
        share=True,
    )