# NOTE(review): stray VCS metadata (author "Fnu Mahnoor", message "Fix app",
# commit 67b11dc) was pasted at the top of this module and broke parsing;
# commented out so the file imports cleanly.
import gradio.routes
# 🔥 Hard-disable API schema generation (Gradio 4 bug workaround).
# Must run BEFORE `import gradio as gr` builds the app routes.
gradio.routes.api_info = lambda *args, **kwargs: {}
import gradio as gr
import numpy as np
import logging
import librosa
from dotenv import load_dotenv
from src.transcription.streaming_transcriber import StreamingTranscriber
from src.handlers.analysis_handler import analyze_transcript
from src.handlers.transcription_handler import transcribe_file, transcribe_video_url

# Load environment variables from a local .env file — presumably API keys
# for the transcription/analysis backends; verify against the handlers.
load_dotenv()

# Target sample rate (Hz) all streamed/buffered audio is resampled to before
# being handed to the transcriber.
SAMPLE_RATE = 16000

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Single shared transcriber instance used by every streaming session.
transcriber = StreamingTranscriber()

# --- THEME ---
theme = gr.themes.Monochrome(
    primary_hue="emerald",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
)
# --- STREAM HANDLER ---
# --- STREAM HANDLER ---
def process_stream(audio, state, mode, language_code):
    """Handle one chunk of streamed microphone audio.

    Args:
        audio: ``(sample_rate, samples)`` tuple from ``gr.Audio(streaming=True)``,
            or ``None`` when no audio has arrived yet.
        state: Accumulated float32 mono buffer carried in ``gr.State``.
        mode: ``"Real-time"`` transcribes each chunk via the shared
            transcriber; any other mode only accumulates audio into ``state``.
        language_code: Selected language code from the dropdown. Currently
            unused here — TODO: forward to the transcriber once it accepts a
            per-call language hint.

    Returns:
        ``(new_state, text)`` — the updated buffer and any new transcript
        text (``""`` when there is nothing to show).
    """
    if audio is None:
        return state, ""

    if mode != "Real-time":
        sr, data = audio
        # Down-mix multi-channel chunks to mono so the rolling buffer stays
        # 1-D and concatenation below cannot fail on mismatched shapes.
        if data.ndim > 1:
            data = data.mean(axis=1)
        # Gradio typically delivers int16 PCM; rescale only integer samples
        # so already-normalized float input is not attenuated a second time.
        if np.issubdtype(data.dtype, np.integer):
            data = data.astype(np.float32) / 32768.0
        else:
            data = data.astype(np.float32)
        if sr != SAMPLE_RATE:
            data = librosa.resample(data, orig_sr=sr, target_sr=SAMPLE_RATE)
        state = np.concatenate([state, data]) if state.size else data
        return state, ""

    # Real-time: delegate chunk handling to the streaming transcriber.
    new_state, text = transcriber.process_stream(audio, state)
    return new_state, text or ""
def clear_session():
    """Reset the shared transcriber and blank out all session outputs.

    Returns a fresh (audio buffer state, transcript text, analysis text)
    triple for the Clear Session button to push into the UI.
    """
    transcriber.clear_history()
    empty_buffer = np.array([], dtype=np.float32)
    return empty_buffer, "", ""
# --- CSS ---
# Shrinks the streaming microphone widget (applied via
# elem_classes="compact-audio"): hides its label and container padding so it
# fits the narrow control column.
css = """
.compact-audio { min-width: 150px !important; }
.compact-audio label { display: none !important; }
.compact-audio .container { padding: 0 !important; }
"""
# --- UI ---
# FIX: `theme` and `css` must be passed to the gr.Blocks() constructor.
# Blocks.launch() accepts neither keyword, so passing them there (as the
# original did) raises TypeError at startup.
with gr.Blocks(theme=theme, css=css) as demo:
    gr.Markdown(
        """
# 🎙️ **VocalSync Intelligence**
*Transforming messy speech into clear guidelines, minutes, and maps.*
"""
    )
    with gr.Tabs():
        with gr.Tab("Live Intelligence"):
            with gr.Row():
                with gr.Column(scale=1, min_width=180):
                    mode = gr.Radio(
                        ["Real-time", "After Speech"],
                        value="Real-time",
                        label="Mode",
                    )
                    # ⚠️ IMPORTANT: do not use None as a dropdown value in
                    # Gradio 4 — the "auto" sentinel stands in for auto-detect.
                    language_dropdown = gr.Dropdown(
                        choices=[
                            ("English", "en"),
                            ("Spanish", "es"),
                            ("French", "fr"),
                            ("German", "de"),
                            ("Chinese", "zh"),
                            ("Auto-Detect", "auto"),
                        ],
                        value="en",
                        label="Speech Language",
                    )
                    audio_in = gr.Audio(
                        sources=["microphone"],
                        streaming=True,
                        type="numpy",
                        elem_classes="compact-audio",
                    )
                    clear_btn = gr.Button("Clear Session", variant="stop", size="sm")
                with gr.Column(scale=4):
                    text_out = gr.Textbox(label="Transcript", lines=10)
                    analyze_btn = gr.Button(
                        "✨ Generate Actionable Insights", variant="primary", size="lg"
                    )
                    analysis_out = gr.Textbox(label="AI Intelligence Output", lines=12)

            # Rolling float32 audio buffer carried between stream callbacks.
            state = gr.State(value=np.array([], dtype=np.float32))
            audio_in.stream(
                process_stream,
                inputs=[audio_in, state, mode, language_dropdown],
                outputs=[state, text_out],
            )
            analyze_btn.click(analyze_transcript, text_out, analysis_out)
            clear_btn.click(clear_session, None, [state, text_out, analysis_out])

        with gr.Tab("Media Ingestion"):
            with gr.Row():
                with gr.Column(scale=1, min_width=300):
                    url_input = gr.Textbox(placeholder="Accessible Video URL...")
                    url_btn = gr.Button("Extract & Transcribe")
                    file_audio = gr.Audio(sources=["upload"], type="filepath")
                    file_btn = gr.Button("Process File")
                with gr.Column(scale=3):
                    media_text_out = gr.Textbox(lines=12)
                    media_analyze_btn = gr.Button("✨ Generate Actionable Insights")
                    media_analysis_out = gr.Textbox(lines=10)
            url_btn.click(transcribe_video_url, url_input, media_text_out)
            file_btn.click(transcribe_file, file_audio, media_text_out)
            media_analyze_btn.click(analyze_transcript, media_text_out, media_analysis_out)

    gr.HTML(
        "<div style='text-align:center;color:#666;font-size:0.8em;'>Powered by ContextMap Engine</div>"
    )

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces (container-friendly)
        server_port=7860,
        # show_api=False not needed: API schema generation is already
        # monkeypatched off at import time.
        max_threads=1,  # ✅ Gradio 4–safe replacement
        share=True,
    )