Spaces:
Sleeping
Sleeping
File size: 5,215 Bytes
e26fd60 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 c3b8b39 bf2d622 7cc7815 bf2d622 7cc7815 c3b8b39 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 67b11dc 7cc7815 c3b8b39 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 1ca4f5c 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 7cc7815 bf2d622 e55b5b0 67b11dc 7cc7815 69852b5 8b2d877 8f4bcd9 81c7da0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# The routes submodule is imported on its own first so that its api_info
# attribute can be replaced before the UI below is constructed.
import gradio.routes
# Hard-disable API schema generation (Gradio 4 bug workaround): api_info is
# swapped for a stub that accepts any arguments and returns an empty dict.
gradio.routes.api_info = lambda *args, **kwargs: {}
import gradio as gr
import numpy as np
import logging
import librosa
from dotenv import load_dotenv
from src.transcription.streaming_transcriber import StreamingTranscriber
from src.handlers.analysis_handler import analyze_transcript
from src.handlers.transcription_handler import transcribe_file, transcribe_video_url
# Pull configuration from a .env file (python-dotenv) before anything reads it.
load_dotenv()
# Sample rate (Hz) that buffered audio is resampled to in process_stream.
SAMPLE_RATE = 16000
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# Single module-level transcriber instance used by process_stream and
# clear_session.
transcriber = StreamingTranscriber()
# --- THEME ---
# Monochrome base theme recoloured with an emerald/slate palette and a
# monospace font stack (Google-hosted JetBrains Mono first, then generic
# monospace fallbacks).
theme = gr.themes.Monochrome(
    font=[
        gr.themes.GoogleFont("JetBrains Mono"),
        "ui-monospace",
        "monospace",
    ],
    neutral_hue="slate",
    primary_hue="emerald",
)
# --- STREAM HANDLER ---
def process_stream(audio, state, mode, language_code):
    """Handle one chunk of streamed microphone audio.

    In "Real-time" mode the chunk is handed to the module-level transcriber,
    which returns the updated state and any text it produced. In every other
    mode the chunk is only buffered: it is scaled to float32, downmixed to
    mono, resampled to SAMPLE_RATE when its rate differs, and appended to
    ``state``.

    Args:
        audio: ``(sample_rate, samples)`` tuple from the audio component, or
            ``None`` when no chunk is available.
        state: Audio accumulated so far as a NumPy array (may be empty or
            ``None``).
        mode: Selected mode label; only "Real-time" triggers transcription.
        language_code: Selected language value. Currently unused; it is
            accepted so the signature matches the inputs wired to the
            stream event.

    Returns:
        ``(new_state, text)``. ``text`` is always a string and is empty
        whenever the chunk was only buffered.
    """
    if audio is None:
        return state, ""

    if mode == "Real-time":
        new_state, text = transcriber.process_stream(audio, state)
        return new_state, text or ""

    sr, data = audio
    # The 32768 divisor assumes 16-bit PCM samples.
    data = np.asarray(data).astype(np.float32) / 32768.0
    if data.ndim > 1:
        # Multi-channel chunks arrive as (samples, channels). Average the
        # channels so resampling runs along the time axis and the result can
        # be appended to the 1-D buffer.
        data = data.mean(axis=1)
    if sr != SAMPLE_RATE:
        data = librosa.resample(data, orig_sr=sr, target_sr=SAMPLE_RATE)

    if state is None or state.size == 0:
        return data, ""
    return np.concatenate([state, data]), ""
def clear_session():
    """Reset the transcriber history and blank the session outputs.

    Returns:
        A 3-tuple of an empty float32 audio buffer followed by two empty
        strings, in the order expected by the clear button's outputs
        (audio state, transcript text, analysis text).
    """
    transcriber.clear_history()
    empty_audio = np.zeros(0, dtype=np.float32)
    return empty_audio, "", ""
# --- CSS ---
# Stylesheet for components tagged with the "compact-audio" class: enforce a
# minimum width, hide the label, and strip the inner container padding.
css = (
    "\n"
    ".compact-audio { min-width: 150px !important; }\n"
    ".compact-audio label { display: none !important; }\n"
    ".compact-audio .container { padding: 0 !important; }\n"
)
# --- UI ---
# NOTE(review): the nesting below was rebuilt from a listing that had lost its
# indentation -- confirm the state/event wiring sits at the intended level.
with gr.Blocks() as demo:
    # Page header.
    gr.Markdown(
        "\n"
        "# 🎙️ **VocalSync Intelligence**\n"
        "*Transforming messy speech into clear guidelines, minutes, and maps.*\n"
    )

    with gr.Tabs():
        # ---- Tab 1: live microphone capture and analysis ----
        with gr.Tab("Live Intelligence"):
            with gr.Row():
                # Narrow control column.
                with gr.Column(scale=1, min_width=180):
                    mode = gr.Radio(
                        ["Real-time", "After Speech"],
                        value="Real-time",
                        label="Mode",
                    )
                    # IMPORTANT: do not use None as a dropdown value in
                    # Gradio 4; auto-detect is spelled "auto" instead.
                    language_dropdown = gr.Dropdown(
                        choices=[
                            ("English", "en"),
                            ("Spanish", "es"),
                            ("French", "fr"),
                            ("German", "de"),
                            ("Chinese", "zh"),
                            ("Auto-Detect", "auto"),
                        ],
                        value="en",
                        label="Speech Language",
                    )
                    audio_in = gr.Audio(
                        sources=["microphone"],
                        streaming=True,
                        type="numpy",
                        elem_classes="compact-audio",
                    )
                    clear_btn = gr.Button(
                        "Clear Session", variant="stop", size="sm"
                    )

                # Wide output column.
                with gr.Column(scale=4):
                    text_out = gr.Textbox(label="Transcript", lines=10)
                    analyze_btn = gr.Button(
                        "✨ Generate Actionable Insights",
                        variant="primary",
                        size="lg",
                    )
                    analysis_out = gr.Textbox(
                        label="AI Intelligence Output", lines=12
                    )

            # Per-session audio buffer, starting empty.
            state = gr.State(value=np.array([], dtype=np.float32))

            # Each streamed chunk updates the buffer and the transcript box.
            audio_in.stream(
                fn=process_stream,
                inputs=[audio_in, state, mode, language_dropdown],
                outputs=[state, text_out],
            )
            analyze_btn.click(
                fn=analyze_transcript,
                inputs=text_out,
                outputs=analysis_out,
            )
            clear_btn.click(
                fn=clear_session,
                inputs=None,
                outputs=[state, text_out, analysis_out],
            )

        # ---- Tab 2: transcription from a URL or an uploaded file ----
        with gr.Tab("Media Ingestion"):
            with gr.Row():
                with gr.Column(scale=1, min_width=300):
                    url_input = gr.Textbox(placeholder="Accessible Video URL...")
                    url_btn = gr.Button("Extract & Transcribe")
                    file_audio = gr.Audio(sources=["upload"], type="filepath")
                    file_btn = gr.Button("Process File")

                with gr.Column(scale=3):
                    media_text_out = gr.Textbox(lines=12)
                    media_analyze_btn = gr.Button(
                        "✨ Generate Actionable Insights"
                    )
                    media_analysis_out = gr.Textbox(lines=10)

            # Both ingestion routes write into the same transcript box.
            url_btn.click(
                fn=transcribe_video_url,
                inputs=url_input,
                outputs=media_text_out,
            )
            file_btn.click(
                fn=transcribe_file,
                inputs=file_audio,
                outputs=media_text_out,
            )
            media_analyze_btn.click(
                fn=analyze_transcript,
                inputs=media_text_out,
                outputs=media_analysis_out,
            )

    # Footer.
    gr.HTML(
        "<div style='text-align:center;color:#666;font-size:0.8em;'>Powered by ContextMap Engine</div>"
    )
if __name__ == "__main__":
    # Launch options are gathered first so each one can carry its own note.
    launch_options = {
        "theme": theme,
        "css": css,
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        # show_api=False is intentionally not passed: it is UI only (the
        # schema is still built internally).
        "max_threads": 1,  # Gradio 4-safe replacement
        "share": True,
    }
    demo.launch(**launch_options)
|