File size: 5,215 Bytes
e26fd60
 
 
 
 
 
bf2d622
 
 
 
 
 
 
 
 
 
 
 
 
 
7cc7815
bf2d622
 
7cc7815
bf2d622
7cc7815
bf2d622
 
 
 
7cc7815
 
 
 
c3b8b39
bf2d622
 
 
 
 
7cc7815
 
bf2d622
7cc7815
c3b8b39
7cc7815
bf2d622
 
7cc7815
bf2d622
 
7cc7815
bf2d622
7cc7815
 
 
bf2d622
7cc7815
 
67b11dc
7cc7815
 
c3b8b39
 
7cc7815
 
 
bf2d622
 
 
7cc7815
 
 
 
 
 
bf2d622
7cc7815
 
bf2d622
7cc7815
 
 
 
 
 
 
 
bf2d622
7cc7815
bf2d622
7cc7815
bf2d622
7cc7815
 
1ca4f5c
7cc7815
bf2d622
7cc7815
bf2d622
7cc7815
 
 
 
 
 
 
bf2d622
 
7cc7815
bf2d622
7cc7815
 
 
 
 
 
 
 
bf2d622
 
 
7cc7815
 
bf2d622
7cc7815
 
bf2d622
7cc7815
 
 
 
 
 
 
 
bf2d622
7cc7815
 
 
bf2d622
 
e55b5b0
67b11dc
7cc7815
 
69852b5
8b2d877
8f4bcd9
81c7da0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import gradio.routes

# 🔥 Hard-disable API schema generation (Gradio 4 bug workaround)
# Rebinds the module attribute `gradio.routes.api_info` to a stub that accepts
# any arguments and returns an empty dict.
# NOTE(review): this only changes behavior if the installed Gradio release
# actually looks up a module-level `api_info` in gradio.routes — confirm
# against the pinned version; otherwise the assignment is a harmless no-op.
gradio.routes.api_info = lambda *args, **kwargs: {}


import gradio as gr
import numpy as np
import logging
import librosa
from dotenv import load_dotenv

from src.transcription.streaming_transcriber import StreamingTranscriber
from src.handlers.analysis_handler import analyze_transcript
from src.handlers.transcription_handler import transcribe_file, transcribe_video_url

# Read a local .env file first; the objects created below are built after it.
# NOTE(review): presumably StreamingTranscriber and the handlers rely on those
# environment settings — confirm.
load_dotenv()

# Sample rate in Hz that buffered audio is resampled to in process_stream.
SAMPLE_RATE = 16000

logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# One module-wide transcriber, used by process_stream and clear_session.
transcriber = StreamingTranscriber()

# --- THEME ---
theme = gr.themes.Monochrome(
    font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
    neutral_hue="slate",
    primary_hue="emerald",
)

# --- STREAM HANDLER ---
def process_stream(audio, state, mode, language_code):
    """Handle one audio chunk from the streaming microphone input.

    In ``"Real-time"`` mode the chunk is delegated to the shared
    ``transcriber``. In any other mode the chunk is only buffered: it is
    scaled to float32, down-mixed to mono, resampled to ``SAMPLE_RATE`` and
    appended to ``state``. No transcription happens while buffering.

    Args:
        audio: ``(sample_rate, samples)`` tuple, or ``None`` when there is no
            chunk to process. ``samples`` is treated as 16-bit integer PCM
            shaped ``(samples,)`` or ``(samples, channels)``.
        state: Audio accumulated so far (1-D float32 array, possibly empty).
        mode: ``"Real-time"`` transcribes immediately; any other value
            buffers the chunk.
        language_code: Language value selected in the UI. Currently unused by
            this function.

    Returns:
        A ``(new_state, text)`` tuple. ``text`` is ``""`` when there is no
        chunk and while buffering.
    """
    if audio is None:
        return state, ""

    if mode == "Real-time":
        new_state, text = transcriber.process_stream(audio, state)
        return new_state, text or ""

    sr, data = audio
    # Scale 16-bit PCM into the [-1.0, 1.0) float range.
    data = np.asarray(data).astype(np.float32) / 32768.0
    if data.ndim > 1:
        # Multi-channel chunks arrive as (samples, channels). Collapse them to
        # mono so resampling runs along the time axis and the result can be
        # appended to the 1-D buffer.
        data = data.mean(axis=1)
    if sr != SAMPLE_RATE:
        data = librosa.resample(data, orig_sr=sr, target_sr=SAMPLE_RATE)

    if state is None or not state.size:
        return data, ""
    return np.concatenate([state, data]), ""

def clear_session():
    """Reset the transcriber history and blank the session outputs.

    Returns:
        A 3-tuple of an empty float32 audio buffer, an empty transcript
        string and an empty analysis string.
    """
    transcriber.clear_history()
    empty_buffer = np.zeros(0, dtype=np.float32)
    return empty_buffer, "", ""

# --- CSS ---
# Every rule targets the "compact-audio" class. The empty first and last
# entries keep the leading and trailing line breaks of the stylesheet.
css = "\n".join(
    (
        "",
        ".compact-audio { min-width: 150px !important; }",
        ".compact-audio label { display: none !important; }",
        ".compact-audio .container { padding: 0 !important; }",
        "",
    )
)

# --- UI ---
# Builds the two-tab interface: "Live Intelligence" (microphone streaming) and
# "Media Ingestion" (URL / uploaded file). Components are created in layout
# order, so statement order here determines what is rendered where.
# No theme or css is passed to gr.Blocks(); both are handed to demo.launch()
# in the __main__ guard instead.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # 🎙️ **VocalSync Intelligence**
        *Transforming messy speech into clear guidelines, minutes, and maps.*
        """
    )

    with gr.Tabs():
        with gr.Tab("Live Intelligence"):
            with gr.Row():
                # Narrow left column: capture controls.
                with gr.Column(scale=1, min_width=180):
                    # Passed to process_stream as `mode`; "Real-time" is the
                    # only value that function treats specially.
                    mode = gr.Radio(
                        ["Real-time", "After Speech"],
                        value="Real-time",
                        label="Mode",
                    )

                    # ⚠️ IMPORTANT:
                    # DO NOT use None in dropdown values in Gradio 4
                    # Choices are (label, value) pairs; the value is what gets
                    # passed to process_stream as `language_code`.
                    language_dropdown = gr.Dropdown(
                        choices=[
                            ("English", "en"),
                            ("Spanish", "es"),
                            ("French", "fr"),
                            ("German", "de"),
                            ("Chinese", "zh"),
                            ("Auto-Detect", "auto"),
                        ],
                        value="en",
                        label="Speech Language",
                    )

                    # Streaming microphone input delivered as numpy data; the
                    # "compact-audio" class is the hook for the rules in `css`.
                    audio_in = gr.Audio(
                        sources=["microphone"],
                        streaming=True,
                        type="numpy",
                        elem_classes="compact-audio",
                    )

                    clear_btn = gr.Button("Clear Session", variant="stop", size="sm")

                # Wide right column: transcript and analysis output.
                with gr.Column(scale=4):
                    text_out = gr.Textbox(label="Transcript", lines=10)
                    analyze_btn = gr.Button(
                        "✨ Generate Actionable Insights", variant="primary", size="lg"
                    )
                    analysis_out = gr.Textbox(label="AI Intelligence Output", lines=12)

            # Audio buffer threaded through process_stream; starts as an empty
            # float32 array.
            state = gr.State(value=np.array([], dtype=np.float32))

            # Each streamed chunk calls process_stream with the chunk, the
            # current buffer, the mode and the language; its two return values
            # update the buffer and the transcript box.
            audio_in.stream(
                process_stream,
                inputs=[audio_in, state, mode, language_dropdown],
                outputs=[state, text_out],
            )

            # Transcript text -> analysis text.
            analyze_btn.click(analyze_transcript, text_out, analysis_out)
            # clear_session takes no inputs and returns three values, one per
            # output listed here.
            clear_btn.click(clear_session, None, [state, text_out, analysis_out])

        with gr.Tab("Media Ingestion"):
            with gr.Row():
                # Left column: the two media sources, each with its own button.
                with gr.Column(scale=1, min_width=300):
                    url_input = gr.Textbox(placeholder="Accessible Video URL...")
                    url_btn = gr.Button("Extract & Transcribe")

                    # Uploaded audio is passed to the handler as a file path.
                    file_audio = gr.Audio(sources=["upload"], type="filepath")
                    file_btn = gr.Button("Process File")

                # Right column: transcript and analysis output for this tab.
                with gr.Column(scale=3):
                    media_text_out = gr.Textbox(lines=12)
                    media_analyze_btn = gr.Button("✨ Generate Actionable Insights")
                    media_analysis_out = gr.Textbox(lines=10)

            # Both sources write into the same transcript box; analysis reuses
            # the same analyze_transcript handler as the live tab.
            url_btn.click(transcribe_video_url, url_input, media_text_out)
            file_btn.click(transcribe_file, file_audio, media_text_out)
            media_analyze_btn.click(analyze_transcript, media_text_out, media_analysis_out)

    # Footer, rendered below the tabs.
    gr.HTML(
        "<div style='text-align:center;color:#666;font-size:0.8em;'>Powered by ContextMap Engine</div>"
    )

if __name__ == "__main__":
    # Launch options are collected first so each can carry its own note.
    # NOTE(review): theme/css are passed to launch() rather than gr.Blocks();
    # whether launch() accepts them depends on the installed Gradio release —
    # confirm against the pinned version.
    launch_options = {
        "theme": theme,
        "css": css,
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        # show_api=False is deliberately not passed: per the original note it
        # only affects the UI and the schema is still built internally.
        # max_threads=1 is what the original author used in its place.
        "max_threads": 1,
        "share": True,
    }
    demo.launch(**launch_options)