Fnu Mahnoor committed on
Commit
7cc7815
·
1 Parent(s): de99631
Files changed (1) hide show
  1. app.py +83 -104
app.py CHANGED
@@ -1,166 +1,145 @@
1
  import gradio as gr
2
  import numpy as np
3
  import logging
4
- import os
5
  import librosa
6
  from dotenv import load_dotenv
7
 
8
- # Your custom logic imports
9
  from src.transcription.streaming_transcriber import StreamingTranscriber
10
  from src.handlers.analysis_handler import analyze_transcript
11
  from src.handlers.transcription_handler import transcribe_file, transcribe_video_url
12
 
13
  load_dotenv()
14
 
15
- # Global config for stability
16
- MAX_BUFFER_SECONDS = 5
17
  SAMPLE_RATE = 16000
18
 
19
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
20
  transcriber = StreamingTranscriber()
21
 
22
- # --- 1. PRO THEME DEFINITION ---
23
  theme = gr.themes.Monochrome(
24
- primary_hue="emerald",
25
  neutral_hue="slate",
26
  font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
27
- ).set(
28
- block_title_text_weight="700",
29
- block_label_text_size="xs",
30
- button_primary_background_fill="*primary_600",
31
  )
32
 
33
- # ... (imports remain the same)
 
 
 
34
 
35
- def process_stream(audio, state, mode, language_code): # Added language_code
36
  if mode != "Real-time":
37
- if audio is None: return state, gr.skip()
38
  sr, data = audio
39
  data = data.astype(np.float32) / 32768.0
40
  if sr != SAMPLE_RATE:
41
  data = librosa.resample(data, orig_sr=sr, target_sr=SAMPLE_RATE)
42
- new_state = np.concatenate([state, data]) if state is not None else data
43
- return new_state, gr.skip()
44
 
45
- # Pass the language selection to the transcriber
46
- # Note: You'll need to update your StreamingTranscriber.process_stream
47
- # to accept and use a 'language' argument in self.model.transcribe
48
  new_state, text = transcriber.process_stream(audio, state)
49
- return new_state, text
50
 
51
  def clear_session():
52
- transcriber.clear_history() # Reset the class internal string
53
  return np.array([], dtype=np.float32), "", ""
54
 
55
- # ... (rest of Gradio UI remains the same)
56
-
57
- # --- 2. THE UI INTERFACE ---
58
-
59
- # Removed 'theme' and 'title' from here to fix the UserWarning
60
  css = """
61
- /* Shrink the audio component container */
62
- .compact-audio {
63
- min-width: 150px !important;
64
- }
65
- /* Hide the 'Live Input' label to save vertical space */
66
- .compact-audio label {
67
- display: none !important;
68
- }
69
- /* Reduce internal padding and center the mic button */
70
- .compact-audio .container {
71
- padding: 0 !important;
72
- }
73
  """
74
- # --- THE UI INTERFACE ---
75
- with gr.Blocks(api_open=False, theme=theme, css=css) as demo:
76
- gr.Markdown("""
 
 
77
  # 🎙️ **VocalSync Intelligence**
78
  *Transforming messy speech into clear guidelines, minutes, and maps.*
79
- """)
 
 
80
  with gr.Tabs():
81
  with gr.Tab("Live Intelligence"):
82
  with gr.Row():
83
- # SIDEBAR (Input Controls)
84
- with gr.Column(scale=1, min_width=180): # Keeps this column small
85
- mode = gr.Radio(["Real-time", "After Speech"], value="Real-time", label="Mode")
 
 
 
86
 
87
- # --- NEW: Language Selection ---
 
88
  language_dropdown = gr.Dropdown(
89
- choices=[("English", "en"), ("Spanish", "es"), ("French", "fr"), ("German", "de"), ("Chinese", "zh"), ("Auto-Detect", None)],
 
 
 
 
 
 
 
90
  value="en",
91
- label="Speech Language"
92
  )
 
93
  audio_in = gr.Audio(
94
- sources=["microphone"],
95
- streaming=True,
96
  type="numpy",
97
- elem_classes="compact-audio" # Applies the CSS
98
  )
 
99
  clear_btn = gr.Button("Clear Session", variant="stop", size="sm")
100
-
101
- # MAIN AREA (Analysis and Transcript)
102
- with gr.Column(scale=4): # Takes up most of the page
103
- text_out = gr.Textbox(label="Transcript", lines=10, autoscroll=True)
104
- analyze_btn = gr.Button("✨ Generate Actionable Insights", variant="primary", size="lg")
105
- analysis_out = gr.Textbox(label="AI Intelligence Output", lines=12, placeholder="Analysis will appear here after clicking the button...")
 
106
 
107
  state = gr.State(value=np.array([], dtype=np.float32))
108
-
109
- # Updated inputs to include language_dropdown
110
  audio_in.stream(
111
- process_stream,
112
- [audio_in, state, mode, language_dropdown],
113
- [state, text_out]
114
- )
115
-
116
- analyze_btn.click(analyze_transcript, inputs=text_out, outputs=analysis_out)
117
- clear_btn.click(clear_session, outputs=[state, text_out, analysis_out])
118
-
119
- # ... (Media Ingestion)
120
  with gr.Tab("Media Ingestion"):
121
  with gr.Row():
122
- # LEFT SIDEBAR: Upload & URL (Scale 1)
123
  with gr.Column(scale=1, min_width=300):
124
- gr.Markdown("### 📥 Source")
125
- url_input = gr.Textbox(
126
- placeholder="Accesible Video URL...",
127
- label="Remote Link",
128
- show_label=False # Keeps it clean
129
- )
130
- url_btn = gr.Button("Extract & Transcribe", variant="secondary")
131
-
132
- gr.HTML("<div style='margin: 15px 0; border-bottom: 1px solid #333;'></div>")
133
-
134
- with gr.Group(): # Groups Upload + Button
135
- file_audio = gr.Audio(
136
- sources=["upload"],
137
- type="filepath",
138
- label="Drop Audio/Video File Here",
139
- elem_classes="compact-upload" # Apply the CSS
140
- )
141
- file_btn = gr.Button("🚀 Process File", variant="secondary")
142
-
143
- # RIGHT PANEL: Transcription & Analysis
144
- with gr.Column(scale=3):
145
- media_text_out = gr.Textbox(label="Media Transcript", lines=12, autoscroll=True, placeholder="Transcription will appear here...")
146
- media_analyze_btn = gr.Button("✨ Generate Actionable Insights", variant="primary", size="lg")
147
- media_analysis_out = gr.Textbox(label="AI Intelligence Output", lines=10, autoscroll=True, placeholder="Transcription will appear here...")
148
-
149
-
150
- url_btn.click(transcribe_video_url, inputs=url_input, outputs=media_text_out)
151
- file_btn.click(transcribe_file, inputs=file_audio, outputs=media_text_out)
152
- media_analyze_btn.click(analyze_transcript, inputs=media_text_out, outputs=media_analysis_out)
153
 
 
 
154
 
 
 
 
 
 
 
 
 
155
 
156
- gr.HTML("<div style='text-align: center; color: #666; font-size: 0.8em;'>Powered by ContextMap Engine</div>")
 
 
157
 
 
 
158
 
159
  if __name__ == "__main__":
160
  demo.launch(
161
- show_api=False,
162
- share=True,
163
- server_name="0.0.0.0", # Required for HF Spaces to map the port
164
- server_port=7860, # Standard HF port
165
  )
166
-
 
1
  import gradio as gr
2
  import numpy as np
3
  import logging
 
4
  import librosa
5
  from dotenv import load_dotenv
6
 
 
7
  from src.transcription.streaming_transcriber import StreamingTranscriber
8
  from src.handlers.analysis_handler import analyze_transcript
9
  from src.handlers.transcription_handler import transcribe_file, transcribe_video_url
10
 
11
  load_dotenv()
12
 
 
 
13
  SAMPLE_RATE = 16000
14
 
15
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
16
  transcriber = StreamingTranscriber()
17
 
18
+ # --- THEME ---
19
  theme = gr.themes.Monochrome(
20
+ primary_hue="emerald",
21
  neutral_hue="slate",
22
  font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
 
 
 
 
23
  )
24
 
25
+ # --- STREAM HANDLER ---
26
+ def process_stream(audio, state, mode, language_code):
27
+ if audio is None:
28
+ return state, ""
29
 
 
30
  if mode != "Real-time":
 
31
  sr, data = audio
32
  data = data.astype(np.float32) / 32768.0
33
  if sr != SAMPLE_RATE:
34
  data = librosa.resample(data, orig_sr=sr, target_sr=SAMPLE_RATE)
35
+ state = np.concatenate([state, data]) if state.size else data
36
+ return state, ""
37
 
38
+ # Real-time
 
 
39
  new_state, text = transcriber.process_stream(audio, state)
40
+ return new_state, text or ""
41
 
42
  def clear_session():
43
+ transcriber.clear_history()
44
  return np.array([], dtype=np.float32), "", ""
45
 
46
+ # --- CSS ---
 
 
 
 
47
  css = """
48
+ .compact-audio { min-width: 150px !important; }
49
+ .compact-audio label { display: none !important; }
50
+ .compact-audio .container { padding: 0 !important; }
 
 
 
 
 
 
 
 
 
51
  """
52
+
53
+ # --- UI ---
54
+ with gr.Blocks(theme=theme, css=css) as demo:
55
+ gr.Markdown(
56
+ """
57
  # 🎙️ **VocalSync Intelligence**
58
  *Transforming messy speech into clear guidelines, minutes, and maps.*
59
+ """
60
+ )
61
+
62
  with gr.Tabs():
63
  with gr.Tab("Live Intelligence"):
64
  with gr.Row():
65
+ with gr.Column(scale=1, min_width=180):
66
+ mode = gr.Radio(
67
+ ["Real-time", "After Speech"],
68
+ value="Real-time",
69
+ label="Mode",
70
+ )
71
 
72
+ # ⚠️ IMPORTANT:
73
+ # DO NOT use None in dropdown values in Gradio 4
74
  language_dropdown = gr.Dropdown(
75
+ choices=[
76
+ ("English", "en"),
77
+ ("Spanish", "es"),
78
+ ("French", "fr"),
79
+ ("German", "de"),
80
+ ("Chinese", "zh"),
81
+ ("Auto-Detect", "auto"),
82
+ ],
83
  value="en",
84
+ label="Speech Language",
85
  )
86
+
87
  audio_in = gr.Audio(
88
+ sources=["microphone"],
89
+ streaming=True,
90
  type="numpy",
91
+ elem_classes="compact-audio",
92
  )
93
+
94
  clear_btn = gr.Button("Clear Session", variant="stop", size="sm")
95
+
96
+ with gr.Column(scale=4):
97
+ text_out = gr.Textbox(label="Transcript", lines=10)
98
+ analyze_btn = gr.Button(
99
+ "✨ Generate Actionable Insights", variant="primary", size="lg"
100
+ )
101
+ analysis_out = gr.Textbox(label="AI Intelligence Output", lines=12)
102
 
103
  state = gr.State(value=np.array([], dtype=np.float32))
104
+
 
105
  audio_in.stream(
106
+ process_stream,
107
+ inputs=[audio_in, state, mode, language_dropdown],
108
+ outputs=[state, text_out],
109
+ )
110
+
111
+ analyze_btn.click(analyze_transcript, text_out, analysis_out)
112
+ clear_btn.click(clear_session, None, [state, text_out, analysis_out])
113
+
 
114
  with gr.Tab("Media Ingestion"):
115
  with gr.Row():
 
116
  with gr.Column(scale=1, min_width=300):
117
+ url_input = gr.Textbox(placeholder="Accessible Video URL...")
118
+ url_btn = gr.Button("Extract & Transcribe")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
+ file_audio = gr.Audio(sources=["upload"], type="filepath")
121
+ file_btn = gr.Button("Process File")
122
 
123
+ with gr.Column(scale=3):
124
+ media_text_out = gr.Textbox(lines=12)
125
+ media_analyze_btn = gr.Button("✨ Generate Actionable Insights")
126
+ media_analysis_out = gr.Textbox(lines=10)
127
+
128
+ url_btn.click(transcribe_video_url, url_input, media_text_out)
129
+ file_btn.click(transcribe_file, file_audio, media_text_out)
130
+ media_analyze_btn.click(analyze_transcript, media_text_out, media_analysis_out)
131
 
132
+ gr.HTML(
133
+ "<div style='text-align:center;color:#666;font-size:0.8em;'>Powered by ContextMap Engine</div>"
134
+ )
135
 
136
+ # 🚨 CRITICAL FOR GRADIO 4
137
+ demo.queue(concurrency_count=1)
138
 
139
  if __name__ == "__main__":
140
  demo.launch(
141
+ server_name="0.0.0.0",
142
+ server_port=7860,
143
+ share=True, # HF Spaces requirement
144
+ show_api=False, # UI only (schema still built internally)
145
  )