Fnu Mahnoor committed on
Commit
52cf5d6
·
1 Parent(s): a5550ba

new app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -78
app.py CHANGED
@@ -5,23 +5,22 @@ import os
5
  import librosa
6
  from dotenv import load_dotenv
7
 
8
- # Your custom logic imports
9
  from src.transcription.streaming_transcriber import StreamingTranscriber
10
  from src.handlers.analysis_handler import analyze_transcript
11
  from src.handlers.transcription_handler import transcribe_file, transcribe_video_url
12
 
13
  load_dotenv()
14
 
15
- # Global config for stability
16
- MAX_BUFFER_SECONDS = 5
17
  SAMPLE_RATE = 16000
 
18
 
19
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
20
  transcriber = StreamingTranscriber()
21
 
22
- # --- 1. PRO THEME DEFINITION ---
23
  theme = gr.themes.Monochrome(
24
- primary_hue="emerald",
25
  neutral_hue="slate",
26
  font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
27
  ).set(
@@ -30,11 +29,10 @@ theme = gr.themes.Monochrome(
30
  button_primary_background_fill="*primary_600",
31
  )
32
 
33
- # ... (imports remain the same)
34
-
35
  def process_stream(audio, state, mode, language_code): # Added language_code
 
36
  if mode != "Real-time":
37
- if audio is None: return state, gr.skip()
38
  sr, data = audio
39
  data = data.astype(np.float32) / 32768.0
40
  if sr != SAMPLE_RATE:
@@ -45,121 +43,190 @@ def process_stream(audio, state, mode, language_code): # Added language_code
45
  # Pass the language selection to the transcriber
46
  # Note: You'll need to update your StreamingTranscriber.process_stream
47
  # to accept and use a 'language' argument in self.model.transcribe
48
- new_state, text = transcriber.process_stream(audio, state)
49
  return new_state, text
50
 
 
 
51
  def clear_session():
52
- transcriber.clear_history() # Reset the class internal string
53
  return np.array([], dtype=np.float32), "", ""
54
 
55
- # ... (rest of Gradio UI remains the same)
56
 
57
- # --- 2. THE UI INTERFACE ---
 
 
 
 
 
 
58
 
59
- # Removed 'theme' and 'title' from here to fix the UserWarning
60
  css = """
61
- /* Shrink the audio component container */
62
  .compact-audio {
63
  min-width: 150px !important;
64
  }
65
- /* Hide the 'Live Input' label to save vertical space */
66
  .compact-audio label {
67
  display: none !important;
68
  }
69
- /* Reduce internal padding and center the mic button */
70
  .compact-audio .container {
71
  padding: 0 !important;
72
  }
73
  """
74
- # --- THE UI INTERFACE ---
 
75
  with gr.Blocks(theme=theme, css=css) as demo:
76
  gr.Markdown("""
77
- # 🎙️ **VocalSync Intelligence**
78
- *Transforming messy speech into clear guidelines, minutes, and maps.*
79
  """)
 
80
  with gr.Tabs():
 
 
81
  with gr.Tab("Live Intelligence"):
82
  with gr.Row():
83
- # SIDEBAR (Input Controls)
84
- with gr.Column(scale=1, min_width=180): # Keeps this column small
85
- mode = gr.Radio(["Real-time", "After Speech"], value="Real-time", label="Mode")
86
 
87
- # --- NEW: Language Selection ---
 
 
 
 
 
 
88
  language_dropdown = gr.Dropdown(
89
- choices=[("English", "en"), ("Spanish", "es"), ("French", "fr"), ("German", "de"), ("Chinese", "zh"), ("Auto-Detect", None)],
 
 
 
 
 
 
 
90
  value="en",
91
  label="Speech Language"
92
  )
 
93
  audio_in = gr.Audio(
94
- sources=["microphone"],
95
- streaming=True,
96
  type="numpy",
97
- elem_classes="compact-audio" # Applies the CSS
98
  )
 
99
  clear_btn = gr.Button("Clear Session", variant="stop", size="sm")
100
-
101
- # MAIN AREA (Analysis and Transcript)
102
- with gr.Column(scale=4): # Takes up most of the page
103
- text_out = gr.Textbox(label="Transcript", lines=10, autoscroll=True)
104
- analyze_btn = gr.Button("✨ Generate Actionable Insights", variant="primary", size="lg")
105
- analysis_out = gr.Textbox(label="AI Intelligence Output", lines=12, placeholder="Analysis will appear here after clicking the button...")
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  state = gr.State(value=np.array([], dtype=np.float32))
108
-
109
- # Updated inputs to include language_dropdown
110
  audio_in.stream(
111
- process_stream,
112
- [audio_in, state, mode, language_dropdown],
113
- [state, text_out]
114
- )
115
-
116
- analyze_btn.click(analyze_transcript, inputs=text_out, outputs=analysis_out)
117
- clear_btn.click(clear_session, outputs=[state, text_out, analysis_out])
118
-
119
- # ... (Media Ingestion)
 
 
 
 
 
 
 
 
120
  with gr.Tab("Media Ingestion"):
121
  with gr.Row():
122
- # LEFT SIDEBAR: Upload & URL (Scale 1)
123
  with gr.Column(scale=1, min_width=300):
124
- gr.Markdown("### 📥 Source")
 
125
  url_input = gr.Textbox(
126
- placeholder="Accesible Video URL...",
127
- label="Remote Link",
128
- show_label=False # Keeps it clean
129
  )
130
- url_btn = gr.Button("Extract & Transcribe", variant="secondary")
131
-
132
- gr.HTML("<div style='margin: 15px 0; border-bottom: 1px solid #333;'></div>")
133
-
134
- with gr.Group(): # Groups Upload + Button
135
- file_audio = gr.Audio(
136
- sources=["upload"],
137
- type="filepath",
138
- label="Drop Audio/Video File Here",
139
- elem_classes="compact-upload" # Apply the CSS
140
- )
141
- file_btn = gr.Button("🚀 Process File", variant="secondary")
142
-
143
- # RIGHT PANEL: Transcription & Analysis
144
- with gr.Column(scale=3):
145
- media_text_out = gr.Textbox(label="Media Transcript", lines=12, autoscroll=True, placeholder="Transcription will appear here...")
146
- media_analyze_btn = gr.Button("✨ Generate Actionable Insights", variant="primary", size="lg")
147
- media_analysis_out = gr.Textbox(label="AI Intelligence Output", lines=10, autoscroll=True, placeholder="Transcription will appear here...")
148
-
149
-
150
- url_btn.click(transcribe_video_url, inputs=url_input, outputs=media_text_out)
151
- file_btn.click(transcribe_file, inputs=file_audio, outputs=media_text_out)
152
- media_analyze_btn.click(analyze_transcript, inputs=media_text_out, outputs=media_analysis_out)
153
 
 
154
 
 
 
 
 
 
 
155
 
156
- gr.HTML("<div style='text-align: center; color: #666; font-size: 0.8em;'>Powered by ContextMap Engine</div>")
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
 
 
 
 
 
 
 
159
  if __name__ == "__main__":
160
  demo.launch(
161
- # show_api=False,
162
- server_name="0.0.0.0", # Required for HF Spaces to map the port
163
- server_port=7860, # Standard HF port
164
  )
165
-
 
5
  import librosa
6
  from dotenv import load_dotenv
7
 
8
+ # Custom logic
9
  from src.transcription.streaming_transcriber import StreamingTranscriber
10
  from src.handlers.analysis_handler import analyze_transcript
11
  from src.handlers.transcription_handler import transcribe_file, transcribe_video_url
12
 
13
  load_dotenv()
14
 
15
+ # ---------------- CONFIG ----------------
 
16
  SAMPLE_RATE = 16000
17
+ logging.basicConfig(level=logging.INFO)
18
 
 
19
  transcriber = StreamingTranscriber()
20
 
21
+ # ---------------- THEME ----------------
22
  theme = gr.themes.Monochrome(
23
+ primary_hue="emerald",
24
  neutral_hue="slate",
25
  font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
26
  ).set(
 
29
  button_primary_background_fill="*primary_600",
30
  )
31
 
32
+ # ---------------- STREAMING LOGIC (UI ONLY) ----------------
 
33
  def process_stream(audio, state, mode, language_code): # Added language_code
34
+ if audio is None: return state, ""
35
  if mode != "Real-time":
 
36
  sr, data = audio
37
  data = data.astype(np.float32) / 32768.0
38
  if sr != SAMPLE_RATE:
 
43
  # Pass the language selection to the transcriber
44
  # Note: You'll need to update your StreamingTranscriber.process_stream
45
  # to accept and use a 'language' argument in self.model.transcribe
46
+ new_state, text = transcriber.process_stream(audio, state, language_code)
47
  return new_state, text
48
 
49
+
50
+
51
  def clear_session():
52
+ transcriber.clear_history()
53
  return np.array([], dtype=np.float32), "", ""
54
 
 
55
 
56
+ # ---------------- API SAFE FUNCTION ----------------
57
+ # This is what Gradio exposes as an API
58
+ def api_analyze(text: str) -> str:
59
+ if not text or not text.strip():
60
+ return "No transcript provided."
61
+ return analyze_transcript(text)
62
+
63
 
64
+ # ---------------- CSS ----------------
65
  css = """
 
66
  .compact-audio {
67
  min-width: 150px !important;
68
  }
 
69
  .compact-audio label {
70
  display: none !important;
71
  }
 
72
  .compact-audio .container {
73
  padding: 0 !important;
74
  }
75
  """
76
+
77
+ # ---------------- UI ----------------
78
  with gr.Blocks(theme=theme, css=css) as demo:
79
  gr.Markdown("""
80
+ # 🎙️ **VocalSync Intelligence**
81
+ Turning messy speech into clear thinking, meeting notes, and action.
82
  """)
83
+
84
  with gr.Tabs():
85
+
86
+ # -------- LIVE INTELLIGENCE TAB --------
87
  with gr.Tab("Live Intelligence"):
88
  with gr.Row():
 
 
 
89
 
90
+ with gr.Column(scale=1, min_width=180):
91
+ mode = gr.Radio(
92
+ ["Real-time", "After Speech"],
93
+ value="Real-time",
94
+ label="Mode"
95
+ )
96
+
97
  language_dropdown = gr.Dropdown(
98
+ choices=[
99
+ ("English", "en"),
100
+ ("Spanish", "es"),
101
+ ("French", "fr"),
102
+ ("German", "de"),
103
+ ("Chinese", "zh"),
104
+ ("Auto-Detect", None)
105
+ ],
106
  value="en",
107
  label="Speech Language"
108
  )
109
+
110
  audio_in = gr.Audio(
111
+ sources=["microphone"],
112
+ streaming=True,
113
  type="numpy",
114
+ elem_classes="compact-audio"
115
  )
116
+
117
  clear_btn = gr.Button("Clear Session", variant="stop", size="sm")
118
+
119
+ with gr.Column(scale=4):
120
+ text_out = gr.Textbox(
121
+ label="Transcript",
122
+ lines=10,
123
+ autoscroll=True
124
+ )
125
+
126
+ analyze_btn = gr.Button(
127
+ "Generate Actionable Insights",
128
+ variant="primary",
129
+ size="lg"
130
+ )
131
+
132
+ analysis_out = gr.Textbox(
133
+ label="AI Intelligence Output",
134
+ lines=12
135
+ )
136
 
137
  state = gr.State(value=np.array([], dtype=np.float32))
138
+
 
139
  audio_in.stream(
140
+ process_stream,
141
+ inputs=[audio_in, state, mode, language_dropdown],
142
+ outputs=[state, text_out]
143
+ )
144
+
145
+ analyze_btn.click(
146
+ analyze_transcript,
147
+ inputs=text_out,
148
+ outputs=analysis_out
149
+ )
150
+
151
+ clear_btn.click(
152
+ clear_session,
153
+ outputs=[state, text_out, analysis_out]
154
+ )
155
+
156
+ # -------- MEDIA INGESTION TAB --------
157
  with gr.Tab("Media Ingestion"):
158
  with gr.Row():
159
+
160
  with gr.Column(scale=1, min_width=300):
161
+ gr.Markdown("### Source")
162
+
163
  url_input = gr.Textbox(
164
+ placeholder="Accessible video URL",
165
+ show_label=False
 
166
  )
167
+ url_btn = gr.Button("Extract and Transcribe")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
+ gr.HTML("<hr>")
170
 
171
+ file_audio = gr.Audio(
172
+ sources=["upload"],
173
+ type="filepath",
174
+ label="Upload Audio or Video"
175
+ )
176
+ file_btn = gr.Button("Process File")
177
 
178
+ with gr.Column(scale=3):
179
+ media_text_out = gr.Textbox(
180
+ label="Media Transcript",
181
+ lines=12
182
+ )
183
+ media_analyze_btn = gr.Button(
184
+ "Generate Actionable Insights",
185
+ variant="primary",
186
+ size="lg"
187
+ )
188
+ media_analysis_out = gr.Textbox(
189
+ label="AI Intelligence Output",
190
+ lines=10
191
+ )
192
 
193
+ url_btn.click(
194
+ transcribe_video_url,
195
+ inputs=url_input,
196
+ outputs=media_text_out
197
+ )
198
+
199
+ file_btn.click(
200
+ transcribe_file,
201
+ inputs=file_audio,
202
+ outputs=media_text_out
203
+ )
204
+
205
+ media_analyze_btn.click(
206
+ analyze_transcript,
207
+ inputs=media_text_out,
208
+ outputs=media_analysis_out
209
+ )
210
+
211
+ # -------- API TAB (VISIBLE + DOCUMENTED) --------
212
+ gr.Markdown("## 🔌 Public API")
213
+
214
+ gr.Interface(
215
+ fn=api_analyze,
216
+ inputs=gr.Textbox(label="Transcript"),
217
+ outputs=gr.Textbox(label="Analysis"),
218
+ api_name="analyze"
219
+ )
220
 
221
+ gr.HTML(
222
+ "<div style='text-align:center; color:#666; font-size:0.8em;'>"
223
+ "Powered by ContextMap Engine"
224
+ "</div>"
225
+ )
226
+
227
+ # ---------------- LAUNCH ----------------
228
  if __name__ == "__main__":
229
  demo.launch(
230
+ server_name="0.0.0.0",
231
+ server_port=7860,
 
232
  )