Fnu Mahnoor committed on
Commit
c3b8b39
·
1 Parent(s): 52cf5d6

Fix app and readme

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +78 -145
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🎙️
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
7
- sdk_version: 5.12.0 # <--- Change this from 4.44.1
8
  python_version: '3.10'
9
  app_file: app.py
10
  pinned: false
 
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
7
+ sdk_version: 4.44.1
8
  python_version: '3.10'
9
  app_file: app.py
10
  pinned: false
app.py CHANGED
@@ -5,22 +5,23 @@ import os
5
  import librosa
6
  from dotenv import load_dotenv
7
 
8
- # Custom logic
9
  from src.transcription.streaming_transcriber import StreamingTranscriber
10
  from src.handlers.analysis_handler import analyze_transcript
11
  from src.handlers.transcription_handler import transcribe_file, transcribe_video_url
12
 
13
  load_dotenv()
14
 
15
- # ---------------- CONFIG ----------------
 
16
  SAMPLE_RATE = 16000
17
- logging.basicConfig(level=logging.INFO)
18
 
 
19
  transcriber = StreamingTranscriber()
20
 
21
- # ---------------- THEME ----------------
22
  theme = gr.themes.Monochrome(
23
- primary_hue="emerald",
24
  neutral_hue="slate",
25
  font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
26
  ).set(
@@ -29,10 +30,11 @@ theme = gr.themes.Monochrome(
29
  button_primary_background_fill="*primary_600",
30
  )
31
 
32
- # ---------------- STREAMING LOGIC (UI ONLY) ----------------
 
33
  def process_stream(audio, state, mode, language_code): # Added language_code
34
- if audio is None: return state, ""
35
  if mode != "Real-time":
 
36
  sr, data = audio
37
  data = data.astype(np.float32) / 32768.0
38
  if sr != SAMPLE_RATE:
@@ -43,190 +45,121 @@ def process_stream(audio, state, mode, language_code): # Added language_code
43
  # Pass the language selection to the transcriber
44
  # Note: You'll need to update your StreamingTranscriber.process_stream
45
  # to accept and use a 'language' argument in self.model.transcribe
46
- new_state, text = transcriber.process_stream(audio, state, language_code)
47
  return new_state, text
48
 
49
-
50
-
51
  def clear_session():
52
- transcriber.clear_history()
53
  return np.array([], dtype=np.float32), "", ""
54
 
 
55
 
56
- # ---------------- API SAFE FUNCTION ----------------
57
- # This is what Gradio exposes as an API
58
- def api_analyze(text: str) -> str:
59
- if not text or not text.strip():
60
- return "No transcript provided."
61
- return analyze_transcript(text)
62
-
63
 
64
- # ---------------- CSS ----------------
65
  css = """
 
66
  .compact-audio {
67
  min-width: 150px !important;
68
  }
 
69
  .compact-audio label {
70
  display: none !important;
71
  }
 
72
  .compact-audio .container {
73
  padding: 0 !important;
74
  }
75
  """
76
-
77
- # ---------------- UI ----------------
78
  with gr.Blocks(theme=theme, css=css) as demo:
79
  gr.Markdown("""
80
- # 🎙️ **VocalSync Intelligence**
81
- Turning messy speech into clear thinking, meeting notes, and action.
82
  """)
83
-
84
  with gr.Tabs():
85
-
86
- # -------- LIVE INTELLIGENCE TAB --------
87
  with gr.Tab("Live Intelligence"):
88
  with gr.Row():
 
 
 
89
 
90
- with gr.Column(scale=1, min_width=180):
91
- mode = gr.Radio(
92
- ["Real-time", "After Speech"],
93
- value="Real-time",
94
- label="Mode"
95
- )
96
-
97
  language_dropdown = gr.Dropdown(
98
- choices=[
99
- ("English", "en"),
100
- ("Spanish", "es"),
101
- ("French", "fr"),
102
- ("German", "de"),
103
- ("Chinese", "zh"),
104
- ("Auto-Detect", None)
105
- ],
106
  value="en",
107
  label="Speech Language"
108
  )
109
-
110
  audio_in = gr.Audio(
111
- sources=["microphone"],
112
- streaming=True,
113
  type="numpy",
114
- elem_classes="compact-audio"
115
  )
116
-
117
  clear_btn = gr.Button("Clear Session", variant="stop", size="sm")
118
-
119
- with gr.Column(scale=4):
120
- text_out = gr.Textbox(
121
- label="Transcript",
122
- lines=10,
123
- autoscroll=True
124
- )
125
-
126
- analyze_btn = gr.Button(
127
- "Generate Actionable Insights",
128
- variant="primary",
129
- size="lg"
130
- )
131
-
132
- analysis_out = gr.Textbox(
133
- label="AI Intelligence Output",
134
- lines=12
135
- )
136
 
137
  state = gr.State(value=np.array([], dtype=np.float32))
138
-
 
139
  audio_in.stream(
140
- process_stream,
141
- inputs=[audio_in, state, mode, language_dropdown],
142
- outputs=[state, text_out]
143
- )
144
-
145
- analyze_btn.click(
146
- analyze_transcript,
147
- inputs=text_out,
148
- outputs=analysis_out
149
- )
150
-
151
- clear_btn.click(
152
- clear_session,
153
- outputs=[state, text_out, analysis_out]
154
- )
155
-
156
- # -------- MEDIA INGESTION TAB --------
157
  with gr.Tab("Media Ingestion"):
158
  with gr.Row():
159
-
160
  with gr.Column(scale=1, min_width=300):
161
- gr.Markdown("### Source")
162
-
163
  url_input = gr.Textbox(
164
- placeholder="Accessible video URL",
165
- show_label=False
 
166
  )
167
- url_btn = gr.Button("Extract and Transcribe")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
- gr.HTML("<hr>")
170
 
171
- file_audio = gr.Audio(
172
- sources=["upload"],
173
- type="filepath",
174
- label="Upload Audio or Video"
175
- )
176
- file_btn = gr.Button("Process File")
177
 
178
- with gr.Column(scale=3):
179
- media_text_out = gr.Textbox(
180
- label="Media Transcript",
181
- lines=12
182
- )
183
- media_analyze_btn = gr.Button(
184
- "Generate Actionable Insights",
185
- variant="primary",
186
- size="lg"
187
- )
188
- media_analysis_out = gr.Textbox(
189
- label="AI Intelligence Output",
190
- lines=10
191
- )
192
 
193
- url_btn.click(
194
- transcribe_video_url,
195
- inputs=url_input,
196
- outputs=media_text_out
197
- )
198
-
199
- file_btn.click(
200
- transcribe_file,
201
- inputs=file_audio,
202
- outputs=media_text_out
203
- )
204
-
205
- media_analyze_btn.click(
206
- analyze_transcript,
207
- inputs=media_text_out,
208
- outputs=media_analysis_out
209
- )
210
-
211
- # -------- API TAB (VISIBLE + DOCUMENTED) --------
212
- gr.Markdown("## 🔌 Public API")
213
-
214
- gr.Interface(
215
- fn=api_analyze,
216
- inputs=gr.Textbox(label="Transcript"),
217
- outputs=gr.Textbox(label="Analysis"),
218
- api_name="analyze"
219
- )
220
 
221
- gr.HTML(
222
- "<div style='text-align:center; color:#666; font-size:0.8em;'>"
223
- "Powered by ContextMap Engine"
224
- "</div>"
225
- )
226
-
227
- # ---------------- LAUNCH ----------------
228
  if __name__ == "__main__":
229
  demo.launch(
230
- server_name="0.0.0.0",
231
- server_port=7860,
 
232
  )
 
 
5
  import librosa
6
  from dotenv import load_dotenv
7
 
8
+ # Your custom logic imports
9
  from src.transcription.streaming_transcriber import StreamingTranscriber
10
  from src.handlers.analysis_handler import analyze_transcript
11
  from src.handlers.transcription_handler import transcribe_file, transcribe_video_url
12
 
13
  load_dotenv()
14
 
15
+ # Global config for stability
16
+ MAX_BUFFER_SECONDS = 5
17
  SAMPLE_RATE = 16000
 
18
 
19
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
20
  transcriber = StreamingTranscriber()
21
 
22
+ # --- 1. PRO THEME DEFINITION ---
23
  theme = gr.themes.Monochrome(
24
+ primary_hue="emerald",
25
  neutral_hue="slate",
26
  font=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
27
  ).set(
 
30
  button_primary_background_fill="*primary_600",
31
  )
32
 
33
+ # ... (imports remain the same)
34
+
35
  def process_stream(audio, state, mode, language_code): # Added language_code
 
36
  if mode != "Real-time":
37
+ if audio is None: return state, gr.skip()
38
  sr, data = audio
39
  data = data.astype(np.float32) / 32768.0
40
  if sr != SAMPLE_RATE:
 
45
  # Pass the language selection to the transcriber
46
  # Note: You'll need to update your StreamingTranscriber.process_stream
47
  # to accept and use a 'language' argument in self.model.transcribe
48
+ new_state, text = transcriber.process_stream(audio, state)
49
  return new_state, text
50
 
 
 
51
  def clear_session():
52
+ transcriber.clear_history() # Reset the class internal string
53
  return np.array([], dtype=np.float32), "", ""
54
 
55
+ # ... (rest of Gradio UI remains the same)
56
 
57
+ # --- 2. THE UI INTERFACE ---
 
 
 
 
 
 
58
 
59
+ # Removed 'theme' and 'title' from here to fix the UserWarning
60
  css = """
61
+ /* Shrink the audio component container */
62
  .compact-audio {
63
  min-width: 150px !important;
64
  }
65
+ /* Hide the 'Live Input' label to save vertical space */
66
  .compact-audio label {
67
  display: none !important;
68
  }
69
+ /* Reduce internal padding and center the mic button */
70
  .compact-audio .container {
71
  padding: 0 !important;
72
  }
73
  """
74
+ # --- THE UI INTERFACE ---
 
75
  with gr.Blocks(theme=theme, css=css) as demo:
76
  gr.Markdown("""
77
+ # 🎙️ **VocalSync Intelligence**
78
+ *Transforming messy speech into clear guidelines, minutes, and maps.*
79
  """)
 
80
  with gr.Tabs():
 
 
81
  with gr.Tab("Live Intelligence"):
82
  with gr.Row():
83
+ # SIDEBAR (Input Controls)
84
+ with gr.Column(scale=1, min_width=180): # Keeps this column small
85
+ mode = gr.Radio(["Real-time", "After Speech"], value="Real-time", label="Mode")
86
 
87
+ # --- NEW: Language Selection ---
 
 
 
 
 
 
88
  language_dropdown = gr.Dropdown(
89
+ choices=[("English", "en"), ("Spanish", "es"), ("French", "fr"), ("German", "de"), ("Chinese", "zh"), ("Auto-Detect", None)],
 
 
 
 
 
 
 
90
  value="en",
91
  label="Speech Language"
92
  )
 
93
  audio_in = gr.Audio(
94
+ sources=["microphone"],
95
+ streaming=True,
96
  type="numpy",
97
+ elem_classes="compact-audio" # Applies the CSS
98
  )
 
99
  clear_btn = gr.Button("Clear Session", variant="stop", size="sm")
100
+
101
+ # MAIN AREA (Analysis and Transcript)
102
+ with gr.Column(scale=4): # Takes up most of the page
103
+ text_out = gr.Textbox(label="Transcript", lines=10, autoscroll=True)
104
+ analyze_btn = gr.Button("✨ Generate Actionable Insights", variant="primary", size="lg")
105
+ analysis_out = gr.Textbox(label="AI Intelligence Output", lines=12, placeholder="Analysis will appear here after clicking the button...")
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  state = gr.State(value=np.array([], dtype=np.float32))
108
+
109
+ # Updated inputs to include language_dropdown
110
  audio_in.stream(
111
+ process_stream,
112
+ [audio_in, state, mode, language_dropdown],
113
+ [state, text_out]
114
+ )
115
+
116
+ analyze_btn.click(analyze_transcript, inputs=text_out, outputs=analysis_out)
117
+ clear_btn.click(clear_session, outputs=[state, text_out, analysis_out])
118
+
119
+ # ... (Media Ingestion)
 
 
 
 
 
 
 
 
120
  with gr.Tab("Media Ingestion"):
121
  with gr.Row():
122
+ # LEFT SIDEBAR: Upload & URL (Scale 1)
123
  with gr.Column(scale=1, min_width=300):
124
+ gr.Markdown("### 📥 Source")
 
125
  url_input = gr.Textbox(
126
+ placeholder="Accessible Video URL...",
127
+ label="Remote Link",
128
+ show_label=False # Keeps it clean
129
  )
130
+ url_btn = gr.Button("Extract & Transcribe", variant="secondary")
131
+
132
+ gr.HTML("<div style='margin: 15px 0; border-bottom: 1px solid #333;'></div>")
133
+
134
+ with gr.Group(): # Groups Upload + Button
135
+ file_audio = gr.Audio(
136
+ sources=["upload"],
137
+ type="filepath",
138
+ label="Drop Audio/Video File Here",
139
+ elem_classes="compact-upload" # Apply the CSS
140
+ )
141
+ file_btn = gr.Button("🚀 Process File", variant="secondary")
142
+
143
+ # RIGHT PANEL: Transcription & Analysis
144
+ with gr.Column(scale=3):
145
+ media_text_out = gr.Textbox(label="Media Transcript", lines=12, autoscroll=True, placeholder="Transcription will appear here...")
146
+ media_analyze_btn = gr.Button("✨ Generate Actionable Insights", variant="primary", size="lg")
147
+ media_analysis_out = gr.Textbox(label="AI Intelligence Output", lines=10, autoscroll=True, placeholder="Analysis will appear here after clicking the button...")
148
+
149
+
150
+ url_btn.click(transcribe_video_url, inputs=url_input, outputs=media_text_out)
151
+ file_btn.click(transcribe_file, inputs=file_audio, outputs=media_text_out)
152
+ media_analyze_btn.click(analyze_transcript, inputs=media_text_out, outputs=media_analysis_out)
153
 
 
154
 
 
 
 
 
 
 
155
 
156
+ gr.HTML("<div style='text-align: center; color: #666; font-size: 0.8em;'>Powered by ContextMap Engine</div>")
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
 
 
 
 
 
 
 
159
  if __name__ == "__main__":
160
  demo.launch(
161
+ # show_api=False,
162
+ server_name="0.0.0.0", # Required for HF Spaces to map the port
163
+ server_port=7860, # Standard HF port
164
  )
165
+