arshenoy committed on
Commit
4f6acd4
·
verified ·
1 Parent(s): f5e7e3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -28
app.py CHANGED
@@ -10,21 +10,11 @@ from PIL import Image
10
 
11
  # --- 1. CONFIGURATION ---
12
  st.set_page_config(page_title="SomAI", layout="wide", page_icon="🩺")
 
13
 
14
- # The URL of your deployed FastAPI backend (Space 2: arshenoy/somAI-backend)
15
- # NOTE: Replace with the actual URL when deployed. For local testing, use http://localhost:7860
16
- # When deployed on HF Spaces, this may need to be the actual public URL or a service endpoint if using different Spaces.
17
- # Assuming the backend is hosted and accessible.
18
- BACKEND_API_URL = "https://<your-backend-space-name>.hf.space"
19
- # Use this for local testing:
20
- # BACKEND_API_URL = "http://localhost:7860"
21
-
22
- # --- 2. LOAD VOICE BRAIN (Keep Whisper on frontend for VTT) ---
23
  @st.cache_resource
24
  def load_whisper():
25
  print(">>> LOADING AUDIO SENSORS...")
26
- # Use 'tiny' for faster performance on free tier, or 'medium' for better accuracy
27
- # 'large-v3' is too slow for frontend VTT in a live chat.
28
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
29
  return whisper_model
30
 
@@ -182,7 +172,6 @@ with st.sidebar:
182
  )
183
  st.plotly_chart(fig, use_container_width=True)
184
 
185
- # Custom Metric Card with Border (replaces standard metric-card)
186
  st.markdown(f"""
187
  <div style="background: rgba(255, 255, 255, 0.05); border: 1px solid rgba(255, 255, 255, 0.1); border-radius: 12px; padding: 15px; margin-top: 15px; border-left: 4px solid {color};">
188
  <h5 style="margin:0; color: {color}; font-family: 'JetBrains Mono', monospace;">CLINICAL ASSESSMENT</h5>
@@ -209,11 +198,11 @@ with col_main:
209
  st.markdown("🎙️ **Hold & Speak:**")
210
  audio_bytes = audio_recorder(
211
  text="",
212
- recording_color="#ff3300", # Neon Red for recording
213
- neutral_color="#00ff80", # Neon Green for neutral
214
  icon_name="microphone",
215
  icon_size="3x",
216
- initial_time=0 # Force 'hold-to-speak' mode
217
  )
218
 
219
  user_query = None
@@ -222,12 +211,9 @@ with col_main:
222
  if audio_bytes:
223
  with st.spinner("🔊 Transcribing Voice..."):
224
  audio_file = io.BytesIO(audio_bytes)
225
- # Transcribe the audio
226
- # Using a single file-like object directly with WhisperModel
227
  segments, info = whisper.transcribe(audio_file, beam_size=5)
228
  text_list = [segment.text for segment in segments]
229
  user_query = " ".join(text_list).strip()
230
- # If transcription is empty, handle gracefully
231
  if not user_query:
232
  st.warning("Could not detect speech. Please speak clearly.")
233
  st.stop()
@@ -259,34 +245,30 @@ with col_main:
259
  "mode": st.session_state.mode
260
  }
261
 
262
- # Using Stream=False for FastAPI/requests.post since the current FastAPI code doesn't support streaming.
263
- # We will stream the *display* locally to emulate the effect.
264
  response = requests.post(f"{BACKEND_API_URL}/generate", json=payload, timeout=60)
265
  response.raise_for_status()
266
 
267
  data = response.json()
268
  raw_text = data['generated_text']
269
 
270
- # Stream display emulation: Chunk the text and write it
271
- # This simulates streaming visually even if the API is non-streaming.
272
- chunk_size = 5 # words per chunk
273
  words = raw_text.split()
274
 
275
  for i in range(0, len(words), chunk_size):
276
  chunk = " ".join(words[i:i + chunk_size])
277
  full_resp += chunk + " "
278
 
279
- # Use an empty container to display the response with a cursor effect
280
- # and then replace it with the next chunk
281
  placeholder.markdown(f"<div class='chat-bubble ai-bubble'>{full_resp}▌</div>", unsafe_allow_html=True)
282
  time.sleep(0.05) # Adjust for speed
283
 
284
- # Final display (no cursor)
285
  placeholder.markdown(f"<div class='chat-bubble ai-bubble'>{raw_text}</div>", unsafe_allow_html=True)
286
 
287
  st.session_state.history.append({"role": "assistant", "content": raw_text})
288
 
289
- # Display suggestions below the chat container
290
  suggestions = data.get('suggestions', [])
291
  if suggestions:
292
  st.markdown("---")
@@ -306,5 +288,5 @@ with col_main:
306
  st.session_state.history.append({"role": "assistant", "content": error_msg})
307
  placeholder.markdown(f"<div class='chat-bubble ai-bubble'>{error_msg}</div>", unsafe_allow_html=True)
308
 
309
- # Manually rerun to clear the input box and update history
310
  st.rerun()
 
10
 
11
  # --- 1. CONFIGURATION ---
12
  st.set_page_config(page_title="SomAI", layout="wide", page_icon="🩺")
13
+ BACKEND_API_URL = "https://arshenoy/somAI-backend.hf.space"
14
 
 
 
 
 
 
 
 
 
 
15
  @st.cache_resource
16
  def load_whisper():
17
  print(">>> LOADING AUDIO SENSORS...")
 
 
18
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
19
  return whisper_model
20
 
 
172
  )
173
  st.plotly_chart(fig, use_container_width=True)
174
 
 
175
  st.markdown(f"""
176
  <div style="background: rgba(255, 255, 255, 0.05); border: 1px solid rgba(255, 255, 255, 0.1); border-radius: 12px; padding: 15px; margin-top: 15px; border-left: 4px solid {color};">
177
  <h5 style="margin:0; color: {color}; font-family: 'JetBrains Mono', monospace;">CLINICAL ASSESSMENT</h5>
 
198
  st.markdown("🎙️ **Hold & Speak:**")
199
  audio_bytes = audio_recorder(
200
  text="",
201
+ recording_color="#ff3300",
202
+ neutral_color="#00ff80",
203
  icon_name="microphone",
204
  icon_size="3x",
205
+ initial_time=0
206
  )
207
 
208
  user_query = None
 
211
  if audio_bytes:
212
  with st.spinner("🔊 Transcribing Voice..."):
213
  audio_file = io.BytesIO(audio_bytes)
 
 
214
  segments, info = whisper.transcribe(audio_file, beam_size=5)
215
  text_list = [segment.text for segment in segments]
216
  user_query = " ".join(text_list).strip()
 
217
  if not user_query:
218
  st.warning("Could not detect speech. Please speak clearly.")
219
  st.stop()
 
245
  "mode": st.session_state.mode
246
  }
247
 
248
+
 
249
  response = requests.post(f"{BACKEND_API_URL}/generate", json=payload, timeout=60)
250
  response.raise_for_status()
251
 
252
  data = response.json()
253
  raw_text = data['generated_text']
254
 
255
+
256
+ chunk_size = 5
 
257
  words = raw_text.split()
258
 
259
  for i in range(0, len(words), chunk_size):
260
  chunk = " ".join(words[i:i + chunk_size])
261
  full_resp += chunk + " "
262
 
263
+
 
264
  placeholder.markdown(f"<div class='chat-bubble ai-bubble'>{full_resp}▌</div>", unsafe_allow_html=True)
265
  time.sleep(0.05) # Adjust for speed
266
 
267
+
268
  placeholder.markdown(f"<div class='chat-bubble ai-bubble'>{raw_text}</div>", unsafe_allow_html=True)
269
 
270
  st.session_state.history.append({"role": "assistant", "content": raw_text})
271
 
 
272
  suggestions = data.get('suggestions', [])
273
  if suggestions:
274
  st.markdown("---")
 
288
  st.session_state.history.append({"role": "assistant", "content": error_msg})
289
  placeholder.markdown(f"<div class='chat-bubble ai-bubble'>{error_msg}</div>", unsafe_allow_html=True)
290
 
291
+
292
  st.rerun()