arshenoy committed on
Commit
4f6acd4
·
verified ·
1 Parent(s): f5e7e3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -28
app.py CHANGED
@@ -10,21 +10,11 @@ from PIL import Image
10
 
11
  # --- 1. CONFIGURATION ---
12
  st.set_page_config(page_title="SomAI", layout="wide", page_icon="🩺")
 
13
 
14
- # The URL of your deployed FastAPI backend (Space 2: arshenoy/somAI-backend)
15
- # NOTE: Replace with the actual URL when deployed. For local testing, use http://localhost:7860
16
- # When deployed on HF Spaces, this may need to be the actual public URL or a service endpoint if using different Spaces.
17
- # Assuming the backend is hosted and accessible.
18
- BACKEND_API_URL = "https://<your-backend-space-name>.hf.space"
19
- # Use this for local testing:
20
- # BACKEND_API_URL = "http://localhost:7860"
21
-
22
- # --- 2. LOAD VOICE BRAIN (Keep Whisper on frontend for VTT) ---
23
  @st.cache_resource
24
  def load_whisper():
25
  print(">>> LOADING AUDIO SENSORS...")
26
- # Use 'tiny' for faster performance on free tier, or 'medium' for better accuracy
27
- # 'large-v3' is too slow for frontend VTT in a live chat.
28
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
29
  return whisper_model
30
 
@@ -182,7 +172,6 @@ with st.sidebar:
182
  )
183
  st.plotly_chart(fig, use_container_width=True)
184
 
185
- # Custom Metric Card with Border (replaces standard metric-card)
186
  st.markdown(f"""
187
  <div style="background: rgba(255, 255, 255, 0.05); border: 1px solid rgba(255, 255, 255, 0.1); border-radius: 12px; padding: 15px; margin-top: 15px; border-left: 4px solid {color};">
188
  <h5 style="margin:0; color: {color}; font-family: 'JetBrains Mono', monospace;">CLINICAL ASSESSMENT</h5>
@@ -209,11 +198,11 @@ with col_main:
209
  st.markdown("🎙️ **Hold & Speak:**")
210
  audio_bytes = audio_recorder(
211
  text="",
212
- recording_color="#ff3300", # Neon Red for recording
213
- neutral_color="#00ff80", # Neon Green for neutral
214
  icon_name="microphone",
215
  icon_size="3x",
216
- initial_time=0 # Force 'hold-to-speak' mode
217
  )
218
 
219
  user_query = None
@@ -222,12 +211,9 @@ with col_main:
222
  if audio_bytes:
223
  with st.spinner("🔊 Transcribing Voice..."):
224
  audio_file = io.BytesIO(audio_bytes)
225
- # Transcribe the audio
226
- # Using a single file-like object directly with WhisperModel
227
  segments, info = whisper.transcribe(audio_file, beam_size=5)
228
  text_list = [segment.text for segment in segments]
229
  user_query = " ".join(text_list).strip()
230
- # If transcription is empty, handle gracefully
231
  if not user_query:
232
  st.warning("Could not detect speech. Please speak clearly.")
233
  st.stop()
@@ -259,34 +245,30 @@ with col_main:
259
  "mode": st.session_state.mode
260
  }
261
 
262
- # Using Stream=False for FastAPI/requests.post since the current FastAPI code doesn't support streaming.
263
- # We will stream the *display* locally to emulate the effect.
264
  response = requests.post(f"{BACKEND_API_URL}/generate", json=payload, timeout=60)
265
  response.raise_for_status()
266
 
267
  data = response.json()
268
  raw_text = data['generated_text']
269
 
270
- # Stream display emulation: Chunk the text and write it
271
- # This simulates streaming visually even if the API is non-streaming.
272
- chunk_size = 5 # words per chunk
273
  words = raw_text.split()
274
 
275
  for i in range(0, len(words), chunk_size):
276
  chunk = " ".join(words[i:i + chunk_size])
277
  full_resp += chunk + " "
278
 
279
- # Use an empty container to display the response with a cursor effect
280
- # and then replace it with the next chunk
281
  placeholder.markdown(f"<div class='chat-bubble ai-bubble'>{full_resp}▌</div>", unsafe_allow_html=True)
282
  time.sleep(0.05) # Adjust for speed
283
 
284
- # Final display (no cursor)
285
  placeholder.markdown(f"<div class='chat-bubble ai-bubble'>{raw_text}</div>", unsafe_allow_html=True)
286
 
287
  st.session_state.history.append({"role": "assistant", "content": raw_text})
288
 
289
- # Display suggestions below the chat container
290
  suggestions = data.get('suggestions', [])
291
  if suggestions:
292
  st.markdown("---")
@@ -306,5 +288,5 @@ with col_main:
306
  st.session_state.history.append({"role": "assistant", "content": error_msg})
307
  placeholder.markdown(f"<div class='chat-bubble ai-bubble'>{error_msg}</div>", unsafe_allow_html=True)
308
 
309
- # Manually rerun to clear the input box and update history
310
  st.rerun()
 
10
 
11
  # --- 1. CONFIGURATION ---
12
  st.set_page_config(page_title="SomAI", layout="wide", page_icon="🩺")
13
+ BACKEND_API_URL = "https://arshenoy/somAI-backend.hf.space"
14
 
 
 
 
 
 
 
 
 
 
15
  @st.cache_resource
16
  def load_whisper():
17
  print(">>> LOADING AUDIO SENSORS...")
 
 
18
  whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")
19
  return whisper_model
20
 
 
172
  )
173
  st.plotly_chart(fig, use_container_width=True)
174
 
 
175
  st.markdown(f"""
176
  <div style="background: rgba(255, 255, 255, 0.05); border: 1px solid rgba(255, 255, 255, 0.1); border-radius: 12px; padding: 15px; margin-top: 15px; border-left: 4px solid {color};">
177
  <h5 style="margin:0; color: {color}; font-family: 'JetBrains Mono', monospace;">CLINICAL ASSESSMENT</h5>
 
198
  st.markdown("🎙️ **Hold & Speak:**")
199
  audio_bytes = audio_recorder(
200
  text="",
201
+ recording_color="#ff3300",
202
+ neutral_color="#00ff80",
203
  icon_name="microphone",
204
  icon_size="3x",
205
+ initial_time=0
206
  )
207
 
208
  user_query = None
 
211
  if audio_bytes:
212
  with st.spinner("🔊 Transcribing Voice..."):
213
  audio_file = io.BytesIO(audio_bytes)
 
 
214
  segments, info = whisper.transcribe(audio_file, beam_size=5)
215
  text_list = [segment.text for segment in segments]
216
  user_query = " ".join(text_list).strip()
 
217
  if not user_query:
218
  st.warning("Could not detect speech. Please speak clearly.")
219
  st.stop()
 
245
  "mode": st.session_state.mode
246
  }
247
 
248
+
 
249
  response = requests.post(f"{BACKEND_API_URL}/generate", json=payload, timeout=60)
250
  response.raise_for_status()
251
 
252
  data = response.json()
253
  raw_text = data['generated_text']
254
 
255
+
256
+ chunk_size = 5
 
257
  words = raw_text.split()
258
 
259
  for i in range(0, len(words), chunk_size):
260
  chunk = " ".join(words[i:i + chunk_size])
261
  full_resp += chunk + " "
262
 
263
+
 
264
  placeholder.markdown(f"<div class='chat-bubble ai-bubble'>{full_resp}▌</div>", unsafe_allow_html=True)
265
  time.sleep(0.05) # Adjust for speed
266
 
267
+
268
  placeholder.markdown(f"<div class='chat-bubble ai-bubble'>{raw_text}</div>", unsafe_allow_html=True)
269
 
270
  st.session_state.history.append({"role": "assistant", "content": raw_text})
271
 
 
272
  suggestions = data.get('suggestions', [])
273
  if suggestions:
274
  st.markdown("---")
 
288
  st.session_state.history.append({"role": "assistant", "content": error_msg})
289
  placeholder.markdown(f"<div class='chat-bubble ai-bubble'>{error_msg}</div>", unsafe_allow_html=True)
290
 
291
+
292
  st.rerun()