SAVE

Sleeping

App Files Community

Seth0330 committed on Nov 9, 2025

Commit

6c086cb

verified ·

1 Parent(s): 069e18c

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -73

app.py CHANGED Viewed

@@ -28,16 +28,13 @@ if not OPENROUTER_API_KEY:
     st.warning("Set OPENROUTER_API_KEY in your Space secrets (OpenRouter) to enable AI features.")
     st.stop()
-if not HF_API_TOKEN:
-    st.warning("Set HF_API_TOKEN in your Space secrets (Hugging Face Inference) to enable speech-to-text.")
-    # we don't stop; app still works without voice
 OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
-# IMPORTANT: put the exact OpenRouter model IDs here.
-# Check https://openrouter.ai/models for the correct slugs.
-VISION_MODEL = os.getenv("VISION_MODEL", "nvidia/nemotron-nano-12b-v2-vl:free")       # <-- adjust if needed
-REASONING_MODEL = os.getenv("REASONING_MODEL", "nvidia/nemotron-nano-12b-v2-vl:free") # <-- or another Nemotron/Instruct model
 HF_WHISPER_MODEL = os.getenv("HF_WHISPER_MODEL", "openai/whisper-large-v3")
 HF_WHISPER_URL = f"https://api-inference.huggingface.co/models/{HF_WHISPER_MODEL}"
@@ -187,7 +184,6 @@ def format_opening_hours(opening_hours: dict) -> str:
         pieces.append("; ".join(weekday[:2]))
     return " | ".join(pieces)
 # =========================
 # CITY LIST
 # =========================
@@ -252,7 +248,6 @@ def split_city_label(label: str):
         return parts[0], parts[1]
     return label.strip(), None
 # =========================
 # GOOGLE PLACES HELPERS
 # =========================
@@ -411,7 +406,6 @@ def tool_hqontario_context_city(city_label: str) -> str:
         )
     return ""
 # =========================
 # DPD & RECALLS HELPERS
 # =========================
@@ -494,36 +488,54 @@ def tool_get_wait_times_awareness() -> str:
     Reference: {WAIT_TIMES_INFO_URL}
     """).strip()
 # =========================
 # OPENROUTER HELPER
 # =========================
 def call_openrouter_chat(model: str, messages, temperature: float = 0.3):
     """
-    Generic helper for OpenRouter chat/completions API with OpenAI-format messages.
     """
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
         "Content-Type": "application/json",
-        # Optional but recommended by OpenRouter:
-        "HTTP-Referer": "https://seth0330-save.hf.space",
-        "X-Title": "CareCall AI (Canada)",
     }
     payload = {
         "model": model,
         "messages": messages,
-        "temperature": temperature,
     }
     try:
         r = requests.post(OPENROUTER_URL, headers=headers, json=payload, timeout=60)
-        r.raise_for_status()
-        data = r.json()
-        return data["choices"][0]["message"]["content"].strip()
     except Exception as e:
         return f"(Model call unavailable: {e})"
 # =========================
 # VISION (Nemotron via OpenRouter)
 # =========================
@@ -565,9 +577,7 @@ Write as if supporting a separate triage system, not directly reassuring or diag
                 {"type": "text", "text": prompt},
                 {
                     "type": "image_url",
-                    "image_url": {
-                        "url": f"data:image/jpeg;base64,{b64}"
-                    },
                 },
             ],
         }
@@ -575,7 +585,6 @@ Write as if supporting a separate triage system, not directly reassuring or diag
     return call_openrouter_chat(VISION_MODEL, messages, temperature=0.2)
 # =========================
 # ASR (Whisper via HF Inference)
 # =========================
@@ -583,14 +592,11 @@ Write as if supporting a separate triage system, not directly reassuring or diag
 def call_asr(audio_source) -> str:
     """
     Uses Hugging Face Inference API for Whisper.
-    Expects:
-    - bytes from audio_recorder_streamlit, or
-    - file-like object.
     """
     if not audio_source or not HF_API_TOKEN:
         return ""
-    # Normalize to bytes
     if isinstance(audio_source, bytes):
         audio_bytes = audio_source
     else:
@@ -598,24 +604,25 @@ def call_asr(audio_source) -> str:
     headers = {
         "Authorization": f"Bearer {HF_API_TOKEN}",
-        "Content-Type": "audio/wav",  # works for most; HF autodetects
     }
     try:
         resp = requests.post(HF_WHISPER_URL, headers=headers, data=audio_bytes, timeout=120)
         resp.raise_for_status()
         data = resp.json()
-        # ASR pipeline returns {"text": "..."}
         if isinstance(data, dict) and "text" in data:
             return data["text"].strip()
-        # Some models may return list-style
         if isinstance(data, list) and data and isinstance(data[0], dict):
-            return (data[0].get("text") or data[0].get("generated_text") or "").strip()
         return ""
     except Exception as e:
         return f"(Transcription unavailable: {e})"
 # =========================
 # REASONING AGENT (Nemotron via OpenRouter)
 # =========================
@@ -784,7 +791,6 @@ HQ Ontario info:
     return call_openrouter_chat(REASONING_MODEL, messages, temperature=0.3)
 # =========================
 # STATE & NAV HELPERS
 # =========================
@@ -797,6 +803,8 @@ if "image_bytes" not in st.session_state:
     st.session_state.image_bytes = None
 if "audio_bytes" not in st.session_state:
     st.session_state.audio_bytes = None
 if "user_text" not in st.session_state:
     st.session_state.user_text = ""
 if "final_answer" not in st.session_state:
@@ -817,7 +825,6 @@ def render_steps():
         unsafe_allow_html=True,
     )
 # =========================
 # APP HEADER
 # =========================
@@ -888,7 +895,7 @@ if st.session_state.step == 1:
     st.markdown("</div>", unsafe_allow_html=True)
 # =========================
-# STEP 2: VOICE OR TEXT
 # =========================
 elif st.session_state.step == 2:
@@ -898,41 +905,42 @@ elif st.session_state.step == 2:
         unsafe_allow_html=True,
     )
     st.markdown(
-        '<div class="label-soft">Use the mic to describe your concern, or type instead. A clear story helps match what we see in the photo.</div>',
         unsafe_allow_html=True,
     )
     st.markdown('<div class="label-soft">Speak (optional)</div>', unsafe_allow_html=True)
-    audio_bytes = audio_recorder(
-    text="Hold to record",
-    recording_color="#ef4444",
-    neutral_color="#e5e7eb",
-    icon_name="microphone",
-    icon_size="1.3x",
-)
-# When a new recording is captured, transcribe and prefill the text box
-if audio_bytes:
-    st.session_state.audio_bytes = audio_bytes
-    st.success("Voice note captured. Transcribing...")
-    # Only re-transcribe if it's a new recording
-    if "last_audio" not in st.session_state or st.session_state.last_audio != audio_bytes:
-        transcript = call_asr(audio_bytes)
-        st.session_state.user_text = transcript.strip() if transcript else ""
-        st.session_state.last_audio = audio_bytes
-user_text = st.text_area(
-    "Or type your description here (you can edit the auto-filled text)",
-    value=st.session_state.user_text,
-    height=120,
-    placeholder='Example: "Painful big toe for 3 days, mild redness, no fever, can walk but hurts in shoes."',
-)
-st.session_state.user_text = user_text
     st.markdown(
-        '<div class="label-soft">When you are ready, get your one recommended pathway.</div>',
         unsafe_allow_html=True,
     )
@@ -948,16 +956,13 @@ st.session_state.user_text = user_text
                 city_label = (st.session_state.city_label or "").strip()
                 vision_summary = call_vision_summarizer(image_bytes) if image_bytes else ""
-                voice_text = call_asr(st.session_state.audio_bytes) if st.session_state.audio_bytes else ""
-                parts = []
-                if st.session_state.user_text.strip():
-                    parts.append("Typed: " + st.session_state.user_text.strip())
-                if voice_text.strip():
-                    parts.append("Voice (transcribed): " + voice_text.strip())
-                narrative = "\n".join(parts)
-                combined_for_drugs = " ".join(x for x in [narrative, vision_summary] if x)
                 dpd_context = (
                     tool_lookup_drug_products(combined_for_drugs)
                     if combined_for_drugs
@@ -976,7 +981,7 @@ st.session_state.user_text = user_text
                     hqontario_context = ""
                 final_answer = call_reasoning_agent(
-                    narrative=narrative,
                     vision_summary=vision_summary,
                     city_label=city_label,
                     dpd_context=dpd_context,
@@ -1025,6 +1030,7 @@ elif st.session_state.step == 3:
         if st.button("Start over", use_container_width=True):
             st.session_state.image_bytes = None
             st.session_state.audio_bytes = None
             st.session_state.user_text = ""
             st.session_state.final_answer = ""
             go_to_step(1)

     st.warning("Set OPENROUTER_API_KEY in your Space secrets (OpenRouter) to enable AI features.")
     st.stop()
+# We don't hard-stop for missing HF_API_TOKEN; app still works without voice.
 OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
+# IMPORTANT: set these to valid OpenRouter model slugs.
+# Check https://openrouter.ai/models for exact names.
+VISION_MODEL = os.getenv("VISION_MODEL", "nvidia/nemotron-nano-12b-v2-vl:free")       # example slug; update if needed
+REASONING_MODEL = os.getenv("REASONING_MODEL", "nvidia/nemotron-nano-12b-v2-vl:free")  # or another instruct-capable model
 HF_WHISPER_MODEL = os.getenv("HF_WHISPER_MODEL", "openai/whisper-large-v3")
 HF_WHISPER_URL = f"https://api-inference.huggingface.co/models/{HF_WHISPER_MODEL}"
         pieces.append("; ".join(weekday[:2]))
     return " | ".join(pieces)
 # =========================
 # CITY LIST
 # =========================
         return parts[0], parts[1]
     return label.strip(), None
 # =========================
 # GOOGLE PLACES HELPERS
 # =========================
         )
     return ""
 # =========================
 # DPD & RECALLS HELPERS
 # =========================
     Reference: {WAIT_TIMES_INFO_URL}
     """).strip()
 # =========================
 # OPENROUTER HELPER
 # =========================
 def call_openrouter_chat(model: str, messages, temperature: float = 0.3):
     """
+    Helper for OpenRouter's /chat/completions.
+    Returns a readable error message if the response is not valid JSON.
     """
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
         "Content-Type": "application/json",
     }
+    # Optional attribution if you configure APP_URL and origins in OpenRouter
+    app_url = os.getenv("APP_URL", "").strip()
+    if app_url:
+        headers["HTTP-Referer"] = app_url
+        headers["X-Title"] = "CareCall AI (Canada)"
     payload = {
         "model": model,
         "messages": messages,
+        "temperature": float(temperature),
     }
     try:
         r = requests.post(OPENROUTER_URL, headers=headers, json=payload, timeout=60)
+        if r.status_code != 200:
+            text_snippet = r.text[:300].replace("\n", " ")
+            return f"(Model call error: {r.status_code} — {text_snippet})"
+        try:
+            data = r.json()
+        except ValueError:
+            text_snippet = r.text[:300].replace("\n", " ")
+            return f"(Model call error: Non-JSON response — {text_snippet})"
+        choices = data.get("choices")
+        if not choices or "message" not in choices[0] or "content" not in choices[0]["message"]:
+            return f"(Model call error: Unexpected response format — {data})"
+        return choices[0]["message"]["content"].strip()
     except Exception as e:
         return f"(Model call unavailable: {e})"
 # =========================
 # VISION (Nemotron via OpenRouter)
 # =========================
                 {"type": "text", "text": prompt},
                 {
                     "type": "image_url",
+                    "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
                 },
             ],
         }
     return call_openrouter_chat(VISION_MODEL, messages, temperature=0.2)
 # =========================
 # ASR (Whisper via HF Inference)
 # =========================
 def call_asr(audio_source) -> str:
     """
     Uses Hugging Face Inference API for Whisper.
+    Accepts bytes (from audio_recorder_streamlit) or file-like.
     """
     if not audio_source or not HF_API_TOKEN:
         return ""
     if isinstance(audio_source, bytes):
         audio_bytes = audio_source
     else:
     headers = {
         "Authorization": f"Bearer {HF_API_TOKEN}",
+        "Content-Type": "audio/wav",
     }
     try:
         resp = requests.post(HF_WHISPER_URL, headers=headers, data=audio_bytes, timeout=120)
         resp.raise_for_status()
         data = resp.json()
         if isinstance(data, dict) and "text" in data:
             return data["text"].strip()
         if isinstance(data, list) and data and isinstance(data[0], dict):
+            text_val = data[0].get("text") or data[0].get("generated_text") or ""
+            return text_val.strip()
         return ""
     except Exception as e:
         return f"(Transcription unavailable: {e})"
 # =========================
 # REASONING AGENT (Nemotron via OpenRouter)
 # =========================
     return call_openrouter_chat(REASONING_MODEL, messages, temperature=0.3)
 # =========================
 # STATE & NAV HELPERS
 # =========================
     st.session_state.image_bytes = None
 if "audio_bytes" not in st.session_state:
     st.session_state.audio_bytes = None
+if "last_audio" not in st.session_state:
+    st.session_state.last_audio = None
 if "user_text" not in st.session_state:
     st.session_state.user_text = ""
 if "final_answer" not in st.session_state:
         unsafe_allow_html=True,
     )
 # =========================
 # APP HEADER
 # =========================
     st.markdown("</div>", unsafe_allow_html=True)
 # =========================
+# STEP 2: VOICE OR TEXT (WITH AUTO-FILL)
 # =========================
 elif st.session_state.step == 2:
         unsafe_allow_html=True,
     )
     st.markdown(
+        '<div class="label-soft">Use the mic to describe your concern, or type instead. '
+        'We will auto-fill the text box with your recording so you can review and edit.</div>',
         unsafe_allow_html=True,
     )
     st.markdown('<div class="label-soft">Speak (optional)</div>', unsafe_allow_html=True)
+    audio_bytes = audio_recorder(
+        text="Tap to record",
+        recording_color="#ef4444",
+        neutral_color="#e5e7eb",
+        icon_name="microphone",
+        icon_size="1.3x",
+    )
+    if audio_bytes:
+        # Save latest audio
+        st.session_state.audio_bytes = audio_bytes
+        # Only (re)transcribe if this recording is new
+        if st.session_state.last_audio != audio_bytes:
+            st.success("Voice note captured. Transcribing...")
+            transcript = call_asr(audio_bytes)
+            if transcript:
+                st.session_state.user_text = transcript.strip()
+            st.session_state.last_audio = audio_bytes
+    user_text = st.text_area(
+        "Or type / edit your description here",
+        value=st.session_state.user_text,
+        height=120,
+        placeholder='Example: "Painful big toe for 3 days, mild redness, no fever, can walk but hurts in shoes."',
+    )
+    st.session_state.user_text = user_text
     st.markdown(
+        '<div class="label-soft">When you are ready, tap below to get your one recommended pathway.</div>',
         unsafe_allow_html=True,
     )
                 city_label = (st.session_state.city_label or "").strip()
                 vision_summary = call_vision_summarizer(image_bytes) if image_bytes else ""
+                # Use final edited text; audio already injected into it if present
+                narrative_text = st.session_state.user_text.strip()
+                combined_for_drugs = " ".join(
+                    x for x in [narrative_text, vision_summary] if x
+                )
                 dpd_context = (
                     tool_lookup_drug_products(combined_for_drugs)
                     if combined_for_drugs
                     hqontario_context = ""
                 final_answer = call_reasoning_agent(
+                    narrative=narrative_text,
                     vision_summary=vision_summary,
                     city_label=city_label,
                     dpd_context=dpd_context,
         if st.button("Start over", use_container_width=True):
             st.session_state.image_bytes = None
             st.session_state.audio_bytes = None
+            st.session_state.last_audio = None
             st.session_state.user_text = ""
             st.session_state.final_answer = ""
             go_to_step(1)