Seth0330 committed on
Commit
6c086cb
·
verified ·
1 Parent(s): 069e18c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -73
app.py CHANGED
@@ -28,16 +28,13 @@ if not OPENROUTER_API_KEY:
28
  st.warning("Set OPENROUTER_API_KEY in your Space secrets (OpenRouter) to enable AI features.")
29
  st.stop()
30
 
31
- if not HF_API_TOKEN:
32
- st.warning("Set HF_API_TOKEN in your Space secrets (Hugging Face Inference) to enable speech-to-text.")
33
- # we don't stop; app still works without voice
34
-
35
  OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
36
 
37
- # IMPORTANT: put the exact OpenRouter model IDs here.
38
- # Check https://openrouter.ai/models for the correct slugs.
39
- VISION_MODEL = os.getenv("VISION_MODEL", "nvidia/nemotron-nano-12b-v2-vl:free") # <-- adjust if needed
40
- REASONING_MODEL = os.getenv("REASONING_MODEL", "nvidia/nemotron-nano-12b-v2-vl:free") # <-- or another Nemotron/Instruct model
41
 
42
  HF_WHISPER_MODEL = os.getenv("HF_WHISPER_MODEL", "openai/whisper-large-v3")
43
  HF_WHISPER_URL = f"https://api-inference.huggingface.co/models/{HF_WHISPER_MODEL}"
@@ -187,7 +184,6 @@ def format_opening_hours(opening_hours: dict) -> str:
187
  pieces.append("; ".join(weekday[:2]))
188
  return " | ".join(pieces)
189
 
190
-
191
  # =========================
192
  # CITY LIST
193
  # =========================
@@ -252,7 +248,6 @@ def split_city_label(label: str):
252
  return parts[0], parts[1]
253
  return label.strip(), None
254
 
255
-
256
  # =========================
257
  # GOOGLE PLACES HELPERS
258
  # =========================
@@ -411,7 +406,6 @@ def tool_hqontario_context_city(city_label: str) -> str:
411
  )
412
  return ""
413
 
414
-
415
  # =========================
416
  # DPD & RECALLS HELPERS
417
  # =========================
@@ -494,36 +488,54 @@ def tool_get_wait_times_awareness() -> str:
494
  Reference: {WAIT_TIMES_INFO_URL}
495
  """).strip()
496
 
497
-
498
  # =========================
499
  # OPENROUTER HELPER
500
  # =========================
501
 
502
  def call_openrouter_chat(model: str, messages, temperature: float = 0.3):
503
  """
504
- Generic helper for OpenRouter chat/completions API with OpenAI-format messages.
 
505
  """
506
  headers = {
507
  "Authorization": f"Bearer {OPENROUTER_API_KEY}",
508
  "Content-Type": "application/json",
509
- # Optional but recommended by OpenRouter:
510
- "HTTP-Referer": "https://seth0330-save.hf.space",
511
- "X-Title": "CareCall AI (Canada)",
512
  }
 
 
 
 
 
 
 
513
  payload = {
514
  "model": model,
515
  "messages": messages,
516
- "temperature": temperature,
517
  }
 
518
  try:
519
  r = requests.post(OPENROUTER_URL, headers=headers, json=payload, timeout=60)
520
- r.raise_for_status()
521
- data = r.json()
522
- return data["choices"][0]["message"]["content"].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
  except Exception as e:
524
  return f"(Model call unavailable: {e})"
525
 
526
-
527
  # =========================
528
  # VISION (Nemotron via OpenRouter)
529
  # =========================
@@ -565,9 +577,7 @@ Write as if supporting a separate triage system, not directly reassuring or diag
565
  {"type": "text", "text": prompt},
566
  {
567
  "type": "image_url",
568
- "image_url": {
569
- "url": f"data:image/jpeg;base64,{b64}"
570
- },
571
  },
572
  ],
573
  }
@@ -575,7 +585,6 @@ Write as if supporting a separate triage system, not directly reassuring or diag
575
 
576
  return call_openrouter_chat(VISION_MODEL, messages, temperature=0.2)
577
 
578
-
579
  # =========================
580
  # ASR (Whisper via HF Inference)
581
  # =========================
@@ -583,14 +592,11 @@ Write as if supporting a separate triage system, not directly reassuring or diag
583
  def call_asr(audio_source) -> str:
584
  """
585
  Uses Hugging Face Inference API for Whisper.
586
- Expects:
587
- - bytes from audio_recorder_streamlit, or
588
- - file-like object.
589
  """
590
  if not audio_source or not HF_API_TOKEN:
591
  return ""
592
 
593
- # Normalize to bytes
594
  if isinstance(audio_source, bytes):
595
  audio_bytes = audio_source
596
  else:
@@ -598,24 +604,25 @@ def call_asr(audio_source) -> str:
598
 
599
  headers = {
600
  "Authorization": f"Bearer {HF_API_TOKEN}",
601
- "Content-Type": "audio/wav", # works for most; HF autodetects
602
  }
603
 
604
  try:
605
  resp = requests.post(HF_WHISPER_URL, headers=headers, data=audio_bytes, timeout=120)
606
  resp.raise_for_status()
607
  data = resp.json()
608
- # ASR pipeline returns {"text": "..."}
609
  if isinstance(data, dict) and "text" in data:
610
  return data["text"].strip()
611
- # Some models may return list-style
612
  if isinstance(data, list) and data and isinstance(data[0], dict):
613
- return (data[0].get("text") or data[0].get("generated_text") or "").strip()
 
 
614
  return ""
615
  except Exception as e:
616
  return f"(Transcription unavailable: {e})"
617
 
618
-
619
  # =========================
620
  # REASONING AGENT (Nemotron via OpenRouter)
621
  # =========================
@@ -784,7 +791,6 @@ HQ Ontario info:
784
 
785
  return call_openrouter_chat(REASONING_MODEL, messages, temperature=0.3)
786
 
787
-
788
  # =========================
789
  # STATE & NAV HELPERS
790
  # =========================
@@ -797,6 +803,8 @@ if "image_bytes" not in st.session_state:
797
  st.session_state.image_bytes = None
798
  if "audio_bytes" not in st.session_state:
799
  st.session_state.audio_bytes = None
 
 
800
  if "user_text" not in st.session_state:
801
  st.session_state.user_text = ""
802
  if "final_answer" not in st.session_state:
@@ -817,7 +825,6 @@ def render_steps():
817
  unsafe_allow_html=True,
818
  )
819
 
820
-
821
  # =========================
822
  # APP HEADER
823
  # =========================
@@ -888,7 +895,7 @@ if st.session_state.step == 1:
888
  st.markdown("</div>", unsafe_allow_html=True)
889
 
890
  # =========================
891
- # STEP 2: VOICE OR TEXT
892
  # =========================
893
 
894
  elif st.session_state.step == 2:
@@ -898,41 +905,42 @@ elif st.session_state.step == 2:
898
  unsafe_allow_html=True,
899
  )
900
  st.markdown(
901
- '<div class="label-soft">Use the mic to describe your concern, or type instead. A clear story helps match what we see in the photo.</div>',
 
902
  unsafe_allow_html=True,
903
  )
904
 
905
  st.markdown('<div class="label-soft">Speak (optional)</div>', unsafe_allow_html=True)
906
- audio_bytes = audio_recorder(
907
- text="Hold to record",
908
- recording_color="#ef4444",
909
- neutral_color="#e5e7eb",
910
- icon_name="microphone",
911
- icon_size="1.3x",
912
- )
913
 
914
- # When a new recording is captured, transcribe and prefill the text box
915
- if audio_bytes:
916
- st.session_state.audio_bytes = audio_bytes
917
- st.success("Voice note captured. Transcribing...")
918
-
919
- # Only re-transcribe if it's a new recording
920
- if "last_audio" not in st.session_state or st.session_state.last_audio != audio_bytes:
921
- transcript = call_asr(audio_bytes)
922
- st.session_state.user_text = transcript.strip() if transcript else ""
923
- st.session_state.last_audio = audio_bytes
924
-
925
- user_text = st.text_area(
926
- "Or type your description here (you can edit the auto-filled text)",
927
- value=st.session_state.user_text,
928
- height=120,
929
- placeholder='Example: "Painful big toe for 3 days, mild redness, no fever, can walk but hurts in shoes."',
930
- )
931
- st.session_state.user_text = user_text
932
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
933
 
934
  st.markdown(
935
- '<div class="label-soft">When you are ready, get your one recommended pathway.</div>',
936
  unsafe_allow_html=True,
937
  )
938
 
@@ -948,16 +956,13 @@ st.session_state.user_text = user_text
948
  city_label = (st.session_state.city_label or "").strip()
949
 
950
  vision_summary = call_vision_summarizer(image_bytes) if image_bytes else ""
951
- voice_text = call_asr(st.session_state.audio_bytes) if st.session_state.audio_bytes else ""
 
952
 
953
- parts = []
954
- if st.session_state.user_text.strip():
955
- parts.append("Typed: " + st.session_state.user_text.strip())
956
- if voice_text.strip():
957
- parts.append("Voice (transcribed): " + voice_text.strip())
958
- narrative = "\n".join(parts)
959
 
960
- combined_for_drugs = " ".join(x for x in [narrative, vision_summary] if x)
961
  dpd_context = (
962
  tool_lookup_drug_products(combined_for_drugs)
963
  if combined_for_drugs
@@ -976,7 +981,7 @@ st.session_state.user_text = user_text
976
  hqontario_context = ""
977
 
978
  final_answer = call_reasoning_agent(
979
- narrative=narrative,
980
  vision_summary=vision_summary,
981
  city_label=city_label,
982
  dpd_context=dpd_context,
@@ -1025,6 +1030,7 @@ elif st.session_state.step == 3:
1025
  if st.button("Start over", use_container_width=True):
1026
  st.session_state.image_bytes = None
1027
  st.session_state.audio_bytes = None
 
1028
  st.session_state.user_text = ""
1029
  st.session_state.final_answer = ""
1030
  go_to_step(1)
 
28
  st.warning("Set OPENROUTER_API_KEY in your Space secrets (OpenRouter) to enable AI features.")
29
  st.stop()
30
 
31
+ # We don't hard-stop for missing HF_API_TOKEN; app still works without voice.
 
 
 
32
  OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
33
 
34
+ # IMPORTANT: set these to valid OpenRouter model slugs.
35
+ # Check https://openrouter.ai/models for exact names.
36
+ VISION_MODEL = os.getenv("VISION_MODEL", "nvidia/nemotron-nano-12b-v2-vl:free") # example slug; update if needed
37
+ REASONING_MODEL = os.getenv("REASONING_MODEL", "nvidia/nemotron-nano-12b-v2-vl:free") # or another instruct-capable model
38
 
39
  HF_WHISPER_MODEL = os.getenv("HF_WHISPER_MODEL", "openai/whisper-large-v3")
40
  HF_WHISPER_URL = f"https://api-inference.huggingface.co/models/{HF_WHISPER_MODEL}"
 
184
  pieces.append("; ".join(weekday[:2]))
185
  return " | ".join(pieces)
186
 
 
187
  # =========================
188
  # CITY LIST
189
  # =========================
 
248
  return parts[0], parts[1]
249
  return label.strip(), None
250
 
 
251
  # =========================
252
  # GOOGLE PLACES HELPERS
253
  # =========================
 
406
  )
407
  return ""
408
 
 
409
  # =========================
410
  # DPD & RECALLS HELPERS
411
  # =========================
 
488
  Reference: {WAIT_TIMES_INFO_URL}
489
  """).strip()
490
 
 
491
  # =========================
492
  # OPENROUTER HELPER
493
  # =========================
494
 
495
  def call_openrouter_chat(model: str, messages, temperature: float = 0.3):
496
  """
497
+ Helper for OpenRouter's /chat/completions.
498
+ Returns a readable error message if the response is not valid JSON.
499
  """
500
  headers = {
501
  "Authorization": f"Bearer {OPENROUTER_API_KEY}",
502
  "Content-Type": "application/json",
 
 
 
503
  }
504
+
505
+ # Optional attribution if you configure APP_URL and origins in OpenRouter
506
+ app_url = os.getenv("APP_URL", "").strip()
507
+ if app_url:
508
+ headers["HTTP-Referer"] = app_url
509
+ headers["X-Title"] = "CareCall AI (Canada)"
510
+
511
  payload = {
512
  "model": model,
513
  "messages": messages,
514
+ "temperature": float(temperature),
515
  }
516
+
517
  try:
518
  r = requests.post(OPENROUTER_URL, headers=headers, json=payload, timeout=60)
519
+
520
+ if r.status_code != 200:
521
+ text_snippet = r.text[:300].replace("\n", " ")
522
+ return f"(Model call error: {r.status_code} — {text_snippet})"
523
+
524
+ try:
525
+ data = r.json()
526
+ except ValueError:
527
+ text_snippet = r.text[:300].replace("\n", " ")
528
+ return f"(Model call error: Non-JSON response — {text_snippet})"
529
+
530
+ choices = data.get("choices")
531
+ if not choices or "message" not in choices[0] or "content" not in choices[0]["message"]:
532
+ return f"(Model call error: Unexpected response format — {data})"
533
+
534
+ return choices[0]["message"]["content"].strip()
535
+
536
  except Exception as e:
537
  return f"(Model call unavailable: {e})"
538
 
 
539
  # =========================
540
  # VISION (Nemotron via OpenRouter)
541
  # =========================
 
577
  {"type": "text", "text": prompt},
578
  {
579
  "type": "image_url",
580
+ "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
 
 
581
  },
582
  ],
583
  }
 
585
 
586
  return call_openrouter_chat(VISION_MODEL, messages, temperature=0.2)
587
 
 
588
  # =========================
589
  # ASR (Whisper via HF Inference)
590
  # =========================
 
592
  def call_asr(audio_source) -> str:
593
  """
594
  Uses Hugging Face Inference API for Whisper.
595
+ Accepts bytes (from audio_recorder_streamlit) or file-like.
 
 
596
  """
597
  if not audio_source or not HF_API_TOKEN:
598
  return ""
599
 
 
600
  if isinstance(audio_source, bytes):
601
  audio_bytes = audio_source
602
  else:
 
604
 
605
  headers = {
606
  "Authorization": f"Bearer {HF_API_TOKEN}",
607
+ "Content-Type": "audio/wav",
608
  }
609
 
610
  try:
611
  resp = requests.post(HF_WHISPER_URL, headers=headers, data=audio_bytes, timeout=120)
612
  resp.raise_for_status()
613
  data = resp.json()
614
+
615
  if isinstance(data, dict) and "text" in data:
616
  return data["text"].strip()
617
+
618
  if isinstance(data, list) and data and isinstance(data[0], dict):
619
+ text_val = data[0].get("text") or data[0].get("generated_text") or ""
620
+ return text_val.strip()
621
+
622
  return ""
623
  except Exception as e:
624
  return f"(Transcription unavailable: {e})"
625
 
 
626
  # =========================
627
  # REASONING AGENT (Nemotron via OpenRouter)
628
  # =========================
 
791
 
792
  return call_openrouter_chat(REASONING_MODEL, messages, temperature=0.3)
793
 
 
794
  # =========================
795
  # STATE & NAV HELPERS
796
  # =========================
 
803
  st.session_state.image_bytes = None
804
  if "audio_bytes" not in st.session_state:
805
  st.session_state.audio_bytes = None
806
+ if "last_audio" not in st.session_state:
807
+ st.session_state.last_audio = None
808
  if "user_text" not in st.session_state:
809
  st.session_state.user_text = ""
810
  if "final_answer" not in st.session_state:
 
825
  unsafe_allow_html=True,
826
  )
827
 
 
828
  # =========================
829
  # APP HEADER
830
  # =========================
 
895
  st.markdown("</div>", unsafe_allow_html=True)
896
 
897
  # =========================
898
+ # STEP 2: VOICE OR TEXT (WITH AUTO-FILL)
899
  # =========================
900
 
901
  elif st.session_state.step == 2:
 
905
  unsafe_allow_html=True,
906
  )
907
  st.markdown(
908
+ '<div class="label-soft">Use the mic to describe your concern, or type instead. '
909
+ 'We will auto-fill the text box with your recording so you can review and edit.</div>',
910
  unsafe_allow_html=True,
911
  )
912
 
913
  st.markdown('<div class="label-soft">Speak (optional)</div>', unsafe_allow_html=True)
 
 
 
 
 
 
 
914
 
915
+ audio_bytes = audio_recorder(
916
+ text="Tap to record",
917
+ recording_color="#ef4444",
918
+ neutral_color="#e5e7eb",
919
+ icon_name="microphone",
920
+ icon_size="1.3x",
921
+ )
 
 
 
 
 
 
 
 
 
 
 
922
 
923
+ if audio_bytes:
924
+ # Save latest audio
925
+ st.session_state.audio_bytes = audio_bytes
926
+ # Only (re)transcribe if this recording is new
927
+ if st.session_state.last_audio != audio_bytes:
928
+ st.success("Voice note captured. Transcribing...")
929
+ transcript = call_asr(audio_bytes)
930
+ if transcript:
931
+ st.session_state.user_text = transcript.strip()
932
+ st.session_state.last_audio = audio_bytes
933
+
934
+ user_text = st.text_area(
935
+ "Or type / edit your description here",
936
+ value=st.session_state.user_text,
937
+ height=120,
938
+ placeholder='Example: "Painful big toe for 3 days, mild redness, no fever, can walk but hurts in shoes."',
939
+ )
940
+ st.session_state.user_text = user_text
941
 
942
  st.markdown(
943
+ '<div class="label-soft">When you are ready, tap below to get your one recommended pathway.</div>',
944
  unsafe_allow_html=True,
945
  )
946
 
 
956
  city_label = (st.session_state.city_label or "").strip()
957
 
958
  vision_summary = call_vision_summarizer(image_bytes) if image_bytes else ""
959
+ # Use final edited text; audio already injected into it if present
960
+ narrative_text = st.session_state.user_text.strip()
961
 
962
+ combined_for_drugs = " ".join(
963
+ x for x in [narrative_text, vision_summary] if x
964
+ )
 
 
 
965
 
 
966
  dpd_context = (
967
  tool_lookup_drug_products(combined_for_drugs)
968
  if combined_for_drugs
 
981
  hqontario_context = ""
982
 
983
  final_answer = call_reasoning_agent(
984
+ narrative=narrative_text,
985
  vision_summary=vision_summary,
986
  city_label=city_label,
987
  dpd_context=dpd_context,
 
1030
  if st.button("Start over", use_container_width=True):
1031
  st.session_state.image_bytes = None
1032
  st.session_state.audio_bytes = None
1033
+ st.session_state.last_audio = None
1034
  st.session_state.user_text = ""
1035
  st.session_state.final_answer = ""
1036
  go_to_step(1)