stevafernandes committed on
Commit
ce8a4e0
·
verified ·
1 Parent(s): 728abed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -1
app.py CHANGED
@@ -14,6 +14,9 @@ CHANGELOG (corrections applied):
14
  7. Prompt includes "topics_discussed" field so the report only covers relevant sections
15
  8. Resilience: retry logic (up to 3 attempts), robust JSON parser, raw response
16
  surfaced on failure for debugging
 
 
 
17
  """
18
 
19
  import os
@@ -51,6 +54,34 @@ logger = logging.getLogger(__name__)
51
  EXTRACTION_PROMPT = """
52
  You are analyzing a recorded conversation about advance care planning and end-of-life wishes.
53
  Listen to the ENTIRE audio carefully and extract ALL relevant information.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  CRITICAL INSTRUCTIONS FOR SINGLE-SELECT FIELDS:
55
  - You MUST select exactly ONE option for each single-select field
56
  - Use the EXACT string values specified (copy them exactly)
@@ -311,6 +342,42 @@ def parse_json_response(response_text: str) -> dict | None:
311
  return None
312
 
313
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  def normalize_value(value, valid_options, default=None):
315
  """Normalize a value to match one of the valid options."""
316
  if value is None:
@@ -956,6 +1023,15 @@ def process_audio(audio_file):
956
  raw_response,
957
  )
958
 
 
 
 
 
 
 
 
 
 
959
  # Normalize data
960
  data = normalize_data(data)
961
 
@@ -1108,4 +1184,4 @@ if __name__ == "__main__":
1108
  if HAS_GRADIO:
1109
  demo.launch(theme=custom_theme)
1110
  else:
1111
- print("Gradio not installed. Core logic is available for import.")
 
14
  7. Prompt includes "topics_discussed" field so the report only covers relevant sections
15
  8. Resilience: retry logic (up to 3 attempts), robust JSON parser, raw response
16
  surfaced on failure for debugging
17
+ 9. Hallucination guard: prompt instructs model to return not_a_planning_conversation
18
+ flag when audio does not contain advance care planning content; validation rejects
19
+ such responses before generating a report.
20
  """
21
 
22
  import os
 
54
  EXTRACTION_PROMPT = """
55
  You are analyzing a recorded conversation about advance care planning and end-of-life wishes.
56
  Listen to the ENTIRE audio carefully and extract ALL relevant information.
57
+
58
+ IMPORTANT - NON-RELEVANT AUDIO DETECTION:
59
+ Before extracting any planning data, first determine whether this audio actually
60
+ contains an advance care planning conversation. The audio MUST contain a real
61
+ discussion about at least one of these topics: health care wishes, financial
62
+ planning, funeral/memorial preferences, or values and legacy.
63
+
64
+ If the audio is:
65
+ - Silence, noise, music, or unintelligible speech
66
+ - A conversation about unrelated topics (e.g. casual chat, a lecture, a podcast
67
+ not about advance care planning)
68
+ - Too short or too unclear to extract meaningful planning information
69
+ - Random test audio or microphone checks
70
+
71
+ Then return ONLY this JSON and nothing else:
72
+ ```json
73
+ {
74
+ "not_a_planning_conversation": true,
75
+ "reason": "Brief explanation of what the audio actually contains"
76
+ }
77
+ ```
78
+
79
+ Do NOT invent, fabricate, or hallucinate planning data. If you cannot clearly hear
80
+ a real advance care planning discussion, you MUST return the above JSON.
81
+
82
+ Only proceed with the full extraction below if you are confident the audio contains
83
+ a genuine advance care planning conversation.
84
+
85
  CRITICAL INSTRUCTIONS FOR SINGLE-SELECT FIELDS:
86
  - You MUST select exactly ONE option for each single-select field
87
  - Use the EXACT string values specified (copy them exactly)
 
342
  return None
343
 
344
 
345
+ def _is_valid_planning_data(data: dict) -> tuple[bool, str]:
346
+ """Check whether parsed data represents a genuine planning conversation.
347
+
348
+ Returns (is_valid, reason). When is_valid is False, reason contains a
349
+ user-facing message explaining why the audio was rejected.
350
+ """
351
+ if not data:
352
+ return False, "No data could be extracted from the audio."
353
+
354
+ # Explicit flag set by the model when the audio is not relevant
355
+ if data.get("not_a_planning_conversation"):
356
+ reason = data.get("reason", "The audio does not appear to contain an advance care planning conversation.")
357
+ return False, (
358
+ "This audio does not contain an advance care planning conversation. "
359
+ f"({reason}) Please record or upload a conversation that discusses "
360
+ "health care wishes, financial planning, or funeral preferences."
361
+ )
362
+
363
+ # Secondary heuristic: if none of the core sections are present, the
364
+ # model may have returned something unexpected.
365
+ has_any_section = any(
366
+ key in data for key in (
367
+ "health_care_wishes", "financial_planning", "funeral_plans",
368
+ "participant", "topics_discussed",
369
+ )
370
+ )
371
+ if not has_any_section:
372
+ return False, (
373
+ "The audio could not be matched to a planning conversation. "
374
+ "Please make sure the recording contains a discussion about "
375
+ "health care wishes, financial planning, or funeral preferences."
376
+ )
377
+
378
+ return True, ""
379
+
380
+
381
  def normalize_value(value, valid_options, default=None):
382
  """Normalize a value to match one of the valid options."""
383
  if value is None:
 
1023
  raw_response,
1024
  )
1025
 
1026
+ # ---- Hallucination guard ----
1027
+ is_valid, rejection_reason = _is_valid_planning_data(data)
1028
+ if not is_valid:
1029
+ return (
1030
+ None,
1031
+ rejection_reason,
1032
+ json.dumps(data, indent=2),
1033
+ )
1034
+
1035
  # Normalize data
1036
  data = normalize_data(data)
1037
 
 
1184
  if HAS_GRADIO:
1185
  demo.launch(theme=custom_theme)
1186
  else:
1187
+ print("Gradio not installed. Core logic is available for import.")