SICG4

Paused

App Files Files Community

stevafernandes committed on Mar 16

Commit

1fe95d9

verified ·

1 Parent(s): 196cb9f

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -30

app.py CHANGED Viewed

@@ -2,7 +2,6 @@
 Planning Summary Audio Analyzer - Hugging Face Spaces App
 Analyzes audio recordings of planning conversations and generates
 a structured Word document planning summary report using Google's Gemini API.
 CHANGELOG (corrections applied):
   1. Treatment preference: added "conditional_comfort_care" option for nuanced cases
   2. Beneficiary status: improved prompt guidance to distinguish account access from
@@ -13,12 +12,18 @@ CHANGELOG (corrections applied):
   5. Name spelling: prompt now flags uncertain proper noun spellings with [verify spelling]
   6. Next Steps section: driven by extracted data and topics discussed, not hardcoded
   7. Prompt includes "topics_discussed" field so the report only covers relevant sections
 """
 import os
 import re
 import json
 import time
 import tempfile
 from docx import Document
 from docx.shared import Inches, Pt, Twips
@@ -40,6 +45,8 @@ try:
 except ImportError:
     HAS_GENAI = False
 # ============================================================================
 # EXTRACTION PROMPT
 # ============================================================================
@@ -48,18 +55,43 @@ EXTRACTION_PROMPT = """
 You are analyzing a recorded conversation about advance care planning and end-of-life wishes.
 Listen to the ENTIRE audio carefully and extract ALL relevant information.
 CRITICAL INSTRUCTIONS FOR SINGLE-SELECT FIELDS:
 - You MUST select exactly ONE option for each single-select field
 - Use the EXACT string values specified (copy them exactly)
 - If the conversation implies something even indirectly, make your best inference
 - NEVER leave single-select fields as null - always pick the best match
 IMPORTANT RULES FOR PROPER NOUNS:
 - If a last name is spelled out letter by letter, use that exact spelling.
 - If a last name is only spoken (not spelled), transcribe it phonetically and append
   [verify spelling] after it. Example: "Potoff [verify spelling]"
 - First names that are spelled out should use the spelled version.
 Return a JSON object with this EXACT structure:
 ```json
 {
@@ -165,19 +197,15 @@ Return a JSON object with this EXACT structure:
   "facilitator_summary": "Facilitator's closing summary and recommendations"
 }
 ```
 DECISION GUIDE FOR COMMON SCENARIOS:
 topics_discussed:
 - Listen for which topics the participant chose to focus on
 - Only include "health", "financial", and/or "funeral" if they were actually discussed
 - If the participant said they only want to discuss health and financial, do NOT include "funeral"
 advance_care_status:
 - If they say they haven't done paperwork/documents yet -> "no_documents"
 - If they have old documents that need updating -> "has_documents_needs_update"
 - If they have current, up-to-date documents -> "has_current_documents"
 treatment_preference:
 - If they UNCONDITIONALLY want only comfort/palliative care, no machines ever -> "comfort_care_only"
 - If they want full treatment/CPR/ventilation IF there's hope of meaningful recovery -> "full_treatment_if_recovery"
@@ -185,7 +213,6 @@ treatment_preference:
   or quality of life (e.g. "treat me if I can still think clearly, but let me go
   if I'm cognitively impaired") -> "conditional_comfort_care"
 - If they're unsure or need more information -> "unsure"
 beneficiary_status:
 - IMPORTANT: "all_current" means the participant explicitly confirmed that formal
   beneficiary designations (not just account access) are filed and up to date
@@ -193,23 +220,19 @@ beneficiary_status:
   did NOT explicitly confirm legal beneficiary designations are current -> "unsure"
 - If they know some designations need updating -> "need_to_update"
 - If they're not sure who's listed or need to check -> "unsure"
 has_info_list:
 - If they have files/info but haven't shared the location or it's disorganized -> "yes_not_shared"
 - If their trusted person knows where everything is -> "yes_shared"
 - If they haven't created any list -> "not_created"
 shared_with_loved_ones:
 - If they've talked but nothing is written down -> "yes_not_written"
 - If they've discussed AND written it down -> "yes_written"
 - If they haven't discussed wishes yet -> "not_yet"
 specific_items_status:
 - If they named specific items for specific people -> "has_specific_items"
 - If they explicitly said no specific designations are needed (e.g. "everything goes
   to my spouse" or "nothing specific needs to go anywhere specific") -> "no_specific_items"
 - If they haven't thought about it yet or are undecided -> "not_yet_decided"
 service_type / body_preference / cost_planning:
 - If funeral planning was NOT discussed at all, use "not_discussed" for these fields
 - Celebration of life, casual gathering, party -> "celebration_of_life"
@@ -221,20 +244,17 @@ service_type / body_preference / cost_planning:
 - If they mention life insurance will cover it or family knows about funding -> "family_aware"
 - If they have a pre-paid funeral plan -> "prepaid"
 - If costs haven't been discussed -> "needs_discussion"
 values_reflections:
 - "meaning_and_joy": ONLY include hobbies, relationships, passions, and activities
   that bring happiness. Do NOT include medical decision criteria like cognitive
   function preferences here. Those belong in treatment_details.
 - "want_remembered_for": Use the participant's own words about how they want to be remembered.
 - "what_matters_most": Summarize their overall philosophy about living and legacy.
 recommended_next_steps:
 - Set each to true ONLY if it is relevant based on what was discussed
 - For example, if funeral planning was not discussed, do not set explore_funeral_preplanning to true
 - If documents already exist and are current, do not set create_healthcare_poa to true
 - Base these on gaps identified during the conversation
 Listen for these key topics:
 - Who would make healthcare decisions (usually spouse first, then adult child)
 - Who would handle finances (often same people)
@@ -243,7 +263,6 @@ Listen for these key topics:
 - Funeral/memorial preferences
 - Special items to give specific people
 - What matters most to them, their values
 Return ONLY valid JSON, no markdown formatting or explanation.
 """
@@ -287,19 +306,78 @@ def analyze_audio(audio_path: str, api_key: str) -> str:
 # ============================================================================
 def parse_json_response(response_text: str) -> dict | None:
-    """Extract JSON from Gemini response."""
     cleaned = re.sub(r'```json\s*', '', response_text)
     cleaned = re.sub(r'```\s*', '', cleaned)
-    json_match = re.search(r'\{[\s\S]*\}', cleaned)
-    if json_match:
-        try:
-            return json.loads(json_match.group())
-        except json.JSONDecodeError:
-            return None
     return None
 def normalize_value(value, valid_options, default=None):
     """Normalize a value to match one of the valid options."""
     if value is None:
@@ -897,8 +975,17 @@ def generate_docx(data: dict, output_path: str) -> str:
 # MAIN PROCESSING FUNCTION
 # ============================================================================
 def process_audio(audio_file):
-    """Main function to process audio and generate Word document."""
     if audio_file is None:
         return None, "Please record or upload an audio file.", None
@@ -907,13 +994,43 @@ def process_audio(audio_file):
         return None, "API key not configured. Please set GEMINI_API_KEY in Space secrets.", None
     try:
-        # Analyze audio
-        raw_response = analyze_audio(audio_file, api_key)
-        # Parse response
-        data = parse_json_response(raw_response)
         if not data:
-            return None, "Failed to parse the AI response. Please try again.", None
         # Normalize data
         data = normalize_data(data)
@@ -999,7 +1116,6 @@ if HAS_GRADIO:
     with gr.Blocks(title="Advance Care Planning") as demo:
         gr.Markdown("""
         # Advance Care Planning
         Record or upload an audio conversation to generate a structured Word document summary report.
         """)

 Planning Summary Audio Analyzer - Hugging Face Spaces App
 Analyzes audio recordings of planning conversations and generates
 a structured Word document planning summary report using Google's Gemini API.
 CHANGELOG (corrections applied):
   1. Treatment preference: added "conditional_comfort_care" option for nuanced cases
   2. Beneficiary status: improved prompt guidance to distinguish account access from
   5. Name spelling: prompt now flags uncertain proper noun spellings with [verify spelling]
   6. Next Steps section: driven by extracted data and topics discussed, not hardcoded
   7. Prompt includes "topics_discussed" field so the report only covers relevant sections
+  8. Resilience: retry logic (up to 3 attempts), robust JSON parser, raw response
+     surfaced on failure for debugging
+  9. Hallucination guard: prompt instructs model to return not_a_planning_conversation
+     flag when audio does not contain advance care planning content; validation rejects
+     such responses before generating a report.
 """
 import os
 import re
 import json
 import time
+import logging
 import tempfile
 from docx import Document
 from docx.shared import Inches, Pt, Twips
 except ImportError:
     HAS_GENAI = False
+logger = logging.getLogger(__name__)
 # ============================================================================
 # EXTRACTION PROMPT
 # ============================================================================
 You are analyzing a recorded conversation about advance care planning and end-of-life wishes.
 Listen to the ENTIRE audio carefully and extract ALL relevant information.
+IMPORTANT - NON-RELEVANT AUDIO DETECTION:
+Before extracting any planning data, first determine whether this audio actually
+contains an advance care planning conversation. The audio MUST contain a real
+discussion about at least one of these topics: health care wishes, financial
+planning, funeral/memorial preferences, or values and legacy.
+If the audio is:
+- Silence, noise, music, or unintelligible speech
+- A conversation about unrelated topics (e.g. casual chat, a lecture, a podcast
+  not about advance care planning)
+- Too short or too unclear to extract meaningful planning information
+- Random test audio or microphone checks
+Then return ONLY this JSON and nothing else:
+```json
+{
+  "not_a_planning_conversation": true,
+  "reason": "Brief explanation of what the audio actually contains"
+}
+```
+Do NOT invent, fabricate, or hallucinate planning data. If you cannot clearly hear
+a real advance care planning discussion, you MUST return the above JSON.
+Only proceed with the full extraction below if you are confident the audio contains
+a genuine advance care planning conversation.
 CRITICAL INSTRUCTIONS FOR SINGLE-SELECT FIELDS:
 - You MUST select exactly ONE option for each single-select field
 - Use the EXACT string values specified (copy them exactly)
 - If the conversation implies something even indirectly, make your best inference
 - NEVER leave single-select fields as null - always pick the best match
 IMPORTANT RULES FOR PROPER NOUNS:
 - If a last name is spelled out letter by letter, use that exact spelling.
 - If a last name is only spoken (not spelled), transcribe it phonetically and append
   [verify spelling] after it. Example: "Potoff [verify spelling]"
 - First names that are spelled out should use the spelled version.
 Return a JSON object with this EXACT structure:
 ```json
 {
   "facilitator_summary": "Facilitator's closing summary and recommendations"
 }
 ```
 DECISION GUIDE FOR COMMON SCENARIOS:
 topics_discussed:
 - Listen for which topics the participant chose to focus on
 - Only include "health", "financial", and/or "funeral" if they were actually discussed
 - If the participant said they only want to discuss health and financial, do NOT include "funeral"
 advance_care_status:
 - If they say they haven't done paperwork/documents yet -> "no_documents"
 - If they have old documents that need updating -> "has_documents_needs_update"
 - If they have current, up-to-date documents -> "has_current_documents"
 treatment_preference:
 - If they UNCONDITIONALLY want only comfort/palliative care, no machines ever -> "comfort_care_only"
 - If they want full treatment/CPR/ventilation IF there's hope of meaningful recovery -> "full_treatment_if_recovery"
   or quality of life (e.g. "treat me if I can still think clearly, but let me go
   if I'm cognitively impaired") -> "conditional_comfort_care"
 - If they're unsure or need more information -> "unsure"
 beneficiary_status:
 - IMPORTANT: "all_current" means the participant explicitly confirmed that formal
   beneficiary designations (not just account access) are filed and up to date
   did NOT explicitly confirm legal beneficiary designations are current -> "unsure"
 - If they know some designations need updating -> "need_to_update"
 - If they're not sure who's listed or need to check -> "unsure"
 has_info_list:
 - If they have files/info but haven't shared the location or it's disorganized -> "yes_not_shared"
 - If their trusted person knows where everything is -> "yes_shared"
 - If they haven't created any list -> "not_created"
 shared_with_loved_ones:
 - If they've talked but nothing is written down -> "yes_not_written"
 - If they've discussed AND written it down -> "yes_written"
 - If they haven't discussed wishes yet -> "not_yet"
 specific_items_status:
 - If they named specific items for specific people -> "has_specific_items"
 - If they explicitly said no specific designations are needed (e.g. "everything goes
   to my spouse" or "nothing specific needs to go anywhere specific") -> "no_specific_items"
 - If they haven't thought about it yet or are undecided -> "not_yet_decided"
 service_type / body_preference / cost_planning:
 - If funeral planning was NOT discussed at all, use "not_discussed" for these fields
 - Celebration of life, casual gathering, party -> "celebration_of_life"
 - If they mention life insurance will cover it or family knows about funding -> "family_aware"
 - If they have a pre-paid funeral plan -> "prepaid"
 - If costs haven't been discussed -> "needs_discussion"
 values_reflections:
 - "meaning_and_joy": ONLY include hobbies, relationships, passions, and activities
   that bring happiness. Do NOT include medical decision criteria like cognitive
   function preferences here. Those belong in treatment_details.
 - "want_remembered_for": Use the participant's own words about how they want to be remembered.
 - "what_matters_most": Summarize their overall philosophy about living and legacy.
 recommended_next_steps:
 - Set each to true ONLY if it is relevant based on what was discussed
 - For example, if funeral planning was not discussed, do not set explore_funeral_preplanning to true
 - If documents already exist and are current, do not set create_healthcare_poa to true
 - Base these on gaps identified during the conversation
 Listen for these key topics:
 - Who would make healthcare decisions (usually spouse first, then adult child)
 - Who would handle finances (often same people)
 - Funeral/memorial preferences
 - Special items to give specific people
 - What matters most to them, their values
 Return ONLY valid JSON, no markdown formatting or explanation.
 """
 # ============================================================================
 def parse_json_response(response_text: str) -> dict | None:
     """Extract a JSON object from a Gemini response.
     Markdown code fences are stripped first.  The cleaned text is then
     parsed as a whole; if that fails, the object beginning at the first
     '{' is decoded and any trailing prose after it is ignored.
     Args:
         response_text: Raw text returned by the model (may be empty/None).
     Returns:
         The decoded JSON object as a dict, or None when the text is empty,
         contains no decodable object, or its top-level JSON value is not
         an object (list, string, number, ...).  Callers rely on dict
         methods such as ``.get``, so non-dict values are never returned.
     """
     if not response_text:
         return None
     # Strip markdown code fences
     cleaned = re.sub(r'```json\s*', '', response_text)
     cleaned = re.sub(r'```\s*', '', cleaned)
     cleaned = cleaned.strip()
     # Strategy 1: the entire cleaned text is a single JSON document.
     try:
         whole = json.loads(cleaned)
     except json.JSONDecodeError:
         pass
     else:
         # Valid JSON that is not an object cannot be planning data.
         return whole if isinstance(whole, dict) else None
     # Strategy 2: decode the object that starts at the first '{'.
     # raw_decode stops at the end of that object, so explanatory text
     # appended after the JSON does not cause a failure, and no repeated
     # re-parsing of progressively shorter substrings is needed.
     start = cleaned.find('{')
     if start == -1:
         return None
     try:
         candidate, _end = json.JSONDecoder().raw_decode(cleaned, start)
     except json.JSONDecodeError:
         return None
     return candidate if isinstance(candidate, dict) else None
+def _is_valid_planning_data(data: dict) -> tuple[bool, str]:
+    """Check whether parsed data represents a genuine planning conversation.
+    Returns (is_valid, reason).  When is_valid is False, reason contains a
+    user-facing message explaining why the audio was rejected.
+    """
+    if not data:
+        return False, "No data could be extracted from the audio."
+    # Explicit flag set by the model when the audio is not relevant
+    if data.get("not_a_planning_conversation"):
+        reason = data.get("reason", "The audio does not appear to contain an advance care planning conversation.")
+        return False, (
+            "This audio does not contain an advance care planning conversation. "
+            f"({reason}) Please record or upload a conversation that discusses "
+            "health care wishes, financial planning, or funeral preferences."
+        )
+    # Secondary heuristic: if none of the core sections are present, the
+    # model may have returned something unexpected.
+    has_any_section = any(
+        key in data for key in (
+            "health_care_wishes", "financial_planning", "funeral_plans",
+            "participant", "topics_discussed",
+        )
+    )
+    if not has_any_section:
+        return False, (
+            "The audio could not be matched to a planning conversation. "
+            "Please make sure the recording contains a discussion about "
+            "health care wishes, financial planning, or funeral preferences."
+        )
+    return True, ""
 def normalize_value(value, valid_options, default=None):
     """Normalize a value to match one of the valid options."""
     if value is None:
 # MAIN PROCESSING FUNCTION
 # ============================================================================
+MAX_RETRIES = 3
+RETRY_DELAY_SECONDS = 3
 def process_audio(audio_file):
+    """Main function to process audio and generate Word document.
+    Retries up to MAX_RETRIES times when the Gemini response cannot be
+    parsed, since transient malformed responses are the most common
+    failure mode.
+    """
     if audio_file is None:
         return None, "Please record or upload an audio file.", None
         return None, "API key not configured. Please set GEMINI_API_KEY in Space secrets.", None
     try:
+        # Retry loop: the Gemini API occasionally returns responses that
+        # cannot be parsed (extra prose, truncated JSON, etc.).  A simple
+        # retry with a short delay resolves this the vast majority of the
+        # time without any user intervention.
+        raw_response = None
+        data = None
+        for attempt in range(1, MAX_RETRIES + 1):
+            raw_response = analyze_audio(audio_file, api_key)
+            data = parse_json_response(raw_response)
+            if data is not None:
+                break
+            logger.warning(
+                "Attempt %d/%d: failed to parse Gemini response (length=%d)",
+                attempt, MAX_RETRIES, len(raw_response) if raw_response else 0,
+            )
+            if attempt < MAX_RETRIES:
+                time.sleep(RETRY_DELAY_SECONDS)
         if not data:
+            # Surface the raw response so the user (or developer) can
+            # inspect what Gemini actually returned.
+            return (
+                None,
+                f"Failed to parse the AI response after {MAX_RETRIES} attempts. "
+                "Open the JSON panel below to see the raw API output.",
+                raw_response,
+            )
+        # ---- Hallucination guard ----
+        is_valid, rejection_reason = _is_valid_planning_data(data)
+        if not is_valid:
+            return (
+                None,
+                rejection_reason,
+                json.dumps(data, indent=2),
+            )
         # Normalize data
         data = normalize_data(data)
     with gr.Blocks(title="Advance Care Planning") as demo:
         gr.Markdown("""
         # Advance Care Planning
         Record or upload an audio conversation to generate a structured Word document summary report.
         """)