stevafernandes committed on
Commit
1fe95d9
·
verified ·
1 Parent(s): 196cb9f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -30
app.py CHANGED
@@ -2,7 +2,6 @@
2
  Planning Summary Audio Analyzer - Hugging Face Spaces App
3
  Analyzes audio recordings of planning conversations and generates
4
  a structured Word document planning summary report using Google's Gemini API.
5
-
6
  CHANGELOG (corrections applied):
7
  1. Treatment preference: added "conditional_comfort_care" option for nuanced cases
8
  2. Beneficiary status: improved prompt guidance to distinguish account access from
@@ -13,12 +12,18 @@ CHANGELOG (corrections applied):
13
  5. Name spelling: prompt now flags uncertain proper noun spellings with [verify spelling]
14
  6. Next Steps section: driven by extracted data and topics discussed, not hardcoded
15
  7. Prompt includes "topics_discussed" field so the report only covers relevant sections
 
 
 
 
 
16
  """
17
 
18
  import os
19
  import re
20
  import json
21
  import time
 
22
  import tempfile
23
  from docx import Document
24
  from docx.shared import Inches, Pt, Twips
@@ -40,6 +45,8 @@ try:
40
  except ImportError:
41
  HAS_GENAI = False
42
 
 
 
43
  # ============================================================================
44
  # EXTRACTION PROMPT
45
  # ============================================================================
@@ -48,18 +55,43 @@ EXTRACTION_PROMPT = """
48
  You are analyzing a recorded conversation about advance care planning and end-of-life wishes.
49
  Listen to the ENTIRE audio carefully and extract ALL relevant information.
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  CRITICAL INSTRUCTIONS FOR SINGLE-SELECT FIELDS:
52
  - You MUST select exactly ONE option for each single-select field
53
  - Use the EXACT string values specified (copy them exactly)
54
  - If the conversation implies something even indirectly, make your best inference
55
  - NEVER leave single-select fields as null - always pick the best match
56
-
57
  IMPORTANT RULES FOR PROPER NOUNS:
58
  - If a last name is spelled out letter by letter, use that exact spelling.
59
  - If a last name is only spoken (not spelled), transcribe it phonetically and append
60
  [verify spelling] after it. Example: "Potoff [verify spelling]"
61
  - First names that are spelled out should use the spelled version.
62
-
63
  Return a JSON object with this EXACT structure:
64
  ```json
65
  {
@@ -165,19 +197,15 @@ Return a JSON object with this EXACT structure:
165
  "facilitator_summary": "Facilitator's closing summary and recommendations"
166
  }
167
  ```
168
-
169
  DECISION GUIDE FOR COMMON SCENARIOS:
170
-
171
  topics_discussed:
172
  - Listen for which topics the participant chose to focus on
173
  - Only include "health", "financial", and/or "funeral" if they were actually discussed
174
  - If the participant said they only want to discuss health and financial, do NOT include "funeral"
175
-
176
  advance_care_status:
177
  - If they say they haven't done paperwork/documents yet -> "no_documents"
178
  - If they have old documents that need updating -> "has_documents_needs_update"
179
  - If they have current, up-to-date documents -> "has_current_documents"
180
-
181
  treatment_preference:
182
  - If they UNCONDITIONALLY want only comfort/palliative care, no machines ever -> "comfort_care_only"
183
  - If they want full treatment/CPR/ventilation IF there's hope of meaningful recovery -> "full_treatment_if_recovery"
@@ -185,7 +213,6 @@ treatment_preference:
185
  or quality of life (e.g. "treat me if I can still think clearly, but let me go
186
  if I'm cognitively impaired") -> "conditional_comfort_care"
187
  - If they're unsure or need more information -> "unsure"
188
-
189
  beneficiary_status:
190
  - IMPORTANT: "all_current" means the participant explicitly confirmed that formal
191
  beneficiary designations (not just account access) are filed and up to date
@@ -193,23 +220,19 @@ beneficiary_status:
193
  did NOT explicitly confirm legal beneficiary designations are current -> "unsure"
194
  - If they know some designations need updating -> "need_to_update"
195
  - If they're not sure who's listed or need to check -> "unsure"
196
-
197
  has_info_list:
198
  - If they have files/info but haven't shared the location or it's disorganized -> "yes_not_shared"
199
  - If their trusted person knows where everything is -> "yes_shared"
200
  - If they haven't created any list -> "not_created"
201
-
202
  shared_with_loved_ones:
203
  - If they've talked but nothing is written down -> "yes_not_written"
204
  - If they've discussed AND written it down -> "yes_written"
205
  - If they haven't discussed wishes yet -> "not_yet"
206
-
207
  specific_items_status:
208
  - If they named specific items for specific people -> "has_specific_items"
209
  - If they explicitly said no specific designations are needed (e.g. "everything goes
210
  to my spouse" or "nothing specific needs to go anywhere specific") -> "no_specific_items"
211
  - If they haven't thought about it yet or are undecided -> "not_yet_decided"
212
-
213
  service_type / body_preference / cost_planning:
214
  - If funeral planning was NOT discussed at all, use "not_discussed" for these fields
215
  - Celebration of life, casual gathering, party -> "celebration_of_life"
@@ -221,20 +244,17 @@ service_type / body_preference / cost_planning:
221
  - If they mention life insurance will cover it or family knows about funding -> "family_aware"
222
  - If they have a pre-paid funeral plan -> "prepaid"
223
  - If costs haven't been discussed -> "needs_discussion"
224
-
225
  values_reflections:
226
  - "meaning_and_joy": ONLY include hobbies, relationships, passions, and activities
227
  that bring happiness. Do NOT include medical decision criteria like cognitive
228
  function preferences here. Those belong in treatment_details.
229
  - "want_remembered_for": Use the participant's own words about how they want to be remembered.
230
  - "what_matters_most": Summarize their overall philosophy about living and legacy.
231
-
232
  recommended_next_steps:
233
  - Set each to true ONLY if it is relevant based on what was discussed
234
  - For example, if funeral planning was not discussed, do not set explore_funeral_preplanning to true
235
  - If documents already exist and are current, do not set create_healthcare_poa to true
236
  - Base these on gaps identified during the conversation
237
-
238
  Listen for these key topics:
239
  - Who would make healthcare decisions (usually spouse first, then adult child)
240
  - Who would handle finances (often same people)
@@ -243,7 +263,6 @@ Listen for these key topics:
243
  - Funeral/memorial preferences
244
  - Special items to give specific people
245
  - What matters most to them, their values
246
-
247
  Return ONLY valid JSON, no markdown formatting or explanation.
248
  """
249
 
@@ -287,19 +306,78 @@ def analyze_audio(audio_path: str, api_key: str) -> str:
287
  # ============================================================================
288
 
289
  def parse_json_response(response_text: str) -> dict | None:
290
- """Extract JSON from Gemini response."""
 
 
 
 
 
 
 
 
291
  cleaned = re.sub(r'```json\s*', '', response_text)
292
  cleaned = re.sub(r'```\s*', '', cleaned)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
 
294
- json_match = re.search(r'\{[\s\S]*\}', cleaned)
295
- if json_match:
296
- try:
297
- return json.loads(json_match.group())
298
- except json.JSONDecodeError:
299
- return None
300
  return None
301
 
302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  def normalize_value(value, valid_options, default=None):
304
  """Normalize a value to match one of the valid options."""
305
  if value is None:
@@ -897,8 +975,17 @@ def generate_docx(data: dict, output_path: str) -> str:
897
  # MAIN PROCESSING FUNCTION
898
  # ============================================================================
899
 
 
 
 
 
900
  def process_audio(audio_file):
901
- """Main function to process audio and generate Word document."""
 
 
 
 
 
902
  if audio_file is None:
903
  return None, "Please record or upload an audio file.", None
904
 
@@ -907,13 +994,43 @@ def process_audio(audio_file):
907
  return None, "API key not configured. Please set GEMINI_API_KEY in Space secrets.", None
908
 
909
  try:
910
- # Analyze audio
911
- raw_response = analyze_audio(audio_file, api_key)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
912
 
913
- # Parse response
914
- data = parse_json_response(raw_response)
915
  if not data:
916
- return None, "Failed to parse the AI response. Please try again.", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
917
 
918
  # Normalize data
919
  data = normalize_data(data)
@@ -999,7 +1116,6 @@ if HAS_GRADIO:
999
  with gr.Blocks(title="Advance Care Planning") as demo:
1000
  gr.Markdown("""
1001
  # Advance Care Planning
1002
-
1003
  Record or upload an audio conversation to generate a structured Word document summary report.
1004
  """)
1005
 
 
2
  Planning Summary Audio Analyzer - Hugging Face Spaces App
3
  Analyzes audio recordings of planning conversations and generates
4
  a structured Word document planning summary report using Google's Gemini API.
 
5
  CHANGELOG (corrections applied):
6
  1. Treatment preference: added "conditional_comfort_care" option for nuanced cases
7
  2. Beneficiary status: improved prompt guidance to distinguish account access from
 
12
  5. Name spelling: prompt now flags uncertain proper noun spellings with [verify spelling]
13
  6. Next Steps section: driven by extracted data and topics discussed, not hardcoded
14
  7. Prompt includes "topics_discussed" field so the report only covers relevant sections
15
+ 8. Resilience: retry logic (up to 3 attempts), robust JSON parser, raw response
16
+ surfaced on failure for debugging
17
+ 9. Hallucination guard: prompt instructs model to return not_a_planning_conversation
18
+ flag when audio does not contain advance care planning content; validation rejects
19
+ such responses before generating a report.
20
  """
21
 
22
  import os
23
  import re
24
  import json
25
  import time
26
+ import logging
27
  import tempfile
28
  from docx import Document
29
  from docx.shared import Inches, Pt, Twips
 
45
  except ImportError:
46
  HAS_GENAI = False
47
 
48
+ logger = logging.getLogger(__name__)
49
+
50
  # ============================================================================
51
  # EXTRACTION PROMPT
52
  # ============================================================================
 
55
  You are analyzing a recorded conversation about advance care planning and end-of-life wishes.
56
  Listen to the ENTIRE audio carefully and extract ALL relevant information.
57
 
58
+ IMPORTANT - NON-RELEVANT AUDIO DETECTION:
59
+ Before extracting any planning data, first determine whether this audio actually
60
+ contains an advance care planning conversation. The audio MUST contain a real
61
+ discussion about at least one of these topics: health care wishes, financial
62
+ planning, funeral/memorial preferences, or values and legacy.
63
+
64
+ If the audio is:
65
+ - Silence, noise, music, or unintelligible speech
66
+ - A conversation about unrelated topics (e.g. casual chat, a lecture, a podcast
67
+ not about advance care planning)
68
+ - Too short or too unclear to extract meaningful planning information
69
+ - Random test audio or microphone checks
70
+
71
+ Then return ONLY this JSON and nothing else:
72
+ ```json
73
+ {
74
+ "not_a_planning_conversation": true,
75
+ "reason": "Brief explanation of what the audio actually contains"
76
+ }
77
+ ```
78
+
79
+ Do NOT invent, fabricate, or hallucinate planning data. If you cannot clearly hear
80
+ a real advance care planning discussion, you MUST return the above JSON.
81
+
82
+ Only proceed with the full extraction below if you are confident the audio contains
83
+ a genuine advance care planning conversation.
84
+
85
  CRITICAL INSTRUCTIONS FOR SINGLE-SELECT FIELDS:
86
  - You MUST select exactly ONE option for each single-select field
87
  - Use the EXACT string values specified (copy them exactly)
88
  - If the conversation implies something even indirectly, make your best inference
89
  - NEVER leave single-select fields as null - always pick the best match
 
90
  IMPORTANT RULES FOR PROPER NOUNS:
91
  - If a last name is spelled out letter by letter, use that exact spelling.
92
  - If a last name is only spoken (not spelled), transcribe it phonetically and append
93
  [verify spelling] after it. Example: "Potoff [verify spelling]"
94
  - First names that are spelled out should use the spelled version.
 
95
  Return a JSON object with this EXACT structure:
96
  ```json
97
  {
 
197
  "facilitator_summary": "Facilitator's closing summary and recommendations"
198
  }
199
  ```
 
200
  DECISION GUIDE FOR COMMON SCENARIOS:
 
201
  topics_discussed:
202
  - Listen for which topics the participant chose to focus on
203
  - Only include "health", "financial", and/or "funeral" if they were actually discussed
204
  - If the participant said they only want to discuss health and financial, do NOT include "funeral"
 
205
  advance_care_status:
206
  - If they say they haven't done paperwork/documents yet -> "no_documents"
207
  - If they have old documents that need updating -> "has_documents_needs_update"
208
  - If they have current, up-to-date documents -> "has_current_documents"
 
209
  treatment_preference:
210
  - If they UNCONDITIONALLY want only comfort/palliative care, no machines ever -> "comfort_care_only"
211
  - If they want full treatment/CPR/ventilation IF there's hope of meaningful recovery -> "full_treatment_if_recovery"
 
213
  or quality of life (e.g. "treat me if I can still think clearly, but let me go
214
  if I'm cognitively impaired") -> "conditional_comfort_care"
215
  - If they're unsure or need more information -> "unsure"
 
216
  beneficiary_status:
217
  - IMPORTANT: "all_current" means the participant explicitly confirmed that formal
218
  beneficiary designations (not just account access) are filed and up to date
 
220
  did NOT explicitly confirm legal beneficiary designations are current -> "unsure"
221
  - If they know some designations need updating -> "need_to_update"
222
  - If they're not sure who's listed or need to check -> "unsure"
 
223
  has_info_list:
224
  - If they have files/info but haven't shared the location or it's disorganized -> "yes_not_shared"
225
  - If their trusted person knows where everything is -> "yes_shared"
226
  - If they haven't created any list -> "not_created"
 
227
  shared_with_loved_ones:
228
  - If they've talked but nothing is written down -> "yes_not_written"
229
  - If they've discussed AND written it down -> "yes_written"
230
  - If they haven't discussed wishes yet -> "not_yet"
 
231
  specific_items_status:
232
  - If they named specific items for specific people -> "has_specific_items"
233
  - If they explicitly said no specific designations are needed (e.g. "everything goes
234
  to my spouse" or "nothing specific needs to go anywhere specific") -> "no_specific_items"
235
  - If they haven't thought about it yet or are undecided -> "not_yet_decided"
 
236
  service_type / body_preference / cost_planning:
237
  - If funeral planning was NOT discussed at all, use "not_discussed" for these fields
238
  - Celebration of life, casual gathering, party -> "celebration_of_life"
 
244
  - If they mention life insurance will cover it or family knows about funding -> "family_aware"
245
  - If they have a pre-paid funeral plan -> "prepaid"
246
  - If costs haven't been discussed -> "needs_discussion"
 
247
  values_reflections:
248
  - "meaning_and_joy": ONLY include hobbies, relationships, passions, and activities
249
  that bring happiness. Do NOT include medical decision criteria like cognitive
250
  function preferences here. Those belong in treatment_details.
251
  - "want_remembered_for": Use the participant's own words about how they want to be remembered.
252
  - "what_matters_most": Summarize their overall philosophy about living and legacy.
 
253
  recommended_next_steps:
254
  - Set each to true ONLY if it is relevant based on what was discussed
255
  - For example, if funeral planning was not discussed, do not set explore_funeral_preplanning to true
256
  - If documents already exist and are current, do not set create_healthcare_poa to true
257
  - Base these on gaps identified during the conversation
 
258
  Listen for these key topics:
259
  - Who would make healthcare decisions (usually spouse first, then adult child)
260
  - Who would handle finances (often same people)
 
263
  - Funeral/memorial preferences
264
  - Special items to give specific people
265
  - What matters most to them, their values
 
266
  Return ONLY valid JSON, no markdown formatting or explanation.
267
  """
268
 
 
306
  # ============================================================================
307
 
308
  def parse_json_response(response_text: str) -> dict | None:
309
+ """Extract JSON from Gemini response.
310
+
311
+ Uses a multi-strategy approach so that trailing prose, markdown fences,
312
+ or minor formatting differences do not cause a parse failure.
313
+ """
314
+ if not response_text:
315
+ return None
316
+
317
+ # Strip markdown code fences
318
  cleaned = re.sub(r'```json\s*', '', response_text)
319
  cleaned = re.sub(r'```\s*', '', cleaned)
320
+ cleaned = cleaned.strip()
321
+
322
+ # Strategy 1: try parsing the entire cleaned text directly
323
+ try:
324
+ return json.loads(cleaned)
325
+ except json.JSONDecodeError:
326
+ pass
327
+
328
+ # Strategy 2: locate the first '{' and try progressively shorter
329
+ # substrings ending at each '}' from the end backward. This handles
330
+ # cases where Gemini appends explanatory text after the JSON object.
331
+ start = cleaned.find('{')
332
+ if start == -1:
333
+ return None
334
+
335
+ for end in range(len(cleaned) - 1, start, -1):
336
+ if cleaned[end] == '}':
337
+ try:
338
+ return json.loads(cleaned[start:end + 1])
339
+ except json.JSONDecodeError:
340
+ continue
341
 
 
 
 
 
 
 
342
  return None
343
 
344
 
345
+ def _is_valid_planning_data(data: dict) -> tuple[bool, str]:
346
+ """Check whether parsed data represents a genuine planning conversation.
347
+
348
+ Returns (is_valid, reason). When is_valid is False, reason contains a
349
+ user-facing message explaining why the audio was rejected.
350
+ """
351
+ if not data:
352
+ return False, "No data could be extracted from the audio."
353
+
354
+ # Explicit flag set by the model when the audio is not relevant
355
+ if data.get("not_a_planning_conversation"):
356
+ reason = data.get("reason", "The audio does not appear to contain an advance care planning conversation.")
357
+ return False, (
358
+ "This audio does not contain an advance care planning conversation. "
359
+ f"({reason}) Please record or upload a conversation that discusses "
360
+ "health care wishes, financial planning, or funeral preferences."
361
+ )
362
+
363
+ # Secondary heuristic: if none of the core sections are present, the
364
+ # model may have returned something unexpected.
365
+ has_any_section = any(
366
+ key in data for key in (
367
+ "health_care_wishes", "financial_planning", "funeral_plans",
368
+ "participant", "topics_discussed",
369
+ )
370
+ )
371
+ if not has_any_section:
372
+ return False, (
373
+ "The audio could not be matched to a planning conversation. "
374
+ "Please make sure the recording contains a discussion about "
375
+ "health care wishes, financial planning, or funeral preferences."
376
+ )
377
+
378
+ return True, ""
379
+
380
+
381
  def normalize_value(value, valid_options, default=None):
382
  """Normalize a value to match one of the valid options."""
383
  if value is None:
 
975
  # MAIN PROCESSING FUNCTION
976
  # ============================================================================
977
 
978
+ MAX_RETRIES = 3
979
+ RETRY_DELAY_SECONDS = 3
980
+
981
+
982
  def process_audio(audio_file):
983
+ """Main function to process audio and generate Word document.
984
+
985
+ Retries up to MAX_RETRIES times when the Gemini response cannot be
986
+ parsed, since transient malformed responses are the most common
987
+ failure mode.
988
+ """
989
  if audio_file is None:
990
  return None, "Please record or upload an audio file.", None
991
 
 
994
  return None, "API key not configured. Please set GEMINI_API_KEY in Space secrets.", None
995
 
996
  try:
997
+ # Retry loop: the Gemini API occasionally returns responses that
998
+ # cannot be parsed (extra prose, truncated JSON, etc.). A simple
999
+ # retry with a short delay resolves this the vast majority of the
1000
+ # time without any user intervention.
1001
+ raw_response = None
1002
+ data = None
1003
+
1004
+ for attempt in range(1, MAX_RETRIES + 1):
1005
+ raw_response = analyze_audio(audio_file, api_key)
1006
+ data = parse_json_response(raw_response)
1007
+ if data is not None:
1008
+ break
1009
+ logger.warning(
1010
+ "Attempt %d/%d: failed to parse Gemini response (length=%d)",
1011
+ attempt, MAX_RETRIES, len(raw_response) if raw_response else 0,
1012
+ )
1013
+ if attempt < MAX_RETRIES:
1014
+ time.sleep(RETRY_DELAY_SECONDS)
1015
 
 
 
1016
  if not data:
1017
+ # Surface the raw response so the user (or developer) can
1018
+ # inspect what Gemini actually returned.
1019
+ return (
1020
+ None,
1021
+ f"Failed to parse the AI response after {MAX_RETRIES} attempts. "
1022
+ "Open the JSON panel below to see the raw API output.",
1023
+ raw_response,
1024
+ )
1025
+
1026
+ # ---- Hallucination guard ----
1027
+ is_valid, rejection_reason = _is_valid_planning_data(data)
1028
+ if not is_valid:
1029
+ return (
1030
+ None,
1031
+ rejection_reason,
1032
+ json.dumps(data, indent=2),
1033
+ )
1034
 
1035
  # Normalize data
1036
  data = normalize_data(data)
 
1116
  with gr.Blocks(title="Advance Care Planning") as demo:
1117
  gr.Markdown("""
1118
  # Advance Care Planning
 
1119
  Record or upload an audio conversation to generate a structured Word document summary report.
1120
  """)
1121