stevafernandes committed on
Commit
fb242fd
·
verified ·
1 Parent(s): 69dcb61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -32
app.py CHANGED
@@ -2,7 +2,6 @@
2
  Planning Summary Audio Analyzer - Hugging Face Spaces App
3
  Analyzes audio recordings of planning conversations and generates
4
  a structured Word document planning summary report using Google's Gemini API.
5
-
6
  CHANGELOG (corrections applied):
7
  1. Treatment preference: added "conditional_comfort_care" option for nuanced cases
8
  2. Beneficiary status: improved prompt guidance to distinguish account access from
@@ -13,12 +12,15 @@ CHANGELOG (corrections applied):
13
  5. Name spelling: prompt now flags uncertain proper noun spellings with [verify spelling]
14
  6. Next Steps section: driven by extracted data and topics discussed, not hardcoded
15
  7. Prompt includes "topics_discussed" field so the report only covers relevant sections
 
 
16
  """
17
 
18
  import os
19
  import re
20
  import json
21
  import time
 
22
  import tempfile
23
  from docx import Document
24
  from docx.shared import Inches, Pt, Twips
@@ -40,6 +42,8 @@ try:
40
  except ImportError:
41
  HAS_GENAI = False
42
 
 
 
43
  # ============================================================================
44
  # EXTRACTION PROMPT
45
  # ============================================================================
@@ -47,19 +51,16 @@ except ImportError:
47
  EXTRACTION_PROMPT = """
48
  You are analyzing a recorded conversation about advance care planning and end-of-life wishes.
49
  Listen to the ENTIRE audio carefully and extract ALL relevant information.
50
-
51
  CRITICAL INSTRUCTIONS FOR SINGLE-SELECT FIELDS:
52
  - You MUST select exactly ONE option for each single-select field
53
  - Use the EXACT string values specified (copy them exactly)
54
  - If the conversation implies something even indirectly, make your best inference
55
  - NEVER leave single-select fields as null - always pick the best match
56
-
57
  IMPORTANT RULES FOR PROPER NOUNS:
58
  - If a last name is spelled out letter by letter, use that exact spelling.
59
  - If a last name is only spoken (not spelled), transcribe it phonetically and append
60
  [verify spelling] after it. Example: "Potoff [verify spelling]"
61
  - First names that are spelled out should use the spelled version.
62
-
63
  Return a JSON object with this EXACT structure:
64
  ```json
65
  {
@@ -165,19 +166,15 @@ Return a JSON object with this EXACT structure:
165
  "facilitator_summary": "Facilitator's closing summary and recommendations"
166
  }
167
  ```
168
-
169
  DECISION GUIDE FOR COMMON SCENARIOS:
170
-
171
  topics_discussed:
172
  - Listen for which topics the participant chose to focus on
173
  - Only include "health", "financial", and/or "funeral" if they were actually discussed
174
  - If the participant said they only want to discuss health and financial, do NOT include "funeral"
175
-
176
  advance_care_status:
177
  - If they say they haven't done paperwork/documents yet -> "no_documents"
178
  - If they have old documents that need updating -> "has_documents_needs_update"
179
  - If they have current, up-to-date documents -> "has_current_documents"
180
-
181
  treatment_preference:
182
  - If they UNCONDITIONALLY want only comfort/palliative care, no machines ever -> "comfort_care_only"
183
  - If they want full treatment/CPR/ventilation IF there's hope of meaningful recovery -> "full_treatment_if_recovery"
@@ -185,7 +182,6 @@ treatment_preference:
185
  or quality of life (e.g. "treat me if I can still think clearly, but let me go
186
  if I'm cognitively impaired") -> "conditional_comfort_care"
187
  - If they're unsure or need more information -> "unsure"
188
-
189
  beneficiary_status:
190
  - IMPORTANT: "all_current" means the participant explicitly confirmed that formal
191
  beneficiary designations (not just account access) are filed and up to date
@@ -193,23 +189,19 @@ beneficiary_status:
193
  did NOT explicitly confirm legal beneficiary designations are current -> "unsure"
194
  - If they know some designations need updating -> "need_to_update"
195
  - If they're not sure who's listed or need to check -> "unsure"
196
-
197
  has_info_list:
198
  - If they have files/info but haven't shared the location or it's disorganized -> "yes_not_shared"
199
  - If their trusted person knows where everything is -> "yes_shared"
200
  - If they haven't created any list -> "not_created"
201
-
202
  shared_with_loved_ones:
203
  - If they've talked but nothing is written down -> "yes_not_written"
204
  - If they've discussed AND written it down -> "yes_written"
205
  - If they haven't discussed wishes yet -> "not_yet"
206
-
207
  specific_items_status:
208
  - If they named specific items for specific people -> "has_specific_items"
209
  - If they explicitly said no specific designations are needed (e.g. "everything goes
210
  to my spouse" or "nothing specific needs to go anywhere specific") -> "no_specific_items"
211
  - If they haven't thought about it yet or are undecided -> "not_yet_decided"
212
-
213
  service_type / body_preference / cost_planning:
214
  - If funeral planning was NOT discussed at all, use "not_discussed" for these fields
215
  - Celebration of life, casual gathering, party -> "celebration_of_life"
@@ -221,20 +213,17 @@ service_type / body_preference / cost_planning:
221
  - If they mention life insurance will cover it or family knows about funding -> "family_aware"
222
  - If they have a pre-paid funeral plan -> "prepaid"
223
  - If costs haven't been discussed -> "needs_discussion"
224
-
225
  values_reflections:
226
  - "meaning_and_joy": ONLY include hobbies, relationships, passions, and activities
227
  that bring happiness. Do NOT include medical decision criteria like cognitive
228
  function preferences here. Those belong in treatment_details.
229
  - "want_remembered_for": Use the participant's own words about how they want to be remembered.
230
  - "what_matters_most": Summarize their overall philosophy about living and legacy.
231
-
232
  recommended_next_steps:
233
  - Set each to true ONLY if it is relevant based on what was discussed
234
  - For example, if funeral planning was not discussed, do not set explore_funeral_preplanning to true
235
  - If documents already exist and are current, do not set create_healthcare_poa to true
236
  - Base these on gaps identified during the conversation
237
-
238
  Listen for these key topics:
239
  - Who would make healthcare decisions (usually spouse first, then adult child)
240
  - Who would handle finances (often same people)
@@ -243,7 +232,6 @@ Listen for these key topics:
243
  - Funeral/memorial preferences
244
  - Special items to give specific people
245
  - What matters most to them, their values
246
-
247
  Return ONLY valid JSON, no markdown formatting or explanation.
248
  """
249
 
@@ -287,16 +275,39 @@ def analyze_audio(audio_path: str, api_key: str) -> str:
287
  # ============================================================================
288
 
289
  def parse_json_response(response_text: str) -> dict | None:
290
- """Extract JSON from Gemini response."""
 
 
 
 
 
 
 
 
291
  cleaned = re.sub(r'```json\s*', '', response_text)
292
  cleaned = re.sub(r'```\s*', '', cleaned)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
 
294
- json_match = re.search(r'\{[\s\S]*\}', cleaned)
295
- if json_match:
296
- try:
297
- return json.loads(json_match.group())
298
- except json.JSONDecodeError:
299
- return None
300
  return None
301
 
302
 
@@ -897,8 +908,17 @@ def generate_docx(data: dict, output_path: str) -> str:
897
  # MAIN PROCESSING FUNCTION
898
  # ============================================================================
899
 
 
 
 
 
900
  def process_audio(audio_file):
901
- """Main function to process audio and generate Word document."""
 
 
 
 
 
902
  if audio_file is None:
903
  return None, "Please record or upload an audio file.", None
904
 
@@ -907,13 +927,34 @@ def process_audio(audio_file):
907
  return None, "API key not configured. Please set GEMINI_API_KEY in Space secrets.", None
908
 
909
  try:
910
- # Analyze audio
911
- raw_response = analyze_audio(audio_file, api_key)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
912
 
913
- # Parse response
914
- data = parse_json_response(raw_response)
915
  if not data:
916
- return None, "Failed to parse the AI response. Please try again.", None
 
 
 
 
 
 
 
917
 
918
  # Normalize data
919
  data = normalize_data(data)
@@ -999,7 +1040,6 @@ if HAS_GRADIO:
999
  with gr.Blocks(title="Advance Care Planning") as demo:
1000
  gr.Markdown("""
1001
  # Advance Care Planning
1002
-
1003
  Record or upload an audio conversation to generate a structured Word document summary report.
1004
  """)
1005
 
@@ -1068,4 +1108,4 @@ if __name__ == "__main__":
1068
  if HAS_GRADIO:
1069
  demo.launch(theme=custom_theme)
1070
  else:
1071
- print("Gradio not installed. Core logic is available for import.")
 
2
  Planning Summary Audio Analyzer - Hugging Face Spaces App
3
  Analyzes audio recordings of planning conversations and generates
4
  a structured Word document planning summary report using Google's Gemini API.
 
5
  CHANGELOG (corrections applied):
6
  1. Treatment preference: added "conditional_comfort_care" option for nuanced cases
7
  2. Beneficiary status: improved prompt guidance to distinguish account access from
 
12
  5. Name spelling: prompt now flags uncertain proper noun spellings with [verify spelling]
13
  6. Next Steps section: driven by extracted data and topics discussed, not hardcoded
14
  7. Prompt includes "topics_discussed" field so the report only covers relevant sections
15
+ 8. Resilience: retry logic (up to 3 attempts), robust JSON parser, raw response
16
+ surfaced on failure for debugging
17
  """
18
 
19
  import os
20
  import re
21
  import json
22
  import time
23
+ import logging
24
  import tempfile
25
  from docx import Document
26
  from docx.shared import Inches, Pt, Twips
 
42
  except ImportError:
43
  HAS_GENAI = False
44
 
45
+ logger = logging.getLogger(__name__)
46
+
47
  # ============================================================================
48
  # EXTRACTION PROMPT
49
  # ============================================================================
 
51
  EXTRACTION_PROMPT = """
52
  You are analyzing a recorded conversation about advance care planning and end-of-life wishes.
53
  Listen to the ENTIRE audio carefully and extract ALL relevant information.
 
54
  CRITICAL INSTRUCTIONS FOR SINGLE-SELECT FIELDS:
55
  - You MUST select exactly ONE option for each single-select field
56
  - Use the EXACT string values specified (copy them exactly)
57
  - If the conversation implies something even indirectly, make your best inference
58
  - NEVER leave single-select fields as null - always pick the best match
 
59
  IMPORTANT RULES FOR PROPER NOUNS:
60
  - If a last name is spelled out letter by letter, use that exact spelling.
61
  - If a last name is only spoken (not spelled), transcribe it phonetically and append
62
  [verify spelling] after it. Example: "Potoff [verify spelling]"
63
  - First names that are spelled out should use the spelled version.
 
64
  Return a JSON object with this EXACT structure:
65
  ```json
66
  {
 
166
  "facilitator_summary": "Facilitator's closing summary and recommendations"
167
  }
168
  ```
 
169
  DECISION GUIDE FOR COMMON SCENARIOS:
 
170
  topics_discussed:
171
  - Listen for which topics the participant chose to focus on
172
  - Only include "health", "financial", and/or "funeral" if they were actually discussed
173
  - If the participant said they only want to discuss health and financial, do NOT include "funeral"
 
174
  advance_care_status:
175
  - If they say they haven't done paperwork/documents yet -> "no_documents"
176
  - If they have old documents that need updating -> "has_documents_needs_update"
177
  - If they have current, up-to-date documents -> "has_current_documents"
 
178
  treatment_preference:
179
  - If they UNCONDITIONALLY want only comfort/palliative care, no machines ever -> "comfort_care_only"
180
  - If they want full treatment/CPR/ventilation IF there's hope of meaningful recovery -> "full_treatment_if_recovery"
 
182
  or quality of life (e.g. "treat me if I can still think clearly, but let me go
183
  if I'm cognitively impaired") -> "conditional_comfort_care"
184
  - If they're unsure or need more information -> "unsure"
 
185
  beneficiary_status:
186
  - IMPORTANT: "all_current" means the participant explicitly confirmed that formal
187
  beneficiary designations (not just account access) are filed and up to date
 
189
  did NOT explicitly confirm legal beneficiary designations are current -> "unsure"
190
  - If they know some designations need updating -> "need_to_update"
191
  - If they're not sure who's listed or need to check -> "unsure"
 
192
  has_info_list:
193
  - If they have files/info but haven't shared the location or it's disorganized -> "yes_not_shared"
194
  - If their trusted person knows where everything is -> "yes_shared"
195
  - If they haven't created any list -> "not_created"
 
196
  shared_with_loved_ones:
197
  - If they've talked but nothing is written down -> "yes_not_written"
198
  - If they've discussed AND written it down -> "yes_written"
199
  - If they haven't discussed wishes yet -> "not_yet"
 
200
  specific_items_status:
201
  - If they named specific items for specific people -> "has_specific_items"
202
  - If they explicitly said no specific designations are needed (e.g. "everything goes
203
  to my spouse" or "nothing specific needs to go anywhere specific") -> "no_specific_items"
204
  - If they haven't thought about it yet or are undecided -> "not_yet_decided"
 
205
  service_type / body_preference / cost_planning:
206
  - If funeral planning was NOT discussed at all, use "not_discussed" for these fields
207
  - Celebration of life, casual gathering, party -> "celebration_of_life"
 
213
  - If they mention life insurance will cover it or family knows about funding -> "family_aware"
214
  - If they have a pre-paid funeral plan -> "prepaid"
215
  - If costs haven't been discussed -> "needs_discussion"
 
216
  values_reflections:
217
  - "meaning_and_joy": ONLY include hobbies, relationships, passions, and activities
218
  that bring happiness. Do NOT include medical decision criteria like cognitive
219
  function preferences here. Those belong in treatment_details.
220
  - "want_remembered_for": Use the participant's own words about how they want to be remembered.
221
  - "what_matters_most": Summarize their overall philosophy about living and legacy.
 
222
  recommended_next_steps:
223
  - Set each to true ONLY if it is relevant based on what was discussed
224
  - For example, if funeral planning was not discussed, do not set explore_funeral_preplanning to true
225
  - If documents already exist and are current, do not set create_healthcare_poa to true
226
  - Base these on gaps identified during the conversation
 
227
  Listen for these key topics:
228
  - Who would make healthcare decisions (usually spouse first, then adult child)
229
  - Who would handle finances (often same people)
 
232
  - Funeral/memorial preferences
233
  - Special items to give specific people
234
  - What matters most to them, their values
 
235
  Return ONLY valid JSON, no markdown formatting or explanation.
236
  """
237
 
 
275
  # ============================================================================
276
 
277
  def parse_json_response(response_text: str) -> dict | None:
278
+ """Extract JSON from Gemini response.
279
+
280
+ Uses a multi-strategy approach so that trailing prose, markdown fences,
281
+ or minor formatting differences do not cause a parse failure.
282
+ """
283
+ if not response_text:
284
+ return None
285
+
286
+ # Strip markdown code fences
287
  cleaned = re.sub(r'```json\s*', '', response_text)
288
  cleaned = re.sub(r'```\s*', '', cleaned)
289
+ cleaned = cleaned.strip()
290
+
291
+ # Strategy 1: try parsing the entire cleaned text directly
292
+ try:
293
+ return json.loads(cleaned)
294
+ except json.JSONDecodeError:
295
+ pass
296
+
297
+ # Strategy 2: locate the first '{' and try progressively shorter
298
+ # substrings ending at each '}' from the end backward. This handles
299
+ # cases where Gemini appends explanatory text after the JSON object.
300
+ start = cleaned.find('{')
301
+ if start == -1:
302
+ return None
303
+
304
+ for end in range(len(cleaned) - 1, start, -1):
305
+ if cleaned[end] == '}':
306
+ try:
307
+ return json.loads(cleaned[start:end + 1])
308
+ except json.JSONDecodeError:
309
+ continue
310
 
 
 
 
 
 
 
311
  return None
312
 
313
 
 
908
  # MAIN PROCESSING FUNCTION
909
  # ============================================================================
910
 
911
+ MAX_RETRIES = 3
912
+ RETRY_DELAY_SECONDS = 3
913
+
914
+
915
  def process_audio(audio_file):
916
+ """Main function to process audio and generate Word document.
917
+
918
+ Retries up to MAX_RETRIES times when the Gemini response cannot be
919
+ parsed, since transient malformed responses are the most common
920
+ failure mode.
921
+ """
922
  if audio_file is None:
923
  return None, "Please record or upload an audio file.", None
924
 
 
927
  return None, "API key not configured. Please set GEMINI_API_KEY in Space secrets.", None
928
 
929
  try:
930
+ # Retry loop: the Gemini API occasionally returns responses that
931
+ # cannot be parsed (extra prose, truncated JSON, etc.). A simple
932
+ # retry with a short delay resolves this the vast majority of the
933
+ # time without any user intervention.
934
+ raw_response = None
935
+ data = None
936
+
937
+ for attempt in range(1, MAX_RETRIES + 1):
938
+ raw_response = analyze_audio(audio_file, api_key)
939
+ data = parse_json_response(raw_response)
940
+ if data is not None:
941
+ break
942
+ logger.warning(
943
+ "Attempt %d/%d: failed to parse Gemini response (length=%d)",
944
+ attempt, MAX_RETRIES, len(raw_response) if raw_response else 0,
945
+ )
946
+ if attempt < MAX_RETRIES:
947
+ time.sleep(RETRY_DELAY_SECONDS)
948
 
 
 
949
  if not data:
950
+ # Surface the raw response so the user (or developer) can
951
+ # inspect what Gemini actually returned.
952
+ return (
953
+ None,
954
+ f"Failed to parse the AI response after {MAX_RETRIES} attempts. "
955
+ "Open the JSON panel below to see the raw API output.",
956
+ raw_response,
957
+ )
958
 
959
  # Normalize data
960
  data = normalize_data(data)
 
1040
  with gr.Blocks(title="Advance Care Planning") as demo:
1041
  gr.Markdown("""
1042
  # Advance Care Planning
 
1043
  Record or upload an audio conversation to generate a structured Word document summary report.
1044
  """)
1045
 
 
1108
  if HAS_GRADIO:
1109
  demo.launch(theme=custom_theme)
1110
  else:
1111
+ print("Gradio not installed. Core logic is available for import.")