VEDAGI1 committed on
Commit
acc53dd
·
verified ·
1 Parent(s): b1a7c72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -782
app.py CHANGED
@@ -1,10 +1,3 @@
1
- # app.py
2
- # Universal AI Data Analyst with:
3
- # - Unchanged analysis & assessment logic
4
- # - Fixed Gradio event wiring (uses gr.State for history)
5
- # - Triple-quoted progress strings (no unterminated literals)
6
- # - Sleek full-width UI and Voice-to-Text (browser Web Speech API)
7
- # - Optional HIPAA flags (fallback defaults if not present in settings.py)
8
  from __future__ import annotations
9
 
10
  import io
@@ -40,682 +33,9 @@ from audit_log import log_event
40
  from privacy import safety_filter, refusal_reply
41
  from llm_router import cohere_chat, _co_client, cohere_embed
42
 
43
- # ---------------------- Helpers (analysis logic unchanged) ----------------------
44
- def load_markdown_text(filepath: str) -> str:
45
- try:
46
- with open(filepath, "r", encoding="utf-8") as f:
47
- return f.read()
48
- except FileNotFoundError:
49
- return f"**Error:** Document `{os.path.basename(filepath)}` not found."
50
-
51
- def _sanitize_text(s: str) -> str:
52
- if not isinstance(s, str):
53
- return s
54
- # Remove control characters (except newline and tab)
55
- return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
56
-
57
- # Conservative PHI redaction patterns (only applied if PHI_MODE & REDACT_BEFORE_LLM are enabled)
58
- PHI_PATTERNS = [
59
- (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
60
- (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),
61
- (re.compile(r"\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b"), "[REDACTED_PHONE]"),
62
- (re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"), "[REDACTED_EMAIL]"),
63
- (re.compile(r"\b(19|20)\d{2}-\d{2}-\d{2}\b"), "[REDACTED_DOB]"),
64
- (re.compile(r"\b\d{2}/\d{2}/(19|20)\d{2}\b"), "[REDACTED_DOB]"),
65
- (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),
66
- ]
67
-
68
- def redact_phi(text: str) -> str:
69
- if not isinstance(text, str):
70
- return text
71
- t = text
72
- for pat, repl in PHI_PATTERNS:
73
- t = pat.sub(repl, t)
74
- return t
75
-
76
- def safe_log(event_name: str, meta: dict | None = None):
77
- # Avoid logging raw PHI or payloads
78
- try:
79
- meta = (meta or {}).copy()
80
- meta.pop("raw", None)
81
- log_event(event_name, None, meta)
82
- except Exception:
83
- # Never raise from logging
84
- pass
85
-
86
-
87
- # ---------------------- JSON Validation ----------------------
88
-
89
- class JSONValidationError(Exception):
90
- """Raised when script output fails JSON validation."""
91
- pass
92
-
93
-
94
- def validate_json_output(raw_output: str) -> Dict[str, Any]:
95
- """
96
- Validates and parses JSON output from the analysis script.
97
-
98
- This creates the "hard boundary" between calculation and communication
99
- as described in the ClarityOps architecture. The function:
100
- 1. Strips whitespace and handles empty output
101
- 2. Attempts to parse as JSON
102
- 3. Validates the structure is a dictionary (not array or primitive)
103
- 4. Checks for error indicators in the output
104
- 5. Returns validated Python dict for report generation
105
-
106
- Args:
107
- raw_output: Raw string captured from script stdout
108
-
109
- Returns:
110
- Validated dictionary containing analysis findings
111
-
112
- Raises:
113
- JSONValidationError: If output is empty, malformed, or contains errors
114
- """
115
- # Strip whitespace
116
- cleaned_output = raw_output.strip()
117
-
118
- # Check for empty output
119
- if not cleaned_output:
120
- raise JSONValidationError(
121
- "Analysis script produced no output. The script must print a JSON object to stdout."
122
- )
123
-
124
- # Handle multiple JSON objects (take the last complete one)
125
- # This handles cases where debug prints precede the final JSON
126
- json_candidates = []
127
- brace_count = 0
128
- current_start = None
129
-
130
- for i, char in enumerate(cleaned_output):
131
- if char == '{':
132
- if brace_count == 0:
133
- current_start = i
134
- brace_count += 1
135
- elif char == '}':
136
- brace_count -= 1
137
- if brace_count == 0 and current_start is not None:
138
- json_candidates.append(cleaned_output[current_start:i+1])
139
- current_start = None
140
-
141
- # If no valid JSON structure found, try parsing the whole output
142
- if not json_candidates:
143
- json_to_parse = cleaned_output
144
- else:
145
- # Use the last JSON object (most likely the final output)
146
- json_to_parse = json_candidates[-1]
147
-
148
- # Attempt JSON parsing
149
- try:
150
- parsed = json.loads(json_to_parse)
151
- except json.JSONDecodeError as e:
152
- # Provide helpful error message with context
153
- error_context = cleaned_output[:500] + ("..." if len(cleaned_output) > 500 else "")
154
- raise JSONValidationError(
155
- f"Analysis script produced invalid JSON. Parse error: {e.msg} at position {e.pos}.\n\n"
156
- f"Raw output (first 500 chars):\n```\n{error_context}\n```"
157
- )
158
-
159
- # Validate structure is a dictionary
160
- if not isinstance(parsed, dict):
161
- raise JSONValidationError(
162
- f"Analysis output must be a JSON object (dictionary), not {type(parsed).__name__}. "
163
- f"Ensure your script prints a dictionary with json.dumps()."
164
- )
165
-
166
- # Check for error indicators in the output
167
- if "error" in parsed:
168
- error_msg = parsed.get("error", "Unknown error")
169
- raise JSONValidationError(
170
- f"Analysis script reported an error: {error_msg}"
171
- )
172
-
173
- # Validate output is not empty dict
174
- if not parsed:
175
- raise JSONValidationError(
176
- "Analysis script produced an empty JSON object. "
177
- "Ensure your script populates the output dictionary with findings."
178
- )
179
-
180
- # Log successful validation (without sensitive data)
181
- safe_log("json_validation_success", {"keys": list(parsed.keys()), "key_count": len(parsed)})
182
-
183
- return parsed
184
-
185
-
186
- def format_validated_json_for_report(validated_data: Dict[str, Any]) -> str:
187
- """
188
- Formats validated JSON data for the report generator.
189
-
190
- Converts the validated Python dictionary back to a formatted JSON string
191
- for the LLM to interpret. This ensures consistent formatting and handles
192
- any edge cases in serialization.
193
-
194
- Args:
195
- validated_data: Validated dictionary from validate_json_output()
196
-
197
- Returns:
198
- Formatted JSON string ready for report generation
199
- """
200
- try:
201
- return json.dumps(validated_data, indent=2, default=str, ensure_ascii=False)
202
- except (TypeError, ValueError) as e:
203
- # Fallback to string representation if JSON serialization fails
204
- safe_log("json_format_warning", {"error": str(e)})
205
- return json.dumps({"raw_data": str(validated_data)}, indent=2)
206
-
207
-
208
- # ---------------------- Analysis Script Generation ----------------------
209
-
210
- def _create_python_script(user_scenario: str, schema_context: str) -> str:
211
- EXPERT_ANALYTICAL_GUIDELINES = """
212
- --- EXPERT ANALYTICAL GUIDELINES ---
213
- When writing your script, you MUST follow these expert business rules:
214
- 1. **Linking Datasets Rule:** If you need to connect facilities to health zones when the 'zone' column is not in the facility list,
215
- you must first identify the high-priority zone from the beds data, then find the major city (by facility count) in the facility list,
216
- and *then* assess that city's capacity. Do not try to filter the facility list by a 'zone' column if it does not exist in the schema.
217
- 2. **Prioritization Rule:** To prioritize locations, you MUST combine the most recent population data with specific high-risk health indicators
218
- to create a multi-factor risk score.
219
- 3. **Capacity Calculation Rule:** For capacity over a 3-month window, assume **60 working days**.
220
- 4. **Cost Calculation Rule:** Sum 'Startup cost' and 'Ongoing cost' per person before multiplying.
221
- """
222
- prompt_for_coder = f"""\
223
- You are an expert Python data scientist. Your job is to write a script to extract the data needed to answer the user's request.
224
- You have dataframes in a list `dfs`.
225
-
226
- {EXPERT_ANALYTICAL_GUIDELINES}
227
-
228
- --- DATA SCHEMA ---
229
- {schema_context}
230
- --- END DATA SCHEMA ---
231
-
232
- CRITICAL RULES:
233
- 1. **DO NOT READ FILES:** You MUST NOT include `pd.read_csv`. The data is ALREADY loaded in the `dfs` variable. You MUST use this variable. Failure to do so will cause a fatal error.
234
- 2. **JSON OUTPUT ONLY:** Your script's ONLY output must be a single JSON object printed to stdout containing the raw data findings.
235
- 3. **BE PRECISE:** Use the exact, case-sensitive column names from the schema and robustly clean strings (`re.sub()`) before converting to numbers.
236
- 4. **JSON SERIALIZATION:** Before adding data to your final dictionary for JSON conversion, you MUST convert any pandas-specific types (like `int64`) to standard Python types using `.item()` for single values or `.tolist()` for lists.
237
- 5. **SINGLE JSON OUTPUT:** Print exactly ONE JSON object at the end of your script. Do not print debug statements or multiple JSON objects.
238
- 6. **VALID JSON STRUCTURE:** The output MUST be a dictionary/object, not an array or primitive value.
239
-
240
- --- USER'S SCENARIO ---
241
- {user_scenario}
242
-
243
- --- PYTHON SCRIPT ---
244
- Now, write the complete Python script that performs the analysis and prints a single, serializable JSON object.
245
- ```python
246
- """
247
- generated_text = cohere_chat(prompt_for_coder)
248
- match = re2.search(r"```python\n(.*?)```", generated_text, re2.DOTALL)
249
- if match:
250
- return match.group(1).strip()
251
- return "print(json.dumps({'error': 'Failed to generate a valid Python script.'}))"
252
-
253
-
254
- def _generate_long_report(prompt: str) -> str:
255
- try:
256
- client = _co_client()
257
- if not client:
258
- return "Error: Cohere client not initialized."
259
- response = client.chat(
260
- model=COHERE_MODEL_PRIMARY,
261
- message=prompt,
262
- max_tokens=4096,
263
- )
264
- return response.text
265
- except Exception as e:
266
- safe_log("cohere_chat_error", {"err": str(e)})
267
- return f"Error during final report generation: {e}"
268
-
269
-
270
- def _generate_final_report(user_scenario: str, validated_json_str: str) -> str:
271
- prompt_for_writer = f"""\
272
- You are an expert management consultant and data analyst.
273
- A data science script has run to extract key findings. You have the user's original request and the validated JSON data.
274
-
275
- Your task is to synthesize these validated findings into a single, comprehensive, and professional report that directly answers all of the user's questions with detailed justifications.
276
-
277
- --- USER'S ORIGINAL SCENARIO & DELIVERABLES ---
278
- {user_scenario}
279
- --- END SCENARIO ---
280
-
281
- --- VALIDATED DATA FINDINGS (JSON) ---
282
- {validated_json_str}
283
- --- END VALIDATED DATA ---
284
-
285
- Now, write the final, polished report. The report MUST:
286
- 1. Follow the "Expected Output Format" requested by the user.
287
- 2. Use tables, bullet points, and DETAILED narrative justifications for each recommendation.
288
- 3. Synthesize the validated data into actionable insights. Do not just copy the raw numbers; interpret them.
289
- 4. Ensure you fully address ALL evaluation questions, especially the final recommendations.
290
- """
291
- return _generate_long_report(prompt_for_writer)
292
-
293
-
294
- def _append_msg(h: List[Dict[str, str]], r: str, c: str) -> List[Dict[str, str]]:
295
- return (h or []) + [{"role": r, "content": c}]
296
-
297
-
298
- def ping_cohere() -> str:
299
- try:
300
- cli = _co_client()
301
- if not cli:
302
- return "Cohere client not initialized."
303
- vecs = cohere_embed(["hello", "world"])
304
- return f"Cohere OK ✅ (model={COHERE_MODEL_PRIMARY})" if vecs else "Cohere reachable."
305
- except Exception as e:
306
- return f"Cohere ping failed: {e}"
307
-
308
-
309
- def handle(user_msg: str, files: list, yield_update) -> str:
310
- try:
311
- # Safety filter on incoming message
312
- safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
313
- if blocked_in:
314
- return refusal_reply(reason_in)
315
-
316
- # Optional PHI redaction for prompts sent to an external LLM
317
- redacted_in = safe_in
318
- if PHI_MODE and REDACT_BEFORE_LLM:
319
- redacted_in = redact_phi(safe_in)
320
-
321
- file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
322
-
323
- if file_paths:
324
- # CSV analysis path
325
- dataframes, schema_parts = [], []
326
- for i, p in enumerate(file_paths):
327
- if p.endswith(".csv"):
328
- try:
329
- df = pd.read_csv(p)
330
- except UnicodeDecodeError:
331
- df = pd.read_csv(p, encoding="latin1")
332
- dataframes.append(df)
333
- schema_parts.append(
334
- f"DataFrame `dfs[{i}]` (`{os.path.basename(p)}`):\n{df.head().to_markdown()}\n"
335
- )
336
-
337
- if not dataframes:
338
- return "Please upload at least one CSV file."
339
-
340
- schema_context = "\n".join(schema_parts)
341
-
342
- # If external PHI is not allowed, use redacted prompt; otherwise use original
343
- prompt_for_code = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
344
-
345
- yield_update("""```
346
- 🧠 Generating aligned analysis script...
347
- ```""")
348
- analysis_script = _create_python_script(prompt_for_code, schema_context)
349
-
350
- yield_update("""```
351
- ⚙️ Executing script to extract raw data...
352
- ```""")
353
- execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
354
- output_buffer = io.StringIO()
355
-
356
- try:
357
- with redirect_stdout(output_buffer):
358
- exec(analysis_script, execution_namespace)
359
- raw_data_output = output_buffer.getvalue()
360
- except Exception as e:
361
- return (
362
- f"An error occurred executing the script: {e}\n\nGenerated Script:\n"
363
- f"```python\n{analysis_script}\n```"
364
- )
365
-
366
- # JSON Validation - creates hard boundary between calculation and communication
367
- yield_update("""```
368
- 🔍 Validating JSON output...
369
- ```""")
370
- try:
371
- validated_data = validate_json_output(raw_data_output)
372
- validated_json_str = format_validated_json_for_report(validated_data)
373
- safe_log("json_validation_passed", {"output_keys": list(validated_data.keys())})
374
- except JSONValidationError as e:
375
- safe_log("json_validation_failed", {"error": str(e)})
376
- return (
377
- f"**JSON Validation Failed**\n\n{e}\n\n"
378
- f"Generated Script:\n```python\n{analysis_script}\n```"
379
- )
380
-
381
- yield_update("""```
382
- ✍️ Synthesizing final comprehensive report...
383
- ```""")
384
- writer_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
385
- final_report = _generate_final_report(writer_input, validated_json_str)
386
- return _sanitize_text(final_report)
387
- else:
388
- # Pure chat path
389
- chat_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
390
- prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
391
- return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
392
-
393
- except Exception as e:
394
- tb = traceback.format_exc()
395
- safe_log("app_error", {"err": str(e)})
396
- return "A critical error occurred. Please contact your administrator." if PHI_MODE else f"A critical error occurred: {e}"
397
-
398
-
399
- PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
400
- TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
401
-
402
-
403
- # ---------------------- Sleek UI assets (CSS/JS only) ----------------------
404
-
405
- SLEEK_CSS = """
406
- /* Full-bleed, modern look */
407
- :root, body, #root, .gradio-container { height: 100%; }
408
- .gradio-container { padding: 0 !important; }
409
- .block { padding: 0 !important; }
410
-
411
- /* Header */
412
- .header {
413
- padding: 20px 28px;
414
- background: linear-gradient(135deg, #0e1726, #1d2a44 60%, #243a5e);
415
- color: #fff;
416
- display: flex; align-items: center; justify-content: space-between;
417
- gap: 16px;
418
- }
419
- .header h1 { margin: 0; font-size: 22px; letter-spacing: 0.3px; font-weight: 600; }
420
- .header .badge { font-size: 12px; opacity: 0.9; background:#ffffff22; padding:6px 10px; border-radius: 999px; }
421
-
422
- /* Main layout */
423
- .main {
424
- display: grid;
425
- grid-template-columns: 420px 1fr;
426
- gap: 16px;
427
- padding: 16px;
428
- height: calc(100vh - 72px);
429
- box-sizing: border-box;
430
- }
431
- .left, .right {
432
- background: #0b1020;
433
- color: #e9edf3;
434
- border-radius: 16px;
435
- border: 1px solid #1c2642;
436
- }
437
- .left { padding: 16px; display: flex; flex-direction: column; gap: 12px; }
438
- .right { padding: 0; display: flex; flex-direction: column; }
439
-
440
- /* Panels */
441
- .panel-title { font-size: 14px; font-weight: 600; color: #aeb8cc; margin-bottom: 6px; }
442
- .helper { font-size: 12px; color: #97a3bb; margin-bottom: 8px; }
443
-
444
- /* Sticky actions */
445
- .actions {
446
- display: flex; gap: 8px; align-items: center; justify-content: stretch;
447
- }
448
- .actions .gr-button { flex: 1; }
449
-
450
- /* Tabs full height */
451
- .right .tabs { height: 100%; display: flex; flex-direction: column; }
452
- .right .tabitem { flex: 1; display: flex; flex-direction: column; }
453
- #chatbot_container { flex: 1; }
454
- #chatbot_container .gr-chatbot { height: 100%; }
455
-
456
- /* Tiny separators */
457
- .hr { height: 1px; background: #16203b; margin: 10px 0; }
458
-
459
- /* Voice hint */
460
- .voice-hint { font-size: 12px; color:#9fb0cc; margin-top: 4px; }
461
- """
462
-
463
- VOICE_STT_HTML = """
464
- <script>
465
- let __rs_rec = null;
466
- function rs_toggle_stt(elemId){
467
- const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
468
- if (!SpeechRecognition){
469
- alert("This browser does not support Speech Recognition. Try Chrome or Edge.");
470
- return;
471
- }
472
- if (__rs_rec){ __rs_rec.stop(); __rs_rec = null; return; }
473
- __rs_rec = new SpeechRecognition();
474
- __rs_rec.lang = "en-US";
475
- __rs_rec.interimResults = true;
476
- __rs_rec.continuous = true;
477
-
478
- const box = document.querySelector(`#${elemId} textarea`);
479
- if (!box){ alert("Prompt box not found."); return; }
480
- let base = box.value || "";
481
-
482
- __rs_rec.onresult = (ev) => {
483
- let t = "";
484
- for (let i = ev.resultIndex; i < ev.results.length; i++){
485
- t += ev.results[i].transcript;
486
- }
487
- box.value = (base + " " + t).trim();
488
- box.dispatchEvent(new Event("input", { bubbles: true }));
489
- };
490
- __rs_rec.onend = () => { __rs_rec = null; };
491
- __rs_rec.start();
492
- }
493
- </script>
494
- """
495
-
496
-
497
- # ---------------------- Sleek UI (with fixed State wiring) ----------------------
498
-
499
- with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
500
- # Persistent in-memory history component (fixes list/_id error)
501
- assessment_history = gr.State([])
502
-
503
- # Header
504
- with gr.Row(elem_classes=["header"]):
505
- gr.Markdown("<h1>Clarity Ops Augemented Decision Support</h1>")
506
- pill = "PHI Mode ON · history off" if (PHI_MODE and not PERSIST_HISTORY) else \
507
- "PHI Mode ON" if PHI_MODE else "PHI Mode OFF"
508
- gr.Markdown(f"<span class='badge'>{pill}</span>")
509
-
510
- # Main layout
511
- with gr.Row(elem_classes=["main"]):
512
- # Left panel
513
- with gr.Column(elem_classes=["left"]):
514
- gr.Markdown("<div class='panel-title'>New Assessment</div>")
515
- gr.Markdown("<div class='helper'>Upload CSVs for analysis, or enter a prompt. Voice works in modern browsers.</div>")
516
- files_input = gr.Files(
517
- label="Upload Data Files (.csv)",
518
- file_count="multiple",
519
- type="filepath",
520
- file_types=[".csv"],
521
- )
522
- prompt_input = gr.Textbox(
523
- label="Prompt",
524
- placeholder="Paste your scenario or question here...",
525
- lines=12,
526
- elem_id="prompt_box",
527
- autofocus=True,
528
- )
529
-
530
- with gr.Row(elem_classes=["actions"]):
531
- send_btn = gr.Button("▶️ Run Analysis", variant="primary")
532
- clear_btn = gr.Button("🧹 Clear")
533
- voice_btn = gr.Button("🎙️ Voice")
534
-
535
- gr.Markdown("<div class='voice-hint'>Click Voice to start/stop dictation into the prompt box.</div>")
536
- ping_btn = gr.Button("🔌 Ping Cohere")
537
- ping_out = gr.Markdown()
538
-
539
- gr.Markdown("<div class='hr'></div>")
540
- if PHI_MODE:
541
- gr.Markdown(
542
- "⚠️ **PHI Mode:** History persistence is disabled by default. Avoid unnecessary identifiers."
543
- )
544
-
545
- with gr.Accordion("Privacy & Terms", open=False):
546
- gr.Markdown(PRIVACY_POLICY_TEXT)
547
- gr.Markdown("<div class='hr'></div>")
548
- gr.Markdown(TERMS_OF_SERVICE_TEXT)
549
-
550
- # Right panel
551
- with gr.Column(elem_classes=["right"]):
552
- with gr.Tabs(elem_classes=["tabs"]):
553
- with gr.TabItem("Current Assessment", id=0, elem_classes=["tabitem"]):
554
- with gr.Column(elem_id="chatbot_container"):
555
- chat_history_output = gr.Chatbot(label="Analysis Output", type="messages", container=False, autoscroll=True)
556
- with gr.TabItem("Assessment History", id=1, elem_classes=["tabitem"]):
557
- gr.Markdown("### Review Past Assessments")
558
- history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
559
- history_display = gr.Markdown(label="Selected Assessment Details")
560
-
561
- # Inject voice-to-text helper
562
- gr.HTML(VOICE_STT_HTML)
563
-
564
- # --------- Event logic (unchanged analysis flow) ----------
565
-
566
- def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
567
- if not prompt:
568
- gr.Warning("Please enter a prompt.")
569
- yield chat_history_list, history_state_list, gr.update()
570
- return
571
-
572
- # Append user's message
573
- chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
574
-
575
- # Optional progress callback (not streaming in this UI)
576
- def dummy_update(message: str):
577
- pass
578
-
579
- # Thinking bubble
580
- thinking_message = _append_msg(
581
- chat_with_user_msg,
582
- "assistant",
583
- """```
584
- 🧠 Generating and executing analysis... Please wait.
585
- ```""",
586
- )
587
- yield thinking_message, history_state_list, gr.update()
588
-
589
- # Run analysis/chat
590
- ai_response_text = handle(prompt, files, dummy_update)
591
-
592
- # Append final assistant response
593
- final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
594
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
595
-
596
- # Capture filenames (if any)
597
- file_names: List[str] = []
598
- if files:
599
- file_names = [
600
- os.path.basename(f.name if hasattr(f, "name") else f) for f in files
601
- ]
602
-
603
- # Build history record
604
- new_entry = {
605
- "id": timestamp,
606
- "prompt": prompt,
607
- "files": file_names,
608
- "response": ai_response_text,
609
- "chat_history": final_chat,
610
- }
611
-
612
- # Respect PHI/history flags
613
- if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
614
- updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
615
- else:
616
- updated_history = history_state_list or []
617
-
618
- history_labels = [f"{item['id']} - {item['prompt'][:40]}..." for item in updated_history]
619
-
620
- yield final_chat, updated_history, gr.update(choices=history_labels)
621
-
622
- def view_history(selection: str, history_state_list: List[Dict[str, Any]]) -> str:
623
- if not selection or not history_state_list:
624
- return ""
625
- try:
626
- selected_id = selection.split(" - ", 1)[0]
627
- except Exception:
628
- selected_id = selection
629
-
630
- selected_assessment = next(
631
- (item for item in history_state_list if item.get("id") == selected_id), None
632
- )
633
- if not selected_assessment:
634
- return "Could not find the selected assessment."
635
-
636
- file_list = selected_assessment.get("files", [])
637
- file_list_md = "\n- ".join(file_list) if file_list else "*(no files uploaded)*"
638
-
639
- chat_entries = selected_assessment.get("chat_history", [])
640
- chat_md_lines = []
641
- for msg in chat_entries:
642
- role = msg.get("role", "").capitalize()
643
- content = msg.get("content", "")
644
- chat_md_lines.append(f"**{role}:** {content}")
645
- chat_md = "\n\n".join(chat_md_lines)
646
-
647
- return f"""### Assessment from: {selected_assessment['id']}
648
- **Files Used:**
649
- - {file_list_md}
650
- ---
651
- **Original Prompt:**
652
- > {selected_assessment['prompt']}
653
- ---
654
- **AI Generated Response:**
655
- {selected_assessment['response']}
656
- ---
657
- **Chat Transcript:**
658
- {chat_md}
659
- """
660
-
661
- # Wire events (using proper gr.State component for history)
662
- send_btn.click(
663
- run_analysis_wrapper,
664
- inputs=[prompt_input, files_input, chat_history_output, assessment_history],
665
- outputs=[chat_history_output, assessment_history, history_dropdown],
666
- )
667
- history_dropdown.change(
668
- view_history,
669
- inputs=[history_dropdown, assessment_history],
670
- outputs=[history_display],
671
- )
672
- clear_btn.click(
673
- lambda: (None, None, []),
674
- outputs=[prompt_input, files_input, chat_history_output],
675
- )
676
- ping_btn.click(ping_cohere, outputs=[ping_out])
677
- voice_btn.click(None, [], [], js="rs_toggle_stt('prompt_box')")
678
-
679
-
680
- if __name__ == "__main__":
681
- if not os.getenv("COHERE_API_KEY"):
682
- print("🔴 COHERE_API_KEY environment variable not set. Application may not function correctly.")
683
- demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", "7860")))from __future__ import annotations
684
-
685
- import io
686
- import json
687
- import os
688
- import traceback
689
- from contextlib import redirect_stdout
690
- from datetime import datetime
691
- from typing import Any, Dict, List
692
-
693
- import gradio as gr
694
- import pandas as pd
695
- import regex as re2
696
- import re
697
- from langchain_cohere import ChatCohere # noqa: F401
698
- from settings import (
699
- GENERAL_CONVERSATION_PROMPT,
700
- COHERE_MODEL_PRIMARY,
701
- COHERE_TIMEOUT_S, # noqa: F401
702
- USE_OPEN_FALLBACKS # noqa: F401
703
- )
704
- # Try to import optional HIPAA flags; fall back to safe defaults if not defined.
705
- try:
706
- from settings import PHI_MODE, PERSIST_HISTORY, HISTORY_TTL_DAYS, REDACT_BEFORE_LLM, ALLOW_EXTERNAL_PHI
707
- except Exception:
708
- PHI_MODE = False
709
- PERSIST_HISTORY = True
710
- HISTORY_TTL_DAYS = 365
711
- REDACT_BEFORE_LLM = False
712
- ALLOW_EXTERNAL_PHI = True
713
 
714
- from audit_log import log_event
715
- from privacy import safety_filter, refusal_reply
716
- from llm_router import cohere_chat, _co_client, cohere_embed
717
 
718
- # ---------------------- Helpers (analysis logic unchanged) ----------------------
719
  def load_markdown_text(filepath: str) -> str:
720
  try:
721
  with open(filepath, "r", encoding="utf-8") as f:
@@ -723,13 +43,14 @@ def load_markdown_text(filepath: str) -> str:
723
  except FileNotFoundError:
724
  return f"**Error:** Document `{os.path.basename(filepath)}` not found."
725
 
 
726
  def _sanitize_text(s: str) -> str:
727
  if not isinstance(s, str):
728
  return s
729
- # Remove control characters (except newline and tab)
730
  return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
731
 
732
- # Conservative PHI redaction patterns (only applied if PHI_MODE & REDACT_BEFORE_LLM are enabled)
 
733
  PHI_PATTERNS = [
734
  (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
735
  (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),
@@ -740,6 +61,7 @@ PHI_PATTERNS = [
740
  (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),
741
  ]
742
 
 
743
  def redact_phi(text: str) -> str:
744
  if not isinstance(text, str):
745
  return text
@@ -748,14 +70,13 @@ def redact_phi(text: str) -> str:
748
  t = pat.sub(repl, t)
749
  return t
750
 
 
751
  def safe_log(event_name: str, meta: dict | None = None):
752
- # Avoid logging raw PHI or payloads
753
  try:
754
  meta = (meta or {}).copy()
755
  meta.pop("raw", None)
756
  log_event(event_name, None, meta)
757
  except Exception:
758
- # Never raise from logging
759
  pass
760
 
761
 
@@ -769,35 +90,16 @@ class JSONValidationError(Exception):
769
  def validate_json_output(raw_output: str) -> Dict[str, Any]:
770
  """
771
  Validates and parses JSON output from the analysis script.
772
-
773
- This creates the "hard boundary" between calculation and communication
774
- as described in the ClarityOps architecture. The function:
775
- 1. Strips whitespace and handles empty output
776
- 2. Attempts to parse as JSON
777
- 3. Validates the structure is a dictionary (not array or primitive)
778
- 4. Checks for error indicators in the output
779
- 5. Returns validated Python dict for report generation
780
-
781
- Args:
782
- raw_output: Raw string captured from script stdout
783
-
784
- Returns:
785
- Validated dictionary containing analysis findings
786
-
787
- Raises:
788
- JSONValidationError: If output is empty, malformed, or contains errors
789
  """
790
- # Strip whitespace
791
  cleaned_output = raw_output.strip()
792
 
793
- # Check for empty output
794
  if not cleaned_output:
795
  raise JSONValidationError(
796
  "Analysis script produced no output. The script must print a JSON object to stdout."
797
  )
798
 
799
  # Handle multiple JSON objects (take the last complete one)
800
- # This handles cases where debug prints precede the final JSON
801
  json_candidates = []
802
  brace_count = 0
803
  current_start = None
@@ -813,69 +115,45 @@ def validate_json_output(raw_output: str) -> Dict[str, Any]:
813
  json_candidates.append(cleaned_output[current_start:i+1])
814
  current_start = None
815
 
816
- # If no valid JSON structure found, try parsing the whole output
817
  if not json_candidates:
818
  json_to_parse = cleaned_output
819
  else:
820
- # Use the last JSON object (most likely the final output)
821
  json_to_parse = json_candidates[-1]
822
 
823
- # Attempt JSON parsing
824
  try:
825
  parsed = json.loads(json_to_parse)
826
  except json.JSONDecodeError as e:
827
- # Provide helpful error message with context
828
  error_context = cleaned_output[:500] + ("..." if len(cleaned_output) > 500 else "")
829
  raise JSONValidationError(
830
  f"Analysis script produced invalid JSON. Parse error: {e.msg} at position {e.pos}.\n\n"
831
  f"Raw output (first 500 chars):\n```\n{error_context}\n```"
832
  )
833
 
834
- # Validate structure is a dictionary
835
  if not isinstance(parsed, dict):
836
  raise JSONValidationError(
837
  f"Analysis output must be a JSON object (dictionary), not {type(parsed).__name__}. "
838
  f"Ensure your script prints a dictionary with json.dumps()."
839
  )
840
 
841
- # Check for error indicators in the output
842
  if "error" in parsed:
843
  error_msg = parsed.get("error", "Unknown error")
844
- raise JSONValidationError(
845
- f"Analysis script reported an error: {error_msg}"
846
- )
847
 
848
- # Validate output is not empty dict
849
  if not parsed:
850
  raise JSONValidationError(
851
  "Analysis script produced an empty JSON object. "
852
  "Ensure your script populates the output dictionary with findings."
853
  )
854
 
855
- # Log successful validation (without sensitive data)
856
  safe_log("json_validation_success", {"keys": list(parsed.keys()), "key_count": len(parsed)})
857
-
858
  return parsed
859
 
860
 
861
  def format_validated_json_for_report(validated_data: Dict[str, Any]) -> str:
862
- """
863
- Formats validated JSON data for the report generator.
864
-
865
- Converts the validated Python dictionary back to a formatted JSON string
866
- for the LLM to interpret. This ensures consistent formatting and handles
867
- any edge cases in serialization.
868
-
869
- Args:
870
- validated_data: Validated dictionary from validate_json_output()
871
-
872
- Returns:
873
- Formatted JSON string ready for report generation
874
- """
875
  try:
876
  return json.dumps(validated_data, indent=2, default=str, ensure_ascii=False)
877
  except (TypeError, ValueError) as e:
878
- # Fallback to string representation if JSON serialization fails
879
  safe_log("json_format_warning", {"error": str(e)})
880
  return json.dumps({"raw_data": str(validated_data)}, indent=2)
881
 
@@ -983,12 +261,10 @@ def ping_cohere() -> str:
983
 
984
  def handle(user_msg: str, files: list, yield_update) -> str:
985
  try:
986
- # Safety filter on incoming message
987
  safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
988
  if blocked_in:
989
  return refusal_reply(reason_in)
990
 
991
- # Optional PHI redaction for prompts sent to an external LLM
992
  redacted_in = safe_in
993
  if PHI_MODE and REDACT_BEFORE_LLM:
994
  redacted_in = redact_phi(safe_in)
@@ -996,7 +272,6 @@ def handle(user_msg: str, files: list, yield_update) -> str:
996
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
997
 
998
  if file_paths:
999
- # CSV analysis path
1000
  dataframes, schema_parts = [], []
1001
  for i, p in enumerate(file_paths):
1002
  if p.endswith(".csv"):
@@ -1013,18 +288,12 @@ def handle(user_msg: str, files: list, yield_update) -> str:
1013
  return "Please upload at least one CSV file."
1014
 
1015
  schema_context = "\n".join(schema_parts)
1016
-
1017
- # If external PHI is not allowed, use redacted prompt; otherwise use original
1018
  prompt_for_code = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
1019
 
1020
- yield_update("""```
1021
- 🧠 Generating aligned analysis script...
1022
- ```""")
1023
  analysis_script = _create_python_script(prompt_for_code, schema_context)
1024
 
1025
- yield_update("""```
1026
- ⚙️ Executing script to extract raw data...
1027
- ```""")
1028
  execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
1029
  output_buffer = io.StringIO()
1030
 
@@ -1039,9 +308,7 @@ def handle(user_msg: str, files: list, yield_update) -> str:
1039
  )
1040
 
1041
  # JSON Validation - creates hard boundary between calculation and communication
1042
- yield_update("""```
1043
- 🔍 Validating JSON output...
1044
- ```""")
1045
  try:
1046
  validated_data = validate_json_output(raw_data_output)
1047
  validated_json_str = format_validated_json_for_report(validated_data)
@@ -1053,20 +320,16 @@ def handle(user_msg: str, files: list, yield_update) -> str:
1053
  f"Generated Script:\n```python\n{analysis_script}\n```"
1054
  )
1055
 
1056
- yield_update("""```
1057
- ✍️ Synthesizing final comprehensive report...
1058
- ```""")
1059
  writer_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
1060
  final_report = _generate_final_report(writer_input, validated_json_str)
1061
  return _sanitize_text(final_report)
1062
  else:
1063
- # Pure chat path
1064
  chat_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
1065
  prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
1066
  return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
1067
 
1068
  except Exception as e:
1069
- tb = traceback.format_exc()
1070
  safe_log("app_error", {"err": str(e)})
1071
  return "A critical error occurred. Please contact your administrator." if PHI_MODE else f"A critical error occurred: {e}"
1072
 
@@ -1075,15 +338,13 @@ PRIVACY_POLICY_TEXT = load_markdown_text("privacy_policy.md")
1075
  TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
1076
 
1077
 
1078
- # ---------------------- Sleek UI assets (CSS/JS only) ----------------------
1079
 
1080
  SLEEK_CSS = """
1081
- /* Full-bleed, modern look */
1082
  :root, body, #root, .gradio-container { height: 100%; }
1083
  .gradio-container { padding: 0 !important; }
1084
  .block { padding: 0 !important; }
1085
 
1086
- /* Header */
1087
  .header {
1088
  padding: 20px 28px;
1089
  background: linear-gradient(135deg, #0e1726, #1d2a44 60%, #243a5e);
@@ -1094,7 +355,6 @@ SLEEK_CSS = """
1094
  .header h1 { margin: 0; font-size: 22px; letter-spacing: 0.3px; font-weight: 600; }
1095
  .header .badge { font-size: 12px; opacity: 0.9; background:#ffffff22; padding:6px 10px; border-radius: 999px; }
1096
 
1097
- /* Main layout */
1098
  .main {
1099
  display: grid;
1100
  grid-template-columns: 420px 1fr;
@@ -1112,26 +372,20 @@ SLEEK_CSS = """
1112
  .left { padding: 16px; display: flex; flex-direction: column; gap: 12px; }
1113
  .right { padding: 0; display: flex; flex-direction: column; }
1114
 
1115
- /* Panels */
1116
  .panel-title { font-size: 14px; font-weight: 600; color: #aeb8cc; margin-bottom: 6px; }
1117
  .helper { font-size: 12px; color: #97a3bb; margin-bottom: 8px; }
1118
 
1119
- /* Sticky actions */
1120
  .actions {
1121
  display: flex; gap: 8px; align-items: center; justify-content: stretch;
1122
  }
1123
  .actions .gr-button { flex: 1; }
1124
 
1125
- /* Tabs full height */
1126
  .right .tabs { height: 100%; display: flex; flex-direction: column; }
1127
  .right .tabitem { flex: 1; display: flex; flex-direction: column; }
1128
  #chatbot_container { flex: 1; }
1129
  #chatbot_container .gr-chatbot { height: 100%; }
1130
 
1131
- /* Tiny separators */
1132
  .hr { height: 1px; background: #16203b; margin: 10px 0; }
1133
-
1134
- /* Voice hint */
1135
  .voice-hint { font-size: 12px; color:#9fb0cc; margin-top: 4px; }
1136
  """
1137
 
@@ -1169,22 +423,18 @@ function rs_toggle_stt(elemId){
1169
  """
1170
 
1171
 
1172
- # ---------------------- Sleek UI (with fixed State wiring) ----------------------
1173
 
1174
  with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
1175
- # Persistent in-memory history component (fixes list/_id error)
1176
  assessment_history = gr.State([])
1177
 
1178
- # Header
1179
  with gr.Row(elem_classes=["header"]):
1180
- gr.Markdown("<h1>Clarity Ops Augemented Decision Support</h1>")
1181
  pill = "PHI Mode ON · history off" if (PHI_MODE and not PERSIST_HISTORY) else \
1182
  "PHI Mode ON" if PHI_MODE else "PHI Mode OFF"
1183
  gr.Markdown(f"<span class='badge'>{pill}</span>")
1184
 
1185
- # Main layout
1186
  with gr.Row(elem_classes=["main"]):
1187
- # Left panel
1188
  with gr.Column(elem_classes=["left"]):
1189
  gr.Markdown("<div class='panel-title'>New Assessment</div>")
1190
  gr.Markdown("<div class='helper'>Upload CSVs for analysis, or enter a prompt. Voice works in modern browsers.</div>")
@@ -1222,7 +472,6 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
1222
  gr.Markdown("<div class='hr'></div>")
1223
  gr.Markdown(TERMS_OF_SERVICE_TEXT)
1224
 
1225
- # Right panel
1226
  with gr.Column(elem_classes=["right"]):
1227
  with gr.Tabs(elem_classes=["tabs"]):
1228
  with gr.TabItem("Current Assessment", id=0, elem_classes=["tabitem"]):
@@ -1233,49 +482,37 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
1233
  history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
1234
  history_display = gr.Markdown(label="Selected Assessment Details")
1235
 
1236
- # Inject voice-to-text helper
1237
  gr.HTML(VOICE_STT_HTML)
1238
 
1239
- # --------- Event logic (unchanged analysis flow) ----------
1240
-
1241
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
1242
  if not prompt:
1243
  gr.Warning("Please enter a prompt.")
1244
  yield chat_history_list, history_state_list, gr.update()
1245
  return
1246
 
1247
- # Append user's message
1248
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
1249
 
1250
- # Optional progress callback (not streaming in this UI)
1251
  def dummy_update(message: str):
1252
  pass
1253
 
1254
- # Thinking bubble
1255
  thinking_message = _append_msg(
1256
  chat_with_user_msg,
1257
  "assistant",
1258
- """```
1259
- 🧠 Generating and executing analysis... Please wait.
1260
- ```""",
1261
  )
1262
  yield thinking_message, history_state_list, gr.update()
1263
 
1264
- # Run analysis/chat
1265
  ai_response_text = handle(prompt, files, dummy_update)
1266
 
1267
- # Append final assistant response
1268
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
1269
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1270
 
1271
- # Capture filenames (if any)
1272
  file_names: List[str] = []
1273
  if files:
1274
  file_names = [
1275
  os.path.basename(f.name if hasattr(f, "name") else f) for f in files
1276
  ]
1277
 
1278
- # Build history record
1279
  new_entry = {
1280
  "id": timestamp,
1281
  "prompt": prompt,
@@ -1284,7 +521,6 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
1284
  "chat_history": final_chat,
1285
  }
1286
 
1287
- # Respect PHI/history flags
1288
  if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
1289
  updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
1290
  else:
@@ -1333,7 +569,6 @@ with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
1333
  {chat_md}
1334
  """
1335
 
1336
- # Wire events (using proper gr.State component for history)
1337
  send_btn.click(
1338
  run_analysis_wrapper,
1339
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
  import io
 
33
  from privacy import safety_filter, refusal_reply
34
  from llm_router import cohere_chat, _co_client, cohere_embed
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ # ---------------------- Helpers ----------------------
 
 
38
 
 
39
  def load_markdown_text(filepath: str) -> str:
40
  try:
41
  with open(filepath, "r", encoding="utf-8") as f:
 
43
  except FileNotFoundError:
44
  return f"**Error:** Document `{os.path.basename(filepath)}` not found."
45
 
46
+
47
  def _sanitize_text(s: str) -> str:
48
  if not isinstance(s, str):
49
  return s
 
50
  return re2.sub(r"[\p{C}--[\n\t]]+", "", s)
51
 
52
+
53
+ # Conservative PHI redaction patterns
54
  PHI_PATTERNS = [
55
  (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED_SSN]"),
56
  (re.compile(r"\b\d{9}\b"), "[REDACTED_MRN]"),
 
61
  (re.compile(r"\b\d{5}(-\d{4})?\b"), "[REDACTED_ZIP]"),
62
  ]
63
 
64
+
65
  def redact_phi(text: str) -> str:
66
  if not isinstance(text, str):
67
  return text
 
70
  t = pat.sub(repl, t)
71
  return t
72
 
73
+
74
  def safe_log(event_name: str, meta: dict | None = None):
 
75
  try:
76
  meta = (meta or {}).copy()
77
  meta.pop("raw", None)
78
  log_event(event_name, None, meta)
79
  except Exception:
 
80
  pass
81
 
82
 
 
90
  def validate_json_output(raw_output: str) -> Dict[str, Any]:
91
  """
92
  Validates and parses JSON output from the analysis script.
93
+ Creates the hard boundary between calculation and communication.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  """
 
95
  cleaned_output = raw_output.strip()
96
 
 
97
  if not cleaned_output:
98
  raise JSONValidationError(
99
  "Analysis script produced no output. The script must print a JSON object to stdout."
100
  )
101
 
102
  # Handle multiple JSON objects (take the last complete one)
 
103
  json_candidates = []
104
  brace_count = 0
105
  current_start = None
 
115
  json_candidates.append(cleaned_output[current_start:i+1])
116
  current_start = None
117
 
 
118
  if not json_candidates:
119
  json_to_parse = cleaned_output
120
  else:
 
121
  json_to_parse = json_candidates[-1]
122
 
 
123
  try:
124
  parsed = json.loads(json_to_parse)
125
  except json.JSONDecodeError as e:
 
126
  error_context = cleaned_output[:500] + ("..." if len(cleaned_output) > 500 else "")
127
  raise JSONValidationError(
128
  f"Analysis script produced invalid JSON. Parse error: {e.msg} at position {e.pos}.\n\n"
129
  f"Raw output (first 500 chars):\n```\n{error_context}\n```"
130
  )
131
 
 
132
  if not isinstance(parsed, dict):
133
  raise JSONValidationError(
134
  f"Analysis output must be a JSON object (dictionary), not {type(parsed).__name__}. "
135
  f"Ensure your script prints a dictionary with json.dumps()."
136
  )
137
 
 
138
  if "error" in parsed:
139
  error_msg = parsed.get("error", "Unknown error")
140
+ raise JSONValidationError(f"Analysis script reported an error: {error_msg}")
 
 
141
 
 
142
  if not parsed:
143
  raise JSONValidationError(
144
  "Analysis script produced an empty JSON object. "
145
  "Ensure your script populates the output dictionary with findings."
146
  )
147
 
 
148
  safe_log("json_validation_success", {"keys": list(parsed.keys()), "key_count": len(parsed)})
 
149
  return parsed
150
 
151
 
152
  def format_validated_json_for_report(validated_data: Dict[str, Any]) -> str:
153
+ """Formats validated JSON data for the report generator."""
 
 
 
 
 
 
 
 
 
 
 
 
154
  try:
155
  return json.dumps(validated_data, indent=2, default=str, ensure_ascii=False)
156
  except (TypeError, ValueError) as e:
 
157
  safe_log("json_format_warning", {"error": str(e)})
158
  return json.dumps({"raw_data": str(validated_data)}, indent=2)
159
 
 
261
 
262
  def handle(user_msg: str, files: list, yield_update) -> str:
263
  try:
 
264
  safe_in, blocked_in, reason_in = safety_filter(user_msg, mode="input")
265
  if blocked_in:
266
  return refusal_reply(reason_in)
267
 
 
268
  redacted_in = safe_in
269
  if PHI_MODE and REDACT_BEFORE_LLM:
270
  redacted_in = redact_phi(safe_in)
 
272
  file_paths: List[str] = [getattr(f, "name", None) or f for f in (files or [])]
273
 
274
  if file_paths:
 
275
  dataframes, schema_parts = [], []
276
  for i, p in enumerate(file_paths):
277
  if p.endswith(".csv"):
 
288
  return "Please upload at least one CSV file."
289
 
290
  schema_context = "\n".join(schema_parts)
 
 
291
  prompt_for_code = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
292
 
293
+ yield_update("```\n🧠 Generating aligned analysis script...\n```")
 
 
294
  analysis_script = _create_python_script(prompt_for_code, schema_context)
295
 
296
+ yield_update("```\n⚙️ Executing script to extract raw data...\n```")
 
 
297
  execution_namespace = {"dfs": dataframes, "pd": pd, "re": re, "json": json}
298
  output_buffer = io.StringIO()
299
 
 
308
  )
309
 
310
  # JSON Validation - creates hard boundary between calculation and communication
311
+ yield_update("```\n🔍 Validating JSON output...\n```")
 
 
312
  try:
313
  validated_data = validate_json_output(raw_data_output)
314
  validated_json_str = format_validated_json_for_report(validated_data)
 
320
  f"Generated Script:\n```python\n{analysis_script}\n```"
321
  )
322
 
323
+ yield_update("```\n✍️ Synthesizing final comprehensive report...\n```")
 
 
324
  writer_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
325
  final_report = _generate_final_report(writer_input, validated_json_str)
326
  return _sanitize_text(final_report)
327
  else:
 
328
  chat_input = redacted_in if (PHI_MODE and not ALLOW_EXTERNAL_PHI) else safe_in
329
  prompt = f"{GENERAL_CONVERSATION_PROMPT}\n\nUser: {chat_input}\nAssistant:"
330
  return _sanitize_text(cohere_chat(prompt) or "How can I help further?")
331
 
332
  except Exception as e:
 
333
  safe_log("app_error", {"err": str(e)})
334
  return "A critical error occurred. Please contact your administrator." if PHI_MODE else f"A critical error occurred: {e}"
335
 
 
338
  TERMS_OF_SERVICE_TEXT = load_markdown_text("terms_of_service.md")
339
 
340
 
341
+ # ---------------------- UI Assets ----------------------
342
 
343
  SLEEK_CSS = """
 
344
  :root, body, #root, .gradio-container { height: 100%; }
345
  .gradio-container { padding: 0 !important; }
346
  .block { padding: 0 !important; }
347
 
 
348
  .header {
349
  padding: 20px 28px;
350
  background: linear-gradient(135deg, #0e1726, #1d2a44 60%, #243a5e);
 
355
  .header h1 { margin: 0; font-size: 22px; letter-spacing: 0.3px; font-weight: 600; }
356
  .header .badge { font-size: 12px; opacity: 0.9; background:#ffffff22; padding:6px 10px; border-radius: 999px; }
357
 
 
358
  .main {
359
  display: grid;
360
  grid-template-columns: 420px 1fr;
 
372
  .left { padding: 16px; display: flex; flex-direction: column; gap: 12px; }
373
  .right { padding: 0; display: flex; flex-direction: column; }
374
 
 
375
  .panel-title { font-size: 14px; font-weight: 600; color: #aeb8cc; margin-bottom: 6px; }
376
  .helper { font-size: 12px; color: #97a3bb; margin-bottom: 8px; }
377
 
 
378
  .actions {
379
  display: flex; gap: 8px; align-items: center; justify-content: stretch;
380
  }
381
  .actions .gr-button { flex: 1; }
382
 
 
383
  .right .tabs { height: 100%; display: flex; flex-direction: column; }
384
  .right .tabitem { flex: 1; display: flex; flex-direction: column; }
385
  #chatbot_container { flex: 1; }
386
  #chatbot_container .gr-chatbot { height: 100%; }
387
 
 
388
  .hr { height: 1px; background: #16203b; margin: 10px 0; }
 
 
389
  .voice-hint { font-size: 12px; color:#9fb0cc; margin-top: 4px; }
390
  """
391
 
 
423
  """
424
 
425
 
426
+ # ---------------------- Gradio UI ----------------------
427
 
428
  with gr.Blocks(theme=gr.themes.Soft(), css=SLEEK_CSS, fill_width=True) as demo:
 
429
  assessment_history = gr.State([])
430
 
 
431
  with gr.Row(elem_classes=["header"]):
432
+ gr.Markdown("<h1>Clarity Ops Augmented Decision Support</h1>")
433
  pill = "PHI Mode ON · history off" if (PHI_MODE and not PERSIST_HISTORY) else \
434
  "PHI Mode ON" if PHI_MODE else "PHI Mode OFF"
435
  gr.Markdown(f"<span class='badge'>{pill}</span>")
436
 
 
437
  with gr.Row(elem_classes=["main"]):
 
438
  with gr.Column(elem_classes=["left"]):
439
  gr.Markdown("<div class='panel-title'>New Assessment</div>")
440
  gr.Markdown("<div class='helper'>Upload CSVs for analysis, or enter a prompt. Voice works in modern browsers.</div>")
 
472
  gr.Markdown("<div class='hr'></div>")
473
  gr.Markdown(TERMS_OF_SERVICE_TEXT)
474
 
 
475
  with gr.Column(elem_classes=["right"]):
476
  with gr.Tabs(elem_classes=["tabs"]):
477
  with gr.TabItem("Current Assessment", id=0, elem_classes=["tabitem"]):
 
482
  history_dropdown = gr.Dropdown(label="Select an assessment to review", choices=[])
483
  history_display = gr.Markdown(label="Selected Assessment Details")
484
 
 
485
  gr.HTML(VOICE_STT_HTML)
486
 
 
 
487
  def run_analysis_wrapper(prompt, files, chat_history_list, history_state_list):
488
  if not prompt:
489
  gr.Warning("Please enter a prompt.")
490
  yield chat_history_list, history_state_list, gr.update()
491
  return
492
 
 
493
  chat_with_user_msg = _append_msg(chat_history_list, "user", prompt)
494
 
 
495
  def dummy_update(message: str):
496
  pass
497
 
 
498
  thinking_message = _append_msg(
499
  chat_with_user_msg,
500
  "assistant",
501
+ "```\n🧠 Generating and executing analysis... Please wait.\n```",
 
 
502
  )
503
  yield thinking_message, history_state_list, gr.update()
504
 
 
505
  ai_response_text = handle(prompt, files, dummy_update)
506
 
 
507
  final_chat = _append_msg(chat_with_user_msg, "assistant", ai_response_text)
508
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
509
 
 
510
  file_names: List[str] = []
511
  if files:
512
  file_names = [
513
  os.path.basename(f.name if hasattr(f, "name") else f) for f in files
514
  ]
515
 
 
516
  new_entry = {
517
  "id": timestamp,
518
  "prompt": prompt,
 
521
  "chat_history": final_chat,
522
  }
523
 
 
524
  if PERSIST_HISTORY and (not PHI_MODE or (PHI_MODE and HISTORY_TTL_DAYS > 0)):
525
  updated_history: List[Dict[str, Any]] = (history_state_list or []) + [new_entry]
526
  else:
 
569
  {chat_md}
570
  """
571
 
 
572
  send_btn.click(
573
  run_analysis_wrapper,
574
  inputs=[prompt_input, files_input, chat_history_output, assessment_history],