hchevva committed on
Commit
f94df25
·
verified ·
1 Parent(s): 40a8012

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -14
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import re
3
  import json
4
  import tempfile
 
5
  from pathlib import Path
6
  from typing import Dict, List, Tuple, Any, Optional
7
 
@@ -1392,6 +1393,26 @@ def _load_extraction_payload(file_obj: Any) -> Tuple[Any, List[Dict[str, Any]],
1392
  raise ValueError("Unsupported extraction_details.json format.")
1393
 
1394
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1395
  def export_blank_cancer_risk_template():
1396
  tmpdir = Path(tempfile.mkdtemp(prefix="tox_template_"))
1397
  path = tmpdir / "cancer_risk_input_template.csv"
@@ -1442,9 +1463,9 @@ def export_prefilled_cancer_risk_template(records: List[Dict[str, Any]]):
1442
 
1443
  def run_regulatory_gap_assessment(extraction_json_file, framework: str, override_notes: str):
1444
  if extraction_json_file is None:
1445
- return pd.DataFrame(), "Upload extraction_details.json first.", None, None, "No input file."
1446
  try:
1447
- payload, _, _ = _load_extraction_payload(extraction_json_file)
1448
  matrix_df, report, report_md = map_extraction_to_framework(
1449
  extraction_payload=payload,
1450
  framework=framework,
@@ -1452,29 +1473,42 @@ def run_regulatory_gap_assessment(extraction_json_file, framework: str, override
1452
  override_notes=override_notes or "",
1453
  )
1454
  except Exception as e:
1455
- return pd.DataFrame(), f"(assessment unavailable: {e})", None, None, str(e)
1456
 
1457
  run_dir = make_run_dir(base_dir="runs")
1458
  matrix_path = write_dataframe_csv(run_dir / "regulatory_gap_matrix.csv", matrix_df)
1459
  report_path = write_json(run_dir / "regulatory_gap_report.json", report)
1460
  write_markdown(run_dir / "regulatory_gap_report.md", report_md)
1461
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1462
  md = "### Regulatory Gap Summary\n" + report_md
1463
  status = f"✅ Gap assessment complete. Covered={report.get('summary', {}).get('covered', 0)} | Missing={report.get('summary', {}).get('missing', 0)}"
1464
- return matrix_df, md, str(matrix_path), str(report_path), status
1465
 
1466
 
1467
- def run_cancer_risk_batch_ui(input_csv_file):
1468
  if input_csv_file is None:
1469
- return pd.DataFrame(), None, None, None, "Upload a populated cancer risk input CSV."
1470
  try:
1471
  df = pd.read_csv(input_csv_file.name)
1472
  except Exception as e:
1473
- return pd.DataFrame(), None, None, None, f"Could not read CSV: {e}"
1474
 
1475
  missing = [c for c in CANCER_RISK_TEMPLATE_COLUMNS if c not in df.columns]
1476
  if missing:
1477
- return pd.DataFrame(), None, None, None, f"Missing required columns: {missing}"
1478
 
1479
  run_dir = make_run_dir(base_dir="runs")
1480
  rows = df.fillna("").to_dict("records")
@@ -1482,9 +1516,9 @@ def run_cancer_risk_batch_ui(input_csv_file):
1482
  try:
1483
  result = run_batch_cancer_risk(rows, run_dir=str(run_dir))
1484
  except MCPClientError as e:
1485
- return pd.DataFrame(), None, None, None, f"MCP server unavailable: {e}"
1486
  except Exception as e:
1487
- return pd.DataFrame(), None, None, None, f"Calculation failed: {e}"
1488
 
1489
  result_rows = result.get("rows", []) if isinstance(result.get("rows", []), list) else []
1490
  out_df = pd.DataFrame(result_rows)
@@ -1494,13 +1528,35 @@ def run_cancer_risk_batch_ui(input_csv_file):
1494
  artifacts = result.get("artifacts", {}) if isinstance(result, dict) else {}
1495
  log_path = artifacts.get("log_jsonl", str(run_dir / "cancer_risk_log.jsonl"))
1496
  report_path = artifacts.get("report_md", str(run_dir / "cancer_risk_report.md"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1497
 
1498
  summ = result.get("summary", {})
1499
  status = (
1500
  f"✅ Batch complete. total={summ.get('total_rows', 0)} "
1501
  f"ok={summ.get('ok_rows', 0)} error={summ.get('error_rows', 0)}"
1502
  )
1503
- return out_df, str(result_csv_path), str(log_path), str(report_path), status
1504
 
1505
 
1506
  # =============================
@@ -1869,11 +1925,12 @@ with gr.Blocks(title="Toxicology PDF → Grounded Extractor", css=APP_CSS) as de
1869
  reg_matrix_df = gr.Dataframe(label="Clause-level gap matrix", interactive=False, wrap=True)
1870
  reg_matrix_file = gr.File(label="Download: regulatory_gap_matrix.csv")
1871
  reg_report_file = gr.File(label="Download: regulatory_gap_report.json")
 
1872
 
1873
  reg_run_btn.click(
1874
  fn=run_regulatory_gap_assessment,
1875
  inputs=[reg_extraction_json, reg_framework, reg_override_notes],
1876
- outputs=[reg_matrix_df, reg_summary_md, reg_matrix_file, reg_report_file, reg_status]
1877
  )
1878
 
1879
  with gr.Tab("Cancer Risk Calculator"):
@@ -1889,17 +1946,19 @@ with gr.Blocks(title="Toxicology PDF → Grounded Extractor", css=APP_CSS) as de
1889
  template_btn.click(fn=export_blank_cancer_risk_template, inputs=None, outputs=[template_file, template_status])
1890
 
1891
  risk_input_csv = gr.File(label="Upload populated cancer risk input CSV", file_types=[".csv"], file_count="single")
 
1892
  risk_run_btn = gr.Button("Run Cancer Risk Batch", variant="primary")
1893
  risk_status = gr.Textbox(label="Status", interactive=False)
1894
  risk_results_df = gr.Dataframe(label="Cancer risk results", interactive=False, wrap=True)
1895
  risk_results_csv = gr.File(label="Download: cancer_risk_results.csv")
1896
  risk_log_file = gr.File(label="Download: cancer_risk_log.jsonl")
1897
  risk_report_file = gr.File(label="Download: cancer_risk_report.md")
 
1898
 
1899
  risk_run_btn.click(
1900
  fn=run_cancer_risk_batch_ui,
1901
- inputs=[risk_input_csv],
1902
- outputs=[risk_results_df, risk_results_csv, risk_log_file, risk_report_file, risk_status]
1903
  )
1904
 
1905
  if __name__ == "__main__":
 
2
  import re
3
  import json
4
  import tempfile
5
+ import datetime
6
  from pathlib import Path
7
  from typing import Dict, List, Tuple, Any, Optional
8
 
 
1393
  raise ValueError("Unsupported extraction_details.json format.")
1394
 
1395
 
1396
+ def _build_payload_with_extensions(payload: Any, papers: List[Dict[str, Any]], ext: Dict[str, Any]) -> Dict[str, Any]:
1397
+ base: Dict[str, Any] = {}
1398
+ if isinstance(payload, dict):
1399
+ base = dict(payload)
1400
+ base["papers"] = papers
1401
+ base["toxra_extensions"] = ext
1402
+ return base
1403
+
1404
+
1405
+ def _safe_ext_dict(ext: Dict[str, Any]) -> Dict[str, Any]:
1406
+ out = dict(ext or {})
1407
+ if not isinstance(out.get("nlp_diagnostics", []), list):
1408
+ out["nlp_diagnostics"] = []
1409
+ if not isinstance(out.get("regulatory_gap_assessment", {}), dict):
1410
+ out["regulatory_gap_assessment"] = {}
1411
+ if not isinstance(out.get("risk_calculation_refs", []), list):
1412
+ out["risk_calculation_refs"] = []
1413
+ return out
1414
+
1415
+
1416
  def export_blank_cancer_risk_template():
1417
  tmpdir = Path(tempfile.mkdtemp(prefix="tox_template_"))
1418
  path = tmpdir / "cancer_risk_input_template.csv"
 
1463
 
1464
  def run_regulatory_gap_assessment(extraction_json_file, framework: str, override_notes: str):
1465
  if extraction_json_file is None:
1466
+ return pd.DataFrame(), "Upload extraction_details.json first.", None, None, None, "No input file."
1467
  try:
1468
+ payload, papers, existing_ext = _load_extraction_payload(extraction_json_file)
1469
  matrix_df, report, report_md = map_extraction_to_framework(
1470
  extraction_payload=payload,
1471
  framework=framework,
 
1473
  override_notes=override_notes or "",
1474
  )
1475
  except Exception as e:
1476
+ return pd.DataFrame(), f"(assessment unavailable: {e})", None, None, None, str(e)
1477
 
1478
  run_dir = make_run_dir(base_dir="runs")
1479
  matrix_path = write_dataframe_csv(run_dir / "regulatory_gap_matrix.csv", matrix_df)
1480
  report_path = write_json(run_dir / "regulatory_gap_report.json", report)
1481
  write_markdown(run_dir / "regulatory_gap_report.md", report_md)
1482
 
1483
+ ext = _safe_ext_dict(existing_ext)
1484
+ reg_ext = dict(ext.get("regulatory_gap_assessment", {}) or {})
1485
+ reg_ext[framework] = {
1486
+ "timestamp_utc": datetime.datetime.utcnow().isoformat() + "Z",
1487
+ "summary": report.get("summary", {}),
1488
+ "run_dir": str(run_dir),
1489
+ "matrix_csv": str(matrix_path),
1490
+ "report_json": str(report_path),
1491
+ }
1492
+ ext["regulatory_gap_assessment"] = reg_ext
1493
+ updated_payload = _build_payload_with_extensions(payload, papers, ext)
1494
+ updated_extraction_path = write_json(run_dir / "extraction_details_with_extensions.json", updated_payload)
1495
+
1496
  md = "### Regulatory Gap Summary\n" + report_md
1497
  status = f"✅ Gap assessment complete. Covered={report.get('summary', {}).get('covered', 0)} | Missing={report.get('summary', {}).get('missing', 0)}"
1498
+ return matrix_df, md, str(matrix_path), str(report_path), str(updated_extraction_path), status
1499
 
1500
 
1501
+ def run_cancer_risk_batch_ui(input_csv_file, extraction_json_file):
1502
  if input_csv_file is None:
1503
+ return pd.DataFrame(), None, None, None, None, "Upload a populated cancer risk input CSV."
1504
  try:
1505
  df = pd.read_csv(input_csv_file.name)
1506
  except Exception as e:
1507
+ return pd.DataFrame(), None, None, None, None, f"Could not read CSV: {e}"
1508
 
1509
  missing = [c for c in CANCER_RISK_TEMPLATE_COLUMNS if c not in df.columns]
1510
  if missing:
1511
+ return pd.DataFrame(), None, None, None, None, f"Missing required columns: {missing}"
1512
 
1513
  run_dir = make_run_dir(base_dir="runs")
1514
  rows = df.fillna("").to_dict("records")
 
1516
  try:
1517
  result = run_batch_cancer_risk(rows, run_dir=str(run_dir))
1518
  except MCPClientError as e:
1519
+ return pd.DataFrame(), None, None, None, None, f"MCP server unavailable: {e}"
1520
  except Exception as e:
1521
+ return pd.DataFrame(), None, None, None, None, f"Calculation failed: {e}"
1522
 
1523
  result_rows = result.get("rows", []) if isinstance(result.get("rows", []), list) else []
1524
  out_df = pd.DataFrame(result_rows)
 
1528
  artifacts = result.get("artifacts", {}) if isinstance(result, dict) else {}
1529
  log_path = artifacts.get("log_jsonl", str(run_dir / "cancer_risk_log.jsonl"))
1530
  report_path = artifacts.get("report_md", str(run_dir / "cancer_risk_report.md"))
1531
+ updated_extraction_file = None
1532
+
1533
+ if extraction_json_file is not None:
1534
+ try:
1535
+ payload, papers, existing_ext = _load_extraction_payload(extraction_json_file)
1536
+ ext = _safe_ext_dict(existing_ext)
1537
+ refs = list(ext.get("risk_calculation_refs", []) or [])
1538
+ refs.append(
1539
+ {
1540
+ "timestamp_utc": datetime.datetime.utcnow().isoformat() + "Z",
1541
+ "run_dir": str(run_dir),
1542
+ "results_csv": str(result_csv_path),
1543
+ "log_jsonl": str(log_path),
1544
+ "report_md": str(report_path),
1545
+ "summary": result.get("summary", {}),
1546
+ }
1547
+ )
1548
+ ext["risk_calculation_refs"] = refs
1549
+ updated_payload = _build_payload_with_extensions(payload, papers, ext)
1550
+ updated_extraction_file = str(write_json(run_dir / "extraction_details_with_extensions.json", updated_payload))
1551
+ except Exception:
1552
+ updated_extraction_file = None
1553
 
1554
  summ = result.get("summary", {})
1555
  status = (
1556
  f"✅ Batch complete. total={summ.get('total_rows', 0)} "
1557
  f"ok={summ.get('ok_rows', 0)} error={summ.get('error_rows', 0)}"
1558
  )
1559
+ return out_df, str(result_csv_path), str(log_path), str(report_path), updated_extraction_file, status
1560
 
1561
 
1562
  # =============================
 
1925
  reg_matrix_df = gr.Dataframe(label="Clause-level gap matrix", interactive=False, wrap=True)
1926
  reg_matrix_file = gr.File(label="Download: regulatory_gap_matrix.csv")
1927
  reg_report_file = gr.File(label="Download: regulatory_gap_report.json")
1928
+ reg_updated_extraction_file = gr.File(label="Download: extraction_details_with_extensions.json")
1929
 
1930
  reg_run_btn.click(
1931
  fn=run_regulatory_gap_assessment,
1932
  inputs=[reg_extraction_json, reg_framework, reg_override_notes],
1933
+ outputs=[reg_matrix_df, reg_summary_md, reg_matrix_file, reg_report_file, reg_updated_extraction_file, reg_status]
1934
  )
1935
 
1936
  with gr.Tab("Cancer Risk Calculator"):
 
1946
  template_btn.click(fn=export_blank_cancer_risk_template, inputs=None, outputs=[template_file, template_status])
1947
 
1948
  risk_input_csv = gr.File(label="Upload populated cancer risk input CSV", file_types=[".csv"], file_count="single")
1949
+ risk_extraction_json = gr.File(label="Optional: Upload extraction_details.json to append risk refs", file_types=[".json"], file_count="single")
1950
  risk_run_btn = gr.Button("Run Cancer Risk Batch", variant="primary")
1951
  risk_status = gr.Textbox(label="Status", interactive=False)
1952
  risk_results_df = gr.Dataframe(label="Cancer risk results", interactive=False, wrap=True)
1953
  risk_results_csv = gr.File(label="Download: cancer_risk_results.csv")
1954
  risk_log_file = gr.File(label="Download: cancer_risk_log.jsonl")
1955
  risk_report_file = gr.File(label="Download: cancer_risk_report.md")
1956
+ risk_updated_extraction_file = gr.File(label="Download: extraction_details_with_extensions.json")
1957
 
1958
  risk_run_btn.click(
1959
  fn=run_cancer_risk_batch_ui,
1960
+ inputs=[risk_input_csv, risk_extraction_json],
1961
+ outputs=[risk_results_df, risk_results_csv, risk_log_file, risk_report_file, risk_updated_extraction_file, risk_status]
1962
  )
1963
 
1964
  if __name__ == "__main__":