Spaces:
Runtime error
Runtime error
Upload app.py
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import os
|
|
| 2 |
import re
|
| 3 |
import json
|
| 4 |
import tempfile
|
|
|
|
| 5 |
from pathlib import Path
|
| 6 |
from typing import Dict, List, Tuple, Any, Optional
|
| 7 |
|
|
@@ -1392,6 +1393,26 @@ def _load_extraction_payload(file_obj: Any) -> Tuple[Any, List[Dict[str, Any]],
|
|
| 1392 |
raise ValueError("Unsupported extraction_details.json format.")
|
| 1393 |
|
| 1394 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1395 |
def export_blank_cancer_risk_template():
|
| 1396 |
tmpdir = Path(tempfile.mkdtemp(prefix="tox_template_"))
|
| 1397 |
path = tmpdir / "cancer_risk_input_template.csv"
|
|
@@ -1442,9 +1463,9 @@ def export_prefilled_cancer_risk_template(records: List[Dict[str, Any]]):
|
|
| 1442 |
|
| 1443 |
def run_regulatory_gap_assessment(extraction_json_file, framework: str, override_notes: str):
|
| 1444 |
if extraction_json_file is None:
|
| 1445 |
-
return pd.DataFrame(), "Upload extraction_details.json first.", None, None, "No input file."
|
| 1446 |
try:
|
| 1447 |
-
payload,
|
| 1448 |
matrix_df, report, report_md = map_extraction_to_framework(
|
| 1449 |
extraction_payload=payload,
|
| 1450 |
framework=framework,
|
|
@@ -1452,29 +1473,42 @@ def run_regulatory_gap_assessment(extraction_json_file, framework: str, override
|
|
| 1452 |
override_notes=override_notes or "",
|
| 1453 |
)
|
| 1454 |
except Exception as e:
|
| 1455 |
-
return pd.DataFrame(), f"(assessment unavailable: {e})", None, None, str(e)
|
| 1456 |
|
| 1457 |
run_dir = make_run_dir(base_dir="runs")
|
| 1458 |
matrix_path = write_dataframe_csv(run_dir / "regulatory_gap_matrix.csv", matrix_df)
|
| 1459 |
report_path = write_json(run_dir / "regulatory_gap_report.json", report)
|
| 1460 |
write_markdown(run_dir / "regulatory_gap_report.md", report_md)
|
| 1461 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1462 |
md = "### Regulatory Gap Summary\n" + report_md
|
| 1463 |
status = f"✅ Gap assessment complete. Covered={report.get('summary', {}).get('covered', 0)} | Missing={report.get('summary', {}).get('missing', 0)}"
|
| 1464 |
-
return matrix_df, md, str(matrix_path), str(report_path), status
|
| 1465 |
|
| 1466 |
|
| 1467 |
-
def run_cancer_risk_batch_ui(input_csv_file):
|
| 1468 |
if input_csv_file is None:
|
| 1469 |
-
return pd.DataFrame(), None, None, None, "Upload a populated cancer risk input CSV."
|
| 1470 |
try:
|
| 1471 |
df = pd.read_csv(input_csv_file.name)
|
| 1472 |
except Exception as e:
|
| 1473 |
-
return pd.DataFrame(), None, None, None, f"Could not read CSV: {e}"
|
| 1474 |
|
| 1475 |
missing = [c for c in CANCER_RISK_TEMPLATE_COLUMNS if c not in df.columns]
|
| 1476 |
if missing:
|
| 1477 |
-
return pd.DataFrame(), None, None, None, f"Missing required columns: {missing}"
|
| 1478 |
|
| 1479 |
run_dir = make_run_dir(base_dir="runs")
|
| 1480 |
rows = df.fillna("").to_dict("records")
|
|
@@ -1482,9 +1516,9 @@ def run_cancer_risk_batch_ui(input_csv_file):
|
|
| 1482 |
try:
|
| 1483 |
result = run_batch_cancer_risk(rows, run_dir=str(run_dir))
|
| 1484 |
except MCPClientError as e:
|
| 1485 |
-
return pd.DataFrame(), None, None, None, f"MCP server unavailable: {e}"
|
| 1486 |
except Exception as e:
|
| 1487 |
-
return pd.DataFrame(), None, None, None, f"Calculation failed: {e}"
|
| 1488 |
|
| 1489 |
result_rows = result.get("rows", []) if isinstance(result.get("rows", []), list) else []
|
| 1490 |
out_df = pd.DataFrame(result_rows)
|
|
@@ -1494,13 +1528,35 @@ def run_cancer_risk_batch_ui(input_csv_file):
|
|
| 1494 |
artifacts = result.get("artifacts", {}) if isinstance(result, dict) else {}
|
| 1495 |
log_path = artifacts.get("log_jsonl", str(run_dir / "cancer_risk_log.jsonl"))
|
| 1496 |
report_path = artifacts.get("report_md", str(run_dir / "cancer_risk_report.md"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1497 |
|
| 1498 |
summ = result.get("summary", {})
|
| 1499 |
status = (
|
| 1500 |
f"✅ Batch complete. total={summ.get('total_rows', 0)} "
|
| 1501 |
f"ok={summ.get('ok_rows', 0)} error={summ.get('error_rows', 0)}"
|
| 1502 |
)
|
| 1503 |
-
return out_df, str(result_csv_path), str(log_path), str(report_path), status
|
| 1504 |
|
| 1505 |
|
| 1506 |
# =============================
|
|
@@ -1869,11 +1925,12 @@ with gr.Blocks(title="Toxicology PDF → Grounded Extractor", css=APP_CSS) as de
|
|
| 1869 |
reg_matrix_df = gr.Dataframe(label="Clause-level gap matrix", interactive=False, wrap=True)
|
| 1870 |
reg_matrix_file = gr.File(label="Download: regulatory_gap_matrix.csv")
|
| 1871 |
reg_report_file = gr.File(label="Download: regulatory_gap_report.json")
|
|
|
|
| 1872 |
|
| 1873 |
reg_run_btn.click(
|
| 1874 |
fn=run_regulatory_gap_assessment,
|
| 1875 |
inputs=[reg_extraction_json, reg_framework, reg_override_notes],
|
| 1876 |
-
outputs=[reg_matrix_df, reg_summary_md, reg_matrix_file, reg_report_file, reg_status]
|
| 1877 |
)
|
| 1878 |
|
| 1879 |
with gr.Tab("Cancer Risk Calculator"):
|
|
@@ -1889,17 +1946,19 @@ with gr.Blocks(title="Toxicology PDF → Grounded Extractor", css=APP_CSS) as de
|
|
| 1889 |
template_btn.click(fn=export_blank_cancer_risk_template, inputs=None, outputs=[template_file, template_status])
|
| 1890 |
|
| 1891 |
risk_input_csv = gr.File(label="Upload populated cancer risk input CSV", file_types=[".csv"], file_count="single")
|
|
|
|
| 1892 |
risk_run_btn = gr.Button("Run Cancer Risk Batch", variant="primary")
|
| 1893 |
risk_status = gr.Textbox(label="Status", interactive=False)
|
| 1894 |
risk_results_df = gr.Dataframe(label="Cancer risk results", interactive=False, wrap=True)
|
| 1895 |
risk_results_csv = gr.File(label="Download: cancer_risk_results.csv")
|
| 1896 |
risk_log_file = gr.File(label="Download: cancer_risk_log.jsonl")
|
| 1897 |
risk_report_file = gr.File(label="Download: cancer_risk_report.md")
|
|
|
|
| 1898 |
|
| 1899 |
risk_run_btn.click(
|
| 1900 |
fn=run_cancer_risk_batch_ui,
|
| 1901 |
-
inputs=[risk_input_csv],
|
| 1902 |
-
outputs=[risk_results_df, risk_results_csv, risk_log_file, risk_report_file, risk_status]
|
| 1903 |
)
|
| 1904 |
|
| 1905 |
if __name__ == "__main__":
|
|
|
|
| 2 |
import re
|
| 3 |
import json
|
| 4 |
import tempfile
|
| 5 |
+
import datetime
|
| 6 |
from pathlib import Path
|
| 7 |
from typing import Dict, List, Tuple, Any, Optional
|
| 8 |
|
|
|
|
| 1393 |
raise ValueError("Unsupported extraction_details.json format.")
|
| 1394 |
|
| 1395 |
|
| 1396 |
+
def _build_payload_with_extensions(payload: Any, papers: List[Dict[str, Any]], ext: Dict[str, Any]) -> Dict[str, Any]:
|
| 1397 |
+
base: Dict[str, Any] = {}
|
| 1398 |
+
if isinstance(payload, dict):
|
| 1399 |
+
base = dict(payload)
|
| 1400 |
+
base["papers"] = papers
|
| 1401 |
+
base["toxra_extensions"] = ext
|
| 1402 |
+
return base
|
| 1403 |
+
|
| 1404 |
+
|
| 1405 |
+
def _safe_ext_dict(ext: Dict[str, Any]) -> Dict[str, Any]:
|
| 1406 |
+
out = dict(ext or {})
|
| 1407 |
+
if not isinstance(out.get("nlp_diagnostics", []), list):
|
| 1408 |
+
out["nlp_diagnostics"] = []
|
| 1409 |
+
if not isinstance(out.get("regulatory_gap_assessment", {}), dict):
|
| 1410 |
+
out["regulatory_gap_assessment"] = {}
|
| 1411 |
+
if not isinstance(out.get("risk_calculation_refs", []), list):
|
| 1412 |
+
out["risk_calculation_refs"] = []
|
| 1413 |
+
return out
|
| 1414 |
+
|
| 1415 |
+
|
| 1416 |
def export_blank_cancer_risk_template():
|
| 1417 |
tmpdir = Path(tempfile.mkdtemp(prefix="tox_template_"))
|
| 1418 |
path = tmpdir / "cancer_risk_input_template.csv"
|
|
|
|
| 1463 |
|
| 1464 |
def run_regulatory_gap_assessment(extraction_json_file, framework: str, override_notes: str):
|
| 1465 |
if extraction_json_file is None:
|
| 1466 |
+
return pd.DataFrame(), "Upload extraction_details.json first.", None, None, None, "No input file."
|
| 1467 |
try:
|
| 1468 |
+
payload, papers, existing_ext = _load_extraction_payload(extraction_json_file)
|
| 1469 |
matrix_df, report, report_md = map_extraction_to_framework(
|
| 1470 |
extraction_payload=payload,
|
| 1471 |
framework=framework,
|
|
|
|
| 1473 |
override_notes=override_notes or "",
|
| 1474 |
)
|
| 1475 |
except Exception as e:
|
| 1476 |
+
return pd.DataFrame(), f"(assessment unavailable: {e})", None, None, None, str(e)
|
| 1477 |
|
| 1478 |
run_dir = make_run_dir(base_dir="runs")
|
| 1479 |
matrix_path = write_dataframe_csv(run_dir / "regulatory_gap_matrix.csv", matrix_df)
|
| 1480 |
report_path = write_json(run_dir / "regulatory_gap_report.json", report)
|
| 1481 |
write_markdown(run_dir / "regulatory_gap_report.md", report_md)
|
| 1482 |
|
| 1483 |
+
ext = _safe_ext_dict(existing_ext)
|
| 1484 |
+
reg_ext = dict(ext.get("regulatory_gap_assessment", {}) or {})
|
| 1485 |
+
reg_ext[framework] = {
|
| 1486 |
+
"timestamp_utc": datetime.datetime.utcnow().isoformat() + "Z",
|
| 1487 |
+
"summary": report.get("summary", {}),
|
| 1488 |
+
"run_dir": str(run_dir),
|
| 1489 |
+
"matrix_csv": str(matrix_path),
|
| 1490 |
+
"report_json": str(report_path),
|
| 1491 |
+
}
|
| 1492 |
+
ext["regulatory_gap_assessment"] = reg_ext
|
| 1493 |
+
updated_payload = _build_payload_with_extensions(payload, papers, ext)
|
| 1494 |
+
updated_extraction_path = write_json(run_dir / "extraction_details_with_extensions.json", updated_payload)
|
| 1495 |
+
|
| 1496 |
md = "### Regulatory Gap Summary\n" + report_md
|
| 1497 |
status = f"✅ Gap assessment complete. Covered={report.get('summary', {}).get('covered', 0)} | Missing={report.get('summary', {}).get('missing', 0)}"
|
| 1498 |
+
return matrix_df, md, str(matrix_path), str(report_path), str(updated_extraction_path), status
|
| 1499 |
|
| 1500 |
|
| 1501 |
+
def run_cancer_risk_batch_ui(input_csv_file, extraction_json_file):
|
| 1502 |
if input_csv_file is None:
|
| 1503 |
+
return pd.DataFrame(), None, None, None, None, "Upload a populated cancer risk input CSV."
|
| 1504 |
try:
|
| 1505 |
df = pd.read_csv(input_csv_file.name)
|
| 1506 |
except Exception as e:
|
| 1507 |
+
return pd.DataFrame(), None, None, None, None, f"Could not read CSV: {e}"
|
| 1508 |
|
| 1509 |
missing = [c for c in CANCER_RISK_TEMPLATE_COLUMNS if c not in df.columns]
|
| 1510 |
if missing:
|
| 1511 |
+
return pd.DataFrame(), None, None, None, None, f"Missing required columns: {missing}"
|
| 1512 |
|
| 1513 |
run_dir = make_run_dir(base_dir="runs")
|
| 1514 |
rows = df.fillna("").to_dict("records")
|
|
|
|
| 1516 |
try:
|
| 1517 |
result = run_batch_cancer_risk(rows, run_dir=str(run_dir))
|
| 1518 |
except MCPClientError as e:
|
| 1519 |
+
return pd.DataFrame(), None, None, None, None, f"MCP server unavailable: {e}"
|
| 1520 |
except Exception as e:
|
| 1521 |
+
return pd.DataFrame(), None, None, None, None, f"Calculation failed: {e}"
|
| 1522 |
|
| 1523 |
result_rows = result.get("rows", []) if isinstance(result.get("rows", []), list) else []
|
| 1524 |
out_df = pd.DataFrame(result_rows)
|
|
|
|
| 1528 |
artifacts = result.get("artifacts", {}) if isinstance(result, dict) else {}
|
| 1529 |
log_path = artifacts.get("log_jsonl", str(run_dir / "cancer_risk_log.jsonl"))
|
| 1530 |
report_path = artifacts.get("report_md", str(run_dir / "cancer_risk_report.md"))
|
| 1531 |
+
updated_extraction_file = None
|
| 1532 |
+
|
| 1533 |
+
if extraction_json_file is not None:
|
| 1534 |
+
try:
|
| 1535 |
+
payload, papers, existing_ext = _load_extraction_payload(extraction_json_file)
|
| 1536 |
+
ext = _safe_ext_dict(existing_ext)
|
| 1537 |
+
refs = list(ext.get("risk_calculation_refs", []) or [])
|
| 1538 |
+
refs.append(
|
| 1539 |
+
{
|
| 1540 |
+
"timestamp_utc": datetime.datetime.utcnow().isoformat() + "Z",
|
| 1541 |
+
"run_dir": str(run_dir),
|
| 1542 |
+
"results_csv": str(result_csv_path),
|
| 1543 |
+
"log_jsonl": str(log_path),
|
| 1544 |
+
"report_md": str(report_path),
|
| 1545 |
+
"summary": result.get("summary", {}),
|
| 1546 |
+
}
|
| 1547 |
+
)
|
| 1548 |
+
ext["risk_calculation_refs"] = refs
|
| 1549 |
+
updated_payload = _build_payload_with_extensions(payload, papers, ext)
|
| 1550 |
+
updated_extraction_file = str(write_json(run_dir / "extraction_details_with_extensions.json", updated_payload))
|
| 1551 |
+
except Exception:
|
| 1552 |
+
updated_extraction_file = None
|
| 1553 |
|
| 1554 |
summ = result.get("summary", {})
|
| 1555 |
status = (
|
| 1556 |
f"✅ Batch complete. total={summ.get('total_rows', 0)} "
|
| 1557 |
f"ok={summ.get('ok_rows', 0)} error={summ.get('error_rows', 0)}"
|
| 1558 |
)
|
| 1559 |
+
return out_df, str(result_csv_path), str(log_path), str(report_path), updated_extraction_file, status
|
| 1560 |
|
| 1561 |
|
| 1562 |
# =============================
|
|
|
|
| 1925 |
reg_matrix_df = gr.Dataframe(label="Clause-level gap matrix", interactive=False, wrap=True)
|
| 1926 |
reg_matrix_file = gr.File(label="Download: regulatory_gap_matrix.csv")
|
| 1927 |
reg_report_file = gr.File(label="Download: regulatory_gap_report.json")
|
| 1928 |
+
reg_updated_extraction_file = gr.File(label="Download: extraction_details_with_extensions.json")
|
| 1929 |
|
| 1930 |
reg_run_btn.click(
|
| 1931 |
fn=run_regulatory_gap_assessment,
|
| 1932 |
inputs=[reg_extraction_json, reg_framework, reg_override_notes],
|
| 1933 |
+
outputs=[reg_matrix_df, reg_summary_md, reg_matrix_file, reg_report_file, reg_updated_extraction_file, reg_status]
|
| 1934 |
)
|
| 1935 |
|
| 1936 |
with gr.Tab("Cancer Risk Calculator"):
|
|
|
|
| 1946 |
template_btn.click(fn=export_blank_cancer_risk_template, inputs=None, outputs=[template_file, template_status])
|
| 1947 |
|
| 1948 |
risk_input_csv = gr.File(label="Upload populated cancer risk input CSV", file_types=[".csv"], file_count="single")
|
| 1949 |
+
risk_extraction_json = gr.File(label="Optional: Upload extraction_details.json to append risk refs", file_types=[".json"], file_count="single")
|
| 1950 |
risk_run_btn = gr.Button("Run Cancer Risk Batch", variant="primary")
|
| 1951 |
risk_status = gr.Textbox(label="Status", interactive=False)
|
| 1952 |
risk_results_df = gr.Dataframe(label="Cancer risk results", interactive=False, wrap=True)
|
| 1953 |
risk_results_csv = gr.File(label="Download: cancer_risk_results.csv")
|
| 1954 |
risk_log_file = gr.File(label="Download: cancer_risk_log.jsonl")
|
| 1955 |
risk_report_file = gr.File(label="Download: cancer_risk_report.md")
|
| 1956 |
+
risk_updated_extraction_file = gr.File(label="Download: extraction_details_with_extensions.json")
|
| 1957 |
|
| 1958 |
risk_run_btn.click(
|
| 1959 |
fn=run_cancer_risk_batch_ui,
|
| 1960 |
+
inputs=[risk_input_csv, risk_extraction_json],
|
| 1961 |
+
outputs=[risk_results_df, risk_results_csv, risk_log_file, risk_report_file, risk_updated_extraction_file, risk_status]
|
| 1962 |
)
|
| 1963 |
|
| 1964 |
if __name__ == "__main__":
|