Spaces:

hchevva
/

NLP_Project

Runtime error

App Files Files Community

hchevva committed on Feb 15

Commit

f94df25

verified ·

1 Parent(s): 40a8012

Upload app.py

Browse files

Files changed (1) hide show

app.py +73 -14

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 import re
 import json
 import tempfile
 from pathlib import Path
 from typing import Dict, List, Tuple, Any, Optional
@@ -1392,6 +1393,26 @@ def _load_extraction_payload(file_obj: Any) -> Tuple[Any, List[Dict[str, Any]],
     raise ValueError("Unsupported extraction_details.json format.")
 def export_blank_cancer_risk_template():
     tmpdir = Path(tempfile.mkdtemp(prefix="tox_template_"))
     path = tmpdir / "cancer_risk_input_template.csv"
@@ -1442,9 +1463,9 @@ def export_prefilled_cancer_risk_template(records: List[Dict[str, Any]]):
 def run_regulatory_gap_assessment(extraction_json_file, framework: str, override_notes: str):
     if extraction_json_file is None:
-        return pd.DataFrame(), "Upload extraction_details.json first.", None, None, "No input file."
     try:
-        payload, _, _ = _load_extraction_payload(extraction_json_file)
         matrix_df, report, report_md = map_extraction_to_framework(
             extraction_payload=payload,
             framework=framework,
@@ -1452,29 +1473,42 @@ def run_regulatory_gap_assessment(extraction_json_file, framework: str, override
             override_notes=override_notes or "",
         )
     except Exception as e:
-        return pd.DataFrame(), f"(assessment unavailable: {e})", None, None, str(e)
     run_dir = make_run_dir(base_dir="runs")
     matrix_path = write_dataframe_csv(run_dir / "regulatory_gap_matrix.csv", matrix_df)
     report_path = write_json(run_dir / "regulatory_gap_report.json", report)
     write_markdown(run_dir / "regulatory_gap_report.md", report_md)
     md = "### Regulatory Gap Summary\n" + report_md
     status = f"✅ Gap assessment complete. Covered={report.get('summary', {}).get('covered', 0)} | Missing={report.get('summary', {}).get('missing', 0)}"
-    return matrix_df, md, str(matrix_path), str(report_path), status
-def run_cancer_risk_batch_ui(input_csv_file):
     if input_csv_file is None:
-        return pd.DataFrame(), None, None, None, "Upload a populated cancer risk input CSV."
     try:
         df = pd.read_csv(input_csv_file.name)
     except Exception as e:
-        return pd.DataFrame(), None, None, None, f"Could not read CSV: {e}"
     missing = [c for c in CANCER_RISK_TEMPLATE_COLUMNS if c not in df.columns]
     if missing:
-        return pd.DataFrame(), None, None, None, f"Missing required columns: {missing}"
     run_dir = make_run_dir(base_dir="runs")
     rows = df.fillna("").to_dict("records")
@@ -1482,9 +1516,9 @@ def run_cancer_risk_batch_ui(input_csv_file):
     try:
         result = run_batch_cancer_risk(rows, run_dir=str(run_dir))
     except MCPClientError as e:
-        return pd.DataFrame(), None, None, None, f"MCP server unavailable: {e}"
     except Exception as e:
-        return pd.DataFrame(), None, None, None, f"Calculation failed: {e}"
     result_rows = result.get("rows", []) if isinstance(result.get("rows", []), list) else []
     out_df = pd.DataFrame(result_rows)
@@ -1494,13 +1528,35 @@ def run_cancer_risk_batch_ui(input_csv_file):
     artifacts = result.get("artifacts", {}) if isinstance(result, dict) else {}
     log_path = artifacts.get("log_jsonl", str(run_dir / "cancer_risk_log.jsonl"))
     report_path = artifacts.get("report_md", str(run_dir / "cancer_risk_report.md"))
     summ = result.get("summary", {})
     status = (
         f"✅ Batch complete. total={summ.get('total_rows', 0)} "
         f"ok={summ.get('ok_rows', 0)} error={summ.get('error_rows', 0)}"
     )
-    return out_df, str(result_csv_path), str(log_path), str(report_path), status
 # =============================
@@ -1869,11 +1925,12 @@ with gr.Blocks(title="Toxicology PDF → Grounded Extractor", css=APP_CSS) as de
             reg_matrix_df = gr.Dataframe(label="Clause-level gap matrix", interactive=False, wrap=True)
             reg_matrix_file = gr.File(label="Download: regulatory_gap_matrix.csv")
             reg_report_file = gr.File(label="Download: regulatory_gap_report.json")
             reg_run_btn.click(
                 fn=run_regulatory_gap_assessment,
                 inputs=[reg_extraction_json, reg_framework, reg_override_notes],
-                outputs=[reg_matrix_df, reg_summary_md, reg_matrix_file, reg_report_file, reg_status]
             )
     with gr.Tab("Cancer Risk Calculator"):
@@ -1889,17 +1946,19 @@ with gr.Blocks(title="Toxicology PDF → Grounded Extractor", css=APP_CSS) as de
             template_btn.click(fn=export_blank_cancer_risk_template, inputs=None, outputs=[template_file, template_status])
             risk_input_csv = gr.File(label="Upload populated cancer risk input CSV", file_types=[".csv"], file_count="single")
             risk_run_btn = gr.Button("Run Cancer Risk Batch", variant="primary")
             risk_status = gr.Textbox(label="Status", interactive=False)
             risk_results_df = gr.Dataframe(label="Cancer risk results", interactive=False, wrap=True)
             risk_results_csv = gr.File(label="Download: cancer_risk_results.csv")
             risk_log_file = gr.File(label="Download: cancer_risk_log.jsonl")
             risk_report_file = gr.File(label="Download: cancer_risk_report.md")
             risk_run_btn.click(
                 fn=run_cancer_risk_batch_ui,
-                inputs=[risk_input_csv],
-                outputs=[risk_results_df, risk_results_csv, risk_log_file, risk_report_file, risk_status]
             )
 if __name__ == "__main__":

 import re
 import json
 import tempfile
+import datetime
 from pathlib import Path
 from typing import Dict, List, Tuple, Any, Optional
     raise ValueError("Unsupported extraction_details.json format.")
+def _build_payload_with_extensions(payload: Any, papers: List[Dict[str, Any]], ext: Dict[str, Any]) -> Dict[str, Any]:
+    base: Dict[str, Any] = {}
+    if isinstance(payload, dict):
+        base = dict(payload)
+    base["papers"] = papers
+    base["toxra_extensions"] = ext
+    return base
+def _safe_ext_dict(ext: Dict[str, Any]) -> Dict[str, Any]:
+    out = dict(ext or {})
+    if not isinstance(out.get("nlp_diagnostics", []), list):
+        out["nlp_diagnostics"] = []
+    if not isinstance(out.get("regulatory_gap_assessment", {}), dict):
+        out["regulatory_gap_assessment"] = {}
+    if not isinstance(out.get("risk_calculation_refs", []), list):
+        out["risk_calculation_refs"] = []
+    return out
 def export_blank_cancer_risk_template():
     tmpdir = Path(tempfile.mkdtemp(prefix="tox_template_"))
     path = tmpdir / "cancer_risk_input_template.csv"
 def run_regulatory_gap_assessment(extraction_json_file, framework: str, override_notes: str):
     if extraction_json_file is None:
+        return pd.DataFrame(), "Upload extraction_details.json first.", None, None, None, "No input file."
     try:
+        payload, papers, existing_ext = _load_extraction_payload(extraction_json_file)
         matrix_df, report, report_md = map_extraction_to_framework(
             extraction_payload=payload,
             framework=framework,
             override_notes=override_notes or "",
         )
     except Exception as e:
+        return pd.DataFrame(), f"(assessment unavailable: {e})", None, None, None, str(e)
     run_dir = make_run_dir(base_dir="runs")
     matrix_path = write_dataframe_csv(run_dir / "regulatory_gap_matrix.csv", matrix_df)
     report_path = write_json(run_dir / "regulatory_gap_report.json", report)
     write_markdown(run_dir / "regulatory_gap_report.md", report_md)
+    ext = _safe_ext_dict(existing_ext)
+    reg_ext = dict(ext.get("regulatory_gap_assessment", {}) or {})
+    reg_ext[framework] = {
+        "timestamp_utc": datetime.datetime.utcnow().isoformat() + "Z",
+        "summary": report.get("summary", {}),
+        "run_dir": str(run_dir),
+        "matrix_csv": str(matrix_path),
+        "report_json": str(report_path),
+    }
+    ext["regulatory_gap_assessment"] = reg_ext
+    updated_payload = _build_payload_with_extensions(payload, papers, ext)
+    updated_extraction_path = write_json(run_dir / "extraction_details_with_extensions.json", updated_payload)
     md = "### Regulatory Gap Summary\n" + report_md
     status = f"✅ Gap assessment complete. Covered={report.get('summary', {}).get('covered', 0)} | Missing={report.get('summary', {}).get('missing', 0)}"
+    return matrix_df, md, str(matrix_path), str(report_path), str(updated_extraction_path), status
+def run_cancer_risk_batch_ui(input_csv_file, extraction_json_file):
     if input_csv_file is None:
+        return pd.DataFrame(), None, None, None, None, "Upload a populated cancer risk input CSV."
     try:
         df = pd.read_csv(input_csv_file.name)
     except Exception as e:
+        return pd.DataFrame(), None, None, None, None, f"Could not read CSV: {e}"
     missing = [c for c in CANCER_RISK_TEMPLATE_COLUMNS if c not in df.columns]
     if missing:
+        return pd.DataFrame(), None, None, None, None, f"Missing required columns: {missing}"
     run_dir = make_run_dir(base_dir="runs")
     rows = df.fillna("").to_dict("records")
     try:
         result = run_batch_cancer_risk(rows, run_dir=str(run_dir))
     except MCPClientError as e:
+        return pd.DataFrame(), None, None, None, None, f"MCP server unavailable: {e}"
     except Exception as e:
+        return pd.DataFrame(), None, None, None, None, f"Calculation failed: {e}"
     result_rows = result.get("rows", []) if isinstance(result.get("rows", []), list) else []
     out_df = pd.DataFrame(result_rows)
     artifacts = result.get("artifacts", {}) if isinstance(result, dict) else {}
     log_path = artifacts.get("log_jsonl", str(run_dir / "cancer_risk_log.jsonl"))
     report_path = artifacts.get("report_md", str(run_dir / "cancer_risk_report.md"))
+    updated_extraction_file = None
+    if extraction_json_file is not None:
+        try:
+            payload, papers, existing_ext = _load_extraction_payload(extraction_json_file)
+            ext = _safe_ext_dict(existing_ext)
+            refs = list(ext.get("risk_calculation_refs", []) or [])
+            refs.append(
+                {
+                    "timestamp_utc": datetime.datetime.utcnow().isoformat() + "Z",
+                    "run_dir": str(run_dir),
+                    "results_csv": str(result_csv_path),
+                    "log_jsonl": str(log_path),
+                    "report_md": str(report_path),
+                    "summary": result.get("summary", {}),
+                }
+            )
+            ext["risk_calculation_refs"] = refs
+            updated_payload = _build_payload_with_extensions(payload, papers, ext)
+            updated_extraction_file = str(write_json(run_dir / "extraction_details_with_extensions.json", updated_payload))
+        except Exception:
+            updated_extraction_file = None
     summ = result.get("summary", {})
     status = (
         f"✅ Batch complete. total={summ.get('total_rows', 0)} "
         f"ok={summ.get('ok_rows', 0)} error={summ.get('error_rows', 0)}"
     )
+    return out_df, str(result_csv_path), str(log_path), str(report_path), updated_extraction_file, status
 # =============================
             reg_matrix_df = gr.Dataframe(label="Clause-level gap matrix", interactive=False, wrap=True)
             reg_matrix_file = gr.File(label="Download: regulatory_gap_matrix.csv")
             reg_report_file = gr.File(label="Download: regulatory_gap_report.json")
+            reg_updated_extraction_file = gr.File(label="Download: extraction_details_with_extensions.json")
             reg_run_btn.click(
                 fn=run_regulatory_gap_assessment,
                 inputs=[reg_extraction_json, reg_framework, reg_override_notes],
+                outputs=[reg_matrix_df, reg_summary_md, reg_matrix_file, reg_report_file, reg_updated_extraction_file, reg_status]
             )
     with gr.Tab("Cancer Risk Calculator"):
             template_btn.click(fn=export_blank_cancer_risk_template, inputs=None, outputs=[template_file, template_status])
             risk_input_csv = gr.File(label="Upload populated cancer risk input CSV", file_types=[".csv"], file_count="single")
+            risk_extraction_json = gr.File(label="Optional: Upload extraction_details.json to append risk refs", file_types=[".json"], file_count="single")
             risk_run_btn = gr.Button("Run Cancer Risk Batch", variant="primary")
             risk_status = gr.Textbox(label="Status", interactive=False)
             risk_results_df = gr.Dataframe(label="Cancer risk results", interactive=False, wrap=True)
             risk_results_csv = gr.File(label="Download: cancer_risk_results.csv")
             risk_log_file = gr.File(label="Download: cancer_risk_log.jsonl")
             risk_report_file = gr.File(label="Download: cancer_risk_report.md")
+            risk_updated_extraction_file = gr.File(label="Download: extraction_details_with_extensions.json")
             risk_run_btn.click(
                 fn=run_cancer_risk_batch_ui,
+                inputs=[risk_input_csv, risk_extraction_json],
+                outputs=[risk_results_df, risk_results_csv, risk_log_file, risk_report_file, risk_updated_extraction_file, risk_status]
             )
 if __name__ == "__main__":