Spaces:
Running
Running
manpreet88 committed on
Commit ·
c03986e
1
Parent(s): f39b235
Update gradio_interface.py
Browse files - PolyAgent/gradio_interface.py +12 -26
PolyAgent/gradio_interface.py
CHANGED
|
@@ -28,7 +28,7 @@ except Exception as e:
|
|
| 28 |
|
| 29 |
|
| 30 |
# =============================================================================
|
| 31 |
-
# DOI NORMALIZATION HELPERS
|
| 32 |
# =============================================================================
|
| 33 |
_DOI_RE = re.compile(r"^10\.\d{4,9}/\S+$", re.IGNORECASE)
|
| 34 |
|
|
@@ -47,7 +47,7 @@ def doi_to_url(doi: str) -> str:
|
|
| 47 |
return f"https://doi.org/{doi}"
|
| 48 |
|
| 49 |
# -----------------------------------------------------------------------------
|
| 50 |
-
# Console defaults
|
| 51 |
# -----------------------------------------------------------------------------
|
| 52 |
DEFAULT_CASE_BRIEF = (
|
| 53 |
"We are developing a polymer film for high-barrier flexible packaging (food-contact). "
|
|
@@ -81,7 +81,7 @@ DEFAULT_TARGET_BY_PROPERTY = {
|
|
| 81 |
}
|
| 82 |
|
| 83 |
# -----------------------------------------------------------------------------
|
| 84 |
-
#
|
| 85 |
# -----------------------------------------------------------------------------
|
| 86 |
RUN_INSTRUCTIONS_MD = (
|
| 87 |
"### How to run PolyAgent (one-time setup)\n"
|
|
@@ -168,11 +168,6 @@ def _normalize_seed_inputs_for_display(obj: Any) -> Any:
|
|
| 168 |
|
| 169 |
return obj
|
| 170 |
|
| 171 |
-
|
| 172 |
-
# -----------------------------------------------------------------------------
|
| 173 |
-
# Markdown safety: keep polymer endpoint token "[*]" from being rendered as "[]"
|
| 174 |
-
# -----------------------------------------------------------------------------
|
| 175 |
-
# Markdown safety: keep polymer endpoint token "[*]" from being rendered as "[]"
|
| 176 |
_ENDPOINT_TOKEN_RE = re.compile(r"\[\*\]")
|
| 177 |
|
| 178 |
def _escape_endpoint_tokens_for_markdown(text: str) -> str:
|
|
@@ -205,11 +200,10 @@ def _escape_endpoint_tokens_for_markdown(text: str) -> str:
|
|
| 205 |
return "".join(out_parts)
|
| 206 |
|
| 207 |
# -----------------------------------------------------------------------------
|
| 208 |
-
#
|
| 209 |
# -----------------------------------------------------------------------------
|
| 210 |
_NUM_RE = r"[-+]?\d+(?:\.\d+)?"
|
| 211 |
|
| 212 |
-
|
| 213 |
def _infer_property_from_questions(q: str) -> Optional[str]:
|
| 214 |
"""
|
| 215 |
Infer canonical property name from free-text questions.
|
|
@@ -243,7 +237,6 @@ def _infer_property_from_questions(q: str) -> Optional[str]:
|
|
| 243 |
|
| 244 |
return None
|
| 245 |
|
| 246 |
-
|
| 247 |
def _infer_target_value_from_questions(q: str, prop: Optional[str]) -> Optional[float]:
|
| 248 |
"""
|
| 249 |
Infer numeric target_value from free-text questions.
|
|
@@ -338,7 +331,7 @@ def _infer_seed_psmiles_from_questions(q: str) -> Optional[str]:
|
|
| 338 |
if not text:
|
| 339 |
return None
|
| 340 |
|
| 341 |
-
# 1) Prefer code block content
|
| 342 |
code_blocks = re.findall(r"```(?:\w+)?\s*([\s\S]*?)```", text)
|
| 343 |
for block in code_blocks:
|
| 344 |
for line in (block or "").splitlines():
|
|
@@ -356,10 +349,6 @@ def _infer_seed_psmiles_from_questions(q: str) -> Optional[str]:
|
|
| 356 |
|
| 357 |
return None
|
| 358 |
|
| 359 |
-
|
| 360 |
-
# -----------------------------------------------------------------------------
|
| 361 |
-
# Domain normalization: show ROOT domain like nature.com, springer.com, etc.
|
| 362 |
-
# -----------------------------------------------------------------------------
|
| 363 |
_SECOND_LEVEL_TLDS = {
|
| 364 |
"co.uk",
|
| 365 |
"ac.uk",
|
|
@@ -400,7 +389,6 @@ def _root_domain(netloc: str) -> str:
|
|
| 400 |
return last3
|
| 401 |
return last2
|
| 402 |
|
| 403 |
-
|
| 404 |
def _url_to_domain(url: str) -> Optional[str]:
|
| 405 |
if not isinstance(url, str) or not url.strip():
|
| 406 |
return None
|
|
@@ -524,7 +512,7 @@ def ensure_orch(state: Dict[str, Any]) -> Tuple[PolymerOrchestrator, Dict[str, A
|
|
| 524 |
|
| 525 |
|
| 526 |
# -----------------------------------------------------------------------------
|
| 527 |
-
#
|
| 528 |
# -----------------------------------------------------------------------------
|
| 529 |
def _extract_tool_output(exec_res: Dict[str, Any], tool_name: str) -> Optional[Any]:
|
| 530 |
"""
|
|
@@ -670,7 +658,7 @@ def _maybe_add_artifacts(
|
|
| 670 |
except Exception as e:
|
| 671 |
extras["gen_grid_error"] = str(e)
|
| 672 |
|
| 673 |
-
#
|
| 674 |
try:
|
| 675 |
seed_psmiles = ((report.get("summary", {}) or {}).get("property_prediction", {}) or {}).get("psmiles")
|
| 676 |
if not seed_psmiles:
|
|
@@ -683,7 +671,7 @@ def _maybe_add_artifacts(
|
|
| 683 |
except Exception as e:
|
| 684 |
extras["mol_render_error"] = str(e)
|
| 685 |
|
| 686 |
-
# Explainability heatmap
|
| 687 |
try:
|
| 688 |
summary = report.get("summary", {}) or {}
|
| 689 |
tool_outputs = report.get("tool_outputs", {}) or {}
|
|
@@ -756,7 +744,6 @@ def _collect_citations(report: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
| 756 |
if isinstance(s, dict):
|
| 757 |
sources.append(s)
|
| 758 |
|
| 759 |
-
# fallback walk
|
| 760 |
if not sources:
|
| 761 |
def walk(node: Any):
|
| 762 |
if isinstance(node, dict):
|
|
@@ -809,7 +796,6 @@ def _collect_citations(report: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
| 809 |
title = s.get("title") or "Untitled"
|
| 810 |
|
| 811 |
dedup[key] = {
|
| 812 |
-
# Keep key name "domain" for UI compatibility, but it now holds the DOI URL / URL text requirement.
|
| 813 |
"domain": cite_url,
|
| 814 |
"title": title,
|
| 815 |
"url": cite_url,
|
|
@@ -937,12 +923,12 @@ def _assign_tool_tags(plan: Dict[str, Any], exec_res: Dict[str, Any], report: Di
|
|
| 937 |
|
| 938 |
|
| 939 |
# -----------------------------------------------------------------------------
|
| 940 |
-
# PolyAgent Console
|
| 941 |
# -----------------------------------------------------------------------------
|
| 942 |
def run_agent(state: Dict[str, Any], questions: str) -> Tuple[str, List[str]]:
|
| 943 |
orch, ctx = ensure_orch(state)
|
| 944 |
|
| 945 |
-
# ---------- AUTO-DETECTION
|
| 946 |
qtxt = questions or ""
|
| 947 |
|
| 948 |
inferred_prop = _infer_property_from_questions(qtxt) or DEFAULT_PROPERTY_NAME
|
|
@@ -1028,7 +1014,7 @@ def run_agent(state: Dict[str, Any], questions: str) -> Tuple[str, List[str]]:
|
|
| 1028 |
# Tool tags: ALWAYS [T]
|
| 1029 |
_assign_tool_tags(plan=plan, exec_res=exec_res, report=report)
|
| 1030 |
|
| 1031 |
-
# Normalize seed-related
|
| 1032 |
report = _normalize_seed_inputs_for_display(report)
|
| 1033 |
ctx["last_report"] = report
|
| 1034 |
|
|
@@ -1069,7 +1055,7 @@ def run_agent(state: Dict[str, Any], questions: str) -> Tuple[str, List[str]]:
|
|
| 1069 |
return final_md, imgs
|
| 1070 |
|
| 1071 |
|
| 1072 |
-
# ----------------------------- Advanced Tools
|
| 1073 |
def tool_data_extraction(state: Dict[str, Any], psmiles: str) -> Tuple[str, List[str]]:
|
| 1074 |
orch, ctx = ensure_orch(state)
|
| 1075 |
psmiles = _convert_at_to_star(psmiles)
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
# =============================================================================
|
| 31 |
+
# DOI NORMALIZATION HELPERS
|
| 32 |
# =============================================================================
|
| 33 |
_DOI_RE = re.compile(r"^10\.\d{4,9}/\S+$", re.IGNORECASE)
|
| 34 |
|
|
|
|
| 47 |
return f"https://doi.org/{doi}"
|
| 48 |
|
| 49 |
# -----------------------------------------------------------------------------
|
| 50 |
+
# Console defaults
|
| 51 |
# -----------------------------------------------------------------------------
|
| 52 |
DEFAULT_CASE_BRIEF = (
|
| 53 |
"We are developing a polymer film for high-barrier flexible packaging (food-contact). "
|
|
|
|
| 81 |
}
|
| 82 |
|
| 83 |
# -----------------------------------------------------------------------------
|
| 84 |
+
# Run instructions bubble
|
| 85 |
# -----------------------------------------------------------------------------
|
| 86 |
RUN_INSTRUCTIONS_MD = (
|
| 87 |
"### How to run PolyAgent (one-time setup)\n"
|
|
|
|
| 168 |
|
| 169 |
return obj
|
| 170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
_ENDPOINT_TOKEN_RE = re.compile(r"\[\*\]")
|
| 172 |
|
| 173 |
def _escape_endpoint_tokens_for_markdown(text: str) -> str:
|
|
|
|
| 200 |
return "".join(out_parts)
|
| 201 |
|
| 202 |
# -----------------------------------------------------------------------------
|
| 203 |
+
# Auto-detect property / target_value / seed from Questions
|
| 204 |
# -----------------------------------------------------------------------------
|
| 205 |
_NUM_RE = r"[-+]?\d+(?:\.\d+)?"
|
| 206 |
|
|
|
|
| 207 |
def _infer_property_from_questions(q: str) -> Optional[str]:
|
| 208 |
"""
|
| 209 |
Infer canonical property name from free-text questions.
|
|
|
|
| 237 |
|
| 238 |
return None
|
| 239 |
|
|
|
|
| 240 |
def _infer_target_value_from_questions(q: str, prop: Optional[str]) -> Optional[float]:
|
| 241 |
"""
|
| 242 |
Infer numeric target_value from free-text questions.
|
|
|
|
| 331 |
if not text:
|
| 332 |
return None
|
| 333 |
|
| 334 |
+
# 1) Prefer code block content
|
| 335 |
code_blocks = re.findall(r"```(?:\w+)?\s*([\s\S]*?)```", text)
|
| 336 |
for block in code_blocks:
|
| 337 |
for line in (block or "").splitlines():
|
|
|
|
| 349 |
|
| 350 |
return None
|
| 351 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
_SECOND_LEVEL_TLDS = {
|
| 353 |
"co.uk",
|
| 354 |
"ac.uk",
|
|
|
|
| 389 |
return last3
|
| 390 |
return last2
|
| 391 |
|
|
|
|
| 392 |
def _url_to_domain(url: str) -> Optional[str]:
|
| 393 |
if not isinstance(url, str) or not url.strip():
|
| 394 |
return None
|
|
|
|
| 512 |
|
| 513 |
|
| 514 |
# -----------------------------------------------------------------------------
|
| 515 |
+
# Extract tool output so the PLAN drives the final report
|
| 516 |
# -----------------------------------------------------------------------------
|
| 517 |
def _extract_tool_output(exec_res: Dict[str, Any], tool_name: str) -> Optional[Any]:
|
| 518 |
"""
|
|
|
|
| 658 |
except Exception as e:
|
| 659 |
extras["gen_grid_error"] = str(e)
|
| 660 |
|
| 661 |
+
# Polymer render (seed)
|
| 662 |
try:
|
| 663 |
seed_psmiles = ((report.get("summary", {}) or {}).get("property_prediction", {}) or {}).get("psmiles")
|
| 664 |
if not seed_psmiles:
|
|
|
|
| 671 |
except Exception as e:
|
| 672 |
extras["mol_render_error"] = str(e)
|
| 673 |
|
| 674 |
+
# Explainability heatmap
|
| 675 |
try:
|
| 676 |
summary = report.get("summary", {}) or {}
|
| 677 |
tool_outputs = report.get("tool_outputs", {}) or {}
|
|
|
|
| 744 |
if isinstance(s, dict):
|
| 745 |
sources.append(s)
|
| 746 |
|
|
|
|
| 747 |
if not sources:
|
| 748 |
def walk(node: Any):
|
| 749 |
if isinstance(node, dict):
|
|
|
|
| 796 |
title = s.get("title") or "Untitled"
|
| 797 |
|
| 798 |
dedup[key] = {
|
|
|
|
| 799 |
"domain": cite_url,
|
| 800 |
"title": title,
|
| 801 |
"url": cite_url,
|
|
|
|
| 923 |
|
| 924 |
|
| 925 |
# -----------------------------------------------------------------------------
|
| 926 |
+
# PolyAgent Console
|
| 927 |
# -----------------------------------------------------------------------------
|
| 928 |
def run_agent(state: Dict[str, Any], questions: str) -> Tuple[str, List[str]]:
|
| 929 |
orch, ctx = ensure_orch(state)
|
| 930 |
|
| 931 |
+
# ---------- AUTO-DETECTION ----------
|
| 932 |
qtxt = questions or ""
|
| 933 |
|
| 934 |
inferred_prop = _infer_property_from_questions(qtxt) or DEFAULT_PROPERTY_NAME
|
|
|
|
| 1014 |
# Tool tags: ALWAYS [T]
|
| 1015 |
_assign_tool_tags(plan=plan, exec_res=exec_res, report=report)
|
| 1016 |
|
| 1017 |
+
# Normalize seed-related PSMILES for display only
|
| 1018 |
report = _normalize_seed_inputs_for_display(report)
|
| 1019 |
ctx["last_report"] = report
|
| 1020 |
|
|
|
|
| 1055 |
return final_md, imgs
|
| 1056 |
|
| 1057 |
|
| 1058 |
+
# ----------------------------- Advanced Tools ----------------------------- #
|
| 1059 |
def tool_data_extraction(state: Dict[str, Any], psmiles: str) -> Tuple[str, List[str]]:
|
| 1060 |
orch, ctx = ensure_orch(state)
|
| 1061 |
psmiles = _convert_at_to_star(psmiles)
|