Spaces:
Running
Running
Update PolyAgent/orchestrator.py
Browse files- PolyAgent/orchestrator.py +10 -14
PolyAgent/orchestrator.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
"""
|
| 2 |
-
PolyAgent Orchestrator
|
| 3 |
===========================
|
| 4 |
|
| 5 |
This file provides a modular orchestrator that:
|
|
@@ -26,11 +26,11 @@ import numpy as np
|
|
| 26 |
import torch
|
| 27 |
import torch.nn as nn
|
| 28 |
|
| 29 |
-
# HF Transformers (for SELFIES-TED decoder
|
| 30 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 31 |
from transformers.modeling_outputs import BaseModelOutput
|
| 32 |
|
| 33 |
-
#
|
| 34 |
try:
|
| 35 |
import requests
|
| 36 |
from bs4 import BeautifulSoup
|
|
@@ -38,7 +38,7 @@ except Exception:
|
|
| 38 |
requests = None
|
| 39 |
BeautifulSoup = None
|
| 40 |
|
| 41 |
-
#
|
| 42 |
try:
|
| 43 |
from rdkit import Chem
|
| 44 |
from rdkit.Chem import Draw
|
|
@@ -51,7 +51,7 @@ try:
|
|
| 51 |
except Exception:
|
| 52 |
cm = None
|
| 53 |
|
| 54 |
-
#
|
| 55 |
try:
|
| 56 |
import joblib
|
| 57 |
except Exception:
|
|
@@ -62,7 +62,7 @@ try:
|
|
| 62 |
except Exception:
|
| 63 |
spm = None
|
| 64 |
|
| 65 |
-
#
|
| 66 |
try:
|
| 67 |
import selfies as sf
|
| 68 |
except Exception:
|
|
@@ -85,13 +85,11 @@ class PathsConfig:
|
|
| 85 |
"""
|
| 86 |
|
| 87 |
def __init__(self):
|
| 88 |
-
# 1) HF model repo
|
| 89 |
-
# Example: "kaurm43/PolyFusionAgent-weights-5m" (change to your real repo_id)
|
| 90 |
self.hf_repo_id = os.getenv("POLYFUSION_WEIGHTS_REPO", "kaurm43/polyfusionagent-weights")
|
| 91 |
self.hf_repo_type = os.getenv("POLYFUSION_WEIGHTS_REPO_TYPE", "model") # usually "model"
|
| 92 |
|
| 93 |
# 2) Where to store downloaded files
|
| 94 |
-
# Prefer /data on Spaces with persistent storage; else use a cache folder.
|
| 95 |
default_root = "/data/polyfusion_cache" if os.path.isdir("/data") else os.path.expanduser("~/.cache/polyfusion_cache")
|
| 96 |
self.local_weights_root = os.getenv("POLYFUSION_WEIGHTS_DIR", default_root)
|
| 97 |
|
|
@@ -99,7 +97,6 @@ class PathsConfig:
|
|
| 99 |
self.hf_token = os.getenv("HF_TOKEN", None)
|
| 100 |
|
| 101 |
# 4) Download (cached) + get local folder path.
|
| 102 |
-
# allow_patterns keeps download smaller/faster (only pull what orchestrator needs).
|
| 103 |
allow = [
|
| 104 |
"tokenizer_spm_5m/**",
|
| 105 |
"polyfusion_cl_5m/**",
|
|
@@ -117,8 +114,7 @@ class PathsConfig:
|
|
| 117 |
allow_patterns=allow,
|
| 118 |
)
|
| 119 |
|
| 120 |
-
# 5) Map to the
|
| 121 |
-
# (Only path wiring changes; no behavior changes elsewhere.)
|
| 122 |
self.cl_weights_path = os.path.join(self._weights_dir, "polyfusion_cl_5m", "pytorch_model.bin")
|
| 123 |
|
| 124 |
# If your Space also includes a local Chroma DB folder in the Space repo,
|
|
@@ -131,7 +127,7 @@ class PathsConfig:
|
|
| 131 |
self.downstream_bestweights_5m_dir = os.path.join(self._weights_dir, "downstream_heads_5m")
|
| 132 |
self.inverse_design_5m_dir = os.path.join(self._weights_dir, "inverse_design_5m")
|
| 133 |
|
| 134 |
-
# 6) Optional: sanity-check required files
|
| 135 |
self._assert_exists(self.cl_weights_path, "CL weights")
|
| 136 |
self._assert_exists(self.spm_model_path, "SentencePiece model")
|
| 137 |
self._assert_exists(self.spm_vocab_path, "SentencePiece vocab")
|
|
@@ -601,7 +597,7 @@ def _assign_tool_tags_to_report(report: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 601 |
|
| 602 |
def _render_tool_outputs_verbatim_md(report: Dict[str, Any]) -> str:
|
| 603 |
"""
|
| 604 |
-
Render tool outputs as verbatim JSON blocks
|
| 605 |
"""
|
| 606 |
if not isinstance(report, dict):
|
| 607 |
return ""
|
|
|
|
| 1 |
"""
|
| 2 |
+
PolyAgent Orchestrator
|
| 3 |
===========================
|
| 4 |
|
| 5 |
This file provides a modular orchestrator that:
|
|
|
|
| 26 |
import torch
|
| 27 |
import torch.nn as nn
|
| 28 |
|
| 29 |
+
# HF Transformers (for SELFIES-TED decoder)
|
| 30 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 31 |
from transformers.modeling_outputs import BaseModelOutput
|
| 32 |
|
| 33 |
+
# Imports for web fetching
|
| 34 |
try:
|
| 35 |
import requests
|
| 36 |
from bs4 import BeautifulSoup
|
|
|
|
| 38 |
requests = None
|
| 39 |
BeautifulSoup = None
|
| 40 |
|
| 41 |
+
# Imports for visuals
|
| 42 |
try:
|
| 43 |
from rdkit import Chem
|
| 44 |
from rdkit.Chem import Draw
|
|
|
|
| 51 |
except Exception:
|
| 52 |
cm = None
|
| 53 |
|
| 54 |
+
# joblib + sentencepiece for 5M generator artifacts
|
| 55 |
try:
|
| 56 |
import joblib
|
| 57 |
except Exception:
|
|
|
|
| 62 |
except Exception:
|
| 63 |
spm = None
|
| 64 |
|
| 65 |
+
# selfies (for SELFIES→SMILES/PSMILES conversion)
|
| 66 |
try:
|
| 67 |
import selfies as sf
|
| 68 |
except Exception:
|
|
|
|
| 85 |
"""
|
| 86 |
|
| 87 |
def __init__(self):
|
| 88 |
+
# 1) HF model repo
|
|
|
|
| 89 |
self.hf_repo_id = os.getenv("POLYFUSION_WEIGHTS_REPO", "kaurm43/polyfusionagent-weights")
|
| 90 |
self.hf_repo_type = os.getenv("POLYFUSION_WEIGHTS_REPO_TYPE", "model") # usually "model"
|
| 91 |
|
| 92 |
# 2) Where to store downloaded files
|
|
|
|
| 93 |
default_root = "/data/polyfusion_cache" if os.path.isdir("/data") else os.path.expanduser("~/.cache/polyfusion_cache")
|
| 94 |
self.local_weights_root = os.getenv("POLYFUSION_WEIGHTS_DIR", default_root)
|
| 95 |
|
|
|
|
| 97 |
self.hf_token = os.getenv("HF_TOKEN", None)
|
| 98 |
|
| 99 |
# 4) Download (cached) + get local folder path.
|
|
|
|
| 100 |
allow = [
|
| 101 |
"tokenizer_spm_5m/**",
|
| 102 |
"polyfusion_cl_5m/**",
|
|
|
|
| 114 |
allow_patterns=allow,
|
| 115 |
)
|
| 116 |
|
| 117 |
+
# 5) Map to the necessary files
|
|
|
|
| 118 |
self.cl_weights_path = os.path.join(self._weights_dir, "polyfusion_cl_5m", "pytorch_model.bin")
|
| 119 |
|
| 120 |
# If your Space also includes a local Chroma DB folder in the Space repo,
|
|
|
|
| 127 |
self.downstream_bestweights_5m_dir = os.path.join(self._weights_dir, "downstream_heads_5m")
|
| 128 |
self.inverse_design_5m_dir = os.path.join(self._weights_dir, "inverse_design_5m")
|
| 129 |
|
| 130 |
+
# 6) Optional: sanity-check required files
|
| 131 |
self._assert_exists(self.cl_weights_path, "CL weights")
|
| 132 |
self._assert_exists(self.spm_model_path, "SentencePiece model")
|
| 133 |
self._assert_exists(self.spm_vocab_path, "SentencePiece vocab")
|
|
|
|
| 597 |
|
| 598 |
def _render_tool_outputs_verbatim_md(report: Dict[str, Any]) -> str:
|
| 599 |
"""
|
| 600 |
+
Render tool outputs as verbatim JSON blocks.
|
| 601 |
"""
|
| 602 |
if not isinstance(report, dict):
|
| 603 |
return ""
|