"""
Inference Script Example
===================================
MANDATORY
- Before submitting, ensure the following variables are defined in your environment configuration:
API_BASE_URL The API endpoint for the LLM.
MODEL_NAME The model identifier to use for inference.
HF_TOKEN Your Hugging Face / API key.
LOCAL_IMAGE_NAME The name of the local image to use for the environment if you are using from_docker_image()
method
- Defaults are set only for API_BASE_URL and MODEL_NAME
(and should reflect your active inference setup):
API_BASE_URL = os.getenv("API_BASE_URL", "<your-active-endpoint>")
MODEL_NAME = os.getenv("MODEL_NAME", "<your-active-model>")
- The inference script must be named `inference.py` and placed in the root directory of the project
- Participants must use the OpenAI client for all LLM calls, configured with the variables above
STDOUT FORMAT
- The script must emit exactly three line types to stdout, in this order:
[START] task=<task_name> env=<benchmark> model=<model_name>
[STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
[END] success=<true|false> steps=<n> score=<score> rewards=<r1,r2,...,rn>
Rules:
- One [START] line at episode begin.
- One [STEP] line per step, immediately after env.step() returns.
- One [END] line after env.close(), always emitted (even on exception).
- reward and rewards are formatted to 2 decimal places.
- done and success are lowercase booleans: true or false.
- error is the raw last_action_error string, or null if none.
- All fields on a single line with no newlines within a line.
- Each task should return a score in [0, 1]
Example:
[START] task=click-test env=miniwob model=Qwen3-VL-30B
[STEP] step=1 action=click('123') reward=0.00 done=false error=null
[STEP] step=2 action=fill('456','text') reward=0.00 done=false error=null
[STEP] step=3 action=click('789') reward=1.00 done=true error=null
[END] success=true steps=3 score=1.00 rewards=0.00,0.00,1.00
"""
import asyncio
import os
import textwrap
from typing import List, Optional
from urllib.parse import urlparse
from dotenv import load_dotenv
from openai import OpenAI
load_dotenv() # Load environment variables from .env file
from molecular_Designer_Env.client import MolecularDesignerEnvEnv
from molecular_Designer_Env.models import MolecularDesignerEnvAction
IMAGE_NAME = os.getenv("IMAGE_NAME") # If you are using docker image
BASE_URL = os.getenv("BASE_URL") # If connecting to deployed server
API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
TASK_NAME = os.getenv("TASK_NAME", "easy")
BENCHMARK = os.getenv("BENCHMARK", "molecular_Designer_Env")
MAX_STEPS = 10
TEMPERATURE = 0.7
MAX_TOKENS = 150
SUCCESS_SCORE_THRESHOLD = 0.85 # normalized score in [0, 1]
# Replaced total max reward tracking since it's now dynamically evaluated up to 1.0 per task
MAX_TOTAL_REWARD = 1.0
SYSTEM_PROMPT = textwrap.dedent(
"""
You are an expert medicinal chemist AI acting in a molecular design environment.
Each turn you will receive feedback on a molecule you design.
Your goal is to provide a valid SMILES string that maximizes the task's unique reward.
Reply with exactly one SMILES string - no quotes, no prefixes, just the SMILES string (e.g. CCO).
"""
).strip()
def log_start(task: str, env: str, model: str) -> None:
print(f"[START] task={task} env={env} model={model}", flush=True)
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
error_val = error if error else "null"
done_val = str(done).lower()
print(
f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
flush=True,
)
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
print(f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}", flush=True)
def build_user_prompt(step: int, last_feedback: str, last_reward: float, history: List[str]) -> str:
history_block = "\n".join(history[-4:]) if history else "None"
return textwrap.dedent(
f"""
Step: {step}
Last feedback: {last_feedback!r}
Last reward: {last_reward:.3f}
Previous steps history:
{history_block}
Generate your next SMILES string to improve your score. Follow the task's rules and constraints exactly. Target MW or LogP where applicable.
"""
).strip()
def get_model_message(client: OpenAI, step: int, last_feedback: str, last_reward: float, history: List[str]) -> str:
user_prompt = build_user_prompt(step, last_feedback, last_reward, history)
try:
completion = client.chat.completions.create(
model=MODEL_NAME,
messages=[
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": user_prompt},
],
temperature=TEMPERATURE,
max_tokens=MAX_TOKENS,
stream=False,
)
text = (completion.choices[0].message.content or "").strip()
return text if text else "CCO"
except Exception as exc:
print(f"[DEBUG] Model request failed: {exc}", flush=True)
return "CCO"
def normalize_base_url(base_url: Optional[str]) -> Optional[str]:
"""Normalize user-provided BASE_URL into an API runtime URL.
If a Hugging Face repo page URL is provided (huggingface.co/spaces/user/space),
convert it to the runtime domain (https://user-space.hf.space).
"""
if not base_url:
return base_url
cleaned = base_url.strip().rstrip("/")
parsed = urlparse(cleaned)
# Handle Hugging Face repo page URL -> runtime URL used by API/WebSocket.
if parsed.netloc == "huggingface.co":
parts = [p for p in parsed.path.strip("/").split("/") if p]
if len(parts) >= 3 and parts[0] == "spaces":
owner, space = parts[1], parts[2]
return f"https://{owner}-{space}.hf.space"
# Avoid accidentally pointing at the web UI path.
if cleaned.endswith("/web"):
return cleaned[:-4]
return cleaned
async def main() -> None:
client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
runtime_base_url = normalize_base_url(BASE_URL)
if runtime_base_url:
env = MolecularDesignerEnvEnv(base_url=runtime_base_url)
else:
if not IMAGE_NAME:
raise ValueError(
"Set BASE_URL for deployed env, or IMAGE_NAME for local docker env."
)
env = await MolecularDesignerEnvEnv.from_docker_image(IMAGE_NAME)
history: List[str] = []
rewards: List[float] = []
steps_taken = 0
score = 0.0
success = False
log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)
try:
result = await asyncio.to_thread(env.reset) # Ensure async execution correctly maps
last_feedback = result.observation.feedback
last_reward = 0.0
for step in range(1, MAX_STEPS + 1):
if result.done:
break
message = get_model_message(client, step, last_feedback, last_reward, history)
result = await asyncio.to_thread(env.step, MolecularDesignerEnvAction(smiles=message))
obs = result.observation
reward = result.reward or 0.0
done = result.done
error = None
rewards.append(reward)
steps_taken = step
last_feedback = obs.feedback
last_reward = reward
log_step(step=step, action=message, reward=reward, done=done, error=error)
history.append(f"Step {step}: {message!r} -> reward {reward:+.3f}")
if done:
break
score = max(rewards) if rewards else 0.0
score = min(max(score, 0.0), 1.0) # clamp to [0, 1]
success = score >= SUCCESS_SCORE_THRESHOLD
finally:
try:
env.close()
except Exception as e:
print(f"[DEBUG] env.close() error (container cleanup): {e}", flush=True)
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
if __name__ == "__main__":
asyncio.run(main()) |