Spaces:
Sleeping
Sleeping
File size: 7,224 Bytes
a7b597d d994b0c a7b597d d994b0c a7b597d d994b0c a7b597d 8218238 a7b597d d994b0c a7b597d d994b0c a7b597d d994b0c a7b597d d994b0c a7b597d d994b0c 5c774ec d994b0c a7b597d 05b0eac d994b0c a7b597d 5c774ec 05b0eac d994b0c a7b597d d994b0c a7b597d 05b0eac a7b597d d994b0c a7b597d d994b0c a7b597d 05b0eac a7b597d d994b0c a7b597d d994b0c a7b597d 05b0eac | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 | import asyncio
import contextlib
import io
import os
import textwrap
from types import SimpleNamespace
from typing import Any, List, Optional
from openai import OpenAI
from openenv.core import EnvClient
from productivity_env import ProductivityAction, ProductivityEnv
# Credentials and endpoint for the OpenAI-compatible API. A variable that is
# present in the environment always wins (even if empty); otherwise fall back
# to HF_TOKEN for the key and to the built-in defaults for URL and model.
API_KEY = os.environ.get("API_KEY", os.environ.get("HF_TOKEN"))
API_BASE_URL = os.environ.get("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")

# Which task/benchmark to run and, optionally, which docker image hosts the env.
TASK_NAME = os.environ.get("PRODUCTIVITY_TASK", "triage")
BENCHMARK = os.environ.get("PRODUCTIVITY_BENCHMARK", "productivity_copilot")
# Unlike the lookups above, an empty LOCAL_IMAGE_NAME falls through to IMAGE_NAME.
IMAGE_NAME = os.environ.get("LOCAL_IMAGE_NAME") or os.environ.get("IMAGE_NAME")

# Episode and sampling limits.
MAX_STEPS = 10
TEMPERATURE = 0.7
MAX_TOKENS = 150
SUCCESS_SCORE_THRESHOLD = 0.5
# System message sent with every chat completion: describes the coaching role
# and the ACTION_TYPE|Message reply format the episode loop parses.
SYSTEM_PROMPT = "\n".join(
    (
        "You are an AI productivity coach managing a simulated human worker.",
        "Each turn you observe the human's condition (stress, focus, distraction, failure probability).",
        "Your goal is to decrease failure probability while keeping stress below 8.",
        "Reply with exactly one action in the format: ACTION_TYPE|Message",
        "Available actions:",
        "WAIT|",
        "FORCE_BREAK|Take a break!",
        "BLOCK_SOCIAL_MEDIA|Blocked",
        "SEND_NUDGE|You can do this!",
        "Choose wisely based on the observation!",
    )
)
def log_start(task: str, env: str, model: str) -> None:
    """Print the ``[START]`` line naming the task, environment and model."""
    banner = "[START] task={} env={} model={}".format(task, env, model)
    print(banner, flush=True)
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Print one ``[STEP]`` line for a completed environment step.

    The reward is shown with two decimals, ``done`` as a lowercase string
    and a missing/empty ``error`` as the literal ``null``.
    """
    fields = (
        f"step={step}",
        f"action={action}",
        f"reward={reward:.2f}",
        f"done={str(done).lower()}",
        f"error={error or 'null'}",
    )
    print("[STEP] " + " ".join(fields), flush=True)
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Print the closing ``[END]`` line summarising the episode.

    ``rewards`` is rendered as a comma-separated list of two-decimal values
    (empty when no step was taken).
    """
    formatted = [format(value, ".2f") for value in rewards]
    flag = str(success).lower()
    print(
        f"[END] success={flag} steps={steps} score={score:.2f} rewards={','.join(formatted)}",
        flush=True,
    )
def build_user_prompt(step: int, obs_dict: dict, last_reward: float, history: List[str]) -> str:
    """Build the per-turn user message for the model.

    Args:
        step: 1-based index of the step about to be taken.
        obs_dict: Latest observation; rendered with its ``str()`` form.
        last_reward: Reward from the previous step, shown with two decimals.
        history: One summary line per earlier step; only the last four are
            included, or the word ``None`` when the list is empty.

    Returns:
        The prompt text, one field per line, without leading indentation.
    """
    history_block = "\n".join(history[-4:]) if history else "None"
    # Assemble from explicit lines instead of dedenting an interpolated
    # template: dedent runs after interpolation, so a multi-line history
    # block (whose lines start at column 0) would cancel the common margin
    # and leave the template's source indentation inside the prompt.
    lines = [
        f"Step: {step}",
        f"Observation: {obs_dict}",
        f"Last reward: {last_reward:.2f}",
        "Previous steps:",
        history_block,
        "Consider the human's stress and failure probability. Send your next action.",
    ]
    return "\n".join(lines).strip()
def get_model_message(client: OpenAI, step: int, obs_dict: dict, last_reward: float, history: List[str]) -> str:
    """Ask the model for the next action and return its raw reply text.

    The request carries ``SYSTEM_PROMPT`` plus the user prompt built from the
    arguments. Any exception raised while calling the API or reading the
    reply, as well as an empty reply, yields the fallback ``"WAIT|"``.
    """
    fallback = "WAIT|"
    prompt = build_user_prompt(step, obs_dict, last_reward, history)
    try:
        conversation = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ]
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=conversation,
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
            stream=False,
        )
        reply = (completion.choices[0].message.content or "").strip()
    except Exception:
        # Best effort: a failed call must not abort the episode.
        return fallback
    return reply or fallback
def create_client() -> OpenAI:
    """Create the OpenAI-compatible client used for all model calls.

    When both ``API_BASE_URL`` and ``API_KEY`` are present in the environment
    at call time they are used directly; otherwise the module-level
    ``API_BASE_URL`` / ``API_KEY`` values are used together.
    """
    try:
        base_url = os.environ["API_BASE_URL"]
        api_key = os.environ["API_KEY"]
    except KeyError:
        # Either variable missing: fall back to the module-level pair.
        base_url, api_key = API_BASE_URL, API_KEY
    return OpenAI(base_url=base_url, api_key=api_key)
def warmup_llm_proxy(client: OpenAI) -> None:
    """Send one tiny throwaway completion request and ignore the outcome.

    The response is discarded and any exception is suppressed, so this
    never affects the episode.
    """
    warmup_messages = [
        {"role": "system", "content": "Reply with exactly WAIT|warmup"},
        {"role": "user", "content": "warmup"},
    ]
    with contextlib.suppress(Exception):
        client.chat.completions.create(
            model=MODEL_NAME,
            messages=warmup_messages,
            temperature=0,
            max_tokens=8,
            stream=False,
        )
def normalize_result(result: Any) -> SimpleNamespace:
    """Return ``result`` as a namespace with observation, reward and done.

    An object exposing ``observation`` is treated as a full step result and
    its ``reward`` and ``done`` attributes are copied as-is. Anything else is
    treated as the observation itself, with ``reward`` defaulting to ``None``
    and ``done`` to ``False`` when the object lacks those attributes.
    """
    if not hasattr(result, "observation"):
        return SimpleNamespace(
            observation=result,
            reward=getattr(result, "reward", None),
            done=getattr(result, "done", False),
        )
    return SimpleNamespace(
        observation=result.observation,
        reward=result.reward,
        done=result.done,
    )
async def main() -> None:
    """Run one episode of the productivity environment driven by the model.

    Prints a ``[START]`` line, one ``[STEP]`` line per environment step and a
    final ``[END]`` line. The environment is created from a docker image when
    ``IMAGE_NAME`` is set and in-process otherwise. The episode stops after
    ``MAX_STEPS`` steps or as soon as the environment reports ``done``.

    The score is the best single-step reward clamped to ``[0, 1]``; the run
    counts as a success when it reaches ``SUCCESS_SCORE_THRESHOLD``. Any
    exception during the episode marks the run as failed; the ``[END]`` line
    is still printed with whatever was collected up to that point.
    """
    import inspect  # local import: only needed to close sync and async envs alike

    valid_actions = ("WAIT", "FORCE_BREAK", "BLOCK_SOCIAL_MEDIA", "SEND_NUDGE")
    env = None
    history: List[str] = []
    rewards: List[float] = []
    steps_taken = 0
    score = 0.0
    success = False

    log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)
    try:
        client = create_client()
        warmup_llm_proxy(client)

        if IMAGE_NAME:
            env = await EnvClient.from_docker_image(
                IMAGE_NAME,
                env_vars={"PRODUCTIVITY_TASK": TASK_NAME},
            )
            raw_result = await env.reset(task_name=TASK_NAME)
        else:
            # Discard anything the in-process environment prints while it is
            # set up, so it does not mix with the structured log lines.
            with contextlib.redirect_stdout(io.StringIO()):
                env = ProductivityEnv(task_name=TASK_NAME)
                raw_result = env.reset(task_name=TASK_NAME)
        result = normalize_result(raw_result)

        last_obs = result.observation.model_dump()
        last_reward = 0.0

        for step in range(1, MAX_STEPS + 1):
            if result.done:
                break

            response = get_model_message(client, step, last_obs, last_reward, history)
            # Expected reply shape is ACTION_TYPE|Message. Tolerate stray
            # whitespace and lowercase in the action token; anything that is
            # still unknown degrades to WAIT.
            action_type, _, message = response.partition("|")
            action_type = action_type.strip().upper()
            if action_type not in valid_actions:
                action_type = "WAIT"

            action = ProductivityAction(action_type=action_type, message=message)
            if IMAGE_NAME:
                raw_result = await env.step(action)
            else:
                raw_result = env.step(action)
            result = normalize_result(raw_result)

            obs = result.observation
            reward = result.reward or 0.0  # a missing reward counts as zero
            done = result.done
            error = getattr(obs, "last_action_error", None)

            rewards.append(reward)
            steps_taken = step
            last_obs = obs.model_dump()
            last_reward = reward

            log_step(step=step, action=action_type, reward=reward, done=done, error=error)
            history.append(f"Step {step}: {action_type} -> reward {reward:+.2f}")

            if done:
                break

        score = max(rewards) if rewards else 0.0
        score = min(max(score, 0.0), 1.0)
        success = score >= SUCCESS_SCORE_THRESHOLD
    except Exception:
        # Deliberately quiet: the failure is reported through the [END] line.
        success = False
    finally:
        try:
            if env is not None and hasattr(env, "close"):
                closing = env.close()
                # Only await when close() actually returned an awaitable;
                # awaiting the result of a synchronous close() would raise.
                if inspect.isawaitable(closing):
                    await closing
        except Exception:
            # Cleanup is best effort and must not prevent the [END] line.
            pass
        log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
# Script entry point: run the async episode to completion.
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except Exception:
        # Last-resort fallback: if an exception escapes asyncio.run()/main(),
        # still print an [END] line reporting a failed, zero-step run.
        log_end(success=False, steps=0, score=0.0, rewards=[])
|