vishgg committed on
Commit
2c0eb54
·
1 Parent(s): 7c0af84

feat: hackathon submission - inference.py, openenv.yaml, validator-ready

Browse files
Files changed (6) hide show
  1. Dockerfile +4 -0
  2. inference.py +292 -0
  3. openenv.yaml +6 -3
  4. pyproject.toml +7 -1
  5. server/app.py +16 -0
  6. uv.lock +0 -0
Dockerfile CHANGED
@@ -16,6 +16,10 @@ RUN pip install --no-cache-dir -r requirements.txt
16
  COPY --chown=user . /app
17
 
18
  ENV ENABLE_WEB_INTERFACE=true
 
 
 
 
19
 
20
  EXPOSE 7860
21
 
 
16
  COPY --chown=user . /app
17
 
18
  ENV ENABLE_WEB_INTERFACE=true
19
+ # Defaults for the mandatory inference env vars (override at runtime as needed)
20
+ ENV API_BASE_URL=https://router.huggingface.co/v1
21
+ ENV MODEL_NAME=Qwen/Qwen2.5-72B-Instruct
22
+ # HF_TOKEN must be supplied at runtime via Spaces secrets
23
 
24
  EXPOSE 7860
25
 
inference.py ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Inference script for the GTM Strategy Optimizer environment.
2
+
3
+ Mandatory hackathon submission entry point. Drives the GTM environment with an
4
+ LLM agent (via the OpenAI client) and emits structured stdout logs in the
5
+ [START] / [STEP] / [END] format defined by the spec.
6
+
7
+ Environment variables:
8
+ API_BASE_URL OpenAI-compatible base URL (default: HF router)
9
+ MODEL_NAME Model identifier (default: Qwen2.5-72B-Instruct)
10
+ HF_TOKEN API key (or API_KEY)
11
+ GTM_TASK Task to run: channel_optimizer | growth_strategist |
12
+ market_dominator (default: channel_optimizer)
13
+ GTM_SEED Episode seed (default: 42)
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import os
20
+ import sys
21
+ import textwrap
22
+ from typing import Any, Dict, List, Optional
23
+
24
+ # Make repo root importable when invoked from any cwd
25
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
26
+
27
+ from openai import OpenAI
28
+
29
+ from models import GTMAction
30
+ from server.environment import GTMEnvironment
31
+ from server.simulation import MESSAGING_DIMS
32
+ from server.tasks import TASKS, get_task
33
+
34
# ── Config ─────────────────────────────────────────────────────────────────

# OpenAI-compatible endpoint; defaults to the Hugging Face inference router.
API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
# "dummy" keeps the OpenAI client constructible without a key; actual requests
# will then fail and _ask_llm degrades to an empty reply.
API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or "dummy"

# Which environment task to run; validated against TASKS in main().
TASK_NAME = os.getenv("GTM_TASK", "channel_optimizer")
# Episode seed passed to env.reset() for reproducible runs.
SEED = int(os.getenv("GTM_SEED", "42"))
BENCHMARK = "gtm_strategy_optimizer"

# LLM sampling parameters used by _ask_llm.
TEMPERATURE = 0.3
MAX_TOKENS = 600
SUCCESS_SCORE_THRESHOLD = 0.5  # grader scores in [0,1]; >0.5 = beat random
+
48
+
49
+ # ── Structured stdout logging ──────────────────────────────────────────────
50
+
51
+
52
def log_start(task: str, env: str, model: str) -> None:
    """Emit the structured [START] line announcing the episode (flushed)."""
    header = " ".join(["[START]", f"task={task}", f"env={env}", f"model={model}"])
    print(header, flush=True)
+
55
+
56
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Emit one structured [STEP] line to stdout (flushed).

    A falsy *error* (None or "") is rendered as the literal string "null".
    """
    print(
        "[STEP] step=%d action=%s reward=%.2f done=%s error=%s"
        % (step, action, reward, str(done).lower(), error or "null"),
        flush=True,
    )
63
+
64
+
65
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Emit the final structured [END] summary line to stdout (flushed)."""
    reward_csv = ",".join("%.2f" % r for r in rewards)
    print(
        "[END] success=%s steps=%d score=%.3f rewards=%s"
        % (str(success).lower(), steps, score, reward_csv),
        flush=True,
    )
71
+
72
+
73
+ # ── Prompt + LLM helpers ───────────────────────────────────────────────────
74
+
75
+
76
# Single system prompt reused for every episode.  Instructs the model to
# answer each turn with a bare JSON action object (no prose, no code fences);
# _parse_llm_action still strips fences defensively if the model ignores this.
SYSTEM_PROMPT = textwrap.dedent(
    """
    You are a Go-To-Market (GTM) strategist running a product launch in a simulated market.
    Each week you decide:
    1. budget_allocation — fractions across the available channels (sum <= 1.0)
    2. segment_targeting — fractions across customer segments (sum ~ 1.0)
    3. messaging — emphasis across messaging dimensions (sum ~ 1.0)
    4. experiment — optional experiment id or null
    5. pricing_action — optional pricing change or null

    Goals: maximize revenue, maintain brand health, avoid waste, respect compliance.
    Strategy hints: diversify early, double down on high-ROI channels, match
    messaging to segment preferences, keep messaging consistent week-to-week.

    Reply with ONLY a single JSON object matching:
    {
    "budget_allocation": {"channel_name": fraction, ...},
    "segment_targeting": {"segment_name": fraction, ...},
    "messaging": {"dimension": fraction, ...},
    "experiment": "type" or null,
    "pricing_action": "action" or null
    }
    No prose, no code fences, no commentary.
    """
).strip()
101
+
102
+
103
def _format_observation(obs, task) -> str:
    """Render the current observation plus the task's action space as the
    user-prompt text for the next LLM turn.

    obs: environment observation — week counters, budget figures, brand /
         revenue / conversion totals, per-channel and per-segment metrics,
         and an optional experiment result.
    task: task definition providing channels, segments, experiments and
          pricing actions.  (Attribute shapes assumed from usage here —
          confirm against server.environment / server.tasks.)
    """
    parts = [
        f"Week {obs.week}/{obs.total_weeks}",
        f"Budget remaining: ${obs.budget_remaining:,.0f} (weekly ${obs.weekly_budget:,.0f})",
        f"Brand: {obs.brand_score:.0f}/100 Total revenue: ${obs.total_revenue:,.0f} "
        f"Conversions: {obs.total_conversions} CAC: ${obs.average_cac:,.0f}",
    ]
    # Per-channel metrics block, only when the observation carries any.
    if obs.channel_metrics:
        parts.append("Channels:")
        for ch, m in obs.channel_metrics.items():
            parts.append(
                f" {ch}: {m.impressions} imp, {m.clicks} clk, {m.conversions} conv, "
                f"${m.spend:,.0f} spend, ROI={m.roi:.2f}"
            )
    # Per-segment performance block, only when present.
    if obs.segment_performance:
        parts.append("Segments:")
        for seg, m in obs.segment_performance.items():
            parts.append(
                f" {seg}: cvr={m.conversion_rate:.4f}, eng={m.engagement_score:.1f}, "
                f"rev=${m.revenue:,.0f}"
            )
    if obs.experiment_result:
        parts.append(f"Experiment: {obs.experiment_result.recommendation}")

    # Remind the model of the legal action space every turn.
    parts.append(f"Available channels: {[c.name for c in task.channels]}")
    parts.append(f"Available segments: {[s.name for s in task.segments]}")
    if task.available_experiments:
        parts.append(f"Available experiments: {task.available_experiments}")
    if task.available_pricing_actions:
        parts.append(f"Available pricing actions: {task.available_pricing_actions}")
    parts.append(f"Messaging dimensions: {MESSAGING_DIMS}")
    parts.append("\nRespond with the JSON action only.")
    return "\n".join(parts)
136
+
137
+
138
def _equal_action_dict(task) -> Dict[str, Any]:
    """Build the uniform-allocation action: equal weight for every channel,
    segment and messaging dimension, with no experiment or pricing action.
    Used as the safe fallback when the LLM reply cannot be parsed."""
    channel_names = [c.name for c in task.channels]
    segment_names = [s.name for s in task.segments]
    return {
        "budget_allocation": dict.fromkeys(channel_names, 1.0 / len(channel_names)),
        "segment_targeting": dict.fromkeys(segment_names, 1.0 / len(segment_names)),
        "messaging": dict.fromkeys(MESSAGING_DIMS, 1.0 / len(MESSAGING_DIMS)),
        "experiment": None,
        "pricing_action": None,
    }
146
+
147
+
148
def _parse_llm_action(text: str, task) -> Dict[str, Any]:
    """Best-effort extraction of a JSON action object from LLM output.

    Strips optional markdown code fences, trims to the outermost {...} span,
    parses the JSON, and repairs missing/mis-typed sub-dicts.  Any failure
    falls back to the equal-allocation action so the episode can continue.

    text: raw LLM reply (may be empty, fenced, or contain surrounding prose).
    task: task definition, forwarded to _equal_action_dict for the fallback.
    """
    fallback = _equal_action_dict(task)
    if not text:
        return fallback
    s = text.strip()
    # Remove markdown fences the model may wrap the JSON in despite the prompt.
    if "```json" in s:
        s = s.split("```json", 1)[1].split("```", 1)[0].strip()
    elif "```" in s:
        s = s.split("```", 1)[1].split("```", 1)[0].strip()
    # Trim to first {...} block
    if "{" in s and "}" in s:
        s = s[s.index("{"): s.rindex("}") + 1]
    try:
        action = json.loads(s)
    except (json.JSONDecodeError, ValueError):
        return fallback
    # Bug fix: json.loads can legally return a non-dict (list, number, string,
    # null); the original code then crashed on action.get() with an
    # AttributeError instead of falling back.  Treat that as unparseable.
    if not isinstance(action, dict):
        return fallback
    # Repair the three allocation sub-dicts individually if absent/mis-typed.
    for key in ("budget_allocation", "segment_targeting", "messaging"):
        if not isinstance(action.get(key), dict):
            action[key] = fallback[key]
    action.setdefault("experiment", None)
    action.setdefault("pricing_action", None)
    return action
171
+
172
+
173
+ def _ask_llm(client: OpenAI, messages: List[Dict[str, str]]) -> str:
174
+ try:
175
+ completion = client.chat.completions.create(
176
+ model=MODEL_NAME,
177
+ messages=messages,
178
+ temperature=TEMPERATURE,
179
+ max_tokens=MAX_TOKENS,
180
+ stream=False,
181
+ )
182
+ return (completion.choices[0].message.content or "").strip()
183
+ except Exception as exc:
184
+ print(f"[DEBUG] LLM request failed: {exc}", flush=True)
185
+ return ""
186
+
187
+
188
+ def _short_action_str(action_dict: Dict[str, Any]) -> str:
189
+ """Compact one-line representation of an action for the [STEP] log."""
190
+ budget = action_dict.get("budget_allocation", {}) or {}
191
+ top = sorted(budget.items(), key=lambda kv: -kv[1])[:3]
192
+ budget_str = ",".join(f"{k[:8]}={v:.2f}" for k, v in top)
193
+ exp = action_dict.get("experiment") or "none"
194
+ pricing = action_dict.get("pricing_action") or "none"
195
+ return f"budget=[{budget_str}]/exp={exp}/price={pricing}"
196
+
197
+
198
+ # ── Main loop ──────────────────────────────────────────────────────────────
199
+
200
+
201
def main() -> int:
    """Run one full GTM episode driven by the LLM agent.

    Resets the environment with GTM_TASK/GTM_SEED, loops one LLM turn per
    week until the observation reports done, then fetches the grader score.
    Emits [START]/[STEP]/[END] structured stdout lines throughout.

    Returns:
        Always 0 — failures are reported via the [END] line (success=false),
        not the process exit code.
    """
    # Validate the requested task; fall back rather than abort on a typo.
    if TASK_NAME not in TASKS:
        print(
            f"[DEBUG] Unknown GTM_TASK={TASK_NAME!r}, falling back to channel_optimizer",
            flush=True,
        )
        task_id = "channel_optimizer"
    else:
        task_id = TASK_NAME

    task = get_task(task_id)
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    # Pre-initialize summary state so the finally-block [END] line is always
    # well-formed even if reset()/the loop raises before any step completes.
    rewards: List[float] = []
    steps_taken = 0
    score = 0.0
    success = False

    log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)

    env = GTMEnvironment()
    try:
        obs = env.reset(task_id=task_id, seed=SEED)

        # Conversation seed: system prompt + task briefing + first observation.
        messages: List[Dict[str, str]] = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {
                "role": "user",
                "content": (
                    f"Starting task: {task.name} ({task.difficulty})\n"
                    f"Duration: {task.total_weeks} weeks Budget: ${task.total_budget:,.0f}\n"
                    f"Channels: {[c.name for c in task.channels]}\n"
                    f"Segments: {[s.name for s in task.segments]}\n\n"
                    + _format_observation(obs, task)
                ),
            },
        ]

        step = 0
        while not obs.done:
            step += 1
            llm_text = _ask_llm(client, messages)
            action_dict = _parse_llm_action(llm_text, task)

            error: Optional[str] = None
            try:
                gtm_action = GTMAction(**action_dict)
                obs = env.step(gtm_action)
            except Exception as exc:
                error = f"step_failed:{exc}"
                # Use equal allocation as a safe fallback so the episode can continue
                obs = env.step(GTMAction(**_equal_action_dict(task)))

            reward = float(obs.reward) if obs.reward is not None else 0.0
            rewards.append(reward)
            steps_taken = step

            log_step(
                step=step,
                action=_short_action_str(action_dict),
                reward=reward,
                done=bool(obs.done),
                error=error,
            )

            # Append to context for the next turn (trim aggressively to stay small)
            messages.append({"role": "assistant", "content": llm_text or "{}"})
            messages.append(
                {"role": "user", "content": _format_observation(obs, task)}
            )
            # Keep the system prompt plus the last 8 messages (4 turns).
            if len(messages) > 10:
                messages = [messages[0]] + messages[-8:]

            if obs.done:
                break

        # Final grader score (env's grader returns a value in [0, 1])
        grader = env.get_grader_score(env.state.episode_id)
        score = float(grader) if grader is not None else 0.0
        score = max(0.0, min(1.0, score))  # clamp defensively
        success = score >= SUCCESS_SCORE_THRESHOLD

    except Exception as exc:
        # Episode-level failure: report and still emit the [END] line below.
        print(f"[DEBUG] inference failed: {exc}", flush=True)
    finally:
        log_end(success=success, steps=steps_taken, score=score, rewards=rewards)

    return 0
289
+
290
+
291
if __name__ == "__main__":
    # sys.exit(main()) equivalent: main() always returns 0.
    raise SystemExit(main())
openenv.yaml CHANGED
@@ -1,3 +1,6 @@
1
- name: gtm-strategy-optimizer
2
- version: "1.0.0"
3
- description: "RL environment simulating Go-To-Market strategy optimization β€” budget allocation, ICP targeting, messaging, and experimentation under uncertainty"
 
 
 
 
1
+ spec_version: 1
2
+ name: gtm_strategy_optimizer
3
+ type: space
4
+ runtime: fastapi
5
+ app: server.app:app
6
+ port: 7860
pyproject.toml CHANGED
@@ -15,7 +15,13 @@ dependencies = [
15
  "websockets>=15.0.1",
16
  "openai>=1.0.0",
17
  "numpy>=1.24.0",
 
 
 
18
  ]
19
 
 
 
 
20
  [tool.setuptools.packages.find]
21
- include = ["gtm_env*", "server*"]
 
15
  "websockets>=15.0.1",
16
  "openai>=1.0.0",
17
  "numpy>=1.24.0",
18
+ "torch>=2.0.0",
19
+ "plotly>=5.0",
20
+ "pandas>=2.0",
21
  ]
22
 
23
+ [project.scripts]
24
+ server = "server.app:main"
25
+
26
  [tool.setuptools.packages.find]
27
+ include = ["server*", "rl*", "ui*"]
server/app.py CHANGED
@@ -230,3 +230,19 @@ def run_infer(req: InferRequest) -> InferResponse:
230
 
231
  result = run_inference(req.task_id, seed=req.seed)
232
  return InferResponse(**result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
  result = run_inference(req.task_id, seed=req.seed)
232
  return InferResponse(**result)
233
+
234
+
235
+ # ── Server entry point ─────────────────────────────────────────────────────
236
+
237
+
238
def main() -> None:
    """Run the FastAPI server with uvicorn (used as a console script)."""
    # Imported lazily so importing server.app never requires uvicorn.
    import uvicorn

    bind_host = os.getenv("HOST", "0.0.0.0")
    bind_port = int(os.getenv("PORT", "7860"))
    uvicorn.run(app, host=bind_host, port=bind_port)


if __name__ == "__main__":
    main()
uv.lock ADDED
The diff for this file is too large to render. See raw diff