ShinnosukeU commited on
Commit
e76d25c
·
verified ·
1 Parent(s): 8114240

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. big_file.py +693 -0
  2. server/env_wrapper.py +23 -0
big_file.py ADDED
@@ -0,0 +1,693 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================
2
+ # big_file.py — all code required to run server/env_wrapper.py
3
+ # No imports pointing to other files in this project.
4
+ # ============================================================
5
+
6
+ # --------------- External imports (all files combined) ---------------
7
+
8
+ import json
9
+ import os
10
+ import random
11
+ import re
12
+ import sys
13
+ from concurrent.futures import ThreadPoolExecutor, as_completed
14
+ from dataclasses import dataclass, field
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+ from uuid import uuid4
18
+
19
+ from dotenv import load_dotenv
20
+ from openai import OpenAI
21
+ from openai.types.chat import (
22
+ ChatCompletionAssistantMessageParam,
23
+ ChatCompletionMessageParam,
24
+ ChatCompletionSystemMessageParam,
25
+ ChatCompletionUserMessageParam,
26
+ )
27
+ from openenv.core.env_server.interfaces import Environment
28
+ from openenv.core.env_server.types import Action, Observation, State
29
+ from pydantic import Field
30
+
31
+ # ============================================================
32
+ # evaluate_protocal.py
33
+ # ============================================================
34
+
35
# Load OPENROUTER_API_KEY (and any other settings) from a local .env file.
load_dotenv()

# Marker an agent must emit to signal that the negotiation has finished.
TASK_COMPLETE_KEYWORD = "TASK_COMPLETE"
# Hard cap on negotiation rounds so a stuck conversation still terminates.
MAX_TURNS = 30
# JSON file that accumulates one entry per experiment run.
RESULTS_FILE = "results.json"
40
+
41
+
42
@dataclass
class TimeSlot:
    """A contiguous window of availability on one day in one city."""

    day: str  # two-letter day code, e.g. "Mo"
    location: str  # city name; compared case-insensitively
    start: float  # hours in 24h (e.g. 10.5 = 10:30)
    end: float

    def contains(self, day: str, location: str, time: float, duration: float) -> bool:
        """Return True when a meeting of `duration` hours at `time` fits entirely here."""
        if self.day != day:
            return False
        if self.location.lower() != location.lower():
            return False
        return self.start <= time and time + duration <= self.end
56
+
57
+
58
@dataclass
class Schedule:
    """A named participant's weekly availability as a list of time slots."""

    name: str
    slots: list[TimeSlot]

    def is_available(
        self, day: str, location: str, time: float, duration: float
    ) -> bool:
        """True when at least one slot can host the requested meeting."""
        for candidate in self.slots:
            if candidate.contains(day, location, time, duration):
                return True
        return False

    def to_natural(self) -> str:
        """Render the schedule as a human-readable, semicolon-separated string."""
        full_names = {
            "Mo": "Monday",
            "Tu": "Tuesday",
            "We": "Wednesday",
            "Th": "Thursday",
            "Fr": "Friday",
        }
        descriptions = [
            f"{full_names[slot.day]} in {slot.location}, "
            f"{_format_time(slot.start)}-{_format_time(slot.end)}"
            for slot in self.slots
        ]
        return "; ".join(descriptions)
84
+
85
+
86
+ def _format_time(t: float) -> str:
87
+ hours = int(t)
88
+ minutes = int((t - hours) * 60)
89
+ if minutes == 0:
90
+ return str(hours)
91
+ return f"{hours}:{minutes:02d}"
92
+
93
+
94
+ def _parse_time(s: str) -> float:
95
+ if ":" in s:
96
+ h, m = s.split(":")
97
+ return int(h) + int(m) / 60
98
+ return float(s)
99
+
100
+
101
def verify_meeting(
    schedules: list[Schedule], day: str, location: str, time: float, duration: float
) -> tuple[bool, list[str]]:
    """Check that every schedule can attend; return (ok, error messages)."""
    problems: list[str] = []
    for sched in schedules:
        if sched.is_available(day, location, time, duration):
            continue
        problems.append(
            f"{sched.name} is NOT available on {day} at {_format_time(time)} ({location})"
        )
    return not problems, problems
112
+
113
+
114
# Normalizes the many spellings an agent may use for a weekday (full name,
# three-letter, or two-letter, lowercase) to the canonical two-letter code.
DAY_ALIASES: dict[str, str] = {
    "monday": "Mo",
    "tuesday": "Tu",
    "wednesday": "We",
    "thursday": "Th",
    "friday": "Fr",
    "mon": "Mo",
    "tue": "Tu",
    "wed": "We",
    "thu": "Th",
    "fri": "Fr",
    "mo": "Mo",
    "tu": "Tu",
    "we": "We",
    "th": "Th",
    "fr": "Fr",
}
131
+
132
+
133
def parse_compact_result(text: str) -> tuple[str, str, float] | None:
    """Extract (day, location, start_time) from a "=> Day[City]Start-End" line.

    Returns None when no compact result marker is present in `text`.
    """
    marker = re.search(
        r"=>\s*([A-Za-z]{2,9})\[([A-Za-z]+)\](\d{1,2}(?::\d{2})?)\s*-\s*\d{1,2}(?::\d{2})?",
        text,
    )
    if marker is None:
        return None
    day_token = marker.group(1)
    # Unknown aliases fall through unchanged so the verifier can report them.
    day = DAY_ALIASES.get(day_token.lower(), day_token)
    return day, marker.group(2), _parse_time(marker.group(3))
143
+
144
+
145
@dataclass
class Session:
    """One agent's chat history plus usage accounting against the chat API."""

    client: OpenAI
    model: str
    name: str = ""
    system_prompt: str = ""
    messages: list[ChatCompletionMessageParam] = field(default_factory=list)
    total_completion_tokens: int = 0
    turns: int = 0

    def __post_init__(self) -> None:
        # Seed the conversation with the system prompt, when one was given.
        if not self.system_prompt:
            return
        system_message: ChatCompletionSystemMessageParam = {
            "role": "system",
            "content": self.system_prompt,
        }
        self.messages.append(system_message)

    def send(self, content: str) -> str:
        """Append a user message, query the model, record and return its reply."""
        request: ChatCompletionUserMessageParam = {
            "role": "user",
            "content": content,
        }
        self.messages.append(request)

        completion = self.client.chat.completions.create(
            model=self.model,
            messages=self.messages,
            max_tokens=500,
        )
        reply = completion.choices[0].message.content or ""
        recorded: ChatCompletionAssistantMessageParam = {
            "role": "assistant",
            "content": reply,
        }
        self.messages.append(recorded)

        if completion.usage:
            self.total_completion_tokens += completion.usage.completion_tokens

        self.turns += 1
        return reply

    def is_complete(self) -> bool:
        """True when the latest message is an assistant reply signalling an outcome."""
        if not self.messages:
            return False
        latest = self.messages[-1]
        text = latest.get("content")
        if latest["role"] != "assistant" or not isinstance(text, str):
            return False
        return TASK_COMPLETE_KEYWORD in text or "=>" in text
197
+
198
+
199
def negotiate(
    agent_a: Session, agent_b: Session, max_turns: int = MAX_TURNS
) -> list[dict[str, str]]:
    """Alternate messages between two agents until one signals completion.

    Agent A opens with a proposal request; each round relays the latest
    message to the other side. Stops after `max_turns` rounds even without
    an agreement. Returns the full transcript in order.
    """
    transcript: list[dict[str, str]] = []

    def _record(agent: Session, text: str) -> None:
        # One transcript entry per message, tagged with the speaker's name.
        transcript.append({"agent": agent.name, "content": text})

    latest = agent_a.send("Propose a meeting time.")
    _record(agent_a, latest)

    for _ in range(max_turns):
        if agent_a.is_complete():
            break

        latest = agent_b.send(latest)
        _record(agent_b, latest)
        if agent_b.is_complete():
            break

        latest = agent_a.send(latest)
        _record(agent_a, latest)

    return transcript
220
+
221
+
222
MEETING_DURATION = 30  # minutes; fixed length of the meeting being scheduled
DAYS = ["Mo", "Tu", "We", "Th", "Fr"]  # working week, canonical two-letter codes
CITIES = ["SF", "NYC"]  # the only locations a participant can be in
MIN_HOUR = 8  # earliest slot boundary (24h clock)
MAX_HOUR = 18  # latest slot boundary (24h clock)
227
+
228
+
229
def generate_schedules(
    num_overlaps: int, rng: random.Random
) -> tuple[Schedule, Schedule]:
    """Build two random weekly schedules with `num_overlaps` workable overlap days.

    Overlap days place both participants in the same city with a guaranteed
    shared window of at least one hour; all remaining days are fillers that
    cannot host a meeting (only one participant has a slot, or the two are in
    different cities). Consumes `rng` in a fixed call order, so a seeded
    Random reproduces the same pair of schedules.

    Args:
        num_overlaps: How many days should allow a valid meeting (0-5).
        rng: Source of randomness; all draws go through it.

    Returns:
        A (Schedule("T", ...), Schedule("J", ...)) pair, slots sorted Mo-Fr.
    """
    days = DAYS[:]
    rng.shuffle(days)

    overlap_days = days[:num_overlaps]
    filler_days = days[num_overlaps:]

    a_slots: list[TimeSlot] = []
    b_slots: list[TimeSlot] = []

    for day in overlap_days:
        # Same city for both; carve out a shared window of 1-3 hours.
        city = rng.choice(CITIES)
        overlap_start = rng.randint(MIN_HOUR + 1, MAX_HOUR - 2)
        overlap_end = rng.randint(
            overlap_start + 1, min(overlap_start + 3, MAX_HOUR - 1)
        )
        # Each participant's slot extends around the shared window independently.
        a_start = rng.randint(MIN_HOUR, overlap_start)
        a_end = rng.randint(overlap_end, MAX_HOUR)
        b_start = rng.randint(MIN_HOUR, overlap_start)
        b_end = rng.randint(overlap_end, MAX_HOUR)
        a_slots.append(TimeSlot(day, city, float(a_start), float(a_end)))
        b_slots.append(TimeSlot(day, city, float(b_start), float(b_end)))

    for day in filler_days:
        # Filler days are constructed so that no valid meeting can exist.
        strategy = rng.choice(["a_only", "b_only", "diff_cities"])
        if strategy == "a_only":
            city = rng.choice(CITIES)
            start = rng.randint(MIN_HOUR, MAX_HOUR - 2)
            end = rng.randint(start + 2, MAX_HOUR)
            a_slots.append(TimeSlot(day, city, float(start), float(end)))
        elif strategy == "b_only":
            city = rng.choice(CITIES)
            start = rng.randint(MIN_HOUR, MAX_HOUR - 2)
            end = rng.randint(start + 2, MAX_HOUR)
            b_slots.append(TimeSlot(day, city, float(start), float(end)))
        else:
            # Both available, but in different cities.
            city_a, city_b = rng.sample(CITIES, 2)
            start_a = rng.randint(MIN_HOUR, MAX_HOUR - 2)
            end_a = rng.randint(start_a + 2, MAX_HOUR)
            start_b = rng.randint(MIN_HOUR, MAX_HOUR - 2)
            end_b = rng.randint(start_b + 2, MAX_HOUR)
            a_slots.append(TimeSlot(day, city_a, float(start_a), float(end_a)))
            b_slots.append(TimeSlot(day, city_b, float(start_b), float(end_b)))

    # Present slots in Monday-to-Friday order regardless of shuffle order.
    day_order = {d: i for i, d in enumerate(DAYS)}
    a_slots.sort(key=lambda s: day_order[s.day])
    b_slots.sort(key=lambda s: day_order[s.day])

    return Schedule("T", a_slots), Schedule("J", b_slots)
280
+
281
+
282
def compute_valid_meetings(
    sched_a: Schedule, sched_b: Schedule, duration: float
) -> list[dict[str, str | float]]:
    """List every shared window (same day, same city) long enough for the meeting."""
    windows: list[dict[str, str | float]] = []
    for slot_a in sched_a.slots:
        for slot_b in sched_b.slots:
            same_day = slot_a.day == slot_b.day
            same_city = slot_a.location.lower() == slot_b.location.lower()
            if not (same_day and same_city):
                continue
            shared_start = max(slot_a.start, slot_b.start)
            shared_end = min(slot_a.end, slot_b.end)
            if shared_end - shared_start < duration:
                continue
            windows.append(
                {
                    "day": slot_a.day,
                    "location": slot_a.location,
                    "start": shared_start,
                    "end": shared_end,
                }
            )
    return windows
305
+
306
+
307
def run_trial(
    client: OpenAI,
    model: str,
    lang_spec: str,
    rng: random.Random,
) -> dict:
    """Run one scheduling negotiation between two agents and grade the outcome.

    Generates a random pair of schedules (0-2 guaranteed overlap days), has
    agents T and J negotiate under the given language specification, then
    checks their declared result against the ground-truth schedules.

    Args:
        client: OpenAI-compatible client used for both agents.
        model: Model identifier used for both agents.
        lang_spec: Communication-protocol prompt appended to the shared rules.
        rng: Source of randomness for schedule generation.

    Returns:
        A dict with correctness, error details, usage metrics, the generated
        schedules, the parsed meeting (if any), and the full transcript.
    """
    num_overlaps = rng.choice([0, 1, 2])
    t_schedule, j_schedule = generate_schedules(num_overlaps, rng)
    duration = MEETING_DURATION / 60  # convert minutes to fractional hours
    valid_meetings = compute_valid_meetings(t_schedule, j_schedule, duration)

    agent_t = Session(
        client=client,
        model=model,
        name="T",
        system_prompt=(
            f"You are T. Your availability: {t_schedule.to_natural()}\n"
            f"Meeting duration: {MEETING_DURATION} minutes.\n" + RULES + lang_spec
        ),
    )

    agent_j = Session(
        client=client,
        model=model,
        name="J",
        system_prompt=(
            f"You are J. Your availability: {j_schedule.to_natural()}\n"
            f"Meeting duration: {MEETING_DURATION} minutes.\n" + RULES + lang_spec
        ),
    )

    conversation = negotiate(agent_t, agent_j)

    combined_completion_tokens = (
        agent_t.total_completion_tokens + agent_j.total_completion_tokens
    )

    # Check if agents said NO_VALID_TIME anywhere in the transcript
    said_no_valid = any("NO_VALID_TIME" in msg["content"] for msg in conversation)

    # Check if agents proposed a meeting; take the LAST parsable proposal
    meeting_result = None
    for msg in reversed(conversation):
        parsed = parse_compact_result(msg["content"])
        if parsed:
            meeting_result = parsed
            break

    correct = False
    errors: list[str] = []

    if said_no_valid and not meeting_result:
        # Claiming "no valid time" is only correct when none actually exists.
        if not valid_meetings:
            correct = True
        else:
            errors.append("Agent said NO_VALID_TIME but valid meetings exist")
    elif meeting_result:
        day, location, time = meeting_result
        correct, errors = verify_meeting(
            [t_schedule, j_schedule], day, location, time, duration
        )
    else:
        errors.append("No meeting proposed and no NO_VALID_TIME signal")

    # Character count is a proxy for communication verbosity.
    combined_chars = sum(len(msg["content"]) for msg in conversation)

    return {
        "correct": correct,
        "errors": errors,
        "num_overlaps": num_overlaps,
        "valid_meetings": valid_meetings,
        "schedules": {
            "T": t_schedule.to_natural(),
            "J": j_schedule.to_natural(),
        },
        "combined_completion_tokens": combined_completion_tokens,
        "combined_chars": combined_chars,
        "total_turns": agent_t.turns + agent_j.turns,
        "agents": {
            agent_t.name: {
                "turns": agent_t.turns,
                "completion_tokens": agent_t.total_completion_tokens,
            },
            agent_j.name: {
                "turns": agent_j.turns,
                "completion_tokens": agent_j.total_completion_tokens,
            },
        },
        "meeting": (
            {
                "day": meeting_result[0],
                "location": meeting_result[1],
                "time": meeting_result[2],
            }
            if meeting_result
            else None
        ),
        "conversation": conversation,
    }
406
+
407
+
408
def run_experiment(
    client: OpenAI,
    model: str,
    lang_spec: str,
    n: int,
    experiment_id: str | None = None,
    max_workers: int = 8,
) -> dict:
    """Run `n` negotiation trials concurrently, persist and summarize them.

    Args:
        client: OpenAI-compatible client shared by all trials.
        model: Model identifier passed through to each trial.
        lang_spec: Language-specification prompt appended to each agent's rules.
        n: Number of independent trials to run.
        experiment_id: Label stored with the results; defaults to "unnamed".
        max_workers: Thread-pool size for concurrent trials.

    Returns:
        The experiment record, which is also appended to RESULTS_FILE.
    """
    exp_id = experiment_id or "unnamed"
    # Indexed assignment keeps trials in submission order even though they
    # complete out of order.
    trials: list[dict | None] = [None] * n

    def _run(i: int) -> tuple[int, dict]:
        # Fresh RNG per trial so trials are independent across threads.
        rng = random.Random()
        return i, run_trial(client, model, lang_spec, rng)

    completed = 0
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(_run, i): i for i in range(n)}
        for future in as_completed(futures):
            i, trial = future.result()
            trials[i] = trial
            completed += 1
            status = "CORRECT" if trial["correct"] else "INCORRECT"
            print(
                f"[{completed}/{n}] {status} | "
                f"chars={trial['combined_chars']} | "
                f"tokens={trial['combined_completion_tokens']} | "
                f"turns={trial['total_turns']}"
            )

    experiment = {
        "experiment_id": exp_id,
        "model": model,
        "lang_spec": lang_spec,
        "num_trials": n,
        "created_at": datetime.now(timezone.utc).isoformat(),
        "trials": trials,
    }

    # Append this experiment to the shared results file.
    path = Path(RESULTS_FILE)
    results: list[dict] = []
    if path.exists():
        results = json.loads(path.read_text())
    results.append(experiment)
    path.write_text(json.dumps(results, indent=2) + "\n")

    # Summary stats; guarded so n == 0 does not divide by zero.
    if trials:
        correct_count = sum(1 for t in trials if t["correct"])
        chars = [t["combined_chars"] for t in trials]
        tokens = [t["combined_completion_tokens"] for t in trials]
        print(
            f"\nExperiment {exp_id}: "
            f"{correct_count}/{n} correct | "
            f"mean_chars={sum(chars) / len(chars):.0f} | "
            f"mean_tokens={sum(tokens) / len(tokens):.0f}"
        )
    else:
        print(f"\nExperiment {exp_id}: no trials run")

    return experiment
465
+
466
+
467
# Ground rules shared by both agents; appended to every system prompt.
# NOTE: the "=>" result format described here is what parse_compact_result
# expects, so the wording is load-bearing — do not edit casually.
RULES = """\
Rules:
- You can ONLY be in the city listed for each day. You CANNOT travel or change cities.
- You can ONLY meet if BOTH people are in the SAME city on the SAME day.
- Reject any proposal where you are in a different city than the other person.
- When agreed, respond with => <day>[<city>]<start>-<end> and TASK_COMPLETE (e.g. => Fr[NYC]9-9:30)
- If no valid meeting time exists, respond with NO_VALID_TIME and TASK_COMPLETE
"""
475
+
476
# Candidate communication protocols compared by main(): a terse structured
# format ("compact") versus free-form short sentences ("natural").
LANG_SPECS: dict[str, str] = {
    "compact": """\
You communicate using a compact scheduling protocol. Here is the format:

M? d=<minutes> z=<timezone> w=<day range> p=<preference>
<name>: <day>[<city>]<start>-<end>,<start>-<end>;<day>[<city>]<start>-<end>
=> <day>[<city>]<start>-<end>

Example:
M? d=30 z=ET w=Mo-Fr p=earliest
T: Mo[SF]9-12;Tu[NYC]13-17;Th[SF]10-15;Fr[NYC]9-11
J: Mo[NYC]10-14;Tu[SF]9-12;We[SF]13-16;Th[NYC]11-15;Fr[NYC]9-11
=> Fr[NYC]9-9:30

- Times are in 24h format
- Days: Mo,Tu,We,Th,Fr
- Locations in brackets: [SF], [NYC]
- You MUST use this compact format for ALL messages, no natural language
- To propose: send your available slots in compact format
- To accept: respond with => <day>[<city>]<start>-<end>
- To reject/counter: send your slots that conflict and suggest alternatives
""",
    "natural": """\
Negotiate with the other person to find a 30-minute in-person meeting time.
Keep responses short (1-2 sentences).
""",
}
503
+
504
+
505
def main() -> None:
    """Run every language specification for N trials (N from argv, default 1).

    Raises:
        KeyError: If OPENROUTER_API_KEY is not set in the environment.
    """
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.environ["OPENROUTER_API_KEY"],
    )
    model = "google/gemini-3-flash-preview"

    # Trial count from the first CLI argument, defaulting to a single trial.
    n = int(sys.argv[1]) if len(sys.argv) > 1 else 1

    for spec_name, lang_spec in LANG_SPECS.items():
        run_experiment(client, model, lang_spec, n, spec_name)
516
+
517
+
518
def evaluate_lang_spec(lang_spec: str, n: int = 5) -> float:
    """Score a language specification by mean completion tokens over `n` trials.

    Lower is better; AgentLanguageEnvironment negates this value to form a
    reward. The first trial's transcript is dumped to
    sample_conversation.json for qualitative inspection.

    Args:
        lang_spec: Communication-protocol prompt to evaluate.
        n: Number of concurrent trials; must be at least 1.

    Returns:
        Mean combined completion tokens across the trials.

    Raises:
        ValueError: If `n` is less than 1 (would crash the thread pool and
            the trials[0] access below).
        KeyError: If OPENROUTER_API_KEY is not set in the environment.
    """
    if n < 1:
        raise ValueError(f"n must be >= 1, got {n}")

    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.environ["OPENROUTER_API_KEY"],
    )
    model = "google/gemini-3-flash-preview"

    def _run(_: int) -> dict:
        return run_trial(client, model, lang_spec, random.Random())

    with ThreadPoolExecutor(max_workers=n) as executor:
        trials = list(executor.map(_run, range(n)))

    # Keep one full transcript around for inspection/debugging.
    Path("sample_conversation.json").write_text(json.dumps(trials[0], indent=2) + "\n")

    return sum(t["combined_completion_tokens"] for t in trials) / len(trials)
534
+
535
+
536
+ # ============================================================
537
+ # models.py
538
+ # ============================================================
539
+
540
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
541
+ # All rights reserved.
542
+ #
543
+ # This source code is licensed under the BSD-style license found in the
544
+ # LICENSE file in the root directory of this source tree.
545
+
546
+ """
547
+ Data models for the Agent Language Environment.
548
+
549
+ The agent_language environment is a simple test environment that echoes back messages.
550
+ """
551
+
552
+
553
class AgentLanguageAction(Action):
    """Action for the Agent Language environment: a candidate protocol to score."""

    # Full text of the communication protocol the agent proposes; the
    # environment's step() evaluates it via negotiation trials.
    language_specification: str = Field(..., description="Language Specification")
557
+
558
+
559
class AgentLanguageObservation(Observation):
    """Observation from the Agent Language environment.

    Carries the protocol-design prompt after reset(), or a terminal status
    message after step().
    """

    message: str = Field(default="", description="Scenario")
563
+
564
class AgentLanguageState(State):
    """Environment state; no custom fields beyond the base episode tracking."""
566
+
567
+
568
+ # ============================================================
569
+ # server/agent_language_environment.py
570
+ # ============================================================
571
+
572
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
573
+ # All rights reserved.
574
+ #
575
+ # This source code is licensed under the BSD-style license found in the
576
+ # LICENSE file in the root directory of this source tree.
577
+
578
+ """
579
+ Agent Language Environment Implementation.
580
+
581
+ A simple test environment that echoes back messages sent to it.
582
+ Perfect for testing HTTP server infrastructure.
583
+ """
584
+
585
# System prompt asking a model to design a token-efficient agent protocol.
# Fixes from the original: a stray leading double-quote (the literal opened
# with four quotes) and spelling errors ("protocal", "forcus", "formtting",
# "itsefl") inside the prompt text. The variable name keeps the historical
# "PROTOCAL" spelling because other code references it by that name.
COMMUNICATION_PROTOCAL_PROMPT = """You are generating a communication protocol between two agents. Produce a language specification that **minimizes** the number of tokens needed for a single exchange while preserving clarity. This could be some abbreviations, synonyms, or some template for the communication. Your communication protocol might be detailed and should include examples of the communication. Try not to limit the amount of actual information that is passed to each agent. Instead focus on formatting of the communication, and telling the agents to abbreviate and make the communication as short as possible. The communication protocol itself does not need to be concise, it should be in natural language with full sentences, even paragraphs if needed, and easy to understand.

Example:
When you communicate, avoid extra greetings.
"""
590
+
591
class AgentLanguageEnvironment(Environment):
    """
    Single-step environment that scores a proposed communication protocol.

    reset() returns a prompt asking the agent to design a scheduling
    protocol; step() takes the proposed language specification, runs
    negotiation trials with it, and returns the negated mean
    completion-token count as the reward (fewer tokens => higher reward).

    Example:
        >>> env = AgentLanguageEnvironment()
        >>> obs = env.reset()
        >>> print(obs.message)  # protocol-design instructions
        >>>
        >>> obs = env.step(AgentLanguageAction(language_specification="..."))
        >>> print(obs.reward)  # -(mean completion tokens over 3 trials)
    """

    # Enable concurrent WebSocket sessions.
    # Set to True if your environment isolates state between instances.
    # When True, multiple WebSocket clients can connect simultaneously, each
    # getting their own environment instance (when using factory mode in app.py).
    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self):
        """Initialize episode state and the reset counter."""
        self._state = AgentLanguageState(episode_id=str(uuid4()), step_count=0)
        self._reset_count = 0

    def reset(self) -> AgentLanguageObservation:
        """
        Start a fresh episode.

        Returns:
            AgentLanguageObservation whose message asks the agent to design
            a communication protocol for meeting scheduling.
        """
        self._state = AgentLanguageState(episode_id=str(uuid4()), step_count=0)
        self._reset_count += 1

        # NOTE: "protocal" below is a typo but lives in a runtime string
        # sent to the agent; left unchanged here to keep behavior identical.
        message = COMMUNICATION_PROTOCAL_PROMPT + "\n\n Design a communication protocal for two agents scheduling a meeting time."

        return AgentLanguageObservation(
            message=message,
            done=False,
            reward=0.0,
        )

    def step(self, action: AgentLanguageAction) -> AgentLanguageObservation:  # type: ignore[override]
        """
        Evaluate the submitted language specification.

        Runs 3 negotiation trials under the specification and rewards the
        agent with the negative mean completion-token count, so more concise
        protocols score higher. The episode terminates after this one step.

        Args:
            action: AgentLanguageAction carrying the proposed specification.

        Returns:
            Terminal AgentLanguageObservation with the computed reward.
        """
        self._state.step_count += 1
        language_specification = action.language_specification
        # Negated token count: minimizing tokens maximizes reward.
        reward = -evaluate_lang_spec(language_specification, n=3)
        return AgentLanguageObservation(
            message="Do not call any more function.",
            done=True,
            reward=reward,
        )

    @property
    def state(self) -> AgentLanguageState:
        """
        Get the current environment state.

        Returns:
            Current AgentLanguageState with episode_id and step_count.
        """
        return self._state
666
+
667
+
668
+ # ============================================================
669
+ # server/env_wrapper.py
670
+ # ============================================================
671
+
672
+
673
class HFEnv:
    """Thin wrapper exposing AgentLanguageEnvironment through a flat API."""

    def __init__(self):
        self.env = AgentLanguageEnvironment()
        # Reward from the most recent submission; 0 until a spec is scored.
        self.reward = 0

    def reset(self, **kwargs) -> str | None:
        """Reset the underlying environment and return its prompt text."""
        return self.env.reset().message

    def submit_language_specification(self, language_specification: str) -> str:
        """
        Submit a language specification to the environment.

        The resulting reward is stored on `self.reward`; the observation's
        message text is returned. (The original annotation said `-> float`,
        but the method returns `obs.message`, which is a string.)

        Args:
            language_specification: Language specification to submit.

        Returns:
            The observation's message text.
        """
        obs = self.env.step(AgentLanguageAction(language_specification=language_specification))
        self.reward = obs.reward
        return obs.message
server/env_wrapper.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from agent_language.models import AgentLanguageAction
3
+ from agent_language.server.agent_language_environment import AgentLanguageEnvironment
4
+
5
+
6
class HFEnv:
    """Hugging Face-facing wrapper around AgentLanguageEnvironment."""

    def __init__(self) -> None:
        # One environment instance per wrapper; all episode state lives there.
        self.env = AgentLanguageEnvironment()

    def reset(self):
        """Reset the environment and return its initial observation."""
        return self.env.reset()

    def submit_language_specification(self, language_specification: str):
        """
        Submit a language specification to the environment.

        Args:
            language_specification: Language specification to submit.

        Returns:
            Observation
        """
        return self.env.step(AgentLanguageAction(language_specification=language_specification))