File size: 1,912 Bytes
80b7188 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | import asyncio
import os
from datetime import datetime
from transformers import AutoTokenizer
from rllm.agents.code_agent import CompetitionCodingAgent
from rllm.data.dataset import DatasetRegistry
from rllm.engine.agent_execution_engine import AgentExecutionEngine
from rllm.environments.base.single_turn_env import SingleTurnEnvironment
from rllm.rewards.reward_fn import code_reward_fn
from rllm.utils import save_trajectories
if __name__ == "__main__":
    # Script entry point: run the CompetitionCodingAgent over the "deepcoder"
    # test split in single-turn environments and save the collected
    # trajectories to a timestamped .pt file.

    # Set before the tokenizer below is instantiated.
    os.environ["TOKENIZERS_PARALLELISM"] = "true"

    model_id = "agentica-org/DeepCoder-14B-Preview"
    num_agents = 64

    hf_tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Sampling settings handed to the engine; the model name is passed along
    # with them.
    generation_kwargs = {
        "temperature": 0.6,
        "top_p": 0.95,
        "model": model_id,
    }

    # NOTE(review): engine_name "openai" with a localhost base_url presumably
    # targets a locally hosted OpenAI-compatible server, and the literal
    # string "None" looks like a placeholder key -- confirm.
    endpoint_config = {
        "base_url": "http://localhost:30000/v1",
        "api_key": "None",
    }

    executor = AgentExecutionEngine(
        agent_class=CompetitionCodingAgent,
        env_class=SingleTurnEnvironment,
        agent_args={},
        env_args={"reward_fn": code_reward_fn},
        engine_name="openai",
        tokenizer=hf_tokenizer,
        sampling_params=generation_kwargs,
        rollout_engine_args=endpoint_config,
        max_response_length=65536,
        max_prompt_length=4096,
        n_parallel_agents=num_agents,
    )

    eval_split = DatasetRegistry.load_dataset("deepcoder", "test")
    if test_split_missing := eval_split is None:
        # The registry returned nothing: build the dataset with the helper
        # script. It is imported only on this path, so the helper module is
        # needed solely when the registry lookup comes back empty.
        print("Dataset not found, preparing dataset...")
        from prepare_deepcoder_data import prepare_deepcoder_data

        # Only the second returned value is used as the test split.
        _ignored, eval_split = prepare_deepcoder_data()

    problems = eval_split.get_data()

    # Drive the engine's coroutine to completion over every task.
    trajectories = asyncio.run(executor.execute_tasks(problems))

    # Stamp the output after the run finishes so the name reflects
    # completion time.
    run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_name = f"deepcoder_trajectories_{len(problems)}_{run_stamp}.pt"
    save_trajectories(trajectories, filename=output_name)
|