"""Roll out ``CompetitionCodingAgent`` on the "deepcoder" test split and save the results.

When run as a script this builds an ``AgentExecutionEngine`` that pairs the
agent with ``SingleTurnEnvironment`` (rewarded by ``code_reward_fn``), executes
every task of the test dataset, and hands the results to ``save_trajectories``
under a file name carrying the task count and a timestamp.
"""

import asyncio
import os
from datetime import datetime

from transformers import AutoTokenizer

from rllm.agents.code_agent import CompetitionCodingAgent
from rllm.data.dataset import DatasetRegistry
from rllm.engine.agent_execution_engine import AgentExecutionEngine
from rllm.environments.base.single_turn_env import SingleTurnEnvironment
from rllm.rewards.reward_fn import code_reward_fn
from rllm.utils import save_trajectories


def _build_engine(model_name, n_parallel_agents):
    """Create the execution engine for ``model_name`` with the given parallelism."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    return AgentExecutionEngine(
        agent_class=CompetitionCodingAgent,
        env_class=SingleTurnEnvironment,
        agent_args={},
        env_args={"reward_fn": code_reward_fn},
        engine_name="openai",
        tokenizer=tokenizer,
        sampling_params={
            "temperature": 0.6,
            "top_p": 0.95,
            "model": model_name,
        },
        rollout_engine_args={
            "base_url": "http://localhost:30000/v1",
            "api_key": "None",
        },
        max_response_length=65536,
        max_prompt_length=4096,
        n_parallel_agents=n_parallel_agents,
    )


def _load_test_tasks():
    """Return the task list of the "deepcoder" test split, preparing the data if absent."""
    dataset = DatasetRegistry.load_dataset("deepcoder", "test")
    if dataset is None:
        print("Dataset not found, preparing dataset...")
        # Imported lazily: the helper module is only needed on this fallback path.
        from prepare_deepcoder_data import prepare_deepcoder_data

        # The helper returns a pair; only its second element is used here.
        _unused, dataset = prepare_deepcoder_data()
    return dataset.get_data()


def main():
    """Run every test task through the engine and save the resulting trajectories."""
    os.environ["TOKENIZERS_PARALLELISM"] = "true"

    engine = _build_engine(
        model_name="agentica-org/DeepCoder-14B-Preview",
        n_parallel_agents=64,
    )
    tasks = _load_test_tasks()

    results = asyncio.run(engine.execute_tasks(tasks))

    # The timestamp is taken after the run finishes, so it marks completion time.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    save_trajectories(results, filename=f"deepcoder_trajectories_{len(tasks)}_{timestamp}.pt")


if __name__ == "__main__":
    main()