| |
| """ |
| REPL Environment with LLM Integration. |
| |
| Demonstrates the RLM (Recursive Language Model) paradigm using OpenEnv's repl_env: |
| 1. LLM generates Python code to solve a task |
| 2. Code is executed in the sandboxed REPL |
| 3. LLM sees the output and generates more code |
| 4. Process repeats until FINAL() is called |
| |
| Usage: |
| python examples/repl_with_llm.py |
| """ |
|
|
| from __future__ import annotations |
|
|
| import os |
|
|
| from huggingface_hub import InferenceClient |
| from repl_env import LocalRLMRunner, RLM_SYSTEM_PROMPT |
|
|
| |
| MODEL_NAME = os.environ.get("REPL_LLM_MODEL", "Qwen/Qwen3.5-9B") |
| MAX_ITERATIONS = 10 |
| |
|
|
| HF_TOKEN = os.environ.get("HF_TOKEN") |
|
|
|
|
| def create_chat_fn(): |
| """Create the chat function with Qwen3.5 model card recommended params.""" |
| client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN, timeout=300) |
|
|
| def chat_fn(messages: list[dict], model: str | None = None) -> str: |
| response = client.chat.completions.create( |
| model=model or MODEL_NAME, |
| messages=messages, |
| max_tokens=2048, |
| |
| temperature=0.6, |
| top_p=0.95, |
| presence_penalty=0.0, |
| extra_body={ |
| "top_k": 20, |
| "min_p": 0.0, |
| "repetition_penalty": 1.0, |
| |
| "chat_template_kwargs": {"enable_thinking": False}, |
| }, |
| ) |
| return response.choices[0].message.content |
|
|
| return chat_fn |
|
|
|
|
| def main(): |
| print("=" * 60) |
| print("REPL Environment with LLM Integration (Qwen)") |
| print("=" * 60) |
|
|
| print(f"Model: {MODEL_NAME}") |
|
|
| context = """ |
| The quick brown fox jumps over the lazy dog. |
| This is a sample text for testing the REPL environment. |
| It contains multiple sentences that we can analyze. |
| The RLM paradigm allows models to process data programmatically. |
| """ |
| task = "Count the total number of words in the context" |
|
|
| print(f"Task: {task}") |
| print(f"Context: {context[:100]}...") |
|
|
| chat_fn = create_chat_fn() |
| runner = LocalRLMRunner( |
| chat_fn, |
| system_prompt=RLM_SYSTEM_PROMPT, |
| max_iterations=MAX_ITERATIONS, |
| max_depth=3, |
| verbose=True, |
| ) |
| result = runner.run(context, task) |
|
|
| print(f"\n{'=' * 60}") |
| print(f"Final Result: {result.final_answer}") |
| print(f"Iterations: {result.iterations}") |
| print("=" * 60) |
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|