#!/usr/bin/env python3
"""
REPL Environment with LLM Integration.
Demonstrates the RLM (Recursive Language Model) paradigm using OpenEnv's repl_env:
1. LLM generates Python code to solve a task
2. Code is executed in the sandboxed REPL
3. LLM sees the output and generates more code
4. Process repeats until FINAL() is called
Usage:
python examples/repl_with_llm.py
"""
from __future__ import annotations
import os
from huggingface_hub import InferenceClient
from repl_env import LocalRLMRunner, RLM_SYSTEM_PROMPT
# ------------------------------ Settings ------------------------------
# Model identifier used for chat requests; the REPL_LLM_MODEL environment
# variable overrides the built-in default.
MODEL_NAME = os.getenv("REPL_LLM_MODEL", "Qwen/Qwen3.5-9B")

# Iteration budget handed to the runner as ``max_iterations``.
MAX_ITERATIONS = 10

# Value of the HF_TOKEN environment variable, or None when it is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
# ----------------------------------------------------------------------
def create_chat_fn():
    """Build the chat callback used to query the model.

    One ``InferenceClient`` is constructed for ``MODEL_NAME`` (using
    ``HF_TOKEN`` and ``timeout=300``) and captured by the returned closure,
    so every call reuses the same client.

    Returns:
        A callable ``chat_fn(messages, model=None)`` that issues a single
        chat-completion request and returns the content of the first choice.
    """
    hf_client = InferenceClient(model=MODEL_NAME, token=HF_TOKEN, timeout=300)

    def chat_fn(messages: list[dict], model: str | None = None) -> str:
        """Send ``messages`` and return the first choice's message content.

        ``model`` overrides ``MODEL_NAME`` for this call when it is truthy.
        """
        # Options forwarded through the request's ``extra_body``.
        backend_options = {
            "top_k": 20,
            "min_p": 0.0,
            "repetition_penalty": 1.0,
            # Thinking mode is switched off: the RLM loop is the reasoning
            # mechanism.
            "chat_template_kwargs": {"enable_thinking": False},
        }

        # Sampling values follow the Qwen3.5 non-thinking settings for precise
        # coding tasks (per the model card).
        completion = hf_client.chat.completions.create(
            model=model if model else MODEL_NAME,
            messages=messages,
            max_tokens=2048,
            temperature=0.6,
            top_p=0.95,
            presence_penalty=0.0,
            extra_body=backend_options,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content

    return chat_fn
def main():
    """Run the word-count demo and print the runner's result.

    Prints a header with the model name, the task and the first 100
    characters of the context, then runs ``LocalRLMRunner`` on the sample
    text and prints the final answer and the iteration count.
    """
    banner = "=" * 60

    print(banner)
    print("REPL Environment with LLM Integration (Qwen)")
    print(banner)
    print(f"Model: {MODEL_NAME}")

    # Sample text the model is asked to analyze; it starts and ends with a
    # newline.
    context = (
        "\n"
        "The quick brown fox jumps over the lazy dog.\n"
        "This is a sample text for testing the REPL environment.\n"
        "It contains multiple sentences that we can analyze.\n"
        "The RLM paradigm allows models to process data programmatically.\n"
    )
    task = "Count the total number of words in the context"

    print(f"Task: {task}")
    print(f"Context: {context[:100]}...")

    rlm_runner = LocalRLMRunner(
        create_chat_fn(),
        system_prompt=RLM_SYSTEM_PROMPT,
        max_iterations=MAX_ITERATIONS,
        max_depth=3,
        verbose=True,
    )
    outcome = rlm_runner.run(context, task)

    print(f"\n{banner}")
    print(f"Final Result: {outcome.final_answer}")
    print(f"Iterations: {outcome.iterations}")
    print(banner)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()