# repl/server/app.py
# NOTE: Hugging Face Hub upload artifact — uploaded by sergiopaniego
# ("Upload folder using huggingface_hub", revision 4123f35, verified).
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
FastAPI application for the REPL Environment.
This module creates an HTTP server that exposes the REPLEnvironment
over HTTP and WebSocket endpoints, compatible with EnvClient.
The server includes llm_query and llm_batch support via HuggingFace Inference API,
enabling the Recursive Language Model (RLM) paradigm.
Usage:
# Development (with auto-reload):
uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
# Production:
uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
# Or run directly:
uv run --project . server
Environment Variables:
HF_TOKEN: HuggingFace API token for Inference API (optional for public models)
LLM_MODEL: Model to use for llm_query/llm_batch (default: Qwen/Qwen3-Coder-480B-A35B-Instruct)
"""
import os
from typing import List
# Support both in-repo and standalone imports
try:
# In-repo imports (when running from OpenEnv repository)
from openenv.core.env_server.http_server import create_app
from ..models import REPLAction, REPLObservation
from .repl_environment import REPLEnvironment
except ImportError:
# Standalone imports (when environment is standalone with openenv from pip)
from openenv.core.env_server.http_server import create_app
from models import REPLAction, REPLObservation
from server.repl_environment import REPLEnvironment
# ============== LLM CONFIGURATION ==============
# Model used by llm_query and llm_batch; override with the LLM_MODEL env var.
# Default is a Qwen3-Coder *instruct* model (no "thinking" mode, plain responses).
LLM_MODEL = os.environ.get("LLM_MODEL", "Qwen/Qwen3-Coder-480B-A35B-Instruct")
# Optional HuggingFace API token; public models can be queried without one.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# ===============================================
def create_llm_query_fn():
    """
    Build the llm_query callable backed by the HuggingFace Inference API.

    The sub-LLM can handle roughly 500K characters of input context; the
    caller should embed any text to analyze directly in the prompt.

    Example usage in REPL:
        chunk = context[0:100000]
        answer = llm_query(f"Count the spells in this text: {chunk}")

    Returns:
        A function mapping a prompt string to the model's response string.
    """
    from huggingface_hub import InferenceClient

    # One shared client; module-level LLM_MODEL / HF_TOKEN configure it.
    inference_client = InferenceClient(model=LLM_MODEL, token=HF_TOKEN)

    def llm_query(prompt: str) -> str:
        """Send one prompt to the LLM and return its reply.

        Args:
            prompt: The full prompt, including any text/context to analyze.
                Example: f"Find spells in: {chunk}"

        Returns:
            The LLM's response string, or an error description on failure.
        """
        try:
            # Chat models such as Qwen go through the chat_completion endpoint.
            result = inference_client.chat_completion(
                messages=[{"role": "user", "content": prompt}],
                max_tokens=2048,  # generous budget for longer answers
                temperature=0.7,
            )
            return result.choices[0].message.content
        except Exception as e:
            # Best-effort: report the failure to the REPL instead of raising.
            return f"Error calling LLM: {e}"

    return llm_query
def create_llm_batch_fn(llm_query_fn):
    """
    Wrap a single-query LLM function into a batched variant.

    Args:
        llm_query_fn: Callable mapping one prompt string to one response.

    Returns:
        Function that maps a list of prompts to a list of responses,
        preserving input order.
    """

    def llm_batch(prompts: List[str]) -> List[str]:
        """Run every prompt through the LLM and collect the responses in order."""
        # Sequential for now; an async fan-out would be a drop-in upgrade.
        return list(map(llm_query_fn, prompts))

    return llm_batch
def create_repl_environment_factory():
    """
    Build a factory that produces REPLEnvironment instances with LLM support.

    The returned factory is invoked once per WebSocket session, so each
    session gets a fresh environment while the LLM helpers are shared.
    """
    # Construct the LLM helpers once; every environment instance reuses them.
    query_fn = create_llm_query_fn()
    batch_fn = create_llm_batch_fn(query_fn)

    def factory():
        """Create a fresh REPLEnvironment wired with the shared LLM helpers."""
        return REPLEnvironment(llm_query_fn=query_fn, llm_batch_fn=batch_fn)

    return factory
# Check if LLM support should be enabled
# Accepted truthy values (case-insensitive): "true", "1", "yes"; anything else disables.
ENABLE_LLM = os.environ.get("ENABLE_LLM", "true").lower() in ("true", "1", "yes")
if ENABLE_LLM:
    print(f"[REPL Server] LLM support ENABLED with model: {LLM_MODEL}")
    env_factory = create_repl_environment_factory()
else:
    print("[REPL Server] LLM support DISABLED")
    # The class itself serves as the factory: sessions get a bare environment
    # with no llm_query/llm_batch functions wired in.
    env_factory = REPLEnvironment
# Create the app with web interface and README integration
app = create_app(env_factory, REPLAction, REPLObservation, env_name="repl_env")
def main():
    """
    Entry point for direct execution via uv run or python -m.

    This function enables running the server without Docker:
        uv run --project . server
        python -m envs.repl_env.server.app
        openenv serve repl_env
    """
    # Imported lazily so merely importing this module never pulls in uvicorn.
    import uvicorn

    host, port = "0.0.0.0", 8000
    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    main()