File size: 5,497 Bytes
ba71ff7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eca0e48
 
 
 
 
 
 
 
 
 
 
 
11cb018
 
 
 
 
 
 
 
 
ba71ff7
 
 
 
eca0e48
ba71ff7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
FastAPI application for the REPL Environment.

This module creates an HTTP server that exposes the REPLEnvironment
over HTTP and WebSocket endpoints, compatible with EnvClient.

The server includes llm_query and llm_query_batched support via HuggingFace Inference API,
enabling the Recursive Language Model (RLM) paradigm.

LLM Token Configuration:
    1. Client can pass `hf_token` in reset() - RECOMMENDED
    2. Server fallback: HF_TOKEN environment variable

LLM functions are created dynamically in REPLEnvironment.reset() based on the
available token (client or server).

Usage:
    # Development (with auto-reload):
    uvicorn server.app:app --reload --host 0.0.0.0 --port 8000

    # Production:
    uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4

    # Or run directly:
    uv run --project . server

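Environment Variables:
    HF_TOKEN: Fallback HuggingFace API token (client token takes priority)
    LLM_MODEL: Model to use for llm_query/llm_query_batched (default: Qwen/Qwen3.5-9B)
    MAX_CONCURRENT_ENVS: Maximum concurrent WebSocket sessions (default: 8)
    REPL_MAX_ITERATIONS: Passed to REPLEnvironment as max_iterations (default: 30)
    REPL_MAX_OUTPUT_LENGTH: Passed to REPLEnvironment as max_output_length (default: 8192)
    REPL_CONTEXT_PREVIEW_LENGTH: Passed to REPLEnvironment as context_preview_length (default: 500)
    REPL_RLM_MAX_DEPTH: Passed to REPLEnvironment as rlm_max_depth (default: 2)
    REPL_RLM_MAX_ITERATIONS: Passed to REPLEnvironment as rlm_max_iterations (default: 30)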
"""

import inspect
import logging
import os

try:
    from openenv.core.env_server.http_server import create_app

    from ..models import REPLAction, REPLObservation
    from .gradio_ui import build_repl_gradio_app
    from .repl_environment import REPLEnvironment
except ImportError:
    from models import REPLAction, REPLObservation
    from openenv.core.env_server.http_server import create_app
    from server.gradio_ui import build_repl_gradio_app
    from server.repl_environment import REPLEnvironment


# ============== CONFIGURATION ==============
def _env_int(name: str, default: int) -> int:
    """Read an integer setting from the environment.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        The parsed integer, or ``default`` when the variable is unset or
        contains only whitespace.

    Raises:
        ValueError: If the variable is set to something that is not an
            integer. The message names the variable so a bad deployment
            setting is easy to locate at startup.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        # A variable that is declared but left empty is treated as unset
        # instead of crashing the import with a bare int('') error.
        return default
    try:
        return int(raw)
    except ValueError:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from None


# All settings are read once, at import time.
LLM_MODEL = os.environ.get("LLM_MODEL", "Qwen/Qwen3.5-9B")
HF_TOKEN = os.environ.get("HF_TOKEN")  # None when no server-side token is set
REPL_MAX_ITERATIONS = _env_int("REPL_MAX_ITERATIONS", 30)
REPL_MAX_OUTPUT_LENGTH = _env_int("REPL_MAX_OUTPUT_LENGTH", 8192)
REPL_CONTEXT_PREVIEW_LENGTH = _env_int("REPL_CONTEXT_PREVIEW_LENGTH", 500)
REPL_RLM_MAX_DEPTH = _env_int("REPL_RLM_MAX_DEPTH", 2)
REPL_RLM_MAX_ITERATIONS = _env_int("REPL_RLM_MAX_ITERATIONS", 30)
MAX_CONCURRENT_ENVS = _env_int("MAX_CONCURRENT_ENVS", 8)
# ==========================================

# Module-level logger.
_logger = logging.getLogger(__name__)

# Report at import time whether a server-side fallback token is configured.
# The messages go to stdout via print(), not through the logger.
if HF_TOKEN:
    _startup_notes = (
        "[REPL Server] LLM support ENABLED (server token configured)",
        f"[REPL Server] Default model: {LLM_MODEL}",
    )
else:
    _startup_notes = (
        "[REPL Server] No server HF_TOKEN configured",
        "[REPL Server] LLM functions will be enabled if client passes hf_token in reset()",
    )
for _note in _startup_notes:
    print(_note)


def create_repl_environment() -> REPLEnvironment:
    """Build a REPLEnvironment from the module-level server settings.

    Takes no arguments; every limit comes from the ``REPL_*`` constants
    defined at the top of this module.

    LLM functions are not configured here: they are created dynamically
    during ``reset()`` when a client passes ``hf_token``. Rewards are
    computed via the default ``REPLRubric``; pass ``expected_answer`` at
    reset time for outcome-based scoring.

    Returns:
        A newly constructed REPLEnvironment.
    """
    server_settings = {
        "max_iterations": REPL_MAX_ITERATIONS,
        "max_output_length": REPL_MAX_OUTPUT_LENGTH,
        "context_preview_length": REPL_CONTEXT_PREVIEW_LENGTH,
        "rlm_max_depth": REPL_RLM_MAX_DEPTH,
        "rlm_max_iterations": REPL_RLM_MAX_ITERATIONS,
    }
    return REPLEnvironment(**server_settings)


# Create the app with web interface and README integration.
#
# The installed openenv-core may predate some create_app() keyword arguments,
# so its signature is inspected and only the parameters it actually declares
# are passed.
_sig = inspect.signature(create_app)
_supported = _sig.parameters

# Arguments passed regardless of the installed version.
create_app_kwargs: dict = {
    "env_name": "repl_env",
    "max_concurrent_envs": MAX_CONCURRENT_ENVS,
}

if "gradio_builder" in _supported:
    # Opt in to the primary-tab behaviour when supported so visitors land on
    # the custom REPL UI instead of the auto-generated schema Playground.
    create_app_kwargs["gradio_builder"] = build_repl_gradio_app
    _optional = {
        "custom_tab_name": "REPL",
        "custom_tab_primary": True,
        # The auto-generated Playground is not useful for REPL (its real
        # surface is injected Python helpers, not the bare action schema),
        # so serve only the custom tab.
        "show_default_tab": False,
        "title_override": "OpenEnv REPL — Recursive Language Model playground",
    }
    # Each tab option is forwarded only if create_app() declares it.
    create_app_kwargs.update(
        {key: value for key, value in _optional.items() if key in _supported}
    )
else:
    _logger.warning(
        "Installed openenv-core does not support gradio_builder; "
        "custom REPL Gradio tab will not be available."
    )

app = create_app(
    create_repl_environment,
    REPLAction,
    REPLObservation,
    **create_app_kwargs,
)


def main(host: str = "0.0.0.0", port: int = 8000) -> None:
    """
    Entry point for direct execution via uv run or python -m.

    This function enables running the server without Docker:
        uv run --project . server
        python -m envs.repl_env.server.app
        openenv serve repl_env

    Args:
        host: Address passed to uvicorn as the bind host. Defaults to
            "0.0.0.0".
        port: TCP port passed to uvicorn. Defaults to 8000.

    Calling ``main()`` with no arguments behaves exactly as before.
    """
    # Imported inside the function, so uvicorn is only loaded when the
    # server is started through this entry point.
    import uvicorn

    uvicorn.run(app, host=host, port=port)


# Start the server when this file is executed as a script.
if __name__ == "__main__":
    main()