ragavrida committed on
Commit
d9eba60
·
1 Parent(s): 0a98a40

fix: prioritize API_KEY over HF_TOKEN for LiteLLM proxy compliance

Browse files

- Reorder env var priority: API_KEY > OPENAI_API_KEY > HF_TOKEN
- Add debug logging to stderr for API config tracing
- Ensures hackathon-injected credentials are used over .env defaults

Files changed (2) hide show
  1. inference.py +7 -1
  2. static/index.html +268 -0
inference.py CHANGED
@@ -75,7 +75,8 @@ _load_dotenv(os.path.join(os.path.dirname(__file__), ".env"))
75
  # ─── Configuration ────────────────────────────────────────────────────────────
76
 
77
  IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME") or os.getenv("IMAGE_NAME") # If using from_docker_image()
78
- API_KEY = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY") or os.getenv("API_KEY")
 
79
 
80
  API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
81
  MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-4o-mini")
@@ -84,6 +85,11 @@ TEMPERATURE = 0.0
84
  MAX_TOKENS = 500
85
  SUCCESS_SCORE_THRESHOLD = 0.3
86
 
 
 
 
 
 
87
  def _maybe_disable_proxies() -> None:
88
  """
89
  OpenEnv's websocket client will honor HTTP(S)/SOCKS proxy env vars.
 
75
  # ─── Configuration ────────────────────────────────────────────────────────────
76
 
77
  IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME") or os.getenv("IMAGE_NAME") # If using from_docker_image()
78
+ # Prioritize hackathon-injected API_KEY and API_BASE_URL over .env / HF_TOKEN
79
+ API_KEY = os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY") or os.getenv("HF_TOKEN")
80
 
81
  API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
82
  MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-4o-mini")
 
85
  MAX_TOKENS = 500
86
  SUCCESS_SCORE_THRESHOLD = 0.3
87
 
88
+ # Debug: show which API config is active (stderr only)
89
+ print(f"[DEBUG] API_BASE_URL = {API_BASE_URL}", file=sys.stderr, flush=True)
90
+ print(f"[DEBUG] API_KEY source = {'API_KEY' if os.getenv('API_KEY') else 'OPENAI_API_KEY' if os.getenv('OPENAI_API_KEY') else 'HF_TOKEN' if os.getenv('HF_TOKEN') else 'NONE'}", file=sys.stderr, flush=True)
91
+ print(f"[DEBUG] MODEL_NAME = {MODEL_NAME}", file=sys.stderr, flush=True)
92
+
93
  def _maybe_disable_proxies() -> None:
94
  """
95
  OpenEnv's websocket client will honor HTTP(S)/SOCKS proxy env vars.
static/index.html ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>CodeReviewEnv — Interactive RL Benchmark</title>
7
+ <meta name="description" content="The first RL benchmark for structured knowledge work. Train AI agents on real code review tasks.">
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600&display=swap" rel="stylesheet">
10
+ <link rel="stylesheet" href="/static/style.css">
11
+ </head>
12
+ <body>
13
+ <!-- Hero -->
14
+ <header class="hero">
15
+ <div class="hero-bg"></div>
16
+ <div class="container">
17
+ <div class="hero-badge">OpenEnv Compliant</div>
18
+ <h1>🔍 CodeReviewEnv</h1>
19
+ <p class="hero-subtitle">The first RL benchmark for <span class="gradient-text">structured knowledge work</span></p>
20
+ <p class="hero-desc">Train and evaluate LLM agents on real code review tasks — severity triage, queue prioritization, and actionable feedback generation — with deterministic grading and trajectory export for world model research.</p>
21
+ <div class="hero-stats">
22
+ <div class="stat">
23
+ <div class="stat-value">3</div>
24
+ <div class="stat-label">Tasks</div>
25
+ </div>
26
+ <div class="stat">
27
+ <div class="stat-value">50</div>
28
+ <div class="stat-label">PR Templates</div>
29
+ </div>
30
+ <div class="stat">
31
+ <div class="stat-value">7</div>
32
+ <div class="stat-label">Languages</div>
33
+ </div>
34
+ <div class="stat">
35
+ <div class="stat-value">0.69</div>
36
+ <div class="stat-label">GPT-4o-mini</div>
37
+ </div>
38
+ </div>
39
+ </div>
40
+ </header>
41
+
42
+ <!-- Task Cards -->
43
+ <section class="section">
44
+ <div class="container">
45
+ <h2 class="section-title">Three Difficulty Levels</h2>
46
+ <div class="task-grid">
47
+ <div class="task-card" data-task="easy">
48
+ <div class="task-difficulty easy">⭐ Easy</div>
49
+ <h3>Severity Labeling</h3>
50
+ <p>Classify each PR's bug severity: critical, high, medium, low, or none.</p>
51
+ <div class="task-meta">
52
+ <span>5 steps</span>
53
+ <span>GPT-4o-mini: <strong>1.00</strong></span>
54
+ </div>
55
+ <button class="btn btn-primary" onclick="startDemo('easy')">Try It Live →</button>
56
+ </div>
57
+ <div class="task-card" data-task="medium">
58
+ <div class="task-difficulty medium">⭐⭐ Medium</div>
59
+ <h3>Queue Prioritization</h3>
60
+ <p>Sort the review queue by urgency — security first, junior devs next.</p>
61
+ <div class="task-meta">
62
+ <span>3 steps</span>
63
+ <span>GPT-4o-mini: <strong>0.68</strong></span>
64
+ </div>
65
+ <button class="btn btn-secondary" onclick="startDemo('medium')">Try It Live →</button>
66
+ </div>
67
+ <div class="task-card" data-task="hard">
68
+ <div class="task-difficulty hard">⭐⭐⭐ Hard</div>
69
+ <h3>Feedback Generation</h3>
70
+ <p>Write actionable review comments targeting specific buggy lines.</p>
71
+ <div class="task-meta">
72
+ <span>18 steps max</span>
73
+ <span>GPT-4o-mini: <strong>0.38</strong></span>
74
+ </div>
75
+ <button class="btn btn-accent" onclick="startDemo('hard')">Try It Live →</button>
76
+ </div>
77
+ </div>
78
+ </div>
79
+ </section>
80
+
81
+ <!-- Interactive Demo -->
82
+ <section class="section demo-section" id="demo">
83
+ <div class="container">
84
+ <h2 class="section-title">Interactive Demo</h2>
85
+ <div class="demo-container" id="demoContainer" style="display:none;">
86
+ <div class="demo-header">
87
+ <div class="demo-task-badge" id="demoTaskBadge">Easy</div>
88
+ <div class="demo-step" id="demoStep">Step 0 / 5</div>
89
+ <div class="demo-score">
90
+ Score: <span id="demoScore" class="score-value">0.00</span>
91
+ </div>
92
+ </div>
93
+
94
+ <!-- PR Card -->
95
+ <div class="pr-card" id="prCard">
96
+ <div class="pr-header">
97
+ <span class="pr-id" id="prId">PR-001</span>
98
+ <span class="pr-author" id="prAuthor">junior</span>
99
+ </div>
100
+ <h3 class="pr-title" id="prTitle">Loading...</h3>
101
+ <p class="pr-desc" id="prDesc"></p>
102
+ <div class="diff-container" id="diffContainer">
103
+ <div class="diff-header">
104
+ <span class="diff-filename" id="diffFilename">file.py</span>
105
+ <span class="diff-lang" id="diffLang">python</span>
106
+ </div>
107
+ <pre class="diff-code" id="diffCode"></pre>
108
+ </div>
109
+ </div>
110
+
111
+ <!-- Action Panel (Easy) -->
112
+ <div class="action-panel" id="actionPanelEasy" style="display:none;">
113
+ <h4>Classify Bug Severity</h4>
114
+ <div class="severity-buttons">
115
+ <button class="sev-btn critical" onclick="submitAction('critical')">🔴 Critical</button>
116
+ <button class="sev-btn high" onclick="submitAction('high')">🟠 High</button>
117
+ <button class="sev-btn medium" onclick="submitAction('medium')">🟡 Medium</button>
118
+ <button class="sev-btn low" onclick="submitAction('low')">🔵 Low</button>
119
+ <button class="sev-btn none" onclick="submitAction('none')">⚪ None</button>
120
+ </div>
121
+ </div>
122
+
123
+ <!-- Action Panel (Medium) -->
124
+ <div class="action-panel" id="actionPanelMedium" style="display:none;">
125
+ <h4>Drag to Reorder by Priority</h4>
126
+ <div class="queue-list" id="queueList"></div>
127
+ <button class="btn btn-primary" onclick="submitPriorityOrder()">Submit Order →</button>
128
+ </div>
129
+
130
+ <!-- Action Panel (Hard) -->
131
+ <div class="action-panel" id="actionPanelHard" style="display:none;">
132
+ <h4>Write Review Feedback</h4>
133
+ <div class="comment-form">
134
+ <input type="text" id="commentFile" placeholder="Target file" class="input-field">
135
+ <input type="number" id="commentLine" placeholder="Line #" class="input-field input-small">
136
+ <textarea id="commentText" placeholder="Your review comment..." class="input-field textarea-field"></textarea>
137
+ <div class="hard-buttons">
138
+ <button class="btn btn-primary" onclick="submitComment()">💬 Add Comment</button>
139
+ <button class="btn btn-success" onclick="submitDecision('approve')">✅ Approve</button>
140
+ <button class="btn btn-danger" onclick="submitDecision('request_changes')">🔄 Request Changes</button>
141
+ </div>
142
+ </div>
143
+ </div>
144
+
145
+ <!-- Reward Flash -->
146
+ <div class="reward-flash" id="rewardFlash" style="display:none;">
147
+ <span class="reward-value" id="rewardValue">+1.00</span>
148
+ <span class="reward-reason" id="rewardReason"></span>
149
+ </div>
150
+
151
+ <!-- Trajectory -->
152
+ <div class="trajectory" id="trajectory">
153
+ <h4>Episode Trajectory</h4>
154
+ <div class="trajectory-steps" id="trajectorySteps"></div>
155
+ </div>
156
+ </div>
157
+
158
+ <div class="demo-placeholder" id="demoPlaceholder">
159
+ <div class="placeholder-icon">🎮</div>
160
+ <p>Select a task above to start an interactive demo</p>
161
+ </div>
162
+ </div>
163
+ </section>
164
+
165
+ <!-- Research Section -->
166
+ <section class="section research-section">
167
+ <div class="container">
168
+ <h2 class="section-title">Research: Knowledge-Work World Models</h2>
169
+ <div class="research-grid">
170
+ <div class="research-card">
171
+ <div class="research-icon">🧠</div>
172
+ <h3>Semantic MDP</h3>
173
+ <p>States are structured text (code diffs, bug categories). Transitions depend on professional judgment, not physics.</p>
174
+ </div>
175
+ <div class="research-card">
176
+ <div class="research-icon">📊</div>
177
+ <h3>Trajectory Export</h3>
178
+ <p>Every episode exports (s, a, r, s') transitions in JSONL for training Knowledge-Work World Models.</p>
179
+ </div>
180
+ <div class="research-card">
181
+ <div class="research-icon">🔬</div>
182
+ <h3>Deterministic Grading</h3>
183
+ <p>No LLM-as-judge. Ordinal matching, Kendall Tau correlation, and 5-component weighted scorers.</p>
184
+ </div>
185
+ <div class="research-card">
186
+ <div class="research-icon">🛡️</div>
187
+ <h3>Anti-Exploit</h3>
188
+ <p>Spam penalties, consistency checks, and decaying rewards prevent trivial gaming strategies.</p>
189
+ </div>
190
+ </div>
191
+ </div>
192
+ </section>
193
+
194
+ <!-- Benchmark Table -->
195
+ <section class="section">
196
+ <div class="container">
197
+ <h2 class="section-title">Benchmark Comparison</h2>
198
+ <div class="table-wrapper">
199
+ <table class="benchmark-table">
200
+ <thead>
201
+ <tr>
202
+ <th>Benchmark</th>
203
+ <th>State Space</th>
204
+ <th>Transition</th>
205
+ <th>World Model?</th>
206
+ <th>Domain</th>
207
+ </tr>
208
+ </thead>
209
+ <tbody>
210
+ <tr><td>MuJoCo</td><td>ℝⁿ (joints)</td><td>Physics sim</td><td>✅ Dreamer</td><td>Robotics</td></tr>
211
+ <tr><td>Atari</td><td>Pixels</td><td>Game engine</td><td>✅ MuZero</td><td>Games</td></tr>
212
+ <tr><td>TextWorld</td><td>Synthetic text</td><td>Game rules</td><td>⚠️ Li et al.</td><td>Text games</td></tr>
213
+ <tr><td>SWE-bench</td><td>Code</td><td>N/A</td><td>❌ Eval only</td><td>SE</td></tr>
214
+ <tr class="highlight-row"><td><strong>CodeReviewEnv</strong></td><td><strong>Structured text</strong></td><td><strong>Professional judgment</strong></td><td><strong>✅ KW-WM</strong></td><td><strong>Knowledge work</strong></td></tr>
215
+ </tbody>
216
+ </table>
217
+ </div>
218
+ </div>
219
+ </section>
220
+
221
+ <!-- API Section -->
222
+ <section class="section api-section">
223
+ <div class="container">
224
+ <h2 class="section-title">API Endpoints</h2>
225
+ <div class="api-grid">
226
+ <div class="api-card">
227
+ <code class="api-method post">POST</code>
228
+ <code class="api-path">/reset</code>
229
+ <p>Start new episode</p>
230
+ </div>
231
+ <div class="api-card">
232
+ <code class="api-method post">POST</code>
233
+ <code class="api-path">/step</code>
234
+ <p>Take an action</p>
235
+ </div>
236
+ <div class="api-card">
237
+ <code class="api-method get">GET</code>
238
+ <code class="api-path">/state</code>
239
+ <p>Current state</p>
240
+ </div>
241
+ <div class="api-card">
242
+ <code class="api-method get">GET</code>
243
+ <code class="api-path">/health</code>
244
+ <p>Health check</p>
245
+ </div>
246
+ <div class="api-card">
247
+ <code class="api-method get">GET</code>
248
+ <code class="api-path">/export_trajectory</code>
249
+ <p>JSONL trajectory</p>
250
+ </div>
251
+ <div class="api-card">
252
+ <code class="api-method get">GET</code>
253
+ <code class="api-path">/docs</code>
254
+ <p>OpenAPI docs</p>
255
+ </div>
256
+ </div>
257
+ </div>
258
+ </section>
259
+
260
+ <footer class="footer">
261
+ <div class="container">
262
+ <p>CodeReviewEnv — OpenEnv Hackathon 2026 | BSD-3-Clause</p>
263
+ </div>
264
+ </footer>
265
+
266
+ <script src="/static/app.js"></script>
267
+ </body>
268
+ </html>