fix: align inference with checklist and fix hf deployment headers
Browse files
- app.py +12 -5
- inference.py +9 -33
app.py
CHANGED
|
@@ -76,10 +76,9 @@ app = FastAPI(
|
|
| 76 |
from fastapi.middleware.trustedhost import TrustedHostMiddleware
|
| 77 |
from uvicorn.middleware.proxy_headers import ProxyHeadersMiddleware
|
| 78 |
|
| 79 |
-
# 1. Trusted Host (Prevent Host-header injection)
|
| 80 |
app.add_middleware(
|
| 81 |
TrustedHostMiddleware,
|
| 82 |
-
allowed_hosts=["*"] if settings.app_env in ("development", "test") else [
|
| 83 |
)
|
| 84 |
|
| 85 |
# 2. Proxy Headers (Support Docker/Reverse-proxy)
|
|
@@ -88,7 +87,7 @@ app.add_middleware(ProxyHeadersMiddleware, trusted_hosts="*")
|
|
| 88 |
# 3. CORS
|
| 89 |
app.add_middleware(
|
| 90 |
CORSMiddleware,
|
| 91 |
-
allow_origins=["*"] if settings.app_env == "development" else [
|
| 92 |
allow_credentials=True,
|
| 93 |
allow_methods=["*"],
|
| 94 |
allow_headers=["*"],
|
|
@@ -99,10 +98,18 @@ app.add_middleware(
|
|
| 99 |
async def add_security_headers(request: Request, call_next):
|
| 100 |
response = await call_next(request)
|
| 101 |
response.headers["X-Content-Type-Options"] = "nosniff"
|
| 102 |
-
response.headers["X-Frame-Options"] = "
|
| 103 |
response.headers["X-XSS-Protection"] = "1; mode=block"
|
| 104 |
response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
return response
|
| 107 |
|
| 108 |
# 5. Rate Limiting
|
|
|
|
| 76 |
from fastapi.middleware.trustedhost import TrustedHostMiddleware
|
| 77 |
from uvicorn.middleware.proxy_headers import ProxyHeadersMiddleware
|
| 78 |
|
|
|
|
| 79 |
app.add_middleware(
|
| 80 |
TrustedHostMiddleware,
|
| 81 |
+
allowed_hosts=["*"] if settings.app_env in ("development", "test") else ["localhost", "127.0.0.1", "*.hf.space", "huggingface.co"]
|
| 82 |
)
|
| 83 |
|
| 84 |
# 2. Proxy Headers (Support Docker/Reverse-proxy)
|
|
|
|
| 87 |
# 3. CORS
|
| 88 |
app.add_middleware(
|
| 89 |
CORSMiddleware,
|
| 90 |
+
allow_origins=["*"] if settings.app_env == "development" else ["*"],
|
| 91 |
allow_credentials=True,
|
| 92 |
allow_methods=["*"],
|
| 93 |
allow_headers=["*"],
|
|
|
|
| 98 |
async def add_security_headers(request: Request, call_next):
|
| 99 |
response = await call_next(request)
|
| 100 |
response.headers["X-Content-Type-Options"] = "nosniff"
|
| 101 |
+
response.headers["X-Frame-Options"] = "SAMEORIGIN"
|
| 102 |
response.headers["X-XSS-Protection"] = "1; mode=block"
|
| 103 |
response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"
|
| 104 |
+
# Added frame-ancestors to allow Hugging Face to embed the space
|
| 105 |
+
response.headers["Content-Security-Policy"] = (
|
| 106 |
+
"default-src 'self'; "
|
| 107 |
+
"script-src 'self' 'unsafe-inline'; "
|
| 108 |
+
"style-src 'self' 'unsafe-inline'; "
|
| 109 |
+
"img-src 'self' data:; "
|
| 110 |
+
"connect-src 'self' ws: wss:; "
|
| 111 |
+
"frame-ancestors 'self' https://*.huggingface.co https://huggingface.co;"
|
| 112 |
+
)
|
| 113 |
return response
|
| 114 |
|
| 115 |
# 5. Rate Limiting
|
inference.py
CHANGED
|
@@ -20,12 +20,12 @@ import time
|
|
| 20 |
import requests
|
| 21 |
from openai import OpenAI
|
| 22 |
|
| 23 |
-
# ── Environment Variables (
|
| 24 |
-
API_BASE_URL = os.
|
| 25 |
-
MODEL_NAME = os.
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
ENV_URL = os.
|
| 29 |
|
| 30 |
# ── Config ────────────────────────────────────────────────────────────────────
|
| 31 |
TASKS = ["bug_detection", "security_audit", "architectural_review"]
|
|
@@ -42,7 +42,7 @@ def log_start(task: str, env: str, model: str):
|
|
| 42 |
print(f"[START] task={task} env={env} model={model}", flush=True)
|
| 43 |
|
| 44 |
def log_step(step: int, action: str, reward: float, done: bool, error):
|
| 45 |
-
error_str = str(error) if error else "
|
| 46 |
done_str = "true" if done else "false"
|
| 47 |
print(
|
| 48 |
f"[STEP] step={step} action={action} reward={reward:.2f} "
|
|
@@ -198,8 +198,7 @@ def sanitize_action(action_dict: dict, task_id: str) -> dict:
|
|
| 198 |
|
| 199 |
def run_episode(task_id: str, seed: int) -> dict:
|
| 200 |
"""Run a single episode. Returns {score, steps, success, rewards}."""
|
| 201 |
-
|
| 202 |
-
log_start(task_id, benchmark, MODEL_NAME)
|
| 203 |
|
| 204 |
# ── Reset ──────────────────────────────────────────────────────────────
|
| 205 |
try:
|
|
@@ -288,43 +287,20 @@ def run_episode(task_id: str, seed: int) -> dict:
|
|
| 288 |
|
| 289 |
|
| 290 |
def main():
|
| 291 |
-
"""Run all tasks across multiple seeds
|
| 292 |
-
print("=" * 60, flush=True)
|
| 293 |
-
print("CodeLens Baseline", flush=True)
|
| 294 |
-
print(f"Model: {MODEL_NAME}", flush=True)
|
| 295 |
-
print(f"EnvURL: {ENV_URL}", flush=True)
|
| 296 |
-
print("=" * 60, flush=True)
|
| 297 |
|
| 298 |
all_results = []
|
| 299 |
|
| 300 |
for task_id in TASKS:
|
| 301 |
task_scores = []
|
| 302 |
for seed in SEEDS:
|
| 303 |
-
print(f"\n--- Task: {task_id} | Seed: {seed} ---", flush=True)
|
| 304 |
result = run_episode(task_id, seed)
|
| 305 |
all_results.append(result)
|
| 306 |
task_scores.append(result["score"])
|
| 307 |
|
| 308 |
avg_score = sum(task_scores) / len(task_scores) if task_scores else 0.0
|
| 309 |
-
print(f"\n[SUMMARY] task={task_id} avg_score={avg_score:.4f} seeds={SEEDS}", flush=True)
|
| 310 |
-
|
| 311 |
-
# ── Overall baseline table ─────────────────────────────────────────────
|
| 312 |
-
print("\n" + "=" * 60, flush=True)
|
| 313 |
-
print("BASELINE RESULTS", flush=True)
|
| 314 |
-
print("=" * 60, flush=True)
|
| 315 |
-
print(f"{'Task':<30} {'Avg Score':>10} {'Success Rate':>14}", flush=True)
|
| 316 |
-
print("-" * 56, flush=True)
|
| 317 |
-
|
| 318 |
-
for task_id in TASKS:
|
| 319 |
-
task_results = [r for r in all_results if r["task_id"] == task_id]
|
| 320 |
-
avg = sum(r["score"] for r in task_results) / len(task_results)
|
| 321 |
-
succ = sum(1 for r in task_results if r["success"]) / len(task_results)
|
| 322 |
-
print(f"{task_id:<30} {avg:>10.4f} {succ*100:>13.1f}%", flush=True)
|
| 323 |
|
| 324 |
overall = sum(r["score"] for r in all_results) / len(all_results)
|
| 325 |
-
print("-" * 56, flush=True)
|
| 326 |
-
print(f"{'OVERALL':<30} {overall:>10.4f}", flush=True)
|
| 327 |
-
|
| 328 |
return 0
|
| 329 |
|
| 330 |
|
|
|
|
| 20 |
import requests
|
| 21 |
from openai import OpenAI
|
| 22 |
|
| 23 |
+
# ── Environment Variables (strictly following OpenEnv checklist) ────────────────
|
| 24 |
+
API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
|
| 25 |
+
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-3.5-turbo")
|
| 26 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 27 |
+
LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
|
| 28 |
+
ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
|
| 29 |
|
| 30 |
# ── Config ────────────────────────────────────────────────────────────────────
|
| 31 |
TASKS = ["bug_detection", "security_audit", "architectural_review"]
|
|
|
|
| 42 |
print(f"[START] task={task} env={env} model={model}", flush=True)
|
| 43 |
|
| 44 |
def log_step(step: int, action: str, reward: float, done: bool, error):
|
| 45 |
+
error_str = str(error) if error else "None"
|
| 46 |
done_str = "true" if done else "false"
|
| 47 |
print(
|
| 48 |
f"[STEP] step={step} action={action} reward={reward:.2f} "
|
|
|
|
| 198 |
|
| 199 |
def run_episode(task_id: str, seed: int) -> dict:
|
| 200 |
"""Run a single episode. Returns {score, steps, success, rewards}."""
|
| 201 |
+
log_start(task_id, ENV_URL, MODEL_NAME)
|
|
|
|
| 202 |
|
| 203 |
# ── Reset ──────────────────────────────────────────────────────────────
|
| 204 |
try:
|
|
|
|
| 287 |
|
| 288 |
|
| 289 |
def main():
|
| 290 |
+
"""Run all tasks across multiple seeds."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
|
| 292 |
all_results = []
|
| 293 |
|
| 294 |
for task_id in TASKS:
|
| 295 |
task_scores = []
|
| 296 |
for seed in SEEDS:
|
|
|
|
| 297 |
result = run_episode(task_id, seed)
|
| 298 |
all_results.append(result)
|
| 299 |
task_scores.append(result["score"])
|
| 300 |
|
| 301 |
avg_score = sum(task_scores) / len(task_scores) if task_scores else 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
|
| 303 |
overall = sum(r["score"] for r in all_results) / len(all_results)
|
|
|
|
|
|
|
|
|
|
| 304 |
return 0
|
| 305 |
|
| 306 |
|