# Scraped from a Hugging Face Space file view (status: Running; file size: 4,102 bytes).
import os
import json
import logging
import requests
from openai import OpenAI
from environment.models import Action, Issue
# Emit INFO-level records so progress is visible in the Hugging Face logs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- CONFIGURATION ---
# Every setting below is read from the process environment (the judges are
# expected to supply these); the literals are fallbacks used when a variable
# is unset.
API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.groq.com/openai/v1")

# First non-empty credential wins, checked in this order.
API_KEY = (
    os.environ.get("GROQ_API_KEY")
    or os.environ.get("HF_TOKEN")
    or os.environ.get("OPENAI_API_KEY")
)

MODEL_NAME = os.environ.get("MODEL_NAME", "llama3-70b-8192")

# Base URL of the review environment; defaults to the hosted Space.
ENV_URL = os.environ.get("ENV_URL", "https://syam-sashank-codereview-env.hf.space")

# Shared OpenAI-compatible client used for all chat completions.
client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
def parse_llm_response(text: str) -> Action:
    """Convert the raw LLM reply into a final ``Action``.

    The reply is expected to contain a JSON list of issue objects. The
    following wrappers are tolerated:

    * a Markdown fence (```` ```json ```` or a bare ```` ``` ````),
    * prose or a non-``json`` fence tag around the list (the span from the
      first ``[`` to the last ``]`` is retried when direct parsing fails),
    * a JSON object whose ``"issues"`` key holds the list.

    Args:
        text: Raw message content returned by the model.

    Returns:
        ``Action(issues=[...], final=True)``. On any parsing or validation
        failure the error is logged and an ``Action`` with an empty issue
        list is returned instead of raising, so the caller can still submit
        a step.
    """
    try:
        payload = text
        # Strip a Markdown code fence if the model wrapped its answer in one.
        if "```json" in payload:
            payload = payload.split("```json")[1].split("```")[0]
        elif "```" in payload:
            payload = payload.split("```")[1].split("```")[0]
        payload = payload.strip()

        try:
            data = json.loads(payload)
        except json.JSONDecodeError:
            # Models often add commentary or an unexpected fence tag; retry on
            # the outermost bracketed span before giving up.
            start = payload.find("[")
            end = payload.rfind("]")
            if start == -1 or end < start:
                raise
            data = json.loads(payload[start:end + 1])

        # Accept {"issues": [...]} as an alternative to a bare list.
        if isinstance(data, dict) and isinstance(data.get("issues"), list):
            data = data["issues"]
        if not isinstance(data, list):
            raise ValueError(
                f"expected a JSON list of issues, got {type(data).__name__}"
            )

        # Build each item through the Issue model so bad fields are rejected.
        issues = [Issue(**item) for item in data]
        return Action(issues=issues, final=True)
    except Exception as e:
        # Deliberate best-effort boundary: never let a malformed reply abort
        # the episode; submit an empty action instead.
        logger.error(f"Failed to parse LLM response: {e}")
        return Action(issues=[], final=True)
def run_task(task_id: str, timeout: float = 60.0) -> float:
    """Run one task end to end and return the reward reported for it.

    Steps: POST ``{ENV_URL}/reset`` to open a session, ask the model to review
    the returned code, convert the reply into an ``Action``, POST it to
    ``{ENV_URL}/step``, and read ``reward["value"]`` from the response.

    Args:
        task_id: Task identifier sent as ``"task_id"`` in the reset request.
        timeout: Seconds to wait on each environment HTTP request. Without a
            timeout ``requests`` waits indefinitely, so a stalled
            environment would hang the whole run.

    Returns:
        The value found at ``["reward"]["value"]`` in the step response.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            environment answers with an error status.
        KeyError: If a response lacks one of the expected keys.
    """
    logger.info(f"--- Starting Task: {task_id} ---")

    # 1. Reset environment
    resp = requests.post(
        f"{ENV_URL}/reset", json={"task_id": task_id}, timeout=timeout
    )
    resp.raise_for_status()
    reset_data = resp.json()
    session_id = reset_data["session_id"]
    obs = reset_data["observation"]

    # 2. Build the prompt
    prompt = f"""You are a professional security and code reviewer.
Analyze the following Python code and identify all bugs, style issues, security flaws, performance anti-patterns, and missing documentation.
Return ONLY a JSON list where each item has:
- "line": (integer)
- "category": (one of: bug, style, security, performance, documentation)
- "description": (string, max 200 chars)
Code to review:
{obs['code']}
"""
    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.0,  # keep baseline scores as reproducible as possible
        )
        # The message content may be None; treat that as "no issues found".
        raw_content = response.choices[0].message.content or "[]"
    except Exception as e:
        # An LLM failure should still produce a (likely zero) score rather
        # than abort the task, so fall back to an empty issue list.
        logger.error(f"LLM Completion error: {e}")
        raw_content = "[]"

    # Convert LLM text to Action object
    action = parse_llm_response(raw_content)

    # 3. Take step in the environment
    step_resp = requests.post(
        f"{ENV_URL}/step",
        json={
            "session_id": session_id,
            "action": action.dict(),
        },
        timeout=timeout,
    )
    step_resp.raise_for_status()
    result_data = step_resp.json()

    # Reward value as reported by the environment for this step.
    final_reward = result_data["reward"]["value"]
    logger.info(f"Result for {task_id}: Score = {final_reward:.3f}")
    return final_reward
if __name__ == "__main__":
    # Run the three difficulty tiers (the competition requires scores for at
    # least 3 tasks) and collect one score per task.
    task_list = ["easy", "medium", "hard"]
    final_scores = {}
    print(f"Connecting to environment at: {ENV_URL}")

    for task_name in task_list:
        try:
            outcome = run_task(task_name)
        except Exception as exc:
            # A failed task is recorded as 0.0 so the report stays complete.
            logger.error(f"Task {task_name} failed to execute: {exc}")
            outcome = 0.0
        final_scores[task_name] = outcome

    # Summary table for the logs.
    rule = "=" * 30
    print("\n" + rule)
    print(" BASELINE PERFORMANCE REPORT ")
    print(rule)
    for task_name, outcome in final_scores.items():
        print(f"Task: {task_name:8} | Score: {outcome:.3f}")
    print(rule)