File size: 5,218 Bytes
e650f0f
 
 
 
 
 
 
 
 
 
 
1f16a8d
e650f0f
 
 
 
 
 
 
 
 
 
 
 
1f16a8d
 
 
 
 
 
 
 
f0dddbe
1f16a8d
 
 
 
f0dddbe
 
1f16a8d
e650f0f
 
 
1f16a8d
b1a6d2a
 
e650f0f
 
 
 
 
 
1f16a8d
 
e650f0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f16a8d
e650f0f
 
 
 
 
 
 
 
 
1f16a8d
 
e650f0f
 
 
1f16a8d
 
e650f0f
 
 
 
 
 
 
 
 
 
 
1f16a8d
e650f0f
 
 
1f16a8d
e650f0f
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
from models import ContractValidationAction
from client import ContractValidationEnv
import os
import json
import textwrap
import asyncio
from typing import List, Optional

from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables
# (python-dotenv lets a local .env file supply the values read below)
load_dotenv()


# --- MANDATORY ENV VARS ---
# API_BASE_URL and MODEL_NAME fall back to public defaults; HF_TOKEN has no
# fallback — main() aborts with a warning when it is missing.
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
HF_TOKEN = os.getenv("HF_TOKEN")

BENCHMARK = "contract_validation"  # NOTE(review): not referenced elsewhere in this chunk
MAX_STEPS = 15  # hard cap on environment steps per episode


# --- THE STRICT FORMATTING FIX ---
# The grader matches these marker lines verbatim — plain strings, NOT JSON.
def log_start(task: str) -> None:
    """Print the exact ``[START]`` marker line the grader parses."""
    banner = f"[START] task={task}"
    print(banner, flush=True)


def log_step(step: int, reward: float) -> None:
    """Print the exact ``[STEP]`` marker with a rounded, non-negative reward.

    ``max(0, ...)`` with the int literal also normalizes 0.0/-0.0 to ``0``,
    which the grader-facing output depends on.
    """
    rounded = round(reward, 2)
    clamped_reward = max(0, rounded)
    line = f"[STEP] step={step} reward={clamped_reward}"
    print(line, flush=True)


def log_end(task: str, score: float, steps: int) -> None:
    """Print the exact ``[END]`` marker line the grader parses.

    The grader rejects scores of exactly 0.0 or 1.0, so the rounded score
    is pinned into the closed interval [0.01, 0.99] before printing.
    """
    bounded = round(score, 2)
    bounded = min(0.99, bounded)
    bounded = max(0.01, bounded)
    print(f"[END] task={task} score={bounded} steps={steps}", flush=True)


async def run_task(client: OpenAI, task_level: str):
    """Run one episode of the contract-validation benchmark.

    Connects to the live Space, loops the LLM over the environment until it
    submits a final answer (or MAX_STEPS is reached), and emits the exact
    [START]/[STEP]/[END] marker lines the grader parses.

    Args:
        client: OpenAI-compatible client pointed at the HF router.
        task_level: difficulty level forwarded to ``env.reset``.
    """
    # Direct connection to your live, validated Space
    space_url = "https://envarchitects-contract-validation-env.hf.space"
    env = ContractValidationEnv(base_url=space_url)

    # The system prompt is loop-invariant, so build it once up front instead
    # of re-dedenting the same literal on every step of the episode.
    system_prompt = textwrap.dedent("""

        You are a precise legal AI. Review the clauses and output valid JSON.

        Your JSON must match exactly:

        {

          "thoughts": "your reasoning",

          "clause_id": 1,

          "risk_type": "liability",

          "submit_final": false

        }

        Valid risk types: liability, payment, termination, confidentiality, compliance, none.

    """).strip()

    try:
        result = await env.reset(task_level=task_level)
        obs = result.observation
        done = False

        # Output the exact START string
        log_start(task=task_level)

        # NOTE(review): termination relies on the env advancing obs.step_count
        # every step — confirm fallback no-op actions are counted too.
        while not done and obs.step_count < MAX_STEPS:
            user_prompt = textwrap.dedent(f"""

                Current Clauses: {json.dumps(obs.contract_clauses)}

                Risks You Have ALREADY Flagged: {json.dumps(obs.flagged_risks)}



                Instructions:

                1. Identify any unflagged risks in the Current Clauses.

                2. If there is a risk you haven't flagged yet, output its clause_id and risk_type.

                3. DO NOT repeat an action. If a clause is already in your "ALREADY Flagged" list, leave it alone.

                4. CRITICAL: If you have found all the risks (or if the remaining clauses are perfectly safe), you MUST end the review by setting "submit_final": true, "clause_id": 0, and "risk_type": "none".

            """).strip()

            try:
                response = client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt}
                    ],
                    response_format={"type": "json_object"},
                    temperature=0.1
                )

                raw_response = response.choices[0].message.content
                parsed = json.loads(raw_response)

                # Coerce each field defensively; the model may omit keys.
                clause_id = int(parsed.get("clause_id", 0))
                risk_type = str(parsed.get("risk_type", "none"))
                submit_final = bool(parsed.get("submit_final", False))

                action = ContractValidationAction(
                    clause_id=clause_id,
                    risk_type=risk_type,
                    submit_final=submit_final,
                    explanation=parsed.get("thoughts", "")
                )

            except Exception:
                # Fallback no-op action if the LLM hallucinated bad JSON or
                # the API call failed — deliberately best-effort, not a crash.
                action = ContractValidationAction(
                    clause_id=0, risk_type="none", submit_final=False)

            result = await env.step(action)
            obs = result.observation

            step_reward = result.reward if result.reward is not None else 0.0
            done = result.done

            # Output the exact STEP string
            log_step(step=obs.step_count, reward=step_reward)

        score = obs.info.get("score", 0.0)

        # Output the exact END string
        log_end(task=task_level, score=score, steps=obs.step_count)

    finally:
        # Always release the environment session, even on mid-episode failure.
        try:
            await env.close()
        except Exception:
            pass


async def main():
    """Validate credentials, then run every requested benchmark level in order."""
    if not HF_TOKEN:
        print("CRITICAL WARNING: HF_TOKEN is missing! Make sure your .env file is set up correctly.")
        return

    client = OpenAI(api_key=HF_TOKEN, base_url=API_BASE_URL)

    # The three requested difficulty levels, run sequentially.
    for level in ("easy", "medium", "hard"):
        await run_task(client, level)


if __name__ == "__main__":
    # Script entry point: drive the async benchmark runner to completion.
    asyncio.run(main())