from models import ContractValidationAction
from client import ContractValidationEnv
import os
import json
import textwrap
import asyncio
from typing import List, Optional

from openai import OpenAI
from dotenv import load_dotenv

# Load variables from a local .env file (if present) before the os.getenv
# lookups below read them.
load_dotenv()


# --- Runtime configuration (read from the environment) ---
# API_BASE_URL and MODEL_NAME fall back to the defaults shown here.
# HF_TOKEN has no default; main() refuses to run when it is unset or empty.
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
HF_TOKEN = os.getenv("HF_TOKEN")

# Benchmark identifier (not referenced by the code visible in this file).
BENCHMARK = "contract_validation"
# Upper bound on environment steps per task; run_task stops looping once
# the observation's step_count reaches this value.
MAX_STEPS = 15


# --- Grader-facing log helpers ---
# The grader expects exact string matches, NOT JSON, so the format of each
# printed line below must not be altered.
def log_start(task: str) -> None:
    """Print the ``[START]`` marker line for *task* and flush stdout."""
    line = f"[START] task={task}"
    print(line, flush=True)


def log_step(step: int, reward: float) -> None:
    """Print the ``[STEP]`` line for one environment step and flush stdout.

    The reward is rounded to two decimals. Any value that is not strictly
    positive after rounding (zero, negatives, NaN) is reported as the
    integer ``0``, so the line reads ``reward=0`` in that case and e.g.
    ``reward=0.5`` otherwise.
    """
    rounded = round(reward, 2)
    # Equivalent to max(0, rounded): the literal 0 wins unless rounded > 0.
    shown = rounded if rounded > 0 else 0
    line = f"[STEP] step={step} reward={shown}"
    print(line, flush=True)


def log_end(task: str, score: float, steps: int) -> None:
    """Print the ``[END]`` line for *task* and flush stdout.

    The score is rounded to two decimals and then kept strictly inside
    (0, 1): anything not below 0.99 is reported as 0.99 and anything not
    above 0.01 is reported as 0.01.
    """
    rounded = round(score, 2)
    # Upper cap first, then lower floor, mirroring max(0.01, min(0.99, x)).
    capped = rounded if rounded < 0.99 else 0.99
    final_score = capped if capped > 0.01 else 0.01
    line = f"[END] task={task} score={final_score} steps={steps}"
    print(line, flush=True)


def _coerce_flag(value) -> bool:
    """Interpret a decoded JSON value as a boolean.

    Strings are compared against explicit truthy spellings so that a model
    answering ``"false"`` is not treated as true (``bool("false")`` is True).
    """
    if isinstance(value, str):
        return value.strip().lower() in ("true", "1", "yes")
    return bool(value)


def _parse_action(raw_response) -> ContractValidationAction:
    """Build an action from the model's raw JSON reply.

    Raises whatever ``json.loads``, ``int`` or the action constructor raise
    on malformed input, plus ``ValueError`` when the reply is valid JSON but
    not an object; the caller converts any failure into a no-op action.
    """
    parsed = json.loads(raw_response)
    if not isinstance(parsed, dict):
        raise ValueError("model reply is not a JSON object")

    return ContractValidationAction(
        clause_id=int(parsed.get("clause_id", 0)),
        # A null/missing risk type becomes "none"; case and padding are
        # normalised because the prompt lists lowercase risk types only.
        risk_type=str(parsed.get("risk_type") or "none").strip().lower(),
        submit_final=_coerce_flag(parsed.get("submit_final", False)),
        explanation=parsed.get("thoughts", ""),
    )


async def run_task(client: OpenAI, task_level: str):
    """Run one episode of the contract-validation environment.

    Resets the environment for ``task_level``, then repeatedly asks the
    model for an action, applies it with ``env.step`` and prints a
    ``[STEP]`` line, until the environment reports ``done`` or the
    observation's ``step_count`` reaches ``MAX_STEPS``. Finishes by
    printing the ``[END]`` line with the score taken from ``obs.info``.

    Args:
        client: OpenAI client used for the chat-completion requests.
        task_level: Task name passed to ``env.reset`` and echoed in the
            ``[START]`` / ``[END]`` lines.

    Raises:
        Any exception raised by ``env.reset`` or ``env.step`` propagates
        after the environment has been closed. Failures while querying or
        parsing the model are not raised; they produce a no-op action.
    """
    import sys  # local import: only used for stderr diagnostics

    # Direct connection to your live, validated Space
    space_url = "https://envarchitects-contract-validation-env.hf.space"
    env = ContractValidationEnv(base_url=space_url)

    # The system prompt never changes, so build it once per task.
    system_prompt = textwrap.dedent("""
        You are a precise legal AI. Review the clauses and output valid JSON.
        Your JSON must match exactly:
        {
          "thoughts": "your reasoning",
          "clause_id": 1,
          "risk_type": "liability",
          "submit_final": false
        }
        Valid risk types: liability, payment, termination, confidentiality, compliance, none.
    """).strip()

    loop = asyncio.get_running_loop()

    try:
        result = await env.reset(task_level=task_level)
        obs = result.observation
        done = False

        # Output the exact START string
        log_start(task=task_level)

        while not done and obs.step_count < MAX_STEPS:
            user_prompt = textwrap.dedent(f"""
                Current Clauses: {json.dumps(obs.contract_clauses)}
                Risks You Have ALREADY Flagged: {json.dumps(obs.flagged_risks)}

                Instructions:
                1. Identify any unflagged risks in the Current Clauses.
                2. If there is a risk you haven't flagged yet, output its clause_id and risk_type.
                3. DO NOT repeat an action. If a clause is already in your "ALREADY Flagged" list, leave it alone.
                4. CRITICAL: If you have found all the risks (or if the remaining clauses are perfectly safe), you MUST end the review by setting "submit_final": true, "clause_id": 0, and "risk_type": "none".
            """).strip()

            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ]

            try:
                # The OpenAI client call is synchronous; run it in the default
                # executor so the event loop is not blocked while waiting.
                response = await loop.run_in_executor(
                    None,
                    lambda msgs=messages: client.chat.completions.create(
                        model=MODEL_NAME,
                        messages=msgs,
                        response_format={"type": "json_object"},
                        temperature=0.1,
                    ),
                )
                action = _parse_action(response.choices[0].message.content)
            except Exception as exc:
                # Diagnostics go to stderr so the stdout log format the grader
                # reads stays untouched.
                print(
                    f"[WARN] task={task_level}: falling back to no-op action: {exc!r}",
                    file=sys.stderr,
                    flush=True,
                )
                # Fallback action if the request failed or the reply was unusable
                action = ContractValidationAction(
                    clause_id=0, risk_type="none", submit_final=False)

            result = await env.step(action)
            obs = result.observation

            step_reward = result.reward if result.reward is not None else 0.0
            done = result.done

            # Output the exact STEP string
            log_step(step=obs.step_count, reward=step_reward)

        # A missing info mapping or a null score is reported as 0.0 instead of
        # crashing while formatting the END line.
        info = obs.info or {}
        score = info.get("score")
        if score is None:
            score = 0.0

        # Output the exact END string
        log_end(task=task_level, score=score, steps=obs.step_count)

    finally:
        # Closing is best-effort: report a failure but never let it mask the
        # outcome of the episode.
        try:
            await env.close()
        except Exception as exc:
            print(
                f"[WARN] task={task_level}: env.close() failed: {exc!r}",
                file=sys.stderr,
                flush=True,
            )


async def main():
    """Run the easy, medium and hard tasks one after another.

    When HF_TOKEN is unset or empty, prints a warning and returns without
    creating a client or running any task.
    """
    if HF_TOKEN:
        llm = OpenAI(api_key=HF_TOKEN, base_url=API_BASE_URL)

        # Must run the 3 requested tasks, sequentially and in this order
        for level in ("easy", "medium", "hard"):
            await run_task(llm, level)
    else:
        print("CRITICAL WARNING: HF_TOKEN is missing! Make sure your .env file is set up correctly.")


# Script entry point: when executed directly (not imported), drive the
# async main() coroutine to completion with asyncio.run.
if __name__ == "__main__":
    asyncio.run(main())