import os
import json
import textwrap
from typing import List

from openai import OpenAI
from environment import CodeReviewEnv
from models import Action, Observation

# Connection settings are read from the environment once, at import time.
# Each value is None when its variable is not set.
API_BASE_URL = os.environ.get("API_BASE_URL")
# HF_TOKEN takes precedence; API_KEY is consulted only when HF_TOKEN is
# unset or empty.
API_KEY = os.environ.get("HF_TOKEN") or os.environ.get("API_KEY")
MODEL_NAME = os.environ.get("MODEL_NAME")

# Upper bound on model/environment interactions per task.
MAX_STEPS = 5
# Sampling parameters passed to every chat-completion request.
TEMPERATURE = 0.2
MAX_TOKENS = 200
# Action type used when no usable model reply is available.
FALLBACK_ACTION = "skip"

# System message sent with every request. It defines the three reply formats
# (write_comment / skip / done) that the model is asked to choose from.
SYSTEM_PROMPT = "\n".join(
    (
        "You are an AI code reviewer. Your task is to provide helpful comments on pull requests.",
        "You will see a code snippet and existing comments.",
        "",
        "Reply with ONE of the following:",
        '- "write_comment: [your comment]" - to provide a helpful code review comment',
        '- "skip" - if you cannot provide a helpful comment',
        '- "done" - if the code is already perfect',
        "",
        "Be constructive, specific, and focus on improving code quality.",
    )
)

def build_user_prompt(step: int, obs: Observation, history: List[str]) -> str:
    """Build the user message for a single review step.

    The prompt is assembled from explicit lines instead of dedenting an
    indented f-string template. ``textwrap.dedent`` would run after the
    values are interpolated, so any multi-line code, comment list, or history
    would change the common margin: template lines would keep stray leading
    spaces and the first line of the reviewed code would be shifted relative
    to the rest of it.

    Args:
        step: Step number shown at the top of the prompt.
        obs: Current observation; its ``pr_code`` and ``comments`` attributes
            are read.
        history: Summaries of earlier actions; only the last three are shown.

    Returns:
        The prompt text, with the code, comments and history inserted verbatim.
    """
    comments_str = "\n".join(obs.comments) if obs.comments else "No existing comments"
    history_str = "\n".join(history[-3:]) if history else "None"

    return (
        f"Step: {step}\n"
        "\n"
        "Code to review:\n"
        f"{obs.pr_code}\n"
        "\n"
        "Existing comments on this PR:\n"
        f"{comments_str}\n"
        "\n"
        "Previous actions:\n"
        f"{history_str}\n"
        "\n"
        "Please provide your response (write_comment, skip, or done):"
    )

def parse_model_action(response_text: str) -> Action:
    """Translate the model's raw reply into an ``Action``.

    The reply is trimmed, then matched case-insensitively by prefix:

    * empty or whitespace-only reply -> ``FALLBACK_ACTION``
    * starts with "skip" -> skip
    * starts with "done" -> done
    * starts with "write_comment" -> the comment is the text after the
      keyword and an optional ":" separator; an empty comment becomes skip
    * anything else -> the whole reply is used as the comment text

    Args:
        response_text: Text returned by the model; may be empty.

    Returns:
        The ``Action`` to submit to the environment.
    """
    # Trim before the emptiness check so a whitespace-only reply takes the
    # fallback instead of becoming a comment with empty text.
    raw_text = (response_text or "").strip()
    if not raw_text:
        return Action(action_type=FALLBACK_ACTION)

    lower_text = raw_text.lower()

    if lower_text.startswith("skip"):
        return Action(action_type="skip")
    if lower_text.startswith("done"):
        return Action(action_type="done")

    keyword = "write_comment"
    if lower_text.startswith(keyword):
        # Only a colon directly after the keyword is the separator. Splitting
        # on the first colon anywhere in the reply would discard the start of
        # a comment such as "write_comment use dict.get: it avoids KeyError".
        comment = raw_text[len(keyword):].lstrip()
        if comment.startswith(":"):
            comment = comment[1:]
        comment = comment.strip()
        if not comment:
            return Action(action_type="skip")
        return Action(action_type="write_comment", comment_text=comment)

    # No recognised keyword: treat the whole reply as a review comment.
    return Action(action_type="write_comment", comment_text=raw_text)

def main() -> None:
    """Run the baseline agent on every task and print the resulting scores.

    For each of the "easy", "medium" and "hard" tasks the environment is
    reset and the model is queried for up to ``MAX_STEPS`` steps, stopping
    early once the environment reports the episode as done. The reward of the
    last step taken is recorded as the task's score (0.0 if no step ran).
    Returns immediately, after printing an error, when the endpoint, key or
    model name is missing.
    """
    missing_config = not (API_BASE_URL and API_KEY and MODEL_NAME)
    if missing_config:
        print("Error: API_BASE_URL, HF_TOKEN/API_KEY, and MODEL_NAME must be set.")
        return

    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    env = CodeReviewEnv()
    scores = {}
    banner = "=" * 50

    print(banner)
    print("Code Review Environment - Baseline Inference")
    print(f"API Base URL: {API_BASE_URL}")
    print(f"Model: {MODEL_NAME}")
    print(banner)

    for task in ("easy", "medium", "hard"):
        label = task.upper()
        print(f"\nRunning task: {label}")
        # The task must be selected before the environment is reset.
        env.set_task(task)
        obs = env.reset()

        history: List[str] = []
        final_reward = 0.0

        for step in range(1, MAX_STEPS + 1):
            request = {
                "model": MODEL_NAME,
                "messages": [
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": build_user_prompt(step, obs, history)},
                ],
                "temperature": TEMPERATURE,
                "max_tokens": MAX_TOKENS,
            }

            # Best effort: any request failure degrades to the fallback
            # action so the episode can continue.
            try:
                completion = client.chat.completions.create(**request)
                response_text = completion.choices[0].message.content or ""
            except Exception as exc:
                print(f"  Request failed: {exc}. Using fallback.")
                response_text = FALLBACK_ACTION

            action = parse_model_action(response_text)
            obs, reward, done, info = env.step(action)
            final_reward = reward.value

            history.append(f"Step {step}: {action.action_type}")
            print(f"  Step {step} | Action: {action.action_type} | Reward: {reward.value:.2f}")

            if done:
                break

        scores[task] = final_reward
        print(f"{label} completed. Final Score: {final_reward:.2f}")

    print(f"\n{banner}")
    print("FINAL RESULTS")
    print(json.dumps(scores, indent=2))
    print(banner)

# Run the baseline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()