import requests
import sys
import os
import json
from dotenv import load_dotenv
from openai import OpenAI

# Load variables from a local .env file (if one exists) into the process
# environment before run_inference() reads API_BASE_URL, MODEL_NAME, HF_TOKEN
# and OPENAI_API_KEY through os.getenv().
load_dotenv()

# Root URL of the simulation environment's HTTP server; run_inference() posts
# to the "/reset" and "/step" endpoints underneath it.
BASE_URL = "http://localhost:7860"

# Action names the agent may send to the environment, in the order they are
# listed in the prompt.
_VALID_ACTIONS = (
    "cut_expenses", "stock_essentials", "invest_gold", "hold_cash",
    "convert_currency", "take_loan", "pay_debt", "reduce_luxury",
    "build_emergency_fund",
)

# Action used when the model's reply is unusable or the model call fails.
_FALLBACK_ACTION = "hold_cash"

_SYSTEM_PROMPT = """You are an expert financial crisis manager operating in a simulated economy.

Your goals:
- Survive as long as possible (avoid bankruptcy)
- Maintain and grow savings
- Reduce debt strategically
- Adapt to inflation and economic shocks

Rules:
- Do NOT repeat the same action more than 2 times in a row
- Avoid passive strategies like always holding cash
- Balance short-term survival with long-term stability
- Consider consequences of each decision

Always return ONLY one valid action."""


def _build_user_prompt(state, recent_actions) -> str:
    """Return the per-step user message describing the state and recent actions."""
    # The action list is derived from _VALID_ACTIONS so the prompt can never
    # drift from the set of names that is actually accepted.
    action_list = ", ".join(_VALID_ACTIONS)
    prompt = f"""Current State:
{state}

Previous Actions (last 3):
{recent_actions}

Choose ONE action from:
[{action_list}]

Avoid repeating same action too often.

Return ONLY the action name."""
    return prompt.strip()


def _parse_action(raw_text: str) -> str:
    """Map the model's free-form reply onto one of ``_VALID_ACTIONS``.

    Matching is case-insensitive and ignores decoration around the name, so
    replies such as "`invest_gold`", "Invest_Gold." or "Action: pay_debt" are
    understood. The first recognised name wins; if none is found the
    fallback action is returned.
    """
    text = raw_text.strip().lower()
    if text in _VALID_ACTIONS:
        return text

    # Split on every character that cannot be part of an action name, then
    # look for the first word that is a known action.
    words = "".join(ch if ch.isalpha() or ch == "_" else " " for ch in text).split()
    for word in words:
        candidate = word.strip("_")  # tolerate markdown emphasis: _hold_cash_
        if candidate in _VALID_ACTIONS:
            return candidate
    return _FALLBACK_ACTION


def _avoid_third_repeat(action: str, history) -> str:
    """Return ``action``, or the first other action if it would repeat a third time."""
    if history[-2:] == [action, action]:
        return next(a for a in _VALID_ACTIONS if a != action)
    return action


def run_inference(task_name: str, *, request_timeout: float = 30.0):
    """Run one episode of ``task_name`` with an LLM choosing every action.

    Configuration is read from the environment: ``API_BASE_URL`` (default
    ``https://api.openai.com/v1``), ``MODEL_NAME`` (default ``gpt-4o-mini``)
    and the API key from ``HF_TOKEN`` or, failing that, ``OPENAI_API_KEY``.

    Progress is reported on stdout as one ``[START]`` line, one ``[STEP]``
    line per environment step and a final ``[END]`` line. Exception details
    are written to stderr only, so the stdout lines keep their fixed format.

    Args:
        task_name: Task identifier sent to the environment's ``/reset`` endpoint.
        request_timeout: Seconds to wait for each environment HTTP request
            before giving up; without a timeout a stalled server would block
            the run forever.

    Raises:
        SystemExit: With code 1 when no API key is configured or the model
            client cannot be constructed. Failures while talking to the
            environment are reported via ``[STEP]``/``[END]`` lines and the
            function then returns normally.
    """
    env_name = "CrisisSim"

    api_base_url = os.getenv("API_BASE_URL") or "https://api.openai.com/v1"
    model_name = os.getenv("MODEL_NAME") or "gpt-4o-mini"
    api_key = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY")

    print(f"[START] task={task_name} env={env_name} model={model_name}", flush=True)

    if not api_key:
        print("[STEP] step=1 action=null reward=0.00 done=true error=API_KEY or HF_TOKEN environment variable is not set", flush=True)
        print("[END] success=false steps=0 rewards=", flush=True)
        sys.exit(1)

    try:
        client = OpenAI(base_url=api_base_url, api_key=api_key)
    except Exception as exc:
        print(f"run_inference: could not create model client: {exc!r}", file=sys.stderr, flush=True)
        print("[STEP] step=1 action=null reward=0.00 done=true error=api_error", flush=True)
        print("[END] success=false steps=0 rewards=", flush=True)
        sys.exit(1)

    steps = 0
    rewards = []
    last_actions = []

    try:
        # Reset the environment to obtain the initial observation.
        reset_res = requests.post(
            f"{BASE_URL}/reset",
            json={"task_name": task_name},
            timeout=request_timeout,
        )
        reset_res.raise_for_status()
        state = reset_res.json()

        while True:
            steps += 1
            error_val = "null"

            # Ask the model for the next action.
            try:
                completion = client.chat.completions.create(
                    model=model_name,
                    messages=[
                        {"role": "system", "content": _SYSTEM_PROMPT.strip()},
                        {"role": "user", "content": _build_user_prompt(state, last_actions[-3:])},
                    ],
                    temperature=0.5,
                    max_tokens=50,
                )
                content = completion.choices[0].message.content
                if content is None:
                    # A reply without text is reported as api_error, exactly
                    # like any other failure of the model call.
                    raise ValueError("model reply has no text content")
                action = _avoid_third_repeat(_parse_action(content), last_actions)
            except Exception as exc:
                # Best effort: keep the episode going with the safe fallback.
                print(f"run_inference: model call failed at step {steps}: {exc!r}", file=sys.stderr, flush=True)
                action = _FALLBACK_ACTION
                error_val = "api_error"

            last_actions.append(action)

            # Step the environment with the chosen action.
            step_res = requests.post(
                f"{BASE_URL}/step",
                json={"action": action},
                timeout=request_timeout,
            )
            step_res.raise_for_status()

            data = step_res.json()
            state = data["observation"]
            reward = data["reward"]
            done = data["done"]
            rewards.append(reward)

            print(f"[STEP] step={steps} action={action} reward={reward:.2f} done={str(done).lower()} error={error_val}", flush=True)

            if done:
                # An observation without a "bankrupt" key counts as a failure.
                success = not state.get("bankrupt", True)
                break

        rewards_str = ",".join(f"{r:.2f}" for r in rewards)
        print(f"[END] success={str(success).lower()} steps={steps} rewards={rewards_str}", flush=True)

    except Exception as exc:
        # Any environment failure (connection error, timeout, HTTP error
        # status, malformed response) ends the episode as unsuccessful.
        print(f"run_inference: episode aborted at step {steps}: {exc!r}", file=sys.stderr, flush=True)
        error_val = "api_error"
        print(f"[STEP] step={steps} action=null reward=0.00 done=true error={error_val}", flush=True)
        rewards_str = ",".join(f"{r:.2f}" for r in rewards)
        print(f"[END] success=false steps={steps} rewards={rewards_str}", flush=True)

if __name__ == "__main__":
    # The first command-line argument selects the task; with no argument the
    # "easy" task is run.
    task = next(iter(sys.argv[1:]), "easy")
    run_inference(task)