import os
import json
import time
from openai import OpenAI
from gradio_client import Client
from datetime import datetime

# --- CONFIGURATION ---
# Endpoint handed to gradio_client.Client below; the /reset_env and /step_form
# API routes used by the test loop are resolved against it.
SPACE_URL = "https://arun-misra-my-env.hf.space/web"
# Model identifier sent with every chat-completion request.
MODEL_NAME = "llama3:latest" # User specified
# Base URL of the OpenAI-compatible endpoint (local Ollama server).
OLLAMA_URL = "http://127.0.0.1:11434/v1"
# Log destination: truncated at the start of each run, then appended to once
# per completed step.
LOG_FILE = "screening_log.txt"
# Upper bound on the number of screening steps attempted in one run.
STEPS_TO_RUN = 10

# --- SETUP ---
# NOTE: both clients are constructed at import time, so importing this module
# already instantiates them (and may contact the Space — TODO confirm).
# The api_key value is presumably a placeholder for the local server — confirm.
client_llm = OpenAI(base_url=OLLAMA_URL, api_key="ollama")
client_hf = Client(SPACE_URL)

def log_result(step_num, obs, choice, reward, status):
    """Append one screening step to ``LOG_FILE`` and confirm it on stdout.

    Args:
        step_num: Step number shown in the entry header and the console line.
        obs: Mapping for the screened candidate; ``candidate_id`` and
            ``job_title`` are read with ``.get`` (missing keys render as
            ``None``).
        choice: Mapping with the model's answer; ``decision`` and
            ``reasoning`` are read with ``.get``.
        reward: Reward value reported for this step.
        status: Status text reported for this step.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # The empty first and last items give the entry its leading blank line
    # and its trailing newline once everything is joined.
    entry_lines = [
        "",
        f"[{stamp}] --- STEP {step_num} ---",
        f"CANDIDATE ID: {obs.get('candidate_id')}",
        f"JOB TITLE: {obs.get('job_title')}",
        f"AI DECISION: {choice.get('decision')}",
        f"AI REASONING: {choice.get('reasoning')}",
        f"REWARD RECEIVED: {reward}",
        f"STATUS: {status}",
        "------------------------------------------",
        "",
    ]
    entry_text = "\n".join(entry_lines)

    with open(LOG_FILE, "a", encoding="utf-8") as log_handle:
        log_handle.write(entry_text)

    print(f"✅ Step {step_num} Logged (Reward: {reward})")

def _parse_criteria(raw):
    """Decode one weighted-criteria field of an observation into a dict.

    Args:
        raw: The field value. Normally a JSON string; an already-decoded dict
            is accepted as-is.

    Returns:
        A dict mapping criterion name to weight, or ``{}`` when the value is
        missing, malformed, or does not decode to a JSON object.
    """
    if isinstance(raw, dict):
        return raw
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):
        # TypeError: value is None / not text; ValueError: invalid JSON.
        return {}
    # A JSON list or scalar has no .items(); treat it as "not specified".
    return parsed if isinstance(parsed, dict) else {}


def _format_rubric(criteria):
    """Render a criteria dict as one bullet line per criterion."""
    if not criteria:
        return "  - Not specified"
    return "\n".join(f"  - {k} (weight: {v}%)" for k, v in criteria.items())


def _build_prompt(obs, macro_rubric, micro_rubric):
    """Build the screening prompt for one observation and its two rubrics."""
    return f"""You are a Senior HR Expert conducting structured resume screening. You must evaluate this candidate using the EXACT same methodology as a professional GPT-4-powered ATS system.

=== PRIMARY RULE: JUNK DETECTION ===
If the resume contains nonsensical text, gibberish (e.g., "blorph snizzle"), or is clearly not a professional resume, immediately stop and choose "reject" with reasoning "Junk/Spam submission detected."

=== JOB DESCRIPTION ===
{obs.get('job_description')}

=== SCORING RUBRIC ===
High-Level Criteria (weights must sum to 100):
{macro_rubric}

Detailed skill criteria (weights must sum to 100):
{micro_rubric}

=== CANDIDATE RESUME ===
{obs.get('resume_text')}

=== YOUR ANALYTICAL TASK ===
Follow these steps EXACTLY:
1. DETECT JUNK: Is this a real resume? If not, REJECT.
2. INDIVIDUAL SCORING: For each high-level criteria, assign a score of 0-10 based on the resume vs JD.
3. WEIGHTED MATH: Multiply each score by its weight to get the "Weighted Macro Average".
4. FINAL DECISION:
   - "shortlist"      → Weighted macro average > 6.5/10 (Strong Match)
   - "flag_for_review" → Weighted macro average 4.0–6.5/10 (Borderline Case)
   - "reject"         → Weighted macro average < 4.0/10 (Unqualified)

Respond ONLY with valid JSON:
{{
  "analysis": {{
    "junk_detected": false,
    "scores": {{ "criteria": score, ... }},
    "weighted_average": total
  }},
  "decision": "shortlist/reject/flag_for_review",
  "reasoning": "Brief, data-backed justification."
}}"""


def run_multi_step_test():
    """Run up to ``STEPS_TO_RUN`` screening steps and log each completed one.

    Flow: truncate ``LOG_FILE``, reset the environment via ``/reset_env``,
    then for each step build a prompt from the current observation, ask the
    LLM for a JSON decision, submit it via ``/step_form``, and append the
    outcome to the log with ``log_result``.

    The run stops early (without raising) when the reset fails, the LLM call
    or its JSON reply is unusable, the environment call fails or returns
    unparseable data, or the environment returns no further observation.
    Element ``[1]`` of each environment response is parsed as a JSON string.

    Returns:
        None. Results are written to ``LOG_FILE`` and progress to stdout.
    """
    print(f"🚀 Starting {STEPS_TO_RUN}-step AI Screening Test...")
    print(f"Model: {MODEL_NAME} | Space: {SPACE_URL}")

    # Initialize Log
    with open(LOG_FILE, "w", encoding="utf-8") as f:
        f.write(f"--- AI SCREENING LOG ({datetime.now()}) ---\n")

    # 1. Reset Environment
    # The remote call sits inside the try so transport failures are reported
    # the same way as a malformed payload instead of escaping as a traceback.
    reset_data = None
    try:
        reset_data = client_hf.predict(api_name="/reset_env")
        obs = json.loads(reset_data[1])["observation"]
        if not isinstance(obs, dict):
            raise TypeError("observation is not a JSON object")
    except Exception as e:  # boundary with a remote service: report and stop
        print(f"❌ Server crashed during reset! Raw HuggingFace Output:\n{reset_data}")
        print(f"   Cause: {e!r}")
        return

    for i in range(1, STEPS_TO_RUN + 1):
        # 2. Prompt LLM
        # Each rubric is parsed independently so one malformed field does not
        # discard the other.
        macro_rubric = _format_rubric(_parse_criteria(obs.get('macro_criteria', '{}')))
        micro_rubric = _format_rubric(_parse_criteria(obs.get('micro_criteria', '{}')))
        prompt = _build_prompt(obs, macro_rubric, micro_rubric)

        try:
            response = client_llm.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": prompt}],
                response_format={"type": "json_object"}
            )
            ai_choice = json.loads(response.choices[0].message.content)
            # Valid JSON can still be a list or scalar, which has no .get().
            if not isinstance(ai_choice, dict):
                raise ValueError(
                    f"expected a JSON object, got {type(ai_choice).__name__}"
                )
        except Exception as e:
            print(f"❌ AI Error at Step {i}: {e}")
            break

        # 3. Step Environment and 4. Parse Results
        step_data = None
        try:
            step_data = client_hf.predict(
                param_0=ai_choice.get('decision', 'flag_for_review'),
                param_1=ai_choice.get('reasoning', 'No reasoning.'),
                param_2=f"STEP_{i}",
                api_name="/step_form"
            )
            raw_json = json.loads(step_data[1])
            reward = raw_json.get("reward", 0.0)
            next_obs = raw_json.get("observation")
        except Exception as e:
            print(f"❌ Server returned invalid data (likely empty queue or crash). Raw output: {step_data}")
            print(f"   Cause: {e!r}")
            break

        has_next = isinstance(next_obs, dict)
        feedback = next_obs.get("status", "N/A") if has_next else "N/A"

        # 5. Log it
        log_result(i, obs, ai_choice, reward, feedback)

        # Prepare for next observation
        if not has_next:
            # Nothing left to screen; continuing would prompt on empty data.
            print(f"ℹ️ No further observation returned after step {i}; stopping.")
            break
        obs = next_obs
        if i < STEPS_TO_RUN:
            time.sleep(1) # Small delay for stability (not needed after the last step)

    print(f"\n🏁 Test Complete. Results saved to: {LOG_FILE}")

if __name__ == "__main__":
    run_multi_step_test()