"""
User Profiling Agent
Extracts structured user information for eligibility matching
"""

import json
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage, SystemMessage
from prompts.profiling_prompt import PROFILING_PROMPT
from config import GROQ_API_KEY


def get_llm():
    """
    Build the Groq chat model client used by the profiling agent

    Returns:
        A ChatGroq instance configured for the "llama-3.3-70b-versatile" model

    Raises:
        ValueError: If GROQ_API_KEY is empty or unset
    """
    if GROQ_API_KEY:
        # A low temperature keeps the structured extraction output stable
        llm_settings = {
            "api_key": GROQ_API_KEY,
            "model": "llama-3.3-70b-versatile",
            "temperature": 0.1,
        }
        return ChatGroq(**llm_settings)

    raise ValueError("GROQ_API_KEY not found in environment variables")


def extract_json_from_text(text: str) -> "dict | None":
    """
    Extract the first JSON object embedded in free-form text

    Strategies are tried in order; the first one that yields a JSON object
    (a dict) wins:

    1. Parse the whole stripped text as JSON.
    2. Parse the contents of each Markdown code fence, in order of appearance.
    3. Scan every '{' from left to right and decode a single JSON value
       starting there. The decoder tracks nesting and string literals, so
       deeply nested objects and braces inside strings are handled.

    Non-object JSON (arrays, numbers, strings, ...) is never returned, so
    callers can rely on getting a dict or None.

    Args:
        text: Raw text that may wrap JSON in prose or Markdown

    Returns:
        The parsed JSON object as a dict, or None if no object was found
        (including when text is not a string)
    """
    import re

    if not isinstance(text, str):
        return None

    def _load_object(candidate):
        """Parse candidate and return it only if it is a JSON object."""
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            return None
        return parsed if isinstance(parsed, dict) else None

    # 1. The whole response is already a JSON object
    whole = _load_object(text.strip())
    if whole is not None:
        return whole

    # 2. JSON inside Markdown code fences; try every fence, not just the first
    fence_pattern = r'```(?:json)?\s*(\{.*?\})\s*```'
    for fenced in re.findall(fence_pattern, text, re.DOTALL):
        fenced_obj = _load_object(fenced)
        if fenced_obj is not None:
            return fenced_obj

    # 3. Decode from each '{' in turn. raw_decode stops at the end of the
    #    first complete value and ignores whatever text follows it.
    decoder = json.JSONDecoder()
    start_idx = text.find('{')
    while start_idx != -1:
        try:
            candidate, _end = decoder.raw_decode(text, start_idx)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start_idx = text.find('{', start_idx + 1)

    return None


def run_profiling_agent(user_input: str) -> dict:
    """
    Extracts structured profile information from user input

    Sends the formatted profiling prompt to the LLM, parses the reply as
    JSON and normalizes the resulting keys (lowercase; spaces and hyphens
    replaced by underscores).

    Args:
        user_input: Raw user input text

    Returns:
        One of three dictionary shapes:
        - the normalized profile, when the reply contained a non-empty
          JSON object;
        - {"user_input", "raw_profile", "note"} when no usable JSON object
          could be parsed from the reply;
        - {"error", "user_input"} when any exception was raised along the
          way (this function never raises).
    """
    try:
        llm = get_llm()

        prompt = PROFILING_PROMPT.format(user_input=user_input)

        messages = [
            SystemMessage(content="You are an expert user profiling agent. Return ONLY a valid JSON object, nothing else."),
            HumanMessage(content=prompt)
        ]

        response = llm.invoke(messages)

        print(f"\n🤖 LLM Response (first 200 chars): {response.content[:200]}...")

        # Extract JSON from response
        profile_data = extract_json_from_text(response.content)

        # Only a non-empty JSON object is a usable profile. Any other parse
        # result (None, an empty object, or a non-object value such as a
        # list) takes the raw-input fallback instead of failing on .items().
        if isinstance(profile_data, dict) and profile_data:
            # Normalize keys to lowercase with underscores
            normalized_profile = {
                key.lower().replace(' ', '_').replace('-', '_'): value
                for key, value in profile_data.items()
            }

            print(f"✅ Profile extracted: {list(normalized_profile.keys())}")
            return normalized_profile

        # Fallback: Create basic profile from user input
        print("⚠️ Could not parse JSON, creating basic profile")
        return {
            "user_input": user_input,
            "raw_profile": response.content,
            "note": "Profile extraction incomplete. Using raw input."
        }

    except Exception as e:
        # Deliberate best-effort boundary: report the failure to the caller
        # as data rather than propagating it.
        print(f"❌ Profiling error: {str(e)}")
        return {
            "error": str(e),
            "user_input": user_input
        }


def validate_profile(profile_data: dict) -> bool:
    """
    Validates that profile has minimum required information

    A required field counts as missing when it is absent, is None, is an
    empty or whitespace-only string, or is the placeholder "Not Provided"
    (compared case-insensitively, ignoring surrounding whitespace).
    Non-string values such as the integer 0 count as provided.

    Args:
        profile_data: Profile dictionary

    Returns:
        True if every required field ('age', 'state', 'education') carries
        a value, False otherwise (including when profile_data is not a dict)
    """
    required_fields = ('age', 'state', 'education')

    if not isinstance(profile_data, dict):
        return False

    def _is_missing(value) -> bool:
        """Return True when value carries no usable information."""
        if value is None:
            return True
        if isinstance(value, str):
            cleaned = value.strip()
            return not cleaned or cleaned.casefold() == "not provided"
        return False

    # dict.get maps an absent key to None, which _is_missing treats as missing
    return not any(_is_missing(profile_data.get(field)) for field in required_fields)


if __name__ == "__main__":
    # Manual smoke test: profile a sample description and pretty-print it
    sample_text = """
    I am a 25-year-old male from Maharashtra. I completed my Bachelor's in Engineering.
    My family income is around 3 lakh per year. I belong to the OBC category.
    I am currently unemployed and looking for government job opportunities.
    """

    print(json.dumps(run_profiling_agent(sample_text), indent=2))