File size: 4,697 Bytes
388aa42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
"""

User Profiling Agent

Extracts structured user information for eligibility matching

"""

import json
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage, SystemMessage
from prompts.profiling_prompt import PROFILING_PROMPT
from config import GROQ_API_KEY


def get_llm():
    """
    Build the Groq chat model client used by this module.

    Returns:
        A ChatGroq instance for the "llama-3.3-70b-versatile" model,
        configured with temperature 0.1.

    Raises:
        ValueError: If GROQ_API_KEY is falsy (for example empty or None).
    """
    if GROQ_API_KEY:
        llm_settings = {
            "api_key": GROQ_API_KEY,
            "model": "llama-3.3-70b-versatile",
            # Low temperature for structured extraction
            "temperature": 0.1,
        }
        return ChatGroq(**llm_settings)

    raise ValueError("GROQ_API_KEY not found in environment variables")


def extract_json_from_text(text: str) -> "dict | None":
    """
    Extract the first JSON object from text that may contain markdown or extra content.

    Strategies are tried in order until one yields a JSON object (a dict):
      1. parse the whole stripped text;
      2. parse the contents of every ``` / ```json fenced block;
      3. parse the span from the first '{' to the last '}';
      4. try a decode starting at each '{' in turn, which copes with
         arbitrarily deep nesting and with trailing non-JSON text.

    Valid JSON that is not an object (a list, number, string, ...) is never
    returned, because callers iterate the result with ``.items()``.

    Args:
        text: Raw text, for example an LLM reply.

    Returns:
        The parsed dictionary (possibly empty), or None when no JSON object
        can be found or when text is not a non-blank string.
    """
    import re

    if not isinstance(text, str) or not text.strip():
        return None

    def _as_object(candidate):
        """Parse candidate and return it only if it is a JSON object."""
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            return None
        return parsed if isinstance(parsed, dict) else None

    # 1. The whole reply is already JSON.
    parsed = _as_object(text.strip())
    if parsed is not None:
        return parsed

    # 2. JSON wrapped in markdown code fences; try every fence, not just the first.
    fenced_pattern = r'```(?:json)?\s*(\{.*?\})\s*```'
    for fenced in re.findall(fenced_pattern, text, re.DOTALL):
        parsed = _as_object(fenced)
        if parsed is not None:
            return parsed

    # 3. Everything between the first '{' and the last '}'.
    start_idx = text.find('{')
    end_idx = text.rfind('}')
    if start_idx != -1 and end_idx > start_idx:
        parsed = _as_object(text[start_idx:end_idx + 1])
        if parsed is not None:
            return parsed

    # 4. Fallback: let the JSON decoder find where an object ends. Unlike a
    #    brace-matching regex this has no nesting limit and ignores whatever
    #    follows the object (including stray braces).
    decoder = json.JSONDecoder()
    position = start_idx
    while position != -1:
        try:
            parsed, _ = decoder.raw_decode(text, position)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            return parsed
        position = text.find('{', position + 1)

    return None


def run_profiling_agent(user_input: str) -> dict:
    """
    Extracts structured profile information from user input

    The user input is inserted into PROFILING_PROMPT, sent to the LLM, and the
    reply is parsed with extract_json_from_text. Progress and failures are
    reported with print(); exceptions are caught and reported in the returned
    dictionary rather than propagated.

    Args:
        user_input: Raw user input text

    Returns:
        One of three dictionary shapes:
          - the parsed profile, with each key lower-cased and its spaces and
            hyphens replaced by underscores;
          - a fallback with "user_input", "raw_profile" and "note" keys when
            no non-empty JSON object could be parsed from the reply;
          - a dictionary with "error" and "user_input" keys when an exception
            was raised at any step.
    """
    try:
        llm = get_llm()

        prompt = PROFILING_PROMPT.format(user_input=user_input)

        messages = [
            SystemMessage(content="You are an expert user profiling agent. Return ONLY a valid JSON object, nothing else."),
            HumanMessage(content=prompt)
        ]

        response = llm.invoke(messages)

        print(f"\n🤖 LLM Response (first 200 chars): {response.content[:200]}...")

        # Extract JSON from response
        profile_data = extract_json_from_text(response.content)

        # Only a non-empty JSON object is usable as a profile. Anything else
        # (None, an empty dict, or a non-dict value such as a list) takes the
        # fallback branch so the raw reply is preserved instead of failing on
        # .items() and being reported as an error.
        if isinstance(profile_data, dict) and profile_data:
            # Normalize keys to lowercase with underscores
            normalized_profile = {
                key.lower().replace(' ', '_').replace('-', '_'): value
                for key, value in profile_data.items()
            }

            print(f"✅ Profile extracted: {list(normalized_profile.keys())}")
            return normalized_profile

        # Fallback: Create basic profile from user input
        print("⚠️ Could not parse JSON, creating basic profile")
        return {
            "user_input": user_input,
            "raw_profile": response.content,
            "note": "Profile extraction incomplete. Using raw input."
        }

    except Exception as e:
        # Best-effort boundary: report the failure to the caller as data.
        print(f"❌ Profiling error: {str(e)}")
        return {
            "error": str(e),
            "user_input": user_input
        }


def validate_profile(profile_data: dict) -> bool:
    """
    Validates that profile has minimum required information

    The required fields are 'age', 'state' and 'education'. A field counts as
    missing when its key is absent, its value is None, or its value is a
    string that is blank or equals "Not Provided" (ignoring case and
    surrounding whitespace). Other values, including 0, count as provided.

    Args:
        profile_data: Profile dictionary

    Returns:
        True if every required field is provided, False otherwise (including
        when profile_data is not a dictionary).
    """
    if not isinstance(profile_data, dict):
        return False

    required_fields = ('age', 'state', 'education')

    def _is_missing(value) -> bool:
        """Tell whether a field value carries no usable information."""
        if value is None:
            return True
        if isinstance(value, str):
            # Tolerate case/spacing variations of the placeholder text.
            return value.strip().lower() in ("", "not provided")
        return False

    return not any(
        field not in profile_data or _is_missing(profile_data[field])
        for field in required_fields
    )


if __name__ == "__main__":
    # Manual smoke test: profile a sample description and pretty-print the result.
    sample_input = """

    I am a 25-year-old male from Maharashtra. I completed my Bachelor's in Engineering.

    My family income is around 3 lakh per year. I belong to the OBC category.

    I am currently unemployed and looking for government job opportunities.

    """

    print(json.dumps(run_profiling_agent(sample_input), indent=2))