""" User Profiling Agent Extracts structured user information for eligibility matching """ import json from langchain_groq import ChatGroq from langchain_core.messages import HumanMessage, SystemMessage from prompts.profiling_prompt import PROFILING_PROMPT from config import GROQ_API_KEY def get_llm(): """Initialize Groq LLM""" if not GROQ_API_KEY: raise ValueError("GROQ_API_KEY not found in environment variables") return ChatGroq( api_key=GROQ_API_KEY, model="llama-3.3-70b-versatile", temperature=0.1 # Low temperature for structured extraction ) def extract_json_from_text(text: str) -> dict: """Extract JSON from text that might contain markdown or extra content""" import re # Try direct JSON parse first try: return json.loads(text.strip()) except json.JSONDecodeError: pass # Try to extract JSON from markdown code blocks json_pattern = r'```(?:json)?\s*(\{.*?\})\s*```' matches = re.findall(json_pattern, text, re.DOTALL) if matches: try: return json.loads(matches[0]) except json.JSONDecodeError: pass # Try to find complete JSON object (improved pattern) # Match from first { to last } start_idx = text.find('{') end_idx = text.rfind('}') if start_idx != -1 and end_idx != -1 and end_idx > start_idx: try: potential_json = text[start_idx:end_idx+1] return json.loads(potential_json) except json.JSONDecodeError: pass # Fallback: try to find any JSON-like structure json_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}' matches = re.findall(json_pattern, text, re.DOTALL) for match in matches: try: return json.loads(match) except json.JSONDecodeError: continue return None def run_profiling_agent(user_input: str) -> dict: """ Extracts structured profile information from user input Args: user_input: Raw user input text Returns: Structured profile dictionary """ try: llm = get_llm() prompt = PROFILING_PROMPT.format(user_input=user_input) messages = [ SystemMessage(content="You are an expert user profiling agent. Return ONLY a valid JSON object, nothing else."), HumanMessage(content=prompt) ] response = llm.invoke(messages) print(f"\nšŸ¤– LLM Response (first 200 chars): {response.content[:200]}...") # Extract JSON from response profile_data = extract_json_from_text(response.content) if profile_data: # Normalize keys to lowercase with underscores normalized_profile = {} for key, value in profile_data.items(): normalized_key = key.lower().replace(' ', '_').replace('-', '_') normalized_profile[normalized_key] = value print(f"āœ… Profile extracted: {list(normalized_profile.keys())}") return normalized_profile else: # Fallback: Create basic profile from user input print("āš ļø Could not parse JSON, creating basic profile") return { "user_input": user_input, "raw_profile": response.content, "note": "Profile extraction incomplete. Using raw input." } except Exception as e: print(f"āŒ Profiling error: {str(e)}") return { "error": str(e), "user_input": user_input } def validate_profile(profile_data: dict) -> bool: """ Validates that profile has minimum required information Args: profile_data: Profile dictionary Returns: True if valid, False otherwise """ required_fields = ['age', 'state', 'education'] for field in required_fields: if field not in profile_data or profile_data[field] == "Not Provided": return False return True if __name__ == "__main__": # Test the agent test_input = """ I am a 25-year-old male from Maharashtra. I completed my Bachelor's in Engineering. My family income is around 3 lakh per year. I belong to the OBC category. I am currently unemployed and looking for government job opportunities. """ result = run_profiling_agent(test_input) print(json.dumps(result, indent=2))