"""
User Profiling Agent
Extracts structured user information for eligibility matching
"""
import json
import re
from typing import Optional

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_groq import ChatGroq

from config import GROQ_API_KEY
from prompts.profiling_prompt import PROFILING_PROMPT
def get_llm():
    """Build the Groq chat model used for profile extraction.

    Returns:
        A ``ChatGroq`` client configured with the module's API key, the
        ``llama-3.3-70b-versatile`` model and a temperature of 0.1.

    Raises:
        ValueError: If ``GROQ_API_KEY`` (imported from ``config``) is falsy.
    """
    if GROQ_API_KEY:
        return ChatGroq(
            api_key=GROQ_API_KEY,
            model="llama-3.3-70b-versatile",
            # Keep sampling near-deterministic: the output must be parseable JSON.
            temperature=0.1,
        )
    raise ValueError("GROQ_API_KEY not found in environment variables")
def extract_json_from_text(text: str) -> Optional[dict]:
    """Extract the first JSON object embedded in ``text``.

    The text may be bare JSON, JSON wrapped in a markdown code fence, or JSON
    surrounded by extra commentary. Strategies are tried in this order:

    1. Parse the whole (stripped) text.
    2. Parse the contents of each markdown code fence, in order.
    3. Scan every ``{`` from left to right and decode the first complete
       JSON object that starts there, ignoring whatever follows it.

    Only JSON *objects* are accepted. Arrays, numbers and strings are skipped
    so that callers can always treat a non-None result as a dict.

    Args:
        text: Raw text that may contain a JSON object.

    Returns:
        The decoded object as a dict, or None when ``text`` is not a string,
        is blank, or contains no decodable JSON object.
    """
    if not isinstance(text, str) or not text.strip():
        return None

    def _as_object(candidate):
        """Decode ``candidate`` and return it only when it is a JSON object."""
        try:
            decoded = json.loads(candidate)
        except json.JSONDecodeError:
            return None
        return decoded if isinstance(decoded, dict) else None

    # Fast path: the whole reply is the JSON object.
    # `is not None` (rather than truthiness) keeps an empty object `{}` a hit.
    whole = _as_object(text.strip())
    if whole is not None:
        return whole

    # Markdown fences: try every fenced block, not just the first one.
    for fenced in re.findall(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL):
        found = _as_object(fenced)
        if found is not None:
            return found

    # General case: raw_decode parses exactly one JSON value starting at the
    # given index and tolerates trailing text, so it copes with arbitrary
    # nesting depth and with stray braces after the object.
    decoder = json.JSONDecoder()
    brace = text.find('{')
    while brace != -1:
        try:
            decoded, _ = decoder.raw_decode(text, brace)
        except json.JSONDecodeError:
            decoded = None
        if isinstance(decoded, dict):
            return decoded
        brace = text.find('{', brace + 1)

    return None
def run_profiling_agent(user_input: str) -> dict:
    """
    Ask the LLM to turn free-form user text into a structured profile.

    Args:
        user_input: Raw user input text

    Returns:
        One of three dictionaries:
        - the parsed profile, with every key lowercased and spaces/hyphens
          replaced by underscores;
        - a fallback with ``user_input``, ``raw_profile`` and ``note`` when
          no JSON could be parsed from the reply (or it parsed to something
          empty);
        - ``{"error": ..., "user_input": ...}`` when any exception is raised
          along the way. Exceptions are reported, not propagated.
    """
    try:
        chat_model = get_llm()
        rendered_prompt = PROFILING_PROMPT.format(user_input=user_input)
        reply = chat_model.invoke([
            SystemMessage(content="You are an expert user profiling agent. Return ONLY a valid JSON object, nothing else."),
            HumanMessage(content=rendered_prompt),
        ])
        print(f"\n🤖 LLM Response (first 200 chars): {reply.content[:200]}...")

        parsed = extract_json_from_text(reply.content)
        if not parsed:
            # Nothing usable came back: hand the raw reply to the caller instead.
            print("⚠️ Could not parse JSON, creating basic profile")
            return {
                "user_input": user_input,
                "raw_profile": reply.content,
                "note": "Profile extraction incomplete. Using raw input."
            }

        # Canonical key form: lowercase, underscores instead of spaces/hyphens.
        cleaned = {
            name.lower().replace(' ', '_').replace('-', '_'): value
            for name, value in parsed.items()
        }
        print(f"✅ Profile extracted: {list(cleaned.keys())}")
        return cleaned
    except Exception as e:
        print(f"❌ Profiling error: {str(e)}")
        return {
            "error": str(e),
            "user_input": user_input
        }
def validate_profile(profile_data: dict) -> bool:
    """
    Check that a profile carries the minimum information needed downstream.

    The required fields are ``age``, ``state`` and ``education``. A field
    counts as missing when it is absent, is ``None``, is an empty or
    whitespace-only string, or is the placeholder "Not Provided" in any
    letter case. Non-string values such as the integer ``0`` are accepted
    as provided.

    Args:
        profile_data: Profile dictionary

    Returns:
        True if every required field is present and filled in, False
        otherwise (including when ``profile_data`` is not a dict).
    """
    if not isinstance(profile_data, dict):
        return False

    def _is_missing(value) -> bool:
        """Return True when ``value`` carries no usable information."""
        if value is None:
            return True
        if isinstance(value, str):
            return value.strip().casefold() in ("", "not provided")
        return False

    required_fields = ('age', 'state', 'education')
    # dict.get yields None for absent keys, which _is_missing treats as missing.
    return not any(_is_missing(profile_data.get(field)) for field in required_fields)
if __name__ == "__main__":
    # Manual smoke test: run the agent on a sample description and
    # pretty-print whatever dictionary comes back.
    sample_text = """
I am a 25-year-old male from Maharashtra. I completed my Bachelor's in Engineering.
My family income is around 3 lakh per year. I belong to the OBC category.
I am currently unemployed and looking for government job opportunities.
"""
    print(json.dumps(run_profiling_agent(sample_text), indent=2))
|