Spaces:

Anmol4521
/

jansahayak

Running

App Files Files Community

jansahayak / agents /profiling_agent.py

Anmol4521

Upload 95 files

388aa42 verified 9 days ago

raw

history blame contribute delete

4.7 kB

	"""
	User Profiling Agent
	Extracts structured user information for eligibility matching
	"""

	import json
	from langchain_groq import ChatGroq
	from langchain_core.messages import HumanMessage, SystemMessage
	from prompts.profiling_prompt import PROFILING_PROMPT
	from config import GROQ_API_KEY


	def get_llm():
	    """Build the Groq chat model used by the profiling agent.

	    Returns:
	        A ``ChatGroq`` client configured with the module's API key.

	    Raises:
	        ValueError: If ``GROQ_API_KEY`` is empty or otherwise falsy.
	    """
	    if GROQ_API_KEY:
	        llm_settings = {
	            "api_key": GROQ_API_KEY,
	            "model": "llama-3.3-70b-versatile",
	            # A low temperature keeps structured extraction stable.
	            "temperature": 0.1,
	        }
	        return ChatGroq(**llm_settings)

	    raise ValueError("GROQ_API_KEY not found in environment variables")


	def _parse_json_object(candidate: str) -> "dict | None":
	    """Parse *candidate* as JSON; return it only when it is a JSON object."""
	    try:
	        parsed = json.loads(candidate)
	    except json.JSONDecodeError:
	        return None
	    # Lists, numbers and strings are valid JSON but not usable as a profile.
	    return parsed if isinstance(parsed, dict) else None


	def extract_json_from_text(text: str) -> "dict | None":
	    """Extract a JSON object from text that may contain markdown or chatter.

	    Strategies are tried in order, and the first one that yields a JSON
	    object wins:

	    1. Parse the whole (stripped) text.
	    2. Parse the contents of a markdown code fence (optionally tagged
	       ``json``).
	    3. Parse the slice from the first ``{`` to the last ``}``.
	    4. Parse each brace-delimited candidate (up to one level of nesting)
	       found anywhere in the text.

	    Args:
	        text: Raw text, typically an LLM response.

	    Returns:
	        The decoded ``dict``, or ``None`` when the input is not a non-blank
	        string or no JSON object could be recovered.
	    """
	    import re

	    if not isinstance(text, str) or not text.strip():
	        return None

	    # 1. The whole response is already JSON.
	    found = _parse_json_object(text.strip())
	    if found is not None:
	        return found

	    # 2. JSON wrapped in a markdown code fence. The non-greedy body is
	    #    extended by the regex engine until a closing fence follows, so
	    #    nested objects inside the fence are captured whole.
	    fenced_pattern = r'```(?:json)?\s*(\{.*?\})\s*```'
	    for fenced in re.findall(fenced_pattern, text, re.DOTALL):
	        found = _parse_json_object(fenced)
	        if found is not None:
	            return found

	    # 3. Everything between the first '{' and the last '}'.
	    start_idx = text.find('{')
	    end_idx = text.rfind('}')
	    if start_idx != -1 and end_idx > start_idx:
	        found = _parse_json_object(text[start_idx:end_idx + 1])
	        if found is not None:
	            return found

	    # 4. Fallback: any balanced {...} chunk with at most one nesting level.
	    #    This rescues responses where stray braces surround the real object.
	    object_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}'
	    for chunk in re.findall(object_pattern, text, re.DOTALL):
	        found = _parse_json_object(chunk)
	        if found is not None:
	            return found

	    return None


	def run_profiling_agent(user_input: str) -> dict:
	    """
	    Turn free-form user text into a structured profile via the LLM.

	    Args:
	        user_input: Raw user input text

	    Returns:
	        One of three dictionaries:
	        - the parsed profile, with keys lower-cased and spaces/hyphens
	          replaced by underscores;
	        - a basic profile holding ``user_input``, ``raw_profile`` and
	          ``note`` when no JSON could be parsed from the reply;
	        - ``{"error": ..., "user_input": ...}`` when any exception occurs.
	    """
	    try:
	        chat_model = get_llm()

	        filled_prompt = PROFILING_PROMPT.format(user_input=user_input)
	        system_msg = SystemMessage(
	            content="You are an expert user profiling agent. Return ONLY a valid JSON object, nothing else."
	        )
	        human_msg = HumanMessage(content=filled_prompt)

	        reply = chat_model.invoke([system_msg, human_msg])

	        print(f"\n🤖 LLM Response (first 200 chars): {reply.content[:200]}...")

	        # Pull the JSON payload out of the model reply.
	        extracted = extract_json_from_text(reply.content)

	        if not extracted:
	            # Nothing parseable: hand back the raw material instead.
	            print("⚠️ Could not parse JSON, creating basic profile")
	            return {
	                "user_input": user_input,
	                "raw_profile": reply.content,
	                "note": "Profile extraction incomplete. Using raw input."
	            }

	        # Normalize keys to lowercase snake_case.
	        normalized_profile = {
	            raw_key.lower().replace(' ', '_').replace('-', '_'): raw_value
	            for raw_key, raw_value in extracted.items()
	        }

	        print(f"✅ Profile extracted: {list(normalized_profile)}")
	        return normalized_profile

	    except Exception as exc:
	        # Best-effort boundary: report the failure in the returned dict.
	        print(f"❌ Profiling error: {exc!s}")
	        return {
	            "error": str(exc),
	            "user_input": user_input
	        }


	def validate_profile(profile_data: dict) -> bool:
	    """
	    Validates that profile has minimum required information

	    A required field counts as missing when the key is absent, the value
	    is ``None``, or the value is a string that is blank or equals
	    "Not Provided" (ignoring case and surrounding whitespace).

	    Args:
	        profile_data: Profile dictionary

	    Returns:
	        True if ``age``, ``state`` and ``education`` are all present with
	        usable values; False otherwise, including when ``profile_data``
	        is not a dictionary.
	    """
	    # Reject None and other non-dict input instead of raising.
	    if not isinstance(profile_data, dict):
	        return False

	    required_fields = ('age', 'state', 'education')

	    for field in required_fields:
	        value = profile_data.get(field)
	        if value is None:
	            return False
	        # Only strings can be placeholders; a numeric 0 is real data.
	        if isinstance(value, str) and value.strip().casefold() in ("", "not provided"):
	            return False

	    return True


	if __name__ == "__main__":
	    # Manual smoke test: profile one sample user and pretty-print the result.
	    sample_text = """
	I am a 25-year-old male from Maharashtra. I completed my Bachelor's in Engineering.
	My family income is around 3 lakh per year. I belong to the OBC category.
	I am currently unemployed and looking for government job opportunities.
	"""

	    print(json.dumps(run_profiling_agent(sample_text), indent=2))