Spaces:

holistic-ai
/

AgentGraph

Running

App Files Files Community

AgentGraph / agentgraph /methods /baseline /unified_method.py

wu981526092

add

7bc750c 5 months ago

raw

history blame contribute delete

9.06 kB

	"""
	Unified Knowledge Extraction Method (1-Task Approach)

	Copied from core/agent_monitoring_unified.py and adapted for evaluation framework.
	Uses the unified 1-task CrewAI approach with a single agent that performs all
	knowledge extraction tasks in one step.
	"""

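	# NOTE: the LiteLLM patch is applied by the `utils.fix_litellm_stop_param` import at the
	# end of this import block (after `crewai` is imported); `os`/`sys` come first for the path setup.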
	import os
	import sys

	# Add the parent directory to the path to ensure imports work correctly
	sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
	import json
	import logging
	import time
	from datetime import datetime
	from typing import Any, Dict

	from crewai import Agent, Crew, Process, Task

	from evaluation.knowledge_extraction.baselines.base_method import BaseKnowledgeExtractionMethod
	from evaluation.knowledge_extraction.utils.models import KnowledgeGraph

	# Import shared prompt templates
	from evaluation.knowledge_extraction.utils.prompts import (
	ENTITY_EXTRACTION_INSTRUCTION_PROMPT,
	ENTITY_EXTRACTION_SYSTEM_PROMPT,
	GRAPH_BUILDER_SYSTEM_PROMPT,
	RELATION_EXTRACTION_INSTRUCTION_PROMPT,
	RELATION_EXTRACTION_SYSTEM_PROMPT,
	)
	from utils.fix_litellm_stop_param import * # This applies the patches # noqa: F403

	# Root logging runs at INFO; this module logs through its own named logger.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Third-party loggers that are too chatty at INFO are raised to WARNING.
	for _noisy_logger_name in ("openai", "httpx", "litellm", "chromadb"):
	    logging.getLogger(_noisy_logger_name).setLevel(logging.WARNING)

	# Verbosity flag (0 = quiet); converted to bool where it is passed to CrewAI objects.
	verbose_level = 0

	# Model name that the agent configuration in this module reads back from the environment.
	os.environ["OPENAI_MODEL_NAME"] = "gpt-5-mini"

	class UnifiedKnowledgeExtractionMethod(BaseKnowledgeExtractionMethod):
	    """Unified 1-task knowledge extraction method using CrewAI.

	    One agent, one task and one crew are built at construction time and
	    reused for every call to ``process_text``.
	    """

	    def __init__(self, **kwargs):
	        """Register the method as ``"unified_method"`` and build the crew.

	        Args:
	            **kwargs: Forwarded unchanged to the base class constructor.
	        """
	        super().__init__("unified_method", **kwargs)
	        self._setup_agent_and_task()

	    def _setup_agent_and_task(self):
	        """Create the CrewAI agent, task and crew and store them on ``self``.

	        Sets ``unified_knowledge_graph_agent``, ``unified_knowledge_graph_task``
	        and ``unified_agent_monitoring_crew``.
	        """
	        # The backstory concatenates the three shared system prompts.
	        self.unified_knowledge_graph_agent = Agent(
	            role="Unified Knowledge Graph Analyst",
	            goal="Create comprehensive knowledge graphs from agent system data in a single analysis pass",
	            backstory=f"""{ENTITY_EXTRACTION_SYSTEM_PROMPT}

	{RELATION_EXTRACTION_SYSTEM_PROMPT}

	{GRAPH_BUILDER_SYSTEM_PROMPT}.""",
	            verbose=bool(verbose_level),
	            llm=os.environ["OPENAI_MODEL_NAME"],
	        )

	        # NOTE(review): process_text passes the text as the ``input_data`` input;
	        # it presumably reaches the prompt through an ``{input_data}`` placeholder
	        # inside the imported instruction prompts -- confirm against utils.prompts.
	        self.unified_knowledge_graph_task = Task(
	            description=f"""
	Extract entities:
	{ENTITY_EXTRACTION_INSTRUCTION_PROMPT}

	Also extract relationships:
	{RELATION_EXTRACTION_INSTRUCTION_PROMPT}

	Finally, build the knowledge graph:
	""",
	            agent=self.unified_knowledge_graph_agent,
	            expected_output="A complete knowledge graph with entities, relations, and metadata",
	            output_pydantic=KnowledgeGraph,
	        )

	        # Single agent, single task, run sequentially without memory or planning.
	        self.unified_agent_monitoring_crew = Crew(
	            agents=[self.unified_knowledge_graph_agent],
	            tasks=[self.unified_knowledge_graph_task],
	            verbose=bool(verbose_level),
	            memory=False,
	            planning=False,
	            process=Process.sequential,
	        )

	    @staticmethod
	    def _result_to_kg_data(result: Any) -> Any:
	        """Convert a crew result into plain knowledge-graph data.

	        Args:
	            result: Object returned by ``Crew.kickoff``.

	        Returns:
	            The dumped pydantic output when present, otherwise the JSON-decoded
	            ``raw`` attribute, otherwise an empty graph carrying an ``error``
	            message. The decoded JSON is returned as-is and may not be a dict;
	            the caller validates the shape.
	        """
	        structured = getattr(result, "pydantic", None)
	        if structured:
	            # Pydantic v2 exposes model_dump(); .dict() is the deprecated v1 spelling.
	            dump = getattr(structured, "model_dump", None)
	            return dump() if callable(dump) else structured.dict()

	        if hasattr(result, "raw"):
	            try:
	                return json.loads(result.raw)
	            except (TypeError, ValueError) as exc:
	                # ValueError covers json.JSONDecodeError; TypeError covers non-text raw.
	                logger.warning("Could not parse raw crew output as JSON: %s", exc)
	                return {"entities": [], "relations": [], "error": "Failed to parse result"}

	        return {"entities": [], "relations": [], "error": "Unknown result format"}

	    def process_text(self, text: str) -> Dict[str, Any]:
	        """Run the crew on ``text`` and package the resulting knowledge graph.

	        Args:
	            text: Input text to process.

	        Returns:
	            Dictionary with ``success``, ``kg_data`` and ``metadata``; on failure
	            it also carries ``error`` and ``kg_data`` is an empty graph. No
	            exception derived from ``Exception`` escapes this method.
	        """
	        # perf_counter is monotonic, so the duration is immune to clock changes.
	        start_time = time.perf_counter()

	        try:
	            logger.info("process_text called with text length: %s", len(text))
	            logger.info("text first 200 chars: %r", text[:200])

	            logger.info("Starting crew execution with input_data...")
	            result = self.unified_agent_monitoring_crew.kickoff(inputs={"input_data": text})
	            logger.info("Crew execution completed, result type: %s", type(result))

	            processing_time = time.perf_counter() - start_time

	            kg_data = self._result_to_kg_data(result)

	            # Validate kg_data structure before touching it.
	            if not isinstance(kg_data, dict):
	                raise ValueError("kg_data is not a dict after parsing")
	            if "entities" not in kg_data or "relations" not in kg_data:
	                raise ValueError("kg_data missing 'entities' or 'relations'")

	            # A dumped model can contain "metadata": None; treat that like a missing key.
	            if kg_data.get("metadata") is None:
	                kg_data["metadata"] = {}

	            kg_data["metadata"]["processing_info"] = {
	                "method": "unified_single_task",
	                "processing_time_seconds": processing_time,
	                "processed_at": datetime.now().isoformat(),
	                "agent_count": 1,
	                "task_count": 1,
	                "api_calls": 1,
	            }

	            entity_count = len(kg_data["entities"])
	            relation_count = len(kg_data["relations"])
	            # Guard the throughput figures against a zero-length interval.
	            has_duration = processing_time > 0

	            return {
	                "success": True,
	                "kg_data": kg_data,
	                "metadata": {
	                    "approach": "unified_1_task",
	                    "tasks_executed": 1,
	                    "agents_used": 1,
	                    "method": self.method_name,
	                    "processing_time_seconds": processing_time,
	                    "entity_count": entity_count,
	                    "relation_count": relation_count,
	                    "entities_per_second": entity_count / processing_time if has_duration else 0,
	                    "relations_per_second": relation_count / processing_time if has_duration else 0,
	                    "api_calls": 1,
	                },
	            }

	        except Exception as e:
	            processing_time = time.perf_counter() - start_time
	            # logger.exception appends the active traceback to the record.
	            logger.exception("Error in unified knowledge extraction method: %s", e)
	            logger.error("Error type: %s", type(e).__name__)
	            return {
	                "success": False,
	                "error": str(e),
	                "kg_data": {"entities": [], "relations": []},
	                "metadata": {
	                    "approach": "unified_1_task",
	                    "tasks_executed": 0,
	                    "agents_used": 0,
	                    "method": self.method_name,
	                    "processing_time_seconds": processing_time,
	                    "api_calls": 1,
	                },
	            }

	    def extract_knowledge_graph(self, trace_data: str) -> Dict[str, Any]:
	        """Extract a knowledge graph from trace data.

	        Args:
	            trace_data: Agent trace data as a JSON string; it is handed to
	                ``process_text`` unchanged, without re-encoding.

	        Returns:
	            The ``kg_data`` dictionary from ``process_text`` on success, or an
	            empty graph ``{"entities": [], "relations": []}`` on any failure.
	        """
	        try:
	            logger.info("extract_knowledge_graph called with trace_data type: %s", type(trace_data))
	            if isinstance(trace_data, str):
	                logger.info("trace_data length: %s", len(trace_data))
	                logger.info("trace_data first 200 chars: %r", trace_data[:200])

	            result = self.process_text(trace_data)

	            if result.get("success", False):
	                return result.get("kg_data", {"entities": [], "relations": []})
	            # Failure details stay in the logs; callers only get an empty graph.
	            return {"entities": [], "relations": []}

	        except Exception as e:
	            logger.error("Error in extract_knowledge_graph: %s", e)
	            logger.error("trace_data type: %s", type(trace_data))
	            if isinstance(trace_data, str):
	                logger.error("trace_data content (first 200 chars): %r", trace_data[:200])
	            return {"entities": [], "relations": []}