Spaces:
Running
Running
Charles Grandjean commited on
Commit ·
9ce8464
1
Parent(s): bd87ed7
revamp analysis of docs
Browse files- agent_api.py +11 -11
- agent_states/actors_merger.py +9 -0
- agent_states/pdf_analyzer_state.py +11 -18
- agents/actors_merger.py +138 -0
- agents/pdf_analyzer.py +44 -57
- prompts/actors_merger.py +72 -0
- prompts/doc_assistant.py +1 -1
- requirements.txt +1 -0
- utils/tools.py +166 -0
agent_api.py
CHANGED
|
@@ -37,6 +37,7 @@ from agents.doc_assistant import DocAssistant
|
|
| 37 |
from langchain_openai import ChatOpenAI
|
| 38 |
from langchain_xai import ChatXAI
|
| 39 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
|
|
| 40 |
from mistralai import Mistral
|
| 41 |
import logging
|
| 42 |
import traceback
|
|
@@ -83,21 +84,20 @@ class LLMConfig:
|
|
| 83 |
"X-Cerebras-3rd-Party-Integration": "langgraph"
|
| 84 |
}
|
| 85 |
))
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
model=os.getenv("OPENROUTER_MODEL"),
|
| 89 |
api_key=os.getenv("OPENROUTER_API_KEY"),
|
| 90 |
base_url=os.getenv("OPENROUTER_URL"),
|
|
|
|
|
|
|
|
|
|
| 91 |
))
|
| 92 |
self.llm = NormalizedLLM(ChatGoogleGenerativeAI(
|
| 93 |
model=os.getenv("GEMINI_TOOL_MODEL", "gemini-3-flash-preview"),
|
| 94 |
api_key=os.getenv("GOOGLE_API_KEY"),
|
| 95 |
thinking_level="medium"
|
| 96 |
))
|
| 97 |
-
|
| 98 |
-
# model=os.getenv("XAI_TOOL_MODEL"),
|
| 99 |
-
# ))
|
| 100 |
-
|
| 101 |
# logger.info("✅ LLMConfig initialized with NormalizedLLM wrapper:")
|
| 102 |
# logger.info(f" - OpenAI LLM: {os.getenv('LLM_MODEL', 'gpt-5-nano-2025-08-07')}")
|
| 103 |
# logger.info(f" - Gemini LLM: {os.getenv('GEMINI_TOOL_MODEL', 'gemini-3-flash-preview')} (for tool calling)")
|
|
@@ -154,7 +154,7 @@ class CyberLegalAPI:
|
|
| 154 |
|
| 155 |
self.agent_client = CyberLegalAgent(llm=self.llm_config.slm, tools=tools.tools_for_client,tools_facade=tools.tools_for_client_facade)
|
| 156 |
self.agent_lawyer = CyberLegalAgent(llm=self.llm_config.slm, tools=tools.tools_for_lawyer,tools_facade=tools.tools_for_lawyer_facade)
|
| 157 |
-
self.pdf_analyzer = PDFAnalyzerAgent(llm=self.llm_config.
|
| 158 |
# Initialize doc_editor with tools
|
| 159 |
self.doc_editor = DocumentEditorAgent(
|
| 160 |
llm=self.llm_config.slm,
|
|
@@ -193,13 +193,13 @@ class CyberLegalAPI:
|
|
| 193 |
elif node.type == "file" and node.analysis:
|
| 194 |
analysis_parts = []
|
| 195 |
if node.analysis.summary:
|
| 196 |
-
summary_preview = node.analysis.summary
|
| 197 |
analysis_parts.append(f"summary: {summary_preview}")
|
| 198 |
if node.analysis.actors:
|
| 199 |
-
actors_preview = node.analysis.actors
|
| 200 |
analysis_parts.append(f"actors: {actors_preview}")
|
| 201 |
if node.analysis.key_details:
|
| 202 |
-
details_preview = node.analysis.key_details
|
| 203 |
analysis_parts.append(f"key_details: {details_preview}")
|
| 204 |
|
| 205 |
analysis_text = " | ".join(analysis_parts) if analysis_parts else "No analysis available"
|
|
|
|
| 37 |
from langchain_openai import ChatOpenAI
|
| 38 |
from langchain_xai import ChatXAI
|
| 39 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 40 |
+
from langchain_openrouter import ChatOpenRouter
|
| 41 |
from mistralai import Mistral
|
| 42 |
import logging
|
| 43 |
import traceback
|
|
|
|
| 84 |
"X-Cerebras-3rd-Party-Integration": "langgraph"
|
| 85 |
}
|
| 86 |
))
|
| 87 |
+
self.utils_llm = NormalizedLLM(ChatOpenRouter(
|
| 88 |
+
model=os.getenv("OPENROUTER_MAIN_MODEL"),
|
|
|
|
| 89 |
api_key=os.getenv("OPENROUTER_API_KEY"),
|
| 90 |
base_url=os.getenv("OPENROUTER_URL"),
|
| 91 |
+
extra_body={
|
| 92 |
+
"models": json.loads(os.getenv("OPENROUTER_MODELS", "[]"))
|
| 93 |
+
},
|
| 94 |
))
|
| 95 |
self.llm = NormalizedLLM(ChatGoogleGenerativeAI(
|
| 96 |
model=os.getenv("GEMINI_TOOL_MODEL", "gemini-3-flash-preview"),
|
| 97 |
api_key=os.getenv("GOOGLE_API_KEY"),
|
| 98 |
thinking_level="medium"
|
| 99 |
))
|
| 100 |
+
|
|
|
|
|
|
|
|
|
|
| 101 |
# logger.info("✅ LLMConfig initialized with NormalizedLLM wrapper:")
|
| 102 |
# logger.info(f" - OpenAI LLM: {os.getenv('LLM_MODEL', 'gpt-5-nano-2025-08-07')}")
|
| 103 |
# logger.info(f" - Gemini LLM: {os.getenv('GEMINI_TOOL_MODEL', 'gemini-3-flash-preview')} (for tool calling)")
|
|
|
|
| 154 |
|
| 155 |
self.agent_client = CyberLegalAgent(llm=self.llm_config.slm, tools=tools.tools_for_client,tools_facade=tools.tools_for_client_facade)
|
| 156 |
self.agent_lawyer = CyberLegalAgent(llm=self.llm_config.slm, tools=tools.tools_for_lawyer,tools_facade=tools.tools_for_lawyer_facade)
|
| 157 |
+
self.pdf_analyzer = PDFAnalyzerAgent(llm=self.llm_config.utils_llm, mistral_client=mistral_client)
|
| 158 |
# Initialize doc_editor with tools
|
| 159 |
self.doc_editor = DocumentEditorAgent(
|
| 160 |
llm=self.llm_config.slm,
|
|
|
|
| 193 |
elif node.type == "file" and node.analysis:
|
| 194 |
analysis_parts = []
|
| 195 |
if node.analysis.summary:
|
| 196 |
+
summary_preview = node.analysis.summary
|
| 197 |
analysis_parts.append(f"summary: {summary_preview}")
|
| 198 |
if node.analysis.actors:
|
| 199 |
+
actors_preview = node.analysis.actors
|
| 200 |
analysis_parts.append(f"actors: {actors_preview}")
|
| 201 |
if node.analysis.key_details:
|
| 202 |
+
details_preview = node.analysis.key_details
|
| 203 |
analysis_parts.append(f"key_details: {details_preview}")
|
| 204 |
|
| 205 |
analysis_text = " | ".join(analysis_parts) if analysis_parts else "No analysis available"
|
agent_states/actors_merger.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from typing import TypedDict, List, Dict, Any
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class ActorsMergerState(TypedDict):
|
| 6 |
+
new_extractions: List[Dict[str, Any]]
|
| 7 |
+
existing_actors: List[Dict[str, Any]]
|
| 8 |
+
messages: List[Any]
|
| 9 |
+
completed: bool
|
agent_states/pdf_analyzer_state.py
CHANGED
|
@@ -3,33 +3,26 @@
|
|
| 3 |
State management for PDF Analysis Agent
|
| 4 |
"""
|
| 5 |
|
| 6 |
-
from typing import TypedDict, Optional
|
| 7 |
-
from langchain_core.messages import BaseMessage
|
| 8 |
|
| 9 |
|
| 10 |
class PDFAnalyzerState(TypedDict):
|
| 11 |
-
"""
|
| 12 |
-
State definition for the PDF Analysis Agent workflow
|
| 13 |
-
"""
|
| 14 |
# Input
|
| 15 |
pdf_path: str
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
# Extraction results
|
| 19 |
extracted_text: Optional[str]
|
| 20 |
-
|
| 21 |
-
# OCR
|
| 22 |
-
needs_ocr: bool
|
| 23 |
-
ocr_performed: bool
|
| 24 |
-
ocr_method: Optional[str]
|
| 25 |
-
|
| 26 |
# Analysis results
|
| 27 |
summary: Optional[str]
|
| 28 |
actors: Optional[str]
|
| 29 |
key_details: Optional[str]
|
| 30 |
-
|
| 31 |
-
intermediate_steps: List[BaseMessage]
|
| 32 |
-
|
| 33 |
# Metadata
|
| 34 |
document_type: Optional[str]
|
| 35 |
-
|
|
|
|
| 3 |
State management for PDF Analysis Agent
|
| 4 |
"""
|
| 5 |
|
| 6 |
+
from typing import TypedDict, Optional
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
class PDFAnalyzerState(TypedDict):
|
|
|
|
|
|
|
|
|
|
| 10 |
# Input
|
| 11 |
pdf_path: str
|
| 12 |
+
|
| 13 |
+
# Extraction
|
|
|
|
| 14 |
extracted_text: Optional[str]
|
| 15 |
+
|
| 16 |
+
# OCR
|
| 17 |
+
needs_ocr: bool
|
| 18 |
+
ocr_performed: bool
|
| 19 |
+
ocr_method: Optional[str]
|
| 20 |
+
|
| 21 |
# Analysis results
|
| 22 |
summary: Optional[str]
|
| 23 |
actors: Optional[str]
|
| 24 |
key_details: Optional[str]
|
| 25 |
+
|
|
|
|
|
|
|
| 26 |
# Metadata
|
| 27 |
document_type: Optional[str]
|
| 28 |
+
error: Optional[str]
|
agents/actors_merger.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import json
|
| 3 |
+
import logging
|
| 4 |
+
from typing import TypedDict, List, Dict, Any
|
| 5 |
+
from agent_states.actors_merger_state import ActorResolutionState
|
| 6 |
+
from langgraph.graph import StateGraph, END
|
| 7 |
+
from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage
|
| 8 |
+
from prompts.actors_merger import SYSTEM_PROMPT,ACTOR_MERGER_PROMPT
|
| 9 |
+
from utils.tools import tools_for_actors_merger, tools_for_actors_merger_facade
|
| 10 |
+
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class ActorsMergerAgent:
|
| 15 |
+
def __init__(self, llm):
|
| 16 |
+
self.llm = llm.bind_tools(tools_for_actors_merger_facade, tool_choice="any")
|
| 17 |
+
self.workflow = self._build_workflow()
|
| 18 |
+
|
| 19 |
+
def _build_workflow(self):
|
| 20 |
+
workflow = StateGraph(ActorResolutionState)
|
| 21 |
+
workflow.add_node("reason", self._reason)
|
| 22 |
+
workflow.add_node("run_tools", self._run_tools)
|
| 23 |
+
|
| 24 |
+
workflow.set_entry_point("reason")
|
| 25 |
+
workflow.add_edge("reason", "run_tools")
|
| 26 |
+
|
| 27 |
+
workflow.add_conditional_edges(
|
| 28 |
+
"run_tools",
|
| 29 |
+
self._should_continue,
|
| 30 |
+
{
|
| 31 |
+
"continue": "reason",
|
| 32 |
+
"done": END,
|
| 33 |
+
},
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
return workflow.compile()
|
| 37 |
+
|
| 38 |
+
def _should_continue(self, state: ActorResolutionState) -> str:
|
| 39 |
+
return "done" if state["completed"] else "continue"
|
| 40 |
+
|
| 41 |
+
async def _reason(self, state: ActorResolutionState) -> ActorResolutionState:
|
| 42 |
+
if not state["messages"]:
|
| 43 |
+
state["messages"] = [
|
| 44 |
+
SystemMessage(content=SYSTEM_PROMPT),
|
| 45 |
+
HumanMessage(
|
| 46 |
+
content=f"""
|
| 47 |
+
{ACTOR_MERGER_PROMPT}
|
| 48 |
+
|
| 49 |
+
NEW_ACTORS:
|
| 50 |
+
{json.dumps(new_extractions, indent=2)}
|
| 51 |
+
|
| 52 |
+
EXISTING_ACTORS:
|
| 53 |
+
{json.dumps(existing_actors, indent=2)}
|
| 54 |
+
"""
|
| 55 |
+
),
|
| 56 |
+
]
|
| 57 |
+
|
| 58 |
+
response = await self.llm.ainvoke(state["messages"])
|
| 59 |
+
state["messages"].append(response)
|
| 60 |
+
return state
|
| 61 |
+
|
| 62 |
+
async def _run_tools(self, state: ActorResolutionState) -> ActorResolutionState:
|
| 63 |
+
last_message = state["messages"][-1]
|
| 64 |
+
tool_calls = getattr(last_message, "tool_calls", []) or []
|
| 65 |
+
|
| 66 |
+
for tool_call in tool_calls:
|
| 67 |
+
name = tool_call["name"]
|
| 68 |
+
args = tool_call.get("args", {})
|
| 69 |
+
|
| 70 |
+
if name == "add_actors":
|
| 71 |
+
actors = args.get("actors", [])
|
| 72 |
+
state["existing_actors"].extend(actors)
|
| 73 |
+
result = {"ok": True, "added": len(actors)}
|
| 74 |
+
|
| 75 |
+
elif name == "modify_actors":
|
| 76 |
+
modifications = args.get("modifications", [])
|
| 77 |
+
updated = 0
|
| 78 |
+
|
| 79 |
+
for mod in modifications:
|
| 80 |
+
target_name = mod.get("target_name")
|
| 81 |
+
if not target_name:
|
| 82 |
+
continue
|
| 83 |
+
|
| 84 |
+
for actor in state["existing_actors"]:
|
| 85 |
+
if actor.get("name") == target_name:
|
| 86 |
+
if mod.get("name"):
|
| 87 |
+
actor["name"] = mod["name"]
|
| 88 |
+
|
| 89 |
+
if mod.get("aliases"):
|
| 90 |
+
actor["aliases"] = list(dict.fromkeys(
|
| 91 |
+
(actor.get("aliases", []) or []) + (mod.get("aliases", []) or [])
|
| 92 |
+
))
|
| 93 |
+
|
| 94 |
+
if mod.get("description"):
|
| 95 |
+
actor["description"] = mod["description"]
|
| 96 |
+
|
| 97 |
+
if mod.get("implication"):
|
| 98 |
+
actor["implication"] = mod["implication"]
|
| 99 |
+
|
| 100 |
+
updated += 1
|
| 101 |
+
break
|
| 102 |
+
|
| 103 |
+
result = {"ok": True, "updated": updated}
|
| 104 |
+
|
| 105 |
+
elif name == "attempt_completion_actor_resolution":
|
| 106 |
+
state["completed"] = True
|
| 107 |
+
result = {"ok": True, "completed": True}
|
| 108 |
+
|
| 109 |
+
else:
|
| 110 |
+
result = {"ok": False, "error": f"Unknown tool: {name}"}
|
| 111 |
+
|
| 112 |
+
state["messages"].append(
|
| 113 |
+
ToolMessage(
|
| 114 |
+
content=json.dumps(result, ensure_ascii=False),
|
| 115 |
+
tool_call_id=tool_call["id"],
|
| 116 |
+
)
|
| 117 |
+
)
|
| 118 |
+
|
| 119 |
+
return state
|
| 120 |
+
|
| 121 |
+
async def resolve(
|
| 122 |
+
self,
|
| 123 |
+
new_extractions: List[Dict[str, Any]],
|
| 124 |
+
existing_actors: List[Dict[str, Any]]
|
| 125 |
+
) -> Dict[str, Any]:
|
| 126 |
+
initial_state: ActorResolutionState = {
|
| 127 |
+
"new_extractions": new_extractions,
|
| 128 |
+
"existing_actors": existing_actors,
|
| 129 |
+
"messages": [],
|
| 130 |
+
"completed": False,
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
final_state = await self.workflow.ainvoke(initial_state)
|
| 134 |
+
|
| 135 |
+
return {
|
| 136 |
+
"existing_actors": final_state["existing_actors"],
|
| 137 |
+
"completed": final_state["completed"],
|
| 138 |
+
}
|
agents/pdf_analyzer.py
CHANGED
|
@@ -35,16 +35,37 @@ class PDFAnalyzerAgent:
|
|
| 35 |
workflow.add_node("extract_key_details", self._extract_key_details)
|
| 36 |
workflow.add_node("generate_summary", self._generate_summary)
|
| 37 |
workflow.set_entry_point("detect_pdf_type")
|
| 38 |
-
workflow.add_conditional_edges(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
workflow.add_edge("ocr_pdf", "extract_actors")
|
|
|
|
|
|
|
|
|
|
| 40 |
workflow.add_edge("extract_content", "extract_actors")
|
| 41 |
-
workflow.add_edge("
|
| 42 |
-
workflow.add_edge("
|
|
|
|
|
|
|
|
|
|
| 43 |
workflow.add_edge("generate_summary", END)
|
|
|
|
| 44 |
return workflow.compile()
|
| 45 |
|
| 46 |
def _should_use_ocr(self, state: PDFAnalyzerState) -> str:
|
| 47 |
return "ocr" if state.get("needs_ocr", False) else "extract"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
async def _detect_pdf_type(self, state: PDFAnalyzerState) -> PDFAnalyzerState:
|
| 50 |
import os
|
|
@@ -71,7 +92,6 @@ class PDFAnalyzerAgent:
|
|
| 71 |
state["document_type"] = "unknown"
|
| 72 |
logger.warning(f"⚠️ Unknown file format: {file_ext}, will attempt OCR")
|
| 73 |
|
| 74 |
-
state["processing_status"] = "extracting"
|
| 75 |
return state
|
| 76 |
|
| 77 |
async def _ocr_pdf(self, state: PDFAnalyzerState) -> PDFAnalyzerState:
|
|
@@ -118,17 +138,14 @@ class PDFAnalyzerAgent:
|
|
| 118 |
|
| 119 |
except Exception as e:
|
| 120 |
logger.error(f"❌ OCR failed: {e}")
|
| 121 |
-
state["
|
| 122 |
-
state["extracted_text"] =
|
| 123 |
return state
|
| 124 |
|
| 125 |
-
state["processing_status"] = "analyzing"
|
| 126 |
return state
|
| 127 |
|
| 128 |
async def _extract_content(self, state: PDFAnalyzerState) -> PDFAnalyzerState:
|
| 129 |
"""Extract text content from PDF file"""
|
| 130 |
-
state["processing_status"] = "extracting"
|
| 131 |
-
|
| 132 |
try:
|
| 133 |
pdf_path = state["pdf_path"]
|
| 134 |
logger.info(f"📄 Extracting content from PDF: {pdf_path}")
|
|
@@ -138,9 +155,8 @@ class PDFAnalyzerAgent:
|
|
| 138 |
num_pages = len(reader.pages)
|
| 139 |
for page_num in range(num_pages):
|
| 140 |
page = reader.pages[page_num]
|
| 141 |
-
extracted_text += page.extract_text() + "\n\n"
|
| 142 |
state["extracted_text"] = extracted_text
|
| 143 |
-
state["processing_status"] = "analyzing"
|
| 144 |
logger.info(f"✅ Extracted {num_pages} pages from PDF")
|
| 145 |
|
| 146 |
except Exception as e:
|
|
@@ -157,29 +173,16 @@ class PDFAnalyzerAgent:
|
|
| 157 |
|
| 158 |
logger.info("👥 Extracting actors...")
|
| 159 |
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
# Add system message if not present
|
| 164 |
-
if not intermediate_steps:
|
| 165 |
-
intermediate_steps.append(SystemMessage(content=SYSTEM_PROMPT))
|
| 166 |
-
intermediate_steps.append(HumanMessage(content=f"Here is the legal document to analyze:\n\n{state['extracted_text']}"))
|
| 167 |
-
|
| 168 |
-
# Add prompt to extract actors
|
| 169 |
-
intermediate_steps.append(HumanMessage(content=EXTRACT_ACTORS_PROMPT))
|
| 170 |
-
|
| 171 |
-
response = await self.llm.ainvoke(intermediate_steps)
|
| 172 |
-
intermediate_steps.append(response)
|
| 173 |
-
state["actors"] = response.content
|
| 174 |
-
state["intermediate_steps"] = intermediate_steps
|
| 175 |
|
| 176 |
# Log detailed LLM response
|
| 177 |
logger.info("=" * 80)
|
| 178 |
logger.info("🤖 LLM RESPONSE (extract_actors)")
|
| 179 |
logger.info("=" * 80)
|
| 180 |
-
logger.info(f"📊 Response length: {len(
|
| 181 |
logger.info(f"📄 Content preview (first 300 chars):")
|
| 182 |
-
logger.info(
|
| 183 |
logger.info("=" * 80)
|
| 184 |
logger.info("✅ Actors extracted")
|
| 185 |
|
|
@@ -193,22 +196,16 @@ class PDFAnalyzerAgent:
|
|
| 193 |
|
| 194 |
logger.info("🔑 Extracting key details...")
|
| 195 |
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
intermediate_steps.append(HumanMessage(content=EXTRACT_KEY_DETAILS_PROMPT))
|
| 199 |
-
|
| 200 |
-
response = await self.llm.ainvoke(intermediate_steps)
|
| 201 |
-
intermediate_steps.append(response)
|
| 202 |
-
state["key_details"] = response.content
|
| 203 |
-
state["intermediate_steps"] = intermediate_steps
|
| 204 |
|
| 205 |
# Log detailed LLM response
|
| 206 |
logger.info("=" * 80)
|
| 207 |
logger.info("🤖 LLM RESPONSE (extract_key_details)")
|
| 208 |
logger.info("=" * 80)
|
| 209 |
-
logger.info(f"📊 Response length: {len(
|
| 210 |
logger.info(f"📄 Content preview (first 300 chars):")
|
| 211 |
-
logger.info(
|
| 212 |
logger.info("=" * 80)
|
| 213 |
logger.info("✅ Key details extracted")
|
| 214 |
|
|
@@ -222,23 +219,16 @@ class PDFAnalyzerAgent:
|
|
| 222 |
|
| 223 |
logger.info("📝 Generating document summary...")
|
| 224 |
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
intermediate_steps.append(HumanMessage(content=GENERATE_SUMMARY_PROMPT))
|
| 228 |
-
|
| 229 |
-
response = await self.llm.ainvoke(intermediate_steps)
|
| 230 |
-
intermediate_steps.append(response)
|
| 231 |
-
state["summary"] = response.content
|
| 232 |
-
state["intermediate_steps"] = intermediate_steps
|
| 233 |
-
state["processing_status"] = "complete"
|
| 234 |
|
| 235 |
# Log detailed LLM response
|
| 236 |
logger.info("=" * 80)
|
| 237 |
logger.info("🤖 LLM RESPONSE (generate_summary)")
|
| 238 |
logger.info("=" * 80)
|
| 239 |
-
logger.info(f"📊 Response length: {len(
|
| 240 |
logger.info(f"📄 Content preview (first 300 chars):")
|
| 241 |
-
logger.info(
|
| 242 |
logger.info("=" * 80)
|
| 243 |
logger.info("✅ Summary generated")
|
| 244 |
|
|
@@ -260,29 +250,26 @@ class PDFAnalyzerAgent:
|
|
| 260 |
|
| 261 |
initial_state: PDFAnalyzerState = {
|
| 262 |
"pdf_path": pdf_path,
|
| 263 |
-
"pdf_content": None,
|
| 264 |
"extracted_text": None,
|
| 265 |
"summary": None,
|
| 266 |
"actors": None,
|
| 267 |
"key_details": None,
|
| 268 |
-
"document_type":
|
| 269 |
-
"processing_status": "pending",
|
| 270 |
-
"intermediate_steps": [],
|
| 271 |
"needs_ocr": False,
|
| 272 |
"ocr_performed": False,
|
| 273 |
-
"ocr_method": None
|
|
|
|
| 274 |
}
|
| 275 |
|
| 276 |
logger.info(f"🚀 Starting PDF analysis for: {pdf_path}")
|
| 277 |
final_state = await self.workflow.ainvoke(initial_state)
|
| 278 |
|
| 279 |
-
logger.info(f"✅ PDF analysis complete.
|
| 280 |
|
| 281 |
return {
|
| 282 |
"summary": final_state.get("summary"),
|
| 283 |
"actors": final_state.get("actors"),
|
| 284 |
"key_details": final_state.get("key_details"),
|
| 285 |
-
"processing_status": final_state.get("processing_status"),
|
| 286 |
"ocr_used": final_state.get("ocr_performed", False),
|
| 287 |
"ocr_method": final_state.get("ocr_method")
|
| 288 |
-
}
|
|
|
|
| 35 |
workflow.add_node("extract_key_details", self._extract_key_details)
|
| 36 |
workflow.add_node("generate_summary", self._generate_summary)
|
| 37 |
workflow.set_entry_point("detect_pdf_type")
|
| 38 |
+
workflow.add_conditional_edges(
|
| 39 |
+
"detect_pdf_type",
|
| 40 |
+
self._should_use_ocr,
|
| 41 |
+
{"ocr": "ocr_pdf", "extract": "extract_content"}
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
workflow.add_edge("ocr_pdf", "extract_actors")
|
| 45 |
+
workflow.add_edge("ocr_pdf", "extract_key_details")
|
| 46 |
+
workflow.add_edge("ocr_pdf", "generate_summary")
|
| 47 |
+
|
| 48 |
workflow.add_edge("extract_content", "extract_actors")
|
| 49 |
+
workflow.add_edge("extract_content", "extract_key_details")
|
| 50 |
+
workflow.add_edge("extract_content", "generate_summary")
|
| 51 |
+
|
| 52 |
+
workflow.add_edge("extract_actors", END)
|
| 53 |
+
workflow.add_edge("extract_key_details", END)
|
| 54 |
workflow.add_edge("generate_summary", END)
|
| 55 |
+
|
| 56 |
return workflow.compile()
|
| 57 |
|
| 58 |
def _should_use_ocr(self, state: PDFAnalyzerState) -> str:
|
| 59 |
return "ocr" if state.get("needs_ocr", False) else "extract"
|
| 60 |
+
|
| 61 |
+
async def _run_prompt(self, extracted_text: str, task_prompt: str) -> str:
|
| 62 |
+
messages = [
|
| 63 |
+
SystemMessage(content=SYSTEM_PROMPT),
|
| 64 |
+
HumanMessage(content=f"Here is the legal document to analyze:\n\n{extracted_text}"),
|
| 65 |
+
HumanMessage(content=task_prompt),
|
| 66 |
+
]
|
| 67 |
+
response = await self.llm.ainvoke(messages)
|
| 68 |
+
return response.content
|
| 69 |
|
| 70 |
async def _detect_pdf_type(self, state: PDFAnalyzerState) -> PDFAnalyzerState:
|
| 71 |
import os
|
|
|
|
| 92 |
state["document_type"] = "unknown"
|
| 93 |
logger.warning(f"⚠️ Unknown file format: {file_ext}, will attempt OCR")
|
| 94 |
|
|
|
|
| 95 |
return state
|
| 96 |
|
| 97 |
async def _ocr_pdf(self, state: PDFAnalyzerState) -> PDFAnalyzerState:
|
|
|
|
| 138 |
|
| 139 |
except Exception as e:
|
| 140 |
logger.error(f"❌ OCR failed: {e}")
|
| 141 |
+
state["error"] = str(e)
|
| 142 |
+
state["extracted_text"] = None
|
| 143 |
return state
|
| 144 |
|
|
|
|
| 145 |
return state
|
| 146 |
|
| 147 |
async def _extract_content(self, state: PDFAnalyzerState) -> PDFAnalyzerState:
|
| 148 |
"""Extract text content from PDF file"""
|
|
|
|
|
|
|
| 149 |
try:
|
| 150 |
pdf_path = state["pdf_path"]
|
| 151 |
logger.info(f"📄 Extracting content from PDF: {pdf_path}")
|
|
|
|
| 155 |
num_pages = len(reader.pages)
|
| 156 |
for page_num in range(num_pages):
|
| 157 |
page = reader.pages[page_num]
|
| 158 |
+
extracted_text += (page.extract_text() or "") + "\n\n"
|
| 159 |
state["extracted_text"] = extracted_text
|
|
|
|
| 160 |
logger.info(f"✅ Extracted {num_pages} pages from PDF")
|
| 161 |
|
| 162 |
except Exception as e:
|
|
|
|
| 173 |
|
| 174 |
logger.info("👥 Extracting actors...")
|
| 175 |
|
| 176 |
+
response_content = await self._run_prompt(state["extracted_text"], EXTRACT_ACTORS_PROMPT)
|
| 177 |
+
state["actors"] = response_content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
# Log detailed LLM response
|
| 180 |
logger.info("=" * 80)
|
| 181 |
logger.info("🤖 LLM RESPONSE (extract_actors)")
|
| 182 |
logger.info("=" * 80)
|
| 183 |
+
logger.info(f"📊 Response length: {len(response_content)} characters")
|
| 184 |
logger.info(f"📄 Content preview (first 300 chars):")
|
| 185 |
+
logger.info(response_content[:300] + ("..." if len(response_content) > 300 else ""))
|
| 186 |
logger.info("=" * 80)
|
| 187 |
logger.info("✅ Actors extracted")
|
| 188 |
|
|
|
|
| 196 |
|
| 197 |
logger.info("🔑 Extracting key details...")
|
| 198 |
|
| 199 |
+
response_content = await self._run_prompt(state["extracted_text"], EXTRACT_KEY_DETAILS_PROMPT)
|
| 200 |
+
state["key_details"] = response_content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
# Log detailed LLM response
|
| 203 |
logger.info("=" * 80)
|
| 204 |
logger.info("🤖 LLM RESPONSE (extract_key_details)")
|
| 205 |
logger.info("=" * 80)
|
| 206 |
+
logger.info(f"📊 Response length: {len(response_content)} characters")
|
| 207 |
logger.info(f"📄 Content preview (first 300 chars):")
|
| 208 |
+
logger.info(response_content[:300] + ("..." if len(response_content) > 300 else ""))
|
| 209 |
logger.info("=" * 80)
|
| 210 |
logger.info("✅ Key details extracted")
|
| 211 |
|
|
|
|
| 219 |
|
| 220 |
logger.info("📝 Generating document summary...")
|
| 221 |
|
| 222 |
+
response_content = await self._run_prompt(state["extracted_text"], GENERATE_SUMMARY_PROMPT)
|
| 223 |
+
state["summary"] = response_content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
# Log detailed LLM response
|
| 226 |
logger.info("=" * 80)
|
| 227 |
logger.info("🤖 LLM RESPONSE (generate_summary)")
|
| 228 |
logger.info("=" * 80)
|
| 229 |
+
logger.info(f"📊 Response length: {len(response_content)} characters")
|
| 230 |
logger.info(f"📄 Content preview (first 300 chars):")
|
| 231 |
+
logger.info(response_content[:300] + ("..." if len(response_content) > 300 else ""))
|
| 232 |
logger.info("=" * 80)
|
| 233 |
logger.info("✅ Summary generated")
|
| 234 |
|
|
|
|
| 250 |
|
| 251 |
initial_state: PDFAnalyzerState = {
|
| 252 |
"pdf_path": pdf_path,
|
|
|
|
| 253 |
"extracted_text": None,
|
| 254 |
"summary": None,
|
| 255 |
"actors": None,
|
| 256 |
"key_details": None,
|
| 257 |
+
"document_type": None,
|
|
|
|
|
|
|
| 258 |
"needs_ocr": False,
|
| 259 |
"ocr_performed": False,
|
| 260 |
+
"ocr_method": None,
|
| 261 |
+
"error": None,
|
| 262 |
}
|
| 263 |
|
| 264 |
logger.info(f"🚀 Starting PDF analysis for: {pdf_path}")
|
| 265 |
final_state = await self.workflow.ainvoke(initial_state)
|
| 266 |
|
| 267 |
+
logger.info(f"✅ PDF analysis complete.")
|
| 268 |
|
| 269 |
return {
|
| 270 |
"summary": final_state.get("summary"),
|
| 271 |
"actors": final_state.get("actors"),
|
| 272 |
"key_details": final_state.get("key_details"),
|
|
|
|
| 273 |
"ocr_used": final_state.get("ocr_performed", False),
|
| 274 |
"ocr_method": final_state.get("ocr_method")
|
| 275 |
+
}
|
prompts/actors_merger.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
SYSTEM_PROMPT = """You are an actor resolution agent.
|
| 4 |
+
|
| 5 |
+
You receive:
|
| 6 |
+
1. Newly extracted actors from a legal document
|
| 7 |
+
2. Existing actors already known in the system
|
| 8 |
+
|
| 9 |
+
Your goal is to integrate all newly extracted actors into the existing actor list.
|
| 10 |
+
|
| 11 |
+
Each actor has:
|
| 12 |
+
- name
|
| 13 |
+
- aliases
|
| 14 |
+
- description
|
| 15 |
+
- implication
|
| 16 |
+
|
| 17 |
+
You can use only these tools:
|
| 18 |
+
- add_actors: create one or more new actors
|
| 19 |
+
- modify_actors: update existing actors if a new extraction clearly refers to them
|
| 20 |
+
- attempt_completion: call this only when all extracted actors have been handled
|
| 21 |
+
|
| 22 |
+
Rules:
|
| 23 |
+
- Prefer modifying an existing actor if the new actor clearly matches it
|
| 24 |
+
- Prefer adding a new actor if there is uncertainty
|
| 25 |
+
- Use name, aliases, description, and implication to decide
|
| 26 |
+
- Do not leave any extracted actor untreated
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
ACTOR_MERGER_PROMPT = """
|
| 30 |
+
You are integrating newly extracted actors from a legal document into an existing actor registry.
|
| 31 |
+
|
| 32 |
+
You are given:
|
| 33 |
+
|
| 34 |
+
NEW_ACTORS:
|
| 35 |
+
Actors extracted from the current document.
|
| 36 |
+
|
| 37 |
+
EXISTING_ACTORS:
|
| 38 |
+
Actors already known in the system.
|
| 39 |
+
|
| 40 |
+
Your goal is to integrate the new actors into the existing registry.
|
| 41 |
+
|
| 42 |
+
Rules:
|
| 43 |
+
|
| 44 |
+
1. If a NEW_ACTOR clearly refers to an EXISTING_ACTOR:
|
| 45 |
+
- Use modify_actors
|
| 46 |
+
- Add missing information such as:
|
| 47 |
+
- new aliases
|
| 48 |
+
- additional description
|
| 49 |
+
- additional implications
|
| 50 |
+
- Do NOT duplicate the actor.
|
| 51 |
+
|
| 52 |
+
2. If a NEW_ACTOR does NOT match any existing actor:
|
| 53 |
+
- Use add_actors to create a new actor entry.
|
| 54 |
+
|
| 55 |
+
3. Matching should consider:
|
| 56 |
+
- similar names
|
| 57 |
+
- aliases
|
| 58 |
+
- descriptions
|
| 59 |
+
- contextual role in the document.
|
| 60 |
+
|
| 61 |
+
4. Be conservative with merges.
|
| 62 |
+
If you are unsure whether two actors are the same, create a new actor.
|
| 63 |
+
|
| 64 |
+
5. Continue until ALL NEW_ACTORS have been handled.
|
| 65 |
+
|
| 66 |
+
6. When every actor has been processed, call attempt_completion.
|
| 67 |
+
|
| 68 |
+
Remember:
|
| 69 |
+
- Never ignore a NEW_ACTOR.
|
| 70 |
+
- Never duplicate actors unnecessarily.
|
| 71 |
+
- Prefer enriching existing actors rather than recreating them.
|
| 72 |
+
"""
|
prompts/doc_assistant.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
System prompts for the doc creator router agent
|
| 4 |
"""
|
| 5 |
|
| 6 |
-
ROUTER_SYSTEM_PROMPT = """You are a Document
|
| 7 |
|
| 8 |
## CRITICAL RULES
|
| 9 |
|
|
|
|
| 3 |
System prompts for the doc creator router agent
|
| 4 |
"""
|
| 5 |
|
| 6 |
+
ROUTER_SYSTEM_PROMPT = """You were created by Hexiagon labs. You are Hexiagon AI, a Document Assistant Agent that decides whether to respond to a user's question or modify their HTML document.
|
| 7 |
|
| 8 |
## CRITICAL RULES
|
| 9 |
|
requirements.txt
CHANGED
|
@@ -12,6 +12,7 @@ langchain>=0.1.0
|
|
| 12 |
langchain-openai>=0.1.0
|
| 13 |
langchain-community>=0.0.20
|
| 14 |
langchain-google-genai>=1.0.0
|
|
|
|
| 15 |
mistralai>=1.0.0
|
| 16 |
langchain-xai==1.2.2
|
| 17 |
# FastAPI and server dependencies
|
|
|
|
| 12 |
langchain-openai>=0.1.0
|
| 13 |
langchain-community>=0.0.20
|
| 14 |
langchain-google-genai>=1.0.0
|
| 15 |
+
langchain-openrouter>=0.1.0
|
| 16 |
mistralai>=1.0.0
|
| 17 |
langchain-xai==1.2.2
|
| 18 |
# FastAPI and server dependencies
|
utils/tools.py
CHANGED
|
@@ -1077,6 +1077,168 @@ async def _attempt_completion(message: str) -> Dict[str, Any]:
|
|
| 1077 |
"message": message
|
| 1078 |
}
|
| 1079 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1080 |
|
| 1081 |
# Export tool sets for different user types
|
| 1082 |
tools_for_client_facade = [query_knowledge_graph, find_lawyers, message_lawyer, search_web]
|
|
@@ -1091,4 +1253,8 @@ tools_for_doc_assistant = [_query_knowledge_graph, _retrieve_lawyer_document, _e
|
|
| 1091 |
tools_for_doc_editor_facade = [replace_html, add_html, delete_html, view_current_document, attempt_completion]
|
| 1092 |
tools_for_doc_editor = [_replace_html, _add_html, _delete_html, _view_current_document, _attempt_completion]
|
| 1093 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1094 |
tools = tools_for_client
|
|
|
|
| 1077 |
"message": message
|
| 1078 |
}
|
| 1079 |
|
| 1080 |
+
# ============ ACTOR RESOLUTION TOOLS ============
|
| 1081 |
+
|
| 1082 |
+
@tool
async def add_actors(actors: List[Dict[str, Any]]) -> str:
    """
    Add one or more new actors to the actor registry.

    Each actor should contain:
    - name
    - aliases
    - description
    - implication

    Use this tool when a newly extracted actor does not reasonably match
    an existing actor and should be added as a new entry.

    Args:
        actors: List of actor dicts to register as new entries.

    Returns:
        A short confirmation string. (This is the facade exposed to the
        LLM; the real mutation is performed by `_add_actors`.)
    """
    # Bug fix: the function is annotated `-> str` but previously ended with
    # a bare `return`, yielding None. Return a confirmation string so the
    # declared contract holds and the tool result is well-formed.
    return f"Queued {len(actors)} actor(s) for addition."
|
| 1097 |
+
|
| 1098 |
+
|
| 1099 |
+
@tool
async def modify_actors(modifications: List[Dict[str, Any]]) -> str:
    """
    Modify one or more existing actors.

    Each modification should usually contain:
    - target_name: name of the existing actor to update
    - optionally updated name
    - aliases
    - description
    - implication

    Use this tool when a newly extracted actor appears to refer to an
    already existing actor and you want to enrich or update that actor.

    Args:
        modifications: List of modification dicts keyed by ``target_name``.

    Returns:
        A short confirmation string. (This is the facade exposed to the
        LLM; the real mutation is performed by `_modify_actors`.)
    """
    # Bug fix: the function is annotated `-> str` but previously ended with
    # a bare `return`, yielding None. Return a confirmation string so the
    # declared contract holds and the tool result is well-formed.
    return f"Queued {len(modifications)} modification(s)."
|
| 1115 |
+
|
| 1116 |
+
|
| 1117 |
+
@tool
async def attempt_completion_actor_resolution(message: str) -> Dict[str, Any]:
    """
    Signal that actor resolution is complete.

    Call this only when all newly extracted actors have been handled,
    either by adding them as new actors or modifying existing ones.

    Args:
        message: Short summary of what was resolved

    Returns:
        Dict with 'ok' and 'message'
    """
    logger.info(f" ✅ attempt_completion_actor_resolution | {message}")
    result: Dict[str, Any] = {"ok": True, "message": message}
    return result
|
| 1136 |
+
|
| 1137 |
+
|
| 1138 |
+
@tool
async def _add_actors(
    existing_actors: List[Dict[str, Any]],
    actors: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """
    Real implementation for adding actors to the registry.

    Args:
        existing_actors: Current actor registry from state (may be None/empty).
        actors: New actors to append (may be None/empty).

    Returns:
        On success: {"ok": True, "existing_actors": <updated list>, "added": <count>}
        On failure: {"ok": False, "error": <message>}
    """
    try:
        # Robustness fix: tolerate None for either argument (e.g. a fresh
        # state with no registry yet) instead of raising AttributeError on
        # `.copy()` / TypeError on `.extend()`.
        current = list(existing_actors or [])
        new_actors = list(actors or [])
        updated_actors = current + new_actors

        logger.info(f" ✅ add_actors | added:{len(new_actors)}")

        return {
            "ok": True,
            "existing_actors": updated_actors,
            "added": len(new_actors)
        }
    except Exception as e:
        # Mirror the other tools' error contract: never raise into the agent loop.
        return {
            "ok": False,
            "error": f"Error adding actors: {str(e)}"
        }
|
| 1169 |
+
|
| 1170 |
+
|
| 1171 |
+
@tool
async def _modify_actors(
    existing_actors: List[Dict[str, Any]],
    modifications: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """
    Real implementation for modifying existing actors.

    Matching is intentionally simple for MVP: each modification is matched
    by its ``target_name`` against ``actor['name']``; the first matching
    actor is updated.

    Each modification may contain:
    - target_name (required for matching)
    - name
    - aliases (merged, order-preserving, de-duplicated)
    - description
    - implication
    """
    try:
        # Work on shallow copies so the caller's registry is untouched.
        registry = [dict(entry) for entry in existing_actors]
        touched = 0

        for change in modifications:
            wanted = change.get("target_name")
            if not wanted:
                continue

            # First actor whose name matches, or None.
            match = next(
                (entry for entry in registry if entry.get("name") == wanted),
                None,
            )
            if match is None:
                continue

            # Only truthy fields overwrite; empty strings/lists are ignored.
            new_name = change.get("name")
            if new_name:
                match["name"] = new_name

            extra_aliases = change.get("aliases")
            if extra_aliases:
                merged = (match.get("aliases", []) or []) + (extra_aliases or [])
                match["aliases"] = list(dict.fromkeys(merged))

            if change.get("description"):
                match["description"] = change["description"]

            if change.get("implication"):
                match["implication"] = change["implication"]

            touched += 1

        logger.info(f" ✅ modify_actors | updated:{touched}")

        return {
            "ok": True,
            "existing_actors": registry,
            "updated": touched
        }
    except Exception as e:
        return {
            "ok": False,
            "error": f"Error modifying actors: {str(e)}"
        }
|
| 1229 |
+
|
| 1230 |
+
|
| 1231 |
+
@tool
async def _attempt_completion_actor_resolution(message: str) -> Dict[str, Any]:
    """
    Real implementation for actor resolution completion.
    """
    logger.info(f" ✅ attempt_completion_actor_resolution | {message}")
    payload: Dict[str, Any] = {"ok": True, "message": message}
    return payload
|
| 1241 |
+
|
| 1242 |
|
| 1243 |
# Export tool sets for different user types
|
| 1244 |
tools_for_client_facade = [query_knowledge_graph, find_lawyers, message_lawyer, search_web]
|
|
|
|
| 1253 |
tools_for_doc_editor_facade = [replace_html, add_html, delete_html, view_current_document, attempt_completion]
|
| 1254 |
tools_for_doc_editor = [_replace_html, _add_html, _delete_html, _view_current_document, _attempt_completion]
|
| 1255 |
|
| 1256 |
+
# Facade tools (LLM-visible schemas) and their real implementations for the
# actors-merger agent, mirroring the client/lawyer/doc-editor export pattern.
tools_for_actors_merger_facade = [add_actors, modify_actors, attempt_completion_actor_resolution]

tools_for_actors_merger = [_add_actors, _modify_actors, _attempt_completion_actor_resolution]
|
| 1259 |
+
|
| 1260 |
tools = tools_for_client
|