Charles Grandjean committed on
Commit
27d80a8
·
1 Parent(s): 3eb9886

add search and docs in the prompts

Browse files
Files changed (8) hide show
  1. add_secrets.ipynb +1 -1
  2. agent_api.py +56 -41
  3. agent_state.py +3 -8
  4. langraph_agent.py +6 -2
  5. prompts.py +4 -0
  6. requirements.txt +1 -0
  7. tools.py +32 -4
  8. utils.py +1 -1
add_secrets.ipynb CHANGED
@@ -40,7 +40,7 @@
40
  "name": "stdout",
41
  "output_type": "stream",
42
  "text": [
43
- "Uploaded 72 secrets to Cyberlgl/CyberLegalAIendpoint.\n"
44
  ]
45
  }
46
  ],
 
40
  "name": "stdout",
41
  "output_type": "stream",
42
  "text": [
43
+ "Uploaded 77 secrets to Cyberlgl/CyberLegalAIendpoint.\n"
44
  ]
45
  }
46
  ],
agent_api.py CHANGED
@@ -25,12 +25,12 @@ from lawyer_selector import LawyerSelectorAgent
25
  from prompts import SYSTEM_PROMPT_CLIENT, SYSTEM_PROMPT_LAWYER
26
  from pdf_analyzer import PDFAnalyzerAgent
27
  from langchain_openai import ChatOpenAI
28
- from langchain_google_genai import ChatGoogleGenerativeAI
29
  from mistralai import Mistral
30
  import logging
31
  import base64
32
  import tempfile
33
  import os as pathlib
 
34
 
35
  # Load environment variables
36
  load_dotenv(dotenv_path=".env", override=False)
@@ -65,11 +65,17 @@ def require_password(x_api_key: str = Depends(api_key_header)):
65
  class Message(BaseModel):
66
  role: str = Field(..., description="Role: 'user' or 'assistant'")
67
  content: str = Field(..., description="Message content")
68
-
 
 
 
 
69
  class ChatRequest(BaseModel):
70
  message: str = Field(..., description="User's question")
71
  conversationHistory: Optional[List[Message]] = Field(default=[], description="Previous conversation messages")
72
  userType: Optional[str] = Field(default="client", description="User type: 'client' for general users or 'lawyer' for legal professionals")
 
 
73
 
74
  class ChatResponse(BaseModel):
75
  response: str = Field(..., description="Assistant's response")
@@ -106,44 +112,25 @@ class CyberLegalAPI:
106
  """
107
 
108
  def __init__(self):
109
- # Ensure .env is loaded
110
  load_dotenv(dotenv_path=".env", override=True)
111
 
112
  llm_provider = os.getenv("LLM_PROVIDER", "openai").lower()
113
  self.llm_provider = llm_provider
114
 
115
- # Validate required environment variables
116
- if llm_provider == "openai":
117
- openai_key = os.getenv("OPENAI_API_KEY")
118
- if not openai_key:
119
- raise ValueError("OPENAI_API_KEY environment variable is not set. Please check your .env file.")
120
- logger.info(f"🔑 OPENAI_API_KEY found (length: {len(openai_key)})")
121
- elif llm_provider == "gemini":
122
- gemini_key = os.getenv("GOOGLE_API_KEY")
123
- if not gemini_key:
124
- raise ValueError("GOOGLE_API_KEY environment variable is not set. Please check your .env file.")
125
- logger.info(f"🔑 GOOGLE_API_KEY found (length: {len(gemini_key)})")
126
-
127
- # Initialize LLM based on provider
128
- if llm_provider == "gemini":
129
- llm = ChatGoogleGenerativeAI(
130
- model="gemini-1.5-flash",
131
- temperature=0.1,
132
- google_api_key=os.getenv("GOOGLE_API_KEY")
133
- )
134
- else:
135
- llm = ChatOpenAI(
136
- model=os.getenv("LLM_MODEL", "gpt-5-nano-2025-08-07"),
137
- reasoning_effort="low",
138
- api_key=os.getenv("OPENAI_API_KEY"),
139
- base_url=os.getenv("LLM_BINDING_HOST", "https://api.openai.com/v1"),
140
- default_headers={
141
- "X-Cerebras-3rd-Party-Integration": "langgraph"
142
- }
143
- )
144
 
 
 
145
  # Initialize LawyerSelectorAgent and LightRAGClient, then set them globally in tools.py
146
- global lawyer_selector_agent, lightrag_client
147
 
148
  lawyer_selector_agent = LawyerSelectorAgent(llm=llm)
149
  tools.lawyer_selector_agent = lawyer_selector_agent
@@ -151,23 +138,43 @@ class CyberLegalAPI:
151
  lightrag_client = LightRAGClient()
152
  tools.lightrag_client = lightrag_client
153
 
154
- # Initialize Mistral client for OCR (optional)
155
- mistral_client = None
156
- if os.getenv("MISTRAL_API_KEY"):
157
- mistral_client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))
158
- logger.info("✅ Mistral OCR client initialized")
159
-
 
 
 
 
 
160
  self.agent_client = CyberLegalAgent(llm=llm, system_prompt=SYSTEM_PROMPT_CLIENT, tools=tools.tools_for_client)
161
  self.agent_lawyer = CyberLegalAgent(llm=llm, system_prompt=SYSTEM_PROMPT_LAWYER, tools=tools.tools_for_lawyer)
162
  self.pdf_analyzer = PDFAnalyzerAgent(llm=llm, mistral_client=mistral_client)
163
  self.conversation_manager = ConversationManager()
 
164
  logger.info(f"🔧 CyberLegalAPI initialized with {llm_provider.upper()} provider")
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  async def process_request(self, request: ChatRequest) -> ChatResponse:
167
  """
168
  Process chat request through the agent
169
  """
170
- # Validate message
171
  is_valid, error_msg = validate_query(request.message)
172
  if not is_valid:
173
  raise HTTPException(status_code=400, detail=error_msg)
@@ -193,10 +200,18 @@ class CyberLegalAPI:
193
  })
194
 
195
  try:
 
 
 
 
 
 
196
  # Process through selected agent with raw message and conversation history
197
  result = await agent.process_query(
198
  user_query=request.message,
199
- conversation_history=conversation_history
 
 
200
  )
201
 
202
  # Create response
 
25
  from prompts import SYSTEM_PROMPT_CLIENT, SYSTEM_PROMPT_LAWYER
26
  from pdf_analyzer import PDFAnalyzerAgent
27
  from langchain_openai import ChatOpenAI
 
28
  from mistralai import Mistral
29
  import logging
30
  import base64
31
  import tempfile
32
  import os as pathlib
33
+ from langchain_tavily import TavilySearch
34
 
35
  # Load environment variables
36
  load_dotenv(dotenv_path=".env", override=False)
 
65
  class Message(BaseModel):
66
  role: str = Field(..., description="Role: 'user' or 'assistant'")
67
  content: str = Field(..., description="Message content")
68
+ class DocumentAnalysis(BaseModel):
69
+ file_name: str
70
+ summary: Optional[str]
71
+ actors: Optional[str]
72
+ key_details: Optional[str]
73
  class ChatRequest(BaseModel):
74
  message: str = Field(..., description="User's question")
75
  conversationHistory: Optional[List[Message]] = Field(default=[], description="Previous conversation messages")
76
  userType: Optional[str] = Field(default="client", description="User type: 'client' for general users or 'lawyer' for legal professionals")
77
+ jurisdiction: Optional[str] = Field(default="Romania", description="Jurisdiction of the user")
78
+ documentAnalyses: Optional[List[DocumentAnalysis]] = Field(default=None, description="Lawyer's document analyses")
79
 
80
  class ChatResponse(BaseModel):
81
  response: str = Field(..., description="Assistant's response")
 
112
  """
113
 
114
  def __init__(self):
 
115
  load_dotenv(dotenv_path=".env", override=True)
116
 
117
  llm_provider = os.getenv("LLM_PROVIDER", "openai").lower()
118
  self.llm_provider = llm_provider
119
 
120
+ llm = ChatOpenAI(
121
+ model=os.getenv("LLM_MODEL", "gpt-5-nano-2025-08-07"),
122
+ reasoning_effort="low",
123
+ api_key=os.getenv("OPENAI_API_KEY"),
124
+ base_url=os.getenv("LLM_BINDING_HOST", "https://api.openai.com/v1"),
125
+ default_headers={
126
+ "X-Cerebras-3rd-Party-Integration": "langgraph"
127
+ }
128
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
+ mistral_client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))
131
+ logger.info("✅ Mistral OCR client initialized")
132
  # Initialize LawyerSelectorAgent and LightRAGClient, then set them globally in tools.py
133
+ global lawyer_selector_agent, lightrag_client, tavily_search
134
 
135
  lawyer_selector_agent = LawyerSelectorAgent(llm=llm)
136
  tools.lawyer_selector_agent = lawyer_selector_agent
 
138
  lightrag_client = LightRAGClient()
139
  tools.lightrag_client = lightrag_client
140
 
141
+ tavily_search = TavilySearch(
142
+ api_key=os.getenv("TAVILY_API_KEY"),
143
+ max_results=5,
144
+ topic="general",
145
+ search_depth="advanced",
146
+ include_answer=True,
147
+ include_raw_content=False
148
+ )
149
+ tools.tavily_search = tavily_search
150
+ logger.info("✅ Tavily search client initialized")
151
+
152
  self.agent_client = CyberLegalAgent(llm=llm, system_prompt=SYSTEM_PROMPT_CLIENT, tools=tools.tools_for_client)
153
  self.agent_lawyer = CyberLegalAgent(llm=llm, system_prompt=SYSTEM_PROMPT_LAWYER, tools=tools.tools_for_lawyer)
154
  self.pdf_analyzer = PDFAnalyzerAgent(llm=llm, mistral_client=mistral_client)
155
  self.conversation_manager = ConversationManager()
156
+ self.base_lawyer_prompt = SYSTEM_PROMPT_LAWYER
157
  logger.info(f"🔧 CyberLegalAPI initialized with {llm_provider.upper()} provider")
158
 
159
+ def _build_lawyer_prompt(self, document_analyses: Optional[List[DocumentAnalysis]], jurisdiction: str) -> str:
160
+ """Build lawyer prompt with optional document context"""
161
+ if not document_analyses:
162
+ return self.base_lawyer_prompt.format(jurisdiction=jurisdiction)
163
+
164
+ docs_text = "\n\n### Available Document Analyses in Your Workspace\n"
165
+ for i, doc in enumerate(document_analyses, 1):
166
+ docs_text += f"[Doc {i}] {doc.file_name}\n"
167
+ if doc.summary: docs_text += f"Summary: {doc.summary}\n"
168
+ if doc.actors: docs_text += f"Actors: {doc.actors}\n"
169
+ if doc.key_details: docs_text += f"Key Details: {doc.key_details}\n"
170
+ docs_text += "\n"
171
+
172
+ return self.base_lawyer_prompt.format(jurisdiction=jurisdiction) + docs_text + "Consider these documents when relevant.\n"
173
+
174
  async def process_request(self, request: ChatRequest) -> ChatResponse:
175
  """
176
  Process chat request through the agent
177
  """
 
178
  is_valid, error_msg = validate_query(request.message)
179
  if not is_valid:
180
  raise HTTPException(status_code=400, detail=error_msg)
 
200
  })
201
 
202
  try:
203
+ # Build dynamic system prompt for lawyers with document analyses
204
+ system_prompt = None
205
+ if request.userType == "lawyer" and request.documentAnalyses:
206
+ system_prompt = self._build_lawyer_prompt(request.documentAnalyses, request.jurisdiction)
207
+ logger.info(f"📚 Using lawyer prompt with {len(request.documentAnalyses)} document analyses")
208
+
209
  # Process through selected agent with raw message and conversation history
210
  result = await agent.process_query(
211
  user_query=request.message,
212
+ conversation_history=conversation_history,
213
+ jurisdiction=request.jurisdiction,
214
+ system_prompt=system_prompt
215
  )
216
 
217
  # Create response
agent_state.py CHANGED
@@ -14,21 +14,16 @@ class AgentState(TypedDict):
14
  # User interaction
15
  user_query: str
16
  conversation_history: List[Dict[str, str]]
17
- intermediate_steps:List[Dict[str, Any]]
18
-
19
- lightrag_response: Optional[Dict[str, Any]]
20
- lightrag_error: Optional[str]
21
 
22
  # Context processing
23
- processed_context: Optional[str]
24
  relevant_documents: List[str]
25
 
26
- final_response: Optional[str]
27
-
28
  # Metadata
29
  query_timestamp: str
30
  processing_time: Optional[float]
31
- query_type: Optional[str] # "comparison", "explanation", "compliance", "general"
32
 
33
 
34
  class ConversationManager:
 
14
  # User interaction
15
  user_query: str
16
  conversation_history: List[Dict[str, str]]
17
+ intermediate_steps: List[Dict[str, Any]]
18
+ system_prompt: Optional[str]
 
 
19
 
20
  # Context processing
 
21
  relevant_documents: List[str]
22
 
 
 
23
  # Metadata
24
  query_timestamp: str
25
  processing_time: Optional[float]
26
+ jurisdiction: Optional[str]
27
 
28
 
29
  class ConversationManager:
langraph_agent.py CHANGED
@@ -54,7 +54,9 @@ class CyberLegalAgent:
54
 
55
  if not intermediate_steps:
56
  history = state.get("conversation_history", [])
57
- intermediate_steps.append(SystemMessage(content=self.system_prompt))
 
 
58
  for msg in history:
59
  if isinstance(msg, dict):
60
  if msg.get("role") == "user":
@@ -91,7 +93,7 @@ class CyberLegalAgent:
91
  state["intermediate_steps"] = intermediate_steps
92
  return state
93
 
94
- async def process_query(self, user_query: str, conversation_history: Optional[List[Dict[str, str]]] = None) -> Dict[str, Any]:
95
  initial_state = {
96
  "user_query": user_query,
97
  "conversation_history": conversation_history or [],
@@ -99,6 +101,8 @@ class CyberLegalAgent:
99
  "relevant_documents": [],
100
  "query_timestamp": datetime.now().isoformat(),
101
  "processing_time": None,
 
 
102
  }
103
  self.performance_monitor.reset()
104
 
 
54
 
55
  if not intermediate_steps:
56
  history = state.get("conversation_history", [])
57
+ # Use provided system prompt if available, otherwise use the default
58
+ system_prompt_to_use = state.get("system_prompt", self.system_prompt)
59
+ intermediate_steps.append(SystemMessage(content=system_prompt_to_use))
60
  for msg in history:
61
  if isinstance(msg, dict):
62
  if msg.get("role") == "user":
 
93
  state["intermediate_steps"] = intermediate_steps
94
  return state
95
 
96
+ async def process_query(self, user_query: str, jurisdiction: str, conversation_history: Optional[List[Dict[str, str]]] = None, system_prompt: Optional[str] = None) -> Dict[str, Any]:
97
  initial_state = {
98
  "user_query": user_query,
99
  "conversation_history": conversation_history or [],
 
101
  "relevant_documents": [],
102
  "query_timestamp": datetime.now().isoformat(),
103
  "processing_time": None,
104
+ "jurisdiction": jurisdiction,
105
+ "system_prompt": system_prompt
106
  }
107
  self.performance_monitor.reset()
108
 
prompts.py CHANGED
@@ -7,10 +7,12 @@ System prompts for the LangGraph cyber-legal assistant
7
  SYSTEM_PROMPT_CLIENT = """### Role
8
  You are a helpful cyber-legal assistant specializing in EU regulations and directives.
9
  You translate complex legal information into clear, easy-to-understand language for non-lawyers.
 
10
 
11
  ### Available Tools
12
  1. **query_knowledge_graph**: Search legal documents (GDPR, NIS2, DORA, etc.) to answer questions about EU cyber regulations and directives.
13
  2. **find_lawyers**: Recommend suitable lawyers based on the user's legal issue and conversation context.
 
14
 
15
  ### Tool-Calling Process
16
  You operate in an iterative loop:
@@ -41,9 +43,11 @@ You operate in an iterative loop:
41
  # Lawyer specialist system prompt - designed for legal professionals
42
  SYSTEM_PROMPT_LAWYER = """### Role
43
  You are an expert cyber-legal assistant specializing in EU regulations and directives with deep knowledge of legal frameworks, precedents, and technical legal analysis.
 
44
 
45
  ### Available Tools
46
  1. **query_knowledge_graph**: Search legal documents (GDPR, NIS2, DORA, etc.) to answer questions about EU cyber regulations and directives.
 
47
 
48
  ### Tool-Calling Process
49
  You operate in an iterative loop:
 
7
  SYSTEM_PROMPT_CLIENT = """### Role
8
  You are a helpful cyber-legal assistant specializing in EU regulations and directives.
9
  You translate complex legal information into clear, easy-to-understand language for non-lawyers.
10
+ Client Jurisdiction: {jurisdiction}
11
 
12
  ### Available Tools
13
  1. **query_knowledge_graph**: Search legal documents (GDPR, NIS2, DORA, etc.) to answer questions about EU cyber regulations and directives.
14
  2. **find_lawyers**: Recommend suitable lawyers based on the user's legal issue and conversation context.
15
+ 3. **search_web**: Search the web for current information, recent legal updates, court decisions, or news that may not be in the knowledge graph.
16
 
17
  ### Tool-Calling Process
18
  You operate in an iterative loop:
 
43
  # Lawyer specialist system prompt - designed for legal professionals
44
  SYSTEM_PROMPT_LAWYER = """### Role
45
  You are an expert cyber-legal assistant specializing in EU regulations and directives with deep knowledge of legal frameworks, precedents, and technical legal analysis.
46
+ Lawyer Jurisdiction: {jurisdiction}
47
 
48
  ### Available Tools
49
  1. **query_knowledge_graph**: Search legal documents (GDPR, NIS2, DORA, etc.) to answer questions about EU cyber regulations and directives.
50
+ 2. **search_web**: Search the web for current information, recent legal updates, court decisions, or news that may not be in the knowledge graph.
51
 
52
  ### Tool-Calling Process
53
  You operate in an iterative loop:
requirements.txt CHANGED
@@ -21,3 +21,4 @@ uvicorn[standard]>=0.24.0
21
  # Additional utilities
22
  pydantic>=2.0.0
23
  typing-extensions>=4.0.0
 
 
21
  # Additional utilities
22
  pydantic>=2.0.0
23
  typing-extensions>=4.0.0
24
+ langchain-tavily>=0.2.16
tools.py CHANGED
@@ -3,14 +3,17 @@
3
  Tools for the CyberLegal Agent
4
  """
5
 
 
6
  from typing import List, Dict, Any, Optional
7
  from langchain_core.tools import tool
 
8
  from lawyer_selector import LawyerSelectorAgent
9
  from utils import LightRAGClient, ConversationFormatter
10
 
11
  # Global instances - will be initialized in agent_api.py
12
  lawyer_selector_agent: Optional[LawyerSelectorAgent] = None
13
  lightrag_client: Optional[LightRAGClient] = None
 
14
 
15
  @tool
16
  async def query_knowledge_graph(query: str, conversation_history: List[Dict[str, str]]) -> str:
@@ -49,6 +52,31 @@ async def query_knowledge_graph(query: str, conversation_history: List[Dict[str,
49
  except Exception as e:
50
  return f"Error querying knowledge graph: {str(e)}"
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  @tool
53
  async def find_lawyers(query: str, conversation_history: List[Dict[str, str]]) -> str:
54
  """
@@ -107,11 +135,11 @@ async def find_lawyers(query: str, conversation_history: List[Dict[str, str]]) -
107
 
108
  # Export tool sets for different user types
109
 
110
- # Tools available to general clients (knowledge graph + lawyer finder)
111
- tools_for_client = [query_knowledge_graph, find_lawyers]
112
 
113
- # Tools available to lawyers (knowledge graph only - lawyers don't need to find other lawyers)
114
- tools_for_lawyer = [query_knowledge_graph]
115
 
116
  # Default tools (backward compatibility - client tools)
117
  tools = tools_for_client
 
3
  Tools for the CyberLegal Agent
4
  """
5
 
6
+ import os
7
  from typing import List, Dict, Any, Optional
8
  from langchain_core.tools import tool
9
+ from langchain_tavily import TavilySearch
10
  from lawyer_selector import LawyerSelectorAgent
11
  from utils import LightRAGClient, ConversationFormatter
12
 
13
  # Global instances - will be initialized in agent_api.py
14
  lawyer_selector_agent: Optional[LawyerSelectorAgent] = None
15
  lightrag_client: Optional[LightRAGClient] = None
16
+ tavily_search = None
17
 
18
  @tool
19
  async def query_knowledge_graph(query: str, conversation_history: List[Dict[str, str]]) -> str:
 
52
  except Exception as e:
53
  return f"Error querying knowledge graph: {str(e)}"
54
 
55
+ @tool
56
+ async def search_web(query: str) -> str:
57
+ """Search the web for current legal updates and news using Tavily."""
58
+ try:
59
+ if tavily_search is None:
60
+ raise ValueError("TavilySearch not initialized in agent_api.py")
61
+
62
+ result = await tavily_search.ainvoke({"query": query})
63
+ import json
64
+ data = json.loads(result) if isinstance(result, str) else result
65
+
66
+ output = ["🌐 WEB SEARCH RESULTS", "=" * 80]
67
+ if data.get('answer'):
68
+ output.append(f"\n💡 AI Answer: {data['answer']}")
69
+
70
+ for i, r in enumerate(data.get('results', []), 1):
71
+ output.append(f"\n📄 Result {i}")
72
+ output.append(f" Title: {r.get('title', 'N/A')}")
73
+ output.append(f" URL: {r.get('url', 'N/A')}")
74
+ output.append(f" Summary: {r.get('content', '')[:300]}...")
75
+
76
+ return "\n".join(output)
77
+ except Exception as e:
78
+ return f"Error: {str(e)}"
79
+
80
  @tool
81
  async def find_lawyers(query: str, conversation_history: List[Dict[str, str]]) -> str:
82
  """
 
135
 
136
  # Export tool sets for different user types
137
 
138
+ # Tools available to general clients (knowledge graph + lawyer finder + web search)
139
+ tools_for_client = [query_knowledge_graph, find_lawyers, search_web]
140
 
141
+ # Tools available to lawyers (knowledge graph + web search for current legal updates)
142
+ tools_for_lawyer = [query_knowledge_graph, search_web]
143
 
144
  # Default tools (backward compatibility - client tools)
145
  tools = tools_for_client
utils.py CHANGED
@@ -256,7 +256,7 @@ def validate_query(query: str) -> Tuple[bool, Optional[str]]:
256
  if not query or not query.strip():
257
  return False, "Query cannot be empty."
258
 
259
- if len(query) > 1000:
260
  return False, "Query is too long. Please keep it under 1000 characters."
261
 
262
  return True, None
 
256
  if not query or not query.strip():
257
  return False, "Query cannot be empty."
258
 
259
+ if len(query) > 2500:
260
  return False, "Query is too long. Please keep it under 1000 characters."
261
 
262
  return True, None