Charles Grandjean committed on
Commit
27d80a8
·
1 Parent(s): 3eb9886

add search and docs in the prompts

Browse files
Files changed (8) hide show
  1. add_secrets.ipynb +1 -1
  2. agent_api.py +56 -41
  3. agent_state.py +3 -8
  4. langraph_agent.py +6 -2
  5. prompts.py +4 -0
  6. requirements.txt +1 -0
  7. tools.py +32 -4
  8. utils.py +1 -1
add_secrets.ipynb CHANGED
@@ -40,7 +40,7 @@
40
  "name": "stdout",
41
  "output_type": "stream",
42
  "text": [
43
- "Uploaded 72 secrets to Cyberlgl/CyberLegalAIendpoint.\n"
44
  ]
45
  }
46
  ],
 
40
  "name": "stdout",
41
  "output_type": "stream",
42
  "text": [
43
+ "Uploaded 77 secrets to Cyberlgl/CyberLegalAIendpoint.\n"
44
  ]
45
  }
46
  ],
agent_api.py CHANGED
@@ -25,12 +25,12 @@ from lawyer_selector import LawyerSelectorAgent
25
  from prompts import SYSTEM_PROMPT_CLIENT, SYSTEM_PROMPT_LAWYER
26
  from pdf_analyzer import PDFAnalyzerAgent
27
  from langchain_openai import ChatOpenAI
28
- from langchain_google_genai import ChatGoogleGenerativeAI
29
  from mistralai import Mistral
30
  import logging
31
  import base64
32
  import tempfile
33
  import os as pathlib
 
34
 
35
  # Load environment variables
36
  load_dotenv(dotenv_path=".env", override=False)
@@ -65,11 +65,17 @@ def require_password(x_api_key: str = Depends(api_key_header)):
65
  class Message(BaseModel):
66
  role: str = Field(..., description="Role: 'user' or 'assistant'")
67
  content: str = Field(..., description="Message content")
68
-
 
 
 
 
69
  class ChatRequest(BaseModel):
70
  message: str = Field(..., description="User's question")
71
  conversationHistory: Optional[List[Message]] = Field(default=[], description="Previous conversation messages")
72
  userType: Optional[str] = Field(default="client", description="User type: 'client' for general users or 'lawyer' for legal professionals")
 
 
73
 
74
  class ChatResponse(BaseModel):
75
  response: str = Field(..., description="Assistant's response")
@@ -106,44 +112,25 @@ class CyberLegalAPI:
106
  """
107
 
108
  def __init__(self):
109
- # Ensure .env is loaded
110
  load_dotenv(dotenv_path=".env", override=True)
111
 
112
  llm_provider = os.getenv("LLM_PROVIDER", "openai").lower()
113
  self.llm_provider = llm_provider
114
 
115
- # Validate required environment variables
116
- if llm_provider == "openai":
117
- openai_key = os.getenv("OPENAI_API_KEY")
118
- if not openai_key:
119
- raise ValueError("OPENAI_API_KEY environment variable is not set. Please check your .env file.")
120
- logger.info(f"🔑 OPENAI_API_KEY found (length: {len(openai_key)})")
121
- elif llm_provider == "gemini":
122
- gemini_key = os.getenv("GOOGLE_API_KEY")
123
- if not gemini_key:
124
- raise ValueError("GOOGLE_API_KEY environment variable is not set. Please check your .env file.")
125
- logger.info(f"🔑 GOOGLE_API_KEY found (length: {len(gemini_key)})")
126
-
127
- # Initialize LLM based on provider
128
- if llm_provider == "gemini":
129
- llm = ChatGoogleGenerativeAI(
130
- model="gemini-1.5-flash",
131
- temperature=0.1,
132
- google_api_key=os.getenv("GOOGLE_API_KEY")
133
- )
134
- else:
135
- llm = ChatOpenAI(
136
- model=os.getenv("LLM_MODEL", "gpt-5-nano-2025-08-07"),
137
- reasoning_effort="low",
138
- api_key=os.getenv("OPENAI_API_KEY"),
139
- base_url=os.getenv("LLM_BINDING_HOST", "https://api.openai.com/v1"),
140
- default_headers={
141
- "X-Cerebras-3rd-Party-Integration": "langgraph"
142
- }
143
- )
144
 
 
 
145
  # Initialize LawyerSelectorAgent and LightRAGClient, then set them globally in tools.py
146
- global lawyer_selector_agent, lightrag_client
147
 
148
  lawyer_selector_agent = LawyerSelectorAgent(llm=llm)
149
  tools.lawyer_selector_agent = lawyer_selector_agent
@@ -151,23 +138,43 @@ class CyberLegalAPI:
151
  lightrag_client = LightRAGClient()
152
  tools.lightrag_client = lightrag_client
153
 
154
- # Initialize Mistral client for OCR (optional)
155
- mistral_client = None
156
- if os.getenv("MISTRAL_API_KEY"):
157
- mistral_client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))
158
- logger.info("✅ Mistral OCR client initialized")
159
-
 
 
 
 
 
160
  self.agent_client = CyberLegalAgent(llm=llm, system_prompt=SYSTEM_PROMPT_CLIENT, tools=tools.tools_for_client)
161
  self.agent_lawyer = CyberLegalAgent(llm=llm, system_prompt=SYSTEM_PROMPT_LAWYER, tools=tools.tools_for_lawyer)
162
  self.pdf_analyzer = PDFAnalyzerAgent(llm=llm, mistral_client=mistral_client)
163
  self.conversation_manager = ConversationManager()
 
164
  logger.info(f"🔧 CyberLegalAPI initialized with {llm_provider.upper()} provider")
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  async def process_request(self, request: ChatRequest) -> ChatResponse:
167
  """
168
  Process chat request through the agent
169
  """
170
- # Validate message
171
  is_valid, error_msg = validate_query(request.message)
172
  if not is_valid:
173
  raise HTTPException(status_code=400, detail=error_msg)
@@ -193,10 +200,18 @@ class CyberLegalAPI:
193
  })
194
 
195
  try:
 
 
 
 
 
 
196
  # Process through selected agent with raw message and conversation history
197
  result = await agent.process_query(
198
  user_query=request.message,
199
- conversation_history=conversation_history
 
 
200
  )
201
 
202
  # Create response
 
25
  from prompts import SYSTEM_PROMPT_CLIENT, SYSTEM_PROMPT_LAWYER
26
  from pdf_analyzer import PDFAnalyzerAgent
27
  from langchain_openai import ChatOpenAI
 
28
  from mistralai import Mistral
29
  import logging
30
  import base64
31
  import tempfile
32
  import os as pathlib
33
+ from langchain_tavily import TavilySearch
34
 
35
  # Load environment variables
36
  load_dotenv(dotenv_path=".env", override=False)
 
65
  class Message(BaseModel):
66
  role: str = Field(..., description="Role: 'user' or 'assistant'")
67
  content: str = Field(..., description="Message content")
68
+ class DocumentAnalysis(BaseModel):
69
+ file_name: str
70
+ summary: Optional[str]
71
+ actors: Optional[str]
72
+ key_details: Optional[str]
73
  class ChatRequest(BaseModel):
74
  message: str = Field(..., description="User's question")
75
  conversationHistory: Optional[List[Message]] = Field(default=[], description="Previous conversation messages")
76
  userType: Optional[str] = Field(default="client", description="User type: 'client' for general users or 'lawyer' for legal professionals")
77
+ jurisdiction: Optional[str] = Field(default="Romania", description="Jurisdiction of the user")
78
+ documentAnalyses: Optional[List[DocumentAnalysis]] = Field(default=None, description="Lawyer's document analyses")
79
 
80
  class ChatResponse(BaseModel):
81
  response: str = Field(..., description="Assistant's response")
 
112
  """
113
 
114
  def __init__(self):
 
115
  load_dotenv(dotenv_path=".env", override=True)
116
 
117
  llm_provider = os.getenv("LLM_PROVIDER", "openai").lower()
118
  self.llm_provider = llm_provider
119
 
120
+ llm = ChatOpenAI(
121
+ model=os.getenv("LLM_MODEL", "gpt-5-nano-2025-08-07"),
122
+ reasoning_effort="low",
123
+ api_key=os.getenv("OPENAI_API_KEY"),
124
+ base_url=os.getenv("LLM_BINDING_HOST", "https://api.openai.com/v1"),
125
+ default_headers={
126
+ "X-Cerebras-3rd-Party-Integration": "langgraph"
127
+ }
128
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
+ mistral_client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))
131
+ logger.info("✅ Mistral OCR client initialized")
132
  # Initialize LawyerSelectorAgent and LightRAGClient, then set them globally in tools.py
133
+ global lawyer_selector_agent, lightrag_client, tavily_search
134
 
135
  lawyer_selector_agent = LawyerSelectorAgent(llm=llm)
136
  tools.lawyer_selector_agent = lawyer_selector_agent
 
138
  lightrag_client = LightRAGClient()
139
  tools.lightrag_client = lightrag_client
140
 
141
+ tavily_search = TavilySearch(
142
+ api_key=os.getenv("TAVILY_API_KEY"),
143
+ max_results=5,
144
+ topic="general",
145
+ search_depth="advanced",
146
+ include_answer=True,
147
+ include_raw_content=False
148
+ )
149
+ tools.tavily_search = tavily_search
150
+ logger.info("✅ Tavily search client initialized")
151
+
152
  self.agent_client = CyberLegalAgent(llm=llm, system_prompt=SYSTEM_PROMPT_CLIENT, tools=tools.tools_for_client)
153
  self.agent_lawyer = CyberLegalAgent(llm=llm, system_prompt=SYSTEM_PROMPT_LAWYER, tools=tools.tools_for_lawyer)
154
  self.pdf_analyzer = PDFAnalyzerAgent(llm=llm, mistral_client=mistral_client)
155
  self.conversation_manager = ConversationManager()
156
+ self.base_lawyer_prompt = SYSTEM_PROMPT_LAWYER
157
  logger.info(f"🔧 CyberLegalAPI initialized with {llm_provider.upper()} provider")
158
 
159
+ def _build_lawyer_prompt(self, document_analyses: Optional[List[DocumentAnalysis]], jurisdiction: str) -> str:
160
+ """Build lawyer prompt with optional document context"""
161
+ if not document_analyses:
162
+ return self.base_lawyer_prompt.format(jurisdiction=jurisdiction)
163
+
164
+ docs_text = "\n\n### Available Document Analyses in Your Workspace\n"
165
+ for i, doc in enumerate(document_analyses, 1):
166
+ docs_text += f"[Doc {i}] {doc.file_name}\n"
167
+ if doc.summary: docs_text += f"Summary: {doc.summary}\n"
168
+ if doc.actors: docs_text += f"Actors: {doc.actors}\n"
169
+ if doc.key_details: docs_text += f"Key Details: {doc.key_details}\n"
170
+ docs_text += "\n"
171
+
172
+ return self.base_lawyer_prompt.format(jurisdiction=jurisdiction) + docs_text + "Consider these documents when relevant.\n"
173
+
174
  async def process_request(self, request: ChatRequest) -> ChatResponse:
175
  """
176
  Process chat request through the agent
177
  """
 
178
  is_valid, error_msg = validate_query(request.message)
179
  if not is_valid:
180
  raise HTTPException(status_code=400, detail=error_msg)
 
200
  })
201
 
202
  try:
203
+ # Build dynamic system prompt for lawyers with document analyses
204
+ system_prompt = None
205
+ if request.userType == "lawyer" and request.documentAnalyses:
206
+ system_prompt = self._build_lawyer_prompt(request.documentAnalyses, request.jurisdiction)
207
+ logger.info(f"📚 Using lawyer prompt with {len(request.documentAnalyses)} document analyses")
208
+
209
  # Process through selected agent with raw message and conversation history
210
  result = await agent.process_query(
211
  user_query=request.message,
212
+ conversation_history=conversation_history,
213
+ jurisdiction=request.jurisdiction,
214
+ system_prompt=system_prompt
215
  )
216
 
217
  # Create response
agent_state.py CHANGED
@@ -14,21 +14,16 @@ class AgentState(TypedDict):
14
  # User interaction
15
  user_query: str
16
  conversation_history: List[Dict[str, str]]
17
- intermediate_steps:List[Dict[str, Any]]
18
-
19
- lightrag_response: Optional[Dict[str, Any]]
20
- lightrag_error: Optional[str]
21
 
22
  # Context processing
23
- processed_context: Optional[str]
24
  relevant_documents: List[str]
25
 
26
- final_response: Optional[str]
27
-
28
  # Metadata
29
  query_timestamp: str
30
  processing_time: Optional[float]
31
- query_type: Optional[str] # "comparison", "explanation", "compliance", "general"
32
 
33
 
34
  class ConversationManager:
 
14
  # User interaction
15
  user_query: str
16
  conversation_history: List[Dict[str, str]]
17
+ intermediate_steps: List[Dict[str, Any]]
18
+ system_prompt: Optional[str]
 
 
19
 
20
  # Context processing
 
21
  relevant_documents: List[str]
22
 
 
 
23
  # Metadata
24
  query_timestamp: str
25
  processing_time: Optional[float]
26
+ jurisdiction: Optional[str]
27
 
28
 
29
  class ConversationManager:
langraph_agent.py CHANGED
@@ -54,7 +54,9 @@ class CyberLegalAgent:
54
 
55
  if not intermediate_steps:
56
  history = state.get("conversation_history", [])
57
- intermediate_steps.append(SystemMessage(content=self.system_prompt))
 
 
58
  for msg in history:
59
  if isinstance(msg, dict):
60
  if msg.get("role") == "user":
@@ -91,7 +93,7 @@ class CyberLegalAgent:
91
  state["intermediate_steps"] = intermediate_steps
92
  return state
93
 
94
- async def process_query(self, user_query: str, conversation_history: Optional[List[Dict[str, str]]] = None) -> Dict[str, Any]:
95
  initial_state = {
96
  "user_query": user_query,
97
  "conversation_history": conversation_history or [],
@@ -99,6 +101,8 @@ class CyberLegalAgent:
99
  "relevant_documents": [],
100
  "query_timestamp": datetime.now().isoformat(),
101
  "processing_time": None,
 
 
102
  }
103
  self.performance_monitor.reset()
104
 
 
54
 
55
  if not intermediate_steps:
56
  history = state.get("conversation_history", [])
57
+ # Use provided system prompt if available, otherwise use the default
58
+ system_prompt_to_use = state.get("system_prompt", self.system_prompt)
59
+ intermediate_steps.append(SystemMessage(content=system_prompt_to_use))
60
  for msg in history:
61
  if isinstance(msg, dict):
62
  if msg.get("role") == "user":
 
93
  state["intermediate_steps"] = intermediate_steps
94
  return state
95
 
96
+ async def process_query(self, user_query: str, jurisdiction: str, conversation_history: Optional[List[Dict[str, str]]] = None, system_prompt: Optional[str] = None) -> Dict[str, Any]:
97
  initial_state = {
98
  "user_query": user_query,
99
  "conversation_history": conversation_history or [],
 
101
  "relevant_documents": [],
102
  "query_timestamp": datetime.now().isoformat(),
103
  "processing_time": None,
104
+ "jurisdiction": jurisdiction,
105
+ "system_prompt": system_prompt
106
  }
107
  self.performance_monitor.reset()
108
 
prompts.py CHANGED
@@ -7,10 +7,12 @@ System prompts for the LangGraph cyber-legal assistant
7
  SYSTEM_PROMPT_CLIENT = """### Role
8
  You are a helpful cyber-legal assistant specializing in EU regulations and directives.
9
  You translate complex legal information into clear, easy-to-understand language for non-lawyers.
 
10
 
11
  ### Available Tools
12
  1. **query_knowledge_graph**: Search legal documents (GDPR, NIS2, DORA, etc.) to answer questions about EU cyber regulations and directives.
13
  2. **find_lawyers**: Recommend suitable lawyers based on the user's legal issue and conversation context.
 
14
 
15
  ### Tool-Calling Process
16
  You operate in an iterative loop:
@@ -41,9 +43,11 @@ You operate in an iterative loop:
41
  # Lawyer specialist system prompt - designed for legal professionals
42
  SYSTEM_PROMPT_LAWYER = """### Role
43
  You are an expert cyber-legal assistant specializing in EU regulations and directives with deep knowledge of legal frameworks, precedents, and technical legal analysis.
 
44
 
45
  ### Available Tools
46
  1. **query_knowledge_graph**: Search legal documents (GDPR, NIS2, DORA, etc.) to answer questions about EU cyber regulations and directives.
 
47
 
48
  ### Tool-Calling Process
49
  You operate in an iterative loop:
 
7
  SYSTEM_PROMPT_CLIENT = """### Role
8
  You are a helpful cyber-legal assistant specializing in EU regulations and directives.
9
  You translate complex legal information into clear, easy-to-understand language for non-lawyers.
10
+ Client Jurisdiction: {jurisdiction}
11
 
12
  ### Available Tools
13
  1. **query_knowledge_graph**: Search legal documents (GDPR, NIS2, DORA, etc.) to answer questions about EU cyber regulations and directives.
14
  2. **find_lawyers**: Recommend suitable lawyers based on the user's legal issue and conversation context.
15
+ 3. **search_web**: Search the web for current information, recent legal updates, court decisions, or news that may not be in the knowledge graph.
16
 
17
  ### Tool-Calling Process
18
  You operate in an iterative loop:
 
43
  # Lawyer specialist system prompt - designed for legal professionals
44
  SYSTEM_PROMPT_LAWYER = """### Role
45
  You are an expert cyber-legal assistant specializing in EU regulations and directives with deep knowledge of legal frameworks, precedents, and technical legal analysis.
46
+ Lawyer Jurisdiction: {jurisdiction}
47
 
48
  ### Available Tools
49
  1. **query_knowledge_graph**: Search legal documents (GDPR, NIS2, DORA, etc.) to answer questions about EU cyber regulations and directives.
50
+ 2. **search_web**: Search the web for current information, recent legal updates, court decisions, or news that may not be in the knowledge graph.
51
 
52
  ### Tool-Calling Process
53
  You operate in an iterative loop:
requirements.txt CHANGED
@@ -21,3 +21,4 @@ uvicorn[standard]>=0.24.0
21
  # Additional utilities
22
  pydantic>=2.0.0
23
  typing-extensions>=4.0.0
 
 
21
  # Additional utilities
22
  pydantic>=2.0.0
23
  typing-extensions>=4.0.0
24
+ langchain-tavily>=0.2.16
tools.py CHANGED
@@ -3,14 +3,17 @@
3
  Tools for the CyberLegal Agent
4
  """
5
 
 
6
  from typing import List, Dict, Any, Optional
7
  from langchain_core.tools import tool
 
8
  from lawyer_selector import LawyerSelectorAgent
9
  from utils import LightRAGClient, ConversationFormatter
10
 
11
  # Global instances - will be initialized in agent_api.py
12
  lawyer_selector_agent: Optional[LawyerSelectorAgent] = None
13
  lightrag_client: Optional[LightRAGClient] = None
 
14
 
15
  @tool
16
  async def query_knowledge_graph(query: str, conversation_history: List[Dict[str, str]]) -> str:
@@ -49,6 +52,31 @@ async def query_knowledge_graph(query: str, conversation_history: List[Dict[str,
49
  except Exception as e:
50
  return f"Error querying knowledge graph: {str(e)}"
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  @tool
53
  async def find_lawyers(query: str, conversation_history: List[Dict[str, str]]) -> str:
54
  """
@@ -107,11 +135,11 @@ async def find_lawyers(query: str, conversation_history: List[Dict[str, str]]) -
107
 
108
  # Export tool sets for different user types
109
 
110
- # Tools available to general clients (knowledge graph + lawyer finder)
111
- tools_for_client = [query_knowledge_graph, find_lawyers]
112
 
113
- # Tools available to lawyers (knowledge graph only - lawyers don't need to find other lawyers)
114
- tools_for_lawyer = [query_knowledge_graph]
115
 
116
  # Default tools (backward compatibility - client tools)
117
  tools = tools_for_client
 
3
  Tools for the CyberLegal Agent
4
  """
5
 
6
+ import os
7
  from typing import List, Dict, Any, Optional
8
  from langchain_core.tools import tool
9
+ from langchain_tavily import TavilySearch
10
  from lawyer_selector import LawyerSelectorAgent
11
  from utils import LightRAGClient, ConversationFormatter
12
 
13
  # Global instances - will be initialized in agent_api.py
14
  lawyer_selector_agent: Optional[LawyerSelectorAgent] = None
15
  lightrag_client: Optional[LightRAGClient] = None
16
+ tavily_search = None
17
 
18
  @tool
19
  async def query_knowledge_graph(query: str, conversation_history: List[Dict[str, str]]) -> str:
 
52
  except Exception as e:
53
  return f"Error querying knowledge graph: {str(e)}"
54
 
55
+ @tool
56
+ async def search_web(query: str) -> str:
57
+ """Search the web for current legal updates and news using Tavily."""
58
+ try:
59
+ if tavily_search is None:
60
+ raise ValueError("TavilySearch not initialized in agent_api.py")
61
+
62
+ result = await tavily_search.ainvoke({"query": query})
63
+ import json
64
+ data = json.loads(result) if isinstance(result, str) else result
65
+
66
+ output = ["🌐 WEB SEARCH RESULTS", "=" * 80]
67
+ if data.get('answer'):
68
+ output.append(f"\n💡 AI Answer: {data['answer']}")
69
+
70
+ for i, r in enumerate(data.get('results', []), 1):
71
+ output.append(f"\n📄 Result {i}")
72
+ output.append(f" Title: {r.get('title', 'N/A')}")
73
+ output.append(f" URL: {r.get('url', 'N/A')}")
74
+ output.append(f" Summary: {r.get('content', '')[:300]}...")
75
+
76
+ return "\n".join(output)
77
+ except Exception as e:
78
+ return f"Error: {str(e)}"
79
+
80
  @tool
81
  async def find_lawyers(query: str, conversation_history: List[Dict[str, str]]) -> str:
82
  """
 
135
 
136
  # Export tool sets for different user types
137
 
138
+ # Tools available to general clients (knowledge graph + lawyer finder + web search)
139
+ tools_for_client = [query_knowledge_graph, find_lawyers, search_web]
140
 
141
+ # Tools available to lawyers (knowledge graph + web search for current legal updates)
142
+ tools_for_lawyer = [query_knowledge_graph, search_web]
143
 
144
  # Default tools (backward compatibility - client tools)
145
  tools = tools_for_client
utils.py CHANGED
@@ -256,7 +256,7 @@ def validate_query(query: str) -> Tuple[bool, Optional[str]]:
256
  if not query or not query.strip():
257
  return False, "Query cannot be empty."
258
 
259
- if len(query) > 1000:
260
  return False, "Query is too long. Please keep it under 1000 characters."
261
 
262
  return True, None
 
256
  if not query or not query.strip():
257
  return False, "Query cannot be empty."
258
 
259
+ if len(query) > 2500:
260
  return False, "Query is too long. Please keep it under 1000 characters."
261
 
262
  return True, None