Spaces:

Cyberlgl
/

CyberLegalAIendpoint

Sleeping

App Files Files Community

Charles Grandjean committed on Feb 19

Commit

2f4c4da

1 Parent(s): 47fa0a5

tree format in prompt

Browse files

Files changed (3) hide show

agent_api.py +82 -30
structured_outputs/api_models.py +23 -5
tests/test_documents_tree.py +257 -0

agent_api.py CHANGED Viewed

@@ -20,7 +20,8 @@ import secrets
 from structured_outputs.api_models import (
     Message, DocumentAnalysis, ChatRequest, ChatResponse,
     HealthResponse, AnalyzePDFRequest, AnalyzePDFResponse,
-    LawyerProfile, DocCreatorRequest, DocCreatorResponse
 )
 from langgraph_agent import CyberLegalAgent
@@ -136,7 +137,60 @@ class CyberLegalAPI:
         self.conversation_manager = ConversationManager()
         logger.info(f"🔧 CyberLegalAPI initialized with {llm_provider.upper()} provider")
-    def _build_lawyer_prompt(self, document_analyses: Optional[List[DocumentAnalysis]], jurisdiction: str, lawyer_profile: Optional[LawyerProfile] = None) -> str:
         """Build lawyer prompt with optional document context and lawyer profile"""
         prompt_parts = []
@@ -158,16 +212,11 @@ class CyberLegalAPI:
             profile_text += "\nWhen answering, consider this lawyer's expertise and experience level. Tailor your responses to be appropriate for their seniority and specialization.\n"
             prompt_parts.append(profile_text)
-        # Add document analyses if available
-        if document_analyses:
-            docs_text = "\n### Documents parsed in the lawyer profile\n"
-            for i, doc in enumerate(document_analyses, 1):
-                docs_text += f"[Doc {i}] {doc.file_name}\n"
-                if doc.summary: docs_text += f"Summary: {doc.summary}\n"
-                if doc.actors: docs_text += f"Actors: {doc.actors}\n"
-                if doc.key_details: docs_text += f"Key Details: {doc.key_details}\n"
-                docs_text += "\n"
-            docs_text += "Use these documents if the user's question is related to their content.\n"
             prompt_parts.append(docs_text)
         # Combine base prompt with context
@@ -208,18 +257,20 @@ class CyberLegalAPI:
         logger.info(f"💬 User query: {request.message}")
         try:
-            # Build dynamic system prompt for lawyers with document analyses and/or lawyer profile
             if request.userType == "lawyer":
                 system_prompt = self._build_lawyer_prompt(
-                    request.documentAnalyses,
                     request.jurisdiction,
                     request.lawyerProfile
                 )
                 context_parts = []
                 if request.lawyerProfile:
                     context_parts.append("lawyer profile")
-                if request.documentAnalyses:
-                    context_parts.append(f"{len(request.documentAnalyses)} document analyses")
                 if context_parts:
                     logger.info(f"📚 Using lawyer prompt with {', '.join(context_parts)}")
                 else:
@@ -320,6 +371,7 @@ class CyberLegalAPI:
                     actors=result.get("actors", ""),
                     key_details=result.get("key_details", ""),
                     summary=result.get("summary", ""),
                     processing_status=result.get("processing_status", "unknown"),
                     processing_time=processing_time,
                     timestamp=datetime.now().isoformat(),
@@ -366,7 +418,12 @@ class CyberLegalAPI:
         logger.info(f"👤 Client ID: {request.clientId}")
         logger.info(f"📋 Instruction: {request.instruction}")
         logger.info(f"📏 Document size: {len(request.documentContent)} bytes")
-        logger.info(f"📚 Document summaries: {len(request.documentSummaries) if request.documentSummaries else 0}")
         logger.info(f"💬 Conversation history: {len(request.conversationHistory) if request.conversationHistory else 0} messages")
         try:
@@ -375,21 +432,16 @@ class CyberLegalAPI:
             doc_text = request.documentContent
             logger.info(f"✅ HTML document ready - size: {len(doc_text)} bytes")
-            # Convert document summaries if provided
             doc_summaries = []
-            if request.documentSummaries:
-                logger.info("📚 Processing document summaries...")
-                for i, doc in enumerate(request.documentSummaries, 1):
-                    logger.info(f"  [{i}] {doc.file_name} - {doc.summary[:50]}...")
-                    doc_summaries.append({
-                        "file_name": doc.file_name,
-                        "summary": doc.summary,
-                        "actors": doc.actors,
-                        "key_details": doc.key_details
-                    })
-                logger.info(f"✅ {len(doc_summaries)} document summaries loaded")
             else:
-                logger.info("ℹ️ No document summaries provided")
             # Convert conversation history if provided
             conversation_history = []

 from structured_outputs.api_models import (
     Message, DocumentAnalysis, ChatRequest, ChatResponse,
     HealthResponse, AnalyzePDFRequest, AnalyzePDFResponse,
+    LawyerProfile, DocCreatorRequest, DocCreatorResponse,
+    DocumentsTree, TreeNode
 )
 from langgraph_agent import CyberLegalAgent
         self.conversation_manager = ConversationManager()
         logger.info(f"🔧 CyberLegalAPI initialized with {llm_provider.upper()} provider")
+    def _format_documents_tree(self, node: TreeNode, indent: int = 0) -> str:
+        """
+        Format documents tree as hierarchical text with indentation.
+        Example:
+        - Subdirectory 1:
+           - file11: summary | actors | key_details
+           - Sub-sub-directory 11:
+                 - file111: summary | actors | key_details
+        """
+        lines = []
+        indent_str = " " * indent
+        if node.type == "folder":
+            lines.append(f"{indent_str}- {node.name}:")
+            if node.children:
+                for child in node.children:
+                    lines.append(self._format_documents_tree(child, indent + 3))
+        elif node.type == "file" and node.analysis:
+            analysis_parts = []
+            if node.analysis.summary:
+                summary_preview = node.analysis.summary[:100] + "..." if len(node.analysis.summary) > 100 else node.analysis.summary
+                analysis_parts.append(f"summary: {summary_preview}")
+            if node.analysis.actors:
+                actors_preview = node.analysis.actors[:100] + "..." if len(node.analysis.actors) > 100 else node.analysis.actors
+                analysis_parts.append(f"actors: {actors_preview}")
+            if node.analysis.key_details:
+                details_preview = node.analysis.key_details[:100] + "..." if len(node.analysis.key_details) > 100 else node.analysis.key_details
+                analysis_parts.append(f"key_details: {details_preview}")
+            analysis_text = " | ".join(analysis_parts) if analysis_parts else "No analysis available"
+            lines.append(f"{indent_str}- {node.name}: {analysis_text}")
+        return "\n".join(lines)
+    def _extract_flat_documents(self, node: TreeNode) -> List[Dict[str, Any]]:
+        """
+        Recursively extract all documents with analysis from tree into flat list.
+        Used for endpoints that expect flat document structure.
+        """
+        docs = []
+        if node.type == "file" and node.analysis:
+            docs.append({
+                "file_name": node.name,
+                "summary": node.analysis.summary,
+                "actors": node.analysis.actors,
+                "key_details": node.analysis.key_details
+            })
+        if node.children:
+            for child in node.children:
+                docs.extend(self._extract_flat_documents(child))
+        return docs
+    def _build_lawyer_prompt(self, documents_tree: Optional[DocumentsTree], jurisdiction: str, lawyer_profile: Optional[LawyerProfile] = None) -> str:
         """Build lawyer prompt with optional document context and lawyer profile"""
         prompt_parts = []
             profile_text += "\nWhen answering, consider this lawyer's expertise and experience level. Tailor your responses to be appropriate for their seniority and specialization.\n"
             prompt_parts.append(profile_text)
+        # Add documents tree if available
+        if documents_tree and documents_tree.children:
+            docs_text = "\n### Documents in Lawyer's Database\n"
+            docs_text += self._format_documents_tree(documents_tree)
+            docs_text += "\n\nUse these documents when relevant to the question.\n"
             prompt_parts.append(docs_text)
         # Combine base prompt with context
         logger.info(f"💬 User query: {request.message}")
         try:
+            # Build dynamic system prompt for lawyers with documents tree and/or lawyer profile
             if request.userType == "lawyer":
                 system_prompt = self._build_lawyer_prompt(
+                    request.documents_tree,
                     request.jurisdiction,
                     request.lawyerProfile
                 )
                 context_parts = []
                 if request.lawyerProfile:
                     context_parts.append("lawyer profile")
+                if request.documents_tree and request.documents_tree.children:
+                    # Count documents in tree
+                    doc_count = sum(1 for node in self._extract_flat_documents(request.documents_tree))
+                    context_parts.append(f"{doc_count} documents")
                 if context_parts:
                     logger.info(f"📚 Using lawyer prompt with {', '.join(context_parts)}")
                 else:
                     actors=result.get("actors", ""),
                     key_details=result.get("key_details", ""),
                     summary=result.get("summary", ""),
+                    extracted_text=result.get("extracted_text", ""),
                     processing_status=result.get("processing_status", "unknown"),
                     processing_time=processing_time,
                     timestamp=datetime.now().isoformat(),
         logger.info(f"👤 Client ID: {request.clientId}")
         logger.info(f"📋 Instruction: {request.instruction}")
         logger.info(f"📏 Document size: {len(request.documentContent)} bytes")
+        # Count documents in tree
+        doc_count = 0
+        if request.documents_tree and request.documents_tree.children:
+            doc_count = sum(1 for node in self._extract_flat_documents(request.documents_tree))
+        logger.info(f"📚 Documents in tree: {doc_count}")
         logger.info(f"💬 Conversation history: {len(request.conversationHistory) if request.conversationHistory else 0} messages")
         try:
             doc_text = request.documentContent
             logger.info(f"✅ HTML document ready - size: {len(doc_text)} bytes")
+            # Extract documents from tree if provided (convert to flat list for doc_editor agent)
             doc_summaries = []
+            if request.documents_tree and request.documents_tree.children:
+                logger.info("📚 Processing documents from tree...")
+                doc_summaries = self._extract_flat_documents(request.documents_tree)
+                for i, doc in enumerate(doc_summaries, 1):
+                    logger.info(f"  [{i}] {doc['file_name']} - {doc['summary'][:50] if doc['summary'] else 'No summary'}...")
+                logger.info(f"✅ {len(doc_summaries)} documents loaded from tree")
             else:
+                logger.info("ℹ️ No documents provided")
             # Convert conversation history if provided
             conversation_history = []

structured_outputs/api_models.py CHANGED Viewed

@@ -25,13 +25,31 @@ class LawyerProfile(BaseModel):
 class DocumentAnalysis(BaseModel):
-    """Document analysis result"""
-    file_name: str
     summary: Optional[str] = None
     actors: Optional[str] = None
     key_details: Optional[str] = None
 class ChatRequest(BaseModel):
     """Chat request model"""
     clientId: str = Field(..., description="Unique client identifier")
@@ -39,7 +57,7 @@ class ChatRequest(BaseModel):
     conversationHistory: Optional[List[Message]] = Field(default=[], description="Previous conversation messages")
     userType: Optional[str] = Field(default="client", description="User type: 'client' for general users or 'lawyer' for legal professionals")
     jurisdiction: Optional[str] = Field(default="Romania", description="Jurisdiction of the user")
-    documentAnalyses: Optional[List[DocumentAnalysis]] = Field(default=None, description="Lawyer's document analyses")
     lawyerProfile: Optional[LawyerProfile] = Field(default=None, description="Lawyer's professional profile")
@@ -75,14 +93,14 @@ class AnalyzePDFResponse(BaseModel):
     processing_time: float = Field(..., description="Processing time in seconds")
     timestamp: str = Field(..., description="Analysis timestamp")
     error: Optional[str] = Field(None, description="Error message if any")
 class DocCreatorRequest(BaseModel):
     """Document creator request model"""
     instruction: str = Field(..., description="User's instruction for document editing")
     documentContent: str = Field(..., description="HTML document content")
     contentFormat: str = Field(default="html", description="Format of document content (always 'html')")
-    documentSummaries: Optional[List[DocumentAnalysis]] = Field(default=None, description="Context from analyzed documents")
     conversationHistory: Optional[List[Message]] = Field(default=[], description="Previous conversation messages")
     clientId: str = Field(..., description="Unique client identifier")

 class DocumentAnalysis(BaseModel):  # analysis payload only; the file name is carried by the owning tree node
+    """Document analysis content"""
     summary: Optional[str] = None  # free-text summary; may be absent
     actors: Optional[str] = None  # actors as one string (comma-separated in the test fixtures)
     key_details: Optional[str] = None  # key details as one string; may be absent
+class TreeNode(BaseModel):  # recursive model: a folder holds further TreeNode children
+    """Node in the document tree (folder or file)"""
+    name: str  # label of the folder or file
+    type: str = Field(..., description="Type: 'folder' or 'file'")  # plain str: the two values are not enforced here
+    file_path: Optional[str] = None  # presumably the storage path of a file - TODO confirm with the producer
+    file_size: Optional[int] = None  # presumably size in bytes - TODO confirm
+    mime_type: Optional[str] = None  # e.g. "application/pdf" in the test fixtures
+    created_at: Optional[str] = None  # timestamp kept as a string; format is not validated here
+    analysis: Optional[DocumentAnalysis] = None  # analysis of a file node, when one exists
+    children: Optional[List['TreeNode']] = None  # quoted self-reference; None when there are no children
+class DocumentsTree(BaseModel):  # root container; unlike TreeNode it declares no analysis field
+    """Root of the document tree"""
+    name: str = "root"  # default label of the root
+    type: str = "folder"  # the root defaults to a folder
+    children: Optional[List[TreeNode]] = None  # top-level folders and files; None when nothing was supplied
 class ChatRequest(BaseModel):
     """Chat request model"""
     clientId: str = Field(..., description="Unique client identifier")
     conversationHistory: Optional[List[Message]] = Field(default=[], description="Previous conversation messages")
     userType: Optional[str] = Field(default="client", description="User type: 'client' for general users or 'lawyer' for legal professionals")
     jurisdiction: Optional[str] = Field(default="Romania", description="Jurisdiction of the user")
+    documents_tree: Optional[DocumentsTree] = Field(default=None, description="Hierarchical tree of lawyer's documents")
     lawyerProfile: Optional[LawyerProfile] = Field(default=None, description="Lawyer's professional profile")
     processing_time: float = Field(..., description="Processing time in seconds")
     timestamp: str = Field(..., description="Analysis timestamp")
     error: Optional[str] = Field(None, description="Error message if any")
+    extracted_content: Optional[str] = Field(None, description="Extracted content")
 class DocCreatorRequest(BaseModel):
     """Document creator request model"""
     instruction: str = Field(..., description="User's instruction for document editing")
     documentContent: str = Field(..., description="HTML document content")
     contentFormat: str = Field(default="html", description="Format of document content (always 'html')")
+    documents_tree: Optional[DocumentsTree] = Field(default=None, description="Hierarchical tree of documents for context")
     conversationHistory: Optional[List[Message]] = Field(default=[], description="Previous conversation messages")
     clientId: str = Field(..., description="Unique client identifier")

tests/test_documents_tree.py ADDED Viewed

	@@ -0,0 +1,257 @@

+#!/usr/bin/env python3
+"""
+Test the documents_tree integration with the API
+"""
+import sys
+import os
+# Add parent directory to path
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from structured_outputs.api_models import (
+    TreeNode, DocumentsTree, DocumentAnalysis, ChatRequest
+)
+def test_tree_node_creation():
+    """Test creating a tree node"""
+    print("🧪 Testing TreeNode creation...")
+    file_analysis = DocumentAnalysis(
+        summary="Test summary",
+        actors="Actor 1, Actor 2",
+        key_details="Key detail 1, Key detail 2"
+    )
+    file_node = TreeNode(
+        name="test.pdf",
+        type="file",
+        file_path="user-id/test.pdf",
+        file_size=100000,
+        mime_type="application/pdf",
+        created_at="2025-01-15T10:30:00Z",
+        analysis=file_analysis
+    )
+    assert file_node.name == "test.pdf"
+    assert file_node.type == "file"
+    assert file_node.analysis.summary == "Test summary"
+    print("✅ TreeNode creation works")
+def test_documents_tree_creation():
+    """Test creating a documents tree"""
+    print("\n🧪 Testing DocumentsTree creation...")
+    # Create file nodes with analyses
+    file1_analysis = DocumentAnalysis(
+        summary="Contract summary",
+        actors="SCI Martin, SARL Dupont",
+        key_details="Durée: 9 ans, Loyer: 3500€/mois"
+    )
+    file1 = TreeNode(
+        name="bail-commercial.pdf",
+        type="file",
+        file_path="user-id/abc123-bail-commercial.pdf",
+        file_size=245000,
+        mime_type="application/pdf",
+        created_at="2025-01-15T10:30:00Z",
+        analysis=file1_analysis
+    )
+    file2_analysis = DocumentAnalysis(
+        summary="Legal note summary",
+        actors="Entreprise XYZ, CNIL",
+        key_details="Base légale: intérêt légitime"
+    )
+    file2 = TreeNode(
+        name="note-juridique.pdf",
+        type="file",
+        file_path="user-id/def456-note-juridique.pdf",
+        file_size=120000,
+        mime_type="application/pdf",
+        created_at="2025-02-01T14:00:00Z",
+        analysis=file2_analysis
+    )
+    # Create folder with files
+    contracts_folder = TreeNode(
+        name="Contracts",
+        type="folder",
+        children=[file1]
+    )
+    # Create root tree
+    tree = DocumentsTree(
+        name="root",
+        type="folder",
+        children=[contracts_folder, file2]
+    )
+    assert tree.name == "root"
+    assert tree.type == "folder"
+    assert len(tree.children) == 2
+    assert tree.children[0].name == "Contracts"
+    assert tree.children[0].type == "folder"
+    assert tree.children[0].children[0].name == "bail-commercial.pdf"
+    assert tree.children[1].name == "note-juridique.pdf"
+    print("✅ DocumentsTree creation works")
+def test_chat_request_with_tree():
+    """Test creating a ChatRequest with documents_tree"""
+    print("\n🧪 Testing ChatRequest with documents_tree...")
+    # Create a simple tree
+    file_analysis = DocumentAnalysis(
+        summary="Test document",
+        actors="Actor 1",
+        key_details="Detail 1"
+    )
+    file_node = TreeNode(
+        name="test.pdf",
+        type="file",
+        analysis=file_analysis
+    )
+    tree = DocumentsTree(
+        children=[file_node]
+    )
+    # Create chat request
+    request = ChatRequest(
+        clientId="test-client",
+        message="What are my documents?",
+        userType="lawyer",
+        jurisdiction="Romania",
+        documents_tree=tree
+    )
+    assert request.clientId == "test-client"
+    assert request.message == "What are my documents?"
+    assert request.userType == "lawyer"
+    assert request.documents_tree is not None
+    assert request.documents_tree.children[0].name == "test.pdf"
+    print("✅ ChatRequest with documents_tree works")
+def test_format_documents_tree():
+    """Test the _format_documents_tree method"""
+    print("\n🧪 Testing _format_documents_tree...")
+    # Import the API to access the method
+    from agent_api import CyberLegalAPI
+    # Create a tree
+    file_analysis = DocumentAnalysis(
+        summary="A very long summary that should be truncated at 100 characters and then show ellipsis",
+        actors="Actor 1, Actor 2, Actor 3",
+        key_details="Key detail"
+    )
+    file_node = TreeNode(
+        name="test.pdf",
+        type="file",
+        analysis=file_analysis
+    )
+    tree = DocumentsTree(
+        children=[file_node]
+    )
+    # Create API instance
+    api = CyberLegalAPI()
+    # Format the tree
+    formatted = api._format_documents_tree(tree)
+    print("📄 Formatted tree:")
+    print(formatted)
+    assert "test.pdf" in formatted
+    assert "summary:" in formatted
+    assert "actors:" in formatted
+    assert "key_details:" in formatted
+    print("✅ _format_documents_tree works")
+def test_extract_flat_documents():
+    """Test the _extract_flat_documents method"""
+    print("\n🧪 Testing _extract_flat_documents...")
+    # Import the API to access the method
+    from agent_api import CyberLegalAPI
+    # Create a tree with multiple files in folders
+    file1_analysis = DocumentAnalysis(
+        summary="File 1 summary",
+        actors="Actor 1",
+        key_details="Detail 1"
+    )
+    file1 = TreeNode(
+        name="file1.pdf",
+        type="file",
+        analysis=file1_analysis
+    )
+    file2_analysis = DocumentAnalysis(
+        summary="File 2 summary",
+        actors="Actor 2",
+        key_details="Detail 2"
+    )
+    file2 = TreeNode(
+        name="file2.pdf",
+        type="file",
+        analysis=file2_analysis
+    )
+    folder = TreeNode(
+        name="Folder",
+        type="folder",
+        children=[file1]
+    )
+    tree = DocumentsTree(
+        children=[folder, file2]
+    )
+    # Create API instance
+    api = CyberLegalAPI()
+    # Extract flat documents
+    flat_docs = api._extract_flat_documents(tree)
+    print(f"📄 Extracted {len(flat_docs)} documents:")
+    for doc in flat_docs:
+        print(f"  - {doc['file_name']}: {doc['summary']}")
+    assert len(flat_docs) == 2
+    assert flat_docs[0]['file_name'] == 'file1.pdf'
+    assert flat_docs[0]['summary'] == 'File 1 summary'
+    assert flat_docs[1]['file_name'] == 'file2.pdf'
+    assert flat_docs[1]['summary'] == 'File 2 summary'
+    print("✅ _extract_flat_documents works")
+if __name__ == "__main__":
+    print("🚀 Running documents_tree integration tests\n")
+    print("=" * 80)
+    try:
+        test_tree_node_creation()
+        test_documents_tree_creation()
+        test_chat_request_with_tree()
+        test_format_documents_tree()
+        test_extract_flat_documents()
+        print("\n" + "=" * 80)
+        print("✅ All tests passed!")
+        print("=" * 80)
+    except Exception as e:
+        print("\n" + "=" * 80)
+        print(f"❌ Test failed: {e}")
+        print("=" * 80)
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)