Spaces:

Cyberlgl
/

CyberLegalAIendpoint

Sleeping

File size: 12,102 Bytes

#!/usr/bin/env python3
"""
Test the /doc_creator endpoint for TipTap JSON document editing
"""

import asyncio
import json
import sys
import os
from pathlib import Path
from dotenv import load_dotenv

# Load settings from a ".env" file. The path is relative, so it is resolved
# against the current working directory rather than this file's location.
# NOTE(review): override=False presumably leaves variables that are already
# set in the process environment untouched - confirm against python-dotenv.
load_dotenv(dotenv_path=".env", override=False)

# Put the parent of this file's directory at the front of sys.path. This must
# run before the project imports below.
# Presumably that directory is the repo root that holds the
# `structured_outputs` and `agents` packages - verify against the layout.
sys.path.insert(0, str(Path(__file__).parent.parent))

from structured_outputs.api_models import DocCreatorRequest, DocCreatorResponse, Message
from agents.doc_editor import DocumentEditorAgent
from langchain_openai import ChatOpenAI


# Test fixture: a short French services contract expressed as a TipTap JSON
# tree. Layout: one centered level-1 title, then two "Article" sections, each
# made of a left-aligned level-2 heading followed by a justified paragraph
# that mixes plain text nodes with marked (bold / italic / underline) ones.
SAMPLE_TAPTAP_DOC = {
    "type": "doc",
    "content": [
        # --- Title ---------------------------------------------------------
        {
            "type": "heading",
            "attrs": {"level": 1, "textAlign": "center"},
            "content": [{"type": "text", "text": "CONTRAT DE PRESTATION DE SERVICES"}],
        },
        # --- Article 1 -----------------------------------------------------
        {
            "type": "heading",
            "attrs": {"level": 2, "textAlign": "left"},
            "content": [{"type": "text", "text": "Article 1 - Objet"}],
        },
        {
            "type": "paragraph",
            "attrs": {"textAlign": "justify"},
            "content": [
                {
                    "type": "text",
                    "text": "Le présent contrat a pour objet de définir les conditions dans lesquelles ",
                },
                {
                    "type": "text",
                    "marks": [{"type": "bold"}],
                    "text": "la Société X",
                },
                {
                    "type": "text",
                    "text": " s'engage à fournir des services.",
                },
            ],
        },
        # --- Article 2 -----------------------------------------------------
        {
            "type": "heading",
            "attrs": {"level": 2, "textAlign": "left"},
            "content": [{"type": "text", "text": "Article 2 - Durée"}],
        },
        {
            "type": "paragraph",
            "attrs": {"textAlign": "justify"},
            "content": [
                {
                    "type": "text",
                    "text": "Le contrat prend effet le ",
                },
                {
                    "type": "text",
                    "marks": [{"type": "italic"}],
                    "text": "1er janvier 2026",
                },
                {
                    "type": "text",
                    "text": " pour une durée de ",
                },
                {
                    "type": "text",
                    "marks": [{"type": "underline"}],
                    "text": "12 mois",
                },
                {
                    "type": "text",
                    "text": ".",
                },
            ],
        },
    ],
}


def _print_banner(title):
    """Print *title* framed above and below by a 60-character rule."""
    print("\n" + "=" * 60)
    print(title)
    print("=" * 60)


def _iter_node_texts(doc, node_type):
    """Yield the text of each text node directly inside a top-level *node_type* node.

    Only ``doc['content']`` and each matching node's own ``content`` list are
    inspected; deeper nesting is not searched.
    """
    for node in doc.get('content', []):
        if node.get('type') != node_type:
            continue
        for child in node.get('content', []):
            if child.get('type') == 'text':
                yield child.get('text', '')


def _load_edited_doc(result):
    """Parse ``result['doc_text']`` as JSON.

    Returns the parsed dict, or None (after printing the reason) when the key
    is missing, the text is not valid JSON, or it does not decode to a JSON
    object. A bad document therefore fails one step instead of aborting the
    whole run.
    """
    try:
        doc = json.loads(result['doc_text'])
    except (KeyError, TypeError, json.JSONDecodeError) as exc:
        print(f"❌ Verification: edited document is not parseable JSON ({exc!r})")
        return None
    if not isinstance(doc, dict):
        print(f"❌ Verification: expected a JSON object, got {type(doc).__name__}")
        return None
    return doc


async def _run_edit(doc_editor, doc_text, instruction, doc_summaries=None):
    """Send one instruction to the agent, print the result's status fields, and return it."""
    result = await doc_editor.edit_document(
        doc_text=doc_text,
        user_instruction=instruction,
        doc_summaries=[] if doc_summaries is None else doc_summaries,
        conversation_history=[],
        max_iterations=10,
    )
    # The marker follows the outcome so a failed edit is not shown with a check mark.
    marker = "✅" if result['success'] else "❌"
    print(f"{marker} Success: {result['success']}")
    print(f"📝 Message: {result['message']}")
    print(f"🔄 Iterations: {result.get('iteration_count', 'N/A')}")
    return result


def _verify_edit(result, fallback_doc_text, node_type, needles, label):
    """Check an edit result and choose the document text for the next step.

    Args:
        result: Dict returned by ``edit_document``.
        fallback_doc_text: Text to keep using when this edit is unusable.
        node_type: Top-level node type whose text nodes are searched.
        needles: Substrings that must all occur in one single text node.
        label: Name used in the printed verification line.

    Returns:
        ``(passed, next_doc_text)``. ``passed`` is True only when the agent
        reported success and the expected text was found. ``next_doc_text`` is
        the edited text whenever the agent succeeded and the text parsed, even
        if the expected content was missing; otherwise it is the fallback.
    """
    if not result['success']:
        return False, fallback_doc_text

    doc = _load_edited_doc(result)
    if doc is None:
        return False, fallback_doc_text

    found = any(
        all(needle in text for needle in needles)
        for text in _iter_node_texts(doc, node_type)
    )
    if found:
        print(f"✅ Verification: {label} found")
    else:
        print(f"❌ Verification: {label} NOT found")
    return found, result['doc_text']


async def test_endpoint_direct():
    """Drive DocumentEditorAgent through four chained edits of the sample document.

    The agent is built on a ChatOpenAI model named by the ``LLM_MODEL``
    environment variable (default ``gpt-4o-mini``). Each step feeds the
    previous step's edited text into the next one when that edit succeeded and
    produced parseable JSON; otherwise the last good text is reused.

    A step counts as passed only when the agent reports success AND the
    content check on the returned document holds. Step 4 has no specific
    content expectation, so it only requires a parseable document and reports
    the number of "Article" headings.

    Returns:
        True when every step passed, False otherwise.
    """
    print("🧪 Testing /doc_creator endpoint behavior directly")
    print("=" * 60)

    # Initialize agent
    print("\n📝 Initializing DocumentEditorAgent...")
    llm_model = os.getenv("LLM_MODEL", "gpt-4o-mini")
    print(f"   Using model: {llm_model}")
    llm = ChatOpenAI(model=llm_model, temperature=0)
    doc_editor = DocumentEditorAgent(llm=llm)

    # Serialize with sorted keys and 2-space indent; the original comment says
    # this is the canonical form the endpoint produces.
    current_doc_text = json.dumps(SAMPLE_TAPTAP_DOC, ensure_ascii=False, sort_keys=True, indent=2)
    print(f"✅ Canonical document prepared ({len(current_doc_text)} characters)")

    tests = []

    # Test 1: Simple replacement
    _print_banner("🔧 TEST 1: Replace '12 mois' with '24 mois'")
    result1 = await _run_edit(
        doc_editor,
        current_doc_text,
        "Change '12 mois' en '24 mois' dans la durée du contrat",
    )
    passed, current_doc_text = _verify_edit(
        result1, current_doc_text, 'paragraph', ('24 mois',), '24 mois'
    )
    tests.append(("Replace text", passed))

    # Test 2: Add new article
    _print_banner("🔧 TEST 2: Add Article 3 - Prix")
    result2 = await _run_edit(
        doc_editor,
        current_doc_text,
        "Ajoute un Article 3 - Prix après l'Article 2. Le prix est de 200€ par heure.",
    )
    passed, current_doc_text = _verify_edit(
        result2, current_doc_text, 'heading', ('Article 3', 'Prix'), 'Article 3'
    )
    tests.append(("Add article", passed))

    # Test 3: Replace a party name
    _print_banner("🔧 TEST 3: Remove 'la Société X' and replace with 'CyberLGL'")
    result3 = await _run_edit(
        doc_editor,
        current_doc_text,
        "Remplace 'la Société X' par 'CyberLGL' dans l'Article 1",
    )
    passed, current_doc_text = _verify_edit(
        result3, current_doc_text, 'paragraph', ('CyberLGL',), 'CyberLGL'
    )
    tests.append(("Modify content", passed))

    # Test 4: Complex edit with context
    _print_banner("🔧 TEST 4: Add confidentiality clause with document context")
    doc_summaries = [
        {
            "file_name": "contrat_type.pdf",
            "summary": "Contrat type avec clause de confidentialité standard",
            "actors": "Prestataire, Client",
            "key_details": "Clause de confidentialité de 5 ans après fin du contrat"
        }
    ]
    result4 = await _run_edit(
        doc_editor,
        current_doc_text,
        "Ajoute une clause de confidentialité conforme au contrat type après l'Article 3",
        doc_summaries=doc_summaries,
    )
    passed = False
    if result4['success']:
        final_doc = _load_edited_doc(result4)
        if final_doc is not None:
            # Informational only: the new clause's heading wording is not fixed,
            # so the count is printed rather than compared to an expected value.
            article_count = sum(
                1 for text in _iter_node_texts(final_doc, 'heading') if 'Article' in text
            )
            print(f"✅ Verification: Total articles = {article_count}")
            passed = True
    tests.append(("Add with context", passed))

    # Summary
    _print_banner("📊 TEST SUMMARY")
    for test_name, success in tests:
        status = "✅ PASS" if success else "❌ FAIL"
        print(f"{status} - {test_name}")

    total_tests = len(tests)
    passed_tests = sum(1 for _, success in tests if success)
    print(f"\n📈 Results: {passed_tests}/{total_tests} tests passed")

    return all(success for _, success in tests)


async def test_request_format():
    """Construct a DocCreatorRequest and a DocCreatorResponse and echo selected fields.

    Nothing is asserted beyond both constructors accepting the sample
    payloads; any exception they raise propagates to the caller.

    Returns:
        True, whenever both objects were built.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("🧪 Testing Request/Response Format")
    print(rule)

    # Request side: the sample document plus a two-turn conversation history.
    req = DocCreatorRequest(
        instruction="Test instruction",
        documentContent=SAMPLE_TAPTAP_DOC,
        contentFormat="tiptap-json",
        documentSummaries=None,
        conversationHistory=[
            Message(role="user", content="Previous message"),
            Message(role="assistant", content="Previous response"),
        ],
        clientId="test-client-123",
    )

    print("✅ DocCreatorRequest created")
    for field_name in ("instruction", "contentFormat", "clientId"):
        print(f"   - {field_name}: {getattr(req, field_name)}")
    history_size = len(req.conversationHistory)
    print(f"   - conversationHistory length: {history_size}")

    # Response side: echo the same sample document back as the "modified" one.
    resp = DocCreatorResponse(
        response="Test successful",
        modifiedDocument=SAMPLE_TAPTAP_DOC,
        processing_time=1.5,
        timestamp="2026-02-15T19:00:00",
        error=None,
    )

    print("\n✅ DocCreatorResponse created")
    for field_name in ("response", "processing_time", "error"):
        print(f"   - {field_name}: {getattr(resp, field_name)}")

    return True


def print_canonical_format():
    """Print SAMPLE_TAPTAP_DOC as sorted-key, 2-space-indented JSON, then its size stats."""
    rule = "=" * 60
    print("\n" + rule)
    print("📋 Canonical TipTap JSON Format")
    print(rule)

    serialized = json.dumps(
        SAMPLE_TAPTAP_DOC,
        indent=2,
        sort_keys=True,
        ensure_ascii=False,
    )
    # One more line than there are newline characters.
    line_total = serialized.count("\n") + 1

    print(serialized)
    print("\n📊 Format info:")
    print(f"   - Characters: {len(serialized)}")
    print(f"   - Lines: {line_total}")
    print("   - Keys sorted: Yes (sort_keys=True)")
    print("   - Indentation: 2 spaces")


async def main():
    """Run the whole suite and turn its outcome into a process exit code.

    Prints the canonical sample document, runs the request/response format
    check, then the agent edit tests.

    Returns:
        0 when test_endpoint_direct reports success; 1 when it does not or
        when any step raises.
    """
    import traceback

    print("🚀 Document Editor Endpoint Test Suite")
    print("=" * 60)

    try:
        print_canonical_format()
        await test_request_format()
        all_passed = await test_endpoint_direct()

        # Closing banner is printed whether or not the edit tests passed.
        print("\n" + "=" * 60)
        print("✅ ALL TESTS COMPLETED")
        print("=" * 60)

        if not all_passed:
            print("⚠️  Some tests failed. Check the output above for details.")
            return 1

        print("🎉 The /doc_creator endpoint is ready for use!")
        print("\n📖 See DOC_CREATOR_ENDPOINT_GUIDE.md for usage instructions")
        return 0

    except Exception as exc:
        # Top-level boundary: report the failure with its traceback and signal
        # it through the exit code instead of letting it escape.
        print(f"\n❌ Test suite failed with error: {exc}")
        traceback.print_exc()
        return 1


if __name__ == "__main__":
    # Run the async suite to completion and use its return value as the exit status.
    sys.exit(asyncio.run(main()))