Spaces:

Cyberlgl
/

CyberLegalAIendpoint

Running

CyberLegalAIendpoint / tests /test_doc_creator_endpoint.py

Charles Grandjean

solve tests

8cc8e89 17 days ago

12.1 kB

	#!/usr/bin/env python3
	"""
	Test the /doc_creator endpoint for TipTap JSON document editing
	"""

	import asyncio
	import json
	import sys
	import os
	from pathlib import Path
	from dotenv import load_dotenv

	# Load environment variables
	load_dotenv(dotenv_path=".env", override=False)

	# Add parent directory to path
	sys.path.insert(0, str(Path(__file__).parent.parent))

	from structured_outputs.api_models import DocCreatorRequest, DocCreatorResponse, Message
	from agents.doc_editor import DocumentEditorAgent
	from langchain_openai import ChatOpenAI


	# Sample TipTap document for testing.
	# Built from small node constructors so the contract's structure (a title,
	# then two "Article" headings each followed by a justified paragraph) is
	# readable at a glance. Key insertion order matches a hand-written literal.
	def _text(value, mark=None):
	    """Return a TipTap text node, optionally carrying a single mark type."""
	    node = {"type": "text"}
	    if mark is not None:
	        node["marks"] = [{"type": mark}]
	    node["text"] = value
	    return node


	def _heading(level, align, title):
	    """Return a TipTap heading node holding one plain text node."""
	    return {
	        "type": "heading",
	        "attrs": {"level": level, "textAlign": align},
	        "content": [_text(title)],
	    }


	def _paragraph(*nodes):
	    """Return a justified TipTap paragraph wrapping the given inline nodes."""
	    return {
	        "type": "paragraph",
	        "attrs": {"textAlign": "justify"},
	        "content": list(nodes),
	    }


	SAMPLE_TAPTAP_DOC = {
	    "type": "doc",
	    "content": [
	        _heading(1, "center", "CONTRAT DE PRESTATION DE SERVICES"),
	        _heading(2, "left", "Article 1 - Objet"),
	        _paragraph(
	            _text("Le présent contrat a pour objet de définir les conditions dans lesquelles "),
	            _text("la Société X", mark="bold"),
	            _text(" s'engage à fournir des services."),
	        ),
	        _heading(2, "left", "Article 2 - Durée"),
	        _paragraph(
	            _text("Le contrat prend effet le "),
	            _text("1er janvier 2026", mark="italic"),
	            _text(" pour une durée de "),
	            _text("12 mois", mark="underline"),
	            _text("."),
	        ),
	    ],
	}


	def _print_banner(title):
	    """Print ``title`` between two 60-character rules, preceded by a blank line."""
	    print("\n" + "=" * 60)
	    print(title)
	    print("=" * 60)


	def _iter_block_texts(doc, block_type):
	    """Yield the text of each text node directly inside top-level ``block_type`` blocks."""
	    for block in doc.get('content', []):
	        if block.get('type') != block_type:
	            continue
	        for node in block.get('content', []):
	            if node.get('type') == 'text':
	                yield node.get('text', '')


	async def _run_edit(doc_editor, title, doc_text, instruction, doc_summaries=None):
	    """Send one instruction to the agent, print its status lines and return its result dict."""
	    _print_banner(title)
	    result = await doc_editor.edit_document(
	        doc_text=doc_text,
	        user_instruction=instruction,
	        doc_summaries=doc_summaries if doc_summaries is not None else [],
	        conversation_history=[],
	        max_iterations=10,
	    )
	    print(f"✅ Success: {result['success']}")
	    print(f"📝 Message: {result['message']}")
	    print(f"🔄 Iterations: {result.get('iteration_count', 'N/A')}")
	    return result


	def _parse_edited_doc(result):
	    """Return the edited document as a dict, or None if the edit failed or is not a JSON object."""
	    if not result['success']:
	        return None
	    try:
	        doc = json.loads(result.get('doc_text'))
	    except (TypeError, ValueError) as exc:
	        # A malformed reply fails this one test instead of aborting the suite.
	        print(f"❌ Verification: returned document is not valid JSON ({exc})")
	        return None
	    if not isinstance(doc, dict):
	        print("❌ Verification: returned document is not a JSON object")
	        return None
	    return doc


	def _report_text_found(doc, block_type, needles, label):
	    """Print and return whether one text node in ``block_type`` blocks contains every needle."""
	    found = any(
	        all(needle in text for needle in needles)
	        for text in _iter_block_texts(doc, block_type)
	    )
	    marker = "✅" if found else "❌"
	    print(f"{marker} Verification: {label} {'found' if found else 'NOT found'}")
	    return found


	async def test_endpoint_direct():
	    """Test the doc_editor agent directly (simulating endpoint behavior).

	    Runs four chained edits against the sample document: a text replacement,
	    adding an article, a second replacement, and an edit that is given
	    document summaries as context. Each edit starts from the output of the
	    last edit that produced a parseable document.

	    Tests 1-3 pass only when the agent reports success AND the expected text
	    is present in the returned document; previously the verification was
	    printed but ignored. Test 4 has no fixed expected text, so it passes when
	    the agent reports success and returns a JSON object; its article count is
	    printed for information only.

	    Returns:
	        True if all four tests passed, otherwise False.
	    """
	    print("🧪 Testing /doc_creator endpoint behavior directly")
	    print("=" * 60)

	    # Initialize agent
	    print("\n📝 Initializing DocumentEditorAgent...")
	    llm_model = os.getenv("LLM_MODEL", "gpt-4o-mini")
	    print(f" Using model: {llm_model}")
	    llm = ChatOpenAI(model=llm_model, temperature=0)
	    doc_editor = DocumentEditorAgent(llm=llm)

	    # Convert to canonical format (as endpoint does)
	    current_doc_text = json.dumps(SAMPLE_TAPTAP_DOC, ensure_ascii=False, sort_keys=True, indent=2)
	    print(f"✅ Canonical document prepared ({len(current_doc_text)} characters)")

	    tests = []

	    # Test 1: Simple replacement
	    result = await _run_edit(
	        doc_editor,
	        "🔧 TEST 1: Replace '12 mois' with '24 mois'",
	        current_doc_text,
	        "Change '12 mois' en '24 mois' dans la durée du contrat",
	    )
	    doc = _parse_edited_doc(result)
	    passed = doc is not None and _report_text_found(doc, 'paragraph', ('24 mois',), '24 mois')
	    tests.append(("Replace text", passed))
	    if doc is not None:
	        current_doc_text = result['doc_text']

	    # Test 2: Add new article
	    result = await _run_edit(
	        doc_editor,
	        "🔧 TEST 2: Add Article 3 - Prix",
	        current_doc_text,
	        "Ajoute un Article 3 - Prix après l'Article 2. Le prix est de 200€ par heure.",
	    )
	    doc = _parse_edited_doc(result)
	    passed = doc is not None and _report_text_found(doc, 'heading', ('Article 3', 'Prix'), 'Article 3')
	    tests.append(("Add article", passed))
	    if doc is not None:
	        current_doc_text = result['doc_text']

	    # Test 3: Replace content
	    result = await _run_edit(
	        doc_editor,
	        "🔧 TEST 3: Remove 'la Société X' and replace with 'CyberLGL'",
	        current_doc_text,
	        "Remplace 'la Société X' par 'CyberLGL' dans l'Article 1",
	    )
	    doc = _parse_edited_doc(result)
	    passed = doc is not None and _report_text_found(doc, 'paragraph', ('CyberLGL',), 'CyberLGL')
	    tests.append(("Modify content", passed))
	    if doc is not None:
	        current_doc_text = result['doc_text']

	    # Test 4: Complex edit with context
	    doc_summaries = [
	        {
	            "file_name": "contrat_type.pdf",
	            "summary": "Contrat type avec clause de confidentialité standard",
	            "actors": "Prestataire, Client",
	            "key_details": "Clause de confidentialité de 5 ans après fin du contrat",
	        }
	    ]
	    result = await _run_edit(
	        doc_editor,
	        "🔧 TEST 4: Add confidentiality clause with document context",
	        current_doc_text,
	        "Ajoute une clause de confidentialité conforme au contrat type après l'Article 3",
	        doc_summaries=doc_summaries,
	    )
	    doc = _parse_edited_doc(result)
	    if doc is not None:
	        # Informational only: the clause's wording and heading are up to the model.
	        article_count = sum(1 for text in _iter_block_texts(doc, 'heading') if 'Article' in text)
	        print(f"✅ Verification: Total articles = {article_count}")
	    tests.append(("Add with context", doc is not None))

	    # Summary
	    _print_banner("📊 TEST SUMMARY")
	    for test_name, success in tests:
	        status = "✅ PASS" if success else "❌ FAIL"
	        print(f"{status} - {test_name}")

	    total_tests = len(tests)
	    passed_tests = sum(1 for _, success in tests if success)
	    print(f"\n📈 Results: {passed_tests}/{total_tests} tests passed")

	    return all(success for _, success in tests)


	async def test_request_format():
	    """Check that DocCreatorRequest and DocCreatorResponse accept the expected fields.

	    Builds one request and one response from fixed sample values, echoes a few
	    of their fields, and returns True. A model that rejects the arguments
	    raises from its constructor instead.
	    """
	    rule = "=" * 60
	    print("\n" + rule)
	    print("🧪 Testing Request/Response Format")
	    print(rule)

	    # Request side
	    history = [
	        Message(role="user", content="Previous message"),
	        Message(role="assistant", content="Previous response"),
	    ]
	    request = DocCreatorRequest(
	        instruction="Test instruction",
	        documentContent=SAMPLE_TAPTAP_DOC,
	        contentFormat="tiptap-json",
	        documentSummaries=None,
	        conversationHistory=history,
	        clientId="test-client-123",
	    )

	    print("✅ DocCreatorRequest created")
	    for field_name in ("instruction", "contentFormat", "clientId"):
	        print(f" - {field_name}: {getattr(request, field_name)}")
	    print(f" - conversationHistory length: {len(request.conversationHistory)}")

	    # Response side
	    response = DocCreatorResponse(
	        response="Test successful",
	        modifiedDocument=SAMPLE_TAPTAP_DOC,
	        processing_time=1.5,
	        timestamp="2026-02-15T19:00:00",
	        error=None,
	    )

	    print("\n✅ DocCreatorResponse created")
	    for field_name in ("response", "processing_time", "error"):
	        print(f" - {field_name}: {getattr(response, field_name)}")

	    return True


	def print_canonical_format():
	    """Print the sample document as canonical JSON, followed by its size statistics.

	    Canonical here means the serialization used throughout this script:
	    sorted keys, 2-space indentation, non-ASCII characters left unescaped.
	    """
	    rule = "=" * 60
	    print("\n" + rule)
	    print("📋 Canonical TipTap JSON Format")
	    print(rule)

	    rendered = json.dumps(SAMPLE_TAPTAP_DOC, indent=2, sort_keys=True, ensure_ascii=False)
	    print(rendered)

	    # One more line than there are newline separators.
	    line_total = rendered.count("\n") + 1

	    print("\n📊 Format info:")
	    print(f" - Characters: {len(rendered)}")
	    print(f" - Lines: {line_total}")
	    print(" - Keys sorted: Yes (sort_keys=True)")
	    print(" - Indentation: 2 spaces")


	async def main():
	    """Run every check in this script and return a process exit code.

	    Returns:
	        0 when the endpoint tests all passed; 1 when any of them failed or
	        when an exception escaped from any stage.
	    """
	    rule = "=" * 60
	    print("🚀 Document Editor Endpoint Test Suite")
	    print(rule)

	    try:
	        # Stage 1: show the canonical serialization.
	        print_canonical_format()

	        # Stage 2: request/response model construction.
	        await test_request_format()

	        # Stage 3: the agent edits themselves; only this stage decides the outcome.
	        all_passed = await test_endpoint_direct()

	        print("\n" + rule)
	        print("✅ ALL TESTS COMPLETED")
	        print(rule)

	        if not all_passed:
	            print("⚠️ Some tests failed. Check the output above for details.")
	            return 1

	        print("🎉 The /doc_creator endpoint is ready for use!")
	        print("\n📖 See DOC_CREATOR_ENDPOINT_GUIDE.md for usage instructions")
	        return 0

	    except Exception as exc:
	        # Top-level boundary: report the failure with a traceback and signal it via the exit code.
	        print(f"\n❌ Test suite failed with error: {exc}")
	        import traceback
	        traceback.print_exc()
	        return 1


	if __name__ == "__main__":
	    # Run the async suite and use its return value as the process exit status.
	    sys.exit(asyncio.run(main()))