Charles Grandjean committed on
Commit
de9078a
·
1 Parent(s): 6853143

logging and html format

Browse files
agent_api.py CHANGED
@@ -351,37 +351,65 @@ class CyberLegalAPI:
351
 
352
  async def create_or_edit_document(self, request: DocCreatorRequest) -> DocCreatorResponse:
353
  """
354
- Create or edit a TipTap document using the document editor agent
355
  Args:
356
- request: Document editing request with TipTap JSON content
357
  Returns:
358
  DocCreatorResponse with assistant's response and modified document
359
  """
360
  start_time = datetime.now()
 
 
 
 
 
 
 
 
 
 
 
361
  try:
362
- # Convert TipTap JSON to canonical format
363
- logger.info(f"πŸ“ Processing document edit request")
364
- # Convert to canonical JSON (sorted keys, 2-space indent)
365
- doc_text = json.dumps(request.documentContent, ensure_ascii=False, sort_keys=True, indent=2)
 
366
  # Convert document summaries if provided
367
  doc_summaries = []
368
  if request.documentSummaries:
369
- for doc in request.documentSummaries:
 
 
370
  doc_summaries.append({
371
  "file_name": doc.file_name,
372
  "summary": doc.summary,
373
  "actors": doc.actors,
374
  "key_details": doc.key_details
375
  })
 
 
 
 
376
  # Convert conversation history if provided
377
  conversation_history = []
378
- for msg in request.conversationHistory or []:
379
- conversation_history.append({
380
- "role": msg.role,
381
- "content": msg.content
382
- })
 
 
 
 
 
 
 
 
383
  # Call document editor agent
384
- logger.info(f"πŸ€– Calling document editor agent")
 
 
385
  result = await self.doc_editor.edit_document(
386
  doc_text=doc_text,
387
  user_instruction=request.instruction,
@@ -389,15 +417,25 @@ class CyberLegalAPI:
389
  conversation_history=conversation_history,
390
  max_iterations=10
391
  )
 
392
  # Calculate processing time
393
  processing_time = (datetime.now() - start_time).total_seconds()
394
- # Prepare response
395
- modified_document = None
 
 
 
 
 
 
 
 
 
 
396
  if result['success']:
397
- try:
398
- modified_document = json.loads(result['doc_text'])
399
- except Exception as e:
400
- logger.error(f"❌ Failed to parse modified document: {e}")
401
  response = DocCreatorResponse(
402
  response=result['message'],
403
  modifiedDocument=modified_document,
@@ -406,7 +444,9 @@ class CyberLegalAPI:
406
  error=None if result['success'] else result.get('message')
407
  )
408
 
409
- logger.info(f"βœ… Document editing completed in {processing_time:.2f}s")
 
 
410
  return response
411
 
412
  except Exception as e:
@@ -463,25 +503,35 @@ async def chat_endpoint(request: ChatRequest):
463
  @app.post("/doc_creator", response_model=DocCreatorResponse, dependencies=[Depends(require_password)])
464
  async def doc_creator_endpoint(request: DocCreatorRequest):
465
  """
466
- Document creator/editor endpoint for TipTap JSON documents
467
 
468
  Args:
469
  request: Document editing request
470
  - instruction: User's instruction for document editing
471
- - documentContent: TipTap JSON document content
472
- - contentFormat: Always "tiptap-json"
473
  - documentSummaries: Optional context from analyzed documents
474
  - conversationHistory: Optional previous conversation messages
475
  - clientId: Unique client identifier
476
 
477
  Returns:
478
  DocCreatorResponse with assistant's response and modified document
 
 
 
 
 
 
 
 
 
 
479
 
480
  Usage:
481
- - Send TipTap JSON from editor.getJSON() in documentContent
482
  - Provide clear instructions for modifications
483
  - Optionally include document summaries for context
484
- - Returns modified TipTap JSON ready for editor.setContent()
485
 
486
  Supported Operations:
487
  - Replace text: "Change '12 months' to '24 months'"
@@ -489,18 +539,16 @@ async def doc_creator_endpoint(request: DocCreatorRequest):
489
  - Delete content: "Remove the section about confidentiality"
490
  - Complex edits: "Add a clause about GDPR compliance in Article 1"
491
 
492
- Example TipTap JSON structure:
493
- {
494
- "type": "doc",
495
- "content": [
496
- {
497
- "type": "paragraph",
498
- "content": [
499
- {"type": "text", "text": "Hello world"}
500
- ]
501
- }
502
- ]
503
- }
504
  """
505
  return await api.create_or_edit_document(request)
506
 
 
351
 
352
  async def create_or_edit_document(self, request: DocCreatorRequest) -> DocCreatorResponse:
353
  """
354
+ Create or edit an HTML document using the document editor agent
355
  Args:
356
+ request: Document editing request with HTML content
357
  Returns:
358
  DocCreatorResponse with assistant's response and modified document
359
  """
360
  start_time = datetime.now()
361
+
362
+ # Log incoming request details
363
+ logger.info("=" * 80)
364
+ logger.info("πŸ“₯ DOC_CREATOR REQUEST RECEIVED")
365
+ logger.info("=" * 80)
366
+ logger.info(f"πŸ‘€ Client ID: {request.clientId}")
367
+ logger.info(f"πŸ“‹ Instruction: {request.instruction}")
368
+ logger.info(f"πŸ“ Document size: {len(request.documentContent)} bytes")
369
+ logger.info(f"πŸ“š Document summaries: {len(request.documentSummaries) if request.documentSummaries else 0}")
370
+ logger.info(f"πŸ’¬ Conversation history: {len(request.conversationHistory) if request.conversationHistory else 0} messages")
371
+
372
  try:
373
+ # Use HTML directly (no canonicalization needed)
374
+ logger.info("πŸ”„ Using HTML document content directly...")
375
+ doc_text = request.documentContent
376
+ logger.info(f"βœ… HTML document ready - size: {len(doc_text)} bytes")
377
+
378
  # Convert document summaries if provided
379
  doc_summaries = []
380
  if request.documentSummaries:
381
+ logger.info("πŸ“š Processing document summaries...")
382
+ for i, doc in enumerate(request.documentSummaries, 1):
383
+ logger.info(f" [{i}] {doc.file_name} - {doc.summary[:50]}...")
384
  doc_summaries.append({
385
  "file_name": doc.file_name,
386
  "summary": doc.summary,
387
  "actors": doc.actors,
388
  "key_details": doc.key_details
389
  })
390
+ logger.info(f"βœ… {len(doc_summaries)} document summaries loaded")
391
+ else:
392
+ logger.info("ℹ️ No document summaries provided")
393
+
394
  # Convert conversation history if provided
395
  conversation_history = []
396
+ if request.conversationHistory:
397
+ logger.info(f"πŸ’¬ Processing conversation history ({len(request.conversationHistory)} messages)...")
398
+ for i, msg in enumerate(request.conversationHistory, 1):
399
+ role_emoji = "πŸ‘€" if msg.role == "user" else "πŸ€–"
400
+ logger.info(f" [{i}] {role_emoji} {msg.role}: {msg.content[:50]}...")
401
+ conversation_history.append({
402
+ "role": msg.role,
403
+ "content": msg.content
404
+ })
405
+ logger.info(f"βœ… {len(conversation_history)} conversation messages loaded")
406
+ else:
407
+ logger.info("ℹ️ No conversation history provided")
408
+
409
  # Call document editor agent
410
+ logger.info("=" * 80)
411
+ logger.info("πŸ€– CALLING DOCUMENT EDITOR AGENT")
412
+ logger.info("=" * 80)
413
  result = await self.doc_editor.edit_document(
414
  doc_text=doc_text,
415
  user_instruction=request.instruction,
 
417
  conversation_history=conversation_history,
418
  max_iterations=10
419
  )
420
+
421
  # Calculate processing time
422
  processing_time = (datetime.now() - start_time).total_seconds()
423
+
424
+ # Log results
425
+ logger.info("=" * 80)
426
+ logger.info("πŸ“Š DOCUMENT EDITING RESULTS")
427
+ logger.info("=" * 80)
428
+ logger.info(f"βœ… Success: {result['success']}")
429
+ logger.info(f"πŸ”„ Iterations: {result['iteration_count']}")
430
+ logger.info(f"⏱️ Processing time: {processing_time:.2f}s")
431
+ logger.info(f"πŸ’¬ Message: {result['message'][:100]}...")
432
+
433
+ # Prepare response - return HTML directly
434
+ modified_document = result['doc_text'] if result['success'] else None
435
  if result['success']:
436
+ logger.info(f"πŸ“ Modified document size: {len(result['doc_text'])} bytes")
437
+ logger.info(f"πŸ“ˆ Size change: {len(result['doc_text']) - len(doc_text):+d} bytes")
438
+
 
439
  response = DocCreatorResponse(
440
  response=result['message'],
441
  modifiedDocument=modified_document,
 
444
  error=None if result['success'] else result.get('message')
445
  )
446
 
447
+ logger.info("=" * 80)
448
+ logger.info("βœ… DOCUMENT EDITING COMPLETED SUCCESSFULLY")
449
+ logger.info("=" * 80)
450
  return response
451
 
452
  except Exception as e:
 
503
  @app.post("/doc_creator", response_model=DocCreatorResponse, dependencies=[Depends(require_password)])
504
  async def doc_creator_endpoint(request: DocCreatorRequest):
505
  """
506
+ Document creator/editor endpoint for HTML documents
507
 
508
  Args:
509
  request: Document editing request
510
  - instruction: User's instruction for document editing
511
+ - documentContent: HTML document content
512
+ - contentFormat: Always "html"
513
  - documentSummaries: Optional context from analyzed documents
514
  - conversationHistory: Optional previous conversation messages
515
  - clientId: Unique client identifier
516
 
517
  Returns:
518
  DocCreatorResponse with assistant's response and modified document
519
+
520
+ On success:
521
+ - response: Completion message
522
+ - modifiedDocument: Modified HTML
523
+ - error: null
524
+
525
+ On failure (validation error or max iterations reached):
526
+ - response: Error message
527
+ - modifiedDocument: null
528
+ - error: Error description
529
 
530
  Usage:
531
+ - Send HTML content in documentContent
532
  - Provide clear instructions for modifications
533
  - Optionally include document summaries for context
534
+ - Returns modified HTML ready for display
535
 
536
  Supported Operations:
537
  - Replace text: "Change '12 months' to '24 months'"
 
539
  - Delete content: "Remove the section about confidentiality"
540
  - Complex edits: "Add a clause about GDPR compliance in Article 1"
541
 
542
+ Example HTML structure:
543
+ <h1>Contract</h1>
544
+ <h2>Article 1 - Duration</h2>
545
+ <p>This contract shall last for 12 months.</p>
546
+
547
+ Error Handling:
548
+ - The agent validates all modifications with BeautifulSoup
549
+ - If a modification is invalid (HTML structure broken), the agent automatically retries
550
+ - If max iterations (10) is reached without completion, an error is returned
551
+ - Check the 'error' field in the response to detect failures
 
 
552
  """
553
  return await api.create_or_edit_document(request)
554
 
prompts/doc_editor.py CHANGED
@@ -3,7 +3,7 @@
3
  System prompts for the document editor agent
4
  """
5
 
6
- DOC_EDITOR_SYSTEM_PROMPT = """You are a Document Editing Agent specialized in modifying TipTap JSON documents.
7
 
8
  ## CRITICAL RULES
9
 
@@ -11,47 +11,47 @@ DOC_EDITOR_SYSTEM_PROMPT = """You are a Document Editing Agent specialized in mo
11
 
12
  2. **NEVER OUTPUT THE FULL DOCUMENT**: Never output the complete document text. The system always has the current document state.
13
 
14
- 3. **EXACT MATCHING**: When using replace/add/delete, you must copy the EXACT text block from the canonical JSON (including all whitespace, quotes, and indentation). The search/anchor must match exactly.
15
 
16
  4. **ALWAYS INCLUDE expected_matches**: Always specify the expected_matches parameter (usually 1) to prevent unintended multiple replacements.
17
 
18
  5. **SMALL, PRECISE EDITS**: Make small, targeted edits. Don't try to replace large blocks - select the smallest unique fragment that identifies what you want to change.
19
 
20
- 6. **VALIDATE YOUR CHOICES**: If a tool returns an error (mismatch or invalid JSON), analyze the error and try again with a more precise search/anchor.
21
 
22
  7. **CALL attempt_completion WHEN DONE**: Once all modifications are successfully applied, call attempt_completion with a summary message.
23
 
24
  ## AVAILABLE TOOLS
25
 
26
- ### replace
27
- Replace an exact block of text in the document.
28
 
29
  Parameters:
30
- - doc_text: (provided automatically) The current canonical document
31
- - search: The exact text block to replace (must match exactly)
32
- - replace: The exact text block to insert
33
  - expected_matches: Number of occurrences (default: 1)
34
 
35
  Example:
36
  ```json
37
  {
38
  "type": "tool_call",
39
- "name": "replace",
40
  "arguments": {
41
- "search": "\"text\": \"12 mois\"",
42
- "replace": "\"text\": \"24 mois\"",
43
  "expected_matches": 1
44
  }
45
  }
46
  ```
47
 
48
- ### add
49
- Add content before or after an anchor block.
50
 
51
  Parameters:
52
- - doc_text: (provided automatically) The current canonical document
53
- - anchor_search: The exact text block to find (must match exactly)
54
- - insert: The exact text block to insert
55
  - position: "before" or "after" (default: "after")
56
  - expected_matches: Number of anchor occurrences (default: 1)
57
 
@@ -59,31 +59,31 @@ Example:
59
  ```json
60
  {
61
  "type": "tool_call",
62
- "name": "add",
63
  "arguments": {
64
- "anchor_search": "{\n \"type\": \"heading\",\n \"attrs\": { \"level\": 2, \"textAlign\": \"left\" },\n \"content\": [\n { \"type\": \"text\", \"text\": \"Article 2 - DurΓ©e\" }\n ]\n }",
65
- "insert": ",\n {\n \"type\": \"heading\",\n \"attrs\": { \"level\": 2, \"textAlign\": \"left\" },\n \"content\": [\n { \"type\": \"text\", \"text\": \"Article 3 - Prix\" }\n ]\n }",
66
  "position": "after",
67
  "expected_matches": 1
68
  }
69
  }
70
  ```
71
 
72
- ### delete
73
- Delete an exact block of text.
74
 
75
  Parameters:
76
- - doc_text: (provided automatically) The current canonical document
77
- - search: The exact text block to delete (must match exactly)
78
  - expected_matches: Number of occurrences (default: 1)
79
 
80
  Example:
81
  ```json
82
  {
83
  "type": "tool_call",
84
- "name": "delete",
85
  "arguments": {
86
- "search": "{\n \"type\": \"paragraph\",\n \"attrs\": { \"textAlign\": \"justify\" },\n \"content\": [\n { \"type\": \"text\", \"text\": \"Unwanted text\" }\n ]\n }",
87
  "expected_matches": 1
88
  }
89
  }
@@ -109,27 +109,28 @@ Example:
109
  ## ERROR HANDLING
110
 
111
  If you receive an error:
112
- - "Match count mismatch": Your search/anchor didn't match exactly. Check whitespace, quotes, and indentation. Try a more specific or different fragment.
113
- - "Post-edit validation failed": The replacement broke the JSON structure. Ensure your insert/replace is valid JSON with proper commas and brackets.
114
 
115
  ## WORKFLOW
116
 
117
  1. Read the instruction and current document
118
  2. Identify what needs to be changed
119
- 3. Call the appropriate tool (replace/add/delete) with exact text matching
120
  4. If successful and more changes needed, call the next tool
121
  5. If all changes done, call attempt_completion
122
  6. If an error occurs, analyze it and retry with corrected parameters
123
 
124
  ## TIPS
125
 
126
- - For text content: search for `"text": "exact text here"` - it's usually unique
127
- - For headings: search for the entire heading node block
128
  - For paragraphs: search for a unique phrase in the text content
129
- - Always include proper commas in your insert/replace when adding to arrays
130
- - The document uses TipTap JSON format with nodes like "doc", "heading", "paragraph", "text", etc.
 
131
 
132
- Remember: Exact matching is crucial. Copy the text block exactly as it appears in the canonical JSON!
133
  """
134
 
135
 
 
3
  System prompts for the document editor agent
4
  """
5
 
6
+ DOC_EDITOR_SYSTEM_PROMPT = """You are a Document Editing Agent specialized in modifying HTML documents.
7
 
8
  ## CRITICAL RULES
9
 
 
11
 
12
  2. **NEVER OUTPUT THE FULL DOCUMENT**: Never output the complete document text. The system always has the current document state.
13
 
14
+ 3. **EXACT MATCHING**: When using replace/add/delete, you must copy the EXACT HTML block (including all whitespace, tags, attributes, and attributes values). The search/anchor must match exactly.
15
 
16
  4. **ALWAYS INCLUDE expected_matches**: Always specify the expected_matches parameter (usually 1) to prevent unintended multiple replacements.
17
 
18
  5. **SMALL, PRECISE EDITS**: Make small, targeted edits. Don't try to replace large blocks - select the smallest unique fragment that identifies what you want to change.
19
 
20
+ 6. **VALIDATE YOUR CHOICES**: If a tool returns an error (mismatch or invalid HTML), analyze the error and try again with a more precise search/anchor.
21
 
22
  7. **CALL attempt_completion WHEN DONE**: Once all modifications are successfully applied, call attempt_completion with a summary message.
23
 
24
  ## AVAILABLE TOOLS
25
 
26
+ ### replace_html
27
+ Replace an exact block of HTML text in the document.
28
 
29
  Parameters:
30
+ - doc_text: (provided automatically) The current HTML document
31
+ - search: The exact HTML block to replace (must match exactly, including whitespace, tags, and attributes)
32
+ - replace: The exact HTML block to insert
33
  - expected_matches: Number of occurrences (default: 1)
34
 
35
  Example:
36
  ```json
37
  {
38
  "type": "tool_call",
39
+ "name": "replace_html",
40
  "arguments": {
41
+ "search": "<p>12 mois</p>",
42
+ "replace": "<p>24 mois</p>",
43
  "expected_matches": 1
44
  }
45
  }
46
  ```
47
 
48
+ ### add_html
49
+ Add HTML content before or after an anchor block.
50
 
51
  Parameters:
52
+ - doc_text: (provided automatically) The current HTML document
53
+ - anchor_search: The exact HTML block to find (must match exactly)
54
+ - insert: The exact HTML block to insert
55
  - position: "before" or "after" (default: "after")
56
  - expected_matches: Number of anchor occurrences (default: 1)
57
 
 
59
  ```json
60
  {
61
  "type": "tool_call",
62
+ "name": "add_html",
63
  "arguments": {
64
+ "anchor_search": "<h2>Article 2 - DurΓ©e</h2>",
65
+ "insert": "<h3>Article 3 - Prix</h3>",
66
  "position": "after",
67
  "expected_matches": 1
68
  }
69
  }
70
  ```
71
 
72
+ ### delete_html
73
+ Delete an exact block of HTML from the document.
74
 
75
  Parameters:
76
+ - doc_text: (provided automatically) The current HTML document
77
+ - search: The exact HTML block to delete (must match exactly)
78
  - expected_matches: Number of occurrences (default: 1)
79
 
80
  Example:
81
  ```json
82
  {
83
  "type": "tool_call",
84
+ "name": "delete_html",
85
  "arguments": {
86
+ "search": "<p>Unwanted text</p>",
87
  "expected_matches": 1
88
  }
89
  }
 
109
  ## ERROR HANDLING
110
 
111
  If you receive an error:
112
+ - "Match count mismatch": Your search/anchor didn't match exactly. Check whitespace, tags, attributes, and attribute values. Try a more specific or different fragment.
113
+ - "Post-edit validation failed": The replacement broke the HTML structure. Ensure your insert/replace is valid HTML with proper tags.
114
 
115
  ## WORKFLOW
116
 
117
  1. Read the instruction and current document
118
  2. Identify what needs to be changed
119
+ 3. Call the appropriate tool (replace_html/add_html/delete_html) with exact HTML matching
120
  4. If successful and more changes needed, call the next tool
121
  5. If all changes done, call attempt_completion
122
  6. If an error occurs, analyze it and retry with corrected parameters
123
 
124
  ## TIPS
125
 
126
+ - For text content: search for unique text within tags like `<p>exact text here</p>` - it's usually unique
127
+ - For headings: search for the entire heading tag like `<h2>Article 2 - DurΓ©e</h2>`
128
  - For paragraphs: search for a unique phrase in the text content
129
+ - Always include proper HTML structure with opening and closing tags
130
+ - Be mindful of HTML attributes - include them exactly as they appear
131
+ - For inline elements like `<strong>`, `<em>`, etc., include the full element with tags
132
 
133
+ Remember: Exact matching is crucial. Copy the HTML block exactly as it appears in the document!
134
  """
135
 
136
 
requirements.txt CHANGED
@@ -23,3 +23,4 @@ pydantic>=2.0.0
23
  typing-extensions>=4.0.0
24
  langchain-tavily>=0.2.16
25
  resend>=0.8.0
 
 
23
  typing-extensions>=4.0.0
24
  langchain-tavily>=0.2.16
25
  resend>=0.8.0
26
+ beautifulsoup4>=4.12.0
structured_outputs/api_models.py CHANGED
@@ -80,8 +80,8 @@ class AnalyzePDFResponse(BaseModel):
80
  class DocCreatorRequest(BaseModel):
81
  """Document creator request model"""
82
  instruction: str = Field(..., description="User's instruction for document editing")
83
- documentContent: dict = Field(..., description="TipTap JSON document content")
84
- contentFormat: str = Field(default="tiptap-json", description="Format of document content (always 'tiptap-json')")
85
  documentSummaries: Optional[List[DocumentAnalysis]] = Field(default=None, description="Context from analyzed documents")
86
  conversationHistory: Optional[List[Message]] = Field(default=[], description="Previous conversation messages")
87
  clientId: str = Field(..., description="Unique client identifier")
@@ -90,7 +90,7 @@ class DocCreatorRequest(BaseModel):
90
  class DocCreatorResponse(BaseModel):
91
  """Document creator response model"""
92
  response: str = Field(..., description="Assistant's response")
93
- modifiedDocument: Optional[dict] = Field(None, description="Modified TipTap JSON document (if changes were made)")
94
  processing_time: float = Field(..., description="Processing time in seconds")
95
  timestamp: str = Field(..., description="Response timestamp")
96
  error: Optional[str] = Field(None, description="Error message if any")
 
80
  class DocCreatorRequest(BaseModel):
81
  """Document creator request model"""
82
  instruction: str = Field(..., description="User's instruction for document editing")
83
+ documentContent: str = Field(..., description="HTML document content")
84
+ contentFormat: str = Field(default="html", description="Format of document content (always 'html')")
85
  documentSummaries: Optional[List[DocumentAnalysis]] = Field(default=None, description="Context from analyzed documents")
86
  conversationHistory: Optional[List[Message]] = Field(default=[], description="Previous conversation messages")
87
  clientId: str = Field(..., description="Unique client identifier")
 
90
  class DocCreatorResponse(BaseModel):
91
  """Document creator response model"""
92
  response: str = Field(..., description="Assistant's response")
93
+ modifiedDocument: Optional[str] = Field(None, description="Modified HTML document (if changes were made)")
94
  processing_time: float = Field(..., description="Processing time in seconds")
95
  timestamp: str = Field(..., description="Response timestamp")
96
  error: Optional[str] = Field(None, description="Error message if any")
subagents/doc_editor.py CHANGED
@@ -1,16 +1,17 @@
1
  #!/usr/bin/env python3
2
  """
3
- Document Editor Agent - LangGraph agent for modifying TipTap JSON documents
4
  Implements Cline-like iterative editing with validation
5
  """
6
 
7
  import logging
 
8
  from typing import Dict, Any, List
9
  from langgraph.graph import StateGraph, END
10
  from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage
11
 
12
  from agent_states.doc_editor_state import DocEditorState
13
- from utils.doc_editor_tools import replace, add, delete, attempt_completion
14
  from prompts.doc_editor import get_doc_editor_system_prompt
15
 
16
  logger = logging.getLogger(__name__)
@@ -18,11 +19,11 @@ logger = logging.getLogger(__name__)
18
 
19
  class DocumentEditorAgent:
20
  """
21
- Agent for editing TipTap JSON documents using Cline-like iterative approach.
22
 
23
  Workflow:
24
- 1. Agent generates a tool call (replace/add/delete/attempt_completion)
25
- 2. Tools execute with validation
26
  3. Check if complete or max iterations reached
27
  4. Repeat until completion or error
28
  """
@@ -35,7 +36,7 @@ class DocumentEditorAgent:
35
  llm: Language model instance (ChatOpenAI or compatible)
36
  """
37
  self.llm = llm
38
- self.tools = [replace, add, delete, attempt_completion]
39
  self.llm_with_tools = self.llm.bind_tools(self.tools)
40
  self.workflow = self._build_workflow()
41
 
@@ -111,13 +112,20 @@ class DocumentEditorAgent:
111
  """
112
  intermediate_steps = state.get("intermediate_steps", [])
113
  iteration_count = state.get("iteration_count", 0)
 
 
114
 
115
- logger.info(f"πŸ”„ Agent iteration {iteration_count + 1}/{state.get('max_iterations', 10)}")
 
 
 
 
116
 
117
  # First iteration: add system prompt and user instruction
118
  if iteration_count == 0:
119
  system_prompt = get_doc_editor_system_prompt()
120
  intermediate_steps.append(SystemMessage(content=system_prompt))
 
121
 
122
  # Build context message with conversation history and doc_summaries
123
  conversation_history = state.get("conversation_history", [])
@@ -138,15 +146,19 @@ class DocumentEditorAgent:
138
 
139
  # Add document and instruction
140
  full_message = (
141
- f"Current document (TipTap JSON):\n{state['doc_text']}\n\n"
142
  f"{context_msg}\n\n"
143
  f"Instruction: {state['user_instruction']}"
144
  )
145
  intermediate_steps.append(HumanMessage(content=full_message))
 
 
146
 
147
  # Call LLM with tools
 
148
  response = await self.llm_with_tools.ainvoke(intermediate_steps)
149
  intermediate_steps.append(response)
 
150
 
151
  state["intermediate_steps"] = intermediate_steps
152
  return state
@@ -159,27 +171,43 @@ class DocumentEditorAgent:
159
  last_message = intermediate_steps[-1]
160
 
161
  if not (hasattr(last_message, 'tool_calls') and last_message.tool_calls):
 
162
  return state
163
 
 
 
164
  # Increment iteration count
165
  state["iteration_count"] = state.get("iteration_count", 0) + 1
166
 
167
- for tool_call in last_message.tool_calls:
168
  tool_name = tool_call['name']
169
  tool_func = next((t for t in self.tools if t.name == tool_name), None)
170
 
 
 
 
171
  if tool_func:
172
  args = tool_call['args'].copy()
173
 
174
  # Inject current doc_text into tool calls
175
- if tool_name in ["replace", "add", "delete"]:
176
  args["doc_text"] = state["doc_text"]
177
- logger.info(f"πŸ“ Injecting doc_text to {tool_name}")
 
 
 
 
 
 
178
 
179
  # Execute tool
180
  try:
181
  result = await tool_func.ainvoke(args)
182
- logger.info(f"πŸ”§ Tool {tool_name} result: ok={result.get('ok')}, matches={result.get('matches')}")
 
 
 
 
183
 
184
  # Update doc_text if tool was successful
185
  if result.get("ok") and "doc_text" in result:
@@ -201,6 +229,7 @@ class DocumentEditorAgent:
201
  except Exception as e:
202
  error_msg = f"Error executing {tool_name}: {str(e)}"
203
  logger.error(f"❌ {error_msg}")
 
204
  intermediate_steps.append(
205
  ToolMessage(
206
  content=error_msg,
@@ -224,7 +253,7 @@ class DocumentEditorAgent:
224
  Edit a document according to the user instruction.
225
 
226
  Args:
227
- doc_text: Canonical TipTap JSON string (use sort_keys=True, indent=2)
228
  user_instruction: What changes to make to the document
229
  doc_summaries: Optional summaries of the document for context
230
  conversation_history: Optional previous conversation messages for context
@@ -232,11 +261,31 @@ class DocumentEditorAgent:
232
 
233
  Returns:
234
  Dict with:
235
- - doc_text: Modified document (canonical JSON)
236
  - message: Completion message or error description
237
  - success: Boolean indicating success
238
  - iteration_count: Number of iterations performed
239
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  # Initialize state
241
  initial_state = {
242
  "doc_text": doc_text,
@@ -249,34 +298,52 @@ class DocumentEditorAgent:
249
  "intermediate_steps": []
250
  }
251
 
252
- logger.info(f"πŸš€ Starting document editing: {user_instruction}")
253
 
254
  # Run workflow
255
  try:
 
256
  final_state = await self.workflow.ainvoke(initial_state)
257
 
258
  # Prepare result
259
  success = final_state.get("completion_message") is not None
260
  message = final_state.get("completion_message") or "Editing completed without explicit completion message"
261
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
  if not success:
263
- iteration_count = final_state.get("iteration_count", 0)
264
  max_iters = final_state.get("max_iterations", 10)
265
  if iteration_count >= max_iters:
 
266
  message = f"Failed to complete editing within {max_iters} iterations"
267
 
268
  return {
269
  "doc_text": final_state["doc_text"],
270
  "message": message,
271
  "success": success,
272
- "iteration_count": final_state["iteration_count"]
273
  }
274
 
275
  except Exception as e:
276
- logger.error(f"❌ Error in document editing workflow: {e}")
 
 
 
 
277
  return {
278
  "doc_text": doc_text, # Return original on error
279
  "message": f"Error during editing: {str(e)}",
280
  "success": False,
281
  "iteration_count": 0
282
- }
 
1
  #!/usr/bin/env python3
2
  """
3
+ Document Editor Agent - LangGraph agent for modifying HTML documents
4
  Implements Cline-like iterative editing with validation
5
  """
6
 
7
  import logging
8
+ import traceback
9
  from typing import Dict, Any, List
10
  from langgraph.graph import StateGraph, END
11
  from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage
12
 
13
  from agent_states.doc_editor_state import DocEditorState
14
+ from utils.doc_editor_tools import replace_html, add_html, delete_html, attempt_completion
15
  from prompts.doc_editor import get_doc_editor_system_prompt
16
 
17
  logger = logging.getLogger(__name__)
 
19
 
20
  class DocumentEditorAgent:
21
  """
22
+ Agent for editing HTML documents using Cline-like iterative approach.
23
 
24
  Workflow:
25
+ 1. Agent generates a tool call (replace_html/add_html/delete_html/attempt_completion)
26
+ 2. Tools execute with BeautifulSoup validation
27
  3. Check if complete or max iterations reached
28
  4. Repeat until completion or error
29
  """
 
36
  llm: Language model instance (ChatOpenAI or compatible)
37
  """
38
  self.llm = llm
39
+ self.tools = [replace_html, add_html, delete_html, attempt_completion]
40
  self.llm_with_tools = self.llm.bind_tools(self.tools)
41
  self.workflow = self._build_workflow()
42
 
 
112
  """
113
  intermediate_steps = state.get("intermediate_steps", [])
114
  iteration_count = state.get("iteration_count", 0)
115
+ max_iterations = state.get("max_iterations", 10)
116
+ current_doc_size = len(state.get("doc_text", ""))
117
 
118
+ logger.info("")
119
+ logger.info("=" * 80)
120
+ logger.info(f"πŸ”„ AGENT ITERATION {iteration_count + 1}/{max_iterations}")
121
+ logger.info("=" * 80)
122
+ logger.info(f"πŸ“ Current document size: {current_doc_size} bytes")
123
 
124
  # First iteration: add system prompt and user instruction
125
  if iteration_count == 0:
126
  system_prompt = get_doc_editor_system_prompt()
127
  intermediate_steps.append(SystemMessage(content=system_prompt))
128
+ logger.info("πŸ“ System prompt added")
129
 
130
  # Build context message with conversation history and doc_summaries
131
  conversation_history = state.get("conversation_history", [])
 
146
 
147
  # Add document and instruction
148
  full_message = (
149
+ f"Current document (HTML):\n{state['doc_text']}\n\n"
150
  f"{context_msg}\n\n"
151
  f"Instruction: {state['user_instruction']}"
152
  )
153
  intermediate_steps.append(HumanMessage(content=full_message))
154
+ logger.info(f"πŸ’¬ User message added ({len(full_message)} chars)")
155
+ logger.info(f"πŸ“š Context: {len(conversation_history)} history + {len(doc_summaries)} summaries")
156
 
157
  # Call LLM with tools
158
+ logger.info("πŸ€– Calling LLM with tools...")
159
  response = await self.llm_with_tools.ainvoke(intermediate_steps)
160
  intermediate_steps.append(response)
161
+ logger.info("βœ… LLM response received")
162
 
163
  state["intermediate_steps"] = intermediate_steps
164
  return state
 
171
  last_message = intermediate_steps[-1]
172
 
173
  if not (hasattr(last_message, 'tool_calls') and last_message.tool_calls):
174
+ logger.info("ℹ️ No tool calls in last message, returning to agent")
175
  return state
176
 
177
+ logger.info(f"πŸ”§ Executing {len(last_message.tool_calls)} tool call(s)...")
178
+
179
  # Increment iteration count
180
  state["iteration_count"] = state.get("iteration_count", 0) + 1
181
 
182
+ for i, tool_call in enumerate(last_message.tool_calls, 1):
183
  tool_name = tool_call['name']
184
  tool_func = next((t for t in self.tools if t.name == tool_name), None)
185
 
186
+ logger.info("")
187
+ logger.info(f"πŸ”§ Tool {i}/{len(last_message.tool_calls)}: {tool_name}")
188
+
189
  if tool_func:
190
  args = tool_call['args'].copy()
191
 
192
  # Inject current doc_text into tool calls
193
+ if tool_name in ["replace_html", "add_html", "delete_html"]:
194
  args["doc_text"] = state["doc_text"]
195
+ logger.info(f"πŸ“ Injecting doc_text ({len(state['doc_text'])} bytes) to {tool_name}")
196
+
197
+ # Log tool arguments (sanitized)
198
+ safe_args = {k: v for k, v in args.items() if k != 'doc_text'}
199
+ if tool_name in ["replace_html", "add_html", "delete_html"]:
200
+ safe_args["doc_text"] = f"<{len(args['doc_text'])} bytes>"
201
+ logger.info(f"πŸ“₯ Arguments: {safe_args}")
202
 
203
  # Execute tool
204
  try:
205
  result = await tool_func.ainvoke(args)
206
+ logger.info(f"πŸ”§ Tool {tool_name} executed:")
207
+ logger.info(f" βœ… ok={result.get('ok')}")
208
+ logger.info(f" πŸ” matches={result.get('matches')}")
209
+ if result.get("ok") and "doc_text" in result:
210
+ logger.info(f" πŸ“ New doc_text size: {len(result['doc_text'])} bytes")
211
 
212
  # Update doc_text if tool was successful
213
  if result.get("ok") and "doc_text" in result:
 
229
  except Exception as e:
230
  error_msg = f"Error executing {tool_name}: {str(e)}"
231
  logger.error(f"❌ {error_msg}")
232
+ logger.error(f"πŸ” Traceback: {traceback.format_exc()}")
233
  intermediate_steps.append(
234
  ToolMessage(
235
  content=error_msg,
 
253
  Edit a document according to the user instruction.
254
 
255
  Args:
256
+ doc_text: HTML document string
257
  user_instruction: What changes to make to the document
258
  doc_summaries: Optional summaries of the document for context
259
  conversation_history: Optional previous conversation messages for context
 
261
 
262
  Returns:
263
  Dict with:
264
+ - doc_text: Modified document (HTML)
265
  - message: Completion message or error description
266
  - success: Boolean indicating success
267
  - iteration_count: Number of iterations performed
268
  """
269
+ # Log initial state
270
+ logger.info("=" * 80)
271
+ logger.info("🎯 DOCUMENT EDITOR AGENT STARTING")
272
+ logger.info("=" * 80)
273
+ logger.info(f"πŸ“ Initial document size: {len(doc_text)} bytes")
274
+ logger.info(f"πŸ“‹ Instruction: {user_instruction[:100]}{'...' if len(user_instruction) > 100 else ''}")
275
+ logger.info(f"πŸ“š Document summaries: {len(doc_summaries)}")
276
+ logger.info(f"πŸ’¬ Conversation history: {len(conversation_history)} messages")
277
+ logger.info(f"πŸ”„ Max iterations: {max_iterations}")
278
+
279
+ if doc_summaries:
280
+ logger.info("πŸ“š Document summaries loaded:")
281
+ for i, summary in enumerate(doc_summaries[:3], 1): # Show first 3
282
+ logger.info(f" [{i}] {str(summary)[:100]}...")
283
+ if len(doc_summaries) > 3:
284
+ logger.info(f" ... and {len(doc_summaries) - 3} more")
285
+
286
+ if conversation_history:
287
+ logger.info(f"πŸ’¬ Conversation history loaded ({len(conversation_history)} messages)")
288
+
289
  # Initialize state
290
  initial_state = {
291
  "doc_text": doc_text,
 
298
  "intermediate_steps": []
299
  }
300
 
301
+ logger.info("🎯 Initial state prepared, starting workflow...")
302
 
303
  # Run workflow
304
  try:
305
+ logger.info("πŸ”„ Invoking LangGraph workflow...")
306
  final_state = await self.workflow.ainvoke(initial_state)
307
 
308
  # Prepare result
309
  success = final_state.get("completion_message") is not None
310
  message = final_state.get("completion_message") or "Editing completed without explicit completion message"
311
 
312
+ iteration_count = final_state.get("iteration_count", 0)
313
+ final_doc_size = len(final_state["doc_text"])
314
+ size_change = final_doc_size - len(doc_text)
315
+
316
+ logger.info("=" * 80)
317
+ logger.info("πŸ“Š DOCUMENT EDITING COMPLETED")
318
+ logger.info("=" * 80)
319
+ logger.info(f"βœ… Success: {success}")
320
+ logger.info(f"πŸ”„ Iterations: {iteration_count}")
321
+ logger.info(f"πŸ“ Final document size: {final_doc_size} bytes")
322
+ logger.info(f"πŸ“ˆ Size change: {size_change:+d} bytes ({size_change/len(doc_text)*100:+.1f}%)")
323
+ logger.info(f"πŸ’¬ Message: {message[:100]}{'...' if len(message) > 100 else ''}")
324
+
325
  if not success:
 
326
  max_iters = final_state.get("max_iterations", 10)
327
  if iteration_count >= max_iters:
328
+ logger.warning(f"⚠️ Failed to complete editing within {max_iters} iterations")
329
  message = f"Failed to complete editing within {max_iters} iterations"
330
 
331
  return {
332
  "doc_text": final_state["doc_text"],
333
  "message": message,
334
  "success": success,
335
+ "iteration_count": iteration_count
336
  }
337
 
338
  except Exception as e:
339
+ logger.error("=" * 80)
340
+ logger.error("❌ DOCUMENT EDITING FAILED")
341
+ logger.error("=" * 80)
342
+ logger.error(f"❌ Error: {str(e)}")
343
+ logger.error(f"πŸ” Traceback: {traceback.format_exc()}")
344
  return {
345
  "doc_text": doc_text, # Return original on error
346
  "message": f"Error during editing: {str(e)}",
347
  "success": False,
348
  "iteration_count": 0
349
+ }
tests/test_logging_doc_editor.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script to verify detailed logging in document editor
4
+ """
5
+
6
import asyncio
import json
import logging
import os
import sys
from datetime import datetime
from pathlib import Path

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

from langchain_openai import ChatOpenAI

from subagents.doc_editor import DocumentEditorAgent
from utils.doc_editor_tools import replace_html, add_html, delete_html, attempt_completion
20
+
21
+ # Configure detailed logging
22
+ logging.basicConfig(
23
+ level=logging.INFO,
24
+ format='%(asctime)s | %(levelname)-8s | %(name)s | %(message)s',
25
+ datefmt='%Y-%m-%d %H:%M:%S',
26
+ stream=sys.stdout
27
+ )
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
def test_tool_logging():
    """Run the HTML editing tools directly so their log output can be inspected.

    Applies ``replace_html`` to a small sample document, feeds that result to
    ``add_html``, then runs ``delete_html`` against the original document.
    Each step prints the raw tool result followed by a PASSED/FAILED line.
    Nothing is asserted: the function is meant to be read next to the logs
    the tools emit.
    """
    print("\n" + "=" * 80)
    print("TEST 1: Testing Tool Logging")
    print("=" * 80)

    def _call(editor_tool, payload):
        # The tools are defined with ``async def``, so they have to be driven
        # through ``ainvoke``; this runs one call to completion from sync code.
        return asyncio.run(editor_tool.ainvoke(payload))

    # Sample HTML document. The tools validate every edit as HTML and reject
    # text that contains no tags, so a TipTap JSON string cannot be used here.
    heading = '<h1 style="text-align: center">Document de test</h1>'
    edited_heading = '<h1 style="text-align: center">Document de example</h1>'
    doc_text = heading + "\n<p>Ceci est un test du document editor.</p>"

    print(f"\nπŸ“ Initial document: {len(doc_text)} bytes")
    print(f"\nInitial document:")
    print(doc_text)

    # Test 1: Replace
    print("\n" + "-" * 80)
    print("TEST 1.1: Replace Tool")
    print("-" * 80)
    result = _call(replace_html, {
        "doc_text": doc_text,
        "search": heading,
        "replace": edited_heading,
        "expected_matches": 1
    })
    print(f"\nResult: {json.dumps(result, indent=2)}")

    if result.get("ok"):
        print("\nβœ… Replace test PASSED")
        print(f"Updated document ({len(result['doc_text'])} bytes):")
        print(result['doc_text'])
    else:
        print(f"\n❌ Replace test FAILED: {result.get('error')}")

    # Test 2: Add (only meaningful when the replace step produced a document)
    print("\n" + "-" * 80)
    print("TEST 1.2: Add Tool")
    print("-" * 80)
    if result.get("ok"):
        add_result = _call(add_html, {
            "doc_text": result['doc_text'],
            "anchor_search": edited_heading,
            "insert": "\n<p><strong> - Version 1.0</strong></p>",
            "position": "after",
            "expected_matches": 1
        })
        print(f"\nResult: {json.dumps(add_result, indent=2)}")

        if add_result.get("ok"):
            print("\nβœ… Add test PASSED")
            print(f"Updated document ({len(add_result['doc_text'])} bytes):")
            print(add_result['doc_text'])
        else:
            print(f"\n❌ Add test FAILED: {add_result.get('error')}")

    # Test 3: Delete (runs against the untouched original document)
    print("\n" + "-" * 80)
    print("TEST 1.3: Delete Tool")
    print("-" * 80)
    delete_result = _call(delete_html, {
        "doc_text": doc_text,
        "search": heading,
        "expected_matches": 1
    })
    print(f"\nResult: {json.dumps(delete_result, indent=2)}")

    if delete_result.get("ok"):
        print("\nβœ… Delete test PASSED")
    else:
        print(f"\n❌ Delete test FAILED: {delete_result.get('error')}")
116
+
117
async def test_agent_logging():
    """Run the document editor agent once so its log output can be inspected.

    Builds a one-paragraph HTML document, asks the agent to replace 'test'
    with 'example', and prints the outcome. The run is skipped (with a
    message) when ``OPENAI_API_KEY`` is not set. Any exception raised while
    building or running the agent is printed with its traceback instead of
    propagating.
    """
    print("\n" + "=" * 80)
    print("TEST 2: Testing Agent Logging")
    print("=" * 80)

    # Sample document. The agent presents the document to the model as HTML
    # and its tools reject text without tags, so HTML is used, not TipTap JSON.
    doc_text = "<p>Ceci est un document de test.</p>"

    print(f"\nπŸ“ Initial document: {len(doc_text)} bytes")
    print(f"πŸ“‹ Instruction: Replace 'test' with 'example'")

    # Check if we have API key
    if not os.getenv("OPENAI_API_KEY"):
        print("\n⚠️ OPENAI_API_KEY not set, skipping agent test")
        return

    try:
        # Initialize LLM
        llm = ChatOpenAI(
            model=os.getenv("LLM_MODEL", "gpt-4o-mini"),
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("LLM_BINDING_HOST", "https://api.openai.com/v1")
        )

        # Initialize agent
        agent = DocumentEditorAgent(llm=llm)

        # Run agent. Empty lists are passed explicitly because edit_document
        # calls len() on both arguments when logging its initial state.
        # NOTE(review): their defaults are not visible here - confirm whether
        # they may be None.
        result = await agent.edit_document(
            doc_text=doc_text,
            user_instruction="Replace 'test' with 'example'",
            doc_summaries=[],
            conversation_history=[],
            max_iterations=3
        )

        print(f"\nπŸ“Š Agent Result:")
        print(f" βœ… Success: {result['success']}")
        print(f" πŸ”„ Iterations: {result['iteration_count']}")
        print(f" πŸ’¬ Message: {result['message']}")

        if result['success']:
            print(f"\nβœ… Agent test PASSED")
            print(f"πŸ“ Final document size: {len(result['doc_text'])} bytes")
        else:
            print(f"\n❌ Agent test FAILED")

    except Exception as e:
        # Top-level boundary of a manual test script: report and carry on.
        print(f"\n❌ Agent test failed with error: {e}")
        import traceback
        traceback.print_exc()
177
+
178
if __name__ == "__main__":
    # Script entry point: print a banner, run the tool-level checks, print a footer.
    rule = "=" * 80

    print("\n" + rule)
    print("DOCUMENT EDITOR LOGGING TEST")
    print(rule)
    print(f"Started at: {datetime.now().isoformat()}")

    # Tool-level logging checks need no API key.
    test_tool_logging()

    # The agent-level check performs real LLM calls, so it is opt-in.
    # Uncomment to test with actual LLM calls
    # asyncio.run(test_agent_logging())

    print("\n" + rule)
    print("TEST COMPLETED")
    print(rule)
    print(f"Ended at: {datetime.now().isoformat()}")
utils/doc_editor_tools.py CHANGED
@@ -1,53 +1,93 @@
1
  #!/usr/bin/env python3
2
  """
3
- Document editor tools for TipTap JSON modification
4
- Implements Cline-like text-based editing with exact match + validation
5
  """
6
 
7
- import json
8
- from typing import Literal, Dict, Any, Optional
 
9
  from langchain_core.tools import tool
10
 
11
-
12
- def _canon(obj: Any) -> str:
13
- """
14
- Canonical pretty-print: stable key order + stable indentation.
15
- IMPORTANT: Always send *this* representation to the LLM, otherwise SEARCH won't match.
16
- """
17
- return json.dumps(obj, ensure_ascii=False, sort_keys=True, indent=2)
18
 
19
 
20
- def _parse_tiptap(doc_text: str) -> Dict[str, Any]:
21
  """
22
- Parse and validate TipTap JSON structure.
23
- Raises ValueError if structure is invalid.
 
 
 
 
 
 
 
 
24
  """
 
 
 
25
  try:
26
- doc = json.loads(doc_text)
27
- except json.JSONDecodeError as e:
28
- raise ValueError(f"Invalid JSON: {e}")
29
-
30
- # Minimal TipTap validation
31
- if not isinstance(doc, dict):
32
- raise ValueError("Root must be a JSON object (dict).")
33
- if doc.get("type") != "doc":
34
- raise ValueError("Root.type must be 'doc'.")
35
- if "content" not in doc:
36
- raise ValueError("Root must have 'content' field.")
37
- if not isinstance(doc.get("content"), list):
38
- raise ValueError("Root.content must be a list.")
39
-
40
- return doc
 
 
 
 
 
 
 
41
 
42
 
43
- def _replace_func(doc_text: str, search: str, replace: str, expected_matches: int = 1) -> Dict[str, Any]:
 
44
  """
45
- Internal function for replacing text blocks.
46
- This is the actual implementation used by the replace and delete tools.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  """
 
 
 
 
 
 
 
48
  # Count exact matches
49
  m = doc_text.count(search) if search else 0
 
 
50
  if m != expected_matches:
 
51
  return {
52
  "ok": False,
53
  "error": f"Match count mismatch: found {m} occurrences, expected {expected_matches}",
@@ -55,17 +95,31 @@ def _replace_func(doc_text: str, search: str, replace: str, expected_matches: in
55
  }
56
 
57
  # Perform replacement
 
58
  new_text = doc_text.replace(search, replace, expected_matches)
 
 
59
 
60
- # Validate the result
61
  try:
62
- doc = _parse_tiptap(new_text)
 
 
 
 
 
 
 
 
 
 
63
  return {
64
  "ok": True,
65
- "doc_text": _canon(doc),
66
  "matches": m
67
  }
68
  except Exception as e:
 
69
  return {
70
  "ok": False,
71
  "error": f"Post-edit validation failed: {e}",
@@ -74,50 +128,43 @@ def _replace_func(doc_text: str, search: str, replace: str, expected_matches: in
74
 
75
 
76
  @tool
77
- def replace(doc_text: str, search: str, replace: str, expected_matches: int = 1) -> Dict[str, Any]:
 
 
78
  """
79
- Replace an exact block of text in the TipTap JSON document.
80
-
81
- This tool performs exact string matching on the canonical JSON representation.
82
- It's critical that the 'search' parameter matches exactly (including whitespace and quotes).
83
-
84
- Args:
85
- doc_text: The canonical TipTap JSON string
86
- search: The exact text block to replace (must match exactly)
87
- replace: The exact text block to insert
88
- expected_matches: Expected number of occurrences (default: 1)
89
-
90
- Returns:
91
- Dict with 'ok' (bool), 'doc_text' (updated canonical JSON), 'matches' (int),
92
- and optionally 'error' (str) if something went wrong
93
- """
94
- return _replace_func(doc_text=doc_text, search=search, replace=replace, expected_matches=expected_matches)
95
-
96
-
97
- @tool
98
- def add(doc_text: str, anchor_search: str, insert: str,
99
- position: Literal["before", "after"] = "after",
100
- expected_matches: int = 1) -> Dict[str, Any]:
101
- """
102
- Add content before or after an anchor block in the TipTap JSON document.
103
 
104
  This tool finds an exact anchor block and inserts new content adjacent to it.
105
- Useful for adding new articles, clauses, or sections.
106
 
107
  Args:
108
- doc_text: The canonical TipTap JSON string
109
- anchor_search: The exact text block to find (must match exactly)
110
- insert: The exact text block to insert
111
  position: "before" or "after" (default: "after")
112
  expected_matches: Expected number of anchor occurrences (default: 1)
113
 
114
  Returns:
115
- Dict with 'ok' (bool), 'doc_text' (updated canonical JSON), 'matches' (int),
116
  and optionally 'error' (str) if something went wrong
 
 
 
 
 
117
  """
 
 
 
 
 
 
118
  # Count exact matches of anchor
119
  m = doc_text.count(anchor_search) if anchor_search else 0
 
 
120
  if m != expected_matches:
 
121
  return {
122
  "ok": False,
123
  "error": f"Anchor match count mismatch: found {m} occurrences, expected {expected_matches}",
@@ -125,20 +172,35 @@ def add(doc_text: str, anchor_search: str, insert: str,
125
  }
126
 
127
  # Insert before or after anchor
 
128
  if position == "before":
129
  new_text = doc_text.replace(anchor_search, insert + anchor_search, 1)
130
  else: # after
131
  new_text = doc_text.replace(anchor_search, anchor_search + insert, 1)
132
 
133
- # Validate the result
 
 
 
134
  try:
135
- doc = _parse_tiptap(new_text)
 
 
 
 
 
 
 
 
 
 
136
  return {
137
  "ok": True,
138
- "doc_text": _canon(doc),
139
  "matches": m
140
  }
141
  except Exception as e:
 
142
  return {
143
  "ok": False,
144
  "error": f"Post-edit validation failed: {e}",
@@ -147,27 +209,39 @@ def add(doc_text: str, anchor_search: str, insert: str,
147
 
148
 
149
  @tool
150
- def delete(doc_text: str, search: str, expected_matches: int = 1) -> Dict[str, Any]:
151
  """
152
- Delete an exact block of text from the TipTap JSON document.
153
 
154
- This is a convenience wrapper around replace with an empty replacement.
155
  Useful for removing unwanted sections, clauses, or content.
156
 
157
  Args:
158
- doc_text: The canonical TipTap JSON string
159
- search: The exact text block to delete (must match exactly)
160
  expected_matches: Expected number of occurrences (default: 1)
161
 
162
  Returns:
163
- Dict with 'ok' (bool), 'doc_text' (updated canonical JSON), 'matches' (int),
164
  and optionally 'error' (str) if something went wrong
 
 
 
165
  """
166
- return _replace_func(doc_text=doc_text, search=search, replace="", expected_matches=expected_matches)
 
 
 
 
 
 
 
 
 
167
 
168
 
169
  @tool
170
- def attempt_completion(message: str) -> Dict[str, Any]:
171
  """
172
  Signal that document editing is complete and provide a summary message.
173
 
@@ -180,6 +254,8 @@ def attempt_completion(message: str) -> Dict[str, Any]:
180
  Returns:
181
  Dict with 'ok' (bool) and 'message' (str)
182
  """
 
 
183
  return {
184
  "ok": True,
185
  "message": message
 
1
  #!/usr/bin/env python3
2
  """
3
+ Document editor tools for HTML modification
4
+ Implements Cline-like text-based editing with exact match + BeautifulSoup validation for HTML
5
  """
6
 
7
+ import logging
8
+ from typing import Literal, Dict, Any
9
+ from bs4 import BeautifulSoup
10
  from langchain_core.tools import tool
11
 
12
+ logger = logging.getLogger(__name__)
 
 
 
 
 
 
13
 
14
 
15
async def _validate_html(html: str) -> tuple[bool, str]:
    """
    Check that an HTML string parses and holds at least one element.

    The parser used here is lenient, so this is a sanity check on an edited
    document rather than strict validation.

    Args:
        html: Candidate HTML document.

    Returns:
        ``(True, "OK")`` when the document is acceptable, otherwise
        ``(False, reason)`` where ``reason`` says what was rejected.
    """
    # Reject None, "" and whitespace-only input before touching the parser.
    if not (html and html.strip()):
        return False, "HTML document is empty"

    try:
        parsed = BeautifulSoup(html, 'html.parser')

        # Serialising the tree back to text must still yield something.
        if not str(parsed).strip():
            return False, "HTML document is empty after parsing"

        # Plain text without a single element does not count as a document.
        if not parsed.find_all():
            return False, "HTML document contains no HTML tags"
    except Exception as exc:
        # Any failure while parsing or serialising is reported as invalid.
        return False, f"HTML parsing failed: {str(exc)}"

    return True, "OK"
54
 
55
 
56
+ @tool
57
+ async def replace_html(doc_text: str, search: str, replace: str, expected_matches: int = 1) -> Dict[str, Any]:
58
  """
59
+ Replace an exact block of HTML text in the document.
60
+
61
+ This tool performs exact string matching on the HTML content.
62
+ It's critical that the 'search' parameter matches exactly (including whitespace, tags, and attributes).
63
+
64
+ Args:
65
+ doc_text: The HTML document content
66
+ search: The exact HTML block to replace (must match exactly, including whitespace)
67
+ replace: The exact HTML block to insert
68
+ expected_matches: Expected number of occurrences (default: 1)
69
+
70
+ Returns:
71
+ Dict with 'ok' (bool), 'doc_text' (updated HTML), 'matches' (int),
72
+ and optionally 'error' (str) if something went wrong
73
+
74
+ Example:
75
+ search = "<p>12 mois</p>"
76
+ replace = "<p>24 mois</p>"
77
  """
78
+ logger.info(" πŸ”§ TOOL: replace_html")
79
+
80
+ # Log operation details
81
+ logger.info(f" πŸ” SEARCH pattern ({len(search)} chars): {search[:80]}{'...' if len(search) > 80 else ''}")
82
+ logger.info(f" ✏️ REPLACE pattern ({len(replace)} chars): {replace[:80]}{'...' if len(replace) > 80 else ''}")
83
+ logger.info(f" 🎯 Expected matches: {expected_matches}")
84
+
85
  # Count exact matches
86
  m = doc_text.count(search) if search else 0
87
+ logger.info(f" πŸ”’ Actual matches found: {m}")
88
+
89
  if m != expected_matches:
90
+ logger.warning(f" ⚠️ Match count mismatch: found {m}, expected {expected_matches}")
91
  return {
92
  "ok": False,
93
  "error": f"Match count mismatch: found {m} occurrences, expected {expected_matches}",
 
95
  }
96
 
97
  # Perform replacement
98
+ logger.info(" ✏️ Performing replacement...")
99
  new_text = doc_text.replace(search, replace, expected_matches)
100
+ size_change = len(new_text) - len(doc_text)
101
+ logger.info(f" πŸ“ Size change: {size_change:+d} bytes ({size_change/len(doc_text)*100:+.1f}%)")
102
 
103
+ # Validate result with BeautifulSoup
104
  try:
105
+ logger.info(" βœ”οΈ Validating HTML structure...")
106
+ is_valid, error_msg = await _validate_html(new_text)
107
+ if not is_valid:
108
+ logger.error(f" ❌ Validation failed: {error_msg}")
109
+ return {
110
+ "ok": False,
111
+ "error": f"Post-edit validation failed: {error_msg}",
112
+ "matches": m
113
+ }
114
+ logger.info(" βœ… HTML validation passed")
115
+ logger.info(f" πŸ“ New doc size: {len(new_text)} bytes")
116
  return {
117
  "ok": True,
118
+ "doc_text": new_text,
119
  "matches": m
120
  }
121
  except Exception as e:
122
+ logger.error(f" ❌ Validation failed: {e}")
123
  return {
124
  "ok": False,
125
  "error": f"Post-edit validation failed: {e}",
 
128
 
129
 
130
  @tool
131
+ async def add_html(doc_text: str, anchor_search: str, insert: str,
132
+ position: Literal["before", "after"] = "after",
133
+ expected_matches: int = 1) -> Dict[str, Any]:
134
  """
135
+ Add HTML content before or after an anchor block in the document.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
  This tool finds an exact anchor block and inserts new content adjacent to it.
138
+ Useful for adding new paragraphs, sections, or elements.
139
 
140
  Args:
141
+ doc_text: The HTML document content
142
+ anchor_search: The exact HTML block to find (must match exactly)
143
+ insert: The exact HTML block to insert
144
  position: "before" or "after" (default: "after")
145
  expected_matches: Expected number of anchor occurrences (default: 1)
146
 
147
  Returns:
148
+ Dict with 'ok' (bool), 'doc_text' (updated HTML), 'matches' (int),
149
  and optionally 'error' (str) if something went wrong
150
+
151
+ Example:
152
+ anchor_search = "<h2>Article 2 - DurΓ©e</h2>"
153
+ insert = "<h3>Article 3 - Prix</h3>"
154
+ position = "after"
155
  """
156
+ logger.info(" πŸ”§ TOOL: add_html")
157
+ logger.info(f" πŸ“ Position: {position}")
158
+ logger.info(f" πŸ” ANCHOR pattern ({len(anchor_search)} chars): {anchor_search[:80]}{'...' if len(anchor_search) > 80 else ''}")
159
+ logger.info(f" ✏️ INSERT pattern ({len(insert)} chars): {insert[:80]}{'...' if len(insert) > 80 else ''}")
160
+ logger.info(f" 🎯 Expected matches: {expected_matches}")
161
+
162
  # Count exact matches of anchor
163
  m = doc_text.count(anchor_search) if anchor_search else 0
164
+ logger.info(f" πŸ”’ Actual anchor matches found: {m}")
165
+
166
  if m != expected_matches:
167
+ logger.warning(f" ⚠️ Anchor match count mismatch: found {m}, expected {expected_matches}")
168
  return {
169
  "ok": False,
170
  "error": f"Anchor match count mismatch: found {m} occurrences, expected {expected_matches}",
 
172
  }
173
 
174
  # Insert before or after anchor
175
+ logger.info(f" ✏️ Inserting content {position} anchor...")
176
  if position == "before":
177
  new_text = doc_text.replace(anchor_search, insert + anchor_search, 1)
178
  else: # after
179
  new_text = doc_text.replace(anchor_search, anchor_search + insert, 1)
180
 
181
+ size_change = len(new_text) - len(doc_text)
182
+ logger.info(f" πŸ“ Size change: {size_change:+d} bytes ({size_change/len(doc_text)*100:+.1f}%)")
183
+
184
+ # Validate result with BeautifulSoup
185
  try:
186
+ logger.info(" βœ”οΈ Validating HTML structure...")
187
+ is_valid, error_msg = await _validate_html(new_text)
188
+ if not is_valid:
189
+ logger.error(f" ❌ Validation failed: {error_msg}")
190
+ return {
191
+ "ok": False,
192
+ "error": f"Post-edit validation failed: {error_msg}",
193
+ "matches": m
194
+ }
195
+ logger.info(" βœ… HTML validation passed")
196
+ logger.info(f" πŸ“ New doc size: {len(new_text)} bytes")
197
  return {
198
  "ok": True,
199
+ "doc_text": new_text,
200
  "matches": m
201
  }
202
  except Exception as e:
203
+ logger.error(f" ❌ Validation failed: {e}")
204
  return {
205
  "ok": False,
206
  "error": f"Post-edit validation failed: {e}",
 
209
 
210
 
211
  @tool
212
+ async def delete_html(doc_text: str, search: str, expected_matches: int = 1) -> Dict[str, Any]:
213
  """
214
+ Delete an exact block of HTML from the document.
215
 
216
+ This is a convenience wrapper around replace_html with an empty replacement.
217
  Useful for removing unwanted sections, clauses, or content.
218
 
219
  Args:
220
+ doc_text: The HTML document content
221
+ search: The exact HTML block to delete (must match exactly)
222
  expected_matches: Expected number of occurrences (default: 1)
223
 
224
  Returns:
225
+ Dict with 'ok' (bool), 'doc_text' (updated HTML), 'matches' (int),
226
  and optionally 'error' (str) if something went wrong
227
+
228
+ Example:
229
+ search = "<p>This paragraph should be deleted</p>"
230
  """
231
+ logger.info(" πŸ”§ TOOL: delete_html")
232
+ result = await replace_html.invoke({
233
+ "doc_text": doc_text,
234
+ "search": search,
235
+ "replace": "",
236
+ "expected_matches": expected_matches
237
+ })
238
+ if result.get("ok"):
239
+ logger.info(f" πŸ—‘οΈ Deleted {len(search)} characters")
240
+ return result
241
 
242
 
243
  @tool
244
+ async def attempt_completion(message: str) -> Dict[str, Any]:
245
  """
246
  Signal that document editing is complete and provide a summary message.
247
 
 
254
  Returns:
255
  Dict with 'ok' (bool) and 'message' (str)
256
  """
257
+ logger.info(" βœ… TOOL: attempt_completion")
258
+ logger.info(f" πŸ“ Completion message: {message}")
259
  return {
260
  "ok": True,
261
  "message": message