Charles Grandjean committed on
Commit
8858ffb
·
1 Parent(s): 15f0f72

refactor utilities

Browse files
utils/doc_editor_tools.py DELETED
@@ -1,276 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Document editor tools for HTML modification
4
- Implements Cline-like text-based editing with exact match + BeautifulSoup validation for HTML
5
- """
6
-
7
- import logging
8
- from typing import Literal, Dict, Any
9
- from bs4 import BeautifulSoup
10
- from langchain_core.tools import tool
11
-
12
- logger = logging.getLogger(__name__)
13
-
14
- async def _validate_html(html: str) -> tuple[bool, str]:
15
- """
16
- Validate HTML structure using BeautifulSoup.
17
-
18
- BeautifulSoup is very tolerant and will parse even malformed HTML.
19
- We use it to ensure the HTML is at least parseable and contains some content.
20
-
21
- Args:
22
- html: The HTML string to validate
23
-
24
- Returns:
25
- tuple[bool, str]: (is_valid, error_message)
26
- """
27
- if not html or not html.strip():
28
- return False, "HTML document is empty"
29
-
30
- try:
31
- # Parse the HTML (html.parser is included in stdlib)
32
- soup = BeautifulSoup(html, 'html.parser')
33
-
34
- # Get the normalized HTML
35
- normalized = str(soup)
36
-
37
- # Check if there's any content
38
- if not normalized.strip():
39
- return False, "HTML document is empty after parsing"
40
-
41
- # Check if there are any HTML tags
42
- if len(soup.find_all()) == 0:
43
- return False, "HTML document contains no HTML tags"
44
-
45
- # No further structural checks are performed here.
46
- # Any exception raised while parsing is reported by the except clause below,
47
- # so reaching this point means the document parsed and contains at least one tag.
48
-
49
- return True, "OK"
50
-
51
- except Exception as e:
52
- return False, f"HTML parsing failed: {str(e)}"
53
-
54
-
55
- @tool
56
- async def replace_html(doc_text: str, search: str, replace: str, expected_matches: int = 1) -> Dict[str, Any]:
57
- """
58
- Replace an exact block of HTML text in the document.
59
-
60
- This tool performs exact string matching on the HTML content.
61
- It's critical that the 'search' parameter matches exactly (including whitespace, tags, and attributes).
62
-
63
- Args:
64
- doc_text: The HTML document content
65
- search: The exact HTML block to replace (must match exactly, including whitespace)
66
- replace: The exact HTML block to insert
67
- expected_matches: Expected number of occurrences (default: 1)
68
-
69
- Returns:
70
- Dict with 'ok' (bool), 'doc_text' (updated HTML), 'matches' (int),
71
- and optionally 'error' (str) if something went wrong
72
-
73
- Example:
74
- search = "<p>12 mois</p>"
75
- replace = "<p>24 mois</p>"
76
- """
77
- logger.info(f" 🔧 replace_html | search:{len(search)}b | replace:{len(replace)}b | expect:{expected_matches}")
78
-
79
- # Count exact matches
80
- m = doc_text.count(search) if search else 0
81
-
82
- if m != expected_matches:
83
- error = f"Search not found. Expected {expected_matches}, found {m}"
84
- logger.warning(f" ❌ {error}")
85
- return {
86
- "ok": False,
87
- "error": error,
88
- "matches": m,
89
- "DANGER_PANEL": f"⚠️⚠️⚠️ MODIFICATION FAILED ⚠️⚠️⚠️\n{error}\nHTML must match EXACTLY and your search pattern has unexpected number of matches"
90
- }
91
-
92
- # Perform replacement
93
- new_text = doc_text.replace(search, replace, expected_matches)
94
-
95
- # Validate result with BeautifulSoup
96
- is_valid, validation_error = await _validate_html(new_text)
97
- if not is_valid:
98
- logger.warning(f" ❌ Validation failed: {validation_error}")
99
- return {
100
- "ok": False,
101
- "error": f"Invalid HTML: {validation_error}",
102
- "matches": m,
103
- "DANGER_PANEL": f"⚠️⚠️⚠️ MODIFICATION FAILED ⚠️⚠️⚠️\n{validation_error}"
104
- }
105
-
106
- logger.info(f" ✅ Success | +{len(new_text)-len(doc_text)}b")
107
- return {
108
- "ok": True,
109
- "doc_text": new_text,
110
- "matches": m
111
- }
112
-
113
-
114
- @tool
115
- async def add_html(doc_text: str, anchor_search: str, insert: str,
116
- position: Literal["before", "after"] = "after",
117
- expected_matches: int = 1) -> Dict[str, Any]:
118
- """
119
- Add HTML content before or after an anchor block in the document.
120
-
121
- This tool finds an exact anchor block and inserts new content adjacent to it.
122
- Useful for adding new paragraphs, sections, or elements.
123
-
124
- Args:
125
- doc_text: The HTML document content
126
- anchor_search: The exact HTML block to find (must match exactly)
127
- insert: The exact HTML block to insert
128
- position: "before" or "after" (default: "after")
129
- expected_matches: Expected number of anchor occurrences (default: 1)
130
-
131
- Returns:
132
- Dict with 'ok' (bool), 'doc_text' (updated HTML), 'matches' (int),
133
- and optionally 'error' (str) if something went wrong
134
-
135
- Example:
136
- anchor_search = "<h2>Article 2 - Durée</h2>"
137
- insert = "<h3>Article 3 - Prix</h3>"
138
- position = "after"
139
- """
140
- logger.info(f" 🔧 add_html | anchor:{len(anchor_search)}b | insert:{len(insert)}b | pos:{position} | expect:{expected_matches}")
141
-
142
- # Count exact matches of anchor
143
- m = doc_text.count(anchor_search) if anchor_search else 0
144
-
145
- if m != expected_matches:
146
- error = f"Anchor not found. Expected {expected_matches}, found {m}"
147
- logger.warning(f" ❌ {error}")
148
- return {
149
- "ok": False,
150
- "error": error,
151
- "matches": m,
152
- "DANGER_PANEL": f"⚠️⚠️⚠️ MODIFICATION FAILED ⚠️⚠️⚠️\n{error}\nAnchor must match EXACTLY, and your search pattern has unexpected number of matches"
153
- }
154
-
155
- # Insert before or after anchor
156
- if position == "before":
157
- new_text = doc_text.replace(anchor_search, insert + anchor_search, 1)
158
- else: # after
159
- new_text = doc_text.replace(anchor_search, anchor_search + insert, 1)
160
-
161
- # Validate result with BeautifulSoup
162
- is_valid, validation_error = await _validate_html(new_text)
163
- if not is_valid:
164
- logger.warning(f" ❌ Validation failed: {validation_error}")
165
- return {
166
- "ok": False,
167
- "error": f"Invalid HTML: {validation_error}",
168
- "matches": m,
169
- "DANGER_PANEL": f"⚠️⚠️⚠️ MODIFICATION FAILED ⚠️⚠️⚠️\n{validation_error}"
170
- }
171
-
172
- logger.info(f" ✅ Success | +{len(new_text)-len(doc_text)}b")
173
- return {
174
- "ok": True,
175
- "doc_text": new_text,
176
- "matches": m
177
- }
178
-
179
-
180
- @tool
181
- async def delete_html(doc_text: str, search: str, expected_matches: int = 1) -> Dict[str, Any]:
182
- """
183
- Delete an exact block of HTML from document.
184
-
185
- This is a convenience wrapper around replace_html with an empty replacement.
186
- Useful for removing unwanted sections, clauses, or content.
187
-
188
- Args:
189
- doc_text: The HTML document content
190
- search: The exact HTML block to delete (must match exactly)
191
- expected_matches: Expected number of occurrences (default: 1)
192
-
193
- Returns:
194
- Dict with 'ok' (bool), 'doc_text' (updated HTML), 'matches' (int),
195
- and optionally 'error' (str) if something went wrong
196
-
197
- Example:
198
- search = "<p>This paragraph should be deleted</p>"
199
- """
200
- logger.info(f" 🔧 delete_html | search:{len(search)}b | expect:{expected_matches}")
201
-
202
- # Count exact matches
203
- m = doc_text.count(search) if search else 0
204
-
205
- if m != expected_matches:
206
- error = f"Search not found. Expected {expected_matches}, found {m}"
207
- logger.warning(f" ❌ {error}")
208
- return {
209
- "ok": False,
210
- "error": error,
211
- "matches": m,
212
- "DANGER_PANEL": f"⚠️⚠️⚠️ MODIFICATION FAILED ⚠️⚠️⚠️\n{error}\nHTML must match EXACTLY and your search pattern has unexpected number of matches"
213
- }
214
-
215
- # Perform replacement with empty string
216
- new_text = doc_text.replace(search, "", expected_matches)
217
-
218
- # Validate result with BeautifulSoup
219
- is_valid, validation_error = await _validate_html(new_text)
220
- if not is_valid:
221
- logger.warning(f" ❌ Validation failed: {validation_error}")
222
- return {
223
- "ok": False,
224
- "error": f"Invalid HTML: {validation_error}",
225
- "matches": m,
226
- "DANGER_PANEL": f"⚠️⚠️⚠️ MODIFICATION FAILED ⚠️⚠️⚠️\n{validation_error}"
227
- }
228
-
229
- logger.info(f" ✅ Success | -{len(doc_text)-len(new_text)}b")
230
- return {
231
- "ok": True,
232
- "doc_text": new_text,
233
- "matches": m
234
- }
235
-
236
-
237
- @tool
238
- async def view_current_document() -> Dict[str, Any]:
239
- """
240
- View the current state of the document being edited.
241
-
242
- Use this tool to see the current document content after modifications.
243
- This helps you verify previous edits and understand the current structure.
244
-
245
- The document content is automatically provided by the workflow state.
246
-
247
- Returns:
248
- Dict with 'ok' (bool) and 'content' (str) containing the HTML document
249
- """
250
- # doc_text is injected by _tools_node in doc_editor.py
251
- logger.info(f" 🔍 view_current_document")
252
- return {
253
- "ok": True,
254
- "content": ""
255
- }
256
-
257
-
258
- @tool
259
- async def attempt_completion(message: str) -> Dict[str, Any]:
260
- """
261
- Signal that document editing is complete and provide a summary message.
262
-
263
- This tool should be called when all requested modifications have been
264
- successfully applied to document.
265
-
266
- Args:
267
- message: A summary message describing what was changed in document
268
-
269
- Returns:
270
- Dict with 'ok' (bool) and 'message' (str)
271
- """
272
- logger.info(f" ✅ attempt_completion | {message}")
273
- return {
274
- "ok": True,
275
- "message": message
276
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/tools.py CHANGED
@@ -5,14 +5,18 @@ Tools for the CyberLegal Agent
5
 
6
  import os
7
  import json
8
- from typing import List, Dict, Any, Optional
9
  from langchain_core.tools import tool
10
  from langchain_tavily import TavilySearch
11
- from subagents.lawyer_selector import LawyerSelectorAgent
12
- from subagents.lawyer_messenger import LawyerMessengerAgent
13
- from subagents.doc_editor import DocumentEditorAgent
14
  from utils.lightrag_client import LightRAGClient, get_lightrag_client, validate_jurisdiction, get_available_jurisdictions
15
  import resend
 
 
 
 
16
 
17
  # Global instances - will be initialized in agent_api.py
18
  lawyer_selector_agent: Optional[LawyerSelectorAgent] = None
@@ -276,7 +280,6 @@ async def retrieve_lawyer_document(file_path: str) -> str:
276
  except Exception as e:
277
  return f"Error retrieving document: {str(e)}"
278
 
279
-
280
  @tool
281
  async def _retrieve_lawyer_document(
282
  user_id: str,
@@ -357,6 +360,123 @@ async def _retrieve_lawyer_document(
357
  return f"Error retrieving document: {str(e)}"
358
 
359
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  # ============ DOC ASSISTANT TOOLS ============
361
 
362
  @tool
@@ -379,7 +499,6 @@ async def edit_document(plan: str) -> str:
379
  """
380
  return
381
 
382
-
383
  @tool
384
  async def _edit_document(
385
  doc_text: str,
@@ -442,14 +561,399 @@ async def _edit_document(
442
  }
443
 
444
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  # Export tool sets for different user types
446
- tools_for_client_facade = [query_knowledge_graph, find_lawyers, message_lawyer, search_web]
447
- tools_for_client = [_query_knowledge_graph, _find_lawyers, _message_lawyer, search_web ]
448
- tools_for_lawyer_facade = [query_knowledge_graph, search_web, retrieve_lawyer_document]
449
- tools_for_lawyer = [_query_knowledge_graph, search_web, _retrieve_lawyer_document]
450
 
451
  # Tools for DocAssistant (document router)
452
- tools_for_doc_facade = [query_knowledge_graph, retrieve_lawyer_document, edit_document]
453
- tools_for_doc = [_query_knowledge_graph, _retrieve_lawyer_document, _edit_document]
 
 
 
454
 
455
  tools = tools_for_client
 
5
 
6
  import os
7
  import json
8
+ from typing import List, Dict, Any, Optional, Literal
9
  from langchain_core.tools import tool
10
  from langchain_tavily import TavilySearch
11
+ from agents.lawyer_selector import LawyerSelectorAgent
12
+ from agents.lawyer_messenger import LawyerMessengerAgent
13
+ from agents.doc_editor import DocumentEditorAgent
14
  from utils.lightrag_client import LightRAGClient, get_lightrag_client, validate_jurisdiction, get_available_jurisdictions
15
  import resend
16
+ import logging
17
+ from bs4 import BeautifulSoup
18
+
19
+ logger = logging.getLogger(__name__)
20
 
21
  # Global instances - will be initialized in agent_api.py
22
  lawyer_selector_agent: Optional[LawyerSelectorAgent] = None
 
280
  except Exception as e:
281
  return f"Error retrieving document: {str(e)}"
282
 
 
283
  @tool
284
  async def _retrieve_lawyer_document(
285
  user_id: str,
 
360
  return f"Error retrieving document: {str(e)}"
361
 
362
 
363
+ # ============ DRAFT DOCUMENT TOOL ============
364
+
365
+ @tool
366
+ async def create_draft_document(
367
+ title: str,
368
+ content: str,
369
+ path: str
370
+ ) -> str:
371
+ """
372
+ Create a new document draft and save it to "My documents" via Supabase.
373
+
374
+ This tool saves an HTML document as a PDF draft in the user's document storage.
375
+ The document will be stored in the specified folder path within "My Documents".
376
+
377
+ Use this tool when the user wants to:
378
+ - Create a new document draft
379
+ - Save a generated document
380
+ - Store a document in their document library
381
+
382
+ Args:
383
+ title: Document title (e.g., "Contract de bail", "Note juridique")
384
+ content: Document content in HTML format (e.g., "<h1>Title</h1><p>Content...</p>")
385
+ path: Folder path where to save the document
386
+ - Empty string "" → root folder of My Documents
387
+ - "Contracts/" → ./Contracts/title.pdf
388
+ - "Drafts/Legal/" → ./Drafts/Legal/title.pdf
389
+
390
+ Returns:
391
+ Confirmation message with document path and success status
392
+
393
+ Example:
394
+ create_draft_document(
395
+ title="Contract de bail",
396
+ content="<h1>Contrat de bail</h1><p>Ce contrat est conclu entre...</p>",
397
+ path="Contracts/"
398
+ )
399
+ → Saves as "./Contracts/Contract de bail.pdf" and returns confirmation
400
+ """
401
+ return
402
+
403
+ @tool
404
+ async def _create_draft_document(
405
+ user_id: str,
406
+ title: str,
407
+ content: str,
408
+ path: str
409
+ ) -> str:
410
+ """
411
+ Real implementation of create_draft_document - calls Supabase endpoint.
412
+ Args:
413
+ user_id: User UUID (injected by the agent)
414
+ title: Document title
415
+ content: Document HTML content
416
+ path: Folder path
417
+
418
+ Returns:
419
+ Success/failure message with document path
420
+ """
421
+ try:
422
+ import httpx
423
+
424
+ # Check configuration from environment
425
+ base_url = os.getenv("SUPABASE_BASE_URL")
426
+ cyberlgl_api_key = os.getenv("CYBERLGL_API_KEY")
427
+
428
+ if path:
429
+ # Remove leading ./ if present
430
+ if path.startswith('./'):
431
+ path = path[2:]
432
+ # Ensure trailing /
433
+ if not path.endswith('/'):
434
+ path += '/'
435
+ else:
436
+ path = ''
437
+
438
+ full_path = f"./{path}{title}.pdf"
439
+
440
+ endpoint_url = f"{base_url}/create-document-from-html"
441
+
442
+ request_body = {
443
+ "userId": user_id,
444
+ "html": content,
445
+ "path": full_path
446
+ }
447
+
448
+ async with httpx.AsyncClient() as client:
449
+ response = await client.post(
450
+ endpoint_url,
451
+ json=request_body,
452
+ headers={
453
+ "x-api-key": cyberlgl_api_key
454
+ },
455
+ timeout=30.0
456
+ )
457
+
458
+ if response.status_code == 200:
459
+ return f"✅ Document successfully saved to: {full_path}"
460
+ elif response.status_code == 400:
461
+ error_data = response.json() if response.headers.get('content-type', '').startswith('application/json') else {}
462
+ return f"❌ Bad request: {error_data.get('error', 'Unknown error')}"
463
+ elif response.status_code == 401:
464
+ return "❌ Authentication failed: Invalid API key"
465
+ elif response.status_code == 403:
466
+ return "❌ Access denied: You do not have permission to save documents"
467
+ elif response.status_code == 500:
468
+ return "❌ Server error: Failed to save document"
469
+ else:
470
+ return f"❌ Error: HTTP {response.status_code} - {response.text}"
471
+
472
+ except httpx.TimeoutError:
473
+ return "❌ Error: Timeout while saving document"
474
+ except httpx.RequestError as e:
475
+ return f"❌ Error: Failed to connect to document server: {str(e)}"
476
+ except Exception as e:
477
+ return f"❌ Error saving document: {str(e)}"
478
+
479
+
480
  # ============ DOC ASSISTANT TOOLS ============
481
 
482
  @tool
 
499
  """
500
  return
501
 
 
502
  @tool
503
  async def _edit_document(
504
  doc_text: str,
 
561
  }
562
 
563
 
564
+ async def _validate_html(html: str) -> tuple[bool, str]:
565
+ """
566
+ Validate HTML structure using BeautifulSoup.
567
+
568
+ BeautifulSoup is very tolerant and will parse even malformed HTML.
569
+ We use it to ensure the HTML is at least parseable and contains some content.
570
+
571
+ Args:
572
+ html: The HTML string to validate
573
+
574
+ Returns:
575
+ tuple[bool, str]: (is_valid, error_message)
576
+ """
577
+ if not html or not html.strip():
578
+ return False, "HTML document is empty"
579
+
580
+ try:
581
+ # Parse the HTML (html.parser is included in stdlib)
582
+ soup = BeautifulSoup(html, 'html.parser')
583
+
584
+ # Get the normalized HTML
585
+ normalized = str(soup)
586
+
587
+ # Check if there's any content
588
+ if not normalized.strip():
589
+ return False, "HTML document is empty after parsing"
590
+
591
+ # Check if there are any HTML tags
592
+ if len(soup.find_all()) == 0:
593
+ return False, "HTML document contains no HTML tags"
594
+
595
+ # No further structural checks are performed here.
596
+ # Any exception raised while parsing is reported by the except clause below,
597
+ # so reaching this point means the document parsed and contains at least one tag.
598
+
599
+ return True, "OK"
600
+
601
+ except Exception as e:
602
+ return False, f"HTML parsing failed: {str(e)}"
603
+
604
+ # ============ FACADES (Public Interface) ============
605
+
606
+ @tool
607
+ async def replace_html(
608
+ search: str,
609
+ replace: str,
610
+ expected_matches: int = 1
611
+ ) -> str:
612
+ """
613
+ Replace an exact block of HTML text in the document.
614
+
615
+ This tool performs exact string matching on the HTML content.
616
+ It's critical that the 'search' parameter matches exactly (including whitespace, tags, and attributes).
617
+
618
+ Args:
619
+ doc_text: The HTML document content (injected automatically)
620
+ search: The exact HTML block to replace (must match exactly, including whitespace)
621
+ replace: The exact HTML block to insert
622
+ expected_matches: Expected number of occurrences (default: 1)
623
+
624
+ Returns:
625
+ Dict with 'ok' (bool), 'doc_text' (updated HTML), 'matches' (int),
626
+ and optionally 'error' (str) if something went wrong
627
+
628
+ Example:
629
+ search = "<p>12 mois</p>"
630
+ replace = "<p>24 mois</p>"
631
+ """
632
+ return
633
+
634
+
635
+ @tool
636
+ async def add_html(
637
+ anchor_search: str,
638
+ insert: str,
639
+ position: Literal["before", "after"] = "after",
640
+ expected_matches: int = 1
641
+ ) -> str:
642
+ """
643
+ Add HTML content before or after an anchor block in the document.
644
+
645
+ This tool finds an exact anchor block and inserts new content adjacent to it.
646
+ Useful for adding new paragraphs, sections, or elements.
647
+
648
+ Args:
649
+ doc_text: The HTML document content (injected automatically)
650
+ anchor_search: The exact HTML block to find (must match exactly)
651
+ insert: The exact HTML block to insert
652
+ position: "before" or "after" (default: "after")
653
+ expected_matches: Expected number of anchor occurrences (default: 1)
654
+
655
+ Returns:
656
+ Dict with 'ok' (bool), 'doc_text' (updated HTML), 'matches' (int),
657
+ and optionally 'error' (str) if something went wrong
658
+
659
+ Example:
660
+ anchor_search = "<h2>Article 2 - Durée</h2>"
661
+ insert = "<h3>Article 3 - Prix</h3>"
662
+ position = "after"
663
+ """
664
+ return
665
+
666
+
667
+ @tool
668
+ async def delete_html(
669
+ search: str,
670
+ expected_matches: int = 1
671
+ ) -> str:
672
+ """
673
+ Delete an exact block of HTML from document.
674
+
675
+ This is a convenience wrapper around replace_html with an empty replacement.
676
+ Useful for removing unwanted sections, clauses, or content.
677
+
678
+ Args:
679
+ doc_text: The HTML document content (injected automatically)
680
+ search: The exact HTML block to delete (must match exactly)
681
+ expected_matches: Expected number of occurrences (default: 1)
682
+
683
+ Returns:
684
+ Dict with 'ok' (bool), 'doc_text' (updated HTML), 'matches' (int),
685
+ and optionally 'error' (str) if something went wrong
686
+
687
+ Example:
688
+ search = "<p>This paragraph should be deleted</p>"
689
+ """
690
+ return
691
+
692
+
693
+ @tool
694
+ async def view_current_document() -> str:
695
+ """
696
+ View the current state of the document being edited.
697
+
698
+ Use this tool to see the current document content after modifications.
699
+ This helps you verify previous edits and understand the current structure.
700
+
701
+ The document content is automatically provided by the workflow state.
702
+
703
+ Returns:
704
+ Dict with 'ok' (bool) and 'content' (str) containing the HTML document
705
+ """
706
+ return
707
+
708
+
709
+ @tool
710
+ async def attempt_completion(message: str) -> Dict[str, Any]:
711
+ """
712
+ Signal that document editing is complete and provide a summary message (real implementation).
713
+
714
+ This tool should be called when all requested modifications have been
715
+ successfully applied to document.
716
+
717
+ Args:
718
+ message: A summary message describing what was changed in document
719
+
720
+ Returns:
721
+ Dict with 'ok' (bool) and 'message' (str)
722
+ """
723
+ logger.info(f" ✅ attempt_completion | {message}")
724
+ return {
725
+ "ok": True,
726
+ "message": message
727
+ }
728
+
729
+
730
+ # ============ REAL IMPLEMENTATIONS ============
731
+
732
+ @tool
733
+ async def _replace_html(doc_text: str, search: str, replace: str, expected_matches: int = 1) -> Dict[str, Any]:
734
+ """
735
+ Replace an exact block of HTML text in the document (real implementation).
736
+
737
+ This tool performs exact string matching on the HTML content.
738
+ It's critical that the 'search' parameter matches exactly (including whitespace, tags, and attributes).
739
+
740
+ Args:
741
+ doc_text: The HTML document content
742
+ search: The exact HTML block to replace (must match exactly, including whitespace)
743
+ replace: The exact HTML block to insert
744
+ expected_matches: Expected number of occurrences (default: 1)
745
+
746
+ Returns:
747
+ Dict with 'ok' (bool), 'doc_text' (updated HTML), 'matches' (int),
748
+ and optionally 'error' (str) if something went wrong
749
+ """
750
+ logger.info(f" 🔧 replace_html | search:{len(search)}b | replace:{len(replace)}b | expect:{expected_matches}")
751
+
752
+ # Count exact matches
753
+ m = doc_text.count(search) if search else 0
754
+
755
+ if m != expected_matches:
756
+ error = f"Search not found. Expected {expected_matches}, found {m}"
757
+ logger.warning(f" ❌ {error}")
758
+ return {
759
+ "ok": False,
760
+ "error": error,
761
+ "matches": m,
762
+ "DANGER_PANEL": f"⚠️⚠️⚠️ MODIFICATION FAILED ⚠️⚠️⚠️\n{error}\nHTML must match EXACTLY and your search pattern has unexpected number of matches"
763
+ }
764
+
765
+ # Perform replacement
766
+ new_text = doc_text.replace(search, replace, expected_matches)
767
+
768
+ # Validate result with BeautifulSoup
769
+ is_valid, validation_error = await _validate_html(new_text)
770
+ if not is_valid:
771
+ logger.warning(f" ❌ Validation failed: {validation_error}")
772
+ return {
773
+ "ok": False,
774
+ "error": f"Invalid HTML: {validation_error}",
775
+ "matches": m,
776
+ "DANGER_PANEL": f"⚠️⚠️⚠️ MODIFICATION FAILED ⚠️⚠️⚠️\n{validation_error}"
777
+ }
778
+
779
+ logger.info(f" ✅ Success | +{len(new_text)-len(doc_text)}b")
780
+ return {
781
+ "ok": True,
782
+ "doc_text": new_text,
783
+ "matches": m
784
+ }
785
+
786
+
787
+ @tool
788
+ async def _add_html(doc_text: str, anchor_search: str, insert: str,
789
+ position: Literal["before", "after"] = "after",
790
+ expected_matches: int = 1) -> Dict[str, Any]:
791
+ """
792
+ Add HTML content before or after an anchor block in the document (real implementation).
793
+
794
+ This tool finds an exact anchor block and inserts new content adjacent to it.
795
+ Useful for adding new paragraphs, sections, or elements.
796
+
797
+ Args:
798
+ doc_text: The HTML document content
799
+ anchor_search: The exact HTML block to find (must match exactly)
800
+ insert: The exact HTML block to insert
801
+ position: "before" or "after" (default: "after")
802
+ expected_matches: Expected number of anchor occurrences (default: 1)
803
+
804
+ Returns:
805
+ Dict with 'ok' (bool), 'doc_text' (updated HTML), 'matches' (int),
806
+ and optionally 'error' (str) if something went wrong
807
+ """
808
+ logger.info(f" 🔧 add_html | anchor:{len(anchor_search)}b | insert:{len(insert)}b | pos:{position} | expect:{expected_matches}")
809
+
810
+ # Count exact matches of anchor
811
+ m = doc_text.count(anchor_search) if anchor_search else 0
812
+
813
+ if m != expected_matches:
814
+ error = f"Anchor not found. Expected {expected_matches}, found {m}"
815
+ logger.warning(f" ❌ {error}")
816
+ return {
817
+ "ok": False,
818
+ "error": error,
819
+ "matches": m,
820
+ "DANGER_PANEL": f"⚠️⚠️⚠️ MODIFICATION FAILED ⚠️⚠️⚠️\n{error}\nAnchor must match EXACTLY, and your search pattern has unexpected number of matches"
821
+ }
822
+
823
+ # Insert before or after anchor
824
+ if position == "before":
825
+ new_text = doc_text.replace(anchor_search, insert + anchor_search, 1)
826
+ else: # after
827
+ new_text = doc_text.replace(anchor_search, anchor_search + insert, 1)
828
+
829
+ # Validate result with BeautifulSoup
830
+ is_valid, validation_error = await _validate_html(new_text)
831
+ if not is_valid:
832
+ logger.warning(f" ❌ Validation failed: {validation_error}")
833
+ return {
834
+ "ok": False,
835
+ "error": f"Invalid HTML: {validation_error}",
836
+ "matches": m,
837
+ "DANGER_PANEL": f"⚠️⚠️⚠️ MODIFICATION FAILED ⚠️⚠️⚠️\n{validation_error}"
838
+ }
839
+
840
+ logger.info(f" ✅ Success | +{len(new_text)-len(doc_text)}b")
841
+ return {
842
+ "ok": True,
843
+ "doc_text": new_text,
844
+ "matches": m
845
+ }
846
+
847
+
848
+ @tool
849
+ async def _delete_html(doc_text: str, search: str, expected_matches: int = 1) -> Dict[str, Any]:
850
+ """
851
+ Delete an exact block of HTML from document (real implementation).
852
+
853
+ This is a convenience wrapper around replace_html with an empty replacement.
854
+ Useful for removing unwanted sections, clauses, or content.
855
+
856
+ Args:
857
+ doc_text: The HTML document content
858
+ search: The exact HTML block to delete (must match exactly)
859
+ expected_matches: Expected number of occurrences (default: 1)
860
+
861
+ Returns:
862
+ Dict with 'ok' (bool), 'doc_text' (updated HTML), 'matches' (int),
863
+ and optionally 'error' (str) if something went wrong
864
+ """
865
+ logger.info(f" 🔧 delete_html | search:{len(search)}b | expect:{expected_matches}")
866
+
867
+ # Count exact matches
868
+ m = doc_text.count(search) if search else 0
869
+
870
+ if m != expected_matches:
871
+ error = f"Search not found. Expected {expected_matches}, found {m}"
872
+ logger.warning(f" ❌ {error}")
873
+ return {
874
+ "ok": False,
875
+ "error": error,
876
+ "matches": m,
877
+ "DANGER_PANEL": f"⚠️⚠️⚠️ MODIFICATION FAILED ⚠️⚠️⚠️\n{error}\nHTML must match EXACTLY and your search pattern has unexpected number of matches"
878
+ }
879
+
880
+ # Perform replacement with empty string
881
+ new_text = doc_text.replace(search, "", expected_matches)
882
+
883
+ # Validate result with BeautifulSoup
884
+ is_valid, validation_error = await _validate_html(new_text)
885
+ if not is_valid:
886
+ logger.warning(f" ❌ Validation failed: {validation_error}")
887
+ return {
888
+ "ok": False,
889
+ "error": f"Invalid HTML: {validation_error}",
890
+ "matches": m,
891
+ "DANGER_PANEL": f"⚠️⚠️⚠️ MODIFICATION FAILED ⚠️⚠️⚠️\n{validation_error}"
892
+ }
893
+
894
+ logger.info(f" ✅ Success | -{len(doc_text)-len(new_text)}b")
895
+ return {
896
+ "ok": True,
897
+ "doc_text": new_text,
898
+ "matches": m
899
+ }
900
+
901
+
902
+ @tool
903
+ async def _view_current_document(doc_text: str) -> Dict[str, Any]:
904
+ """
905
+ View the current state of the document being edited (real implementation).
906
+
907
+ Use this tool to see the current document content after modifications.
908
+ This helps you verify previous edits and understand the current structure.
909
+
910
+ Args:
911
+ doc_text: The HTML document content (injected from state)
912
+
913
+ Returns:
914
+ Dict with 'ok' (bool) and 'content' (str) containing the HTML document
915
+ """
916
+ logger.info(f" 🔍 view_current_document ({len(doc_text)}b)")
917
+ return {
918
+ "ok": True,
919
+ "content": doc_text
920
+ }
921
+
922
+
923
+ @tool
924
+ async def _attempt_completion(message: str) -> Dict[str, Any]:
925
+ """
926
+ Signal that document editing is complete and provide a summary message (real implementation).
927
+
928
+ This tool should be called when all requested modifications have been
929
+ successfully applied to document.
930
+
931
+ Args:
932
+ message: A summary message describing what was changed in document
933
+
934
+ Returns:
935
+ Dict with 'ok' (bool) and 'message' (str)
936
+ """
937
+ logger.info(f" ✅ attempt_completion | {message}")
938
+ return {
939
+ "ok": True,
940
+ "message": message
941
+ }
942
+
943
+
944
+
945
+
946
  # Export tool sets for different user types
947
+ tools_for_client_facade = [query_knowledge_graph, find_lawyers, message_lawyer, search_web, create_draft_document]
948
+ tools_for_client = [_query_knowledge_graph, _find_lawyers, _message_lawyer, search_web, _create_draft_document]
949
+ tools_for_lawyer_facade = [query_knowledge_graph, search_web, retrieve_lawyer_document, create_draft_document]
950
+ tools_for_lawyer = [_query_knowledge_graph, search_web, _retrieve_lawyer_document, _create_draft_document]
951
 
952
  # Tools for DocAssistant (document router)
953
+ tools_for_doc_assistant_facade = [query_knowledge_graph, retrieve_lawyer_document, edit_document]
954
+ tools_for_doc_assistant = [_query_knowledge_graph, _retrieve_lawyer_document, _edit_document]
955
+
956
+ tools_for_doc_editor_facade = [replace_html, add_html, delete_html, view_current_document, attempt_completion]
957
+ tools_for_doc_editor = [_replace_html, _add_html, _delete_html, _view_current_document, _attempt_completion]
958
 
959
  tools = tools_for_client
utils/utils.py DELETED
@@ -1,92 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Utility functions for agent operations
4
- """
5
-
6
- import time
7
- from typing import Tuple
8
- import logging
9
-
10
- # Configure logging
11
- logging.basicConfig(level=logging.INFO)
12
- logger = logging.getLogger(__name__)
13
-
14
-
15
- class PerformanceMonitor:
16
- """
17
- Monitor agent performance and timing
18
- """
19
-
20
- def __init__(self):
21
- self.metrics = {}
22
-
23
- def start_timer(self, operation: str) -> None:
24
- """
25
- Start timing an operation
26
- """
27
- self.metrics[f"{operation}_start"] = time.time()
28
-
29
- def end_timer(self, operation: str) -> float:
30
- """
31
- End timing an operation and return duration
32
- """
33
- start_time = self.metrics.get(f"{operation}_start")
34
- if start_time:
35
- duration = time.time() - start_time
36
- self.metrics[f"{operation}_duration"] = duration
37
- return duration
38
- return 0.0
39
-
40
- def get_metrics(self) -> dict:
41
- """
42
- Get all collected metrics
43
- """
44
- return self.metrics.copy()
45
-
46
- def reset(self) -> None:
47
- """
48
- Reset all metrics
49
- """
50
- self.metrics.clear()
51
-
52
-
53
- def validate_query(query: str) -> Tuple[bool, str]:
54
- """
55
- Validate user query
56
- """
57
- if not query or not query.strip():
58
- return False, "Query cannot be empty."
59
-
60
- if len(query) > 2500:
61
- return False, "Query is too long. Please keep it under 1000 characters."
62
-
63
- return True, None
64
-
65
-
66
- def format_error_message(error: str) -> str:
67
- """
68
- Format error messages for user display
69
- """
70
- error_map = {
71
- "Server unreachable": "❌ The legal database is currently unavailable. Please try again in a moment.",
72
- "timeout": "❌ The request timed out. Please try again.",
73
- "invalid json": "❌ There was an issue processing the response. Please try again.",
74
- "health check failed": "❌ The system is initializing. Please wait a moment and try again."
75
- }
76
-
77
- for key, message in error_map.items():
78
- if key.lower() in error.lower():
79
- return message
80
-
81
- return f"❌ An error occurred: {error}"
82
-
83
-
84
- def create_safe_filename(query: str, timestamp: str) -> str:
85
- """
86
- Create a safe filename for logging purposes
87
- """
88
- # Remove problematic characters
89
- safe_query = "".join(c for c in query if c.isalnum() or c in (' ', '-', '_')).strip()
90
- safe_query = safe_query[:50] # Limit length
91
-
92
- return f"{timestamp}_{safe_query}.log"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/{update_notifier.py → utils_fn.py} RENAMED
@@ -1,13 +1,15 @@
1
  #!/usr/bin/env python3
2
  """
3
- Utility for pushing document updates to the external endpoint
4
  """
5
 
6
- import os
 
7
  import logging
8
- from typing import Optional
9
  import httpx
10
-
 
11
  logger = logging.getLogger(__name__)
12
 
13
 
@@ -103,4 +105,54 @@ async def push_document_update(
103
 
104
  except Exception as e:
105
  logger.error(f"❌ Unexpected error pushing document update for {document_id}: {str(e)}")
106
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ Utility functions for agent operations
4
  """
5
 
6
+ import time
7
+ from typing import Tuple, Optional
8
  import logging
9
+ import os
10
  import httpx
11
+ # Configure logging
12
+ logging.basicConfig(level=logging.INFO)
13
  logger = logging.getLogger(__name__)
14
 
15
 
 
105
 
106
  except Exception as e:
107
  logger.error(f"❌ Unexpected error pushing document update for {document_id}: {str(e)}")
108
+ return False
109
+
110
class PerformanceMonitor:
    """
    Monitor agent performance and timing.

    Timings live in a flat ``metrics`` dict keyed by operation name:
    ``"<operation>_start"`` holds the ``time.time()`` value captured by
    ``start_timer`` and ``"<operation>_duration"`` holds the elapsed seconds
    recorded by ``end_timer``.
    """

    def __init__(self):
        self.metrics = {}

    def start_timer(self, operation: str) -> None:
        """
        Start timing an operation.

        Calling this again for the same operation overwrites the previously
        stored start timestamp.
        """
        self.metrics[f"{operation}_start"] = time.time()

    def end_timer(self, operation: str) -> float:
        """
        End timing an operation and return its duration in seconds.

        Returns:
            The elapsed time since the stored start timestamp, which is also
            saved under ``"<operation>_duration"``. Returns 0.0 and records
            nothing when no start timestamp is stored for ``operation``.
        """
        start_time = self.metrics.get(f"{operation}_start")
        # Compare against None explicitly: a stored timestamp of 0.0 is falsy
        # but is still a valid start value and must not be treated as missing.
        if start_time is None:
            return 0.0

        duration = time.time() - start_time
        self.metrics[f"{operation}_duration"] = duration
        return duration

    def get_metrics(self) -> dict:
        """
        Get all collected metrics.

        Returns a shallow copy, so mutating the result does not affect the
        monitor's own state.
        """
        return self.metrics.copy()

    def reset(self) -> None:
        """
        Reset all metrics (start timestamps and durations alike).
        """
        self.metrics.clear()
146
+
147
+
148
def validate_query(query: str, max_length: int = 2500) -> Tuple[bool, Optional[str]]:
    """
    Validate user query.

    Args:
        query: Raw query text supplied by the user.
        max_length: Maximum number of characters accepted (inclusive).

    Returns:
        ``(True, None)`` when the query is acceptable, otherwise
        ``(False, reason)`` where ``reason`` is a user-facing message.
    """
    # Reject None, empty, and whitespace-only input alike.
    if not query or not query.strip():
        return False, "Query cannot be empty."

    if len(query) > max_length:
        # Interpolate the limit so the message can never drift from the check
        # (the message previously said 1000 while the check used 2500).
        return False, f"Query is too long. Please keep it within {max_length} characters."

    return True, None