Leonardo committed on
Commit
1ab1fd4
·
verified ·
1 Parent(s): 5ca0d67

Update scripts/frontmatter_tool.py

Browse files
Files changed (1) hide show
  1. scripts/frontmatter_tool.py +15 -62
scripts/frontmatter_tool.py CHANGED
@@ -3,10 +3,13 @@ Frontmatter Generator Tool for Smolagents
3
 
4
  This tool helps generate consistent YAML frontmatter for documents,
5
  useful for RAG systems, static site generators, and document organization.
 
 
6
  """
7
 
8
  import re
9
  import yaml
 
10
  from datetime import datetime
11
  from typing import Dict, List, Optional, Any, Union
12
  from smolagents import Tool
@@ -20,6 +23,7 @@ class FrontmatterGeneratorTool(Tool):
20
  Generates or extracts YAML frontmatter for documents. Frontmatter provides structured
21
  metadata for documents including title, author, date, description, and tags.
22
  Useful for document organization, RAG systems, and static site generators.
 
23
  """
24
 
25
  inputs = {
@@ -27,37 +31,37 @@ class FrontmatterGeneratorTool(Tool):
27
  "type": "string",
28
  "description": "Document content (with or without existing frontmatter)",
29
  },
30
- "title": {"type": "string", "description": "Document title", "optional": True},
31
  "author": {
32
  "type": "string",
33
  "description": "Document author(s)",
34
- "optional": True,
35
  },
36
  "date": {
37
  "type": "string",
38
  "description": "Document date in YYYY-MM-DD format (defaults to today if not provided)",
39
- "optional": True,
40
  },
41
  "date_format": {
42
  "type": "string",
43
  "description": "Format string for the document date (e.g., '%Y-%m-%d', '%d/%m/%Y'). Defaults to '%Y-%m-%d'",
44
- "optional": True,
45
  "default": "%Y-%m-%d",
46
  },
47
  "description": {
48
  "type": "string",
49
  "description": "Brief description of the document",
50
- "optional": True,
51
  },
52
  "tags": {
53
  "type": "string",
54
  "description": "Comma-separated list of tags",
55
- "optional": True,
56
  },
57
  "additional_fields": {
58
  "type": "string",
59
  "description": "JSON string with additional frontmatter fields",
60
- "optional": True,
61
  },
62
  "mode": {
63
  "type": "string",
@@ -130,6 +134,10 @@ class FrontmatterGeneratorTool(Tool):
130
  else:
131
  return "Error: Empty content provided"
132
 
 
 
 
 
133
  # Process based on mode
134
  try:
135
  if mode == "extract":
@@ -191,8 +199,6 @@ class FrontmatterGeneratorTool(Tool):
191
  return {}
192
 
193
  try:
194
- import json
195
-
196
  return json.loads(additional_fields)
197
  except json.JSONDecodeError:
198
  raise ValueError("additional_fields must be a valid JSON string")
@@ -393,56 +399,3 @@ class FrontmatterGeneratorTool(Tool):
393
 
394
  # Combine frontmatter with content
395
  return frontmatter + clean_content
396
-
397
-
398
- # Example usage
399
- def example_usage():
400
- """Simple example of how to use the FrontmatterGeneratorTool"""
401
- tool = FrontmatterGeneratorTool()
402
-
403
- # Example document
404
- document = """# The Role of the Seat in International Arbitration
405
-
406
- This paper examines the significance of the seat in international arbitration proceedings,
407
- with a focus on recent developments in Australian law."""
408
-
409
- # Generate frontmatter
410
- result = tool.forward(
411
- content=document,
412
- title="The Role of the Seat in International Arbitration",
413
- author="Matthew Barry",
414
- date="13 March 2013", # Now handles this format correctly
415
- description="A legal analysis of the role of the seat in international arbitration.",
416
- tags="international arbitration, enforcement, australian courts",
417
- mode="generate",
418
- )
419
-
420
- print(result)
421
- return result
422
-
423
-
424
- # Define what gets imported with "from frontmatter_generator import *"
425
- __all__ = ["FrontmatterGeneratorTool", "example_usage"]
426
-
427
- # Example usage
428
- if __name__ == "__main__":
429
- tool = FrontmatterGeneratorTool()
430
-
431
- # # Example document
432
- # document = """# The Role of the Seat in International Arbitration
433
-
434
- # This paper examines the significance of the seat in international arbitration proceedings,
435
- # with a focus on recent developments in Australian law."""
436
-
437
- # # Generate frontmatter
438
- # result = tool.forward(
439
- # content=document,
440
- # title="The Role of the Seat in International Arbitration",
441
- # author="Matthew Barry",
442
- # date="2013-12-13",
443
- # description="A legal analysis of the role of the seat in international arbitration.",
444
- # tags="international arbitration, enforcement, australian courts",
445
- # mode="generate"
446
- # )
447
-
448
- # print(result)
 
3
 
4
  This tool helps generate consistent YAML frontmatter for documents,
5
  useful for RAG systems, static site generators, and document organization.
6
+ Integrates with TextInspectorTool and MarkdownConverter for a complete
7
+ document processing pipeline.
8
  """
9
 
10
  import re
11
  import yaml
12
+ import json
13
  from datetime import datetime
14
  from typing import Dict, List, Optional, Any, Union
15
  from smolagents import Tool
 
23
  Generates or extracts YAML frontmatter for documents. Frontmatter provides structured
24
  metadata for documents including title, author, date, description, and tags.
25
  Useful for document organization, RAG systems, and static site generators.
26
+ Works with content from the inspect_file_as_text tool to add metadata to documents.
27
  """
28
 
29
  inputs = {
 
31
  "type": "string",
32
  "description": "Document content (with or without existing frontmatter)",
33
  },
34
+ "title": {"type": "string", "description": "Document title", "nullable": True},
35
  "author": {
36
  "type": "string",
37
  "description": "Document author(s)",
38
+ "nullable": True,
39
  },
40
  "date": {
41
  "type": "string",
42
  "description": "Document date in YYYY-MM-DD format (defaults to today if not provided)",
43
+ "nullable": True,
44
  },
45
  "date_format": {
46
  "type": "string",
47
  "description": "Format string for the document date (e.g., '%Y-%m-%d', '%d/%m/%Y'). Defaults to '%Y-%m-%d'",
48
+ "nullable": True,
49
  "default": "%Y-%m-%d",
50
  },
51
  "description": {
52
  "type": "string",
53
  "description": "Brief description of the document",
54
+ "nullable": True,
55
  },
56
  "tags": {
57
  "type": "string",
58
  "description": "Comma-separated list of tags",
59
+ "nullable": True,
60
  },
61
  "additional_fields": {
62
  "type": "string",
63
  "description": "JSON string with additional frontmatter fields",
64
+ "nullable": True,
65
  },
66
  "mode": {
67
  "type": "string",
 
134
  else:
135
  return "Error: Empty content provided"
136
 
137
+ # Special handling for TextInspectorTool output
138
+ if content.startswith("Document content:"):
139
+ content = content[len("Document content:") :].strip()
140
+
141
  # Process based on mode
142
  try:
143
  if mode == "extract":
 
199
  return {}
200
 
201
  try:
 
 
202
  return json.loads(additional_fields)
203
  except json.JSONDecodeError:
204
  raise ValueError("additional_fields must be a valid JSON string")
 
399
 
400
  # Combine frontmatter with content
401
  return frontmatter + clean_content