juanmaguitar committed on
Commit
0c3a95b
·
1 Parent(s): 2fabcd9

Tool to convert HTML to blocks

Browse files
Files changed (4) hide show
  1. .gitignore +2 -1
  2. app.py +7 -1
  3. requirements.txt +3 -2
  4. tools/html_to_wp_blocks.py +111 -0
.gitignore CHANGED
@@ -1 +1,2 @@
1
- instructions.md
 
 
1
+ instructions.md
2
+ .DS_Store
app.py CHANGED
@@ -10,6 +10,7 @@ from tools.wordpress_post import WordPressPostTool
10
  from tools.blog_generator import BlogGeneratorTool
11
  from tools.visit_webpage import VisitWebpageTool
12
  from tools.web_search import DuckDuckGoSearchTool
 
13
 
14
  from Gradio_UI import GradioUI
15
 
@@ -156,6 +157,7 @@ with open("prompts.yaml", 'r') as stream:
156
  # Initialize WordPress tools
157
  wordpress_post = WordPressPostTool(wordpress_credentials)
158
  blog_generator = BlogGeneratorTool(model=model)
 
159
 
160
  agent = CodeAgent(
161
  model=model,
@@ -167,6 +169,7 @@ agent = CodeAgent(
167
  # get_weather,
168
  wordpress_post,
169
  blog_generator,
 
170
  visit_webpage,
171
  web_search
172
  ],
@@ -185,15 +188,18 @@ prompt_templates["system_prompt"] += """
185
  You are also capable of managing a WordPress blog through the following tools:
186
  - wordpress_post: Publishes posts to WordPress
187
  - blog_generator: Generates AI-written blog posts
 
188
 
189
  Always check credentials before attempting to post content.
190
 
191
  Example WordPress workflow:
192
  1. Set credentials (first time only)
193
  2. Generate blog content
194
- 3. Publish to WordPress
 
195
 
196
  Remember to:
 
197
  - Validate WordPress credentials before posting
198
  - Generate high-quality, relevant content
199
  - Handle errors gracefully
 
10
  from tools.blog_generator import BlogGeneratorTool
11
  from tools.visit_webpage import VisitWebpageTool
12
  from tools.web_search import DuckDuckGoSearchTool
13
+ from tools.html_to_wp_blocks import HTMLToWPBlocksTool
14
 
15
  from Gradio_UI import GradioUI
16
 
 
157
  # Initialize WordPress tools
158
  wordpress_post = WordPressPostTool(wordpress_credentials)
159
  blog_generator = BlogGeneratorTool(model=model)
160
+ html_to_blocks = HTMLToWPBlocksTool()
161
 
162
  agent = CodeAgent(
163
  model=model,
 
169
  # get_weather,
170
  wordpress_post,
171
  blog_generator,
172
+ html_to_blocks,
173
  visit_webpage,
174
  web_search
175
  ],
 
188
  You are also capable of managing a WordPress blog through the following tools:
189
  - wordpress_post: Publishes posts to WordPress
190
  - blog_generator: Generates AI-written blog posts
191
+ - html_to_wp_blocks: Converts HTML content to WordPress Gutenberg blocks format
192
 
193
  Always check credentials before attempting to post content.
194
 
195
  Example WordPress workflow:
196
  1. Set credentials (first time only)
197
  2. Generate blog content
198
+ 3. Convert HTML content to WordPress blocks format
199
+ 4. Publish blocks-formatted content to WordPress
200
 
201
  Remember to:
202
+ - ALWAYS convert HTML content to WordPress blocks format before publishing
203
  - Validate WordPress credentials before posting
204
  - Generate high-quality, relevant content
205
  - Handle errors gracefully
requirements.txt CHANGED
@@ -4,6 +4,7 @@ requests>=2.31.0
4
  duckduckgo_search>=4.1.0
5
  pandas>=2.0.0
6
  pytz>=2024.1
7
- pyyaml>=6.0.1
8
  gradio>=5.15.0
9
- python-dotenv>=1.0.0
 
 
4
  duckduckgo_search>=4.1.0
5
  pandas>=2.0.0
6
  pytz>=2024.1
7
+ pyyaml>=6.0.1
8
  gradio>=5.15.0
9
+ python-dotenv>=1.0.0
10
+ beautifulsoup4>=4.12.0
tools/html_to_wp_blocks.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+ import json
3
+ from typing import Dict, List, Optional
4
+ from smolagents.tools import Tool
5
+
6
+
7
class HTMLToWPBlocksTool(Tool):
    """Wrap top-level HTML elements in WordPress Gutenberg block delimiters.

    Each top-level element whose tag appears in ``BLOCK_MAPPINGS`` is emitted
    as ``<!-- wp:NAME {attrs} -->``, the element's own markup, and the
    matching ``<!-- /wp:NAME -->`` closer. Elements with unmapped tags are
    passed through unchanged.

    NOTE(review): the output is block-delimited HTML; it is not guaranteed to
    match the exact markup the block editor itself would save for every block
    type (e.g. editor-added ``wp-block-*`` classes are not generated).
    """

    name = "html_to_wp_blocks"
    description = "Transforms HTML content into WordPress Gutenberg blocks"
    inputs = {
        'html_content': {'type': 'string', 'description': 'The HTML content to transform'},
        'preserve_classes': {
            'type': 'boolean',
            'description': 'Whether to preserve HTML class attributes as block attributes',
            'nullable': True
        }
    }
    output_type = "string"

    # Mapping of HTML elements to WordPress block names
    BLOCK_MAPPINGS = {
        'p': 'core/paragraph',
        'h1': 'core/heading',
        'h2': 'core/heading',
        'h3': 'core/heading',
        'h4': 'core/heading',
        'h5': 'core/heading',
        'h6': 'core/heading',
        'ul': 'core/list',
        'ol': 'core/list',
        'li': 'core/list-item',
        'img': 'core/image',
        'figure': 'core/image',
        'blockquote': 'core/quote',
        'pre': 'core/code',
        'code': 'core/code',
        'table': 'core/table',
    }

    # Heading tags that carry a numeric level in their second character.
    _HEADING_TAGS = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')

    # CSS classes that translate to the block "align" attribute. When several
    # are present, the one listed last here wins.
    _ALIGNMENT_CLASSES = (
        'alignleft', 'alignright', 'aligncenter', 'alignwide', 'alignfull',
    )

    # The attribute JSON lives inside an HTML comment, so sequences that could
    # terminate or confuse the comment are rewritten as JSON unicode escapes.
    # They decode back to the same characters when the JSON is parsed.
    _COMMENT_UNSAFE = (
        ('--', '\\u002d\\u002d'),
        ('<', '\\u003c'),
        ('>', '\\u003e'),
        ('&', '\\u0026'),
    )

    def __init__(self):
        super().__init__()

    @staticmethod
    def _class_list(element) -> List[str]:
        """Return the element's CSS classes as a list (empty when absent)."""
        classes = element.get('class') or []
        # Depending on parser configuration the value may be a plain string.
        if isinstance(classes, str):
            return classes.split()
        return list(classes)

    def _serialize_attributes(self, attrs: Dict) -> str:
        """Encode block attributes as JSON that is safe inside an HTML comment."""
        encoded = json.dumps(attrs)
        for raw, escaped in self._COMMENT_UNSAFE:
            encoded = encoded.replace(raw, escaped)
        return encoded

    def _get_block_attributes(self, element) -> Dict:
        """Extract relevant attributes from HTML element for block attributes.

        Args:
            element: A parsed HTML tag.

        Returns:
            A dict that may contain ``level`` (headings), ``align`` (from an
            ``align*`` class) and ``url`` / ``alt`` (images, including an
            ``<img>`` nested inside a ``<figure>``).
        """
        attrs = {}
        tag = element.name or ''

        # Handle heading levels
        if tag in self._HEADING_TAGS:
            attrs['level'] = int(tag[1])

        # Handle alignment
        classes = self._class_list(element)
        for align in self._ALIGNMENT_CLASSES:
            if align in classes:
                attrs['align'] = align[len('align'):]

        # Handle images; a <figure> is mapped to the image block as well, so
        # look for the <img> it wraps.
        image = None
        if tag == 'img':
            image = element
        elif tag == 'figure':
            image = element.find('img')
        if image is not None:
            attrs['url'] = image.get('src', '')
            if image.get('alt'):
                attrs['alt'] = image['alt']

        return attrs

    def _element_to_block(self, element, preserve_classes: bool = False) -> str:
        """Convert a single HTML element to a WordPress block.

        Args:
            element: A parsed HTML tag.
            preserve_classes: When true, copy the element's ``class``
                attribute into the block's ``className`` attribute.

        Returns:
            The element's markup wrapped in block comment delimiters, or the
            element's markup unchanged when its tag has no block mapping.
        """
        if element.name not in self.BLOCK_MAPPINGS:
            return str(element)

        block_name = self.BLOCK_MAPPINGS[element.name]
        attrs = self._get_block_attributes(element)

        if preserve_classes and 'class' in element.attrs:
            attrs['className'] = ' '.join(self._class_list(element))

        # Keep the element itself, not just its children: the block body must
        # contain the wrapping tag (<p>, <h2>, <ul>, ...), and void elements
        # such as <img> have no children at all.
        markup = str(element).strip()

        # Create block comment wrapper
        block_start = f'<!-- wp:{block_name}'
        if attrs:
            block_start += f' {self._serialize_attributes(attrs)}'
        block_start += ' -->'

        block_end = f'<!-- /wp:{block_name} -->'

        return f'{block_start}\n{markup}\n{block_end}'

    def forward(self, html_content: str, preserve_classes: bool = False) -> str:
        """Transform HTML content into WordPress blocks

        Args:
            html_content: The HTML content to transform
            preserve_classes: Whether to preserve HTML class attributes

        Returns:
            String containing the WordPress block representation, with blocks
            separated by a blank line. Empty or missing input yields an empty
            string. Only top-level elements are converted; text that sits
            outside any element at the top level is not included.
        """
        if not html_content:
            return ''

        soup = BeautifulSoup(html_content, 'html.parser')
        keep_classes = bool(preserve_classes)  # the input is nullable

        # find_all() yields tags only, so no extra filtering is required.
        blocks = [
            self._element_to_block(element, keep_classes)
            for element in soup.find_all(recursive=False)
        ]

        return '\n\n'.join(blocks)