Yasu777 committed on
Commit
9ffd686
·
verified ·
1 Parent(s): db5314a

Update article_generator.py

Browse files
Files changed (1) hide show
  1. article_generator.py +28 -1
article_generator.py CHANGED
@@ -201,6 +201,30 @@ def process_heading(agent, h2_text, h3_for_this_h2, cached_responses):
201
  else:
202
  return (query, "No cached response found for this heading.")
203
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  # 記事を生成する関数
205
  def generate_article(editable_output2):
206
  print("Starting article generation...")
@@ -311,8 +335,11 @@ def generate_article(editable_output2):
311
  print("Final generated article content:") # 最終的な記事全体の内容を出力
312
  print(final_result)
313
 
 
 
 
314
  # 重複を排除
315
- final_result = remove_duplicates(final_result.split('\n'))
316
 
317
  # 生成された初期記事を拡張
318
  h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)
 
201
  else:
202
  return (query, "No cached response found for this heading.")
203
 
204
# Assign IDs to headings and group each h2 with its following content into a section
def assign_ids_and_group_sections(html_content):
    """Wrap every <h2> and the content that follows it in a section <div>.

    For the n-th <h2> in the document (1-based) this:
      * sets ``id="h2-n"`` on the <h2>,
      * creates ``<div class="section" id="section-n">`` at the <h2>'s
        position,
      * moves the <h2> and every following sibling, up to but not including
        the next sibling <h2>, into that div,
      * gives each <h3> moved this way ``id="h3-n-<first word of its text>"``.

    Args:
        html_content: HTML markup to process, as a string.

    Returns:
        The transformed document serialized back to a string.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    for section_id, h2 in enumerate(soup.find_all('h2'), start=1):
        section_div = soup.new_tag(
            'div', **{'class': 'section', 'id': f'section-{section_id}'}
        )
        h2['id'] = f'h2-{section_id}'

        # Place the wrapper while the h2 is still attached to the document;
        # once the h2 has been moved there is no position left to insert at.
        h2.insert_before(section_div)

        # Snapshot the nodes belonging to this section before moving any of
        # them: extracting a node detaches it from its siblings, so the chain
        # cannot be walked while it is being modified.
        members = [h2]
        for sibling in h2.next_siblings:
            # Text nodes report name None, so only a real <h2> tag stops here.
            if sibling.name == 'h2':
                break
            members.append(sibling)

        h3_count = 0
        for node in members:
            if node.name == 'h3':
                h3_count += 1
                words = node.get_text().split()
                # An empty <h3> has no first word; fall back to its ordinal
                # within the section instead of raising IndexError.
                suffix = words[0] if words else str(h3_count)
                node['id'] = f'h3-{section_id}-{suffix}'
            section_div.append(node.extract())

    return str(soup)
227
+
228
  # 記事を生成する関数
229
  def generate_article(editable_output2):
230
  print("Starting article generation...")
 
335
  print("Final generated article content:") # 最終的な記事全体の内容を出力
336
  print(final_result)
337
 
338
+ # 生成された一時的な記事にIDとグループ化を付与
339
+ grouped_html = assign_ids_and_group_sections(final_result)
340
+
341
  # 重複を排除
342
+ final_result = remove_duplicates(grouped_html.split('\n'))
343
 
344
  # 生成された初期記事を拡張
345
  h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)