Yasu777 commited on
Commit
353385a
·
verified ·
1 Parent(s): c6a028b

Update article_generator.py

Browse files
Files changed (1) hide show
  1. article_generator.py +34 -14
article_generator.py CHANGED
@@ -211,30 +211,47 @@ def generate_text_with_gpt4(prompt):
211
  )
212
  return response.choices[0]["message"]["content"].strip()
213
 
214
- # 初期データをTavily検索で収集する関数
215
  def perform_initial_tavily_search(h2_texts, h3_texts):
216
  tavily_search_tool = EnhancedTavilySearchTool()
217
  queries = []
218
-
 
219
  for idx, h2_text in enumerate(h2_texts):
220
  h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{idx+1}-")]
221
  if not h3_for_this_h2 and h2_text.strip() != "まとめ": # "まとめ" セクションを除外
222
- print(f"No matching h3 elements found for h2: {h2_text} at index {idx+1}")
223
  continue
224
-
225
  query = f"{h2_text} {' '.join(h3_for_this_h2)}"
226
  queries.append(query)
227
 
228
- print("Performing Tavily search with queries:", queries)
229
  responses = tavily_search_tool.search(queries)
230
  response_dict = {}
231
  for i, query in enumerate(queries):
232
- if i < len(responses): # 応答リストの範囲内にあることを確認
233
  response_dict[query] = responses[i]
 
234
  else:
235
  response_dict[query] = "No response received"
236
-
237
- return response_dict
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
  def save_preloaded_tavily_data(data):
240
  with open("preloaded_tavily_data.json", "w", encoding="utf-8") as f:
@@ -268,7 +285,7 @@ def generate_article(editable_output2):
268
 
269
  # 初期のTavily検索
270
  print("Performing initial Tavily search...")
271
- cached_responses = perform_initial_tavily_search(h2_texts, h3_texts)
272
  save_preloaded_tavily_data(cached_responses)
273
 
274
  executed_instructions = []
@@ -369,21 +386,24 @@ def generate_article(editable_output2):
369
 
370
  # 初期データをTavily検索で収集する関数
371
  h3_texts = [h3.get_text(strip=True) for h3 in updated_soup.find_all('h3')]
372
- cached_responses = perform_initial_tavily_search([], h3_texts)
373
  save_preloaded_tavily_data(cached_responses)
374
 
375
  # h3タグの拡張を行う
376
  expanded_soup = expand_h3_sections(updated_soup, cached_responses)
377
 
378
  final_html = str(expanded_soup)
379
- final_markdown = custom_html_to_markdown(final_html)
 
380
 
381
  with open("output3.txt", "w", encoding="utf-8") as f:
382
- f.write(final_html)
383
 
384
- print("Article generation complete. Output saved to output3.txt.")
385
- return final_markdown, final_html
386
 
 
 
387
 
388
  # HTMLをMarkdownに変換する関数
389
  def custom_html_to_markdown(html):
 
211
  )
212
  return response.choices[0]["message"]["content"].strip()
213
 
214
# Collects initial article data via the Tavily API, recording each raw
# result together with its source URL.
def perform_initial_tavily_search(h2_texts, h3_texts):
    """Run one Tavily search per h2 section and return results plus URLs.

    Args:
        h2_texts: list of h2 heading strings, in document order.
        h3_texts: list of h3 heading strings, prefixed "<h2-index>-" so each
            can be matched to its parent h2 (e.g. "2-1 ..." belongs to the
            second h2).

    Returns:
        tuple(dict, dict): (query -> raw search response,
        query -> source URL or 'No URL available').
    """
    # NOTE(review): when h2_texts is empty the loop below builds no queries,
    # so h3_texts passed on their own are ignored entirely — confirm this is
    # intended for the h3-only call site (`perform_initial_tavily_search([], h3_texts)`).
    tavily_search_tool = EnhancedTavilySearchTool()
    queries = []
    references = {}  # query -> source URL

    for idx, h2_text in enumerate(h2_texts):
        # h3 headings carry a "<h2-index>-" prefix tying them to their h2.
        h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{idx+1}-")]
        if not h3_for_this_h2 and h2_text.strip() != "まとめ":  # keep the summary section even without h3s
            continue
        queries.append(f"{h2_text} {' '.join(h3_for_this_h2)}")

    responses = tavily_search_tool.search(queries)
    response_dict = {}
    for i, query in enumerate(queries):
        if i < len(responses):
            response = responses[i]
            response_dict[query] = response
            # Guard: a response may be a plain string (this function itself
            # stores strings for missing responses), and calling .get() on a
            # non-dict would raise AttributeError.
            if isinstance(response, dict):
                references[query] = response.get('url', 'No URL available')
            else:
                references[query] = 'No URL available'
        else:
            response_dict[query] = "No response received"

    return response_dict, references
238
+
239
# Appends a "参照" (references) section with source URLs to the end of the article.
def add_references_section(soup, references):
    """Append a references <div> containing one link per unique source URL.

    Args:
        soup: BeautifulSoup document to extend (mutated in place).
        references: mapping of search query -> source URL; values equal to
            'No URL available' are placeholders, not real URLs.

    Returns:
        The same soup object, with the references section appended.
    """
    ref_section = soup.new_tag('div', **{'class': 'references'})
    ref_title = soup.new_tag('h2')
    ref_title.string = "参照"
    ref_section.append(ref_title)

    seen = set()
    for url in references.values():
        # Skip the placeholder stored when a search result carried no URL
        # (otherwise it would be rendered as a broken <a href> link), and
        # emit each real URL only once even if several queries shared it.
        if url == 'No URL available' or url in seen:
            continue
        seen.add(url)
        ref_tag = soup.new_tag('p')
        ref_link = soup.new_tag('a', href=url)
        ref_link.string = url
        ref_tag.append(ref_link)
        ref_section.append(ref_tag)

    soup.append(ref_section)
    return soup
255
 
256
  def save_preloaded_tavily_data(data):
257
  with open("preloaded_tavily_data.json", "w", encoding="utf-8") as f:
 
285
 
286
  # 初期のTavily検索
287
  print("Performing initial Tavily search...")
288
+ cached_responses, references = perform_initial_tavily_search(h2_texts, h3_texts)
289
  save_preloaded_tavily_data(cached_responses)
290
 
291
  executed_instructions = []
 
386
 
387
  # 初期データをTavily検索で収集する関数
388
  h3_texts = [h3.get_text(strip=True) for h3 in updated_soup.find_all('h3')]
389
+ cached_responses, references = perform_initial_tavily_search([], h3_texts)
390
  save_preloaded_tavily_data(cached_responses)
391
 
392
  # h3タグの拡張を行う
393
  expanded_soup = expand_h3_sections(updated_soup, cached_responses)
394
 
395
  final_html = str(expanded_soup)
396
+ final_soup_with_refs = add_references_section(BeautifulSoup(final_html, 'html.parser'), references) # 参照セクションの追加
397
+ final_html_with_refs = str(final_soup_with_refs)
398
 
399
  with open("output3.txt", "w", encoding="utf-8") as f:
400
+ f.write(final_html_with_refs)
401
 
402
+ print("Final generated article content with references:")
403
+ print(final_html_with_refs)
404
 
405
+ print("Article generation complete. Output saved to output3.txt.")
406
+ return final_html_with_refs
407
 
408
  # HTMLをMarkdownに変換する関数
409
  def custom_html_to_markdown(html):