BabyWriterPRO.v6.1

Build error

App Files Files Community

Yasu777 committed on May 23, 2024

Commit

f67455f

verified ·

1 Parent(s): 9a4eb89

Update article_generator.py

Browse files

Files changed (1) hide show

article_generator.py +59 -50

article_generator.py CHANGED Viewed

@@ -58,6 +58,32 @@ def remove_duplicates(text_list):
     return result
 # 記事のセクションをGPT-4で拡張する関数
 def expand_section_with_gpt4(h2_text, h3_texts, preloaded_data):
     prompts = []
     h3_to_text = {}
@@ -134,7 +160,15 @@ def generate_expanded_article(article_html, h3_to_text, cached_responses):
             if h3.get_text() in h3_to_text:
                 new_paragraph = soup.new_tag('p')
                 new_paragraph.string = h3_to_text[h3.get_text()]
-                h3.insert_after(new_paragraph)
     process_summary_section(soup, cached_responses)  # まとめセクションを特別処理し、キャッシュされたレスポンスを渡す
@@ -162,7 +196,7 @@ def setup_plan_and_execute_agent():
 # GPT-4を使用してテキストを生成するヘルパー関数
 def generate_text_with_gpt4(prompt):
     response = openai.ChatCompletion.create(
-        model="gpt-4o",
         messages=[{"role": "system", "content": "以下についての詳細な情報をまとめ、適宜箇所書き、もしくは表を使ってオリジナルの内容にしてください。"},
                   {"role": "user", "content": prompt}],
         temperature=0.7,
@@ -201,30 +235,6 @@ def process_heading(agent, h2_text, h3_for_this_h2, cached_responses):
     else:
         return (query, "No cached response found for this heading.")
-# IDとセクションを付与する関数
-def assign_ids_and_group_sections(html_content):
-    soup = BeautifulSoup(html_content, 'html.parser')
-    h2_elements = soup.find_all('h2')
-    section_id = 1
-    for h2 in h2_elements:
-        section_div = soup.new_tag('div', **{'class': 'section', 'id': f'section-{section_id}'})
-        h2['id'] = f'h2-{section_id}'
-        section_div.append(h2.extract())
-        next_tag = h2.next_sibling
-        while next_tag and (next_tag.name != 'h2'):
-            if next_tag.name == 'h3':
-                h3_id = f'h3-{section_id}-{next_tag.get_text().split()[0]}'
-                next_tag['id'] = h3_id
-                section_div.append(next_tag.extract())
-            next_tag = next_tag.next_sibling
-        h2.insert_before(section_div)
-        section_id += 1
-    return str(soup)
 # 記事を生成する関数
 def generate_article(editable_output2):
     print("Starting article generation...")
@@ -268,31 +278,32 @@ def generate_article(editable_output2):
     research_summary = "\n".join([json.dumps(result) for result in research_results])
     instructions = []
     instructions.append(f"""
-    <h1>{h1_text}</h1>
-    "{h1_text}"に関する導入文を日本語で作成してください。直接的なコピーまたは近いフレーズを避けて、オリジナルな内容にしてください。""")
     sentences = research_summary.split('。')
-    # 質問の数を制限
     max_questions_per_h3 = 2
     for idx, h2_text in enumerate(h2_texts):
         h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{idx+1}-")]
         instructions.append(f"""
-            <h2>{h2_text}</h2>
-            "{h2_text}"に関する導入文を日本語で作成してください。この導入文は、以下の小見出しの内容を考慮してください：{"、".join(h3_for_this_h2)}。直接的なコピーまたは近いフレーズを避けて、オリジナルな内容にしてください。""")
-        for h3 in h3_for_this_h2:
             related_sentences = [sentence for sentence in sentences if h3 in sentence][:max_questions_per_h3]
             if related_sentences:
                 content_for_h3 = "。".join(related_sentences) + "。"
                 instructions.append(f"""
-                    <h3>{h3}</h3>
-                    "{h3}"に関する詳細な内容として、以下の情報を日本語で記述してください：{content_for_h3} ここでも、オリジナルな内容を心がけてください。""")
             else:
                 instructions.append(f"""
-                    <h3>{h3}</h3>
-                    "{h3}"に関する詳細な内容を日本語で記述してください。オリジナルな内容を心がけてください。""")
     # トークン数を制限するためにメッセージを分割
     split_instructions = []
@@ -335,22 +346,20 @@ def generate_article(editable_output2):
     print("Final generated article content:")  # 最終的な記事全体の内容を出力
     print(final_result)
-    # 生成された一時的な記事にIDとグループ化を付与
-    grouped_html = assign_ids_and_group_sections(final_result)
-    print("Grouped HTML content:")  # グループ化されたHTML内容を表示
-    print(grouped_html)
-    # 重複を排除
-    final_result = remove_duplicates(grouped_html.split('\n'))
-    # 生成された初期記事を拡張
-    h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)
-    expanded_article = generate_expanded_article("\n".join(final_result), h3_to_text, cached_responses)
     with open("output3.txt", "w", encoding="utf-8") as f:
-        f.write(expanded_article)
     print("Article generation complete. Output saved to output3.txt.")
-    print(expanded_article)  # ログに最終結果を出力
-    return expanded_article

     return result
 # 記事のセクションをGPT-4で拡張する関数
+def expand_h3_sections(soup, preloaded_data):
+    """Append a generated explanatory paragraph to every <h3> section.
+
+    For each <h3> found in ``soup`` a prompt is built from the heading text.
+    When ``preloaded_data`` has an entry under the key
+    ``"<heading text> <heading id>"``, that entry is added to the prompt as
+    background information.  The text returned by
+    ``generate_text_with_gpt4`` is wrapped in a new <p> tag and placed in
+    the heading's own section.
+
+    Args:
+        soup: Parsed article tree that supports ``find_all`` / ``new_tag``;
+            it is modified in place.
+        preloaded_data: Container looked up with ``key in ...`` and
+            ``...[key]`` to obtain background context for a heading.
+
+    Returns:
+        The same ``soup`` object, after the paragraphs have been inserted.
+    """
+    heading_names = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
+    for h3 in soup.find_all('h3'):
+        h3_text = h3.get_text(strip=True)
+        # The article HTML is model-generated, so a heading may come back
+        # without its id attribute; h3['id'] would raise KeyError there.
+        section_id = h3.get('id', '')
+        key = f"{h3_text} {section_id}"
+        if key in preloaded_data:
+            context = preloaded_data[key]
+            prompt = f"「{h3_text}」について詳しく説明してください。こちらが背景情報です：\n{context}"
+        else:
+            prompt = f"「{h3_text}」について詳しく説明してください。"
+        expanded_text = generate_text_with_gpt4(prompt)
+        new_paragraph = soup.new_tag('p')
+        new_paragraph.string = expanded_text
+        # Place the new paragraph after the element that follows the heading
+        # (normally its existing body paragraph).
+        next_sibling = h3.find_next_sibling()
+        if next_sibling is None:
+            # The heading is the last tag in its parent: append at the end.
+            h3.parent.append(new_paragraph)
+        elif next_sibling.name in heading_names:
+            # The heading has no body of its own and the next tag already
+            # starts another section; inserting after it would attach the
+            # text to the wrong heading, so stay in front of it instead.
+            next_sibling.insert_before(new_paragraph)
+        else:
+            next_sibling.insert_after(new_paragraph)
+    return soup
 def expand_section_with_gpt4(h2_text, h3_texts, preloaded_data):
     prompts = []
     h3_to_text = {}
             if h3.get_text() in h3_to_text:
                 new_paragraph = soup.new_tag('p')
                 new_paragraph.string = h3_to_text[h3.get_text()]
+                # h3タグの次の要素を取得し、その後に追加する
+                next_sibling = h3.find_next_sibling()
+                if next_sibling:
+                    next_sibling.insert_after(new_paragraph)
+                else:
+                    if h3.parent:
+                        h3.insert_after(new_paragraph)
+                    else:
+                        print(f"Error: h3 element '{h3.get_text()}' has no parent.")
     process_summary_section(soup, cached_responses)  # まとめセクションを特別処理し、キャッシュされたレスポンスを渡す
 # GPT-4を使用してテキストを生成するヘルパー関数
 def generate_text_with_gpt4(prompt):
     response = openai.ChatCompletion.create(
+        model="gpt-4-turbo",
         messages=[{"role": "system", "content": "以下についての詳細な情報をまとめ、適宜箇所書き、もしくは表を使ってオリジナルの内容にしてください。"},
                   {"role": "user", "content": prompt}],
         temperature=0.7,
     else:
         return (query, "No cached response found for this heading.")
 # 記事を生成する関数
 def generate_article(editable_output2):
     print("Starting article generation...")
     research_summary = "\n".join([json.dumps(result) for result in research_results])
     instructions = []
+    # IDを含むHTMLプロンプトの作成
     instructions.append(f"""
+    <h1 id="title">{h1_text}</h1>
+    <p>「{h1_text}」に関する導入文を日本語で作成してください。直接的なコピーまたは近いフレーズを避けて、オリジナルな内容にしてください。</p>""")
     sentences = research_summary.split('。')
     max_questions_per_h3 = 2
     for idx, h2_text in enumerate(h2_texts):
         h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{idx+1}-")]
         instructions.append(f"""
+        <div id="section-{idx+1}">
+            <h2 id="h2-{idx+1}">{h2_text}</h2>
+            <p>「{h2_text}」に関する導入文を日本語で作成してください。この導入文は、以下の小見出しの内容を考慮してください：{"、".join(h3_for_this_h2)}。</p>""")
+        for h3_idx, h3 in enumerate(h3_for_this_h2):
             related_sentences = [sentence for sentence in sentences if h3 in sentence][:max_questions_per_h3]
             if related_sentences:
                 content_for_h3 = "。".join(related_sentences) + "。"
                 instructions.append(f"""
+                    <h3 id="h3-{idx+1}-{h3_idx+1}">{h3}</h3>
+                    <p>「{h3}」に関する詳細な内容として、以下の情報を日本語で記述してください：{content_for_h3}</p>""")
             else:
                 instructions.append(f"""
+                    <h3 id="h3-{idx+1}-{h3_idx+1}">{h3}</h3>
+                    <p>「{h3}」に関する詳細な内容を日本語で記述してください。オリジナルな内容を心がけてください。</p>""")
+        instructions.append("</div>")  # 各セクションの終わりにdivタグを閉じる
     # トークン数を制限するためにメッセージを分割
     split_instructions = []
     print("Final generated article content:")  # 最終的な記事全体の内容を出力
     print(final_result)
+    # 更新されたHTMLの解析
+    updated_soup = BeautifulSoup(final_result, 'html.parser')
+    # 初期データをTavily検索で収集する関数
+    h3_texts = [h3.get_text(strip=True) for h3 in updated_soup.find_all('h3')]
+    cached_responses = perform_initial_tavily_search([], h3_texts)
+    save_preloaded_tavily_data(cached_responses)
+    # h3タグの拡張を行う
+    expanded_soup = expand_h3_sections(updated_soup, cached_responses)
+    final_html = str(expanded_soup)
     with open("output3.txt", "w", encoding="utf-8") as f:
+        f.write(final_html)
     print("Article generation complete. Output saved to output3.txt.")
+    return final_html