BabyWriterPRO.v6.1

Build error

App Files Community

Yasu777 committed on May 22, 2024

Commit

a761bb6

verified ·

1 Parent(s): f71c11d

Update article_generator.py

Browse files

Files changed (1) hide show

article_generator.py +183 -98

article_generator.py CHANGED Viewed

@@ -47,15 +47,103 @@ class EnhancedTavilySearchTool:
         else:
             raise Exception(f"Failed to fetch data from Tavily API: {response.status_code}, {response.text}")
-# 重複を排除するヘルパー関数
-def remove_duplicates(text_list):
-    seen = set()
-    result = []
-    for text in text_list:
-        if text not in seen:
-            seen.add(text)
-            result.append(text)
-    return result
 # 記事のセクションをGPT-4で拡張する関数
 def expand_section_with_gpt4(h2_text, h3_texts, preloaded_data):
@@ -109,18 +197,7 @@ def process_standalone_h2(soup):
             new_paragraph.string = expanded_text
             h2.insert_after(new_paragraph)
-def process_summary_section(soup):
-    summary_section = soup.find('h2', text='まとめ')
-    if summary_section:
-        next_paragraph = summary_section.find_next_sibling('p')
-        if not next_paragraph:
-            # もしまとめセクションの後にパラグラフがなければ、デフォルトのテキストを挿入
-            new_paragraph = soup.new_tag('p')
-            new_paragraph.string = "まとめの具体的な内容は現在利用可能ではありません。"
-            summary_section.insert_after(new_paragraph)
-        # 既存のテキストを使用する場合は何もしません（拡張しない）
-def generate_expanded_article(article_html, h3_to_text, cached_responses):
     print("記事を拡張中...")
     soup = BeautifulSoup(article_html, 'html.parser')
     process_standalone_h2(soup)  # 独立した<h2>セクションを処理
@@ -137,75 +214,16 @@ def generate_expanded_article(article_html, h3_to_text, cached_responses):
                 new_paragraph.string = h3_to_text[h3.get_text()]
                 h3.insert_after(new_paragraph)
-    # まとめセクションを特別に処理
-    process_summary_section(soup)
     return str(soup)
-# PlanAndExecuteエージェントをセットアップする関数
-def setup_plan_and_execute_agent():
-    google_search_tool = Tool(
-        name="GoogleSearch",
-        func=GoogleSearchTool().search,
-        description="Search tool using Google API"
-    )
-    tools = [google_search_tool]
-    model_name = "gpt-3.5-turbo-0125"
-    llm = ChatOpenAI(model_name=model_name, temperature=0, max_tokens=1000)
-    planner = load_chat_planner(llm)
-    executor = load_agent_executor(llm, tools, verbose=True)
-    agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)
-    print("PlanAndExecute agent setup complete.")
-    return agent
-# GPT-4を使用してテキストを生成するヘルパー関数
-def generate_text_with_gpt4(prompt):
-    response = openai.ChatCompletion.create(
-        model="gpt-4o",
-        messages=[{"role": "system", "content": "以下についての詳細な情報をまとめ、適宜箇所書き、もしくは表を使ってオリジナルの内容にしてください。"},
-                  {"role": "user", "content": prompt}],
-        temperature=0.7,
-        max_tokens=500
-    )
-    return response.choices[0]["message"]["content"].strip()
-# 初期データをTavily検索で収集する関数
-def perform_initial_tavily_search(h2_texts, h3_texts):
-    tavily_search_tool = EnhancedTavilySearchTool()
-    queries = []
-    for idx, h2_text in enumerate(h2_texts):  # インデックスの取得方法を改善
-        h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{idx+1}-")]
-        query = f"{h2_text} {' '.join(h3_for_this_h2)}"
-        queries.append(query)
-    print("Performing Tavily search with queries:", queries)  # デバッグ情報追加
-    response = tavily_search_tool.search(queries)
-    return {query: response[i] for i, query in enumerate(queries)}
-def save_preloaded_tavily_data(data):
-    with open("preloaded_tavily_data.json", "w", encoding="utf-8") as f:
-        json.dump(data, f, ensure_ascii=False, indent=4)
-    print("Preloaded Tavily data saved.")
-def load_preloaded_tavily_data():
-    with open("preloaded_tavily_data.json", "r", encoding="utf-8") as f:
-        print("Preloaded Tavily data loaded.")
-        return json.load(f)
-def process_heading(agent, h2_text, h3_for_this_h2, cached_responses):
-    query = f"{h2_text} {' '.join(h3_for_this_h2)}"
-    if query in cached_responses:
-        return (query, cached_responses[query])
-    else:
-        return (query, "No cached response found for this heading.")
 # 記事を生成する関数
 def generate_article(editable_output2):
     print("Starting article generation...")
     # エージェントのセットアップ
     agent = setup_plan_and_execute_agent()
@@ -221,9 +239,6 @@ def generate_article(editable_output2):
     cached_responses = perform_initial_tavily_search(h2_texts, h3_texts)
     save_preloaded_tavily_data(cached_responses)
-    executed_instructions = []
-    research_results = []
     with ThreadPoolExecutor(max_workers=5) as executor:
         futures = []
         for h2_text in h2_texts:
@@ -235,6 +250,7 @@ def generate_article(editable_output2):
             if purpose not in executed_instructions:
                 executed_instructions.append(purpose)
                 research_results.append(response)
     print("Tavily search complete.")
@@ -300,25 +316,26 @@ def generate_article(editable_output2):
                 messages=[system_message, user_message],
                 temperature=0.7,
             )
-            generated_text = response.choices[0]["message"]["content"]
-            print(f"Generated content for section {i+1}:")  # 生成された各セクションの内容を出力
-            print(generated_text)
-            results.append(generated_text)
         except Exception as e:
             error_message = f"Error occurred during ChatCompletion: {str(e)}"
             print(error_message)  # ログにエラーメッセージを出力
             results.append(error_message)
     final_result = "\n".join(results)
-    print("Final generated article content:")  # 最終的な記事全体の内容を出力
-    print(final_result)
-    # 重複を排除
-    final_result = remove_duplicates(final_result.split('\n'))
     # 生成された初期記事を拡張
     h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)
-    expanded_article = generate_expanded_article("\n".join(final_result), h3_to_text, cached_responses)
     with open("output3.txt", "w", encoding="utf-8") as f:
         f.write(expanded_article)
@@ -326,4 +343,72 @@ def generate_article(editable_output2):
     print("Article generation complete. Output saved to output3.txt.")
     print(expanded_article)  # ログに最終結果を出力
     return expanded_article

         else:
             raise Exception(f"Failed to fetch data from Tavily API: {response.status_code}, {response.text}")
+# 実行された指示を追跡するリスト
+executed_instructions = []
+# 調査結果を保存するリスト
+research_results = []
+# 生成状態を保存するファイル
+state_file = "state.json"
+# 状態を保存する関数
+def save_state(state):
+    with open(state_file, "w", encoding="utf-8") as f:
+        json.dump(state, f, ensure_ascii=False, indent=4)
+    print("State saved. Current index:", state.get('current_index', 'Not available'))  # インデックス情報をログに出力
+# 状態をロードする関数
+def load_state():
+    if os.path.exists(state_file):
+        with open(state_file, "r", encoding="utf-8") as f:
+            state = json.load(f)
+            print("State loaded. Current index:", state.get('current_index', 'Not available'))  # インデックス情報をログに出力
+            return state
+    print("No state file found.")
+    return None
+# 状態をクリアする関数
+def clear_state():
+    if os.path.exists(state_file):
+        os.remove(state_file)
+    global executed_instructions, research_results
+    executed_instructions = []
+    research_results = []
+    print("State cleared.")
+    return "状態がクリアされました"
+# 見出しを処理する関数
+def process_heading(agent, h2_text, h3_for_this_h2, cached_responses):
+    query = f"{h2_text} {' '.join(h3_for_this_h2)}"
+    if query in cached_responses:
+        return (query, cached_responses[query])
+    else:
+        return (query, "No cached response found for this heading.")
+# 初期データをTavily検索で収集する関数
+def perform_initial_tavily_search(h2_texts, h3_texts):
+    tavily_search_tool = EnhancedTavilySearchTool()
+    queries = []
+    for idx, h2_text in enumerate(h2_texts):  # インデックスの取得方法を改善
+        h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{idx+1}-")]
+        query = f"{h2_text} {' '.join(h3_for_this_h2)}"
+        queries.append(query)
+    print("Performing Tavily search with queries:", queries)  # デバッグ情報追加
+    response = tavily_search_tool.search(queries)
+    return {query: response[i] for i, query in enumerate(queries)}
+# キャッシュされたTavilyデータを保存する関数
+def save_preloaded_tavily_data(data):
+    with open("preloaded_tavily_data.json", "w", encoding="utf-8") as f:
+        json.dump(data, f, ensure_ascii=False, indent=4)
+    print("Preloaded Tavily data saved.")
+# キャッシュされたTavilyデータをロードする関数
+def load_preloaded_tavily_data():
+    with open("preloaded_tavily_data.json", "r", encoding="utf-8") as f:
+        print("Preloaded Tavily data loaded.")
+        return json.load(f)
+# PlanAndExecuteエージェントをセットアップする関数
+def setup_plan_and_execute_agent():
+    google_search_tool = Tool(
+        name="GoogleSearch",
+        func=GoogleSearchTool().search,
+        description="Search tool using Google API"
+    )
+    tools = [google_search_tool]
+    model_name = "gpt-3.5-turbo-0125"
+    llm = ChatOpenAI(model_name=model_name, temperature=0, max_tokens=1000)
+    planner = load_chat_planner(llm)
+    executor = load_agent_executor(llm, tools, verbose=True)
+    agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)
+    print("PlanAndExecute agent setup complete.")
+    return agent
+# GPT-4を使用してテキストを生成するヘルパー関数
+def generate_text_with_gpt4(prompt):
+    response = openai.ChatCompletion.create(
+        model="gpt-4o",
+        messages=[{"role": "system", "content": "以下についての詳細な情報をまとめ、適宜箇所書き、もしくは表を使ってオリジナルの内容にしてください。"},
+                  {"role": "user", "content": prompt}],
+        temperature=0.7,
+        max_tokens=500
+    )
+    return response.choices[0]["message"]["content"].strip()
 # 記事のセクションをGPT-4で拡張する関数
 def expand_section_with_gpt4(h2_text, h3_texts, preloaded_data):
             new_paragraph.string = expanded_text
             h2.insert_after(new_paragraph)
+def generate_expanded_article(article_html, h3_to_text):
     print("記事を拡張中...")
     soup = BeautifulSoup(article_html, 'html.parser')
     process_standalone_h2(soup)  # 独立した<h2>セクションを処理
                 new_paragraph.string = h3_to_text[h3.get_text()]
                 h3.insert_after(new_paragraph)
     return str(soup)
 # 記事を生成する関数
 def generate_article(editable_output2):
     print("Starting article generation...")
+    # 途中から再開する場合のために状態を読み込み
+    state = load_state() or {'executed_instructions': [], 'research_results': [], 'current_index': 0}
+    executed_instructions = state['executed_instructions']
+    research_results = state['research_results']
+    current_index = state['current_index']
     # エージェントのセットアップ
     agent = setup_plan_and_execute_agent()
     cached_responses = perform_initial_tavily_search(h2_texts, h3_texts)
     save_preloaded_tavily_data(cached_responses)
     with ThreadPoolExecutor(max_workers=5) as executor:
         futures = []
         for h2_text in h2_texts:
             if purpose not in executed_instructions:
                 executed_instructions.append(purpose)
                 research_results.append(response)
+                save_state({'executed_instructions': executed_instructions, 'research_results': research_results, 'current_index': h2_texts.index(h2_text) + 1})
     print("Tavily search complete.")
                 messages=[system_message, user_message],
                 temperature=0.7,
             )
+            results.append(response.choices[0]["message"]["content"])
         except Exception as e:
             error_message = f"Error occurred during ChatCompletion: {str(e)}"
             print(error_message)  # ログにエラーメッセージを出力
             results.append(error_message)
+            # 途中で止まった場合の状態を保存
+            save_state({
+                "executed_instructions": executed_instructions,
+                "research_results": research_results,
+                "split_instructions": split_instructions,
+                "results": results,
+                "current_index": i + 1
+            })
+            return error_message
     final_result = "\n".join(results)
     # 生成された初期記事を拡張
     h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)
+    expanded_article = generate_expanded_article(final_result, h3_to_text)
     with open("output3.txt", "w", encoding="utf-8") as f:
         f.write(expanded_article)
     print("Article generation complete. Output saved to output3.txt.")
     print(expanded_article)  # ログに最終結果を出力
+    # 生成が完了したら状態ファイルを削除
+    if os.path.exists("state.json"):
+        os.remove("state.json")
+        print("State file removed.")
     return expanded_article
+def continue_generate_article():
+    print("Continuing article generation...")
+    state = load_state()
+    if not state:
+        return "再開する状態がありません。"
+    executed_instructions = state.get("executed_instructions", [])
+    research_results = state.get("research_results", [])
+    split_instructions = state.get("split_instructions", [])
+    results = state.get("results", [])
+    current_index = state.get("current_index", 0)
+    system_message = {
+        "role": "system",
+        "content": "あなたはプロのライターです。すべての回答を日本語でお願いします。"
+    }
+    for i in range(current_index, len(split_instructions)):
+        user_message = {
+            "role": "user",
+            "content": f"{i+1}/{len(split_instructions)}: {split_instructions[i]}"
+        }
+        try:
+            print(f"Sending instruction chunk {i+1} of {len(split_instructions)} to GPT-4...")
+            response = openai.ChatCompletion.create(
+                model="gpt-4-turbo",
+                messages=[system_message, user_message],
+                temperature=0.7,
+            )
+            results.append(response.choices[0]["message"]["content"])
+        except Exception as e:
+            error_message = f"Error occurred during ChatCompletion: {str(e)}"
+            print(error_message)  # ログにエラーメッセージを出力
+            results.append(error_message)
+            # 途中で止まった場合の状態を保存
+            save_state({
+                "executed_instructions": executed_instructions,
+                "research_results": research_results,
+                "split_instructions": split_instructions,
+                "results": results,
+                "current_index": i + 1
+            })
+            return error_message
+    final_result = "\n".join(results)
+    # 生成された初期記事を拡張
+    h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)
+    expanded_article = generate_expanded_article(final_result, h3_to_text)
+    with open("output3.txt", "w", encoding="utf-8") as f:
+        f.write(expanded_article)
+    print("Article continuation complete. Output saved to output3.txt.")
+    print(expanded_article)  # ログに最終結果を出力
+    # 生成が完了したら状態ファイルを削除
+    if os.path.exists("state.json"):
+        os.remove("state.json")
+        print("State file removed.")
+    return expanded_article