# BabyWriterPRO.v5.5 / article_generator.py
# (Hugging Face file header: uploaded by Yasu777, commit 6da9245 "Update article_generator.py")
import os
import openai
import json
import requests
from bs4 import BeautifulSoup
from langchain.chat_models import ChatOpenAI
from langchain_experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner
from langchain.llms import OpenAI
from langchain.agents.tools import Tool
# API key configuration (read from environment variables; None if unset)
openai.api_key = os.getenv("OPENAI_API_KEY")
tavily_api_key = os.getenv('TAVILY_API_KEY')
# Tavily APIのカスタムツールを定義
class EnhancedTavilySearchTool:
    """Thin wrapper around the Tavily search REST API.

    Reads the API key from the module-level ``tavily_api_key`` variable.
    """

    def search(self, query):
        """Run a Tavily search and return the list of result dicts.

        Parameters
        ----------
        query : str
            Search query; queries shorter than 5 characters are padded
            with " details" so the API receives enough context.

        Returns
        -------
        list
            The ``results`` array from the Tavily JSON response.

        Raises
        ------
        Exception
            If the API responds with a non-200 status code.
        """
        if len(query) < 5:
            query += " details"  # pad very short queries with extra context
        params = {
            'api_key': tavily_api_key,
            'query': query,
            'max_results': 5,
            'detail_level': 'high'
        }
        # timeout added so a hung API call cannot block generation forever
        response = requests.post('https://api.tavily.com/search', json=params, timeout=30)
        if response.status_code == 200:
            return response.json()['results']
        else:
            raise Exception(f"Failed to fetch data from Tavily API: {response.status_code}, {response.text}")
# List tracking which research instructions have already been executed
executed_instructions = []
# List storing the research results (parallel to executed_instructions)
research_results = []
# File used to persist generation state so an interrupted run can resume
state_file = "state.json"
def save_state(state):
    """Persist *state* to the state file as pretty-printed UTF-8 JSON."""
    serialized = json.dumps(state, ensure_ascii=False, indent=4)
    with open(state_file, "w", encoding="utf-8") as fh:
        fh.write(serialized)
def load_state():
    """Return the previously saved state dict, or None if no state file exists."""
    if not os.path.exists(state_file):
        return None
    with open(state_file, "r", encoding="utf-8") as fh:
        return json.load(fh)
def clear_state():
    """Delete the persisted state file and reset the in-memory tracking lists."""
    global executed_instructions, research_results
    if os.path.exists(state_file):
        os.remove(state_file)
    executed_instructions = []
    research_results = []
    return "状態がクリアされました"
def generate_article(editable_output2):
    """Generate a full Japanese article from an HTML outline.

    The outline in ``editable_output2`` is expected to contain one <h1>,
    several <h2> headings and <h3> sub-headings whose text starts with a
    "<h2-number>-" prefix (e.g. "1-..."). Each <h2> section is researched
    with a plan-and-execute agent backed by the Tavily search tool, then
    the article body is produced section by section with the OpenAI chat
    API. Progress is persisted via save_state() so an interrupted run can
    be resumed with continue_generate_article().

    Parameters
    ----------
    editable_output2 : str
        HTML outline of the article.

    Returns
    -------
    str
        The generated article text (also written to output3.txt), or an
        error message if a step failed (partial progress is saved to the
        state file in that case).
    """
    # Restore any saved progress so a previous run can be resumed.
    state = load_state() or {'executed_instructions': [], 'research_results': [], 'current_index': 0}
    executed_instructions = state['executed_instructions']
    research_results = state['research_results']
    current_index = state['current_index']  # NOTE(review): loaded but never used below

    # Research agent: plan-and-execute planner/executor with the Tavily tool.
    tavily_search_tool = Tool(
        name="TavilySearch",
        func=EnhancedTavilySearchTool().search,
        description="Enhanced search tool using Tavily API"
    )
    tools = [tavily_search_tool]
    model_name = "gpt-3.5-turbo-0125"
    llm = ChatOpenAI(model_name=model_name, temperature=0, max_tokens=1000)
    planner = load_chat_planner(llm)
    executor = load_agent_executor(llm, tools, verbose=True)
    agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)

    # Parse the outline. NOTE(review): raises AttributeError if no <h1> exists.
    soup = BeautifulSoup(editable_output2, 'html.parser')
    h1_text = soup.find('h1').get_text()
    h2_texts = [h2.get_text() for h2 in soup.find_all('h2')]
    h3_texts = [h3.get_text() for h3 in soup.find_all('h3')]

    # Research each <h2> section. enumerate() replaces the original
    # h2_texts.index(h2_text) calls, which mapped duplicate heading texts
    # back to the first occurrence (wrong h3 prefix and wrong checkpoint).
    for h2_index, h2_text in enumerate(h2_texts):
        h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{h2_index + 1}-")]
        # Build the research purpose, with topic-specific hints
        # (person / product / event) based on the outline's headings.
        purpose = f"Research details about {h2_text} including {' and '.join(h3_for_this_h2)}"
        if "人物" in h1_text or any("人物" in h2 for h2 in h2_texts) or any("人物" in h3 for h3 in h3_texts):
            purpose += " including the person's name and career"
        elif "商品" in h1_text or any("商品" in h2 for h2 in h2_texts) or any("商品" in h3 for h3 in h3_texts):
            purpose += " including the brand name, product name, and price"
        elif "イベント" in h1_text or any("イベント" in h2 for h2 in h2_texts) or any("イベント" in h3 for h3 in h3_texts):
            purpose += " including the event's content, schedule, and venue"
        if purpose not in executed_instructions:
            try:
                response = agent.run(purpose)
                executed_instructions.append(purpose)
                research_results.append(response)
                save_state({'executed_instructions': executed_instructions, 'research_results': research_results, 'current_index': h2_index + 1})
            except Exception as e:
                print(f"Error occurred: {str(e)}")
                # Save state on error so the run can be resumed later.
                save_state({'executed_instructions': executed_instructions, 'research_results': research_results, 'current_index': h2_index})
                return str(e)
        else:
            # Already researched in a previous run: reuse the cached result.
            index = executed_instructions.index(purpose)
            response = research_results[index]

    system_message = {
        "role": "system",
        "content": "あなたはプロのライターです。すべての回答を日本語でお願いします。"
    }
    research_summary = "\n".join(research_results)

    # Build per-heading writing instructions (intro for <h1>, intro per
    # <h2>, detail prompt per <h3> seeded with matching research sentences).
    instructions = []
    instructions.append(f"""
<h1>{h1_text}</h1>
"{h1_text}"に関する導入文を日本語で作成してください。直接的なコピーまたは近いフレーズを避けて、オリジナルな内容にしてください。""")
    sentences = research_summary.split('。')
    # Cap how many research sentences feed each <h3> prompt.
    max_questions_per_h3 = 2
    for idx, h2_text in enumerate(h2_texts):
        h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{idx+1}-")]
        instructions.append(f"""
<h2>{h2_text}</h2>
"{h2_text}"に関する導入文を日本語で作成してください。この導入文は、以下の小見出しの内容を考慮してください:{"、".join(h3_for_this_h2)}。直接的なコピーまたは近いフレーズを避けて、オリジナルな内容にしてください。""")
        for h3 in h3_for_this_h2:
            related_sentences = [sentence for sentence in sentences if h3 in sentence][:max_questions_per_h3]
            if related_sentences:
                content_for_h3 = "。".join(related_sentences) + "。"
                instructions.append(f"""
<h3>{h3}</h3>
"{h3}"に関する詳細な内容として、以下の情報を日本語で記述してください:{content_for_h3} ここでも、オリジナルな内容を心がけてください。""")
            else:
                instructions.append(f"""
<h3>{h3}</h3>
"{h3}"に関する詳細な内容を日本語で記述してください。オリジナルな内容を心がけてください。""")

    # Chunk instructions so each request stays under the size cap.
    # NOTE(review): len() counts characters, not tokens — a rough proxy.
    split_instructions = []
    current_chunk = ""
    max_tokens_per_chunk = 8000
    for instruction in instructions:
        if len(current_chunk + instruction) > max_tokens_per_chunk:
            split_instructions.append(current_chunk)
            current_chunk = instruction
        else:
            current_chunk += instruction
    if current_chunk:
        split_instructions.append(current_chunk)

    # Generate each chunk; on failure, save progress and stop.
    results = []
    for i, split_instruction in enumerate(split_instructions):
        user_message = {
            "role": "user",
            "content": f"{i+1}/{len(split_instructions)}: {split_instruction}"
        }
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4o",
                messages=[system_message, user_message],
                temperature=0.7,
            )
            results.append(response.choices[0]["message"]["content"])
        except Exception as e:
            error_message = f"Error occurred during ChatCompletion: {str(e)}"
            print(error_message)  # log the error message
            results.append(error_message)
            # Save state so continue_generate_article() can resume from here.
            save_state({
                "executed_instructions": executed_instructions,
                "research_results": research_results,
                "split_instructions": split_instructions,
                "results": results,
                "current_index": i + 1
            })
            return error_message

    final_result = "\n".join(results)
    with open("output3.txt", "w", encoding="utf-8") as f:
        f.write(final_result)
    print(final_result)  # log the final result
    # Generation finished: remove the resume-state file.
    if os.path.exists(state_file):
        os.remove(state_file)
    return final_result
def continue_generate_article():
    """Resume a generate_article() run that was interrupted mid-generation.

    Loads the saved state, replays the remaining instruction chunks through
    the OpenAI chat API, and writes the combined article to output3.txt.

    Returns
    -------
    str
        The generated article text, an error message if a chunk failed
        (progress is re-saved in that case), or a notice string when there
        is no saved state to resume from.
    """
    state = load_state()
    if not state:
        return "再開する状態がありません。"
    executed_instructions = state.get("executed_instructions", [])
    research_results = state.get("research_results", [])
    split_instructions = state.get("split_instructions", [])
    results = state.get("results", [])
    current_index = state.get("current_index", 0)
    system_message = {
        "role": "system",
        "content": "あなたはプロのライターです。すべての回答を日本語でお願いします。"
    }
    # Replay only the chunks that were not completed before the interruption.
    for i in range(current_index, len(split_instructions)):
        user_message = {
            "role": "user",
            "content": f"{i+1}/{len(split_instructions)}: {split_instructions[i]}"
        }
        try:
            # Use the same model as generate_article() so resumed chunks
            # are produced by the same model as the pre-interruption chunks
            # (was "gpt-4-turbo", inconsistent with the initial run).
            response = openai.ChatCompletion.create(
                model="gpt-4o",
                messages=[system_message, user_message],
                temperature=0.7,
            )
            results.append(response.choices[0]["message"]["content"])
        except Exception as e:
            error_message = f"Error occurred during ChatCompletion: {str(e)}"
            print(error_message)  # log the error message
            results.append(error_message)
            # Save progress so a further resume picks up after this chunk.
            save_state({
                "executed_instructions": executed_instructions,
                "research_results": research_results,
                "split_instructions": split_instructions,
                "results": results,
                "current_index": i + 1
            })
            return error_message
    final_result = "\n".join(results)
    with open("output3.txt", "w", encoding="utf-8") as f:
        f.write(final_result)
    print(final_result)  # log the final result
    # Generation finished: remove the resume-state file.
    if os.path.exists(state_file):
        os.remove(state_file)
    return final_result