# BabyWriterPRO.v5.5 / article_generator.py
# (Hugging Face file header: uploaded by Yasu777, commit 6da9245 "Update article_generator.py")
import os
import openai
import json
import requests
from bs4 import BeautifulSoup
from langchain.chat_models import ChatOpenAI
from langchain_experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner
from langchain.llms import OpenAI
from langchain.agents.tools import Tool
# API key configuration (read from environment variables; None if unset)
openai.api_key = os.getenv("OPENAI_API_KEY")
tavily_api_key = os.getenv('TAVILY_API_KEY')
# Tavily APIのカスタムツールを定義
class EnhancedTavilySearchTool:
    """Thin wrapper around the Tavily search REST API.

    Reads the API key from the module-level ``tavily_api_key`` variable.
    """

    def search(self, query):
        """Run a Tavily search and return the list of result dicts.

        Parameters
        ----------
        query : str
            Search query; queries shorter than 5 characters are padded
            with " details" so the API receives enough context.

        Returns
        -------
        list
            The ``results`` array from the Tavily JSON response.

        Raises
        ------
        Exception
            If the API responds with a non-200 status code.
        """
        if len(query) < 5:
            query += " details"  # pad very short queries with extra context
        params = {
            'api_key': tavily_api_key,
            'query': query,
            'max_results': 5,
            'detail_level': 'high'
        }
        # timeout added so a hung API call cannot block generation forever
        response = requests.post('https://api.tavily.com/search', json=params, timeout=30)
        if response.status_code == 200:
            return response.json()['results']
        else:
            raise Exception(f"Failed to fetch data from Tavily API: {response.status_code}, {response.text}")
# List tracking which research instructions have already been executed
executed_instructions = []
# List storing the research results (parallel to executed_instructions)
research_results = []
# File used to persist generation state so an interrupted run can resume
state_file = "state.json"
def save_state(state):
    """Persist *state* to the state file as pretty-printed UTF-8 JSON."""
    serialized = json.dumps(state, ensure_ascii=False, indent=4)
    with open(state_file, "w", encoding="utf-8") as fh:
        fh.write(serialized)
def load_state():
    """Return the previously saved state dict, or None if no state file exists."""
    if not os.path.exists(state_file):
        return None
    with open(state_file, "r", encoding="utf-8") as fh:
        return json.load(fh)
def clear_state():
    """Delete the persisted state file and reset the in-memory tracking lists."""
    global executed_instructions, research_results
    if os.path.exists(state_file):
        os.remove(state_file)
    executed_instructions = []
    research_results = []
    return "状態がクリアされました"
def generate_article(editable_output2):
    """Generate a full Japanese article from an HTML outline.

    The outline in ``editable_output2`` is expected to contain one <h1>,
    several <h2> headings and <h3> sub-headings whose text starts with a
    "<h2-number>-" prefix (e.g. "1-..."). Each <h2> section is researched
    with a plan-and-execute agent backed by the Tavily search tool, then
    the article body is produced section by section with the OpenAI chat
    API. Progress is persisted via save_state() so an interrupted run can
    be resumed with continue_generate_article().

    Parameters
    ----------
    editable_output2 : str
        HTML outline of the article.

    Returns
    -------
    str
        The generated article text (also written to output3.txt), or an
        error message if a step failed (partial progress is saved to the
        state file in that case).
    """
    # Restore any saved progress so a previous run can be resumed.
    state = load_state() or {'executed_instructions': [], 'research_results': [], 'current_index': 0}
    executed_instructions = state['executed_instructions']
    research_results = state['research_results']
    current_index = state['current_index']  # NOTE(review): loaded but never used below

    # Research agent: plan-and-execute planner/executor with the Tavily tool.
    tavily_search_tool = Tool(
        name="TavilySearch",
        func=EnhancedTavilySearchTool().search,
        description="Enhanced search tool using Tavily API"
    )
    tools = [tavily_search_tool]
    model_name = "gpt-3.5-turbo-0125"
    llm = ChatOpenAI(model_name=model_name, temperature=0, max_tokens=1000)
    planner = load_chat_planner(llm)
    executor = load_agent_executor(llm, tools, verbose=True)
    agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)

    # Parse the outline. NOTE(review): raises AttributeError if no <h1> exists.
    soup = BeautifulSoup(editable_output2, 'html.parser')
    h1_text = soup.find('h1').get_text()
    h2_texts = [h2.get_text() for h2 in soup.find_all('h2')]
    h3_texts = [h3.get_text() for h3 in soup.find_all('h3')]

    # Research each <h2> section. enumerate() replaces the original
    # h2_texts.index(h2_text) calls, which mapped duplicate heading texts
    # back to the first occurrence (wrong h3 prefix and wrong checkpoint).
    for h2_index, h2_text in enumerate(h2_texts):
        h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{h2_index + 1}-")]
        # Build the research purpose, with topic-specific hints
        # (person / product / event) based on the outline's headings.
        purpose = f"Research details about {h2_text} including {' and '.join(h3_for_this_h2)}"
        if "人物" in h1_text or any("人物" in h2 for h2 in h2_texts) or any("人物" in h3 for h3 in h3_texts):
            purpose += " including the person's name and career"
        elif "商品" in h1_text or any("商品" in h2 for h2 in h2_texts) or any("商品" in h3 for h3 in h3_texts):
            purpose += " including the brand name, product name, and price"
        elif "イベント" in h1_text or any("イベント" in h2 for h2 in h2_texts) or any("イベント" in h3 for h3 in h3_texts):
            purpose += " including the event's content, schedule, and venue"
        if purpose not in executed_instructions:
            try:
                response = agent.run(purpose)
                executed_instructions.append(purpose)
                research_results.append(response)
                save_state({'executed_instructions': executed_instructions, 'research_results': research_results, 'current_index': h2_index + 1})
            except Exception as e:
                print(f"Error occurred: {str(e)}")
                # Save state on error so the run can be resumed later.
                save_state({'executed_instructions': executed_instructions, 'research_results': research_results, 'current_index': h2_index})
                return str(e)
        else:
            # Already researched in a previous run: reuse the cached result.
            index = executed_instructions.index(purpose)
            response = research_results[index]

    system_message = {
        "role": "system",
        "content": "あなたはプロのライターです。すべての回答を日本語でお願いします。"
    }
    research_summary = "\n".join(research_results)

    # Build per-heading writing instructions (intro for <h1>, intro per
    # <h2>, detail prompt per <h3> seeded with matching research sentences).
    instructions = []
    instructions.append(f"""
<h1>{h1_text}</h1>
"{h1_text}"に関する導入文を日本語で作成してください。直接的なコピーまたは近いフレーズを避けて、オリジナルな内容にしてください。""")
    sentences = research_summary.split('。')
    # Cap how many research sentences feed each <h3> prompt.
    max_questions_per_h3 = 2
    for idx, h2_text in enumerate(h2_texts):
        h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{idx+1}-")]
        instructions.append(f"""
<h2>{h2_text}</h2>
"{h2_text}"に関する導入文を日本語で作成してください。この導入文は、以下の小見出しの内容を考慮してください:{"、".join(h3_for_this_h2)}。直接的なコピーまたは近いフレーズを避けて、オリジナルな内容にしてください。""")
        for h3 in h3_for_this_h2:
            related_sentences = [sentence for sentence in sentences if h3 in sentence][:max_questions_per_h3]
            if related_sentences:
                content_for_h3 = "。".join(related_sentences) + "。"
                instructions.append(f"""
<h3>{h3}</h3>
"{h3}"に関する詳細な内容として、以下の情報を日本語で記述してください:{content_for_h3} ここでも、オリジナルな内容を心がけてください。""")
            else:
                instructions.append(f"""
<h3>{h3}</h3>
"{h3}"に関する詳細な内容を日本語で記述してください。オリジナルな内容を心がけてください。""")

    # Chunk instructions so each request stays under the size cap.
    # NOTE(review): len() counts characters, not tokens — a rough proxy.
    split_instructions = []
    current_chunk = ""
    max_tokens_per_chunk = 8000
    for instruction in instructions:
        if len(current_chunk + instruction) > max_tokens_per_chunk:
            split_instructions.append(current_chunk)
            current_chunk = instruction
        else:
            current_chunk += instruction
    if current_chunk:
        split_instructions.append(current_chunk)

    # Generate each chunk; on failure, save progress and stop.
    results = []
    for i, split_instruction in enumerate(split_instructions):
        user_message = {
            "role": "user",
            "content": f"{i+1}/{len(split_instructions)}: {split_instruction}"
        }
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4o",
                messages=[system_message, user_message],
                temperature=0.7,
            )
            results.append(response.choices[0]["message"]["content"])
        except Exception as e:
            error_message = f"Error occurred during ChatCompletion: {str(e)}"
            print(error_message)  # log the error message
            results.append(error_message)
            # Save state so continue_generate_article() can resume from here.
            save_state({
                "executed_instructions": executed_instructions,
                "research_results": research_results,
                "split_instructions": split_instructions,
                "results": results,
                "current_index": i + 1
            })
            return error_message

    final_result = "\n".join(results)
    with open("output3.txt", "w", encoding="utf-8") as f:
        f.write(final_result)
    print(final_result)  # log the final result
    # Generation finished: remove the resume-state file.
    if os.path.exists(state_file):
        os.remove(state_file)
    return final_result
def continue_generate_article():
    """Resume a generate_article() run that was interrupted mid-generation.

    Loads the saved state, replays the remaining instruction chunks through
    the OpenAI chat API, and writes the combined article to output3.txt.

    Returns
    -------
    str
        The generated article text, an error message if a chunk failed
        (progress is re-saved in that case), or a notice string when there
        is no saved state to resume from.
    """
    state = load_state()
    if not state:
        return "再開する状態がありません。"
    executed_instructions = state.get("executed_instructions", [])
    research_results = state.get("research_results", [])
    split_instructions = state.get("split_instructions", [])
    results = state.get("results", [])
    current_index = state.get("current_index", 0)
    system_message = {
        "role": "system",
        "content": "あなたはプロのライターです。すべての回答を日本語でお願いします。"
    }
    # Replay only the chunks that were not completed before the interruption.
    for i in range(current_index, len(split_instructions)):
        user_message = {
            "role": "user",
            "content": f"{i+1}/{len(split_instructions)}: {split_instructions[i]}"
        }
        try:
            # Use the same model as generate_article() so resumed chunks
            # are produced by the same model as the pre-interruption chunks
            # (was "gpt-4-turbo", inconsistent with the initial run).
            response = openai.ChatCompletion.create(
                model="gpt-4o",
                messages=[system_message, user_message],
                temperature=0.7,
            )
            results.append(response.choices[0]["message"]["content"])
        except Exception as e:
            error_message = f"Error occurred during ChatCompletion: {str(e)}"
            print(error_message)  # log the error message
            results.append(error_message)
            # Save progress so a further resume picks up after this chunk.
            save_state({
                "executed_instructions": executed_instructions,
                "research_results": research_results,
                "split_instructions": split_instructions,
                "results": results,
                "current_index": i + 1
            })
            return error_message
    final_result = "\n".join(results)
    with open("output3.txt", "w", encoding="utf-8") as f:
        f.write(final_result)
    print(final_result)  # log the final result
    # Generation finished: remove the resume-state file.
    if os.path.exists(state_file):
        os.remove(state_file)
    return final_result