Yasu777 committed on
Commit
79ef5ed
·
verified ·
1 Parent(s): 7b81001

Update article_generator.py

Browse files
Files changed (1) hide show
  1. article_generator.py +121 -11
article_generator.py CHANGED
@@ -59,12 +59,16 @@ state_file = "state.json"
59
  def save_state(state):
60
  with open(state_file, "w", encoding="utf-8") as f:
61
  json.dump(state, f, ensure_ascii=False, indent=4)
 
62
 
63
  # 状態をロードする関数
64
  def load_state():
65
  if os.path.exists(state_file):
66
  with open(state_file, "r", encoding="utf-8") as f:
67
- return json.load(f)
 
 
 
68
  return None
69
 
70
  # 状態をクリアする関数
@@ -74,6 +78,7 @@ def clear_state():
74
  global executed_instructions, research_results
75
  executed_instructions = []
76
  research_results = []
 
77
  return "状態がクリアされました"
78
 
79
  # 見出しを処理する関数
@@ -89,22 +94,25 @@ def perform_initial_tavily_search(h2_texts, h3_texts):
89
  tavily_search_tool = EnhancedTavilySearchTool()
90
  queries = []
91
 
92
- for h2_text in h2_texts:
93
- h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{h2_texts.index(h2_text)+1}-")]
94
  query = f"{h2_text} {' '.join(h3_for_this_h2)}"
95
  queries.append(query)
96
 
97
- response = tavily_search_tool.search(queries) # 一回のリクエストで全てのクエリを処理
98
- return {query: response[i] for i, query in enumerate(queries)} # 結果を適切にマッピング
 
99
 
100
  # キャッシュされたTavilyデータを保存する関数
101
  def save_preloaded_tavily_data(data):
102
  with open("preloaded_tavily_data.json", "w", encoding="utf-8") as f:
103
  json.dump(data, f, ensure_ascii=False, indent=4)
 
104
 
105
  # キャッシュされたTavilyデータをロードする関数
106
  def load_preloaded_tavily_data():
107
  with open("preloaded_tavily_data.json", "r", encoding="utf-8") as f:
 
108
  return json.load(f)
109
 
110
  # PlanAndExecuteエージェントをセットアップする関数
@@ -123,10 +131,94 @@ def setup_plan_and_execute_agent():
123
  executor = load_agent_executor(llm, tools, verbose=True)
124
 
125
  agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)
 
126
  return agent
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  # 記事を生成する関数
129
  def generate_article(editable_output2):
 
130
  # 途中から再開する場合のために状態を読み込み
131
  state = load_state() or {'executed_instructions': [], 'research_results': [], 'current_index': 0}
132
  executed_instructions = state['executed_instructions']
@@ -143,6 +235,7 @@ def generate_article(editable_output2):
143
  h3_texts = [h3.get_text() for h3 in soup.find_all('h3')]
144
 
145
  # 初期のTavily検索
 
146
  cached_responses = perform_initial_tavily_search(h2_texts, h3_texts)
147
  save_preloaded_tavily_data(cached_responses)
148
 
@@ -159,6 +252,8 @@ def generate_article(editable_output2):
159
  research_results.append(response)
160
  save_state({'executed_instructions': executed_instructions, 'research_results': research_results, 'current_index': h2_texts.index(h2_text) + 1})
161
 
 
 
162
  system_message = {
163
  "role": "system",
164
  "content": "あなたはプロのライターです。すべての回答を日本語でお願いします。"
@@ -215,6 +310,7 @@ def generate_article(editable_output2):
215
  "content": f"{i+1}/{len(split_instructions)}: {split_instruction}"
216
  }
217
  try:
 
218
  response = openai.ChatCompletion.create(
219
  model="gpt-4-turbo",
220
  messages=[system_message, user_message],
@@ -237,18 +333,25 @@ def generate_article(editable_output2):
237
 
238
  final_result = "\n".join(results)
239
 
 
 
 
 
240
  with open("output3.txt", "w", encoding="utf-8") as f:
241
- f.write(final_result)
242
 
243
- print(final_result) # ログに最終結果を出力
 
244
 
245
  # 生成が完了したら状態ファイルを削除
246
  if os.path.exists("state.json"):
247
  os.remove("state.json")
 
248
 
249
- return final_result
250
 
251
  def continue_generate_article():
 
252
  state = load_state()
253
  if not state:
254
  return "再開する状態がありません。"
@@ -270,6 +373,7 @@ def continue_generate_article():
270
  "content": f"{i+1}/{len(split_instructions)}: {split_instructions[i]}"
271
  }
272
  try:
 
273
  response = openai.ChatCompletion.create(
274
  model="gpt-4-turbo",
275
  messages=[system_message, user_message],
@@ -292,13 +396,19 @@ def continue_generate_article():
292
 
293
  final_result = "\n".join(results)
294
 
 
 
 
 
295
  with open("output3.txt", "w", encoding="utf-8") as f:
296
- f.write(final_result)
297
 
298
- print(final_result) # ログに最終結果を出力
 
299
 
300
  # 生成が完了したら状態ファイルを削除
301
  if os.path.exists("state.json"):
302
  os.remove("state.json")
 
303
 
304
- return final_result
 
59
  def save_state(state):
60
  with open(state_file, "w", encoding="utf-8") as f:
61
  json.dump(state, f, ensure_ascii=False, indent=4)
62
+ print("State saved. Current index:", state.get('current_index', 'Not available')) # インデックス情報をログに出力
63
 
64
  # 状態をロードする関数
65
  def load_state():
66
  if os.path.exists(state_file):
67
  with open(state_file, "r", encoding="utf-8") as f:
68
+ state = json.load(f)
69
+ print("State loaded. Current index:", state.get('current_index', 'Not available')) # インデックス情報をログに出力
70
+ return state
71
+ print("No state file found.")
72
  return None
73
 
74
  # 状態をクリアする関数
 
78
  global executed_instructions, research_results
79
  executed_instructions = []
80
  research_results = []
81
+ print("State cleared.")
82
  return "状態がクリアされました"
83
 
84
  # 見出しを処理する関数
 
94
  tavily_search_tool = EnhancedTavilySearchTool()
95
  queries = []
96
 
97
+ for idx, h2_text in enumerate(h2_texts): # インデックスの取得方法を改善
98
+ h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{idx+1}-")]
99
  query = f"{h2_text} {' '.join(h3_for_this_h2)}"
100
  queries.append(query)
101
 
102
+ print("Performing Tavily search with queries:", queries) # デバッグ情報追加
103
+ response = tavily_search_tool.search(queries)
104
+ return {query: response[i] for i, query in enumerate(queries)}
105
 
106
  # キャッシュされたTavilyデータを保存する関数
107
  def save_preloaded_tavily_data(data):
108
  with open("preloaded_tavily_data.json", "w", encoding="utf-8") as f:
109
  json.dump(data, f, ensure_ascii=False, indent=4)
110
+ print("Preloaded Tavily data saved.")
111
 
112
  # キャッシュされたTavilyデータをロードする関数
113
  def load_preloaded_tavily_data():
114
  with open("preloaded_tavily_data.json", "r", encoding="utf-8") as f:
115
+ print("Preloaded Tavily data loaded.")
116
  return json.load(f)
117
 
118
  # PlanAndExecuteエージェントをセットアップする関数
 
131
  executor = load_agent_executor(llm, tools, verbose=True)
132
 
133
  agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)
134
+ print("PlanAndExecute agent setup complete.")
135
  return agent
136
 
137
+ # GPT-4を使用してテキストを生成するヘルパー関数
138
+ def generate_text_with_gpt4(prompt):
139
+ response = openai.ChatCompletion.create(
140
+ model="gpt-4o",
141
+ messages=[{"role": "system", "content": "以下についての詳細な情報をまとめ、適宜箇所書き、もしくは表を使ってオリジナルの内容にしてください。"},
142
+ {"role": "user", "content": prompt}],
143
+ temperature=0.7,
144
+ max_tokens=500
145
+ )
146
+ return response.choices[0]["message"]["content"].strip()
147
+
148
+ # 記事のセクションをGPT-4で拡張する関数
149
+ def expand_section_with_gpt4(h2_text, h3_texts, preloaded_data):
150
+ prompts = []
151
+ h3_to_text = {}
152
+ for h3_text in h3_texts:
153
+ key = f"{h2_text} {h3_text}"
154
+ if key in preloaded_data:
155
+ context = preloaded_data[key]
156
+ prompt = f"「{h3_text}」について詳しく説明してください。こちらが背景情報です:\n{context}"
157
+ prompts.append(prompt)
158
+ h3_to_text[h3_text] = prompt # プロンプトではなく後で置き換えるテキストを格納するための準備
159
+ else:
160
+ prompt = f"「{h3_text}」について詳しく説明してください。"
161
+ prompts.append(prompt)
162
+ h3_to_text[h3_text] = prompt
163
+
164
+ if not prompts: # promptsが空の場合
165
+ print("No prompts to process.")
166
+ return []
167
+
168
+ expanded_texts = []
169
+ # ThreadPoolExecutorのmax_workersに最小値を設定
170
+ with ThreadPoolExecutor(max_workers=max(1, len(prompts))) as executor:
171
+ future_to_prompt = {executor.submit(generate_text_with_gpt4, prompt): h3_text for prompt, h3_text in zip(prompts, h3_texts)}
172
+ for future in as_completed(future_to_prompt):
173
+ h3_text = future_to_prompt[future]
174
+ try:
175
+ expanded_text = future.result()
176
+ expanded_texts.append(expanded_text)
177
+ h3_to_text[h3_text] = expanded_text # 実際に生成されたテキストを保存
178
+ except Exception as e:
179
+ error_message = f"Error generating text for {h3_text}: {str(e)}"
180
+ print(error_message)
181
+ expanded_texts.append("Error in text generation.")
182
+
183
+ return h3_to_text
184
+
185
+ # 記事を拡張する関数
186
+ def process_standalone_h2(soup):
187
+ h2_elements = soup.find_all('h2')
188
+ for h2 in h2_elements:
189
+ if not h2.find_next_sibling(lambda tag: tag.name == 'h3'):
190
+ # 'まとめ'のような<h3>タグがないセクションを処理
191
+ preloaded_data = load_preloaded_tavily_data()
192
+ key = f"{h2.get_text()}"
193
+ context = preloaded_data.get(key, "このセクションに関する具体的な情報はありません。")
194
+ prompt = f"「{h2.get_text()}」について詳しく説明してください。こちらが背景情報です:\n{context}"
195
+ expanded_text = generate_text_with_gpt4(prompt)
196
+ new_paragraph = soup.new_tag('p')
197
+ new_paragraph.string = expanded_text
198
+ h2.insert_after(new_paragraph)
199
+
200
+ def generate_expanded_article(article_html, h3_to_text):
201
+ print("記事を拡張中...")
202
+ soup = BeautifulSoup(article_html, 'html.parser')
203
+ process_standalone_h2(soup) # 独立した<h2>セクションを処理
204
+
205
+ h2_elements = soup.find_all('h2')
206
+ for h2 in h2_elements:
207
+ if h2.get_text().strip() == "まとめ":
208
+ continue # "まとめ"セクションは拡張しない
209
+
210
+ h3_elements = h2.find_next_siblings('h3')
211
+ for h3 in h3_elements:
212
+ if h3.get_text() in h3_to_text:
213
+ new_paragraph = soup.new_tag('p')
214
+ new_paragraph.string = h3_to_text[h3.get_text()]
215
+ h3.insert_after(new_paragraph)
216
+
217
+ return str(soup)
218
+
219
  # 記事を生成する関数
220
  def generate_article(editable_output2):
221
+ print("Starting article generation...")
222
  # 途中から再開する場合のために状態を読み込み
223
  state = load_state() or {'executed_instructions': [], 'research_results': [], 'current_index': 0}
224
  executed_instructions = state['executed_instructions']
 
235
  h3_texts = [h3.get_text() for h3 in soup.find_all('h3')]
236
 
237
  # 初期のTavily検索
238
+ print("Performing initial Tavily search...")
239
  cached_responses = perform_initial_tavily_search(h2_texts, h3_texts)
240
  save_preloaded_tavily_data(cached_responses)
241
 
 
252
  research_results.append(response)
253
  save_state({'executed_instructions': executed_instructions, 'research_results': research_results, 'current_index': h2_texts.index(h2_text) + 1})
254
 
255
+ print("Tavily search complete.")
256
+
257
  system_message = {
258
  "role": "system",
259
  "content": "あなたはプロのライターです。すべての回答を日本語でお願いします。"
 
310
  "content": f"{i+1}/{len(split_instructions)}: {split_instruction}"
311
  }
312
  try:
313
+ print(f"Sending instruction chunk {i+1} of {len(split_instructions)} to GPT-4...")
314
  response = openai.ChatCompletion.create(
315
  model="gpt-4-turbo",
316
  messages=[system_message, user_message],
 
333
 
334
  final_result = "\n".join(results)
335
 
336
+ # 生成された初期記事を拡張
337
+ h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)
338
+ expanded_article = generate_expanded_article(final_result, h3_to_text)
339
+
340
  with open("output3.txt", "w", encoding="utf-8") as f:
341
+ f.write(expanded_article)
342
 
343
+ print("Article generation complete. Output saved to output3.txt.")
344
+ print(expanded_article) # ログに最終結果を出力
345
 
346
  # 生成が完了したら状態ファイルを削除
347
  if os.path.exists("state.json"):
348
  os.remove("state.json")
349
+ print("State file removed.")
350
 
351
+ return expanded_article
352
 
353
  def continue_generate_article():
354
+ print("Continuing article generation...")
355
  state = load_state()
356
  if not state:
357
  return "再開する状態がありません。"
 
373
  "content": f"{i+1}/{len(split_instructions)}: {split_instructions[i]}"
374
  }
375
  try:
376
+ print(f"Sending instruction chunk {i+1} of {len(split_instructions)} to GPT-4...")
377
  response = openai.ChatCompletion.create(
378
  model="gpt-4-turbo",
379
  messages=[system_message, user_message],
 
396
 
397
  final_result = "\n".join(results)
398
 
399
+ # 生成された初期記事を拡張
400
+ h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)
401
+ expanded_article = generate_expanded_article(final_result, h3_to_text)
402
+
403
  with open("output3.txt", "w", encoding="utf-8") as f:
404
+ f.write(expanded_article)
405
 
406
+ print("Article continuation complete. Output saved to output3.txt.")
407
+ print(expanded_article) # ログに最終結果を出力
408
 
409
  # 生成が完了したら状態ファイルを削除
410
  if os.path.exists("state.json"):
411
  os.remove("state.json")
412
+ print("State file removed.")
413
 
414
+ return expanded_article