Yasu777 commited on
Commit
366f5fd
·
verified ·
1 Parent(s): 0fcc693

Update article_generator.py

Browse files
Files changed (1) hide show
  1. article_generator.py +101 -189
article_generator.py CHANGED
@@ -47,103 +47,15 @@ class EnhancedTavilySearchTool:
47
  else:
48
  raise Exception(f"Failed to fetch data from Tavily API: {response.status_code}, {response.text}")
49
 
50
- # 実行された指示追跡するリスト
51
- executed_instructions = []
52
- # 調査結果を保存するリスト
53
- research_results = []
54
-
55
- # 生成状態を保存するファイル
56
- state_file = "state.json"
57
-
58
- # 状態を保存する関数
59
- def save_state(state):
60
- with open(state_file, "w", encoding="utf-8") as f:
61
- json.dump(state, f, ensure_ascii=False, indent=4)
62
- print("State saved. Current index:", state.get('current_index', 'Not available')) # インデックス情報をログに出力
63
-
64
- # 状態をロードする関数
65
- def load_state():
66
- if os.path.exists(state_file):
67
- with open(state_file, "r", encoding="utf-8") as f:
68
- state = json.load(f)
69
- print("State loaded. Current index:", state.get('current_index', 'Not available')) # インデックス情報をログに出力
70
- return state
71
- print("No state file found.")
72
- return None
73
-
74
- # 状態をクリアする関数
75
- def clear_state():
76
- if os.path.exists(state_file):
77
- os.remove(state_file)
78
- global executed_instructions, research_results
79
- executed_instructions = []
80
- research_results = []
81
- print("State cleared.")
82
- return "状態がクリアされました"
83
-
84
- # 見出しを処理する関数
85
- def process_heading(agent, h2_text, h3_for_this_h2, cached_responses):
86
- query = f"{h2_text} {' '.join(h3_for_this_h2)}"
87
- if query in cached_responses:
88
- return (query, cached_responses[query])
89
- else:
90
- return (query, "No cached response found for this heading.")
91
-
92
- # 初期データをTavily検索で収集する関数
93
- def perform_initial_tavily_search(h2_texts, h3_texts):
94
- tavily_search_tool = EnhancedTavilySearchTool()
95
- queries = []
96
-
97
- for idx, h2_text in enumerate(h2_texts): # インデックスの取得方法を改善
98
- h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{idx+1}-")]
99
- query = f"{h2_text} {' '.join(h3_for_this_h2)}"
100
- queries.append(query)
101
-
102
- print("Performing Tavily search with queries:", queries) # デバッグ情報追加
103
- response = tavily_search_tool.search(queries)
104
- return {query: response[i] for i, query in enumerate(queries)}
105
-
106
- # キャッシュされたTavilyデータを保存する関数
107
- def save_preloaded_tavily_data(data):
108
- with open("preloaded_tavily_data.json", "w", encoding="utf-8") as f:
109
- json.dump(data, f, ensure_ascii=False, indent=4)
110
- print("Preloaded Tavily data saved.")
111
-
112
- # キャッシュされたTavilyデータをロードする関数
113
- def load_preloaded_tavily_data():
114
- with open("preloaded_tavily_data.json", "r", encoding="utf-8") as f:
115
- print("Preloaded Tavily data loaded.")
116
- return json.load(f)
117
-
118
- # PlanAndExecuteエージェントをセットアップする関数
119
- def setup_plan_and_execute_agent():
120
- google_search_tool = Tool(
121
- name="GoogleSearch",
122
- func=GoogleSearchTool().search,
123
- description="Search tool using Google API"
124
- )
125
-
126
- tools = [google_search_tool]
127
-
128
- model_name = "gpt-3.5-turbo-0125"
129
- llm = ChatOpenAI(model_name=model_name, temperature=0, max_tokens=1000)
130
- planner = load_chat_planner(llm)
131
- executor = load_agent_executor(llm, tools, verbose=True)
132
-
133
- agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)
134
- print("PlanAndExecute agent setup complete.")
135
- return agent
136
-
137
- # GPT-4を使用してテキストを生成するヘルパー関数
138
- def generate_text_with_gpt4(prompt):
139
- response = openai.ChatCompletion.create(
140
- model="gpt-4o",
141
- messages=[{"role": "system", "content": "以下についての詳細な情報をまとめ、適宜箇所書き、もしくは表を使ってオリジナルの内容にしてください。"},
142
- {"role": "user", "content": prompt}],
143
- temperature=0.7,
144
- max_tokens=500
145
- )
146
- return response.choices[0]["message"]["content"].strip()
147
 
148
  # 記事のセクションをGPT-4で拡張する関数
149
  def expand_section_with_gpt4(h2_text, h3_texts, preloaded_data):
@@ -197,7 +109,17 @@ def process_standalone_h2(soup):
197
  new_paragraph.string = expanded_text
198
  h2.insert_after(new_paragraph)
199
 
200
- def generate_expanded_article(article_html, h3_to_text):
 
 
 
 
 
 
 
 
 
 
201
  print("記事を拡張中...")
202
  soup = BeautifulSoup(article_html, 'html.parser')
203
  process_standalone_h2(soup) # 独立した<h2>セクションを処理
@@ -214,16 +136,74 @@ def generate_expanded_article(article_html, h3_to_text):
214
  new_paragraph.string = h3_to_text[h3.get_text()]
215
  h3.insert_after(new_paragraph)
216
 
 
 
217
  return str(soup)
218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  # 記事を生成する関数
220
  def generate_article(editable_output2):
221
  print("Starting article generation...")
222
- # 途中から再開する場合のために状態を読み込み
223
- state = load_state() or {'executed_instructions': [], 'research_results': [], 'current_index': 0}
224
- executed_instructions = state['executed_instructions']
225
- research_results = state['research_results']
226
- current_index = state['current_index']
227
 
228
  # エージェントのセットアップ
229
  agent = setup_plan_and_execute_agent()
@@ -239,6 +219,9 @@ def generate_article(editable_output2):
239
  cached_responses = perform_initial_tavily_search(h2_texts, h3_texts)
240
  save_preloaded_tavily_data(cached_responses)
241
 
 
 
 
242
  with ThreadPoolExecutor(max_workers=5) as executor:
243
  futures = []
244
  for h2_text in h2_texts:
@@ -250,7 +233,6 @@ def generate_article(editable_output2):
250
  if purpose not in executed_instructions:
251
  executed_instructions.append(purpose)
252
  research_results.append(response)
253
- save_state({'executed_instructions': executed_instructions, 'research_results': research_results, 'current_index': h2_texts.index(h2_text) + 1})
254
 
255
  print("Tavily search complete.")
256
 
@@ -273,21 +255,19 @@ def generate_article(editable_output2):
273
 
274
  for idx, h2_text in enumerate(h2_texts):
275
  h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{idx+1}-")]
276
- # H2にIDを追加
277
  instructions.append(f"""
278
- <h2 id='section{idx+1}'>{h2_text}</h2>
279
  "{h2_text}"に関する導入文を日本語で作成してください。この導���文は、以下の小見出しの内容を考慮してください:{"、".join(h3_for_this_h2)}。直接的なコピーまたは近いフレーズを避けて、オリジナルな内容にしてください。""")
280
- for h3_index, h3 in enumerate(h3_for_this_h2, start=1):
281
  related_sentences = [sentence for sentence in sentences if h3 in sentence][:max_questions_per_h3]
282
  if related_sentences:
283
  content_for_h3 = "。".join(related_sentences) + "。"
284
- # H3にIDを追加
285
  instructions.append(f"""
286
- <h3 id='section{idx+1}subsection{h3_index}'>{h3}</h3>
287
  "{h3}"に関する詳細な内容として、以下の情報を日本語で記述してください:{content_for_h3} ここでも、オリジナルな内容を心がけてください。""")
288
  else:
289
  instructions.append(f"""
290
- <h3 id='section{idx+1}subsection{h3_index}'>{h3}</h3>
291
  "{h3}"に関する詳細な内容を日本語で記述してください。オリジナルな内容を心がけてください。""")
292
 
293
  # トークン数を制限するためにメッセージを分割
@@ -318,99 +298,31 @@ def generate_article(editable_output2):
318
  messages=[system_message, user_message],
319
  temperature=0.7,
320
  )
321
- results.append(response.choices[0]["message"]["content"])
 
 
 
322
  except Exception as e:
323
  error_message = f"Error occurred during ChatCompletion: {str(e)}"
324
  print(error_message) # ログにエラーメッセージを出力
325
  results.append(error_message)
326
- # 途中で止まった場合の状態を保存
327
- save_state({
328
- "executed_instructions": executed_instructions,
329
- "research_results": research_results,
330
- "split_instructions": split_instructions,
331
- "results": results,
332
- "current_index": i + 1
333
- })
334
- return error_message
335
 
336
  final_result = "\n".join(results)
 
 
337
 
338
- # 生成された初期記事拡張
339
- h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)
340
- expanded_article = generate_expanded_article(final_result, h3_to_text)
341
-
342
- with open("output3.txt", "w", encoding="utf-8") as f:
343
- f.write(expanded_article)
344
-
345
- print("Article generation complete. Output saved to output3.txt.")
346
- print(expanded_article) # ログに最終結果を出力
347
-
348
- # 生成が完了したら状態ファイルを削除
349
- if os.path.exists("state.json"):
350
- os.remove("state.json")
351
- print("State file removed.")
352
-
353
- return expanded_article
354
-
355
- def continue_generate_article():
356
- print("Continuing article generation...")
357
- state = load_state()
358
- if not state:
359
- return "再開する状態がありません。"
360
-
361
- executed_instructions = state.get("executed_instructions", [])
362
- research_results = state.get("research_results", [])
363
- split_instructions = state.get("split_instructions", [])
364
- results = state.get("results", [])
365
- current_index = state.get("current_index", 0)
366
-
367
- system_message = {
368
- "role": "system",
369
- "content": "あなたはプロのライターです。すべての回答を日本語でお願いします。"
370
- }
371
-
372
- for i in range(current_index, len(split_instructions)):
373
- user_message = {
374
- "role": "user",
375
- "content": f"{i+1}/{len(split_instructions)}: {split_instructions[i]}"
376
- }
377
- try:
378
- print(f"Sending instruction chunk {i+1} of {len(split_instructions)} to GPT-4...")
379
- response = openai.ChatCompletion.create(
380
- model="gpt-4-turbo",
381
- messages=[system_message, user_message],
382
- temperature=0.7,
383
- )
384
- results.append(response.choices[0]["message"]["content"])
385
- except Exception as e:
386
- error_message = f"Error occurred during ChatCompletion: {str(e)}"
387
- print(error_message) # ログにエラーメッセージを出力
388
- results.append(error_message)
389
- # 途中で止まった場合の状態を保存
390
- save_state({
391
- "executed_instructions": executed_instructions,
392
- "research_results": research_results,
393
- "split_instructions": split_instructions,
394
- "results": results,
395
- "current_index": i + 1
396
- })
397
- return error_message
398
-
399
- final_result = "\n".join(results)
400
 
401
  # 生成された初期記事を拡張
402
  h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)
403
- expanded_article = generate_expanded_article(final_result, h3_to_text)
404
 
405
  with open("output3.txt", "w", encoding="utf-8") as f:
406
  f.write(expanded_article)
407
 
408
- print("Article continuation complete. Output saved to output3.txt.")
409
  print(expanded_article) # ログに最終結果を出力
410
 
411
- # 生成が完了したら状態ファイルを削除
412
- if os.path.exists("state.json"):
413
- os.remove("state.json")
414
- print("State file removed.")
415
-
416
  return expanded_article
 
 
47
  else:
48
  raise Exception(f"Failed to fetch data from Tavily API: {response.status_code}, {response.text}")
49
 
50
def remove_duplicates(text_list):
    """Return *text_list* with duplicate entries removed, keeping first occurrences.

    Relies on dict preserving insertion order (Python 3.7+), which matches the
    original seen-set + append loop exactly for hashable items.
    """
    return list(dict.fromkeys(text_list))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  # 記事のセクションをGPT-4で拡張する関数
61
  def expand_section_with_gpt4(h2_text, h3_texts, preloaded_data):
 
109
  new_paragraph.string = expanded_text
110
  h2.insert_after(new_paragraph)
111
 
112
+ def process_summary_section(soup, cached_responses):
113
+ summary_section = soup.find('h2', text='まとめ')
114
+ if summary_section:
115
+ # まとめの内容を検索結果やAI生成結果から取得
116
+ summary_key = "まとめ"
117
+ summary_data = cached_responses.get(summary_key, "まとめの具体的な内容は現在利用可能ではあ��ません。")
118
+ new_paragraph = soup.new_tag('p')
119
+ new_paragraph.string = summary_data
120
+ summary_section.insert_after(new_paragraph)
121
+
122
+ def generate_expanded_article(article_html, h3_to_text, cached_responses):
123
  print("記事を拡張中...")
124
  soup = BeautifulSoup(article_html, 'html.parser')
125
  process_standalone_h2(soup) # 独立した<h2>セクションを処理
 
136
  new_paragraph.string = h3_to_text[h3.get_text()]
137
  h3.insert_after(new_paragraph)
138
 
139
+ process_summary_section(soup, cached_responses) # まとめセクションを特別処理し、キャッシュされたレスポンスを渡す
140
+
141
  return str(soup)
142
 
143
# Build the PlanAndExecute agent used for article research.
def setup_plan_and_execute_agent():
    """Return a PlanAndExecute agent backed by a Google-search tool.

    Uses gpt-3.5-turbo-0125 for both the planner and the executor.
    """
    search_tool = Tool(
        name="GoogleSearch",
        func=GoogleSearchTool().search,
        description="Search tool using Google API",
    )
    chat_model = ChatOpenAI(
        model_name="gpt-3.5-turbo-0125", temperature=0, max_tokens=1000
    )
    agent = PlanAndExecute(
        planner=load_chat_planner(chat_model),
        executor=load_agent_executor(chat_model, [search_tool], verbose=True),
        verbose=True,
    )
    print("PlanAndExecute agent setup complete.")
    return agent
161
+
162
# Helper that asks the OpenAI chat API to expand a prompt into original prose.
def generate_text_with_gpt4(prompt):
    """Generate detail text for *prompt* via the chat API and return it stripped."""
    system_message = {
        "role": "system",
        "content": "以下についての詳細な情報をまとめ、適宜箇所書き、もしくは表を使ってオリジナルの内容にしてください。",
    }
    user_message = {"role": "user", "content": prompt}
    completion = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=[system_message, user_message],
        temperature=0.7,
        max_tokens=500,
    )
    return completion.choices[0]["message"]["content"].strip()
172
+
173
# Collect initial research data with one Tavily query per <h2> heading.
def perform_initial_tavily_search(h2_texts, h3_texts):
    """Search Tavily once per <h2>, pairing it with its '<idx>-' prefixed <h3>s.

    Returns a dict mapping each query string to its search response.
    Assumes search() returns results positionally aligned with the query
    list — TODO confirm against EnhancedTavilySearchTool.search.
    """
    search_tool = EnhancedTavilySearchTool()
    queries = []
    for position, heading in enumerate(h2_texts, start=1):
        subheadings = [h3 for h3 in h3_texts if h3.startswith(f"{position}-")]
        queries.append(f"{heading} {' '.join(subheadings)}")

    print("Performing Tavily search with queries:", queries)  # debug output
    responses = search_tool.search(queries)
    return {query: responses[index] for index, query in enumerate(queries)}
186
+
187
def save_preloaded_tavily_data(data):
    """Persist *data* as pretty-printed UTF-8 JSON to preloaded_tavily_data.json."""
    serialized = json.dumps(data, ensure_ascii=False, indent=4)
    with open("preloaded_tavily_data.json", "w", encoding="utf-8") as handle:
        handle.write(serialized)
    print("Preloaded Tavily data saved.")
191
+
192
def load_preloaded_tavily_data():
    """Load and return the cached Tavily data from preloaded_tavily_data.json.

    Raises:
        FileNotFoundError: if the cache file does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open("preloaded_tavily_data.json", "r", encoding="utf-8") as handle:
        data = json.load(handle)
    # Log only after a successful parse — the original printed "loaded"
    # before json.load, so a corrupt file still logged success.
    print("Preloaded Tavily data loaded.")
    return data
196
+
197
def process_heading(agent, h2_text, h3_for_this_h2, cached_responses):
    """Look up the cached response for an <h2> plus its <h3> group.

    *agent* is unused but retained for interface compatibility with callers.
    Returns a (query, response) tuple; the response is a fixed notice when
    the query has no cached entry.
    """
    query = f"{h2_text} {' '.join(h3_for_this_h2)}"
    fallback = "No cached response found for this heading."
    return (query, cached_responses.get(query, fallback))
203
+
204
  # 記事を生成する関数
205
  def generate_article(editable_output2):
206
  print("Starting article generation...")
 
 
 
 
 
207
 
208
  # エージェントのセットアップ
209
  agent = setup_plan_and_execute_agent()
 
219
  cached_responses = perform_initial_tavily_search(h2_texts, h3_texts)
220
  save_preloaded_tavily_data(cached_responses)
221
 
222
+ executed_instructions = []
223
+ research_results = []
224
+
225
  with ThreadPoolExecutor(max_workers=5) as executor:
226
  futures = []
227
  for h2_text in h2_texts:
 
233
  if purpose not in executed_instructions:
234
  executed_instructions.append(purpose)
235
  research_results.append(response)
 
236
 
237
  print("Tavily search complete.")
238
 
 
255
 
256
  for idx, h2_text in enumerate(h2_texts):
257
  h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{idx+1}-")]
 
258
  instructions.append(f"""
259
+ <h2>{h2_text}</h2>
260
  "{h2_text}"に関する導入文を日本語で作成してください。この導入文は、以下の小見出しの内容を考慮してください:{"、".join(h3_for_this_h2)}。直接的なコピーまたは近いフレーズを避けて、オリジナルな内容にしてください。""")
261
+ for h3 in h3_for_this_h2:
262
  related_sentences = [sentence for sentence in sentences if h3 in sentence][:max_questions_per_h3]
263
  if related_sentences:
264
  content_for_h3 = "。".join(related_sentences) + "。"
 
265
  instructions.append(f"""
266
+ <h3>{h3}</h3>
267
  "{h3}"に関する詳細な内容として、以下の情報を日本語で記述してください:{content_for_h3} ここでも、オリジナルな内容を心がけてください。""")
268
  else:
269
  instructions.append(f"""
270
+ <h3>{h3}</h3>
271
  "{h3}"に関する詳細な内容を日本語で記述してください。オリジナルな内容を心がけてください。""")
272
 
273
  # トークン数を制限するためにメッセージを分割
 
298
  messages=[system_message, user_message],
299
  temperature=0.7,
300
  )
301
+ generated_text = response.choices[0]["message"]["content"]
302
+ print(f"Generated content for section {i+1}:") # 生成された各セクションの内容を出力
303
+ print(generated_text)
304
+ results.append(generated_text)
305
  except Exception as e:
306
  error_message = f"Error occurred during ChatCompletion: {str(e)}"
307
  print(error_message) # ログにエラーメッセージを出力
308
  results.append(error_message)
 
 
 
 
 
 
 
 
 
309
 
310
  final_result = "\n".join(results)
311
+ print("Final generated article content:") # 最終的な記事全体の内容を出力
312
+ print(final_result)
313
 
314
+ # 重複排除
315
+ final_result = remove_duplicates(final_result.split('\n'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
 
317
  # 生成された初期記事を拡張
318
  h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)
319
+ expanded_article = generate_expanded_article("\n".join(final_result), h3_to_text, cached_responses)
320
 
321
  with open("output3.txt", "w", encoding="utf-8") as f:
322
  f.write(expanded_article)
323
 
324
+ print("Article generation complete. Output saved to output3.txt.")
325
  print(expanded_article) # ログに最終結果を出力
326
 
 
 
 
 
 
327
  return expanded_article
328
+ # NOTE(review): removed stray "ChatGPT" token — a chat-transcript paste artifact that would raise NameError at import time