Yasu777 committed on
Commit
a761bb6
·
verified ·
1 Parent(s): f71c11d

Update article_generator.py

Browse files
Files changed (1) hide show
  1. article_generator.py +183 -98
article_generator.py CHANGED
@@ -47,15 +47,103 @@ class EnhancedTavilySearchTool:
47
  else:
48
  raise Exception(f"Failed to fetch data from Tavily API: {response.status_code}, {response.text}")
49
 
50
- # 重複排除するヘルパー関数
51
- def remove_duplicates(text_list):
52
- seen = set()
53
- result = []
54
- for text in text_list:
55
- if text not in seen:
56
- seen.add(text)
57
- result.append(text)
58
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  # 記事のセクションをGPT-4で拡張する関数
61
  def expand_section_with_gpt4(h2_text, h3_texts, preloaded_data):
@@ -109,18 +197,7 @@ def process_standalone_h2(soup):
109
  new_paragraph.string = expanded_text
110
  h2.insert_after(new_paragraph)
111
 
112
- def process_summary_section(soup):
113
- summary_section = soup.find('h2', text='まとめ')
114
- if summary_section:
115
- next_paragraph = summary_section.find_next_sibling('p')
116
- if not next_paragraph:
117
- # もしまとめセクションの後にパラグラフがなければ、デフォルトのテキストを挿入
118
- new_paragraph = soup.new_tag('p')
119
- new_paragraph.string = "まとめの具体的な内容は現在利用可能ではありません。"
120
- summary_section.insert_after(new_paragraph)
121
- # 既存のテキストを使用する場合は何もしません(拡張しない)
122
-
123
- def generate_expanded_article(article_html, h3_to_text, cached_responses):
124
  print("記事を拡張中...")
125
  soup = BeautifulSoup(article_html, 'html.parser')
126
  process_standalone_h2(soup) # 独立した<h2>セクションを処理
@@ -137,75 +214,16 @@ def generate_expanded_article(article_html, h3_to_text, cached_responses):
137
  new_paragraph.string = h3_to_text[h3.get_text()]
138
  h3.insert_after(new_paragraph)
139
 
140
- # まとめセクションを特別に処理
141
- process_summary_section(soup)
142
-
143
  return str(soup)
144
 
145
- # PlanAndExecuteエージェントをセットアップする関数
146
- def setup_plan_and_execute_agent():
147
- google_search_tool = Tool(
148
- name="GoogleSearch",
149
- func=GoogleSearchTool().search,
150
- description="Search tool using Google API"
151
- )
152
-
153
- tools = [google_search_tool]
154
-
155
- model_name = "gpt-3.5-turbo-0125"
156
- llm = ChatOpenAI(model_name=model_name, temperature=0, max_tokens=1000)
157
- planner = load_chat_planner(llm)
158
- executor = load_agent_executor(llm, tools, verbose=True)
159
-
160
- agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)
161
- print("PlanAndExecute agent setup complete.")
162
- return agent
163
-
164
- # GPT-4を使用してテキストを生成するヘルパー関数
165
- def generate_text_with_gpt4(prompt):
166
- response = openai.ChatCompletion.create(
167
- model="gpt-4o",
168
- messages=[{"role": "system", "content": "以下についての詳細な情報をまとめ、適宜箇所書き、もしくは表を使ってオリジナルの内容にしてください。"},
169
- {"role": "user", "content": prompt}],
170
- temperature=0.7,
171
- max_tokens=500
172
- )
173
- return response.choices[0]["message"]["content"].strip()
174
-
175
- # 初期データをTavily検索で収集する関数
176
- def perform_initial_tavily_search(h2_texts, h3_texts):
177
- tavily_search_tool = EnhancedTavilySearchTool()
178
- queries = []
179
-
180
- for idx, h2_text in enumerate(h2_texts): # インデックスの取得方法を改善
181
- h3_for_this_h2 = [h3 for h3 in h3_texts if h3.startswith(f"{idx+1}-")]
182
- query = f"{h2_text} {' '.join(h3_for_this_h2)}"
183
- queries.append(query)
184
-
185
- print("Performing Tavily search with queries:", queries) # デバッグ情報追加
186
- response = tavily_search_tool.search(queries)
187
- return {query: response[i] for i, query in enumerate(queries)}
188
-
189
- def save_preloaded_tavily_data(data):
190
- with open("preloaded_tavily_data.json", "w", encoding="utf-8") as f:
191
- json.dump(data, f, ensure_ascii=False, indent=4)
192
- print("Preloaded Tavily data saved.")
193
-
194
- def load_preloaded_tavily_data():
195
- with open("preloaded_tavily_data.json", "r", encoding="utf-8") as f:
196
- print("Preloaded Tavily data loaded.")
197
- return json.load(f)
198
-
199
- def process_heading(agent, h2_text, h3_for_this_h2, cached_responses):
200
- query = f"{h2_text} {' '.join(h3_for_this_h2)}"
201
- if query in cached_responses:
202
- return (query, cached_responses[query])
203
- else:
204
- return (query, "No cached response found for this heading.")
205
-
206
  # 記事を生成する関数
207
  def generate_article(editable_output2):
208
  print("Starting article generation...")
 
 
 
 
 
209
 
210
  # エージェントのセットアップ
211
  agent = setup_plan_and_execute_agent()
@@ -221,9 +239,6 @@ def generate_article(editable_output2):
221
  cached_responses = perform_initial_tavily_search(h2_texts, h3_texts)
222
  save_preloaded_tavily_data(cached_responses)
223
 
224
- executed_instructions = []
225
- research_results = []
226
-
227
  with ThreadPoolExecutor(max_workers=5) as executor:
228
  futures = []
229
  for h2_text in h2_texts:
@@ -235,6 +250,7 @@ def generate_article(editable_output2):
235
  if purpose not in executed_instructions:
236
  executed_instructions.append(purpose)
237
  research_results.append(response)
 
238
 
239
  print("Tavily search complete.")
240
 
@@ -300,25 +316,26 @@ def generate_article(editable_output2):
300
  messages=[system_message, user_message],
301
  temperature=0.7,
302
  )
303
- generated_text = response.choices[0]["message"]["content"]
304
- print(f"Generated content for section {i+1}:") # 生成された各セクションの内容を出力
305
- print(generated_text)
306
- results.append(generated_text)
307
  except Exception as e:
308
  error_message = f"Error occurred during ChatCompletion: {str(e)}"
309
  print(error_message) # ログにエラーメッセージを出力
310
  results.append(error_message)
 
 
 
 
 
 
 
 
 
311
 
312
  final_result = "\n".join(results)
313
- print("Final generated article content:") # 最終的な記事全体の内容を出力
314
- print(final_result)
315
-
316
- # 重複を排除
317
- final_result = remove_duplicates(final_result.split('\n'))
318
 
319
  # 生成された初期記事を拡張
320
  h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)
321
- expanded_article = generate_expanded_article("\n".join(final_result), h3_to_text, cached_responses)
322
 
323
  with open("output3.txt", "w", encoding="utf-8") as f:
324
  f.write(expanded_article)
@@ -326,4 +343,72 @@ def generate_article(editable_output2):
326
  print("Article generation complete. Output saved to output3.txt.")
327
  print(expanded_article) # ログに最終結果を出力
328
 
 
 
 
 
 
329
  return expanded_article
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  else:
48
  raise Exception(f"Failed to fetch data from Tavily API: {response.status_code}, {response.text}")
49
 
50
# Instructions that have already been executed.
executed_instructions = []
# Research results collected so far.
research_results = []

# File in which the generation state is persisted.
state_file = "state.json"


# Save the generation state.
def save_state(state):
    """Persist ``state`` to ``state_file`` as UTF-8 JSON.

    The JSON is first written to a temporary sibling file and then moved
    over ``state_file`` with ``os.replace``.  The state file exists so that
    an interrupted run can be resumed, so a write that is itself interrupted
    must not leave a truncated file behind.

    Args:
        state: JSON-serializable dict.  Its ``current_index`` entry, when
            present, is echoed to the log.
    """
    tmp_path = f"{state_file}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(state, f, ensure_ascii=False, indent=4)
    # Swap the fully written file into place in a single step.
    os.replace(tmp_path, state_file)
    # Log the index information.
    print("State saved. Current index:", state.get('current_index', 'Not available'))


# Load the generation state.
def load_state():
    """Read the persisted state back from ``state_file``.

    Returns:
        The decoded state, or ``None`` when the file does not exist or does
        not contain valid JSON.
    """
    # Open directly instead of checking os.path.exists first: the file could
    # disappear between the check and the open.
    try:
        with open(state_file, "r", encoding="utf-8") as f:
            state = json.load(f)
    except FileNotFoundError:
        print("No state file found.")
        return None
    except json.JSONDecodeError as e:
        # An unreadable state file is treated like a missing one so callers
        # fall back to their "no state" path instead of crashing.
        print(f"State file is not valid JSON and will be ignored: {e}")
        return None
    # Log the index information.
    print("State loaded. Current index:", state.get('current_index', 'Not available'))
    return state


# Clear the generation state.
def clear_state():
    """Delete ``state_file`` (if any) and reset the module-level trackers.

    Returns:
        A fixed confirmation message (Japanese).
    """
    global executed_instructions, research_results
    try:
        os.remove(state_file)
    except FileNotFoundError:
        # Nothing was persisted; clearing is still considered successful.
        pass
    executed_instructions = []
    research_results = []
    print("State cleared.")
    return "状態がクリアされました"
83
+
84
# Process one heading.
def process_heading(agent, h2_text, h3_for_this_h2, cached_responses):
    """Look up the cached response for a heading and its sub-headings.

    The lookup key is ``h2_text``, a space, and the space-joined
    ``h3_for_this_h2`` entries.

    Args:
        agent: Not used by this function; kept so existing call sites that
            pass an agent remain valid.
        h2_text: Heading text.
        h3_for_this_h2: Iterable of sub-heading strings belonging to the
            heading.
        cached_responses: Mapping from query string to a cached response.

    Returns:
        A ``(query, response)`` tuple.  When ``query`` is not in
        ``cached_responses`` the response is a fixed placeholder message.
    """
    query = f"{h2_text} {' '.join(h3_for_this_h2)}"
    # Single lookup with a default instead of a membership test plus index.
    return (query, cached_responses.get(query, "No cached response found for this heading."))
91
+
92
# Collect the initial data with a Tavily search.
def perform_initial_tavily_search(h2_texts, h3_texts):
    """Build one query per heading, search them all, and map query to result.

    For the heading at 1-based position ``n``, the query is the heading text
    followed by every entry of ``h3_texts`` that starts with ``"n-"``.

    Args:
        h2_texts: Sequence of heading strings.
        h3_texts: Collection of sub-heading strings; it is scanned once per
            heading.

    Returns:
        Dict mapping each query string to ``response[i]``, where ``response``
        is whatever ``EnhancedTavilySearchTool.search`` returned for the full
        query list and ``i`` is the query's position in that list.
    """
    search_tool = EnhancedTavilySearchTool()

    def build_query(position, heading):
        # Sub-headings are matched to their heading by the "<n>-" prefix.
        prefix = f"{position}-"
        matching = [sub for sub in h3_texts if sub.startswith(prefix)]
        return f"{heading} {' '.join(matching)}"

    queries = [
        build_query(position, heading)
        for position, heading in enumerate(h2_texts, start=1)
    ]

    # Debug output.
    print("Performing Tavily search with queries:", queries)
    response = search_tool.search(queries)

    results_by_query = {}
    for position, query in enumerate(queries):
        results_by_query[query] = response[position]
    return results_by_query
105
+
106
# Save the cached Tavily data.
def save_preloaded_tavily_data(data):
    """Write ``data`` to ``preloaded_tavily_data.json`` as indented UTF-8 JSON.

    Non-ASCII characters are written as-is rather than escaped.

    Args:
        data: JSON-serializable object to store.
    """
    with open("preloaded_tavily_data.json", "w", encoding="utf-8") as cache_file:
        json.dump(
            data,
            cache_file,
            ensure_ascii=False,
            indent=4,
        )
    print("Preloaded Tavily data saved.")
111
+
112
# Load the cached Tavily data.
def load_preloaded_tavily_data():
    """Read and decode ``preloaded_tavily_data.json``.

    Returns:
        The decoded JSON content of the file.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open("preloaded_tavily_data.json", "r", encoding="utf-8") as f:
        data = json.load(f)
    # Log only after decoding succeeded, so the message is never printed for
    # a file that then fails to parse.
    print("Preloaded Tavily data loaded.")
    return data
117
+
118
# Set up the PlanAndExecute agent.
def setup_plan_and_execute_agent():
    """Create a verbose ``PlanAndExecute`` agent with one ``GoogleSearch`` tool.

    The planner and the executor share a single ``ChatOpenAI`` model
    ("gpt-3.5-turbo-0125", temperature 0, at most 1000 tokens).

    Returns:
        The constructed ``PlanAndExecute`` agent.
    """
    search_tool = Tool(
        name="GoogleSearch",
        func=GoogleSearchTool().search,
        description="Search tool using Google API",
    )

    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo-0125",
        temperature=0,
        max_tokens=1000,
    )

    agent = PlanAndExecute(
        planner=load_chat_planner(llm),
        executor=load_agent_executor(llm, [search_tool], verbose=True),
        verbose=True,
    )
    print("PlanAndExecute agent setup complete.")
    return agent
136
+
137
# Helper that generates text through the OpenAI chat completion API.
def generate_text_with_gpt4(prompt):
    """Send ``prompt`` to the "gpt-4o" chat model and return its reply.

    A fixed Japanese system prompt is sent ahead of the user prompt.  The
    request uses temperature 0.7 and at most 500 tokens.

    Args:
        prompt: User message content.

    Returns:
        The message content of the first choice, with surrounding whitespace
        stripped.
    """
    system_prompt = "以下についての詳細な情報をまとめ、適宜箇所書き、もしくは表を使ってオリジナルの内容にしてください。"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=conversation,
        temperature=0.7,
        max_tokens=500,
    )
    first_choice = completion.choices[0]
    return first_choice["message"]["content"].strip()
147
 
148
  # 記事のセクションをGPT-4で拡張する関数
149
  def expand_section_with_gpt4(h2_text, h3_texts, preloaded_data):
 
197
  new_paragraph.string = expanded_text
198
  h2.insert_after(new_paragraph)
199
 
200
+ def generate_expanded_article(article_html, h3_to_text):
 
 
 
 
 
 
 
 
 
 
 
201
  print("記事を拡張中...")
202
  soup = BeautifulSoup(article_html, 'html.parser')
203
  process_standalone_h2(soup) # 独立した<h2>セクションを処理
 
214
  new_paragraph.string = h3_to_text[h3.get_text()]
215
  h3.insert_after(new_paragraph)
216
 
 
 
 
217
  return str(soup)
218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  # 記事を生成する関数
220
  def generate_article(editable_output2):
221
  print("Starting article generation...")
222
+ # 途中から再開する場合のために状態を読み込み
223
+ state = load_state() or {'executed_instructions': [], 'research_results': [], 'current_index': 0}
224
+ executed_instructions = state['executed_instructions']
225
+ research_results = state['research_results']
226
+ current_index = state['current_index']
227
 
228
  # エージェントのセットアップ
229
  agent = setup_plan_and_execute_agent()
 
239
  cached_responses = perform_initial_tavily_search(h2_texts, h3_texts)
240
  save_preloaded_tavily_data(cached_responses)
241
 
 
 
 
242
  with ThreadPoolExecutor(max_workers=5) as executor:
243
  futures = []
244
  for h2_text in h2_texts:
 
250
  if purpose not in executed_instructions:
251
  executed_instructions.append(purpose)
252
  research_results.append(response)
253
+ save_state({'executed_instructions': executed_instructions, 'research_results': research_results, 'current_index': h2_texts.index(h2_text) + 1})
254
 
255
  print("Tavily search complete.")
256
 
 
316
  messages=[system_message, user_message],
317
  temperature=0.7,
318
  )
319
+ results.append(response.choices[0]["message"]["content"])
 
 
 
320
  except Exception as e:
321
  error_message = f"Error occurred during ChatCompletion: {str(e)}"
322
  print(error_message) # ログにエラーメッセージを出力
323
  results.append(error_message)
324
+ # 途中で止まった場合の状態を保存
325
+ save_state({
326
+ "executed_instructions": executed_instructions,
327
+ "research_results": research_results,
328
+ "split_instructions": split_instructions,
329
+ "results": results,
330
+ "current_index": i + 1
331
+ })
332
+ return error_message
333
 
334
  final_result = "\n".join(results)
 
 
 
 
 
335
 
336
  # 生成された初期記事を拡張
337
  h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)
338
+ expanded_article = generate_expanded_article(final_result, h3_to_text)
339
 
340
  with open("output3.txt", "w", encoding="utf-8") as f:
341
  f.write(expanded_article)
 
343
  print("Article generation complete. Output saved to output3.txt.")
344
  print(expanded_article) # ログに最終結果を出力
345
 
346
+ # 生成が完了したら状態ファイルを削除
347
+ if os.path.exists("state.json"):
348
+ os.remove("state.json")
349
+ print("State file removed.")
350
+
351
  return expanded_article
352
+
353
def continue_generate_article():
    """Resume an interrupted article generation from the saved state.

    The state is read with ``load_state()``.  The instruction chunks stored
    under ``split_instructions`` are sent to the chat model starting at
    ``current_index``.  The collected results are then joined, expanded, and
    written to ``output3.txt``, after which the state file is removed.

    Returns:
        The expanded article on success.  A fixed Japanese message when
        there is nothing to resume.  The error message string when a chat
        completion call fails; in that case the state is saved so that the
        same chunk is retried on the next call.

    Raises:
        FileNotFoundError: If ``preloaded_tavily_data.json`` is missing when
            the cached search responses are reloaded for the expansion step.
    """
    print("Continuing article generation...")
    state = load_state()
    if not state:
        return "再開する状態がありません。"

    executed_instructions = state.get("executed_instructions", [])
    research_results = state.get("research_results", [])
    split_instructions = state.get("split_instructions", [])
    results = state.get("results", [])
    current_index = state.get("current_index", 0)

    # A state without instruction chunks gives this function nothing to
    # send, and continuing would only produce an empty article.
    if not split_instructions:
        return "再開する状態がありません。"

    system_message = {
        "role": "system",
        "content": "あなたはプロのライターです。すべての回答を日本語でお願いします。"
    }
    total_chunks = len(split_instructions)

    for i in range(current_index, total_chunks):
        user_message = {
            "role": "user",
            "content": f"{i+1}/{total_chunks}: {split_instructions[i]}"
        }
        try:
            print(f"Sending instruction chunk {i+1} of {total_chunks} to GPT-4...")
            response = openai.ChatCompletion.create(
                model="gpt-4-turbo",
                messages=[system_message, user_message],
                temperature=0.7,
            )
            results.append(response.choices[0]["message"]["content"])
        except Exception as e:
            error_message = f"Error occurred during ChatCompletion: {str(e)}"
            print(error_message)  # Log the error message.
            # Save where we stopped.  The error text is NOT added to
            # ``results`` and the index stays at ``i``: otherwise the failed
            # chunk would be skipped on resume and the error message would
            # end up inside the article.
            save_state({
                **state,
                "executed_instructions": executed_instructions,
                "research_results": research_results,
                "split_instructions": split_instructions,
                "results": results,
                "current_index": i
            })
            return error_message

    final_result = "\n".join(results)

    # The expansion step needs the sub-heading texts and the cached search
    # responses; neither is a local of this function, so recover them here.
    h3_texts = state.get("h3_texts")
    if h3_texts is None:
        # NOTE(review): falls back to the <h3> texts of the generated
        # article, which is what generate_expanded_article uses as lookup
        # keys - confirm this matches what generate_article passes.
        article_soup = BeautifulSoup(final_result, 'html.parser')
        h3_texts = [h3.get_text() for h3 in article_soup.find_all('h3')]
    cached_responses = load_preloaded_tavily_data()

    # Expand the generated initial article.
    h3_to_text = expand_section_with_gpt4(final_result, h3_texts, cached_responses)
    expanded_article = generate_expanded_article(final_result, h3_to_text)

    with open("output3.txt", "w", encoding="utf-8") as f:
        f.write(expanded_article)

    print("Article continuation complete. Output saved to output3.txt.")
    print(expanded_article)  # Log the final result.

    # Generation finished: remove the state file.
    try:
        os.remove("state.json")
    except FileNotFoundError:
        pass
    else:
        print("State file removed.")

    return expanded_article