Zihao Wang
committed on
Commit
·
a625cd7
1
Parent(s):
0ff155a
edit apps
Browse files
app.py
CHANGED
|
@@ -137,8 +137,8 @@ Just respond to the instruction directly. DO NOT add additional explanations or
|
|
| 137 |
def split_draft(draft, split_char = '\n\n'):
|
| 138 |
# 将draft切分为多个段落
|
| 139 |
# split_char: '\n\n'
|
| 140 |
-
|
| 141 |
-
draft_paragraphs = [
|
| 142 |
# print(f"The draft answer has {len(draft_paragraphs)}")
|
| 143 |
return draft_paragraphs
|
| 144 |
|
|
@@ -197,7 +197,7 @@ If you find the answer is right and do not need to add more details, just output
|
|
| 197 |
**IMPORTANT**
|
| 198 |
Try to keep the structure (multiple paragraphs with its subtitles) in the revised answer and make it more structual for understanding.
|
| 199 |
Add more details from retrieved text to the answer.
|
| 200 |
-
Split the paragraphs with
|
| 201 |
Just output the revised answer directly. DO NOT add additional explanations or annoucement in the revised answer unless you are asked to.
|
| 202 |
'''
|
| 203 |
# openai_client = OpenAI(api_key = openai.api_key)
|
|
@@ -218,6 +218,31 @@ Just output the revised answer directly. DO NOT add additional explanations or a
|
|
| 218 |
).choices[0].message.content
|
| 219 |
return revised_answer
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
def get_query_wrapper(q, question, answer):
    """Run get_query and hand its result back through the queue ``q``."""
    # Put the result on the queue so the caller can read it.
    q.put(get_query(question, answer))
|
|
@@ -230,6 +255,10 @@ def get_revise_answer_wrapper(q, question, answer, content):
|
|
| 230 |
result = get_revise_answer(question, answer, content)
|
| 231 |
q.put(result)
|
| 232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
from multiprocessing import Process, Queue
|
| 234 |
def run_with_timeout(func, timeout, *args, **kwargs):
|
| 235 |
q = Queue() # 创建一个Queue对象用于进程间通信
|
|
@@ -239,12 +268,12 @@ def run_with_timeout(func, timeout, *args, **kwargs):
|
|
| 239 |
# 等待进程完成或超时
|
| 240 |
p.join(timeout)
|
| 241 |
if p.is_alive():
|
| 242 |
-
print(f"{datetime.now()} [INFO]
|
| 243 |
p.terminate() # 终止进程
|
| 244 |
p.join() # 确保进程已经终止
|
| 245 |
result = None # 超时情况下,我们没有结果
|
| 246 |
else:
|
| 247 |
-
print(f"{datetime.now()} [INFO]
|
| 248 |
result = q.get() # 从队列中获取结果
|
| 249 |
return result
|
| 250 |
|
|
@@ -271,38 +300,38 @@ def generate_diff_html(text1, text2):
|
|
| 271 |
newline_char = '\n'
|
| 272 |
|
| 273 |
def rat(question):
|
| 274 |
-
print(f"{datetime.now()} [INFO]
|
| 275 |
draft = get_draft(question)
|
| 276 |
-
print(f"{datetime.now()} [INFO]
|
| 277 |
# print(f"##################### DRAFT #######################")
|
| 278 |
# print(draft)
|
| 279 |
# print(f"##################### END #######################")
|
| 280 |
|
| 281 |
-
print(f"{datetime.now()} [INFO]
|
| 282 |
draft_paragraphs = split_draft(draft)
|
| 283 |
-
print(f"{datetime.now()} [INFO]
|
| 284 |
answer = ""
|
| 285 |
for i, p in enumerate(draft_paragraphs):
|
| 286 |
# print(str(i)*80)
|
| 287 |
-
print(f"{datetime.now()} [INFO]
|
| 288 |
answer = answer + '\n\n' + p
|
| 289 |
# print(f"[{i}/{len(draft_paragraphs)}] Original Answer:\n{answer.replace(newline_char, ' ')}")
|
| 290 |
|
| 291 |
# query = get_query(question, answer)
|
| 292 |
-
print(f"{datetime.now()} [INFO]
|
| 293 |
res = run_with_timeout(get_query_wrapper, 10, question, answer)
|
| 294 |
if not res:
|
| 295 |
-
print(f"{datetime.now()} [INFO]
|
| 296 |
continue
|
| 297 |
else:
|
| 298 |
query = res
|
| 299 |
print(f">>> {i}/{len(draft_paragraphs)} Query: {query.replace(newline_char, ' ')}")
|
| 300 |
|
| 301 |
-
print(f"{datetime.now()} [INFO]
|
| 302 |
# content = get_content(query)
|
| 303 |
res = run_with_timeout(get_content_wrapper, 10, query)
|
| 304 |
if not res:
|
| 305 |
-
print(f"{datetime.now()} [INFO]
|
| 306 |
continue
|
| 307 |
else:
|
| 308 |
content = res
|
|
@@ -310,21 +339,25 @@ def rat(question):
|
|
| 310 |
for j, c in enumerate(content):
|
| 311 |
if j > 2:
|
| 312 |
break
|
| 313 |
-
print(f"{datetime.now()} [INFO]
|
| 314 |
# answer = get_revise_answer(question, answer, c)
|
| 315 |
res = run_with_timeout(get_revise_answer_wrapper, 15, question, answer, c)
|
| 316 |
if not res:
|
| 317 |
-
print(f"{datetime.now()} [INFO]
|
| 318 |
continue
|
| 319 |
else:
|
| 320 |
diff_html = generate_diff_html(answer, res)
|
| 321 |
display(HTML(diff_html))
|
| 322 |
answer = res
|
| 323 |
-
print(f"{datetime.now()} [INFO]
|
| 324 |
# print(f"[{i}/{len(draft_paragraphs)}] REVISED ANSWER:\n {answer.replace(newline_char, ' ')}")
|
| 325 |
# print()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
return draft, answer
|
| 327 |
-
# return answer
|
| 328 |
|
| 329 |
page_title = "RAT: Retrieval Augmented Thoughts Elicit Context-Aware Reasoning in Long-Horizon Generation"
|
| 330 |
page_md = """
|
|
|
|
| 137 |
def split_draft(draft, split_char='\n\n', min_length=5):
    """Split a draft answer into paragraphs, dropping trivially short ones.

    Args:
        draft: Full draft text to split.
        split_char: Separator between paragraphs (a blank line by default).
        min_length: A paragraph is kept only when it has more than this many
            characters, not counting leading/trailing whitespace.

    Returns:
        The kept paragraphs, in order, with their original text unchanged.
    """
    # Measure the stripped text so whitespace-only chunks (e.g. left behind by
    # runs of blank or indented lines) are not treated as real paragraphs.
    return [
        para
        for para in draft.split(split_char)
        if len(para.strip()) > min_length
    ]
|
| 144 |
|
|
|
|
| 197 |
**IMPORTANT**
|
| 198 |
Try to keep the structure (multiple paragraphs with its subtitles) in the revised answer and make it more structual for understanding.
|
| 199 |
Add more details from retrieved text to the answer.
|
| 200 |
+
Split the paragraphs with \n\n characters.
|
| 201 |
Just output the revised answer directly. DO NOT add additional explanations or annoucement in the revised answer unless you are asked to.
|
| 202 |
'''
|
| 203 |
# openai_client = OpenAI(api_key = openai.api_key)
|
|
|
|
| 218 |
).choices[0].message.content
|
| 219 |
return revised_answer
|
| 220 |
|
| 221 |
+
def get_reflect_answer(question, answer):
    """Ask the chat model to restructure the answer into titled markdown paragraphs.

    Args:
        question: The original question text.
        answer: The answer text to be restructured.

    Returns:
        The message content of the first choice returned by the chat
        completion request.
    """
    # The backslashes are escaped so the model receives the literal two-token
    # separator "\n\n". Unescaped, this non-raw string would turn them into
    # real newlines and the instruction would read "Split the paragraphs with
    # <blank line> characters."
    reflect_prompt = '''
Add a title to each paragraph in the answer and output the final answer using markdown format. This will make the answer to this question look more structured for better understanding.
**IMPORTANT**
Try to keep the structure (multiple paragraphs with its subtitles) in the response and make it more structual for understanding.
Split the paragraphs with \\n\\n characters.
Just output the revised answer directly. DO NOT add additional explanations or annoucement in the revised answer unless you are asked to.
'''
    openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
    response = openai_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": chatgpt_system_prompt,
            },
            {
                "role": "user",
                "content": f"##Question:\n{question}\n\n##Answer:\n{answer}\n\n##Instruction:\n{reflect_prompt}",
            },
        ],
        temperature=1.0,
    )
    return response.choices[0].message.content
|
| 245 |
+
|
| 246 |
def get_query_wrapper(q, question, answer):
    """Run get_query and hand its result back through the queue ``q``."""
    # Put the result on the queue so the caller can read it.
    q.put(get_query(question, answer))
|
|
|
|
| 255 |
result = get_revise_answer(question, answer, content)
|
| 256 |
q.put(result)
|
| 257 |
|
| 258 |
+
def get_reflect_answer_wrapper(q, question, answer):
    """Run get_reflect_answer and hand its result back through the queue ``q``."""
    q.put(get_reflect_answer(question, answer))
|
| 261 |
+
|
| 262 |
from multiprocessing import Process, Queue
|
| 263 |
def run_with_timeout(func, timeout, *args, **kwargs):
|
| 264 |
q = Queue() # 创建一个Queue对象用于进程间通信
|
|
|
|
| 268 |
# 等待进程完成或超时
|
| 269 |
p.join(timeout)
|
| 270 |
if p.is_alive():
|
| 271 |
+
print(f"{datetime.now()} [INFO] Function {str(func)} running timeout ({timeout}s), terminating...")
|
| 272 |
p.terminate() # 终止进程
|
| 273 |
p.join() # 确保进程已经终止
|
| 274 |
result = None # 超时情况下,我们没有结果
|
| 275 |
else:
|
| 276 |
+
print(f"{datetime.now()} [INFO] Function {str(func)} executed successfully.")
|
| 277 |
result = q.get() # 从队列中获取结果
|
| 278 |
return result
|
| 279 |
|
|
|
|
| 300 |
# Newline kept in a variable so f-string expressions can reference it
# (a backslash is not allowed inside an f-string expression before Python 3.12).
newline_char = '\n'
|
| 301 |
|
| 302 |
def rat(question):
|
| 303 |
+
print(f"{datetime.now()} [INFO] Generating draft...")
|
| 304 |
draft = get_draft(question)
|
| 305 |
+
print(f"{datetime.now()} [INFO] Return draft.")
|
| 306 |
# print(f"##################### DRAFT #######################")
|
| 307 |
# print(draft)
|
| 308 |
# print(f"##################### END #######################")
|
| 309 |
|
| 310 |
+
print(f"{datetime.now()} [INFO] Processing draft ...")
|
| 311 |
draft_paragraphs = split_draft(draft)
|
| 312 |
+
print(f"{datetime.now()} [INFO] Draft is splitted into {len(draft_paragraphs)} sections.")
|
| 313 |
answer = ""
|
| 314 |
for i, p in enumerate(draft_paragraphs):
|
| 315 |
# print(str(i)*80)
|
| 316 |
+
print(f"{datetime.now()} [INFO] Revising {i+1}/{len(draft_paragraphs)} sections ...")
|
| 317 |
answer = answer + '\n\n' + p
|
| 318 |
# print(f"[{i}/{len(draft_paragraphs)}] Original Answer:\n{answer.replace(newline_char, ' ')}")
|
| 319 |
|
| 320 |
# query = get_query(question, answer)
|
| 321 |
+
print(f"{datetime.now()} [INFO] Generating query ...")
|
| 322 |
res = run_with_timeout(get_query_wrapper, 10, question, answer)
|
| 323 |
if not res:
|
| 324 |
+
print(f"{datetime.now()} [INFO] Generating query timeout, skipping...")
|
| 325 |
continue
|
| 326 |
else:
|
| 327 |
query = res
|
| 328 |
print(f">>> {i}/{len(draft_paragraphs)} Query: {query.replace(newline_char, ' ')}")
|
| 329 |
|
| 330 |
+
print(f"{datetime.now()} [INFO] Crawling network pages ...")
|
| 331 |
# content = get_content(query)
|
| 332 |
res = run_with_timeout(get_content_wrapper, 10, query)
|
| 333 |
if not res:
|
| 334 |
+
print(f"{datetime.now()} [INFO] Parsing network pages timeout, skipping ...")
|
| 335 |
continue
|
| 336 |
else:
|
| 337 |
content = res
|
|
|
|
| 339 |
for j, c in enumerate(content):
|
| 340 |
if j > 2:
|
| 341 |
break
|
| 342 |
+
print(f"{datetime.now()} [INFO] Revising answers with retrieved network pages...[{j}/{min(len(content),3)}]")
|
| 343 |
# answer = get_revise_answer(question, answer, c)
|
| 344 |
res = run_with_timeout(get_revise_answer_wrapper, 15, question, answer, c)
|
| 345 |
if not res:
|
| 346 |
+
print(f"{datetime.now()} [INFO] Revising answers timeout, skipping ...")
|
| 347 |
continue
|
| 348 |
else:
|
| 349 |
diff_html = generate_diff_html(answer, res)
|
| 350 |
display(HTML(diff_html))
|
| 351 |
answer = res
|
| 352 |
+
print(f"{datetime.now()} [INFO] Answer revised [{j}/{min(len(content),3)}]")
|
| 353 |
# print(f"[{i}/{len(draft_paragraphs)}] REVISED ANSWER:\n {answer.replace(newline_char, ' ')}")
|
| 354 |
# print()
|
| 355 |
+
res = run_with_timeout(get_reflect_answer_wrapper, 30, question, answer)
|
| 356 |
+
if not res:
|
| 357 |
+
print(f"{datetime.now()} [INFO] Reflecting answers timeout, skipping next steps...")
|
| 358 |
+
else:
|
| 359 |
+
answer = res
|
| 360 |
return draft, answer
|
|
|
|
| 361 |
|
| 362 |
page_title = "RAT: Retrieval Augmented Thoughts Elicit Context-Aware Reasoning in Long-Horizon Generation"
|
| 363 |
page_md = """
|