Spaces:
Running
Running
Commit
·
2dea19e
1
Parent(s):
f64978e
优化:增加新的model参数
Browse files
app.py
CHANGED
|
@@ -422,7 +422,7 @@ class Reader:
|
|
| 422 |
|
| 423 |
return image_url
|
| 424 |
|
| 425 |
-
def summary_with_chat(self, paper_list, key):
|
| 426 |
htmls = []
|
| 427 |
utoken = 0
|
| 428 |
ctoken = 0
|
|
@@ -437,7 +437,7 @@ class Reader:
|
|
| 437 |
text += list(paper.section_text_dict.values())[0]
|
| 438 |
#max_token = 2500 * 4
|
| 439 |
#text = text[:max_token]
|
| 440 |
-
chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key))
|
| 441 |
htmls.append(chat_summary_text)
|
| 442 |
|
| 443 |
# TODO 往md文档中插入论文里的像素最大的一张图片,这个方案可以弄的更加智能一些:
|
|
@@ -455,7 +455,7 @@ class Reader:
|
|
| 455 |
# methods
|
| 456 |
method_text += paper.section_text_dict[method_key]
|
| 457 |
text = summary_text + "\n<Methods>:\n" + method_text
|
| 458 |
-
chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key))
|
| 459 |
htmls.append(chat_method_text)
|
| 460 |
else:
|
| 461 |
chat_method_text = ''
|
|
@@ -478,7 +478,7 @@ class Reader:
|
|
| 478 |
text = summary_text + "\n <Conclusion>:\n" + conclusion_text
|
| 479 |
else:
|
| 480 |
text = summary_text
|
| 481 |
-
chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key))
|
| 482 |
htmls.append(chat_conclusion_text)
|
| 483 |
htmls.append("\n")
|
| 484 |
# token统计
|
|
@@ -500,7 +500,7 @@ class Reader:
|
|
| 500 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
| 501 |
stop=tenacity.stop_after_attempt(5),
|
| 502 |
reraise=True)
|
| 503 |
-
def chat_conclusion(self, text, key):
|
| 504 |
openai.api_key = key
|
| 505 |
conclusion_prompt_token = 650
|
| 506 |
text_token = len(self.encoding.encode(text))
|
|
@@ -527,6 +527,8 @@ class Reader:
|
|
| 527 |
model="gpt-3.5-turbo",
|
| 528 |
# prompt需要用英语替换,少占用token。
|
| 529 |
messages=messages,
|
|
|
|
|
|
|
| 530 |
)
|
| 531 |
|
| 532 |
result = ''
|
|
@@ -545,7 +547,7 @@ class Reader:
|
|
| 545 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
| 546 |
stop=tenacity.stop_after_attempt(5),
|
| 547 |
reraise=True)
|
| 548 |
-
def chat_method(self, text, key):
|
| 549 |
openai.api_key = key
|
| 550 |
method_prompt_token = 650
|
| 551 |
text_token = len(self.encoding.encode(text))
|
|
@@ -573,6 +575,8 @@ class Reader:
|
|
| 573 |
response = openai.ChatCompletion.create(
|
| 574 |
model="gpt-3.5-turbo",
|
| 575 |
messages=messages,
|
|
|
|
|
|
|
| 576 |
)
|
| 577 |
|
| 578 |
result = ''
|
|
@@ -592,7 +596,7 @@ class Reader:
|
|
| 592 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
| 593 |
stop=tenacity.stop_after_attempt(5),
|
| 594 |
reraise=True)
|
| 595 |
-
def chat_summary(self, text, key):
|
| 596 |
openai.api_key = key
|
| 597 |
summary_prompt_token = 1000
|
| 598 |
text_token = len(self.encoding.encode(text))
|
|
@@ -631,6 +635,8 @@ class Reader:
|
|
| 631 |
response = openai.ChatCompletion.create(
|
| 632 |
model="gpt-3.5-turbo",
|
| 633 |
messages=messages,
|
|
|
|
|
|
|
| 634 |
)
|
| 635 |
|
| 636 |
result = ''
|
|
@@ -661,7 +667,7 @@ class Reader:
|
|
| 661 |
print(f"Query: {self.query}")
|
| 662 |
print(f"Sort: {self.sort}")
|
| 663 |
|
| 664 |
-
def upload_pdf(key, text, file):
|
| 665 |
# 检查两个输入都不为空
|
| 666 |
if not key or not text or not file:
|
| 667 |
return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
|
|
@@ -673,7 +679,7 @@ def upload_pdf(key, text, file):
|
|
| 673 |
paper_list = [Paper(path=file, sl=section_list)]
|
| 674 |
# 创建一个Reader对象
|
| 675 |
reader = Reader()
|
| 676 |
-
sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key)
|
| 677 |
return cost, sum_info
|
| 678 |
|
| 679 |
api_title = "api-key可用验证"
|
|
@@ -718,6 +724,8 @@ Use ChatGPT to summary the papers.Star our Github [🌟ChatPaper](https://github
|
|
| 718 |
ip = [
|
| 719 |
gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password'),
|
| 720 |
gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
|
|
|
|
|
|
|
| 721 |
gradio.inputs.File(label="请上传论文PDF(必填)")
|
| 722 |
]
|
| 723 |
|
|
|
|
| 422 |
|
| 423 |
return image_url
|
| 424 |
|
| 425 |
+
def summary_with_chat(self, paper_list, key, p, temperature):
|
| 426 |
htmls = []
|
| 427 |
utoken = 0
|
| 428 |
ctoken = 0
|
|
|
|
| 437 |
text += list(paper.section_text_dict.values())[0]
|
| 438 |
#max_token = 2500 * 4
|
| 439 |
#text = text[:max_token]
|
| 440 |
+
chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key), p=p, temperature=temperature)
|
| 441 |
htmls.append(chat_summary_text)
|
| 442 |
|
| 443 |
# TODO 往md文档中插入论文里的像素最大的一张图片,这个方案可以弄的更加智能一些:
|
|
|
|
| 455 |
# methods
|
| 456 |
method_text += paper.section_text_dict[method_key]
|
| 457 |
text = summary_text + "\n<Methods>:\n" + method_text
|
| 458 |
+
chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key), p=p, temperature=temperature)
|
| 459 |
htmls.append(chat_method_text)
|
| 460 |
else:
|
| 461 |
chat_method_text = ''
|
|
|
|
| 478 |
text = summary_text + "\n <Conclusion>:\n" + conclusion_text
|
| 479 |
else:
|
| 480 |
text = summary_text
|
| 481 |
+
chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key), p=p, temperature=temperature)
|
| 482 |
htmls.append(chat_conclusion_text)
|
| 483 |
htmls.append("\n")
|
| 484 |
# token统计
|
|
|
|
| 500 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
| 501 |
stop=tenacity.stop_after_attempt(5),
|
| 502 |
reraise=True)
|
| 503 |
+
def chat_conclusion(self, text, key, p, temperature):
|
| 504 |
openai.api_key = key
|
| 505 |
conclusion_prompt_token = 650
|
| 506 |
text_token = len(self.encoding.encode(text))
|
|
|
|
| 527 |
model="gpt-3.5-turbo",
|
| 528 |
# prompt需要用英语替换,少占用token。
|
| 529 |
messages=messages,
|
| 530 |
+
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
| 531 |
+
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
| 532 |
)
|
| 533 |
|
| 534 |
result = ''
|
|
|
|
| 547 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
| 548 |
stop=tenacity.stop_after_attempt(5),
|
| 549 |
reraise=True)
|
| 550 |
+
def chat_method(self, text, key, p, temperature):
|
| 551 |
openai.api_key = key
|
| 552 |
method_prompt_token = 650
|
| 553 |
text_token = len(self.encoding.encode(text))
|
|
|
|
| 575 |
response = openai.ChatCompletion.create(
|
| 576 |
model="gpt-3.5-turbo",
|
| 577 |
messages=messages,
|
| 578 |
+
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
| 579 |
+
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
| 580 |
)
|
| 581 |
|
| 582 |
result = ''
|
|
|
|
| 596 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
| 597 |
stop=tenacity.stop_after_attempt(5),
|
| 598 |
reraise=True)
|
| 599 |
+
def chat_summary(self, text, key, p, temperature):
|
| 600 |
openai.api_key = key
|
| 601 |
summary_prompt_token = 1000
|
| 602 |
text_token = len(self.encoding.encode(text))
|
|
|
|
| 635 |
response = openai.ChatCompletion.create(
|
| 636 |
model="gpt-3.5-turbo",
|
| 637 |
messages=messages,
|
| 638 |
+
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
| 639 |
+
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
| 640 |
)
|
| 641 |
|
| 642 |
result = ''
|
|
|
|
| 667 |
print(f"Query: {self.query}")
|
| 668 |
print(f"Sort: {self.sort}")
|
| 669 |
|
| 670 |
+
def upload_pdf(key, text, p, temperature, file):
|
| 671 |
# 检查两个输入都不为空
|
| 672 |
if not key or not text or not file:
|
| 673 |
return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
|
|
|
|
| 679 |
paper_list = [Paper(path=file, sl=section_list)]
|
| 680 |
# 创建一个Reader对象
|
| 681 |
reader = Reader()
|
| 682 |
+
sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key, p=p, temperature=temperature)
|
| 683 |
return cost, sum_info
|
| 684 |
|
| 685 |
api_title = "api-key可用验证"
|
|
|
|
| 724 |
ip = [
|
| 725 |
gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password'),
|
| 726 |
gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
|
| 727 |
+
gradio.inputs.Slider(minimum=0, maximum=1.0, default=1.0, step=0.05, label="Top-p (nucleus sampling)"),
|
| 728 |
+
gradio.inputs.Slider(minimum=0, maximum=2.0, default=1.0, step=0.1, label="Temperature"),
|
| 729 |
gradio.inputs.File(label="请上传论文PDF(必填)")
|
| 730 |
]
|
| 731 |
|