Spaces:
Running
Running
Commit
·
e2af90f
1
Parent(s):
8a972d8
Upload app.py
Browse files
app.py
CHANGED
|
@@ -290,7 +290,7 @@ class Reader:
|
|
| 290 |
def __init__(self, key_word='', query='', filter_keys='',
|
| 291 |
root_path='./',
|
| 292 |
gitee_key='',
|
| 293 |
-
sort=arxiv.SortCriterion.SubmittedDate, user_name='defualt', language='cn', key='', p=1.0, temperature=1.0):
|
| 294 |
self.key = str(key) # OpenAI key
|
| 295 |
self.user_name = user_name # 读者姓名
|
| 296 |
self.key_word = key_word # 读者感兴趣的关键词
|
|
@@ -422,7 +422,7 @@ class Reader:
|
|
| 422 |
|
| 423 |
return image_url
|
| 424 |
|
| 425 |
-
def summary_with_chat(self, paper_list, key, p, temperature):
|
| 426 |
htmls = []
|
| 427 |
utoken = 0
|
| 428 |
ctoken = 0
|
|
@@ -437,7 +437,7 @@ class Reader:
|
|
| 437 |
text += list(paper.section_text_dict.values())[0]
|
| 438 |
#max_token = 2500 * 4
|
| 439 |
#text = text[:max_token]
|
| 440 |
-
chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key), p=p, temperature=temperature)
|
| 441 |
htmls.append(chat_summary_text)
|
| 442 |
|
| 443 |
# TODO 往md文档中插入论文里的像素最大的一张图片,这个方案可以弄的更加智能一些:
|
|
@@ -455,7 +455,7 @@ class Reader:
|
|
| 455 |
# methods
|
| 456 |
method_text += paper.section_text_dict[method_key]
|
| 457 |
text = summary_text + "\n<Methods>:\n" + method_text
|
| 458 |
-
chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key), p=p, temperature=temperature)
|
| 459 |
htmls.append(chat_method_text)
|
| 460 |
else:
|
| 461 |
chat_method_text = ''
|
|
@@ -478,7 +478,7 @@ class Reader:
|
|
| 478 |
text = summary_text + "\n <Conclusion>:\n" + conclusion_text
|
| 479 |
else:
|
| 480 |
text = summary_text
|
| 481 |
-
chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key), p=p, temperature=temperature)
|
| 482 |
htmls.append(chat_conclusion_text)
|
| 483 |
htmls.append("\n")
|
| 484 |
# token统计
|
|
@@ -500,7 +500,7 @@ class Reader:
|
|
| 500 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
| 501 |
stop=tenacity.stop_after_attempt(5),
|
| 502 |
reraise=True)
|
| 503 |
-
def chat_conclusion(self, text, key, p, temperature):
|
| 504 |
openai.api_key = key
|
| 505 |
conclusion_prompt_token = 650
|
| 506 |
text_token = len(self.encoding.encode(text))
|
|
@@ -524,7 +524,7 @@ class Reader:
|
|
| 524 |
"""},
|
| 525 |
]
|
| 526 |
response = openai.ChatCompletion.create(
|
| 527 |
-
model=
|
| 528 |
# prompt需要用英语替换,少占用token。
|
| 529 |
messages=messages,
|
| 530 |
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
|
@@ -547,7 +547,7 @@ class Reader:
|
|
| 547 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
| 548 |
stop=tenacity.stop_after_attempt(5),
|
| 549 |
reraise=True)
|
| 550 |
-
def chat_method(self, text, key, p, temperature):
|
| 551 |
openai.api_key = key
|
| 552 |
method_prompt_token = 650
|
| 553 |
text_token = len(self.encoding.encode(text))
|
|
@@ -573,7 +573,7 @@ class Reader:
|
|
| 573 |
"""},
|
| 574 |
]
|
| 575 |
response = openai.ChatCompletion.create(
|
| 576 |
-
model=
|
| 577 |
messages=messages,
|
| 578 |
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
| 579 |
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
|
@@ -596,7 +596,7 @@ class Reader:
|
|
| 596 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
| 597 |
stop=tenacity.stop_after_attempt(5),
|
| 598 |
reraise=True)
|
| 599 |
-
def chat_summary(self, text, key, p, temperature):
|
| 600 |
openai.api_key = key
|
| 601 |
summary_prompt_token = 1000
|
| 602 |
text_token = len(self.encoding.encode(text))
|
|
@@ -633,7 +633,7 @@ class Reader:
|
|
| 633 |
]
|
| 634 |
|
| 635 |
response = openai.ChatCompletion.create(
|
| 636 |
-
model=
|
| 637 |
messages=messages,
|
| 638 |
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
| 639 |
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
|
@@ -667,7 +667,7 @@ class Reader:
|
|
| 667 |
print(f"Query: {self.query}")
|
| 668 |
print(f"Sort: {self.sort}")
|
| 669 |
|
| 670 |
-
def upload_pdf(key, text, p, temperature, file):
|
| 671 |
# 检查两个输入都不为空
|
| 672 |
if not key or not text or not file:
|
| 673 |
return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
|
|
@@ -679,7 +679,7 @@ def upload_pdf(key, text, p, temperature, file):
|
|
| 679 |
paper_list = [Paper(path=file, sl=section_list)]
|
| 680 |
# 创建一个Reader对象
|
| 681 |
reader = Reader()
|
| 682 |
-
sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key, p=p, temperature=temperature)
|
| 683 |
return cost, sum_info
|
| 684 |
|
| 685 |
api_title = "api-key可用验证"
|
|
@@ -724,6 +724,7 @@ Use ChatGPT to summary the papers.Star our Github [🌟ChatPaper](https://github
|
|
| 724 |
ip = [
|
| 725 |
gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password'),
|
| 726 |
gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
|
|
|
|
| 727 |
gradio.inputs.Slider(minimum=-0, maximum=1.0, default=1.0, step=0.05, label="Top-p (nucleus sampling)"),
|
| 728 |
gradio.inputs.Slider(minimum=-0, maximum=5.0, default=1.0, step=0.1, label="Temperature"),
|
| 729 |
gradio.inputs.File(label="请上传论文PDF(必填)")
|
|
@@ -733,4 +734,4 @@ chatpaper_gui = gradio.Interface(fn=upload_pdf, inputs=ip, outputs=["json", "htm
|
|
| 733 |
|
| 734 |
# Start server
|
| 735 |
gui = gradio.TabbedInterface(interface_list=[api_gui, chatpaper_gui], tab_names=["API-key", "ChatPaper"])
|
| 736 |
-
gui.launch(quiet=True,show_api=False)
|
|
|
|
| 290 |
def __init__(self, key_word='', query='', filter_keys='',
|
| 291 |
root_path='./',
|
| 292 |
gitee_key='',
|
| 293 |
+
sort=arxiv.SortCriterion.SubmittedDate, user_name='default', language='cn', key='', model_name="gpt-3.5-turbo", p=1.0, temperature=1.0):
|
| 294 |
self.key = str(key) # OpenAI key
|
| 295 |
self.user_name = user_name # 读者姓名
|
| 296 |
self.key_word = key_word # 读者感兴趣的关键词
|
|
|
|
| 422 |
|
| 423 |
return image_url
|
| 424 |
|
| 425 |
+
def summary_with_chat(self, paper_list, key, model_name, p, temperature):
|
| 426 |
htmls = []
|
| 427 |
utoken = 0
|
| 428 |
ctoken = 0
|
|
|
|
| 437 |
text += list(paper.section_text_dict.values())[0]
|
| 438 |
#max_token = 2500 * 4
|
| 439 |
#text = text[:max_token]
|
| 440 |
+
chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
|
| 441 |
htmls.append(chat_summary_text)
|
| 442 |
|
| 443 |
# TODO 往md文档中插入论文里的像素最大的一张图片,这个方案可以弄的更加智能一些:
|
|
|
|
| 455 |
# methods
|
| 456 |
method_text += paper.section_text_dict[method_key]
|
| 457 |
text = summary_text + "\n<Methods>:\n" + method_text
|
| 458 |
+
chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
|
| 459 |
htmls.append(chat_method_text)
|
| 460 |
else:
|
| 461 |
chat_method_text = ''
|
|
|
|
| 478 |
text = summary_text + "\n <Conclusion>:\n" + conclusion_text
|
| 479 |
else:
|
| 480 |
text = summary_text
|
| 481 |
+
chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
|
| 482 |
htmls.append(chat_conclusion_text)
|
| 483 |
htmls.append("\n")
|
| 484 |
# token统计
|
|
|
|
| 500 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
| 501 |
stop=tenacity.stop_after_attempt(5),
|
| 502 |
reraise=True)
|
| 503 |
+
def chat_conclusion(self, text, key, model_name, p, temperature):
|
| 504 |
openai.api_key = key
|
| 505 |
conclusion_prompt_token = 650
|
| 506 |
text_token = len(self.encoding.encode(text))
|
|
|
|
| 524 |
"""},
|
| 525 |
]
|
| 526 |
response = openai.ChatCompletion.create(
|
| 527 |
+
model=model_name,
|
| 528 |
# prompt需要用英语替换,少占用token。
|
| 529 |
messages=messages,
|
| 530 |
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
|
|
|
| 547 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
| 548 |
stop=tenacity.stop_after_attempt(5),
|
| 549 |
reraise=True)
|
| 550 |
+
def chat_method(self, text, key, model_name, p, temperature):
|
| 551 |
openai.api_key = key
|
| 552 |
method_prompt_token = 650
|
| 553 |
text_token = len(self.encoding.encode(text))
|
|
|
|
| 573 |
"""},
|
| 574 |
]
|
| 575 |
response = openai.ChatCompletion.create(
|
| 576 |
+
model=model_name,
|
| 577 |
messages=messages,
|
| 578 |
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
| 579 |
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
|
|
|
| 596 |
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
|
| 597 |
stop=tenacity.stop_after_attempt(5),
|
| 598 |
reraise=True)
|
| 599 |
+
def chat_summary(self, text, key, model_name, p, temperature):
|
| 600 |
openai.api_key = key
|
| 601 |
summary_prompt_token = 1000
|
| 602 |
text_token = len(self.encoding.encode(text))
|
|
|
|
| 633 |
]
|
| 634 |
|
| 635 |
response = openai.ChatCompletion.create(
|
| 636 |
+
model=model_name,
|
| 637 |
messages=messages,
|
| 638 |
temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
|
| 639 |
top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
|
|
|
|
| 667 |
print(f"Query: {self.query}")
|
| 668 |
print(f"Sort: {self.sort}")
|
| 669 |
|
| 670 |
+
def upload_pdf(key, text, model_name, p, temperature, file):
|
| 671 |
# 检查两个输入都不为空
|
| 672 |
if not key or not text or not file:
|
| 673 |
return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
|
|
|
|
| 679 |
paper_list = [Paper(path=file, sl=section_list)]
|
| 680 |
# 创建一个Reader对象
|
| 681 |
reader = Reader()
|
| 682 |
+
sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key, model_name=model_name, p=p, temperature=temperature)
|
| 683 |
return cost, sum_info
|
| 684 |
|
| 685 |
api_title = "api-key可用验证"
|
|
|
|
| 724 |
ip = [
|
| 725 |
gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password'),
|
| 726 |
gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
|
| 727 |
+
gradio.inputs.Radio(choices=["gpt-3.5-turbo", "gpt-3.5-turbo-0301"], default="gpt-3.5-turbo", label="模型"),
|
| 728 |
gradio.inputs.Slider(minimum=-0, maximum=1.0, default=1.0, step=0.05, label="Top-p (nucleus sampling)"),
|
| 729 |
gradio.inputs.Slider(minimum=-0, maximum=5.0, default=1.0, step=0.1, label="Temperature"),
|
| 730 |
gradio.inputs.File(label="请上传论文PDF(必填)")
|
|
|
|
| 734 |
|
| 735 |
# Start server
|
| 736 |
gui = gradio.TabbedInterface(interface_list=[api_gui, chatpaper_gui], tab_names=["API-key", "ChatPaper"])
|
| 737 |
+
gui.launch(quiet=True,show_api=False)
|