Spaces:
Paused
Paused
| import numpy as np | |
| import os | |
| import re | |
| import jieba | |
| from io import BytesIO | |
| import datetime | |
| import time | |
| import openai, tenacity | |
| import argparse | |
| import configparser | |
| import json | |
| import tiktoken | |
| import PyPDF2 | |
| import gradio | |
def contains_chinese(text):
    """Return True if *text* has at least one character in U+4E00..U+9FFF.

    Only that single code-point range is checked (both ends inclusive);
    characters outside it, such as CJK punctuation, do not count.
    An empty string yields False.
    """
    return any('\u4e00' <= char <= '\u9fff' for char in text)
def insert_sentence(text, sentence, interval):
    """Insert *sentence* after every *interval* words on each line of *text*.

    The text is processed line by line (split on '\\n') and the word counter
    restarts on every line.  Lines for which ``contains_chinese`` is true are
    segmented with ``jieba.cut`` and re-joined without a separator; all other
    lines are split on whitespace and re-joined with single spaces, so runs
    of whitespace inside such a line are collapsed.

    Args:
        text: The text to annotate.
        sentence: The string appended to the word list at each insertion point.
        interval: Number of words between insertions.

    Returns:
        The annotated lines joined back together with '\\n'.
    """
    marked_lines = []
    for raw_line in text.split('\n'):
        if contains_chinese(raw_line):
            tokens, glue = list(jieba.cut(raw_line)), ''
        else:
            tokens, glue = raw_line.split(), ' '

        pieces = []
        # Positions are 1-based so the first insertion lands after the
        # interval-th word rather than before the first one.
        for position, token in enumerate(tokens, start=1):
            pieces.append(token)
            if position % interval == 0:
                pieces.append(sentence)
        marked_lines.append(glue.join(pieces))
    return '\n'.join(marked_lines)
# Reviewer: extracts the text of an uploaded PDF and asks the chat model to review it
class Reviewer:
    """Generate a review of a PDF paper through the OpenAI chat API.

    Attributes are set once in ``__init__`` and only read afterwards.
    """

    # Endpoint used when the caller supplies an empty ``api_base``.
    DEFAULT_API_BASE = "https://api.openai.com/v1"

    def __init__(self, api, api_base, review_format, paper_pdf, language):
        """Store the request settings.

        Args:
            api: API key assigned to ``openai.api_key``.
            api_base: Base URL assigned to ``openai.api_base``.
            review_format: Requirements/format text appended to the system prompt.
            paper_pdf: Raw bytes of the PDF (wrapped in ``BytesIO`` on extraction).
            language: Name of the language the answer should be written in.
        """
        self.api = api
        self.review_format = review_format
        self.api_base = api_base
        self.language = language
        self.paper_pdf = paper_pdf
        # Token budget used to decide how much of the paper text is sent.
        self.max_token_num = 50000
        # Tokenizer used only to estimate the length of the paper text.
        self.encoding = tiktoken.get_encoding("gpt2")

    def review_by_chatgpt(self, paper_list):
        """Extract the text of ``self.paper_pdf`` and return its review.

        Args:
            paper_list: Unused; kept so existing callers keep working.

        Returns:
            The ``(review_text, usage)`` tuple produced by ``chat_review``.
        """
        text = self.extract_chapter(self.paper_pdf)
        chat_review_text, total_token_used = self.chat_review(text=text)
        return chat_review_text, total_token_used

    def chat_review(self, text):
        """Send *text* to the chat model and return ``(result, usage)``.

        The paper text is truncated proportionally so that its estimated
        token count fits in ``max_token_num`` minus a fixed allowance for the
        prompt.  Note that this sets the module-level ``openai.api_key`` and
        ``openai.api_base``.

        Returns:
            ``result`` is the watermarked review followed by the ethics
            statement, or an error message if the request failed.  ``usage``
            is the total token count reported by the API, or the placeholder
            string ``'xxxxx'`` on failure.
        """
        openai.api_key = self.api
        # An empty base URL would make every request fail; fall back to the
        # official endpoint instead.
        openai.api_base = self.api_base or self.DEFAULT_API_BASE
        review_prompt_token = 1000
        try:
            text_token = len(self.encoding.encode(text))
        except Exception:
            # Best-effort estimate when the tokenizer cannot encode the text.
            text_token = 13000
        # Character cut-off scaled by the ratio of token budget to estimated
        # tokens; the +1 avoids division by zero for empty text.
        input_text_index = int(len(text)*(self.max_token_num-review_prompt_token)/(text_token+1))
        input_text = "This is the paper for your review:" + text[:input_text_index]
        messages = [
            {"role": "system", "content": "You are a professional reviewer. Now I will give you a paper. You need to give a complete review opinion according to the following requirements and format:"+ self.review_format + "Be sure to use {} answers".format(self.language)},
            {"role": "user", "content": input_text + " Translate the output into {}.".format(self.language)},
        ]
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4o-mini",
                messages=messages,
                temperature=0.5
            )
            result = ''.join(choice.message.content for choice in response.choices)
            result = insert_sentence(result, '**Generated by ChatGPT, no copying allowed!**', 50)
            result += "\n\n⚠伦理声明/Ethics statement:\n--禁止直接复制生成的评论用于任何论文审稿工作!\n--Direct copying of generated comments for any paper review work is prohibited!"
            # total_tokens is the API's own count; the character cut-off
            # (input_text_index) is not a token count and must not be added.
            usage = response.usage.total_tokens
        except Exception as e:
            # Top-level boundary for the request: report the failure as text
            # so the caller can show it instead of crashing.
            result = "⚠:非常抱歉>_<,生了一个错误:"+ str(e)
            usage = 'xxxxx'
        print("********"*10)
        print(result)
        print("********"*10)
        return result, usage

    def extract_chapter(self, pdf_path):
        """Return the concatenated text of every page of a PDF.

        Args:
            pdf_path: Despite the name, the raw PDF bytes (wrapped in ``BytesIO``).

        Returns:
            The text of all pages joined in page order with no separator.
        """
        pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_path))
        # `or ""` keeps the join safe should a page yield no text object.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def main(api, api_base, review_format, paper_pdf, language):
    """Gradio callback: review the uploaded paper and report resource usage.

    Args:
        api: API key entered by the user.
        api_base: Base URL of the API endpoint.
        review_format: Review requirements/format text.
        paper_pdf: Uploaded PDF content.
        language: Output language chosen by the user.

    Returns:
        A ``(comments, stats)`` tuple of strings.  When the API key, the
        review format or the PDF is missing, both items are the same warning
        message and no request is made.
    """
    start_time = time.time()

    # Guard clause: all three of these inputs are required.
    if not (api and review_format and paper_pdf):
        warning = "⚠:API-key或审稿要求或论文pdf未输入!请检测!"
        return warning, warning

    reviewer = Reviewer(api, api_base, review_format, paper_pdf, language)
    comments, total_token_used = reviewer.review_by_chatgpt(paper_list=paper_pdf)
    elapsed = round(time.time() - start_time, 2)
    output2 = f"使用token数:{total_token_used}\n花费时间:{elapsed}秒"
    return comments, output2
########################################################################################################
# Page title shown by the Gradio interface
title = "🤖ChatReviewer🤖"
# Description (HTML/Markdown) passed to the Gradio interface
description = '''<div align='left'>
<img align='right' src='http://i.imgtg.com/2023/03/22/94PLN.png' width="220">
<strong>ChatReviewer是一款基于OpenAI的API开发的智能论文分析与建议助手。(本系统不会获取上传的pdf内容)</strong>其用途如下:
⭐️对论文的优缺点进行快速总结和分析,提高科研人员的文献阅读和理解的效率,紧跟研究前沿。
⭐️对自己的论文进行分析,根据ChatReviewer生成的改进建议进行查漏补缺,进一步提高自己的论文质量。
如果觉得很卡,可以点击右上角的Duplicate this Space,把ChatReviewer复制到你自己的Space中!(🈲:禁止直接复制生成的评论用于任何论文审稿工作!)
本项目的[Github](https://github.com/nishiwen1214/ChatReviewer),欢迎Star和Fork,也欢迎大佬赞助让本项目快速成长!💗
</div>
'''
# Build the Gradio interface.
# The widgets are listed in the same order as main()'s parameters:
# api, api_base, review_format, paper_pdf, language.
inp = [
    gradio.Textbox(label="请输入你的API-key(sk开头的字符串)",
                   value="",
                   type='password'),
    gradio.Textbox(label="请输入第三方中转网址(以/v1结尾,使用原始OpenAI的API请跳过这里)",
                   value="https://api.openai.com/v1"),
    gradio.Textbox(lines=5,
                   label="请输入特定的分析要求和格式(否则为默认格式)",
                   value="""* Overall Review
Please briefly summarize the main points and contributions of this paper.
xxx
* Paper Strength
Please provide a list of the strengths of this paper, including but not limited to: innovative and practical methodology, insightful empirical findings or in-depth theoretical analysis,
well-structured review of relevant literature, and any other factors that may make the paper valuable to readers. (Maximum length: 2,000 characters)
(1) xxx
(2) xxx
(3) xxx
* Paper Weakness
Please provide a numbered list of your main concerns regarding this paper (so authors could respond to the concerns individually).
These may include, but are not limited to: inadequate implementation details for reproducing the study, limited evaluation and ablation studies for the proposed method,
correctness of the theoretical analysis or experimental results, lack of comparisons or discussions with widely-known baselines in the field, lack of clarity in exposition,
or any other factors that may impede the reader's understanding or benefit from the paper. Please kindly refrain from providing a general assessment of the paper's novelty without providing detailed explanations. (Maximum length: 2,000 characters)
(1) xxx
(2) xxx
(3) xxx
* Questions To Authors And Suggestions For Rebuttal
Please provide a numbered list of specific and clear questions that pertain to the details of the proposed method, evaluation setting, or additional results that would aid in supporting the authors' claims.
The questions should be formulated in a manner that, after the authors have answered them during the rebuttal, it would enable a more thorough assessment of the paper's quality. (Maximum length: 2,000 characters)
*Overall score (1-10)
The paper is scored on a scale of 1-10, with 10 being the full mark, and 6 stands for borderline accept. Then give the reason for your rating.
xxx"""
                   ),
    gradio.File(label="请上传论文PDF文件(请务必等pdf上传完成后再点击Submit!)", type="binary"),
    # The selected value is inserted verbatim into the prompt as the target
    # language, so the names must be spelled correctly.
    gradio.Radio(choices=["English", "Chinese", "French", "German", "Japanese"],
                 value="English",
                 label="选择输出语言"),
]
chat_reviewer_gui = gradio.Interface(
    fn=main,
    inputs=inp,
    outputs=[gradio.Textbox(lines=25, label="分析结果"), gradio.Textbox(lines=2, label="资源统计")],
    title=title,
    description=description,
)
# Start server
chat_reviewer_gui.launch(quiet=True, show_api=False)