Spaces:
Build error
Build error
| from PyPDF2 import PdfReader | |
| import openai | |
| from .prompt import BASE_POINTS, READING_PROMT_V2 | |
| from .paper import Paper | |
| from .model_interface import OpenAIModel | |
| # Note: the OpenAI API key is configured inside PaperReader.__init__, not at import time. | |
class PaperReader:
    """
    Summarize research papers part by part through an OpenAI chat model.

    Two separate conversations are maintained:

    * ``messages`` -- the rolling "reading" conversation. It starts with the
      reading system prompt and is trimmed after every round so that only
      the most recent rounds are re-sent to the model.
    * ``summary_msg`` -- the "summary" conversation. It starts with the
      summary system prompt, collects the per-part summaries, and is also
      used for the final whole-paper summary and for follow-up questions.

    Attributes:
        init_prompt (str): Reading system prompt, i.e. ``READING_PROMT_V2``
            formatted with the ``points_to_focus`` constructor argument.
        summary_prompt (str): System prompt of the summary conversation.
        messages (list): Current reading conversation.
        summary_msg (list): Current summary conversation.
        token_len (int): The ``token_length`` constructor argument. It is
            stored but not read anywhere inside this class.
        keep_round (int): Number of previous user/reply rounds kept in
            ``messages`` after trimming.
        model (OpenAIModel): Wrapper through which messages are sent.
        verbose (bool): When true, each per-part summary is printed.
    """

    def __init__(self, openai_key, token_length=4000, model="gpt-3.5-turbo",
                 points_to_focus=BASE_POINTS, verbose=False):
        """
        Args:
            openai_key (str): API key for the OpenAI API.
            token_length (int): Stored as ``token_len``.
            model (str): Model name handed to ``OpenAIModel``.
            points_to_focus (str): Key points inserted into the reading prompt.
            verbose (bool): Print each per-part summary while summarizing.
        """
        # Module-level side effect: the key is also set globally on `openai`.
        openai.api_key = openai_key

        # Prompts that open the two conversations.
        self.init_prompt = READING_PROMT_V2.format(points_to_focus)
        self.summary_prompt = 'You are a researcher helper bot. Now you need to read the summaries of a research paper.'

        self.messages = []      # reading conversation
        self.summary_msg = []   # summary conversation
        self.token_len = token_length
        self.keep_round = 2     # rounds of previous dialogue kept in `messages`
        self.verbose = verbose

        # The model *name* is only needed to build the wrapper; the attribute
        # holds the wrapper itself (the original assigned the name first and
        # immediately overwrote it).
        self.model = OpenAIModel(api_key=openai_key, model=model)

    def drop_conversation(self, msg):
        """
        Drop the oldest round from a conversation once it has grown too long.

        When ``msg`` holds at least ``(keep_round + 1) * 2 + 1`` entries, a
        new list is returned that keeps ``msg[0]`` and everything from index
        3 onwards, i.e. entries 1 and 2 (the oldest user/reply pair when the
        list is laid out as prompt, user, reply, ...) are removed. Otherwise
        ``msg`` itself is returned unchanged.
        """
        threshold = (self.keep_round + 1) * 2 + 1
        if len(msg) < threshold:
            return msg
        return [msg[0], *msg[3:]]

    def send_msg(self, msg):
        """Send a list of chat messages through the model wrapper and return its reply."""
        return self.model.send_msg(msg)

    def _chat(self, message):
        """
        Run one round of the reading conversation.

        Appends ``message`` as a user turn, sends the whole conversation,
        records the reply, trims old rounds and returns the reply.
        """
        self.messages.append({"role": "user", "content": message})
        response = self.send_msg(self.messages)
        # Model output belongs to the "assistant" role; "system" is reserved
        # for instructions and would re-inject the reply as a new directive.
        self.messages.append({"role": "assistant", "content": response})
        # Keep the history short so the next request stays small.
        self.messages = self.drop_conversation(self.messages)
        return response

    def summarize(self, paper: Paper):
        """
        Summarize ``paper`` part by part, then ask for an overall summary.

        Both conversations are reset first. Every ``(page_idx, part_idx,
        text)`` triple produced by ``paper.iter_pages()`` is sent through the
        reading conversation; each reply is stored as a user message in the
        summary conversation. Finally the model is asked to summarize the
        whole paper and that reply is returned.
        """
        # Start both conversations from their system prompts.
        self.messages = [{"role": "system", "content": self.init_prompt}]
        self.summary_msg = [{"role": "system", "content": self.summary_prompt}]

        for page_idx, part_idx, text in paper.iter_pages():
            # Progress output is printed regardless of `verbose`.
            print(f'page: {page_idx}, part: {part_idx}')
            summary = self._chat(f'now I send you page {page_idx}, part {part_idx}:{text}')
            if self.verbose:
                print(summary)
            # Part summaries are fed to the summary conversation as user input.
            self.summary_msg.append({"role": "user", "content": f'{summary}'})

        self.summary_msg.append({"role": "user", "content": 'Now please make a summary of the whole paper'})
        # Note: the overall summary is returned but not appended to `summary_msg`.
        return self.send_msg(self.summary_msg)

    def read_pdf_and_summarize(self, pdf_path):
        """Open the PDF at ``pdf_path``, wrap it in a ``Paper`` and return its summary."""
        paper = Paper(PdfReader(pdf_path))
        print('reading pdf finished')
        return self.summarize(paper)

    def get_summary_of_each_part(self):
        """
        Return the summary conversation list (the live list, not a copy).

        Besides the per-part summaries it contains the summary system prompt,
        the final "summarize the whole paper" request and any question/answer
        turns added by ``question``.
        """
        return self.summary_msg

    def question(self, question):
        """
        Ask a follow-up question in the summary conversation.

        The question and the model's answer are both appended to
        ``summary_msg`` so later questions see them. If no paper has been
        summarized yet, ``summary_msg`` is empty and the question is sent
        without any preceding context.
        """
        self.summary_msg.append({"role": "user", "content": question})
        response = self.send_msg(self.summary_msg)
        # Store the answer under the "assistant" role (see `_chat`).
        self.summary_msg.append({"role": "assistant", "content": response})
        return response