QuantumLearner committed on
Commit
a532e37
·
verified ·
1 Parent(s): eaf75eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -61
app.py CHANGED
@@ -1,63 +1,57 @@
 
 
 
1
  import chainlit as cl
2
  import arxiv
3
- from langchain.chat_models import ChatOpenAI
4
  from langchain.chains import ConversationalRetrievalChain
5
  from langchain.memory import ConversationBufferMemory
6
  from langchain.text_splitter import CharacterTextSplitter
7
- from langchain.embeddings import OpenAIEmbeddings
8
  from langchain.vectorstores import FAISS
9
- import os
10
  from dotenv import load_dotenv
11
- load_dotenv()
12
-
13
 
14
- # Initialize global variables
15
- selected_paper = None
16
- qa_chain = None
17
- papers = []
18
- state = "SEARCH" # Possible states: SEARCH, SELECT, QA
19
 
20
- @cl.on_chat_start
21
- def start():
22
- global state
23
- state = "SEARCH"
24
- cl.Message(content="Welcome! Please enter a search query for arXiv papers.").send()
 
25
 
26
- @cl.on_message
27
- def main(message: str):
28
- global selected_paper, qa_chain, papers, state
29
-
30
- if state == "SEARCH":
31
  search = arxiv.Search(
32
- query=message,
33
  max_results=5,
34
  sort_by=arxiv.SortCriterion.Relevance
35
  )
36
 
37
- papers = list(search.results())
38
 
39
- if not papers:
40
- cl.Message(content="No papers found. Please try another search query.").send()
41
- return
42
 
43
- paper_list = "\n".join([f"{i+1}. {paper.title} - {paper.authors[0]}\nLink: {paper.entry_id}" for i, paper in enumerate(papers)])
44
- cl.Message(content=f"Please select a paper by entering its number:\n\n{paper_list}\n\nEnter the number of the paper you want to select:").send()
45
- state = "SELECT"
 
 
46
 
47
- elif state == "SELECT":
48
  try:
49
- selected_index = int(message) - 1
50
- if 0 <= selected_index < len(papers):
51
- selected_paper = papers[selected_index]
52
  else:
53
- cl.Message(content="Invalid selection. Please try again.").send()
54
- return
55
  except ValueError:
56
- cl.Message(content="Invalid input. Please enter a number.").send()
57
- return
58
 
59
  # Download the entire paper content (if available)
60
- paper_text = f"{selected_paper.title}\n\n{selected_paper.summary}\n\n{selected_paper.comment}"
61
 
62
  # Split the text into chunks
63
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
@@ -69,8 +63,8 @@ def main(message: str):
69
  chunks,
70
  embeddings,
71
  metadatas=[{
72
- "title": selected_paper.title,
73
- "link": selected_paper.entry_id,
74
  "chunk": f"Chunk {i+1}/{len(chunks)}"
75
  } for i in range(len(chunks))]
76
  )
@@ -82,35 +76,59 @@ def main(message: str):
82
  output_key="answer"
83
  )
84
 
85
- qa_chain = ConversationalRetrievalChain.from_llm(
86
  ChatOpenAI(temperature=0, model="gpt-4o-mini"),
87
  vectorstore.as_retriever(),
88
  memory=memory,
89
  return_source_documents=True
90
  )
91
 
92
- cl.Message(content=f"Selected paper: {selected_paper.title}\nLink: {selected_paper.entry_id}\nYou can now ask questions about this paper. Type 'new search' when you want to search for a different paper.").send()
93
- state = "QA"
 
94
 
95
- elif state == "QA":
96
  if message.lower() == "new search":
97
- state = "SEARCH"
98
- selected_paper = None
99
- qa_chain = None
100
- papers = []
101
- cl.Message(content="Sure! Please enter a new search query for arXiv papers.").send()
102
- else:
103
- # Answer questions about the selected paper
104
- response = qa_chain({"question": message})
105
- answer = response["answer"]
106
-
107
- # Handling the sources with chunk-specific metadata
108
- sources = "\n".join([f"- {doc.metadata.get('title', 'Unknown title')} ({doc.metadata.get('link', 'No link')}) - {doc.metadata.get('chunk', 'No chunk info')}" for doc in response.get("source_documents", [])])
109
- if sources:
110
- answer += f"\n\nSources:\n{sources}"
111
-
112
- # Send the response with sources
113
- cl.Message(content=answer).send()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  if __name__ == "__main__":
116
- cl.run()
 
1
+ import os
2
+ from typing import List
3
+
4
  import chainlit as cl
5
  import arxiv
6
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
7
  from langchain.chains import ConversationalRetrievalChain
8
  from langchain.memory import ConversationBufferMemory
9
  from langchain.text_splitter import CharacterTextSplitter
 
10
  from langchain.vectorstores import FAISS
 
11
  from dotenv import load_dotenv
 
 
12
 
13
+ load_dotenv()
 
 
 
 
14
 
15
+ class ArxivResearchAssistant:
16
+ def __init__(self):
17
+ self.selected_paper = None
18
+ self.qa_chain = None
19
+ self.papers: List[arxiv.Result] = []
20
+ self.state = "SEARCH"
21
 
22
async def search_papers(self, query: str):
    """Search arXiv for ``query`` and ask the user to pick one of the hits.

    At most five results, sorted by relevance, are stored on ``self.papers``.
    When at least one is found, a numbered list is sent to the chat and the
    assistant moves to the "SELECT" state.

    Args:
        query: Free-text arXiv search query.

    Returns:
        The list of results, or ``None`` when nothing was found. In that case
        the state is left untouched, so the next message is handled as a new
        query.
    """
    search = arxiv.Search(
        query=query,
        max_results=5,
        sort_by=arxiv.SortCriterion.Relevance,
    )

    def _fetch():
        # Search.results() is deprecated in current arxiv releases; the
        # supported path is to hand the Search to a Client.
        return list(arxiv.Client().results(search))

    # The arxiv client performs blocking HTTP requests. Running it through
    # cl.make_async keeps the event loop free for other chat sessions.
    self.papers = await cl.make_async(_fetch)()

    if not self.papers:
        await cl.Message(content="No papers found. Please try another search query.").send()
        return None

    entries = []
    for number, paper in enumerate(self.papers, start=1):
        # Guard against a record without authors instead of raising IndexError.
        lead_author = paper.authors[0] if paper.authors else "Unknown author"
        entries.append(f"{number}. {paper.title} - {lead_author}\nLink: {paper.entry_id}")
    paper_list = "\n".join(entries)

    await cl.Message(content=f"Please select a paper by entering its number:\n\n{paper_list}\n\nEnter the number of the paper you want to select:").send()
    self.state = "SELECT"
    return self.papers
40
 
41
+ async def select_paper(self, selection: str):
42
  try:
43
+ selected_index = int(selection) - 1
44
+ if 0 <= selected_index < len(self.papers):
45
+ self.selected_paper = self.papers[selected_index]
46
  else:
47
+ await cl.Message(content="Invalid selection. Please try again.").send()
48
+ return None
49
  except ValueError:
50
+ await cl.Message(content="Invalid input. Please enter a number.").send()
51
+ return None
52
 
53
  # Download the entire paper content (if available)
54
+ paper_text = f"{self.selected_paper.title}\n\n{self.selected_paper.summary}\n\n{self.selected_paper.comment or ''}"
55
 
56
  # Split the text into chunks
57
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 
63
  chunks,
64
  embeddings,
65
  metadatas=[{
66
+ "title": self.selected_paper.title,
67
+ "link": self.selected_paper.entry_id,
68
  "chunk": f"Chunk {i+1}/{len(chunks)}"
69
  } for i in range(len(chunks))]
70
  )
 
76
  output_key="answer"
77
  )
78
 
79
+ self.qa_chain = ConversationalRetrievalChain.from_llm(
80
  ChatOpenAI(temperature=0, model="gpt-4o-mini"),
81
  vectorstore.as_retriever(),
82
  memory=memory,
83
  return_source_documents=True
84
  )
85
 
86
+ await cl.Message(content=f"Selected paper: {self.selected_paper.title}\nLink: {self.selected_paper.entry_id}\nYou can now ask questions about this paper. Type 'new search' when you want to search for a different paper.").send()
87
+ self.state = "QA"
88
+ return self.selected_paper
89
 
90
async def process_question(self, message: str):
    """Answer a question about the selected paper, or restart on "new search".

    Args:
        message: The user's text. "new search" (case-insensitive, surrounding
            whitespace ignored) resets the assistant instead of being asked
            as a question.

    Returns:
        The answer text, followed by a "Sources:" list when the chain returned
        source documents; ``None`` when the message was the "new search"
        command (the confirmation is sent to the chat directly).
    """
    # Strip before comparing so "new search " or a trailing newline still
    # counts as the command rather than being sent to the model.
    if message.strip().lower() == "new search":
        self.reset()
        await cl.Message(content="Sure! Please enter a new search query for arXiv papers.").send()
        return None

    # Calling the chain object directly is deprecated and runs synchronously,
    # which would stall the event loop for the duration of the LLM call.
    response = await self.qa_chain.ainvoke({"question": message})
    answer = response["answer"]

    # One line per retrieved chunk, using the metadata attached at indexing time.
    source_lines = [
        f"- {doc.metadata.get('title', 'Unknown title')} ({doc.metadata.get('link', 'No link')}) - {doc.metadata.get('chunk', 'No chunk info')}"
        for doc in response.get("source_documents", [])
    ]
    if source_lines:
        sources = "\n".join(source_lines)
        answer += f"\n\nSources:\n{sources}"

    return answer
105
+
106
def reset(self):
    """Drop the selected paper, chain and search results and go back to "SEARCH"."""
    # The tuple is rebuilt on every call, so ``papers`` gets a fresh list each time.
    initial_values = (
        ("selected_paper", None),
        ("qa_chain", None),
        ("papers", []),
        ("state", "SEARCH"),
    )
    for attribute, value in initial_values:
        setattr(self, attribute, value)
111
+
112
# Legacy module-level instance, kept so existing references to ``assistant``
# still resolve. The handlers below no longer route through it: one shared
# object would leak a user's search state into every other chat session.
assistant = ArxivResearchAssistant()


def _session_assistant() -> ArxivResearchAssistant:
    """Return the current chat session's assistant, creating it on first use."""
    current = cl.user_session.get("assistant")
    if current is None:
        current = ArxivResearchAssistant()
        cl.user_session.set("assistant", current)
    return current


@cl.on_chat_start
async def start():
    """Give the new chat session its own assistant and greet the user."""
    cl.user_session.set("assistant", ArxivResearchAssistant())
    await cl.Message(content="Welcome! Please enter a search query for arXiv papers.").send()


@cl.on_message
async def main(message: cl.Message):
    """Dispatch an incoming message according to the session assistant's state.

    "SEARCH" treats the text as an arXiv query, "SELECT" as the number of the
    paper to load, and "QA" as a question about the loaded paper.
    """
    session_assistant = _session_assistant()
    text = message.content

    if session_assistant.state == "SEARCH":
        await session_assistant.search_papers(text)

    elif session_assistant.state == "SELECT":
        await session_assistant.select_paper(text)

    elif session_assistant.state == "QA":
        answer = await session_assistant.process_question(text)
        # process_question returns None after "new search"; nothing more to send then.
        if answer:
            await cl.Message(content=answer).send()
132
 
133
  if __name__ == "__main__":
134
+ cl.run()