QuantumLearner committed on
Commit
f0d3013
·
verified ·
1 Parent(s): 04f6039

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -31
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import chainlit as cl
2
  import arxiv
 
3
  from langchain.chat_models import ChatOpenAI
4
  from langchain.chains import ConversationalRetrievalChain
5
  from langchain.memory import ConversationBufferMemory
@@ -17,22 +18,46 @@ class ArxivResearchAssistant:
17
  self.papers: List[arxiv.Result] = []
18
  self.state = "SEARCH"
19
 
 
 
 
 
 
 
 
 
20
  async def search_papers(self, query: str):
 
 
 
 
 
21
  search = arxiv.Search(
22
  query=query,
23
  max_results=5,
24
  sort_by=arxiv.SortCriterion.Relevance
25
  )
26
 
27
- self.papers = list(search.results())
28
-
 
29
  if not self.papers:
30
  await cl.Message(content="No papers found. Please try another search query.").send()
31
  return None
32
 
33
- paper_list = "\n".join([f"{i+1}. {paper.title} - {paper.authors[0]}\nLink: {paper.entry_id}" for i, paper in enumerate(self.papers)])
34
-
35
- await cl.Message(content=f"Please select a paper by entering its number:\n\n{paper_list}\n\nEnter the number of the paper you want to select:").send()
 
 
 
 
 
 
 
 
 
 
36
  self.state = "SELECT"
37
  return self.papers
38
 
@@ -48,32 +73,30 @@ class ArxivResearchAssistant:
48
  await cl.Message(content="Invalid input. Please enter a number.").send()
49
  return None
50
 
51
- # Download the entire paper content (if available)
52
  paper_text = f"{self.selected_paper.title}\n\n{self.selected_paper.summary}\n\n{self.selected_paper.comment or ''}"
53
 
54
- # Split the text into chunks
55
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
56
  chunks = text_splitter.split_text(paper_text)
57
 
58
- # Create embeddings and vector store, include chunk-specific metadata
59
  embeddings = OpenAIEmbeddings()
60
  vectorstore = FAISS.from_texts(
61
  chunks,
62
  embeddings,
63
- metadatas=[{
64
- "title": self.selected_paper.title,
65
- "link": self.selected_paper.entry_id,
66
- "chunk": f"Chunk {i+1}/{len(chunks)}"
67
- } for i in range(len(chunks))]
 
 
68
  )
69
 
70
- # Create the conversational chain
71
  memory = ConversationBufferMemory(
72
- memory_key="chat_history",
73
- return_messages=True,
74
  output_key="answer"
75
  )
76
-
77
  self.qa_chain = ConversationalRetrievalChain.from_llm(
78
  ChatOpenAI(temperature=0, model="gpt-4o-mini"),
79
  vectorstore.as_retriever(),
@@ -81,7 +104,13 @@ class ArxivResearchAssistant:
81
  return_source_documents=True
82
  )
83
 
84
- await cl.Message(content=f"Selected paper: {self.selected_paper.title}\nLink: {self.selected_paper.entry_id}\nYou can now ask questions about this paper. Type 'new search' when you want to search for a different paper.").send()
 
 
 
 
 
 
85
  self.state = "QA"
86
  return self.selected_paper
87
 
@@ -94,8 +123,14 @@ class ArxivResearchAssistant:
94
  response = self.qa_chain({"question": message})
95
  answer = response["answer"]
96
 
97
- # Handling the sources with chunk-specific metadata
98
- sources = "\n".join([f"- {doc.metadata.get('title', 'Unknown title')} ({doc.metadata.get('link', 'No link')}) - {doc.metadata.get('chunk', 'No chunk info')}" for doc in response.get("source_documents", [])])
 
 
 
 
 
 
99
  if sources:
100
  answer += f"\n\nSources:\n{sources}"
101
 
@@ -107,32 +142,27 @@ class ArxivResearchAssistant:
107
  self.papers = []
108
  self.state = "SEARCH"
109
 
110
- # Global assistant instance
111
  assistant = ArxivResearchAssistant()
112
 
113
  @cl.on_chat_start
114
  async def start():
115
- await cl.Message(content=(
116
- "Welcome! This tool helps you search for papers on arXiv, pick one, and ask questions about its content. \n\n"
117
- "Please enter a topic to search for on arXiv papers.\n\n"
118
- #"balance sheets, and cash flow reports. It generates summaries and strategic due diligence.\n\n"
119
- #"Please enter the ticker symbol for the company you want to analyze:"
120
- )
121
  ).send()
122
 
123
  @cl.on_message
124
  async def main(message: cl.Message):
125
- # Route the message based on the current state
126
  if assistant.state == "SEARCH":
127
  await assistant.search_papers(message.content)
128
-
129
  elif assistant.state == "SELECT":
130
  await assistant.select_paper(message.content)
131
-
132
  elif assistant.state == "QA":
133
  answer = await assistant.process_question(message.content)
134
  if answer:
135
  await cl.Message(content=answer).send()
136
 
137
  if __name__ == "__main__":
138
- cl.run()
 
1
  import chainlit as cl
2
  import arxiv
3
+ from typing import List  # required by the List[arxiv.Result] annotation on self.papers
4
  from langchain.chat_models import ChatOpenAI
5
  from langchain.chains import ConversationalRetrievalChain
6
  from langchain.memory import ConversationBufferMemory
 
18
  self.papers: List[arxiv.Result] = []
19
  self.state = "SEARCH"
20
 
21
+ # NEW: modern client that uses HTTPS + retry/backoff
22
+ self.client = arxiv.Client(
23
+ page_size=50,
24
+ delay_seconds=3,
25
+ num_retries=3,
26
+ user_agent="chainlit-arxiv-app/1.0 (mailto:your-email@example.com)"
27
+ )
28
+
29
  async def search_papers(self, query: str):
30
+ query = (query or "").strip()
31
+ if not query:
32
+ await cl.Message(content="Please enter a non-empty search query.").send()
33
+ return None
34
+
35
  search = arxiv.Search(
36
  query=query,
37
  max_results=5,
38
  sort_by=arxiv.SortCriterion.Relevance
39
  )
40
 
41
+ # CHANGED: use the client to fetch results (handles HTTPS correctly)
42
+ self.papers = list(self.client.results(search))
43
+
44
  if not self.papers:
45
  await cl.Message(content="No papers found. Please try another search query.").send()
46
  return None
47
 
48
+ paper_list = "\n".join(
49
+ [
50
+ f"{i+1}. {paper.title} - {paper.authors[0]}\nLink: {paper.entry_id}"
51
+ for i, paper in enumerate(self.papers)
52
+ ]
53
+ )
54
+
55
+ await cl.Message(
56
+ content=(
57
+ f"Please select a paper by entering its number:\n\n{paper_list}\n\n"
58
+ "Enter the number of the paper you want to select:"
59
+ )
60
+ ).send()
61
  self.state = "SELECT"
62
  return self.papers
63
 
 
73
  await cl.Message(content="Invalid input. Please enter a number.").send()
74
  return None
75
 
 
76
  paper_text = f"{self.selected_paper.title}\n\n{self.selected_paper.summary}\n\n{self.selected_paper.comment or ''}"
77
 
 
78
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
79
  chunks = text_splitter.split_text(paper_text)
80
 
 
81
  embeddings = OpenAIEmbeddings()
82
  vectorstore = FAISS.from_texts(
83
  chunks,
84
  embeddings,
85
+ metadatas=[
86
+ {
87
+ "title": self.selected_paper.title,
88
+ "link": self.selected_paper.entry_id,
89
+ "chunk": f"Chunk {i+1}/{len(chunks)}"
90
+ } for i in range(len(chunks))
91
+ ]
92
  )
93
 
 
94
  memory = ConversationBufferMemory(
95
+ memory_key="chat_history",
96
+ return_messages=True,
97
  output_key="answer"
98
  )
99
+
100
  self.qa_chain = ConversationalRetrievalChain.from_llm(
101
  ChatOpenAI(temperature=0, model="gpt-4o-mini"),
102
  vectorstore.as_retriever(),
 
104
  return_source_documents=True
105
  )
106
 
107
+ await cl.Message(
108
+ content=(
109
+ f"Selected paper: {self.selected_paper.title}\n"
110
+ f"Link: {self.selected_paper.entry_id}\n"
111
+ "You can now ask questions about this paper. Type 'new search' when you want to search for a different paper."
112
+ )
113
+ ).send()
114
  self.state = "QA"
115
  return self.selected_paper
116
 
 
123
  response = self.qa_chain({"question": message})
124
  answer = response["answer"]
125
 
126
+ sources = "\n".join(
127
+ [
128
+ f"- {doc.metadata.get('title', 'Unknown title')} "
129
+ f"({doc.metadata.get('link', 'No link')}) - "
130
+ f"{doc.metadata.get('chunk', 'No chunk info')}"
131
+ for doc in response.get("source_documents", [])
132
+ ]
133
+ )
134
  if sources:
135
  answer += f"\n\nSources:\n{sources}"
136
 
 
142
  self.papers = []
143
  self.state = "SEARCH"
144
 
 
145
  assistant = ArxivResearchAssistant()
146
 
147
  @cl.on_chat_start
148
  async def start():
149
+ await cl.Message(
150
+ content=(
151
+ "Welcome! This tool helps you search for papers on arXiv, pick one, and ask questions about its content.\n\n"
152
+ "Please enter a topic to search for on arXiv papers.\n\n"
153
+ )
 
154
  ).send()
155
 
156
  @cl.on_message
157
  async def main(message: cl.Message):
 
158
  if assistant.state == "SEARCH":
159
  await assistant.search_papers(message.content)
 
160
  elif assistant.state == "SELECT":
161
  await assistant.select_paper(message.content)
 
162
  elif assistant.state == "QA":
163
  answer = await assistant.process_question(message.content)
164
  if answer:
165
  await cl.Message(content=answer).send()
166
 
167
  if __name__ == "__main__":
168
+ cl.run()