QuantumLearner committed on
Commit
2a0549d
·
verified ·
1 Parent(s): 71f7aea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -48
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import chainlit as cl
2
  import arxiv
3
- from typing import List # <— add this
 
 
4
  from langchain.chat_models import ChatOpenAI
5
  from langchain.chains import ConversationalRetrievalChain
6
  from langchain.memory import ConversationBufferMemory
@@ -18,45 +20,49 @@ class ArxivResearchAssistant:
18
  self.papers: List[arxiv.Result] = []
19
  self.state = "SEARCH"
20
 
21
- # NEW: modern client that uses HTTPS + retry/backoff
 
 
 
 
 
 
 
 
22
  self.client = arxiv.Client(
23
- page_size=50,
24
  delay_seconds=3,
25
  num_retries=3,
26
- user_agent="chainlit-arxiv-app/1.0 (mailto:your-email@example.com)"
27
  )
28
 
29
  async def search_papers(self, query: str):
30
- query = (query or "").strip()
31
- if not query:
32
- await cl.Message(content="Please enter a non-empty search query.").send()
33
- return None
34
-
35
  search = arxiv.Search(
36
  query=query,
37
  max_results=5,
38
  sort_by=arxiv.SortCriterion.Relevance
39
  )
40
-
41
- # CHANGED: use the client to fetch results (handles HTTPS correctly)
42
- self.papers = list(self.client.results(search))
 
 
 
 
43
 
44
  if not self.papers:
45
- await cl.Message(content="No papers found. Please try another search query.").send()
 
 
46
  return None
47
 
48
- paper_list = "\n".join(
49
- [
50
- f"{i+1}. {paper.title} - {paper.authors[0]}\nLink: {paper.entry_id}"
51
- for i, paper in enumerate(self.papers)
52
- ]
53
- )
54
-
55
  await cl.Message(
56
- content=(
57
- f"Please select a paper by entering its number:\n\n{paper_list}\n\n"
58
- "Enter the number of the paper you want to select:"
59
- )
60
  ).send()
61
  self.state = "SELECT"
62
  return self.papers
@@ -73,7 +79,11 @@ class ArxivResearchAssistant:
73
  await cl.Message(content="Invalid input. Please enter a number.").send()
74
  return None
75
 
76
- paper_text = f"{self.selected_paper.title}\n\n{self.selected_paper.summary}\n\n{self.selected_paper.comment or ''}"
 
 
 
 
77
 
78
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
79
  chunks = text_splitter.split_text(paper_text)
@@ -82,13 +92,11 @@ class ArxivResearchAssistant:
82
  vectorstore = FAISS.from_texts(
83
  chunks,
84
  embeddings,
85
- metadatas=[
86
- {
87
- "title": self.selected_paper.title,
88
- "link": self.selected_paper.entry_id,
89
- "chunk": f"Chunk {i+1}/{len(chunks)}"
90
- } for i in range(len(chunks))
91
- ]
92
  )
93
 
94
  memory = ConversationBufferMemory(
@@ -107,8 +115,9 @@ class ArxivResearchAssistant:
107
  await cl.Message(
108
  content=(
109
  f"Selected paper: {self.selected_paper.title}\n"
110
- f"Link: {self.selected_paper.entry_id}\n"
111
- "You can now ask questions about this paper. Type 'new search' when you want to search for a different paper."
 
112
  )
113
  ).send()
114
  self.state = "QA"
@@ -123,14 +132,11 @@ class ArxivResearchAssistant:
123
  response = self.qa_chain({"question": message})
124
  answer = response["answer"]
125
 
126
- sources = "\n".join(
127
- [
128
- f"- {doc.metadata.get('title', 'Unknown title')} "
129
- f"({doc.metadata.get('link', 'No link')}) - "
130
- f"{doc.metadata.get('chunk', 'No chunk info')}"
131
- for doc in response.get("source_documents", [])
132
- ]
133
- )
134
  if sources:
135
  answer += f"\n\nSources:\n{sources}"
136
 
@@ -146,12 +152,10 @@ assistant = ArxivResearchAssistant()
146
 
147
  @cl.on_chat_start
148
  async def start():
149
- await cl.Message(
150
- content=(
151
- "Welcome! This tool helps you search for papers on arXiv, pick one, and ask questions about its content.\n\n"
152
- "Please enter a topic to search for on arXiv papers.\n\n"
153
- )
154
- ).send()
155
 
156
  @cl.on_message
157
  async def main(message: cl.Message):
 
1
  import chainlit as cl
2
  import arxiv
3
+ import requests
4
+ from typing import List
5
+
6
  from langchain.chat_models import ChatOpenAI
7
  from langchain.chains import ConversationalRetrievalChain
8
  from langchain.memory import ConversationBufferMemory
 
20
  self.papers: List[arxiv.Result] = []
21
  self.state = "SEARCH"
22
 
23
+ # ---- NEW: custom session with UA (no 'user_agent' kwarg) ----
24
+ sess = requests.Session()
25
+ sess.headers.update({
26
+ "User-Agent": f"arxiv-chainlit-app/1.0 (mailto:{os.getenv('CONTACT_EMAIL','noreply@example.com')})"
27
+ })
28
+ # If you’re behind a proxy or want requests to use env vars:
29
+ sess.trust_env = True
30
+
31
+ # ArXiv client (retries + small delay)
32
  self.client = arxiv.Client(
33
+ page_size=5,
34
  delay_seconds=3,
35
  num_retries=3,
36
+ http_session=sess
37
  )
38
 
39
  async def search_papers(self, query: str):
40
+ # Use arxiv.Search, then fetch with our client to leverage the session/retries
 
 
 
 
41
  search = arxiv.Search(
42
  query=query,
43
  max_results=5,
44
  sort_by=arxiv.SortCriterion.Relevance
45
  )
46
+ try:
47
+ self.papers = list(self.client.results(search))
48
+ except Exception as e:
49
+ await cl.Message(
50
+ content=f"Error talking to arXiv: {e}\nTry again in a moment or tweak your query."
51
+ ).send()
52
+ return None
53
 
54
  if not self.papers:
55
+ await cl.Message(
56
+ content="No papers found. Please try another search query."
57
+ ).send()
58
  return None
59
 
60
+ paper_list = "\n".join([
61
+ f"{i+1}. {paper.title} - {paper.authors[0]}\nLink: {paper.entry_id}"
62
+ for i, paper in enumerate(self.papers)
63
+ ])
 
 
 
64
  await cl.Message(
65
+ content=f"Please select a paper by entering its number:\n\n{paper_list}\n\nEnter the number of the paper you want to select:"
 
 
 
66
  ).send()
67
  self.state = "SELECT"
68
  return self.papers
 
79
  await cl.Message(content="Invalid input. Please enter a number.").send()
80
  return None
81
 
82
+ paper_text = (
83
+ f"{self.selected_paper.title}\n\n"
84
+ f"{self.selected_paper.summary}\n\n"
85
+ f"{self.selected_paper.comment or ''}"
86
+ )
87
 
88
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
89
  chunks = text_splitter.split_text(paper_text)
 
92
  vectorstore = FAISS.from_texts(
93
  chunks,
94
  embeddings,
95
+ metadatas=[{
96
+ "title": self.selected_paper.title,
97
+ "link": self.selected_paper.entry_id,
98
+ "chunk": f"Chunk {i+1}/{len(chunks)}"
99
+ } for i in range(len(chunks))]
 
 
100
  )
101
 
102
  memory = ConversationBufferMemory(
 
115
  await cl.Message(
116
  content=(
117
  f"Selected paper: {self.selected_paper.title}\n"
118
+ f"Link: {self.selected_paper.entry_id}\n\n"
119
+ f"You can now ask questions about this paper. "
120
+ f"Type 'new search' when you want to search for a different paper."
121
  )
122
  ).send()
123
  self.state = "QA"
 
132
  response = self.qa_chain({"question": message})
133
  answer = response["answer"]
134
 
135
+ sources = "\n".join([
136
+ f"- {doc.metadata.get('title','Unknown title')} "
137
+ f"({doc.metadata.get('link','No link')}) - {doc.metadata.get('chunk','No chunk info')}"
138
+ for doc in response.get("source_documents", [])
139
+ ])
 
 
 
140
  if sources:
141
  answer += f"\n\nSources:\n{sources}"
142
 
 
152
 
153
  @cl.on_chat_start
154
  async def start():
155
+ await cl.Message(content=(
156
+ "Welcome! This tool helps you search for papers on arXiv, pick one, and ask questions about its content.\n\n"
157
+ "Please enter a topic to search for on arXiv papers."
158
+ )).send()
 
 
159
 
160
  @cl.on_message
161
  async def main(message: cl.Message):