Tanaybh committed on
Commit
422c3d1
·
1 Parent(s): e0ed16b

Update space

Browse files
Files changed (1) hide show
  1. app.py +347 -57
app.py CHANGED
@@ -1,70 +1,360 @@
 
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
-
5
- def respond(
6
- message,
7
- history: list[dict[str, str]],
8
- system_message,
9
- max_tokens,
10
- temperature,
11
- top_p,
12
- hf_token: gr.OAuthToken,
13
- ):
14
- """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  """
17
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
18
 
19
- messages = [{"role": "system", "content": system_message}]
 
 
 
 
 
 
 
20
 
21
- messages.extend(history)
 
 
 
22
 
23
- messages.append({"role": "user", "content": message})
24
 
25
- response = ""
 
 
26
 
27
- for message in client.chat_completion(
28
- messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
- temperature=temperature,
32
- top_p=top_p,
33
- ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
38
 
39
- response += token
40
- yield response
 
 
 
 
 
 
 
41
 
 
 
 
 
 
 
42
 
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- chatbot = gr.ChatInterface(
47
- respond,
48
- type="messages",
49
- additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
- ),
60
- ],
61
- )
62
-
63
- with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
- chatbot.render()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
 
69
  if __name__ == "__main__":
70
- demo.launch()
 
 
 
 
 
 
 
1
+ """
2
+ DocMind - Gradio Chat Interface
3
+ Multi-agent research assistant for arXiv papers
4
+ """
5
+
6
  import gradio as gr
7
+ from retriever import PaperRetriever
8
+ from agents import DocMindOrchestrator
9
+ from fetch_arxiv_data import ArxivFetcher
10
+ import os
11
+
12
+
13
class DocMindApp:
    """Application facade connecting the paper retriever to the agent pipeline.

    ``setup_system`` populates ``retriever`` and ``orchestrator``. When no
    index can be loaded or built, ``orchestrator`` stays ``None`` and ``chat``
    reports that to the user instead of raising.
    """

    def __init__(self):
        self.retriever = None
        self.orchestrator = None
        self.setup_system()

    def setup_system(self):
        """Create the retriever, load or build its index, then the orchestrator.

        If ``load_index()`` reports no index, papers are read from
        ``arxiv_papers.json`` and a new index is built and saved. If no papers
        are available either, the method returns early and leaves
        ``self.orchestrator`` as ``None``.
        """
        print("Initializing DocMind...")

        self.retriever = PaperRetriever()

        # Try the persisted index first; only rebuild when that fails.
        if not self.retriever.load_index():
            print("No index found. Building new index...")
            fetcher = ArxivFetcher()
            papers = fetcher.load_papers("arxiv_papers.json")

            if not papers:
                print("⚠️ Warning: No papers found. Please run fetch_arxiv_data.py first")
                return

            self.retriever.build_index(papers)
            self.retriever.save_index()
            print(f"Index built with {len(papers)} papers")

        self.orchestrator = DocMindOrchestrator(self.retriever)
        print("DocMind ready!")

    def chat(
        self,
        message: str,
        history: list,
        num_papers: int = 5,
        show_agent_logs: bool = True
    ) -> str:
        """Answer one user query through the agent pipeline.

        Args:
            message: User query. ``None``, empty, and whitespace-only values
                are all treated as "no question".
            history: Chat history (not used in current version).
            num_papers: Number of papers to include in the response. Twice
                this many are retrieved so the pipeline can filter down.
            show_agent_logs: Accepted for interface compatibility; this
                method does not currently forward it to the orchestrator.

        Returns:
            The orchestrator's response, or a user-facing message when the
            system is not initialized, the query is blank, or processing
            raised an exception.
        """
        if not self.orchestrator:
            return "⚠️ System not initialized. Please run fetch_arxiv_data.py to download papers first."

        # Check for None before calling .strip() so a missing value cannot
        # raise AttributeError.
        if not message or not message.strip():
            return "Please enter a question about research papers."

        try:
            return self.orchestrator.process_query(
                message,
                top_k=num_papers * 2,  # Retrieve more, filter to top N
                max_papers_in_response=num_papers
            )
        except Exception as e:
            # UI boundary: show the failure in the chat instead of crashing
            # the event handler.
            return f"❌ Error processing query: {str(e)}\n\nPlease try rephrasing your question."
81
+
82
+
83
def create_interface():
    """Build the DocMind Gradio UI and return the ``gr.Blocks`` app.

    The returned app is not launched; the caller is expected to call
    ``launch()`` on it. A ``DocMindApp`` is created once here and shared by
    all chat event handlers.

    Returns:
        The assembled ``gr.Blocks`` instance.
    """
    app = DocMindApp()

    # Custom CSS for better styling
    css = """
    .gradio-container {
        font-family: 'Inter', 'Segoe UI', sans-serif;
        max-width: 1400px !important;
    }

    /* Header styling */
    h1 {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        background-clip: text;
        font-weight: 700;
        font-size: 2.5em !important;
        margin-bottom: 0.5em;
    }

    /* Chat area improvements */
    .message-wrap {
        padding: 1.2em !important;
        margin: 0.8em 0 !important;
        border-radius: 12px !important;
        line-height: 1.6;
    }

    /* User message */
    .message-wrap.user {
        background: linear-gradient(135deg, #667eea15 0%, #764ba215 100%) !important;
        border-left: 3px solid #667eea;
    }

    /* Bot message */
    .message-wrap.bot {
        background: #f8f9fa !important;
        border-left: 3px solid #28a745;
    }

    /* Input area */
    .input-text textarea {
        border-radius: 12px !important;
        border: 2px solid #e0e0e0 !important;
        font-size: 1.05em !important;
    }

    .input-text textarea:focus {
        border-color: #667eea !important;
        box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
    }

    /* Buttons */
    .btn-primary {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
        border: none !important;
        border-radius: 10px !important;
        padding: 0.8em 2em !important;
        font-weight: 600 !important;
        transition: transform 0.2s !important;
    }

    .btn-primary:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4) !important;
    }

    /* Settings panel */
    .settings-panel {
        background: #f8f9fa;
        border-radius: 12px;
        padding: 1.5em;
    }

    /* Slider */
    input[type="range"] {
        accent-color: #667eea !important;
    }

    /* Example buttons */
    .examples button {
        border-radius: 8px !important;
        border: 2px solid #e0e0e0 !important;
        padding: 0.7em 1em !important;
        transition: all 0.2s !important;
    }

    .examples button:hover {
        border-color: #667eea !important;
        background: #667eea10 !important;
    }

    /* Code blocks in responses */
    code {
        background: #f4f4f4;
        padding: 0.2em 0.4em;
        border-radius: 4px;
        font-family: 'Courier New', monospace;
    }

    /* Remove footer */
    footer {
        display: none !important;
    }

    /* Improve markdown rendering */
    .markdown-body h2 {
        color: #667eea;
        border-bottom: 2px solid #667eea;
        padding-bottom: 0.3em;
        margin-top: 1.5em;
    }

    .markdown-body h3 {
        color: #764ba2;
        margin-top: 1.2em;
    }

    /* Better list styling */
    .markdown-body ul {
        line-height: 1.8;
    }

    .markdown-body li {
        margin: 0.5em 0;
    }
    """

    # Example queries
    examples = [
        "What are the latest methods for improving diffusion models?",
        "Summarize recent work on RLHF vs DPO for language model alignment",
        "What are the main challenges in scaling transformer models?",
        "Tell me about recent advances in vision transformers",
        "What's new in retrieval-augmented generation (RAG)?",
    ]

    # NOTE(review): the source this was recovered from had lost its
    # indentation, so the exact nesting of the layout containers below is a
    # reconstruction - confirm against the intended layout.
    with gr.Blocks(css=css, title="DocMind - arXiv Research Assistant", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # 🧠 DocMind: Multi-Agent Research Assistant

            Ask questions about recent AI/ML research papers from arXiv. DocMind uses a 4-agent pipeline to retrieve, read, critique, and synthesize answers.

            **Agent Pipeline:** 🔍 Retriever → 📖 Reader → 🔎 Critic → ✨ Synthesizer
            """
        )

        with gr.Row():
            with gr.Column(scale=7):
                # avatar_images is documented as file paths or URLs, so the
                # emoji string was dropped; the deprecated bubble_full_width
                # argument was dropped too.
                chatbot = gr.Chatbot(
                    label="Research Chat",
                    height=550,
                    type="messages",
                )

                with gr.Row():
                    msg = gr.Textbox(
                        label="",
                        placeholder="Ask about recent research papers... (e.g., 'What are the latest methods for improving diffusion models?')",
                        lines=2,
                        scale=9,
                        show_label=False
                    )
                    submit = gr.Button("Send", variant="primary", scale=1, size="lg")

                with gr.Accordion("💡 Example Questions", open=False):
                    gr.Examples(
                        examples=examples,
                        inputs=msg,
                        label=""
                    )

            with gr.Column(scale=3):
                with gr.Group():
                    gr.Markdown("### ⚙️ Settings")

                    num_papers = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=5,
                        step=1,
                        label="Papers to Include",
                        info="More papers = more comprehensive, but slower"
                    )

                    show_logs = gr.Checkbox(
                        label="Show Agent Logs",
                        value=False,
                        info="Display processing steps"
                    )

                clear = gr.Button("🗑️ Clear Chat", variant="secondary", size="sm")

                gr.Markdown(
                    """
                    ---
                    ### 📊 About

                    **How it works:**
                    1. 🔍 **Retriever** finds relevant papers
                    2. 📖 **Reader** summarizes each paper
                    3. 🔎 **Critic** filters low-quality results
                    4. ✨ **Synthesizer** creates final answer

                    **Data Source:** arXiv papers (AI/ML/CS)

                    **Technology:**
                    - FAISS for semantic search
                    - Sentence Transformers for embeddings
                    - 100 recent papers indexed
                    """
                )

        # Chat interaction
        def respond(message, history, num_papers_val, show_logs_val):
            """Append the user turn and the bot reply to a copy of ``history``."""
            # Copy so the component's value is never mutated in place, and
            # tolerate a chatbot that has no value yet.
            turns = list(history or [])

            if not message or not message.strip():
                return turns

            turns.append({"role": "user", "content": message})
            bot_response = app.chat(message, turns, num_papers_val, show_logs_val)
            turns.append({"role": "assistant", "content": bot_response})
            return turns

        def clear_chat():
            """Reset the chatbot to an empty conversation."""
            return []

        # The Send button and Enter in the textbox share one pipeline:
        # answer the query, then clear the input box.
        for trigger in (submit.click, msg.submit):
            trigger(
                respond,
                inputs=[msg, chatbot, num_papers, show_logs],
                outputs=[chatbot]
            ).then(
                lambda: "",
                outputs=[msg]
            )

        clear.click(clear_chat, outputs=[chatbot])

        gr.Markdown(
            """
            <div style='text-align: center; margin-top: 2em; padding: 1em; color: #666;'>
            <small>Built with FAISS, Sentence Transformers, and Gradio • Powered by arXiv API</small>
            </div>
            """
        )

    return demo
351
 
352
 
353
if __name__ == "__main__":
    demo = create_interface()
    # Take host and port from Gradio's standard environment variables so a
    # hosting platform can bind e.g. 0.0.0.0. An explicit server_name argument
    # would override GRADIO_SERVER_NAME. Without those variables the app
    # still listens on 127.0.0.1:7860 as before.
    demo.launch(
        share=False,
        server_name=os.getenv("GRADIO_SERVER_NAME", "127.0.0.1"),
        server_port=int(os.getenv("GRADIO_SERVER_PORT", "7860")),
        show_error=True,
    )