faisalsns committed on
Commit
b1f00a0
·
1 Parent(s): d6a4e24

Initial commit for the ai-reasoning-copilot

Browse files
README.md CHANGED
@@ -11,3 +11,41 @@ short_description: AI Reasoning Copilot
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
+
15
+
16
+ # 🧠 Open Source Reasoning Copilot
17
+
18
An AI reasoning assistant that pairs local tools and memory with LLM access via OpenRouter.
19
+
20
+ ## Features
21
+
22
+ - 🤖 **Online LLM Integration** - Works with OpenRouter
23
+ - 🔍 **Web Search** - Real-time information retrieval
24
+ - 🧮 **Advanced Calculator** - Symbolic math, calculus, statistics
25
+ - 📁 **Document Processing** - PDF, Word, Excel, CSV, JSON, code files
26
+ - 🧠 **Memory System** - Conversation history and context awareness
27
+ - 🎯 **Reasoning Tools** - Chain-of-thought, problem decomposition
28
+ - 📊 **Data Visualization** - Plots and charts
29
- 🔒 **Privacy First** - Documents, memory, and tools run locally (LLM queries are sent to OpenRouter)
30
+
31
+ ## Usage Examples
32
+
33
+ ### Reasoning & Problem Solving
34
+ - "Help me analyze the pros and cons of remote work"
35
+ - "Walk me through solving this logic puzzle step by step"
36
+ - "What are the implications of AI in healthcare?"
37
+
38
+ ### Research & Information
39
+ - "What are the latest developments in quantum computing?"
40
+ - "Research the history of the Roman Empire"
41
+ - "Find current information about climate change policies"
42
+
43
+ ### Mathematics & Calculations
44
+ - "Solve the equation x^2 + 5x - 6 = 0"
45
+ - "Calculate the derivative of x^3 + 2x^2 - 5x + 1"
46
+ - "Plot the function y = sin(x) + cos(2x)"
47
+
48
+ ### Document Analysis
49
+ - Upload PDFs, Word docs, spreadsheets
50
+ - "Summarize this research paper"
51
+ - "Extract key insights from this data"
app.py ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import logging
3
+ import os
4
+ from typing import List, Tuple, Any, Optional
5
+ import json
6
+ import threading
7
+ import time
8
+ from datetime import datetime
9
+
10
+ # Import our custom modules
11
+ #from models.llm_handler import LLMHandler
12
+ from models.llm_handler import HuggingFaceLLMHandler, OpenRouterLLMHandler
13
+
14
+ from models.vector_store import VectorStore
15
+ from tools.web_search import WebSearchTool
16
+ from tools.calculator import CalculatorTool
17
+ from tools.file_processor import FileProcessor
18
+ from memory.conversation import ConversationMemory
19
+ from config.settings import Settings
20
+
21
# Setup logging: everything goes both to a file under Settings.LOGS_DIR and
# to the console. Importing config.settings above already ran
# Settings.ensure_directories(), so LOGS_DIR exists before FileHandler opens it.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(os.path.join(Settings.LOGS_DIR, 'copilot.log')),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)
31
+
32
class ReasoningCopilot:
    """Orchestrates the LLM, vector retrieval, web search, calculator,
    file processing and conversation memory behind the Gradio UI.

    Changes from the previous revision: the raw debug ``print()`` calls around
    the LLM request bypassed the configured logging handlers/levels — they are
    now ``logger.debug`` calls — and dead commented-out handler code was removed.
    """

    def __init__(self):
        logger.info("Initializing Reasoning Copilot...")

        # Initialize components (OpenRouter is the active LLM backend).
        self.llm = OpenRouterLLMHandler()
        self.vector_store = VectorStore()
        self.web_search = WebSearchTool()
        self.calculator = CalculatorTool()
        self.file_processor = FileProcessor()
        self.memory = ConversationMemory()

        # State variables, toggled from the UI settings panel.
        self.current_model = Settings.DEFAULT_MODEL
        self.reasoning_mode = "balanced"  # balanced, creative, analytical
        self.use_web_search = True
        self.use_vector_search = True

        logger.info("Reasoning Copilot initialized successfully!")

    def process_query(self, user_input: str, chat_history: List[Tuple[str, str]],
                      use_tools: bool = True) -> Tuple[List[Tuple[str, str]], str]:
        """Answer one user message.

        Optionally enriches the prompt with vector-store context, web-search
        results and calculator output, asks the LLM, and records the exchange.
        Returns the updated chat history plus an empty string (clears the
        input textbox in the UI).
        """
        try:
            if not user_input.strip():
                return chat_history, ""

            logger.info(f"Processing query: {user_input[:100]}...")

            context = ""
            tools_output = ""

            if use_tools:
                # Pull relevant context from uploaded documents (best effort).
                if self.use_vector_search:
                    try:
                        context = self.vector_store.get_relevant_context(user_input)
                    except Exception as e:
                        logger.warning(f"Vector search failed: {e}")
                        context = ""

                # Cheap keyword heuristics decide tool use — no extra LLM
                # round-trip (a prior analyze_reasoning_task call here caused
                # multi-minute delays per query).
                if self.use_web_search and self._should_use_web_search_simple(user_input):
                    try:
                        search_results = self.web_search.search_and_summarize(user_input)
                        tools_output += f"\n--- Web Search Results ---\n{search_results}\n"
                    except Exception as e:
                        logger.warning(f"Web search failed: {e}")

                if self._should_use_calculator_simple(user_input):
                    try:
                        calc_result = self._handle_calculation(user_input)
                        if calc_result:
                            tools_output += f"\n--- Calculation Results ---\n{calc_result}\n"
                    except Exception as e:
                        logger.warning(f"Calculator failed: {e}")

            # Main LLM call; timed through the logging framework instead of
            # print() so the output respects configured handlers/levels.
            start_time = time.time()
            response = self.llm.generate_response(user_input, context, tools_output)
            logger.debug(f"generate_response took {time.time() - start_time:.2f} seconds")

            # Persist the exchange; a storage failure must not lose the reply.
            try:
                self.memory.add_exchange(user_input, response, {
                    'used_tools': use_tools,
                    'reasoning_mode': self.reasoning_mode
                })
            except Exception as e:
                logger.warning(f"Memory storage failed: {e}")

            chat_history.append((user_input, response))

            try:
                self.llm.add_to_history(user_input, response)
            except Exception as e:
                logger.warning(f"LLM history update failed: {e}")

            return chat_history, ""

        except Exception as e:
            logger.error(f"Error processing query: {e}")
            error_response = f"I apologize, but I encountered an error: {str(e)}"
            chat_history.append((user_input, error_response))
            return chat_history, ""

    def _should_use_web_search_simple(self, query: str) -> bool:
        """Return True when *query* contains a recency/lookup keyword."""
        web_search_indicators = [
            'current', 'latest', 'recent', 'news', 'today', 'now',
            'what happened', 'update', 'price', 'weather', 'stock'
        ]

        query_lower = query.lower()
        return any(indicator in query_lower for indicator in web_search_indicators)

    def _should_use_calculator_simple(self, query: str) -> bool:
        """Return True when *query* looks mathematical (keyword or operator)."""
        calc_indicators = [
            'calculate', 'compute', 'solve', '+', '-', '*', '/', '=',
            'math', 'equation', 'derivative', 'integral', 'plot'
        ]

        query_lower = query.lower()
        return any(indicator in query_lower for indicator in calc_indicators)

    def _handle_calculation(self, query: str) -> str:
        """Extract and evaluate math found in *query* via CalculatorTool.

        Returns a formatted result string, or "" when nothing usable is found.
        """
        try:
            import re

            # Equation solving: "solve ... = ..." with at least one operator.
            if '=' in query and any(op in query for op in ['+', '-', '*', '/']):
                if 'solve' in query.lower():
                    equation = re.search(r'([^=]+=[^=]+)', query)
                    if equation:
                        result = self.calculator.solve_equation(equation.group(1))
                        return self.calculator.format_result_for_llm(result)

            # Otherwise look for a bare arithmetic expression to evaluate.
            expr_pattern = r'([0-9+\-*/().\s]+(?:[+\-*/][0-9+\-*/().\s]+)*)'
            expressions = re.findall(expr_pattern, query)

            for exp in expressions:
                if len(exp.strip()) > 3:  # skip trivial matches (lone numbers)
                    result = self.calculator.evaluate_expression(exp.strip())
                    return self.calculator.format_result_for_llm(result)

            return ""

        except Exception as e:
            logger.error(f"Error in calculation handling: {e}")
            return ""

    def upload_files(self, files: List[Any]) -> str:
        """Process uploaded files and index their text in the vector store.

        Returns a per-file status report for the UI.
        """
        try:
            if not files:
                return "No files uploaded."

            results = []
            documents_to_add = []

            for file in files:
                # Gradio file objects expose .name; fall back to str() paths.
                file_path = file.name if hasattr(file, 'name') else str(file)

                file_result = self.file_processor.process_file(file_path)

                if 'error' not in file_result:
                    content = file_result['content']
                    metadata = {
                        'filename': file_result['filename'],
                        'type': 'uploaded_file',
                        'source': file_result['filename'],
                        'upload_time': datetime.now().isoformat()
                    }
                    documents_to_add.append((content, metadata))
                    results.append(f"✓ Processed: {file_result['filename']}")
                else:
                    results.append(f"✗ Error processing {file_path}: {file_result['error']}")

            # Batch-insert everything that parsed successfully.
            if documents_to_add:
                contents = [doc[0] for doc in documents_to_add]
                metadata_list = [doc[1] for doc in documents_to_add]

                success = self.vector_store.add_documents(contents, metadata_list)
                if success:
                    results.append(f"\n✓ Added {len(documents_to_add)} documents to knowledge base.")
                else:
                    results.append("\n✗ Failed to add documents to knowledge base.")

            return "\n".join(results)

        except Exception as e:
            logger.error(f"Error uploading files: {e}")
            return f"Error uploading files: {str(e)}"

    def change_model(self, model_name: str) -> str:
        """Switch the active LLM model; returns a human-readable status line."""
        try:
            if self.llm.switch_model(model_name):
                self.current_model = model_name
                return f"✓ Switched to model: {model_name}"
            else:
                return f"✗ Failed to switch to model: {model_name}"
        except Exception as e:
            return f"✗ Error changing model: {str(e)}"

    def get_system_status(self) -> str:
        """Build the markdown status report shown in the UI status box."""
        try:
            available_models = self.llm.get_available_models()
            vector_stats = self.vector_store.get_collection_stats()
            memory_stats = self.memory.get_session_statistics()
            memory_usage = self.memory.get_memory_usage()

            status_info = f"""
🤖 **Reasoning Copilot Status**

**Current Model:** {self.current_model}
**Available Models:** {len(available_models)} ({', '.join(available_models[:3])}{'...' if len(available_models) > 3 else ''})

**Knowledge Base:**
- Documents: {vector_stats.get('total_documents', 0)}
- Collection: {vector_stats.get('collection_name', 'N/A')}

**Session Memory:**
- Exchanges: {memory_stats.get('total_exchanges', 0)}
- Topics: {len(memory_stats.get('topics_discussed', []))}
- Memory Size: {memory_usage.get('memory_file_size_kb', 0):.1f} KB

**Tools Status:**
- Web Search: {'✓ Enabled' if self.use_web_search else '✗ Disabled'}
- Vector Search: {'✓ Enabled' if self.use_vector_search else '✗ Disabled'}
- Calculator: ✓ Available
- File Processor: ✓ Available

**Reasoning Mode:** {self.reasoning_mode.title()}
""".strip()

            return status_info

        except Exception as e:
            logger.error(f"Error getting system status: {e}")
            return f"Error getting system status: {str(e)}"

    def clear_conversation(self) -> Tuple[List, str]:
        """Reset both LLM history and persistent memory; returns (history, msg)."""
        try:
            self.llm.clear_history()
            self.memory.clear_memory()
            return [], "✓ Conversation cleared successfully."
        except Exception as e:
            return [], f"✗ Error clearing conversation: {str(e)}"

    def export_conversation(self, format_type: str = "markdown") -> str:
        """Export the conversation via ConversationMemory ('markdown'/'json'/'text')."""
        try:
            return self.memory.export_conversation(format_type)
        except Exception as e:
            return f"Error exporting conversation: {str(e)}"
320
+
321
def create_gradio_interface():
    """
    Create the Gradio interface.

    Builds a four-tab Blocks app (Chat / Knowledge Base / Calculator /
    Memory & Export) around a single ReasoningCopilot instance and wires all
    UI events to its methods. Returns the (not yet launched) Blocks object.
    """
    # Initialize the copilot — constructs all model/tool handlers up front.
    copilot = ReasoningCopilot()

    # Define the main interface
    with gr.Blocks(
        theme=gr.themes.Soft(),
        title="🧠 Open Source Reasoning Copilot",
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .chat-container {
            height: 600px !important;
        }
        """
    ) as interface:

        gr.Markdown("""
        # 🧠 Open Source Reasoning Copilot

        A powerful AI assistant that combines local LLMs with advanced reasoning capabilities, web search, calculations, and document processing - all running locally with zero cost!
        """)

        with gr.Tab("💬 Chat"):
            with gr.Row():
                with gr.Column(scale=3):
                    chatbot = gr.Chatbot(
                        height=500,
                        label="Conversation",
                        elem_classes=["chat-container"]
                    )

                    with gr.Row():
                        msg = gr.Textbox(
                            placeholder="Ask me anything! I can help with reasoning, research, calculations, and more...",
                            label="Your Message",
                            scale=4
                        )
                        send_btn = gr.Button("Send", variant="primary", scale=1)

                    with gr.Row():
                        clear_btn = gr.Button("Clear Chat", variant="secondary")
                        # Per-message switch: when off, process_query skips all tools.
                        use_tools = gr.Checkbox(label="Use Tools", value=True)

                with gr.Column(scale=1):
                    gr.Markdown("### 🛠️ Quick Actions")

                    status_btn = gr.Button("📊 System Status", variant="secondary")
                    status_output = gr.Textbox(
                        label="Status",
                        max_lines=15,
                        interactive=False
                    )

                    gr.Markdown("### ⚙️ Settings")

                    # NOTE(review): model choices are hard-coded OpenRouter ids;
                    # keep in sync with OpenRouterLLMHandler's supported models.
                    model_dropdown = gr.Dropdown(
                        choices=["mistralai/mistral-7b-instruct", "meta-llama/llama-3-70b-instruct", "google/gemini-2.0-flash-exp:free", "huggingfaceh4/zephyr-7b-beta"],
                        value="mistralai/mistral-7b-instruct",
                        label="Model"
                    )

                    reasoning_mode = gr.Radio(
                        choices=["balanced", "creative", "analytical"],
                        value="balanced",
                        label="Reasoning Mode"
                    )

                    web_search_toggle = gr.Checkbox(
                        label="Enable Web Search",
                        value=True
                    )

                    vector_search_toggle = gr.Checkbox(
                        label="Enable Vector Search",
                        value=True
                    )

        with gr.Tab("📁 Knowledge Base"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Upload Documents")
                    file_upload = gr.Files(
                        label="Upload Files",
                        file_types=[".txt", ".pdf", ".docx", ".csv", ".xlsx", ".json", ".py", ".js", ".html", ".md"]
                    )
                    upload_btn = gr.Button("Process Files", variant="primary")
                    upload_status = gr.Textbox(
                        label="Upload Status",
                        max_lines=10,
                        interactive=False
                    )

                with gr.Column():
                    gr.Markdown("### Knowledge Base Info")
                    kb_info = gr.Textbox(
                        label="Knowledge Base Statistics",
                        max_lines=10,
                        interactive=False
                    )
                    refresh_kb_btn = gr.Button("Refresh Info")

        with gr.Tab("🧮 Calculator"):
            with gr.Row():
                with gr.Column():
                    calc_input = gr.Textbox(
                        label="Mathematical Expression",
                        placeholder="e.g., 2*3 + 5, solve x^2 - 4 = 0, derivative of x^2 + 3x"
                    )
                    calc_btn = gr.Button("Calculate", variant="primary")
                    calc_output = gr.Textbox(
                        label="Result",
                        max_lines=10,
                        interactive=False
                    )

                with gr.Column():
                    gr.Markdown("""
                    ### Supported Operations
                    - Basic arithmetic: +, -, *, /, ^
                    - Functions: sin, cos, tan, log, sqrt
                    - Equation solving: solve x^2 + 2x - 3 = 0
                    - Calculus: derivative, integral
                    - Matrix operations
                    - Statistics
                    """)

        with gr.Tab("📊 Memory & Export"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Conversation Memory")
                    memory_info = gr.Textbox(
                        label="Session Information",
                        max_lines=10,
                        interactive=False
                    )
                    refresh_memory_btn = gr.Button("Refresh Memory Info")

                with gr.Column():
                    gr.Markdown("### Export Options")
                    export_format = gr.Radio(
                        choices=["markdown", "json", "text"],
                        value="markdown",
                        label="Export Format"
                    )
                    export_btn = gr.Button("Export Conversation", variant="primary")
                    export_output = gr.Textbox(
                        label="Exported Conversation",
                        max_lines=15,
                        interactive=False
                    )

        # Event handlers — thin closures over the shared copilot instance.
        def respond(message, history, use_tools_flag):
            return copilot.process_query(message, history, use_tools_flag)

        def clear_chat():
            return copilot.clear_conversation()

        def get_status():
            return copilot.get_system_status()

        def upload_files_handler(files):
            return copilot.upload_files(files)

        def change_model_handler(model):
            return copilot.change_model(model)

        def export_handler(format_type):
            return copilot.export_conversation(format_type)

        def get_kb_info():
            stats = copilot.vector_store.get_collection_stats()
            return f"Documents: {stats.get('total_documents', 0)}\nCollection: {stats.get('collection_name', 'N/A')}"

        def get_memory_info():
            return copilot.memory.get_conversation_summary()

        def update_settings(mode, web_search, vector_search):
            # Mutates copilot state in place; takes effect on the next query.
            copilot.reasoning_mode = mode
            copilot.use_web_search = web_search
            copilot.use_vector_search = vector_search
            return "Settings updated!"

        # Wire up the events. Enter in the textbox and the Send button share
        # the same handler; both clear the textbox via the second output.
        msg.submit(respond, [msg, chatbot, use_tools], [chatbot, msg])
        send_btn.click(respond, [msg, chatbot, use_tools], [chatbot, msg])
        clear_btn.click(clear_chat, outputs=[chatbot, msg])

        status_btn.click(get_status, outputs=status_output)
        model_dropdown.change(change_model_handler, inputs=model_dropdown, outputs=status_output)

        upload_btn.click(upload_files_handler, inputs=file_upload, outputs=upload_status)
        refresh_kb_btn.click(get_kb_info, outputs=kb_info)

        calc_btn.click(
            lambda expr: copilot.calculator.format_result_for_llm(
                copilot.calculator.evaluate_expression(expr)
            ),
            inputs=calc_input,
            outputs=calc_output
        )

        export_btn.click(export_handler, inputs=export_format, outputs=export_output)
        refresh_memory_btn.click(get_memory_info, outputs=memory_info)

        # Settings updates
        # NOTE(review): only reasoning_mode.change triggers update_settings;
        # toggling web/vector search alone is not applied until the radio
        # changes — confirm whether the toggles should also be wired.
        reasoning_mode.change(
            update_settings,
            inputs=[reasoning_mode, web_search_toggle, vector_search_toggle],
            outputs=status_output
        )

    return interface
539
+
540
if __name__ == "__main__":
    logger.info("Starting Reasoning Copilot...")

    # The previous startup hints told the user to run Ollama ("ollama serve" /
    # "ollama pull phi3:mini"), but the app now talks to OpenRouter via
    # OpenRouterLLMHandler — point users at the right prerequisite instead.
    logger.info("Using OpenRouter — ensure your OpenRouter API credentials are configured (see models/llm_handler.py)")

    # Create and launch the interface
    interface = create_gradio_interface()

    interface.launch(
        server_port=Settings.GRADIO_PORT,
        # NOTE(review): hard-coded True overrides Settings.GRADIO_SHARE (False)
        # — confirm which is intended for deployment.
        share=True,
        server_name="0.0.0.0",  # Allow external access
        show_error=True,
    )
config/settings.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ load_dotenv()
5
+
6
class Settings:
    """
    Central application configuration as plain class attributes.

    load_dotenv() above makes .env variables available via os.environ for any
    module that wants them; none are read directly in this class.
    """

    # Model Configuration
    # NOTE(review): these defaults describe a local Ollama setup, while app.py
    # instantiates OpenRouterLLMHandler — confirm which backend reads them.
    DEFAULT_MODEL = "phi3:mini"
    EMBEDDING_MODEL = "nomic-embed-text"
    OLLAMA_BASE_URL = "http://localhost:11434"

    # Vector Database (ChromaDB on-disk persistence)
    CHROMA_PERSIST_DIR = "./chroma_db"
    COLLECTION_NAME = "knowledge_base"

    # UI Configuration
    GRADIO_PORT = 7860
    # NOTE(review): app.py currently hard-codes share=True, ignoring this flag.
    GRADIO_SHARE = False

    # Tool Configuration
    MAX_SEARCH_RESULTS = 5
    CODE_EXECUTION_TIMEOUT = 30  # presumably seconds — confirm against the executor
    MAX_FILE_SIZE_MB = 50

    # Memory Configuration
    MAX_CONVERSATION_HISTORY = 20  # exchanges kept in short-term memory
    CONTEXT_WINDOW_SIZE = 4096

    # Reasoning Configuration
    MAX_REASONING_STEPS = 10
    TEMPERATURE = 0.7
    MAX_TOKENS = 2048

    # File Paths
    UPLOAD_DIR = "./uploads"
    LOGS_DIR = "./logs"

    # Create directories if they don't exist
    @classmethod
    def ensure_directories(cls):
        """Create every directory the app writes to (idempotent)."""
        os.makedirs(cls.CHROMA_PERSIST_DIR, exist_ok=True)
        os.makedirs(cls.UPLOAD_DIR, exist_ok=True)
        os.makedirs(cls.LOGS_DIR, exist_ok=True)

# Initialize directories on import so loggers/stores can open files immediately.
Settings.ensure_directories()
memory/conversation.py ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import uuid
3
+ from datetime import datetime
4
+ from typing import List, Dict, Any, Optional
5
+ import logging
6
+ from pathlib import Path
7
+ from config.settings import Settings
8
+
9
# NOTE(review): basicConfig at import time is a no-op when the application has
# already configured logging (app.py does), but it can hijack the root logger
# if this module is imported first — consider removing it from library code.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
11
+
12
+ class ConversationMemory:
13
    def __init__(self):
        # Each instance is a fresh session persisted to its own JSON file
        # under Settings.LOGS_DIR.
        self.session_id = str(uuid.uuid4())
        self.memory_file = Path(Settings.LOGS_DIR) / f"conversation_{self.session_id}.json"
        # Rolling window of recent exchanges (bounded in add_exchange by
        # Settings.MAX_CONVERSATION_HISTORY).
        self.short_term_memory = []
        # Arbitrary key/value context mirrored into memory_structure['context'].
        self.current_context = {}
        # In-memory reasoning trace (capped at 50 in add_reasoning_step).
        self.reasoning_history = []

        # Initialize memory structure — the single dict serialized to disk.
        # NOTE(review): 'user_preferences' is written here but never read or
        # updated anywhere in this module.
        self.memory_structure = {
            'session_id': self.session_id,
            'created_at': datetime.now().isoformat(),
            'conversations': [],
            'context': {},
            'reasoning_chains': [],
            'user_preferences': {},
            'topics_discussed': []
        }

        # Persist immediately so the session file always exists on disk.
        self._save_memory()
+
33
+ def add_exchange(self, user_input: str, assistant_response: str,
34
+ metadata: Optional[Dict[str, Any]] = None) -> bool:
35
+ """
36
+ Add a conversation exchange to memory
37
+ """
38
+ try:
39
+ exchange = {
40
+ 'id': str(uuid.uuid4()),
41
+ 'timestamp': datetime.now().isoformat(),
42
+ 'user_input': user_input,
43
+ 'assistant_response': assistant_response,
44
+ 'metadata': metadata or {}
45
+ }
46
+
47
+ # Add to short-term memory
48
+ self.short_term_memory.append(exchange)
49
+
50
+ # Add to persistent memory
51
+ self.memory_structure['conversations'].append(exchange)
52
+
53
+ # Keep short-term memory limited
54
+ if len(self.short_term_memory) > Settings.MAX_CONVERSATION_HISTORY:
55
+ self.short_term_memory = self.short_term_memory[-Settings.MAX_CONVERSATION_HISTORY:]
56
+
57
+ # Extract and store topics
58
+ self._extract_topics(user_input)
59
+
60
+ # Save to file
61
+ self._save_memory()
62
+
63
+ logger.info(f"Added exchange to memory: {exchange['id']}")
64
+ return True
65
+
66
+ except Exception as e:
67
+ logger.error(f"Error adding exchange to memory: {e}")
68
+ return False
69
+
70
+ def add_reasoning_step(self, step: str, step_type: str, result: Any = None) -> bool:
71
+ """
72
+ Add a reasoning step to the reasoning history
73
+ """
74
+ try:
75
+ reasoning_step = {
76
+ 'id': str(uuid.uuid4()),
77
+ 'timestamp': datetime.now().isoformat(),
78
+ 'step': step,
79
+ 'type': step_type,
80
+ 'result': str(result) if result is not None else None
81
+ }
82
+
83
+ self.reasoning_history.append(reasoning_step)
84
+ self.memory_structure['reasoning_chains'].append(reasoning_step)
85
+
86
+ # Keep reasoning history limited
87
+ if len(self.reasoning_history) > 50:
88
+ self.reasoning_history = self.reasoning_history[-50:]
89
+
90
+ self._save_memory()
91
+ return True
92
+
93
+ except Exception as e:
94
+ logger.error(f"Error adding reasoning step: {e}")
95
+ return False
96
+
97
+ def update_context(self, key: str, value: Any) -> bool:
98
+ """
99
+ Update the current context
100
+ """
101
+ try:
102
+ self.current_context[key] = value
103
+ self.memory_structure['context'][key] = value
104
+ self._save_memory()
105
+ return True
106
+ except Exception as e:
107
+ logger.error(f"Error updating context: {e}")
108
+ return False
109
+
110
+ def get_context(self, key: Optional[str] = None) -> Any:
111
+ """
112
+ Get context information
113
+ """
114
+ if key:
115
+ return self.current_context.get(key)
116
+ return self.current_context.copy()
117
+
118
+ def get_recent_exchanges(self, count: int = 5) -> List[Dict[str, Any]]:
119
+ """
120
+ Get recent conversation exchanges
121
+ """
122
+ return self.short_term_memory[-count:] if count <= len(self.short_term_memory) else self.short_term_memory
123
+
124
+ def get_conversation_summary(self) -> str:
125
+ """
126
+ Generate a summary of the conversation
127
+ """
128
+ if not self.short_term_memory:
129
+ return "No conversation history available."
130
+
131
+ summary_parts = [
132
+ f"Session ID: {self.session_id}",
133
+ f"Exchanges: {len(self.memory_structure['conversations'])}",
134
+ f"Topics discussed: {', '.join(self.memory_structure['topics_discussed'][-5:])}",
135
+ "",
136
+ "Recent exchanges:"
137
+ ]
138
+
139
+ # Add recent exchanges
140
+ for exchange in self.short_term_memory[-3:]:
141
+ timestamp = datetime.fromisoformat(exchange['timestamp']).strftime("%H:%M:%S")
142
+ summary_parts.append(f"[{timestamp}] User: {exchange['user_input'][:100]}...")
143
+ summary_parts.append(f"[{timestamp}] Assistant: {exchange['assistant_response'][:100]}...")
144
+ summary_parts.append("")
145
+
146
+ return "\n".join(summary_parts)
147
+
148
+ def search_memory(self, query: str, search_type: str = 'all') -> List[Dict[str, Any]]:
149
+ """
150
+ Search through memory for relevant information
151
+ """
152
+ results = []
153
+ query_lower = query.lower()
154
+
155
+ try:
156
+ if search_type in ['all', 'conversations']:
157
+ # Search conversations
158
+ for exchange in self.memory_structure['conversations']:
159
+ if (query_lower in exchange['user_input'].lower() or
160
+ query_lower in exchange['assistant_response'].lower()):
161
+ results.append({
162
+ 'type': 'conversation',
163
+ 'content': exchange,
164
+ 'relevance_score': self._calculate_relevance(query, exchange)
165
+ })
166
+
167
+ if search_type in ['all', 'reasoning']:
168
+ # Search reasoning history
169
+ for step in self.memory_structure['reasoning_chains']:
170
+ if query_lower in step['step'].lower():
171
+ results.append({
172
+ 'type': 'reasoning',
173
+ 'content': step,
174
+ 'relevance_score': self._calculate_relevance(query, step)
175
+ })
176
+
177
+ # Sort by relevance
178
+ results.sort(key=lambda x: x['relevance_score'], reverse=True)
179
+ return results[:10] # Top 10 results
180
+
181
+ except Exception as e:
182
+ logger.error(f"Error searching memory: {e}")
183
+ return []
184
+
185
+ def _extract_topics(self, text: str) -> None:
186
+ """
187
+ Extract topics from user input (simple keyword-based)
188
+ """
189
+ try:
190
+ # Simple topic extraction - can be enhanced with NLP
191
+ keywords = [
192
+ 'programming', 'coding', 'python', 'javascript', 'web', 'ai', 'machine learning',
193
+ 'data', 'analysis', 'math', 'science', 'physics', 'chemistry', 'biology',
194
+ 'history', 'literature', 'writing', 'business', 'finance', 'economics',
195
+ 'health', 'medicine', 'technology', 'research', 'education', 'design'
196
+ ]
197
+
198
+ text_lower = text.lower()
199
+ found_topics = [keyword for keyword in keywords if keyword in text_lower]
200
+
201
+ for topic in found_topics:
202
+ if topic not in self.memory_structure['topics_discussed']:
203
+ self.memory_structure['topics_discussed'].append(topic)
204
+
205
+ # Keep topics list manageable
206
+ if len(self.memory_structure['topics_discussed']) > 20:
207
+ self.memory_structure['topics_discussed'] = self.memory_structure['topics_discussed'][-20:]
208
+
209
+ except Exception as e:
210
+ logger.error(f"Error extracting topics: {e}")
211
+
212
+ def _calculate_relevance(self, query: str, item: Dict[str, Any]) -> float:
213
+ """
214
+ Calculate relevance score for search results
215
+ """
216
+ try:
217
+ query_words = set(query.lower().split())
218
+
219
+ if 'user_input' in item:
220
+ # Conversation item
221
+ text = f"{item['user_input']} {item['assistant_response']}".lower()
222
+ else:
223
+ # Reasoning item
224
+ text = item['step'].lower()
225
+
226
+ text_words = set(text.split())
227
+
228
+ # Simple relevance scoring
229
+ common_words = query_words.intersection(text_words)
230
+ if not query_words:
231
+ return 0.0
232
+
233
+ return len(common_words) / len(query_words)
234
+
235
+ except Exception as e:
236
+ logger.error(f"Error calculating relevance: {e}")
237
+ return 0.0
238
+
239
+ def _save_memory(self) -> bool:
240
+ """
241
+ Save memory to file
242
+ """
243
+ try:
244
+ with open(self.memory_file, 'w', encoding='utf-8') as f:
245
+ json.dump(self.memory_structure, f, indent=2, ensure_ascii=False)
246
+ return True
247
+ except Exception as e:
248
+ logger.error(f"Error saving memory: {e}")
249
+ return False
250
+
251
def load_session(self, session_id: str) -> bool:
    """
    Restore a previously saved session from its JSON log file.

    Replaces the in-memory structure, then rebuilds the derived caches:
    short-term memory (last MAX_CONVERSATION_HISTORY exchanges), the
    saved context, and up to the 50 most recent reasoning steps.
    Returns True on success, False when the file is missing or unreadable.
    """
    try:
        session_file = Path(Settings.LOGS_DIR) / f"conversation_{session_id}.json"
        if not session_file.exists():
            logger.warning(f"Session file not found: {session_file}")
            return False

        with open(session_file, 'r', encoding='utf-8') as fh:
            loaded = json.load(fh)

        self.memory_structure = loaded
        self.session_id = session_id
        self.memory_file = session_file

        # Rebuild the in-memory caches from the persisted structure.
        self.short_term_memory = loaded['conversations'][-Settings.MAX_CONVERSATION_HISTORY:]
        self.current_context = loaded.get('context', {})
        self.reasoning_history = loaded.get('reasoning_chains', [])[-50:]

        logger.info(f"Loaded session: {session_id}")
        return True

    except Exception as e:
        logger.error(f"Error loading session: {e}")
        return False
284
+
285
def export_conversation(self, format_type: str = 'json') -> str:
    """
    Render the session's conversation log in one of three formats.

    *format_type* may be 'json' (pretty-printed full structure), 'text'
    (plain transcript), or 'markdown' (one heading per exchange). Any
    other value yields an "Unsupported format" message; failures yield
    an error message string.
    """
    try:
        if format_type == 'json':
            return json.dumps(self.memory_structure, indent=2, ensure_ascii=False)

        conversations = self.memory_structure['conversations']

        if format_type == 'text':
            out = [
                f"Conversation Export - Session {self.session_id}",
                f"Created: {self.memory_structure['created_at']}",
                f"Total Exchanges: {len(conversations)}",
                "=" * 50,
                "",
            ]
            for exchange in conversations:
                stamp = datetime.fromisoformat(exchange['timestamp']).strftime("%Y-%m-%d %H:%M:%S")
                out += [
                    f"[{stamp}]",
                    f"User: {exchange['user_input']}",
                    f"Assistant: {exchange['assistant_response']}",
                    "-" * 30,
                    "",
                ]
            return "\n".join(out)

        if format_type == 'markdown':
            out = [
                f"# Conversation Export",
                f"**Session ID:** {self.session_id}",
                f"**Created:** {self.memory_structure['created_at']}",
                f"**Total Exchanges:** {len(conversations)}",
                "",
            ]
            for idx, exchange in enumerate(conversations, 1):
                stamp = datetime.fromisoformat(exchange['timestamp']).strftime("%Y-%m-%d %H:%M:%S")
                out += [
                    f"## Exchange {idx}",
                    f"*{stamp}*",
                    f"**User:** {exchange['user_input']}",
                    f"**Assistant:** {exchange['assistant_response']}",
                    "",
                ]
            return "\n".join(out)

        return f"Unsupported format: {format_type}"

    except Exception as e:
        logger.error(f"Error exporting conversation: {e}")
        return f"Error exporting conversation: {str(e)}"
337
+
338
def get_session_statistics(self) -> Dict[str, Any]:
    """
    Summarize the current session: exchange/word counts, average words
    per side, elapsed duration, topics, and reasoning-step count.

    Returns {'error': ...} when there are no conversations or on failure.
    """
    try:
        conversations = self.memory_structure['conversations']
        if not conversations:
            return {'error': 'No conversations in this session'}

        user_words = sum(len(c['user_input'].split()) for c in conversations)
        assistant_words = sum(len(c['assistant_response'].split()) for c in conversations)

        # Duration is only meaningful with at least two exchanges.
        duration = None
        if len(conversations) > 1:
            first = datetime.fromisoformat(conversations[0]['timestamp'])
            last = datetime.fromisoformat(conversations[-1]['timestamp'])
            duration = str(last - first)

        count = len(conversations)
        return {
            'session_id': self.session_id,
            'total_exchanges': count,
            'total_user_words': user_words,
            'total_assistant_words': assistant_words,
            'average_user_words': user_words / count if conversations else 0,
            'average_assistant_words': assistant_words / count if conversations else 0,
            'session_duration': duration,
            'topics_discussed': self.memory_structure.get('topics_discussed', []),
            'reasoning_steps': len(self.memory_structure.get('reasoning_chains', [])),
            'created_at': self.memory_structure['created_at'],
        }

    except Exception as e:
        logger.error(f"Error getting session statistics: {e}")
        return {'error': str(e)}
374
+
375
def clear_memory(self, keep_context: bool = False) -> bool:
    """
    Reset conversation state for this session.

    Empties the short-term and reasoning caches, optionally keeps the
    accumulated context, preserves user preferences, and persists the
    freshly reset structure. Returns True on success.
    """
    try:
        self.short_term_memory.clear()
        self.reasoning_history.clear()
        if not keep_context:
            self.current_context.clear()

        self.memory_structure = {
            'session_id': self.session_id,
            'created_at': datetime.now().isoformat(),
            'conversations': [],
            'context': self.current_context if keep_context else {},
            'reasoning_chains': [],
            # Preferences deliberately survive a memory reset.
            'user_preferences': self.memory_structure.get('user_preferences', {}),
            'topics_discussed': [],
        }

        self._save_memory()
        logger.info("Cleared conversation memory")
        return True

    except Exception as e:
        logger.error(f"Error clearing memory: {e}")
        return False
404
+
405
def set_user_preference(self, key: str, value: Any) -> bool:
    """
    Store a single user preference and persist memory immediately.

    Returns True on success, False if persistence fails.
    """
    try:
        prefs = self.memory_structure.setdefault('user_preferences', {})
        prefs[key] = value
        self._save_memory()
        logger.info(f"Set user preference: {key} = {value}")
        return True

    except Exception as e:
        logger.error(f"Error setting user preference: {e}")
        return False
421
+
422
def get_user_preferences(self) -> Dict[str, Any]:
    """
    Return all stored user preferences.

    Falls back to an empty dict when none have been set yet.
    """
    return self.memory_structure.get('user_preferences', {})
427
+
428
def get_memory_usage(self) -> Dict[str, Any]:
    """
    Report sizes of the persisted memory file and the in-memory caches.

    File size is 0 when the memory file does not exist yet. Returns
    {'error': ...} if anything goes wrong.
    """
    try:
        size_bytes = self.memory_file.stat().st_size if self.memory_file.exists() else 0
        return {
            'memory_file_size_bytes': size_bytes,
            'memory_file_size_kb': size_bytes / 1024,
            'short_term_exchanges': len(self.short_term_memory),
            'total_exchanges': len(self.memory_structure['conversations']),
            'reasoning_steps': len(self.reasoning_history),
            'context_items': len(self.current_context),
            'topics_tracked': len(self.memory_structure.get('topics_discussed', [])),
        }

    except Exception as e:
        logger.error(f"Error getting memory usage: {e}")
        return {'error': str(e)}
models/llm_handler.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import ollama
2
+ import json
3
+ import logging
4
+ from typing import List, Dict, Any, Optional
5
+ from transformers import AutoTokenizer, AutoModelForCausalLM
6
+ import torch
7
+ from typing import Optional
8
+ from config.settings import Settings
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ # HuggingFace LLM Handler for Microsoft Phi-3 Mini
14
+
15
+ import requests
16
+ from typing import Optional
17
+ import requests
18
+ import os
19
+ from dotenv import load_dotenv
20
+
21
+ load_dotenv()
22
+
23
+
24
class OpenRouterLLMHandler:
    """
    Chat handler backed by the OpenRouter chat-completions HTTP API.

    The API key is taken from the OPENROUTER_API_KEY environment
    variable when set, otherwise from the ``api_key`` argument.
    """

    def __init__(self, api_key: str = "", model: str = "mistralai/mistral-7b-instruct"):
        # BUG FIX: the old code did `model = self.current_model` when model
        # was "", but `current_model` was never set anywhere, so passing an
        # empty model raised AttributeError. The dead branch is removed.
        env_key = os.getenv("OPENROUTER_API_KEY")
        self.api_key = env_key if env_key else api_key
        self.model = model
        self.model_name = model  # kept in sync by switch_model()
        self.base_url = "https://openrouter.ai/api/v1/chat/completions"
        # BUG FIX: add_to_history() appended to this attribute but it was
        # never initialized, raising AttributeError on first use.
        self.conversation_history: List[Dict[str, str]] = []
        print(f"🔌 Initialized OpenRouter handler with model: {model}")

    def generate_response(self, prompt: str, context: Optional[str] = None,
                          tools_output: Optional[str] = None) -> str:
        """
        Send the prompt (plus optional context / tool output) to OpenRouter
        and return the assistant's reply, or an error message string.
        """
        try:
            full_prompt = self._build_simple_prompt(prompt, context, tools_output)

            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            }
            payload = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": "You are a helpful AI assistant."},
                    {"role": "user", "content": full_prompt},
                ],
                "temperature": 0.7,
                "max_tokens": 200,
            }

            # BUG FIX: a timeout keeps a stalled connection from hanging
            # the caller forever.
            response = requests.post(self.base_url, headers=headers, json=payload, timeout=60)
            response.raise_for_status()
            result = response.json()

            return result["choices"][0]["message"]["content"].strip()

        except Exception as e:
            return f"Error generating response: {str(e)}"

    def _build_simple_prompt(self, user_input: str, context: Optional[str] = None,
                             tools_output: Optional[str] = None) -> str:
        """Assemble the user message; only short context/tool snippets are kept."""
        prompt_parts = []

        if context and len(context) < 300:
            prompt_parts.append(f"Context: {context}")

        if tools_output and len(tools_output) < 200:
            prompt_parts.append(f"Additional info: {tools_output}")

        prompt_parts.append(f"User query: {user_input}")
        return "\n\n".join(prompt_parts)

    def add_to_history(self, user_input: str, assistant_response: str):
        """
        Record an exchange, trimming history to the configured maximum.
        """
        self.conversation_history.append({
            'user': user_input,
            'assistant': assistant_response,
        })

        if len(self.conversation_history) > Settings.MAX_CONVERSATION_HISTORY:
            self.conversation_history = self.conversation_history[-Settings.MAX_CONVERSATION_HISTORY:]

    def clear_history(self):
        """
        Drop all recorded exchanges.
        """
        self.conversation_history = []

    def get_available_models(self) -> List[str]:
        """
        Get list of available Ollama models.

        NOTE(review): self.client is never set on this class, so this
        always falls back to [Settings.DEFAULT_MODEL] — confirm whether
        the Ollama integration should be removed or wired up.
        """
        try:
            models = self.client.list()
            return [model['name'] for model in models['models']]
        except Exception as e:
            logger.error(f"Error getting models: {e}")
            return [Settings.DEFAULT_MODEL]

    def switch_model(self, model_name: str) -> bool:
        """
        Point subsequent requests at a different OpenRouter model.
        """
        try:
            self.model = model_name
            self.model_name = model_name
            logger.info(f"Switched to model: {model_name}")
            return True
        except Exception as e:
            logger.error(f"Error switching to model {model_name}: {e}")
            return False

    def generate_embedding(self, text: str) -> List[float]:
        """
        Generate embeddings for text using Ollama.

        NOTE(review): self.client is never set, so this currently always
        returns [] via the except branch — confirm intent.
        """
        try:
            response = self.client.embeddings(
                model=Settings.EMBEDDING_MODEL,
                prompt=text,
            )
            return response['embedding']
        except Exception as e:
            logger.error(f"Error generating embedding: {e}")
            return []
152
+
153
+
154
+
155
+ # class HuggingFaceLLMHandler:
156
+ # def __init__(self):
157
+ # from transformers import AutoTokenizer, AutoModelForCausalLM
158
+ # import torch
159
+ # import psutil
160
+
161
+ # self.model_name = "microsoft/Phi-3-mini-4k-instruct"
162
+ # print("Loading model... this may take a moment on first run")
163
+
164
+ # # Choose device and dtype intelligently
165
+ # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
166
+ # torch_dtype = torch.float16 if device.type == "cuda" else torch.float32
167
+
168
+ # print(f"Using device: {device}, dtype: {torch_dtype}")
169
+ # print(f"Available RAM: {psutil.virtual_memory().available / 1e6:.2f} MB")
170
+
171
+ # # Load tokenizer
172
+ # self.tokenizer = AutoTokenizer.from_pretrained(
173
+ # self.model_name,
174
+ # trust_remote_code=True
175
+ # )
176
+
177
+ # # Load model safely
178
+ # try:
179
+ # self.model = AutoModelForCausalLM.from_pretrained(
180
+ # self.model_name,
181
+ # torch_dtype=torch_dtype,
182
+ # device_map="auto" if device.type == "cuda" else None,
183
+ # low_cpu_mem_usage=True, # Helps reduce RAM footprint during init
184
+ # trust_remote_code=True
185
+ # )
186
+
187
+ # # Explicitly move to CPU if needed
188
+ # if device.type == "cpu":
189
+ # self.model = self.model.to(device)
190
+
191
+ # print("Model loaded successfully!")
192
+
193
+ # except RuntimeError as e:
194
+ # print(f"❌ Error loading model: {e}")
195
+ # print("Tip: Try switching to a smaller model or free up RAM.")
196
+
197
+ # def generate_response(self, prompt: str, context: Optional[str] = None,
198
+ # tools_output: Optional[str] = None) -> str:
199
+ # """
200
+ # Generate response using Phi-3 - should be under 10 seconds
201
+ # """
202
+ # try:
203
+ # # Build simple prompt
204
+ # full_prompt = self._build_simple_prompt(prompt, context, tools_output)
205
+
206
+ # # Tokenize and move to same device as model
207
+ # inputs = self.tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=1024)
208
+ # inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
209
+
210
+ # # Generate
211
+ # with torch.no_grad():
212
+ # outputs = self.model.generate(
213
+ # inputs["input_ids"],
214
+ # max_new_tokens=200, # Limit response length
215
+ # temperature=0.7,
216
+ # do_sample=True,
217
+ # pad_token_id=self.tokenizer.eos_token_id,
218
+ # attention_mask=inputs["attention_mask"]
219
+ # )
220
+
221
+ # # Decode response
222
+ # response = self.tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
223
+ # return response.strip()
224
+
225
+ # except Exception as e:
226
+ # logger.error(f"Error generating response: {e}")
227
+ # return f"Error generating response: {str(e)}"
228
+
229
+ # def _build_simple_prompt(self, user_input: str, context: Optional[str] = None,
230
+ # tools_output: Optional[str] = None) -> str:
231
+ # """Simple prompt builder"""
232
+ # prompt_parts = ["You are a helpful AI assistant."]
233
+
234
+ # if context and len(context) < 300:
235
+ # prompt_parts.append(f"Context: {context}")
236
+
237
+ # if tools_output and len(tools_output) < 200:
238
+ # prompt_parts.append(f"Additional info: {tools_output}")
239
+
240
+ # prompt_parts.append(f"User: {user_input}")
241
+ # prompt_parts.append("Assistant:")
242
+
243
+ # return "\n\n".join(prompt_parts)
244
+
245
+ # def add_to_history(self, user_input: str, assistant_response: str):
246
+ # """
247
+ # Add exchange to conversation history
248
+ # """
249
+ # self.conversation_history.append({
250
+ # 'user': user_input,
251
+ # 'assistant': assistant_response
252
+ # })
253
+
254
+ # # Keep only recent history
255
+ # if len(self.conversation_history) > Settings.MAX_CONVERSATION_HISTORY:
256
+ # self.conversation_history = self.conversation_history[-Settings.MAX_CONVERSATION_HISTORY:]
257
+
258
+ # def clear_history(self):
259
+ # """
260
+ # Clear conversation history
261
+ # """
262
+ # self.conversation_history = []
263
+
264
+ # def get_available_models(self) -> List[str]:
265
+ # """
266
+ # Get list of available Ollama models
267
+ # """
268
+ # try:
269
+ # models = self.client.list()
270
+ # return [model['name'] for model in models['models']]
271
+ # except Exception as e:
272
+ # logger.error(f"Error getting models: {e}")
273
+ # return [Settings.DEFAULT_MODEL]
274
+
275
+ # def switch_model(self, model_name: str) -> bool:
276
+ # """
277
+ # Switch to a different model
278
+ # """
279
+ # try:
280
+ # # Test if model is available
281
+ # self.client.generate(model=model_name, prompt="test", options={'num_predict': 1})
282
+ # self.model_name = model_name
283
+ # logger.info(f"Switched to model: {model_name}")
284
+ # return True
285
+ # except Exception as e:
286
+ # logger.error(f"Error switching to model {model_name}: {e}")
287
+ # return False
288
+
289
+ # def generate_embedding(self, text: str) -> List[float]:
290
+ # """
291
+ # Generate embeddings for text using Ollama
292
+ # """
293
+ # try:
294
+ # response = self.client.embeddings(
295
+ # model=Settings.EMBEDDING_MODEL,
296
+ # prompt=text
297
+ # )
298
+ # return response['embedding']
299
+ # except Exception as e:
300
+ # logger.error(f"Error generating embedding: {e}")
301
+ # return []
302
+
models/vector_store.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import uuid
from datetime import datetime
from typing import List, Dict, Any, Optional

import chromadb
from chromadb.config import Settings as ChromaSettings
from sentence_transformers import SentenceTransformer

from config.settings import Settings
8
+
9
+ logging.basicConfig(level=logging.INFO)
10
+ logger = logging.getLogger(__name__)
11
+
12
class VectorStore:
    """
    ChromaDB-backed semantic store using SentenceTransformer embeddings.

    Wraps a persistent Chroma client and provides document ingestion,
    similarity search, conversation memory, and collection management.
    """

    def __init__(self):
        self.client = chromadb.PersistentClient(
            path=Settings.CHROMA_PERSIST_DIR,
            settings=ChromaSettings(anonymized_telemetry=False)
        )
        # Small, fast general-purpose sentence embedding model.
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.collection = None
        self.initialize_collection()

    def initialize_collection(self):
        """Create (or reopen) the main knowledge-base collection."""
        try:
            self.collection = self.client.get_or_create_collection(
                name=Settings.COLLECTION_NAME,
                metadata={"description": "General knowledge base for reasoning copilot"}
            )
            logger.info(f"Initialized collection: {Settings.COLLECTION_NAME}")
        except Exception as e:
            logger.error(f"Error initializing collection: {e}")
            raise

    def add_documents(self, documents: List[str], metadata: Optional[List[Dict]] = None,
                      ids: Optional[List[str]] = None) -> bool:
        """
        Embed and store *documents*; returns True on success.

        IDs default to fresh UUID4 strings and metadata to a generic
        user-upload record, one per document.
        """
        try:
            if not documents:
                return False

            if ids is None:
                ids = [str(uuid.uuid4()) for _ in documents]

            embeddings = self.embedding_model.encode(documents).tolist()

            if metadata is None:
                metadata = [{"source": "user_upload", "type": "document"} for _ in documents]

            self.collection.add(
                documents=documents,
                embeddings=embeddings,
                metadatas=metadata,
                ids=ids
            )

            logger.info(f"Added {len(documents)} documents to vector store")
            return True

        except Exception as e:
            logger.error(f"Error adding documents: {e}")
            return False

    def search_similar(self, query: str, n_results: int = 5,
                       where: Optional[Dict] = None) -> Dict[str, Any]:
        """
        Return the *n_results* nearest documents to *query*.

        The result dict holds parallel 'documents'/'metadatas'/'distances'
        lists plus a 'count'; all empty on error.
        """
        try:
            query_embedding = self.embedding_model.encode([query]).tolist()[0]

            results = self.collection.query(
                query_embeddings=[query_embedding],
                n_results=n_results,
                where=where,
                include=['documents', 'metadatas', 'distances']
            )

            docs = results['documents']
            return {
                'documents': docs[0] if docs else [],
                'metadatas': results['metadatas'][0] if results['metadatas'] else [],
                'distances': results['distances'][0] if results['distances'] else [],
                'count': len(docs[0]) if docs else 0
            }

        except Exception as e:
            logger.error(f"Error searching documents: {e}")
            return {'documents': [], 'metadatas': [], 'distances': [], 'count': 0}

    def get_relevant_context(self, query: str, max_context_length: int = 2000) -> str:
        """
        Format the top matches for *query* as LLM-ready context,
        truncated to at most *max_context_length* characters.
        """
        results = self.search_similar(query, n_results=5)

        if not results['documents']:
            return ""

        context_parts = []
        current_length = 0

        for doc, meta in zip(results['documents'], results['metadatas']):
            source = meta.get('source', 'Unknown')
            # Each snippet is capped at 500 characters of content.
            snippet = f"Source: {source}\nContent: {doc[:500]}...\n"

            if current_length + len(snippet) > max_context_length:
                break

            context_parts.append(snippet)
            current_length += len(snippet)

        return "\n---\n".join(context_parts)

    def add_conversation_memory(self, user_input: str, assistant_response: str, session_id: str):
        """Store one user/assistant exchange as a searchable document."""
        try:
            memory_doc = f"User: {user_input}\nAssistant: {assistant_response}"
            metadata = {
                "type": "conversation",
                "session_id": session_id,
                # BUG FIX: this field previously stored str(uuid.uuid4()),
                # which is not a timestamp at all; record the actual time.
                "timestamp": datetime.now().isoformat()
            }

            return self.add_documents([memory_doc], [metadata])

        except Exception as e:
            logger.error(f"Error adding conversation memory: {e}")
            return False

    def search_conversations(self, query: str, session_id: Optional[str] = None) -> List[str]:
        """
        Search stored conversation exchanges, optionally scoped to one
        session.

        NOTE(review): recent Chroma versions require multi-field filters
        to be wrapped in {"$and": [...]} — confirm this two-key `where`
        still matches when session_id is given.
        """
        where_clause = {"type": "conversation"}
        if session_id:
            where_clause["session_id"] = session_id

        results = self.search_similar(query, n_results=3, where=where_clause)
        return results['documents']

    def get_collection_stats(self) -> Dict[str, Any]:
        """Return document count and name for the active collection."""
        try:
            return {
                "total_documents": self.collection.count(),
                "collection_name": Settings.COLLECTION_NAME
            }
        except Exception as e:
            logger.error(f"Error getting collection stats: {e}")
            return {"total_documents": 0, "collection_name": "unknown"}

    def delete_documents(self, ids: List[str]) -> bool:
        """Delete documents by ID; returns True on success."""
        try:
            self.collection.delete(ids=ids)
            logger.info(f"Deleted {len(ids)} documents")
            return True
        except Exception as e:
            logger.error(f"Error deleting documents: {e}")
            return False

    def clear_collection(self) -> bool:
        """Drop and recreate the main collection, removing every document."""
        try:
            self.client.delete_collection(Settings.COLLECTION_NAME)
            self.initialize_collection()
            logger.info("Cleared all documents from collection")
            return True
        except Exception as e:
            logger.error(f"Error clearing collection: {e}")
            return False

    def create_specialized_collection(self, name: str, description: str) -> bool:
        """Create (or reopen) a domain-specific collection; True on success."""
        try:
            self.client.get_or_create_collection(
                name=name,
                metadata={"description": description}
            )
            logger.info(f"Created specialized collection: {name}")
            return True
        except Exception as e:
            logger.error(f"Error creating specialized collection: {e}")
            return False

    def switch_collection(self, name: str) -> bool:
        """Make *name* the active collection for subsequent operations."""
        try:
            self.collection = self.client.get_collection(name=name)
            logger.info(f"Switched to collection: {name}")
            return True
        except Exception as e:
            logger.error(f"Error switching to collection {name}: {e}")
            return False

    def list_collections(self) -> List[str]:
        """Return the names of all collections, or [] on error."""
        try:
            return [col.name for col in self.client.list_collections()]
        except Exception as e:
            logger.error(f"Error listing collections: {e}")
            return []
requirements.txt ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ chromadb
3
+ ollama
4
+ sentence-transformers
5
+ beautifulsoup4
6
+ requests
7
+ pandas
8
+ numpy
9
+ python-dotenv
10
+ langchain
11
+ langchain-community
12
+ PyPDF2
13
+ python-docx
14
+ openpyxl
15
+ sympy
16
+ networkx
17
+ matplotlib
18
+ plotly
19
+ duckduckgo-search
20
+ psutil
21
+ fastapi
22
+ uvicorn
23
+ websockets
24
+ torch
25
+ transformers
tools/calculator.py ADDED
@@ -0,0 +1,404 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sympy as sp
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import io
5
+ import base64
6
+ from typing import Any, Dict, List, Optional, Union
7
+ import logging
8
+ import re
9
+ import math
10
+
11
+ logging.basicConfig(level=logging.INFO)
12
+ logger = logging.getLogger(__name__)
13
+
14
+ class CalculatorTool:
15
+ def __init__(self):
16
+ self.variables = {}
17
+ self.last_result = None
18
+
19
def evaluate_expression(self, expression: str) -> Dict[str, Any]:
    """
    Evaluate a math expression, preferring sympy's symbolic engine.

    Falls back to a restricted eval() with a math-only namespace when
    sympy cannot parse the input. On success returns
    {'result', 'expression', 'type', 'formatted'} and updates
    ``self.last_result``; on failure returns
    {'error', 'expression', 'result': None}.
    """
    try:
        expression = self._clean_expression(expression)

        try:
            value = sp.sympify(expression).evalf()
            self.last_result = float(value)
            return {
                'result': float(value),
                'expression': expression,
                'type': 'symbolic',
                'formatted': str(value),
            }
        # BUG FIX: was a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt.
        except Exception:
            # SECURITY NOTE: eval() on user input is risky even with a
            # restricted namespace; inputs must be trusted/local only.
            value = eval(expression, {"__builtins__": {}}, self._get_safe_namespace())
            self.last_result = value
            return {
                'result': value,
                'expression': expression,
                'type': 'numeric',
                'formatted': str(value),
            }

    except Exception as e:
        logger.error(f"Error evaluating expression: {e}")
        return {
            'error': str(e),
            'expression': expression,
            'result': None,
        }
55
+
56
+ def _clean_expression(self, expression: str) -> str:
57
+ """
58
+ Clean and prepare expression for evaluation
59
+ """
60
+ # Replace common math notation
61
+ replacements = {
62
+ '^': '**',
63
+ '×': '*',
64
+ '÷': '/',
65
+ 'π': 'pi',
66
+ 'e': 'E'
67
+ }
68
+
69
+ for old, new in replacements.items():
70
+ expression = expression.replace(old, new)
71
+
72
+ return expression
73
+
74
+ def _get_safe_namespace(self) -> Dict[str, Any]:
75
+ """
76
+ Get safe namespace for expression evaluation
77
+ """
78
+ safe_dict = {
79
+ 'abs': abs, 'round': round, 'min': min, 'max': max,
80
+ 'sum': sum, 'pow': pow, 'divmod': divmod,
81
+ 'sin': math.sin, 'cos': math.cos, 'tan': math.tan,
82
+ 'asin': math.asin, 'acos': math.acos, 'atan': math.atan,
83
+ 'sinh': math.sinh, 'cosh': math.cosh, 'tanh': math.tanh,
84
+ 'log': math.log, 'log10': math.log10, 'log2': math.log2,
85
+ 'exp': math.exp, 'sqrt': math.sqrt, 'factorial': math.factorial,
86
+ 'pi': math.pi, 'e': math.e, 'inf': math.inf, 'nan': math.nan,
87
+ 'degrees': math.degrees, 'radians': math.radians,
88
+ 'ceil': math.ceil, 'floor': math.floor,
89
+ }
90
+ safe_dict.update(self.variables)
91
+ return safe_dict
92
+
93
def solve_equation(self, equation: str, variable: str = 'x') -> Dict[str, Any]:
    """
    Solve an equation symbolically for *variable*.

    Accepts either 'lhs = rhs' or a bare expression (treated as
    expr = 0 by sympy). Returns string and numeric solutions
    (float for real roots, complex otherwise), or an error dict.
    """
    try:
        if '=' in equation:
            lhs, rhs = equation.split('=', 1)
            parsed = sp.Eq(sp.sympify(lhs), sp.sympify(rhs))
        else:
            parsed = sp.sympify(equation)

        symbol = sp.Symbol(variable)
        roots = sp.solve(parsed, symbol)

        return {
            'equation': equation,
            'variable': variable,
            'solutions': [str(root) for root in roots],
            'numeric_solutions': [
                float(root.evalf()) if root.is_real else complex(root.evalf())
                for root in roots
            ],
        }

    except Exception as e:
        logger.error(f"Error solving equation: {e}")
        return {
            'error': str(e),
            'equation': equation,
            'solutions': [],
        }
123
+
124
def plot_function(self, expression: str, x_range: tuple = (-10, 10),
                  points: int = 1000) -> str:
    """
    Render f(x) over *x_range* and return the PNG as a base64 string.

    Samples *points* evenly spaced x values. Returns "" on failure.
    """
    try:
        symbol = sp.Symbol('x')
        func = sp.lambdify(symbol, sp.sympify(expression), 'numpy')

        xs = np.linspace(x_range[0], x_range[1], points)
        ys = func(xs)

        plt.figure(figsize=(10, 6))
        plt.plot(xs, ys, 'b-', linewidth=2)
        plt.grid(True, alpha=0.3)
        plt.xlabel('x')
        plt.ylabel('f(x)')
        plt.title(f'Plot of f(x) = {expression}')

        # Encode the figure as base64 PNG, then release it to avoid
        # accumulating open matplotlib figures.
        buffer = io.BytesIO()
        plt.savefig(buffer, format='png', dpi=150, bbox_inches='tight')
        buffer.seek(0)
        encoded = base64.b64encode(buffer.getvalue()).decode()
        plt.close()

        return encoded

    except Exception as e:
        logger.error(f"Error plotting function: {e}")
        return ""
160
+
161
def calculate_derivative(self, expression: str, variable: str = 'x',
                         order: int = 1) -> Dict[str, Any]:
    """
    Differentiate *expression* `order` times with respect to *variable*.

    Returns both the raw and simplified derivative as strings, or an
    error dict on failure.
    """
    try:
        symbol = sp.Symbol(variable)
        derivative = sp.diff(sp.sympify(expression), symbol, order)

        return {
            'original': expression,
            'derivative': str(derivative),
            'order': order,
            'variable': variable,
            'simplified': str(sp.simplify(derivative)),
        }

    except Exception as e:
        logger.error(f"Error calculating derivative: {e}")
        return {
            'error': str(e),
            'original': expression,
        }
186
+
187
def calculate_integral(self, expression: str, variable: str = 'x',
                       limits: Optional[tuple] = None) -> Dict[str, Any]:
    """
    Integrate *expression* with respect to *variable*.

    With *limits* = (a, b) this computes the definite integral;
    otherwise the indefinite antiderivative. 'numeric_value' is filled
    only when the symbolic result is a plain number.
    """
    try:
        symbol = sp.Symbol(variable)
        expr = sp.sympify(expression)

        if limits:
            antiderivative = sp.integrate(expr, (symbol, limits[0], limits[1]))
            kind = 'definite'
        else:
            antiderivative = sp.integrate(expr, symbol)
            kind = 'indefinite'

        return {
            'original': expression,
            'integral': str(antiderivative),
            'type': kind,
            'variable': variable,
            'limits': limits,
            'numeric_value': float(antiderivative.evalf()) if antiderivative.is_number else None,
        }

    except Exception as e:
        logger.error(f"Error calculating integral: {e}")
        return {
            'error': str(e),
            'original': expression,
        }
220
+
221
def matrix_operations(self, operation: str, *matrices) -> Dict[str, Any]:
    """
    Dispatch a named matrix operation over the given matrices.

    Binary ops ('add', 'multiply') use the first two matrices; unary
    ops ('inverse', 'determinant', 'transpose', 'eigenvalues') use the
    first. Unknown operations yield result=None; failures yield an
    error dict.
    """
    try:
        # Lists become sympy Matrices; anything else goes through sympify.
        parsed = [
            sp.Matrix(m) if isinstance(m, list) else sp.sympify(m)
            for m in matrices
        ]

        outcome = None
        if operation == 'add' and len(parsed) >= 2:
            outcome = parsed[0] + parsed[1]
        elif operation == 'multiply' and len(parsed) >= 2:
            outcome = parsed[0] * parsed[1]
        elif operation == 'inverse' and len(parsed) >= 1:
            outcome = parsed[0].inv()
        elif operation == 'determinant' and len(parsed) >= 1:
            outcome = parsed[0].det()
        elif operation == 'transpose' and len(parsed) >= 1:
            outcome = parsed[0].T
        elif operation == 'eigenvalues' and len(parsed) >= 1:
            outcome = parsed[0].eigenvals()

        return {
            'operation': operation,
            'result': str(outcome) if outcome is not None else None,
            'matrices_count': len(parsed),
        }

    except Exception as e:
        logger.error(f"Error in matrix operation: {e}")
        return {
            'error': str(e),
            'operation': operation,
        }
261
+
262
+ def statistics_calculations(self, data: List[float], operation: str) -> Dict[str, Any]:
263
+ """
264
+ Perform statistical calculations
265
+ """
266
+ try:
267
+ data = np.array(data)
268
+ result = None
269
+
270
+ if operation == 'mean':
271
+ result = np.mean(data)
272
+ elif operation == 'median':
273
+ result = np.median(data)
274
+ elif operation == 'std':
275
+ result = np.std(data)
276
+ elif operation == 'var':
277
+ result = np.var(data)
278
+ elif operation == 'min':
279
+ result = np.min(data)
280
+ elif operation == 'max':
281
+ result = np.max(data)
282
+ elif operation == 'sum':
283
+ result = np.sum(data)
284
+ elif operation == 'range':
285
+ result = np.max(data) - np.min(data)
286
+
287
+ return {
288
+ 'operation': operation,
289
+ 'result': float(result) if result is not None else None,
290
+ 'data_size': len(data),
291
+ 'data_preview': data[:5].tolist() if len(data) > 5 else data.tolist()
292
+ }
293
+
294
+ except Exception as e:
295
+ logger.error(f"Error in statistics calculation: {e}")
296
+ return {
297
+ 'error': str(e),
298
+ 'operation': operation
299
+ }
300
+
301
+ def unit_conversion(self, value: float, from_unit: str, to_unit: str) -> Dict[str, Any]:
302
+ """
303
+ Convert between different units
304
+ """
305
+ # Basic unit conversion factors (could be expanded)
306
+ conversions = {
307
+ # Length
308
+ ('m', 'cm'): 100,
309
+ ('m', 'mm'): 1000,
310
+ ('m', 'km'): 0.001,
311
+ ('cm', 'm'): 0.01,
312
+ ('mm', 'm'): 0.001,
313
+ ('km', 'm'): 1000,
314
+ ('ft', 'm'): 0.3048,
315
+ ('in', 'cm'): 2.54,
316
+
317
+ # Weight
318
+ ('kg', 'g'): 1000,
319
+ ('g', 'kg'): 0.001,
320
+ ('lb', 'kg'): 0.453592,
321
+ ('kg', 'lb'): 2.20462,
322
+
323
+ # Temperature (special handling needed)
324
+ # Time
325
+ ('h', 'min'): 60,
326
+ ('min', 's'): 60,
327
+ ('h', 's'): 3600,
328
+ ('day', 'h'): 24,
329
+ }
330
+
331
+ try:
332
+ if (from_unit, to_unit) in conversions:
333
+ result = value * conversions[(from_unit, to_unit)]
334
+ elif (to_unit, from_unit) in conversions:
335
+ result = value / conversions[(to_unit, from_unit)]
336
+ else:
337
+ return {
338
+ 'error': f"Conversion from {from_unit} to {to_unit} not supported",
339
+ 'value': value
340
+ }
341
+
342
+ return {
343
+ 'original_value': value,
344
+ 'original_unit': from_unit,
345
+ 'converted_value': result,
346
+ 'converted_unit': to_unit,
347
+ 'conversion_factor': result / value if value != 0 else None
348
+ }
349
+
350
+ except Exception as e:
351
+ logger.error(f"Error in unit conversion: {e}")
352
+ return {
353
+ 'error': str(e),
354
+ 'value': value
355
+ }
356
+
357
+ def set_variable(self, name: str, value: Any) -> bool:
358
+ """
359
+ Set a variable for use in calculations
360
+ """
361
+ try:
362
+ self.variables[name] = value
363
+ logger.info(f"Set variable {name} = {value}")
364
+ return True
365
+ except Exception as e:
366
+ logger.error(f"Error setting variable: {e}")
367
+ return False
368
+
369
+ def get_variables(self) -> Dict[str, Any]:
370
+ """
371
+ Get all stored variables
372
+ """
373
+ return self.variables.copy()
374
+
375
+ def clear_variables(self) -> bool:
376
+ """
377
+ Clear all stored variables
378
+ """
379
+ try:
380
+ self.variables.clear()
381
+ logger.info("Cleared all variables")
382
+ return True
383
+ except Exception as e:
384
+ logger.error(f"Error clearing variables: {e}")
385
+ return False
386
+
387
+ def format_result_for_llm(self, result: Dict[str, Any]) -> str:
388
+ """
389
+ Format calculation results for LLM consumption
390
+ """
391
+ if 'error' in result:
392
+ return f"Error: {result['error']}"
393
+
394
+ if 'result' in result:
395
+ return f"Result: {result['result']}\nExpression: {result.get('expression', 'N/A')}"
396
+
397
+ # Handle other result types
398
+ formatted_parts = []
399
+ for key, value in result.items():
400
+ if key not in ['error'] and value is not None:
401
+ formatted_parts.append(f"{key.title()}: {value}")
402
+
403
+ return "\n".join(formatted_parts) if formatted_parts else "No result to display"
404
+
tools/file_processor.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import PyPDF2
3
+ import docx
4
+ import pandas as pd
5
+ import json
6
+ import csv
7
+ from typing import List, Dict, Any, Optional
8
+ import logging
9
+ from pathlib import Path
10
+ from config.settings import Settings
11
+
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
+
15
+ class FileProcessor:
16
+ def __init__(self):
17
+ self.supported_extensions = {
18
+ '.txt': self._process_text,
19
+ '.pdf': self._process_pdf,
20
+ '.docx': self._process_docx,
21
+ '.doc': self._process_docx,
22
+ '.csv': self._process_csv,
23
+ '.xlsx': self._process_excel,
24
+ '.xls': self._process_excel,
25
+ '.json': self._process_json,
26
+ '.py': self._process_code,
27
+ '.js': self._process_code,
28
+ '.html': self._process_code,
29
+ '.css': self._process_code,
30
+ '.md': self._process_text,
31
+ }
32
+
33
+ def process_file(self, file_path: str) -> Dict[str, Any]:
34
+ """
35
+ Process a file and extract its content
36
+ """
37
+ try:
38
+ file_path = Path(file_path)
39
+
40
+ if not file_path.exists():
41
+ return {'error': f'File not found: {file_path}'}
42
+
43
+ # Check file size
44
+ file_size = file_path.stat().st_size / (1024 * 1024) # MB
45
+ if file_size > Settings.MAX_FILE_SIZE_MB:
46
+ return {'error': f'File too large: {file_size:.1f}MB (max: {Settings.MAX_FILE_SIZE_MB}MB)'}
47
+
48
+ extension = file_path.suffix.lower()
49
+
50
+ if extension not in self.supported_extensions:
51
+ return {'error': f'Unsupported file type: {extension}'}
52
+
53
+ # Process the file
54
+ processor = self.supported_extensions[extension]
55
+ content = processor(file_path)
56
+
57
+ return {
58
+ 'filename': file_path.name,
59
+ 'extension': extension,
60
+ 'size_mb': file_size,
61
+ 'content': content,
62
+ 'metadata': self._extract_metadata(file_path)
63
+ }
64
+
65
+ except Exception as e:
66
+ logger.error(f"Error processing file {file_path}: {e}")
67
+ return {'error': str(e)}
68
+
69
+ def _process_text(self, file_path: Path) -> str:
70
+ """
71
+ Process plain text files
72
+ """
73
+ try:
74
+ with open(file_path, 'r', encoding='utf-8') as f:
75
+ return f.read()
76
+ except UnicodeDecodeError:
77
+ # Try with different encoding
78
+ with open(file_path, 'r', encoding='latin-1') as f:
79
+ return f.read()
80
+
81
+ def _process_pdf(self, file_path: Path) -> str:
82
+ """
83
+ Process PDF files
84
+ """
85
+ try:
86
+ text_content = []
87
+ with open(file_path, 'rb') as f:
88
+ pdf_reader = PyPDF2.PdfReader(f)
89
+
90
+ for page_num, page in enumerate(pdf_reader.pages):
91
+ try:
92
+ text = page.extract_text()
93
+ if text.strip():
94
+ text_content.append(f"--- Page {page_num + 1} ---\n{text}")
95
+ except Exception as e:
96
+ logger.warning(f"Error extracting page {page_num + 1}: {e}")
97
+ continue
98
+
99
+ return "\n\n".join(text_content)
100
+
101
+ except Exception as e:
102
+ logger.error(f"Error processing PDF: {e}")
103
+ return f"Error processing PDF: {str(e)}"
104
+
105
+ def _process_docx(self, file_path: Path) -> str:
106
+ """
107
+ Process Word documents
108
+ """
109
+ try:
110
+ doc = docx.Document(file_path)
111
+ paragraphs = []
112
+
113
+ for paragraph in doc.paragraphs:
114
+ if paragraph.text.strip():
115
+ paragraphs.append(paragraph.text)
116
+
117
+ # Also extract tables
118
+ for table in doc.tables:
119
+ table_data = []
120
+ for row in table.rows:
121
+ row_data = [cell.text.strip() for cell in row.cells]
122
+ table_data.append(" | ".join(row_data))
123
+
124
+ if table_data:
125
+ paragraphs.append("\n--- Table ---\n" + "\n".join(table_data))
126
+
127
+ return "\n\n".join(paragraphs)
128
+
129
+ except Exception as e:
130
+ logger.error(f"Error processing DOCX: {e}")
131
+ return f"Error processing DOCX: {str(e)}"
132
+
133
+ def _process_csv(self, file_path: Path) -> str:
134
+ """
135
+ Process CSV files
136
+ """
137
+ try:
138
+ df = pd.read_csv(file_path)
139
+
140
+ # Basic info about the CSV
141
+ info_parts = [
142
+ f"CSV File Analysis:",
143
+ f"Rows: {len(df)}",
144
+ f"Columns: {len(df.columns)}",
145
+ f"Column Names: {', '.join(df.columns.tolist())}",
146
+ "",
147
+ "First 5 rows:",
148
+ df.head().to_string(),
149
+ "",
150
+ "Data Types:",
151
+ df.dtypes.to_string(),
152
+ "",
153
+ "Basic Statistics:",
154
+ df.describe().to_string() if len(df.select_dtypes(include=['number']).columns) > 0 else "No numeric columns"
155
+ ]
156
+
157
+ return "\n".join(info_parts)
158
+
159
+ except Exception as e:
160
+ logger.error(f"Error processing CSV: {e}")
161
+ return f"Error processing CSV: {str(e)}"
162
+
163
+ def _process_excel(self, file_path: Path) -> str:
164
+ """
165
+ Process Excel files
166
+ """
167
+ try:
168
+ # Read all sheets
169
+ excel_file = pd.ExcelFile(file_path)
170
+ content_parts = [f"Excel File: {file_path.name}"]
171
+ content_parts.append(f"Sheets: {', '.join(excel_file.sheet_names)}")
172
+
173
+ for sheet_name in excel_file.sheet_names:
174
+ df = pd.read_excel(file_path, sheet_name=sheet_name)
175
+
176
+ content_parts.append(f"\n--- Sheet: {sheet_name} ---")
177
+ content_parts.append(f"Rows: {len(df)}, Columns: {len(df.columns)}")
178
+ content_parts.append(f"Columns: {', '.join(df.columns.tolist())}")
179
+ content_parts.append("\nFirst 3 rows:")
180
+ content_parts.append(df.head(3).to_string())
181
+
182
+ return "\n".join(content_parts)
183
+
184
+ except Exception as e:
185
+ logger.error(f"Error processing Excel: {e}")
186
+ return f"Error processing Excel: {str(e)}"
187
+
188
+ def _process_json(self, file_path: Path) -> str:
189
+ """
190
+ Process JSON files
191
+ """
192
+ try:
193
+ with open(file_path, 'r', encoding='utf-8') as f:
194
+ data = json.load(f)
195
+
196
+ # Format JSON for better readability
197
+ if isinstance(data, dict):
198
+ content_parts = [
199
+ f"JSON Object with {len(data)} keys:",
200
+ f"Keys: {', '.join(data.keys())}",
201
+ "",
202
+ "Content (formatted):",
203
+ json.dumps(data, indent=2, ensure_ascii=False)[:2000] + "..." if len(str(data)) > 2000 else json.dumps(data, indent=2, ensure_ascii=False)
204
+ ]
205
+ elif isinstance(data, list):
206
+ content_parts = [
207
+ f"JSON Array with {len(data)} items",
208
+ f"First item type: {type(data[0]).__name__}" if data else "Empty array",
209
+ "",
210
+ "Content (first 3 items):",
211
+ json.dumps(data[:3], indent=2, ensure_ascii=False)
212
+ ]
213
+ else:
214
+ content_parts = [
215
+ f"JSON {type(data).__name__}:",
216
+ str(data)
217
+ ]
218
+
219
+ return "\n".join(content_parts)
220
+
221
+ except Exception as e:
222
+ logger.error(f"Error processing JSON: {e}")
223
+ return f"Error processing JSON: {str(e)}"
224
+
225
+ def _process_code(self, file_path: Path) -> str:
226
+ """
227
+ Process code files
228
+ """
229
+ try:
230
+ content = self._process_text(file_path)
231
+
232
+ # Add some analysis
233
+ lines = content.split('\n')
234
+ non_empty_lines = [line for line in lines if line.strip()]
235
+
236
+ analysis_parts = [
237
+ f"Code File Analysis:",
238
+ f"Language: {file_path.suffix[1:].upper()}",
239
+ f"Total lines: {len(lines)}",
240
+ f"Non-empty lines: {len(non_empty_lines)}",
241
+ f"Estimated complexity: {'High' if len(non_empty_lines) > 100 else 'Medium' if len(non_empty_lines) > 50 else 'Low'}",
242
+ "",
243
+ "Content:",
244
+ content
245
+ ]
246
+
247
+ return "\n".join(analysis_parts)
248
+
249
+ except Exception as e:
250
+ logger.error(f"Error processing code file: {e}")
251
+ return f"Error processing code file: {str(e)}"
252
+
253
+ def _extract_metadata(self, file_path: Path) -> Dict[str, Any]:
254
+ """
255
+ Extract file metadata
256
+ """
257
+ try:
258
+ stat = file_path.stat()
259
+ return {
260
+ 'size_bytes': stat.st_size,
261
+ 'created': stat.st_ctime,
262
+ 'modified': stat.st_mtime,
263
+ 'extension': file_path.suffix,
264
+ 'name': file_path.stem
265
+ }
266
+ except Exception as e:
267
+ logger.error(f"Error extracting metadata: {e}")
268
+ return {}
269
+
270
+ def process_multiple_files(self, file_paths: List[str]) -> List[Dict[str, Any]]:
271
+ """
272
+ Process multiple files
273
+ """
274
+ results = []
275
+ for file_path in file_paths:
276
+ result = self.process_file(file_path)
277
+ results.append(result)
278
+ return results
279
+
280
+ def extract_key_information(self, content: str, file_type: str) -> Dict[str, Any]:
281
+ """
282
+ Extract key information from processed content
283
+ """
284
+ try:
285
+ key_info = {
286
+ 'word_count': len(content.split()),
287
+ 'char_count': len(content),
288
+ 'line_count': len(content.split('\n')),
289
+ 'file_type': file_type
290
+ }
291
+
292
+ # Type-specific extraction
293
+ if file_type in ['.csv', '.xlsx', '.xls']:
294
+ # Extract numerical data mentions
295
+ import re
296
+ numbers = re.findall(r'\d+', content)
297
+ key_info['numeric_values_found'] = len(numbers)
298
+
299
+ elif file_type in ['.py', '.js', '.html', '.css']:
300
+ # Extract function/class names for code files
301
+ import re
302
+ if file_type == '.py':
303
+ functions = re.findall(r'def\s+(\w+)', content)
304
+ classes = re.findall(r'class\s+(\w+)', content)
305
+ key_info['functions'] = functions[:10] # First 10
306
+ key_info['classes'] = classes[:10]
307
+
308
+ return key_info
309
+
310
+ except Exception as e:
311
+ logger.error(f"Error extracting key information: {e}")
312
+ return {'error': str(e)}
313
+
314
+ def save_processed_content(self, content: str, output_path: str) -> bool:
315
+ """
316
+ Save processed content to a file
317
+ """
318
+ try:
319
+ with open(output_path, 'w', encoding='utf-8') as f:
320
+ f.write(content)
321
+ logger.info(f"Saved processed content to: {output_path}")
322
+ return True
323
+ except Exception as e:
324
+ logger.error(f"Error saving content: {e}")
325
+ return False
326
+
327
+ def get_supported_formats(self) -> List[str]:
328
+ """
329
+ Get list of supported file formats
330
+ """
331
+ return list(self.supported_extensions.keys())
332
+
333
+ def format_file_summary_for_llm(self, file_result: Dict[str, Any]) -> str:
334
+ """
335
+ Format file processing results for LLM consumption
336
+ """
337
+ if 'error' in file_result:
338
+ return f"Error processing file: {file_result['error']}"
339
+
340
+ summary_parts = [
341
+ f"File: {file_result['filename']}",
342
+ f"Type: {file_result['extension']}",
343
+ f"Size: {file_result['size_mb']:.2f} MB",
344
+ "",
345
+ "Content Summary:",
346
+ file_result['content'][:1000] + "..." if len(file_result['content']) > 1000 else file_result['content']
347
+ ]
348
+
349
+ return "\n".join(summary_parts)
tools/web_search.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from duckduckgo_search import DDGS
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import logging
5
+ from typing import List, Dict, Any
6
+ from config.settings import Settings
7
+
8
+ logging.basicConfig(level=logging.INFO)
9
+ logger = logging.getLogger(__name__)
10
+
11
+ class WebSearchTool:
12
+ def __init__(self):
13
+ self.ddgs = DDGS()
14
+ self.session = requests.Session()
15
+ self.session.headers.update({
16
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
17
+ })
18
+
19
+ def search(self, query: str, max_results: int = Settings.MAX_SEARCH_RESULTS) -> List[Dict[str, Any]]:
20
+ """
21
+ Search the web using DuckDuckGo
22
+ """
23
+ try:
24
+ results = []
25
+ search_results = self.ddgs.text(query, max_results=max_results)
26
+
27
+ for result in search_results:
28
+ results.append({
29
+ 'title': result.get('title', ''),
30
+ 'url': result.get('href', ''),
31
+ 'snippet': result.get('body', ''),
32
+ 'source': 'DuckDuckGo'
33
+ })
34
+
35
+ logger.info(f"Found {len(results)} search results for: {query}")
36
+ return results
37
+
38
+ except Exception as e:
39
+ logger.error(f"Error searching web: {e}")
40
+ return []
41
+
42
+ def get_page_content(self, url: str, max_chars: int = 5000) -> str:
43
+ """
44
+ Extract text content from a web page
45
+ """
46
+ try:
47
+ response = self.session.get(url, timeout=10)
48
+ response.raise_for_status()
49
+
50
+ soup = BeautifulSoup(response.content, 'html.parser')
51
+
52
+ # Remove script and style elements
53
+ for script in soup(["script", "style"]):
54
+ script.decompose()
55
+
56
+ # Get text content
57
+ text = soup.get_text()
58
+
59
+ # Clean up whitespace
60
+ lines = (line.strip() for line in text.splitlines())
61
+ chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
62
+ text = ' '.join(chunk for chunk in chunks if chunk)
63
+
64
+ # Limit length
65
+ if len(text) > max_chars:
66
+ text = text[:max_chars] + "..."
67
+
68
+ return text
69
+
70
+ except Exception as e:
71
+ logger.error(f"Error extracting content from {url}: {e}")
72
+ return f"Error: Could not extract content from {url}"
73
+
74
+ def search_and_summarize(self, query: str, include_content: bool = False) -> str:
75
+ """
76
+ Search and format results for LLM consumption
77
+ """
78
+ results = self.search(query)
79
+
80
+ if not results:
81
+ return "No search results found."
82
+
83
+ summary_parts = [f"Search results for: {query}\n"]
84
+
85
+ for i, result in enumerate(results, 1):
86
+ summary_parts.append(f"{i}. **{result['title']}**")
87
+ summary_parts.append(f" URL: {result['url']}")
88
+ summary_parts.append(f" Summary: {result['snippet']}")
89
+
90
+ if include_content and i <= 2: # Only get content for top 2 results
91
+ content = self.get_page_content(result['url'])
92
+ if content and not content.startswith("Error:"):
93
+ summary_parts.append(f" Content Preview: {content[:500]}...")
94
+
95
+ summary_parts.append("")
96
+
97
+ return "\n".join(summary_parts)
98
+
99
+ def search_news(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
100
+ """
101
+ Search for news articles
102
+ """
103
+ try:
104
+ results = []
105
+ news_results = self.ddgs.news(query, max_results=max_results)
106
+
107
+ for result in news_results:
108
+ results.append({
109
+ 'title': result.get('title', ''),
110
+ 'url': result.get('url', ''),
111
+ 'snippet': result.get('body', ''),
112
+ 'source': result.get('source', ''),
113
+ 'date': result.get('date', ''),
114
+ 'type': 'news'
115
+ })
116
+
117
+ logger.info(f"Found {len(results)} news results for: {query}")
118
+ return results
119
+
120
+ except Exception as e:
121
+ logger.error(f"Error searching news: {e}")
122
+ return []
123
+
124
+ def search_images(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
125
+ """
126
+ Search for images
127
+ """
128
+ try:
129
+ results = []
130
+ image_results = self.ddgs.images(query, max_results=max_results)
131
+
132
+ for result in image_results:
133
+ results.append({
134
+ 'title': result.get('title', ''),
135
+ 'url': result.get('image', ''),
136
+ 'thumbnail': result.get('thumbnail', ''),
137
+ 'source': result.get('source', ''),
138
+ 'type': 'image'
139
+ })
140
+
141
+ logger.info(f"Found {len(results)} image results for: {query}")
142
+ return results
143
+
144
+ except Exception as e:
145
+ logger.error(f"Error searching images: {e}")
146
+ return []
147
+
148
+ def quick_fact_search(self, query: str) -> str:
149
+ """
150
+ Quick search for factual information
151
+ """
152
+ try:
153
+ # Try to get instant answer first
154
+ instant_answer = self.ddgs.answers(query)
155
+ if instant_answer:
156
+ return f"Quick Fact: {instant_answer[0].get('text', '')}"
157
+
158
+ # Fall back to regular search
159
+ results = self.search(query, max_results=2)
160
+ if results:
161
+ return f"From search: {results[0]['snippet']}"
162
+
163
+ return "No quick facts found."
164
+
165
+ except Exception as e:
166
+ logger.error(f"Error in quick fact search: {e}")
167
+ return "Error retrieving quick facts."
168
+
169
+ def research_topic(self, topic: str) -> Dict[str, Any]:
170
+ """
171
+ Comprehensive research on a topic
172
+ """
173
+ research_data = {
174
+ 'topic': topic,
175
+ 'general_info': [],
176
+ 'news': [],
177
+ 'related_queries': []
178
+ }
179
+
180
+ try:
181
+ # General search
182
+ general_results = self.search(topic, max_results=5)
183
+ research_data['general_info'] = general_results
184
+
185
+ # News search
186
+ news_results = self.search_news(topic, max_results=3)
187
+ research_data['news'] = news_results
188
+
189
+ # Generate related queries
190
+ related_queries = [
191
+ f"{topic} definition",
192
+ f"{topic} examples",
193
+ f"{topic} applications",
194
+ f"latest {topic} developments"
195
+ ]
196
+ research_data['related_queries'] = related_queries
197
+
198
+ return research_data
199
+
200
+ except Exception as e:
201
+ logger.error(f"Error researching topic {topic}: {e}")
202
+ return research_data
203
+
204
+ def format_research_for_llm(self, research_data: Dict[str, Any]) -> str:
205
+ """
206
+ Format research data for LLM consumption
207
+ """
208
+ formatted_parts = [f"Research Results for: {research_data['topic']}\n"]
209
+
210
+ if research_data['general_info']:
211
+ formatted_parts.append("## General Information:")
212
+ for i, result in enumerate(research_data['general_info'], 1):
213
+ formatted_parts.append(f"{i}. {result['title']}")
214
+ formatted_parts.append(f" {result['snippet']}\n")
215
+
216
+ if research_data['news']:
217
+ formatted_parts.append("## Recent News:")
218
+ for i, result in enumerate(research_data['news'], 1):
219
+ formatted_parts.append(f"{i}. {result['title']}")
220
+ formatted_parts.append(f" {result['snippet']}")
221
+ if result.get('date'):
222
+ formatted_parts.append(f" Date: {result['date']}\n")
223
+
224
+ return "\n".join(formatted_parts)