Spaces:
Runtime error
Runtime error
Initial commit for the ai-reasoning-copilot.

Files changed:
- README.md (+38)
- app.py (+558)
- config/settings.py (+46)
- memory/conversation.py (+449)
- models/llm_handler.py (+302)
- models/vector_store.py (+229)
- requirements.txt (+25)
- tools/calculator.py (+404)
- tools/file_processor.py (+349)
- tools/web_search.py (+224)
- config/settings.py +46 -0
- memory/conversation.py +449 -0
- models/llm_handler.py +302 -0
- models/vector_store.py +229 -0
- requirements.txt +25 -0
- tools/calculator.py +404 -0
- tools/file_processor.py +349 -0
- tools/web_search.py +224 -0
README.md
CHANGED
|
@@ -11,3 +11,41 @@ short_description: AI Reasoning Copilot
|
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# 🧠 Open Source Reasoning Copilot
|
| 17 |
+
|
| 18 |
+
A powerful AI reasoning assistant that runs completely locally with zero cost!
|
| 19 |
+
|
| 20 |
+
## Features
|
| 21 |
+
|
| 22 |
+
- 🤖 **Online LLM Integration** - Works with OpenRouter
|
| 23 |
+
- 🔍 **Web Search** - Real-time information retrieval
|
| 24 |
+
- 🧮 **Advanced Calculator** - Symbolic math, calculus, statistics
|
| 25 |
+
- 📁 **Document Processing** - PDF, Word, Excel, CSV, JSON, code files
|
| 26 |
+
- 🧠 **Memory System** - Conversation history and context awareness
|
| 27 |
+
- 🎯 **Reasoning Tools** - Chain-of-thought, problem decomposition
|
| 28 |
+
- 📊 **Data Visualization** - Plots and charts
|
| 29 |
+
- 🔒 **Privacy First** - Everything runs locally
|
| 30 |
+
|
| 31 |
+
## Usage Examples
|
| 32 |
+
|
| 33 |
+
### Reasoning & Problem Solving
|
| 34 |
+
- "Help me analyze the pros and cons of remote work"
|
| 35 |
+
- "Walk me through solving this logic puzzle step by step"
|
| 36 |
+
- "What are the implications of AI in healthcare?"
|
| 37 |
+
|
| 38 |
+
### Research & Information
|
| 39 |
+
- "What are the latest developments in quantum computing?"
|
| 40 |
+
- "Research the history of the Roman Empire"
|
| 41 |
+
- "Find current information about climate change policies"
|
| 42 |
+
|
| 43 |
+
### Mathematics & Calculations
|
| 44 |
+
- "Solve the equation x^2 + 5x - 6 = 0"
|
| 45 |
+
- "Calculate the derivative of x^3 + 2x^2 - 5x + 1"
|
| 46 |
+
- "Plot the function y = sin(x) + cos(2x)"
|
| 47 |
+
|
| 48 |
+
### Document Analysis
|
| 49 |
+
- Upload PDFs, Word docs, spreadsheets
|
| 50 |
+
- "Summarize this research paper"
|
| 51 |
+
- "Extract key insights from this data"
|
app.py
ADDED
|
@@ -0,0 +1,558 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
from typing import List, Tuple, Any, Optional
|
| 5 |
+
import json
|
| 6 |
+
import threading
|
| 7 |
+
import time
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
|
| 10 |
+
# Import our custom modules
|
| 11 |
+
#from models.llm_handler import LLMHandler
|
| 12 |
+
from models.llm_handler import HuggingFaceLLMHandler, OpenRouterLLMHandler
|
| 13 |
+
|
| 14 |
+
from models.vector_store import VectorStore
|
| 15 |
+
from tools.web_search import WebSearchTool
|
| 16 |
+
from tools.calculator import CalculatorTool
|
| 17 |
+
from tools.file_processor import FileProcessor
|
| 18 |
+
from memory.conversation import ConversationMemory
|
| 19 |
+
from config.settings import Settings
|
| 20 |
+
|
| 21 |
+
# Setup logging
|
| 22 |
+
logging.basicConfig(
|
| 23 |
+
level=logging.INFO,
|
| 24 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
| 25 |
+
handlers=[
|
| 26 |
+
logging.FileHandler(os.path.join(Settings.LOGS_DIR, 'copilot.log')),
|
| 27 |
+
logging.StreamHandler()
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
logger = logging.getLogger(__name__)
|
| 31 |
+
|
| 32 |
+
class ReasoningCopilot:
|
| 33 |
+
def __init__(self):
|
| 34 |
+
logger.info("Initializing Reasoning Copilot...")
|
| 35 |
+
|
| 36 |
+
# Initialize components
|
| 37 |
+
#self.llm = LLMHandler()
|
| 38 |
+
#self.llm = HuggingFaceLLMHandler()
|
| 39 |
+
self.llm = OpenRouterLLMHandler()
|
| 40 |
+
self.vector_store = VectorStore()
|
| 41 |
+
self.web_search = WebSearchTool()
|
| 42 |
+
self.calculator = CalculatorTool()
|
| 43 |
+
self.file_processor = FileProcessor()
|
| 44 |
+
self.memory = ConversationMemory()
|
| 45 |
+
|
| 46 |
+
# State variables
|
| 47 |
+
self.current_model = Settings.DEFAULT_MODEL
|
| 48 |
+
self.reasoning_mode = "balanced" # balanced, creative, analytical
|
| 49 |
+
self.use_web_search = True
|
| 50 |
+
self.use_vector_search = True
|
| 51 |
+
|
| 52 |
+
logger.info("Reasoning Copilot initialized successfully!")
|
| 53 |
+
|
| 54 |
+
def process_query(self, user_input: str, chat_history: List[Tuple[str, str]],
|
| 55 |
+
use_tools: bool = True) -> Tuple[List[Tuple[str, str]], str]:
|
| 56 |
+
"""
|
| 57 |
+
Main query processing function - FIXED VERSION
|
| 58 |
+
"""
|
| 59 |
+
try:
|
| 60 |
+
if not user_input.strip():
|
| 61 |
+
return chat_history, ""
|
| 62 |
+
|
| 63 |
+
logger.info(f"Processing query: {user_input[:100]}...")
|
| 64 |
+
|
| 65 |
+
# REMOVED: This was likely causing the 10-minute delay
|
| 66 |
+
# task_analysis = self.llm.analyze_reasoning_task(user_input)
|
| 67 |
+
|
| 68 |
+
# Initialize context and tools output
|
| 69 |
+
context = ""
|
| 70 |
+
tools_output = ""
|
| 71 |
+
|
| 72 |
+
if use_tools:
|
| 73 |
+
# Use vector search for relevant context
|
| 74 |
+
if self.use_vector_search:
|
| 75 |
+
try:
|
| 76 |
+
context = self.vector_store.get_relevant_context(user_input)
|
| 77 |
+
except Exception as e:
|
| 78 |
+
logger.warning(f"Vector search failed: {e}")
|
| 79 |
+
context = ""
|
| 80 |
+
|
| 81 |
+
# Determine if web search is needed - SIMPLIFIED
|
| 82 |
+
if self.use_web_search and self._should_use_web_search_simple(user_input):
|
| 83 |
+
try:
|
| 84 |
+
search_results = self.web_search.search_and_summarize(user_input)
|
| 85 |
+
tools_output += f"\n--- Web Search Results ---\n{search_results}\n"
|
| 86 |
+
except Exception as e:
|
| 87 |
+
logger.warning(f"Web search failed: {e}")
|
| 88 |
+
|
| 89 |
+
# Determine if calculator is needed - SIMPLIFIED
|
| 90 |
+
if self._should_use_calculator_simple(user_input):
|
| 91 |
+
try:
|
| 92 |
+
calc_result = self._handle_calculation(user_input)
|
| 93 |
+
if calc_result:
|
| 94 |
+
tools_output += f"\n--- Calculation Results ---\n{calc_result}\n"
|
| 95 |
+
except Exception as e:
|
| 96 |
+
logger.warning(f"Calculator failed: {e}")
|
| 97 |
+
|
| 98 |
+
# Generate response using LLM - THIS IS THE MAIN CALL
|
| 99 |
+
print(f"DEBUG: About to call generate_response...")
|
| 100 |
+
start_time = time.time()
|
| 101 |
+
|
| 102 |
+
response = self.llm.generate_response(user_input, context, tools_output)
|
| 103 |
+
|
| 104 |
+
end_time = time.time()
|
| 105 |
+
print(f"DEBUG: generate_response took {end_time - start_time:.2f} seconds")
|
| 106 |
+
|
| 107 |
+
# Add to memory
|
| 108 |
+
try:
|
| 109 |
+
self.memory.add_exchange(user_input, response, {
|
| 110 |
+
'used_tools': use_tools,
|
| 111 |
+
'reasoning_mode': self.reasoning_mode
|
| 112 |
+
})
|
| 113 |
+
except Exception as e:
|
| 114 |
+
logger.warning(f"Memory storage failed: {e}")
|
| 115 |
+
|
| 116 |
+
# Update chat history
|
| 117 |
+
chat_history.append((user_input, response))
|
| 118 |
+
|
| 119 |
+
# Add to LLM history
|
| 120 |
+
try:
|
| 121 |
+
self.llm.add_to_history(user_input, response)
|
| 122 |
+
except Exception as e:
|
| 123 |
+
logger.warning(f"LLM history update failed: {e}")
|
| 124 |
+
|
| 125 |
+
return chat_history, ""
|
| 126 |
+
|
| 127 |
+
except Exception as e:
|
| 128 |
+
logger.error(f"Error processing query: {e}")
|
| 129 |
+
error_response = f"I apologize, but I encountered an error: {str(e)}"
|
| 130 |
+
chat_history.append((user_input, error_response))
|
| 131 |
+
return chat_history, ""
|
| 132 |
+
|
| 133 |
+
def _should_use_web_search_simple(self, query: str) -> bool:
|
| 134 |
+
"""
|
| 135 |
+
SIMPLIFIED version - no task_analysis parameter
|
| 136 |
+
"""
|
| 137 |
+
web_search_indicators = [
|
| 138 |
+
'current', 'latest', 'recent', 'news', 'today', 'now',
|
| 139 |
+
'what happened', 'update', 'price', 'weather', 'stock'
|
| 140 |
+
]
|
| 141 |
+
|
| 142 |
+
query_lower = query.lower()
|
| 143 |
+
return any(indicator in query_lower for indicator in web_search_indicators)
|
| 144 |
+
|
| 145 |
+
def _should_use_calculator_simple(self, query: str) -> bool:
|
| 146 |
+
"""
|
| 147 |
+
SIMPLIFIED version - no task_analysis parameter
|
| 148 |
+
"""
|
| 149 |
+
calc_indicators = [
|
| 150 |
+
'calculate', 'compute', 'solve', '+', '-', '*', '/', '=',
|
| 151 |
+
'math', 'equation', 'derivative', 'integral', 'plot'
|
| 152 |
+
]
|
| 153 |
+
|
| 154 |
+
query_lower = query.lower()
|
| 155 |
+
return any(indicator in query_lower for indicator in calc_indicators)
|
| 156 |
+
|
| 157 |
+
def _handle_calculation(self, query: str) -> str:
|
| 158 |
+
"""
|
| 159 |
+
Handle mathematical calculations
|
| 160 |
+
"""
|
| 161 |
+
try:
|
| 162 |
+
# Simple expression detection
|
| 163 |
+
import re
|
| 164 |
+
|
| 165 |
+
# Look for equations
|
| 166 |
+
if '=' in query and any(op in query for op in ['+', '-', '*', '/']):
|
| 167 |
+
if 'solve' in query.lower():
|
| 168 |
+
# Equation solving
|
| 169 |
+
equation = re.search(r'([^=]+=[^=]+)', query)
|
| 170 |
+
if equation:
|
| 171 |
+
result = self.calculator.solve_equation(equation.group(1))
|
| 172 |
+
return self.calculator.format_result_for_llm(result)
|
| 173 |
+
|
| 174 |
+
# Look for expressions to evaluate
|
| 175 |
+
expr_pattern = r'([0-9+\-*/().\s]+(?:[+\-*/][0-9+\-*/().\s]+)*)'
|
| 176 |
+
expressions = re.findall(expr_pattern, query)
|
| 177 |
+
|
| 178 |
+
for exp in expressions:
|
| 179 |
+
if len(exp.strip()) > 3: # Avoid single numbers
|
| 180 |
+
result = self.calculator.evaluate_expression(exp.strip())
|
| 181 |
+
return self.calculator.format_result_for_llm(result)
|
| 182 |
+
|
| 183 |
+
return ""
|
| 184 |
+
|
| 185 |
+
except Exception as e:
|
| 186 |
+
logger.error(f"Error in calculation handling: {e}")
|
| 187 |
+
return ""
|
| 188 |
+
|
| 189 |
+
def upload_files(self, files: List[Any]) -> str:
|
| 190 |
+
"""
|
| 191 |
+
Handle file uploads
|
| 192 |
+
"""
|
| 193 |
+
try:
|
| 194 |
+
if not files:
|
| 195 |
+
return "No files uploaded."
|
| 196 |
+
|
| 197 |
+
results = []
|
| 198 |
+
documents_to_add = []
|
| 199 |
+
|
| 200 |
+
for file in files:
|
| 201 |
+
if hasattr(file, 'name'):
|
| 202 |
+
file_path = file.name
|
| 203 |
+
else:
|
| 204 |
+
file_path = str(file)
|
| 205 |
+
|
| 206 |
+
# Process the file
|
| 207 |
+
file_result = self.file_processor.process_file(file_path)
|
| 208 |
+
|
| 209 |
+
if 'error' not in file_result:
|
| 210 |
+
# Add to vector store
|
| 211 |
+
content = file_result['content']
|
| 212 |
+
metadata = {
|
| 213 |
+
'filename': file_result['filename'],
|
| 214 |
+
'type': 'uploaded_file',
|
| 215 |
+
'source': file_result['filename'],
|
| 216 |
+
'upload_time': datetime.now().isoformat()
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
documents_to_add.append((content, metadata))
|
| 220 |
+
results.append(f"✓ Processed: {file_result['filename']}")
|
| 221 |
+
else:
|
| 222 |
+
results.append(f"✗ Error processing {file_path}: {file_result['error']}")
|
| 223 |
+
|
| 224 |
+
# Add all documents to vector store
|
| 225 |
+
if documents_to_add:
|
| 226 |
+
contents = [doc[0] for doc in documents_to_add]
|
| 227 |
+
metadata_list = [doc[1] for doc in documents_to_add]
|
| 228 |
+
|
| 229 |
+
success = self.vector_store.add_documents(contents, metadata_list)
|
| 230 |
+
if success:
|
| 231 |
+
results.append(f"\n✓ Added {len(documents_to_add)} documents to knowledge base.")
|
| 232 |
+
else:
|
| 233 |
+
results.append("\n✗ Failed to add documents to knowledge base.")
|
| 234 |
+
|
| 235 |
+
return "\n".join(results)
|
| 236 |
+
|
| 237 |
+
except Exception as e:
|
| 238 |
+
logger.error(f"Error uploading files: {e}")
|
| 239 |
+
return f"Error uploading files: {str(e)}"
|
| 240 |
+
|
| 241 |
+
def change_model(self, model_name: str) -> str:
|
| 242 |
+
"""
|
| 243 |
+
Change the current LLM model
|
| 244 |
+
"""
|
| 245 |
+
try:
|
| 246 |
+
if self.llm.switch_model(model_name):
|
| 247 |
+
self.current_model = model_name
|
| 248 |
+
return f"✓ Switched to model: {model_name}"
|
| 249 |
+
else:
|
| 250 |
+
return f"✗ Failed to switch to model: {model_name}"
|
| 251 |
+
except Exception as e:
|
| 252 |
+
return f"✗ Error changing model: {str(e)}"
|
| 253 |
+
|
| 254 |
+
def get_system_status(self) -> str:
|
| 255 |
+
"""
|
| 256 |
+
Get system status information
|
| 257 |
+
"""
|
| 258 |
+
try:
|
| 259 |
+
# Get model info
|
| 260 |
+
available_models = self.llm.get_available_models()
|
| 261 |
+
|
| 262 |
+
# Get vector store stats
|
| 263 |
+
vector_stats = self.vector_store.get_collection_stats()
|
| 264 |
+
|
| 265 |
+
# Get memory stats
|
| 266 |
+
memory_stats = self.memory.get_session_statistics()
|
| 267 |
+
|
| 268 |
+
# Get memory usage
|
| 269 |
+
memory_usage = self.memory.get_memory_usage()
|
| 270 |
+
|
| 271 |
+
status_info = f"""
|
| 272 |
+
🤖 **Reasoning Copilot Status**
|
| 273 |
+
|
| 274 |
+
**Current Model:** {self.current_model}
|
| 275 |
+
**Available Models:** {len(available_models)} ({', '.join(available_models[:3])}{'...' if len(available_models) > 3 else ''})
|
| 276 |
+
|
| 277 |
+
**Knowledge Base:**
|
| 278 |
+
- Documents: {vector_stats.get('total_documents', 0)}
|
| 279 |
+
- Collection: {vector_stats.get('collection_name', 'N/A')}
|
| 280 |
+
|
| 281 |
+
**Session Memory:**
|
| 282 |
+
- Exchanges: {memory_stats.get('total_exchanges', 0)}
|
| 283 |
+
- Topics: {len(memory_stats.get('topics_discussed', []))}
|
| 284 |
+
- Memory Size: {memory_usage.get('memory_file_size_kb', 0):.1f} KB
|
| 285 |
+
|
| 286 |
+
**Tools Status:**
|
| 287 |
+
- Web Search: {'✓ Enabled' if self.use_web_search else '✗ Disabled'}
|
| 288 |
+
- Vector Search: {'✓ Enabled' if self.use_vector_search else '✗ Disabled'}
|
| 289 |
+
- Calculator: ✓ Available
|
| 290 |
+
- File Processor: ✓ Available
|
| 291 |
+
|
| 292 |
+
**Reasoning Mode:** {self.reasoning_mode.title()}
|
| 293 |
+
""".strip()
|
| 294 |
+
|
| 295 |
+
return status_info
|
| 296 |
+
|
| 297 |
+
except Exception as e:
|
| 298 |
+
logger.error(f"Error getting system status: {e}")
|
| 299 |
+
return f"Error getting system status: {str(e)}"
|
| 300 |
+
|
| 301 |
+
def clear_conversation(self) -> Tuple[List, str]:
|
| 302 |
+
"""
|
| 303 |
+
Clear conversation history
|
| 304 |
+
"""
|
| 305 |
+
try:
|
| 306 |
+
self.llm.clear_history()
|
| 307 |
+
self.memory.clear_memory()
|
| 308 |
+
return [], "✓ Conversation cleared successfully."
|
| 309 |
+
except Exception as e:
|
| 310 |
+
return [], f"✗ Error clearing conversation: {str(e)}"
|
| 311 |
+
|
| 312 |
+
def export_conversation(self, format_type: str = "markdown") -> str:
|
| 313 |
+
"""
|
| 314 |
+
Export conversation history
|
| 315 |
+
"""
|
| 316 |
+
try:
|
| 317 |
+
return self.memory.export_conversation(format_type)
|
| 318 |
+
except Exception as e:
|
| 319 |
+
return f"Error exporting conversation: {str(e)}"
|
| 320 |
+
|
| 321 |
+
def create_gradio_interface():
|
| 322 |
+
"""
|
| 323 |
+
Create the Gradio interface
|
| 324 |
+
"""
|
| 325 |
+
# Initialize the copilot
|
| 326 |
+
copilot = ReasoningCopilot()
|
| 327 |
+
|
| 328 |
+
# Define the main interface
|
| 329 |
+
with gr.Blocks(
|
| 330 |
+
theme=gr.themes.Soft(),
|
| 331 |
+
title="🧠 Open Source Reasoning Copilot",
|
| 332 |
+
css="""
|
| 333 |
+
.gradio-container {
|
| 334 |
+
max-width: 1200px !important;
|
| 335 |
+
}
|
| 336 |
+
.chat-container {
|
| 337 |
+
height: 600px !important;
|
| 338 |
+
}
|
| 339 |
+
"""
|
| 340 |
+
) as interface:
|
| 341 |
+
|
| 342 |
+
gr.Markdown("""
|
| 343 |
+
# 🧠 Open Source Reasoning Copilot
|
| 344 |
+
|
| 345 |
+
A powerful AI assistant that combines local LLMs with advanced reasoning capabilities, web search, calculations, and document processing - all running locally with zero cost!
|
| 346 |
+
""")
|
| 347 |
+
|
| 348 |
+
with gr.Tab("💬 Chat"):
|
| 349 |
+
with gr.Row():
|
| 350 |
+
with gr.Column(scale=3):
|
| 351 |
+
chatbot = gr.Chatbot(
|
| 352 |
+
height=500,
|
| 353 |
+
label="Conversation",
|
| 354 |
+
elem_classes=["chat-container"]
|
| 355 |
+
)
|
| 356 |
+
|
| 357 |
+
with gr.Row():
|
| 358 |
+
msg = gr.Textbox(
|
| 359 |
+
placeholder="Ask me anything! I can help with reasoning, research, calculations, and more...",
|
| 360 |
+
label="Your Message",
|
| 361 |
+
scale=4
|
| 362 |
+
)
|
| 363 |
+
send_btn = gr.Button("Send", variant="primary", scale=1)
|
| 364 |
+
|
| 365 |
+
with gr.Row():
|
| 366 |
+
clear_btn = gr.Button("Clear Chat", variant="secondary")
|
| 367 |
+
use_tools = gr.Checkbox(label="Use Tools", value=True)
|
| 368 |
+
|
| 369 |
+
with gr.Column(scale=1):
|
| 370 |
+
gr.Markdown("### 🛠️ Quick Actions")
|
| 371 |
+
|
| 372 |
+
status_btn = gr.Button("📊 System Status", variant="secondary")
|
| 373 |
+
status_output = gr.Textbox(
|
| 374 |
+
label="Status",
|
| 375 |
+
max_lines=15,
|
| 376 |
+
interactive=False
|
| 377 |
+
)
|
| 378 |
+
|
| 379 |
+
gr.Markdown("### ⚙️ Settings")
|
| 380 |
+
|
| 381 |
+
model_dropdown = gr.Dropdown(
|
| 382 |
+
choices=["mistralai/mistral-7b-instruct", "meta-llama/llama-3-70b-instruct", "google/gemini-2.0-flash-exp:free", "huggingfaceh4/zephyr-7b-beta"],
|
| 383 |
+
value="mistralai/mistral-7b-instruct",
|
| 384 |
+
label="Model"
|
| 385 |
+
)
|
| 386 |
+
|
| 387 |
+
reasoning_mode = gr.Radio(
|
| 388 |
+
choices=["balanced", "creative", "analytical"],
|
| 389 |
+
value="balanced",
|
| 390 |
+
label="Reasoning Mode"
|
| 391 |
+
)
|
| 392 |
+
|
| 393 |
+
web_search_toggle = gr.Checkbox(
|
| 394 |
+
label="Enable Web Search",
|
| 395 |
+
value=True
|
| 396 |
+
)
|
| 397 |
+
|
| 398 |
+
vector_search_toggle = gr.Checkbox(
|
| 399 |
+
label="Enable Vector Search",
|
| 400 |
+
value=True
|
| 401 |
+
)
|
| 402 |
+
|
| 403 |
+
with gr.Tab("📁 Knowledge Base"):
|
| 404 |
+
with gr.Row():
|
| 405 |
+
with gr.Column():
|
| 406 |
+
gr.Markdown("### Upload Documents")
|
| 407 |
+
file_upload = gr.Files(
|
| 408 |
+
label="Upload Files",
|
| 409 |
+
file_types=[".txt", ".pdf", ".docx", ".csv", ".xlsx", ".json", ".py", ".js", ".html", ".md"]
|
| 410 |
+
)
|
| 411 |
+
upload_btn = gr.Button("Process Files", variant="primary")
|
| 412 |
+
upload_status = gr.Textbox(
|
| 413 |
+
label="Upload Status",
|
| 414 |
+
max_lines=10,
|
| 415 |
+
interactive=False
|
| 416 |
+
)
|
| 417 |
+
|
| 418 |
+
with gr.Column():
|
| 419 |
+
gr.Markdown("### Knowledge Base Info")
|
| 420 |
+
kb_info = gr.Textbox(
|
| 421 |
+
label="Knowledge Base Statistics",
|
| 422 |
+
max_lines=10,
|
| 423 |
+
interactive=False
|
| 424 |
+
)
|
| 425 |
+
refresh_kb_btn = gr.Button("Refresh Info")
|
| 426 |
+
|
| 427 |
+
with gr.Tab("🧮 Calculator"):
|
| 428 |
+
with gr.Row():
|
| 429 |
+
with gr.Column():
|
| 430 |
+
calc_input = gr.Textbox(
|
| 431 |
+
label="Mathematical Expression",
|
| 432 |
+
placeholder="e.g., 2*3 + 5, solve x^2 - 4 = 0, derivative of x^2 + 3x"
|
| 433 |
+
)
|
| 434 |
+
calc_btn = gr.Button("Calculate", variant="primary")
|
| 435 |
+
calc_output = gr.Textbox(
|
| 436 |
+
label="Result",
|
| 437 |
+
max_lines=10,
|
| 438 |
+
interactive=False
|
| 439 |
+
)
|
| 440 |
+
|
| 441 |
+
with gr.Column():
|
| 442 |
+
gr.Markdown("""
|
| 443 |
+
### Supported Operations
|
| 444 |
+
- Basic arithmetic: +, -, *, /, ^
|
| 445 |
+
- Functions: sin, cos, tan, log, sqrt
|
| 446 |
+
- Equation solving: solve x^2 + 2x - 3 = 0
|
| 447 |
+
- Calculus: derivative, integral
|
| 448 |
+
- Matrix operations
|
| 449 |
+
- Statistics
|
| 450 |
+
""")
|
| 451 |
+
|
| 452 |
+
with gr.Tab("📊 Memory & Export"):
|
| 453 |
+
with gr.Row():
|
| 454 |
+
with gr.Column():
|
| 455 |
+
gr.Markdown("### Conversation Memory")
|
| 456 |
+
memory_info = gr.Textbox(
|
| 457 |
+
label="Session Information",
|
| 458 |
+
max_lines=10,
|
| 459 |
+
interactive=False
|
| 460 |
+
)
|
| 461 |
+
refresh_memory_btn = gr.Button("Refresh Memory Info")
|
| 462 |
+
|
| 463 |
+
with gr.Column():
|
| 464 |
+
gr.Markdown("### Export Options")
|
| 465 |
+
export_format = gr.Radio(
|
| 466 |
+
choices=["markdown", "json", "text"],
|
| 467 |
+
value="markdown",
|
| 468 |
+
label="Export Format"
|
| 469 |
+
)
|
| 470 |
+
export_btn = gr.Button("Export Conversation", variant="primary")
|
| 471 |
+
export_output = gr.Textbox(
|
| 472 |
+
label="Exported Conversation",
|
| 473 |
+
max_lines=15,
|
| 474 |
+
interactive=False
|
| 475 |
+
)
|
| 476 |
+
|
| 477 |
+
# Event handlers
|
| 478 |
+
def respond(message, history, use_tools_flag):
|
| 479 |
+
return copilot.process_query(message, history, use_tools_flag)
|
| 480 |
+
|
| 481 |
+
def clear_chat():
|
| 482 |
+
return copilot.clear_conversation()
|
| 483 |
+
|
| 484 |
+
def get_status():
|
| 485 |
+
return copilot.get_system_status()
|
| 486 |
+
|
| 487 |
+
def upload_files_handler(files):
|
| 488 |
+
return copilot.upload_files(files)
|
| 489 |
+
|
| 490 |
+
def change_model_handler(model):
|
| 491 |
+
return copilot.change_model(model)
|
| 492 |
+
|
| 493 |
+
def export_handler(format_type):
|
| 494 |
+
return copilot.export_conversation(format_type)
|
| 495 |
+
|
| 496 |
+
def get_kb_info():
|
| 497 |
+
stats = copilot.vector_store.get_collection_stats()
|
| 498 |
+
return f"Documents: {stats.get('total_documents', 0)}\nCollection: {stats.get('collection_name', 'N/A')}"
|
| 499 |
+
|
| 500 |
+
def get_memory_info():
|
| 501 |
+
return copilot.memory.get_conversation_summary()
|
| 502 |
+
|
| 503 |
+
def update_settings(mode, web_search, vector_search):
|
| 504 |
+
copilot.reasoning_mode = mode
|
| 505 |
+
copilot.use_web_search = web_search
|
| 506 |
+
copilot.use_vector_search = vector_search
|
| 507 |
+
return "Settings updated!"
|
| 508 |
+
|
| 509 |
+
# Wire up the events
|
| 510 |
+
msg.submit(respond, [msg, chatbot, use_tools], [chatbot, msg])
|
| 511 |
+
send_btn.click(respond, [msg, chatbot, use_tools], [chatbot, msg])
|
| 512 |
+
clear_btn.click(clear_chat, outputs=[chatbot, msg])
|
| 513 |
+
|
| 514 |
+
status_btn.click(get_status, outputs=status_output)
|
| 515 |
+
model_dropdown.change(change_model_handler, inputs=model_dropdown, outputs=status_output)
|
| 516 |
+
|
| 517 |
+
upload_btn.click(upload_files_handler, inputs=file_upload, outputs=upload_status)
|
| 518 |
+
refresh_kb_btn.click(get_kb_info, outputs=kb_info)
|
| 519 |
+
|
| 520 |
+
calc_btn.click(
|
| 521 |
+
lambda expr: copilot.calculator.format_result_for_llm(
|
| 522 |
+
copilot.calculator.evaluate_expression(expr)
|
| 523 |
+
),
|
| 524 |
+
inputs=calc_input,
|
| 525 |
+
outputs=calc_output
|
| 526 |
+
)
|
| 527 |
+
|
| 528 |
+
export_btn.click(export_handler, inputs=export_format, outputs=export_output)
|
| 529 |
+
refresh_memory_btn.click(get_memory_info, outputs=memory_info)
|
| 530 |
+
|
| 531 |
+
# Settings updates
|
| 532 |
+
reasoning_mode.change(
|
| 533 |
+
update_settings,
|
| 534 |
+
inputs=[reasoning_mode, web_search_toggle, vector_search_toggle],
|
| 535 |
+
outputs=status_output
|
| 536 |
+
)
|
| 537 |
+
|
| 538 |
+
return interface
|
| 539 |
+
|
| 540 |
+
if __name__ == "__main__":
|
| 541 |
+
logger.info("Starting Reasoning Copilot...")
|
| 542 |
+
|
| 543 |
+
# Ensure Ollama is running
|
| 544 |
+
logger.info("Make sure Ollama is running with: ollama serve")
|
| 545 |
+
logger.info("And that you have downloaded a model with: ollama pull phi3:mini")
|
| 546 |
+
|
| 547 |
+
# Create and launch the interface
|
| 548 |
+
interface = create_gradio_interface()
|
| 549 |
+
|
| 550 |
+
interface.launch(
|
| 551 |
+
server_port=Settings.GRADIO_PORT,
|
| 552 |
+
share=True,
|
| 553 |
+
#share=Settings.GRADIO_SHARE,
|
| 554 |
+
server_name="0.0.0.0", # Allow external access
|
| 555 |
+
show_error=True,
|
| 556 |
+
# show_tips=True,
|
| 557 |
+
# enable_queue=True
|
| 558 |
+
)
|
config/settings.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
load_dotenv()
|
| 5 |
+
|
| 6 |
+
class Settings:
|
| 7 |
+
# Model Configuration
|
| 8 |
+
DEFAULT_MODEL = "phi3:mini"
|
| 9 |
+
EMBEDDING_MODEL = "nomic-embed-text"
|
| 10 |
+
OLLAMA_BASE_URL = "http://localhost:11434"
|
| 11 |
+
|
| 12 |
+
# Vector Database
|
| 13 |
+
CHROMA_PERSIST_DIR = "./chroma_db"
|
| 14 |
+
COLLECTION_NAME = "knowledge_base"
|
| 15 |
+
|
| 16 |
+
# UI Configuration
|
| 17 |
+
GRADIO_PORT = 7860
|
| 18 |
+
GRADIO_SHARE = False
|
| 19 |
+
|
| 20 |
+
# Tool Configuration
|
| 21 |
+
MAX_SEARCH_RESULTS = 5
|
| 22 |
+
CODE_EXECUTION_TIMEOUT = 30
|
| 23 |
+
MAX_FILE_SIZE_MB = 50
|
| 24 |
+
|
| 25 |
+
# Memory Configuration
|
| 26 |
+
MAX_CONVERSATION_HISTORY = 20
|
| 27 |
+
CONTEXT_WINDOW_SIZE = 4096
|
| 28 |
+
|
| 29 |
+
# Reasoning Configuration
|
| 30 |
+
MAX_REASONING_STEPS = 10
|
| 31 |
+
TEMPERATURE = 0.7
|
| 32 |
+
MAX_TOKENS = 2048
|
| 33 |
+
|
| 34 |
+
# File Paths
|
| 35 |
+
UPLOAD_DIR = "./uploads"
|
| 36 |
+
LOGS_DIR = "./logs"
|
| 37 |
+
|
| 38 |
+
# Create directories if they don't exist
|
| 39 |
+
@classmethod
|
| 40 |
+
def ensure_directories(cls):
|
| 41 |
+
os.makedirs(cls.CHROMA_PERSIST_DIR, exist_ok=True)
|
| 42 |
+
os.makedirs(cls.UPLOAD_DIR, exist_ok=True)
|
| 43 |
+
os.makedirs(cls.LOGS_DIR, exist_ok=True)
|
| 44 |
+
|
| 45 |
+
# Initialize directories on import
|
| 46 |
+
Settings.ensure_directories()
|
memory/conversation.py
ADDED
|
@@ -0,0 +1,449 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import uuid
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from typing import List, Dict, Any, Optional
|
| 5 |
+
import logging
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from config.settings import Settings
|
| 8 |
+
|
| 9 |
+
logging.basicConfig(level=logging.INFO)
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
class ConversationMemory:
    """Session-scoped conversation memory persisted to a JSON file.

    Tracks conversation exchanges, reasoning steps, free-form context
    key/values, user preferences, and a simple keyword-based topic list.
    All state is mirrored to ``Settings.LOGS_DIR/conversation_<id>.json``
    on every mutation so a session can later be restored with
    ``load_session``.
    """

    # Bound on the in-memory reasoning-step buffer (the persisted
    # 'reasoning_chains' list is unbounded).
    MAX_REASONING_STEPS = 50

    def __init__(self):
        self.session_id = str(uuid.uuid4())
        self.memory_file = Path(Settings.LOGS_DIR) / f"conversation_{self.session_id}.json"
        self.short_term_memory = []   # recent exchanges, bounded by Settings.MAX_CONVERSATION_HISTORY
        self.current_context = {}     # free-form key/value context
        self.reasoning_history = []   # recent reasoning steps, bounded by MAX_REASONING_STEPS

        # Canonical, persisted representation of the whole session.
        self.memory_structure = {
            'session_id': self.session_id,
            'created_at': datetime.now().isoformat(),
            'conversations': [],
            'context': {},
            'reasoning_chains': [],
            'user_preferences': {},
            'topics_discussed': []
        }

        self._save_memory()

    def add_exchange(self, user_input: str, assistant_response: str,
                     metadata: Optional[Dict[str, Any]] = None) -> bool:
        """Record one user/assistant exchange and persist it.

        Returns True on success, False if recording or saving failed.
        """
        try:
            exchange = {
                'id': str(uuid.uuid4()),
                'timestamp': datetime.now().isoformat(),
                'user_input': user_input,
                'assistant_response': assistant_response,
                'metadata': metadata or {}
            }

            self.short_term_memory.append(exchange)
            self.memory_structure['conversations'].append(exchange)

            # Only the persistent copy is unbounded; trim the working buffer.
            if len(self.short_term_memory) > Settings.MAX_CONVERSATION_HISTORY:
                self.short_term_memory = self.short_term_memory[-Settings.MAX_CONVERSATION_HISTORY:]

            self._extract_topics(user_input)
            self._save_memory()

            logger.info(f"Added exchange to memory: {exchange['id']}")
            return True

        except Exception as e:
            logger.error(f"Error adding exchange to memory: {e}")
            return False

    def add_reasoning_step(self, step: str, step_type: str, result: Any = None) -> bool:
        """Append a reasoning step (description, type, optional stringified result)."""
        try:
            reasoning_step = {
                'id': str(uuid.uuid4()),
                'timestamp': datetime.now().isoformat(),
                'step': step,
                'type': step_type,
                'result': str(result) if result is not None else None
            }

            self.reasoning_history.append(reasoning_step)
            self.memory_structure['reasoning_chains'].append(reasoning_step)

            if len(self.reasoning_history) > self.MAX_REASONING_STEPS:
                self.reasoning_history = self.reasoning_history[-self.MAX_REASONING_STEPS:]

            self._save_memory()
            return True

        except Exception as e:
            logger.error(f"Error adding reasoning step: {e}")
            return False

    def update_context(self, key: str, value: Any) -> bool:
        """Set a context key both in memory and in the persisted structure."""
        try:
            self.current_context[key] = value
            self.memory_structure['context'][key] = value
            self._save_memory()
            return True
        except Exception as e:
            logger.error(f"Error updating context: {e}")
            return False

    def get_context(self, key: Optional[str] = None) -> Any:
        """Return one context value (or None), or a copy of the whole context dict."""
        if key:
            return self.current_context.get(key)
        return self.current_context.copy()

    def get_recent_exchanges(self, count: int = 5) -> List[Dict[str, Any]]:
        """Return up to ``count`` most recent exchanges (newest last).

        Always returns a fresh list; the previous version handed out the
        internal buffer itself when ``count`` exceeded its length, letting
        callers mutate memory state by accident.
        """
        return self.short_term_memory[-count:]

    def get_conversation_summary(self) -> str:
        """Human-readable summary: session stats plus the last three exchanges."""
        if not self.short_term_memory:
            return "No conversation history available."

        summary_parts = [
            f"Session ID: {self.session_id}",
            f"Exchanges: {len(self.memory_structure['conversations'])}",
            f"Topics discussed: {', '.join(self.memory_structure['topics_discussed'][-5:])}",
            "",
            "Recent exchanges:"
        ]

        for exchange in self.short_term_memory[-3:]:
            timestamp = datetime.fromisoformat(exchange['timestamp']).strftime("%H:%M:%S")
            summary_parts.append(f"[{timestamp}] User: {exchange['user_input'][:100]}...")
            summary_parts.append(f"[{timestamp}] Assistant: {exchange['assistant_response'][:100]}...")
            summary_parts.append("")

        return "\n".join(summary_parts)

    def search_memory(self, query: str, search_type: str = 'all') -> List[Dict[str, Any]]:
        """Case-insensitive substring search over stored memory.

        search_type is 'all', 'conversations', or 'reasoning'. Returns at
        most 10 hits, sorted by a simple word-overlap relevance score.
        """
        results = []
        query_lower = query.lower()

        try:
            if search_type in ['all', 'conversations']:
                for exchange in self.memory_structure['conversations']:
                    if (query_lower in exchange['user_input'].lower() or
                            query_lower in exchange['assistant_response'].lower()):
                        results.append({
                            'type': 'conversation',
                            'content': exchange,
                            'relevance_score': self._calculate_relevance(query, exchange)
                        })

            if search_type in ['all', 'reasoning']:
                for step in self.memory_structure['reasoning_chains']:
                    if query_lower in step['step'].lower():
                        results.append({
                            'type': 'reasoning',
                            'content': step,
                            'relevance_score': self._calculate_relevance(query, step)
                        })

            results.sort(key=lambda x: x['relevance_score'], reverse=True)
            return results[:10]  # Top 10 results

        except Exception as e:
            logger.error(f"Error searching memory: {e}")
            return []

    def _extract_topics(self, text: str) -> None:
        """Record any known topic keywords found in ``text`` (simple substring match)."""
        try:
            # Simple keyword list - can be enhanced with real NLP later.
            keywords = [
                'programming', 'coding', 'python', 'javascript', 'web', 'ai', 'machine learning',
                'data', 'analysis', 'math', 'science', 'physics', 'chemistry', 'biology',
                'history', 'literature', 'writing', 'business', 'finance', 'economics',
                'health', 'medicine', 'technology', 'research', 'education', 'design'
            ]

            text_lower = text.lower()
            for topic in (k for k in keywords if k in text_lower):
                if topic not in self.memory_structure['topics_discussed']:
                    self.memory_structure['topics_discussed'].append(topic)

            # Keep the topic list bounded to the 20 most recent entries.
            if len(self.memory_structure['topics_discussed']) > 20:
                self.memory_structure['topics_discussed'] = self.memory_structure['topics_discussed'][-20:]

        except Exception as e:
            logger.error(f"Error extracting topics: {e}")

    def _calculate_relevance(self, query: str, item: Dict[str, Any]) -> float:
        """Return the fraction of query words present in the item's text (0.0-1.0)."""
        try:
            query_words = set(query.lower().split())

            if 'user_input' in item:
                # Conversation item: score against both sides of the exchange.
                text = f"{item['user_input']} {item['assistant_response']}".lower()
            else:
                # Reasoning item.
                text = item['step'].lower()

            text_words = set(text.split())

            if not query_words:
                return 0.0

            return len(query_words.intersection(text_words)) / len(query_words)

        except Exception as e:
            logger.error(f"Error calculating relevance: {e}")
            return 0.0

    def _save_memory(self) -> bool:
        """Persist ``memory_structure`` to the session JSON file.

        Fix: the parent directory is created on demand, so the very first
        save on a fresh deployment (where LOGS_DIR does not yet exist) no
        longer fails.
        """
        try:
            self.memory_file.parent.mkdir(parents=True, exist_ok=True)
            with open(self.memory_file, 'w', encoding='utf-8') as f:
                json.dump(self.memory_structure, f, indent=2, ensure_ascii=False)
            return True
        except Exception as e:
            logger.error(f"Error saving memory: {e}")
            return False

    def load_session(self, session_id: str) -> bool:
        """Replace the current state with a previously persisted session."""
        try:
            session_file = Path(Settings.LOGS_DIR) / f"conversation_{session_id}.json"

            if not session_file.exists():
                logger.warning(f"Session file not found: {session_file}")
                return False

            with open(session_file, 'r', encoding='utf-8') as f:
                self.memory_structure = json.load(f)

            self.session_id = session_id
            self.memory_file = session_file

            # Rebuild the bounded in-memory buffers from the persisted data.
            self.short_term_memory = self.memory_structure['conversations'][-Settings.MAX_CONVERSATION_HISTORY:]
            self.current_context = self.memory_structure.get('context', {})
            self.reasoning_history = self.memory_structure.get('reasoning_chains', [])[-self.MAX_REASONING_STEPS:]

            logger.info(f"Loaded session: {session_id}")
            return True

        except Exception as e:
            logger.error(f"Error loading session: {e}")
            return False

    def export_conversation(self, format_type: str = 'json') -> str:
        """Export the whole session as 'json', 'text', or 'markdown'."""
        try:
            if format_type == 'json':
                return json.dumps(self.memory_structure, indent=2, ensure_ascii=False)

            elif format_type == 'text':
                lines = [
                    f"Conversation Export - Session {self.session_id}",
                    f"Created: {self.memory_structure['created_at']}",
                    f"Total Exchanges: {len(self.memory_structure['conversations'])}",
                    "=" * 50,
                    ""
                ]

                for exchange in self.memory_structure['conversations']:
                    timestamp = datetime.fromisoformat(exchange['timestamp']).strftime("%Y-%m-%d %H:%M:%S")
                    lines.append(f"[{timestamp}]")
                    lines.append(f"User: {exchange['user_input']}")
                    lines.append(f"Assistant: {exchange['assistant_response']}")
                    lines.append("-" * 30)
                    lines.append("")

                return "\n".join(lines)

            elif format_type == 'markdown':
                lines = [
                    f"# Conversation Export",
                    f"**Session ID:** {self.session_id}",
                    f"**Created:** {self.memory_structure['created_at']}",
                    f"**Total Exchanges:** {len(self.memory_structure['conversations'])}",
                    ""
                ]

                for i, exchange in enumerate(self.memory_structure['conversations'], 1):
                    timestamp = datetime.fromisoformat(exchange['timestamp']).strftime("%Y-%m-%d %H:%M:%S")
                    lines.append(f"## Exchange {i}")
                    lines.append(f"*{timestamp}*")
                    lines.append(f"**User:** {exchange['user_input']}")
                    lines.append(f"**Assistant:** {exchange['assistant_response']}")
                    lines.append("")

                return "\n".join(lines)

            else:
                return f"Unsupported format: {format_type}"

        except Exception as e:
            logger.error(f"Error exporting conversation: {e}")
            return f"Error exporting conversation: {str(e)}"

    def get_session_statistics(self) -> Dict[str, Any]:
        """Aggregate word counts, duration, and topic stats for the session."""
        try:
            conversations = self.memory_structure['conversations']

            if not conversations:
                return {'error': 'No conversations in this session'}

            total_user_words = sum(len(conv['user_input'].split()) for conv in conversations)
            total_assistant_words = sum(len(conv['assistant_response'].split()) for conv in conversations)

            session_duration = None
            if len(conversations) > 1:
                start_time = datetime.fromisoformat(conversations[0]['timestamp'])
                end_time = datetime.fromisoformat(conversations[-1]['timestamp'])
                session_duration = str(end_time - start_time)

            return {
                'session_id': self.session_id,
                'total_exchanges': len(conversations),
                'total_user_words': total_user_words,
                'total_assistant_words': total_assistant_words,
                'average_user_words': total_user_words / len(conversations) if conversations else 0,
                'average_assistant_words': total_assistant_words / len(conversations) if conversations else 0,
                'session_duration': session_duration,
                'topics_discussed': self.memory_structure.get('topics_discussed', []),
                'reasoning_steps': len(self.memory_structure.get('reasoning_chains', [])),
                'created_at': self.memory_structure['created_at']
            }

        except Exception as e:
            logger.error(f"Error getting session statistics: {e}")
            return {'error': str(e)}

    def clear_memory(self, keep_context: bool = False) -> bool:
        """Reset the session in place; user preferences always survive."""
        try:
            self.short_term_memory.clear()
            self.reasoning_history.clear()

            if not keep_context:
                self.current_context.clear()

            # Rebuild the persisted structure from scratch, carrying over
            # only preferences (always) and context (when requested).
            self.memory_structure = {
                'session_id': self.session_id,
                'created_at': datetime.now().isoformat(),
                'conversations': [],
                'context': self.current_context if keep_context else {},
                'reasoning_chains': [],
                'user_preferences': self.memory_structure.get('user_preferences', {}),
                'topics_discussed': []
            }

            self._save_memory()
            logger.info("Cleared conversation memory")
            return True

        except Exception as e:
            logger.error(f"Error clearing memory: {e}")
            return False

    def set_user_preference(self, key: str, value: Any) -> bool:
        """Persist one user preference key/value."""
        try:
            self.memory_structure.setdefault('user_preferences', {})[key] = value
            self._save_memory()
            logger.info(f"Set user preference: {key} = {value}")
            return True

        except Exception as e:
            logger.error(f"Error setting user preference: {e}")
            return False

    def get_user_preferences(self) -> Dict[str, Any]:
        """Return all stored user preferences (empty dict if none)."""
        return self.memory_structure.get('user_preferences', {})

    def get_memory_usage(self) -> Dict[str, Any]:
        """Report on-disk and in-memory sizes of the session."""
        try:
            memory_size = self.memory_file.stat().st_size if self.memory_file.exists() else 0

            return {
                'memory_file_size_bytes': memory_size,
                'memory_file_size_kb': memory_size / 1024,
                'short_term_exchanges': len(self.short_term_memory),
                'total_exchanges': len(self.memory_structure['conversations']),
                'reasoning_steps': len(self.reasoning_history),
                'context_items': len(self.current_context),
                'topics_tracked': len(self.memory_structure.get('topics_discussed', []))
            }

        except Exception as e:
            logger.error(f"Error getting memory usage: {e}")
            return {'error': str(e)}
|
models/llm_handler.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# import ollama
|
| 2 |
+
import json
|
| 3 |
+
import logging
|
| 4 |
+
from typing import List, Dict, Any, Optional
|
| 5 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 6 |
+
import torch
|
| 7 |
+
from typing import Optional
|
| 8 |
+
from config.settings import Settings
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# HuggingFace LLM Handler for Microsoft Phi-3 Mini
|
| 14 |
+
|
| 15 |
+
import requests
|
| 16 |
+
from typing import Optional
|
| 17 |
+
import requests
|
| 18 |
+
import os
|
| 19 |
+
from dotenv import load_dotenv
|
| 20 |
+
|
| 21 |
+
load_dotenv()
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class OpenRouterLLMHandler:
    """LLM handler that generates chat completions via the OpenRouter API.

    Fixes over the previous revision:
      * ``model=""`` used to dereference a nonexistent ``self.current_model``
        and raised AttributeError; it now falls back to ``DEFAULT_MODEL``.
      * ``conversation_history`` was never initialized, so the first
        ``add_to_history`` call crashed; it is now created in ``__init__``.
      * ``get_available_models``/``generate_embedding`` referenced a
        nonexistent ``self.client`` (leftover from the Ollama handler) and
        always fell into their error branches; they now degrade gracefully.
      * The HTTP request now carries a timeout so a stalled API call cannot
        hang the app indefinitely.
    """

    DEFAULT_MODEL = "mistralai/mistral-7b-instruct"

    def __init__(self, api_key: str = "", model: str = "mistralai/mistral-7b-instruct"):
        """Configure the handler; the OPENROUTER_API_KEY env var overrides api_key."""
        if not model:
            model = self.DEFAULT_MODEL

        env_key = os.getenv("OPENROUTER_API_KEY")
        self.api_key = env_key if env_key else api_key
        self.model = model
        self.base_url = "https://openrouter.ai/api/v1/chat/completions"
        # History buffer used by add_to_history/clear_history.
        self.conversation_history: List[Dict[str, str]] = []
        print(f"🔌 Initialized OpenRouter handler with model: {model}")

    def generate_response(self, prompt: str, context: Optional[str] = None, tools_output: Optional[str] = None) -> str:
        """Send one chat-completion request; returns the reply text or an error string."""
        try:
            full_prompt = self._build_simple_prompt(prompt, context, tools_output)

            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            }

            payload = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": "You are a helpful AI assistant."},
                    {"role": "user", "content": full_prompt}
                ],
                "temperature": 0.7,
                "max_tokens": 200
            }

            # Timeout so a stalled OpenRouter call cannot block the UI forever.
            response = requests.post(self.base_url, headers=headers, json=payload, timeout=60)
            response.raise_for_status()
            result = response.json()

            return result["choices"][0]["message"]["content"].strip()

        except Exception as e:
            return f"Error generating response: {str(e)}"

    def _build_simple_prompt(self, user_input: str, context: Optional[str] = None, tools_output: Optional[str] = None) -> str:
        """Join short context, short tool output, and the user query into one prompt.

        Context/tool strings beyond the length caps (300/200 chars) are
        dropped to keep the prompt small.
        """
        prompt_parts = []

        if context and len(context) < 300:
            prompt_parts.append(f"Context: {context}")

        if tools_output and len(tools_output) < 200:
            prompt_parts.append(f"Additional info: {tools_output}")

        prompt_parts.append(f"User query: {user_input}")
        return "\n\n".join(prompt_parts)

    def add_to_history(self, user_input: str, assistant_response: str):
        """Append one exchange, trimming to Settings.MAX_CONVERSATION_HISTORY."""
        self.conversation_history.append({
            'user': user_input,
            'assistant': assistant_response
        })

        # Keep only the most recent exchanges.
        if len(self.conversation_history) > Settings.MAX_CONVERSATION_HISTORY:
            self.conversation_history = self.conversation_history[-Settings.MAX_CONVERSATION_HISTORY:]

    def clear_history(self):
        """Drop all stored exchanges."""
        self.conversation_history = []

    def get_available_models(self) -> List[str]:
        """Return known model names.

        This handler has no client-side model registry, so only the
        currently configured model is reported.
        """
        return [self.model]

    def switch_model(self, model_name: str) -> bool:
        """Point subsequent requests at ``model_name``; always succeeds locally."""
        self.model = model_name
        self.model_name = model_name  # kept for backward compatibility with older callers
        logger.info(f"Switched to model: {model_name}")
        return True

    def generate_embedding(self, text: str) -> List[float]:
        """Embeddings are not supported by this handler; always returns []."""
        logger.warning("generate_embedding is not supported by OpenRouterLLMHandler")
        return []
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
# class HuggingFaceLLMHandler:
|
| 156 |
+
# def __init__(self):
|
| 157 |
+
# from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 158 |
+
# import torch
|
| 159 |
+
# import psutil
|
| 160 |
+
|
| 161 |
+
# self.model_name = "microsoft/Phi-3-mini-4k-instruct"
|
| 162 |
+
# print("Loading model... this may take a moment on first run")
|
| 163 |
+
|
| 164 |
+
# # Choose device and dtype intelligently
|
| 165 |
+
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 166 |
+
# torch_dtype = torch.float16 if device.type == "cuda" else torch.float32
|
| 167 |
+
|
| 168 |
+
# print(f"Using device: {device}, dtype: {torch_dtype}")
|
| 169 |
+
# print(f"Available RAM: {psutil.virtual_memory().available / 1e6:.2f} MB")
|
| 170 |
+
|
| 171 |
+
# # Load tokenizer
|
| 172 |
+
# self.tokenizer = AutoTokenizer.from_pretrained(
|
| 173 |
+
# self.model_name,
|
| 174 |
+
# trust_remote_code=True
|
| 175 |
+
# )
|
| 176 |
+
|
| 177 |
+
# # Load model safely
|
| 178 |
+
# try:
|
| 179 |
+
# self.model = AutoModelForCausalLM.from_pretrained(
|
| 180 |
+
# self.model_name,
|
| 181 |
+
# torch_dtype=torch_dtype,
|
| 182 |
+
# device_map="auto" if device.type == "cuda" else None,
|
| 183 |
+
# low_cpu_mem_usage=True, # Helps reduce RAM footprint during init
|
| 184 |
+
# trust_remote_code=True
|
| 185 |
+
# )
|
| 186 |
+
|
| 187 |
+
# # Explicitly move to CPU if needed
|
| 188 |
+
# if device.type == "cpu":
|
| 189 |
+
# self.model = self.model.to(device)
|
| 190 |
+
|
| 191 |
+
# print("Model loaded successfully!")
|
| 192 |
+
|
| 193 |
+
# except RuntimeError as e:
|
| 194 |
+
# print(f"❌ Error loading model: {e}")
|
| 195 |
+
# print("Tip: Try switching to a smaller model or free up RAM.")
|
| 196 |
+
|
| 197 |
+
# def generate_response(self, prompt: str, context: Optional[str] = None,
|
| 198 |
+
# tools_output: Optional[str] = None) -> str:
|
| 199 |
+
# """
|
| 200 |
+
# Generate response using Phi-3 - should be under 10 seconds
|
| 201 |
+
# """
|
| 202 |
+
# try:
|
| 203 |
+
# # Build simple prompt
|
| 204 |
+
# full_prompt = self._build_simple_prompt(prompt, context, tools_output)
|
| 205 |
+
|
| 206 |
+
# # Tokenize and move to same device as model
|
| 207 |
+
# inputs = self.tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=1024)
|
| 208 |
+
# inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
|
| 209 |
+
|
| 210 |
+
# # Generate
|
| 211 |
+
# with torch.no_grad():
|
| 212 |
+
# outputs = self.model.generate(
|
| 213 |
+
# inputs["input_ids"],
|
| 214 |
+
# max_new_tokens=200, # Limit response length
|
| 215 |
+
# temperature=0.7,
|
| 216 |
+
# do_sample=True,
|
| 217 |
+
# pad_token_id=self.tokenizer.eos_token_id,
|
| 218 |
+
# attention_mask=inputs["attention_mask"]
|
| 219 |
+
# )
|
| 220 |
+
|
| 221 |
+
# # Decode response
|
| 222 |
+
# response = self.tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
|
| 223 |
+
# return response.strip()
|
| 224 |
+
|
| 225 |
+
# except Exception as e:
|
| 226 |
+
# logger.error(f"Error generating response: {e}")
|
| 227 |
+
# return f"Error generating response: {str(e)}"
|
| 228 |
+
|
| 229 |
+
# def _build_simple_prompt(self, user_input: str, context: Optional[str] = None,
|
| 230 |
+
# tools_output: Optional[str] = None) -> str:
|
| 231 |
+
# """Simple prompt builder"""
|
| 232 |
+
# prompt_parts = ["You are a helpful AI assistant."]
|
| 233 |
+
|
| 234 |
+
# if context and len(context) < 300:
|
| 235 |
+
# prompt_parts.append(f"Context: {context}")
|
| 236 |
+
|
| 237 |
+
# if tools_output and len(tools_output) < 200:
|
| 238 |
+
# prompt_parts.append(f"Additional info: {tools_output}")
|
| 239 |
+
|
| 240 |
+
# prompt_parts.append(f"User: {user_input}")
|
| 241 |
+
# prompt_parts.append("Assistant:")
|
| 242 |
+
|
| 243 |
+
# return "\n\n".join(prompt_parts)
|
| 244 |
+
|
| 245 |
+
# def add_to_history(self, user_input: str, assistant_response: str):
|
| 246 |
+
# """
|
| 247 |
+
# Add exchange to conversation history
|
| 248 |
+
# """
|
| 249 |
+
# self.conversation_history.append({
|
| 250 |
+
# 'user': user_input,
|
| 251 |
+
# 'assistant': assistant_response
|
| 252 |
+
# })
|
| 253 |
+
|
| 254 |
+
# # Keep only recent history
|
| 255 |
+
# if len(self.conversation_history) > Settings.MAX_CONVERSATION_HISTORY:
|
| 256 |
+
# self.conversation_history = self.conversation_history[-Settings.MAX_CONVERSATION_HISTORY:]
|
| 257 |
+
|
| 258 |
+
# def clear_history(self):
|
| 259 |
+
# """
|
| 260 |
+
# Clear conversation history
|
| 261 |
+
# """
|
| 262 |
+
# self.conversation_history = []
|
| 263 |
+
|
| 264 |
+
# def get_available_models(self) -> List[str]:
|
| 265 |
+
# """
|
| 266 |
+
# Get list of available Ollama models
|
| 267 |
+
# """
|
| 268 |
+
# try:
|
| 269 |
+
# models = self.client.list()
|
| 270 |
+
# return [model['name'] for model in models['models']]
|
| 271 |
+
# except Exception as e:
|
| 272 |
+
# logger.error(f"Error getting models: {e}")
|
| 273 |
+
# return [Settings.DEFAULT_MODEL]
|
| 274 |
+
|
| 275 |
+
# def switch_model(self, model_name: str) -> bool:
|
| 276 |
+
# """
|
| 277 |
+
# Switch to a different model
|
| 278 |
+
# """
|
| 279 |
+
# try:
|
| 280 |
+
# # Test if model is available
|
| 281 |
+
# self.client.generate(model=model_name, prompt="test", options={'num_predict': 1})
|
| 282 |
+
# self.model_name = model_name
|
| 283 |
+
# logger.info(f"Switched to model: {model_name}")
|
| 284 |
+
# return True
|
| 285 |
+
# except Exception as e:
|
| 286 |
+
# logger.error(f"Error switching to model {model_name}: {e}")
|
| 287 |
+
# return False
|
| 288 |
+
|
| 289 |
+
# def generate_embedding(self, text: str) -> List[float]:
|
| 290 |
+
# """
|
| 291 |
+
# Generate embeddings for text using Ollama
|
| 292 |
+
# """
|
| 293 |
+
# try:
|
| 294 |
+
# response = self.client.embeddings(
|
| 295 |
+
# model=Settings.EMBEDDING_MODEL,
|
| 296 |
+
# prompt=text
|
| 297 |
+
# )
|
| 298 |
+
# return response['embedding']
|
| 299 |
+
# except Exception as e:
|
| 300 |
+
# logger.error(f"Error generating embedding: {e}")
|
| 301 |
+
# return []
|
| 302 |
+
|
models/vector_store.py
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import chromadb
|
| 2 |
+
from chromadb.config import Settings as ChromaSettings
|
| 3 |
+
from sentence_transformers import SentenceTransformer
|
| 4 |
+
import logging
|
| 5 |
+
from typing import List, Dict, Any, Optional
|
| 6 |
+
import uuid
|
| 7 |
+
from config.settings import Settings
|
| 8 |
+
|
| 9 |
+
logging.basicConfig(level=logging.INFO)
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
class VectorStore:
    """ChromaDB-backed persistent vector store for the reasoning copilot.

    Wraps a persistent Chroma client plus a local SentenceTransformer
    embedding model, and exposes document storage, semantic search,
    conversation memory, and collection management.
    """

    def __init__(self):
        # Persistent client so stored vectors survive process restarts;
        # telemetry is disabled for privacy.
        self.client = chromadb.PersistentClient(
            path=Settings.CHROMA_PERSIST_DIR,
            settings=ChromaSettings(anonymized_telemetry=False)
        )
        # Small general-purpose model; embeds locally with no API calls.
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.collection = None
        self.initialize_collection()

    def initialize_collection(self):
        """Create or load the main knowledge-base collection.

        Re-raises the underlying exception on failure because the store
        is unusable without a collection.
        """
        try:
            self.collection = self.client.get_or_create_collection(
                name=Settings.COLLECTION_NAME,
                metadata={"description": "General knowledge base for reasoning copilot"}
            )
            logger.info(f"Initialized collection: {Settings.COLLECTION_NAME}")
        except Exception as e:
            logger.error(f"Error initializing collection: {e}")
            raise

    def add_documents(self, documents: List[str], metadata: Optional[List[Dict]] = None,
                      ids: Optional[List[str]] = None) -> bool:
        """Embed and store a batch of documents.

        Args:
            documents: Raw text documents to index.
            metadata: Optional per-document metadata dicts (parallel list).
            ids: Optional per-document IDs; random UUIDs are generated
                when omitted.

        Returns:
            True on success; False on empty input or any failure.
        """
        try:
            if not documents:
                return False

            # Generate IDs if not provided.
            if ids is None:
                ids = [str(uuid.uuid4()) for _ in documents]

            # Embeddings are computed locally by the sentence-transformer.
            embeddings = self.embedding_model.encode(documents).tolist()

            # Default metadata marks documents as user uploads.
            if metadata is None:
                metadata = [{"source": "user_upload", "type": "document"} for _ in documents]

            self.collection.add(
                documents=documents,
                embeddings=embeddings,
                metadatas=metadata,
                ids=ids
            )

            logger.info(f"Added {len(documents)} documents to vector store")
            return True

        except Exception as e:
            logger.error(f"Error adding documents: {e}")
            return False

    def search_similar(self, query: str, n_results: int = 5,
                       where: Optional[Dict] = None) -> Dict[str, Any]:
        """Semantic search over the active collection.

        Returns:
            Dict with flattened 'documents', 'metadatas', 'distances'
            lists and a 'count'; all empty / zero on failure.
        """
        try:
            query_embedding = self.embedding_model.encode([query]).tolist()[0]

            results = self.collection.query(
                query_embeddings=[query_embedding],
                n_results=n_results,
                where=where,
                include=['documents', 'metadatas', 'distances']
            )

            # Chroma nests results per query; we issued exactly one query,
            # so flatten the first (only) entry.
            return {
                'documents': results['documents'][0] if results['documents'] else [],
                'metadatas': results['metadatas'][0] if results['metadatas'] else [],
                'distances': results['distances'][0] if results['distances'] else [],
                'count': len(results['documents'][0]) if results['documents'] else 0
            }

        except Exception as e:
            logger.error(f"Error searching documents: {e}")
            return {'documents': [], 'metadatas': [], 'distances': [], 'count': 0}

    def get_relevant_context(self, query: str, max_context_length: int = 2000) -> str:
        """Build an LLM-ready context string from the top search hits.

        Each snippet is capped at 500 characters; accumulation stops
        before exceeding max_context_length.
        """
        results = self.search_similar(query, n_results=5)

        if not results['documents']:
            return ""

        context_parts = []
        current_length = 0

        for doc, metadata in zip(results['documents'], results['metadatas']):
            source = metadata.get('source', 'Unknown')
            # FIX: only append an ellipsis when the document was actually
            # truncated (the original always appended "...").
            body = doc[:500] + ("..." if len(doc) > 500 else "")
            snippet = f"Source: {source}\nContent: {body}\n"

            if current_length + len(snippet) > max_context_length:
                break

            context_parts.append(snippet)
            current_length += len(snippet)

        return "\n---\n".join(context_parts)

    def add_conversation_memory(self, user_input: str, assistant_response: str, session_id: str):
        """Persist one user/assistant exchange as searchable memory.

        Returns:
            True on success, False otherwise (mirrors add_documents).
        """
        try:
            from datetime import datetime, timezone  # local import: only used here

            memory_doc = f"User: {user_input}\nAssistant: {assistant_response}"
            metadata = {
                "type": "conversation",
                "session_id": session_id,
                # BUG FIX: the original stored str(uuid.uuid4()) under
                # "timestamp", which is not a timestamp at all.
                "timestamp": datetime.now(timezone.utc).isoformat()
            }

            return self.add_documents([memory_doc], [metadata])

        except Exception as e:
            logger.error(f"Error adding conversation memory: {e}")
            return False

    def search_conversations(self, query: str, session_id: Optional[str] = None) -> List[str]:
        """Search stored conversation memories, optionally scoped to a session."""
        if session_id:
            # BUG FIX: Chroma requires multiple filter conditions to be
            # combined explicitly with $and; a multi-key where dict is
            # rejected by recent chromadb versions.
            where_clause = {"$and": [{"type": "conversation"},
                                     {"session_id": session_id}]}
        else:
            where_clause = {"type": "conversation"}

        results = self.search_similar(query, n_results=3, where=where_clause)
        return results['documents']

    def get_collection_stats(self) -> Dict[str, Any]:
        """Return document count and name for the active collection."""
        try:
            count = self.collection.count()
            return {
                "total_documents": count,
                "collection_name": Settings.COLLECTION_NAME
            }
        except Exception as e:
            logger.error(f"Error getting collection stats: {e}")
            return {"total_documents": 0, "collection_name": "unknown"}

    def delete_documents(self, ids: List[str]) -> bool:
        """Delete documents by ID; returns True on success."""
        try:
            self.collection.delete(ids=ids)
            logger.info(f"Deleted {len(ids)} documents")
            return True
        except Exception as e:
            logger.error(f"Error deleting documents: {e}")
            return False

    def clear_collection(self) -> bool:
        """Remove every document by dropping and recreating the collection."""
        try:
            self.client.delete_collection(Settings.COLLECTION_NAME)
            self.initialize_collection()
            logger.info("Cleared all documents from collection")
            return True
        except Exception as e:
            logger.error(f"Error clearing collection: {e}")
            return False

    def create_specialized_collection(self, name: str, description: str) -> bool:
        """Create (or get) a domain-specific collection; returns True on success."""
        try:
            self.client.get_or_create_collection(
                name=name,
                metadata={"description": description}
            )
            logger.info(f"Created specialized collection: {name}")
            return True
        except Exception as e:
            logger.error(f"Error creating specialized collection: {e}")
            return False

    def switch_collection(self, name: str) -> bool:
        """Make an existing collection the active one; returns True on success."""
        try:
            self.collection = self.client.get_collection(name=name)
            logger.info(f"Switched to collection: {name}")
            return True
        except Exception as e:
            logger.error(f"Error switching to collection {name}: {e}")
            return False

    def list_collections(self) -> List[str]:
        """Return the names of all collections; empty list on failure."""
        try:
            collections = self.client.list_collections()
            return [col.name for col in collections]
        except Exception as e:
            logger.error(f"Error listing collections: {e}")
            return []
|
requirements.txt
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
chromadb
|
| 3 |
+
ollama
|
| 4 |
+
sentence-transformers
|
| 5 |
+
beautifulsoup4
|
| 6 |
+
requests
|
| 7 |
+
pandas
|
| 8 |
+
numpy
|
| 9 |
+
python-dotenv
|
| 10 |
+
langchain
|
| 11 |
+
langchain-community
|
| 12 |
+
PyPDF2
|
| 13 |
+
python-docx
|
| 14 |
+
openpyxl
|
| 15 |
+
sympy
|
| 16 |
+
networkx
|
| 17 |
+
matplotlib
|
| 18 |
+
plotly
|
| 19 |
+
duckduckgo-search
|
| 20 |
+
psutil
|
| 21 |
+
fastapi
|
| 22 |
+
uvicorn
|
| 23 |
+
websockets
|
| 24 |
+
torch
|
| 25 |
+
transformers
|
tools/calculator.py
ADDED
|
@@ -0,0 +1,404 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sympy as sp
|
| 2 |
+
import numpy as np
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
import io
|
| 5 |
+
import base64
|
| 6 |
+
from typing import Any, Dict, List, Optional, Union
|
| 7 |
+
import logging
|
| 8 |
+
import re
|
| 9 |
+
import math
|
| 10 |
+
|
| 11 |
+
logging.basicConfig(level=logging.INFO)
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
class CalculatorTool:
|
| 15 |
+
def __init__(self):
|
| 16 |
+
self.variables = {}
|
| 17 |
+
self.last_result = None
|
| 18 |
+
|
| 19 |
+
def evaluate_expression(self, expression: str) -> Dict[str, Any]:
|
| 20 |
+
"""
|
| 21 |
+
Safely evaluate mathematical expressions
|
| 22 |
+
"""
|
| 23 |
+
try:
|
| 24 |
+
# Clean the expression
|
| 25 |
+
expression = self._clean_expression(expression)
|
| 26 |
+
|
| 27 |
+
# Try sympy first for symbolic computation
|
| 28 |
+
try:
|
| 29 |
+
result = sp.sympify(expression).evalf()
|
| 30 |
+
self.last_result = float(result)
|
| 31 |
+
return {
|
| 32 |
+
'result': float(result),
|
| 33 |
+
'expression': expression,
|
| 34 |
+
'type': 'symbolic',
|
| 35 |
+
'formatted': str(result)
|
| 36 |
+
}
|
| 37 |
+
except:
|
| 38 |
+
# Fall back to basic evaluation
|
| 39 |
+
result = eval(expression, {"__builtins__": {}}, self._get_safe_namespace())
|
| 40 |
+
self.last_result = result
|
| 41 |
+
return {
|
| 42 |
+
'result': result,
|
| 43 |
+
'expression': expression,
|
| 44 |
+
'type': 'numeric',
|
| 45 |
+
'formatted': str(result)
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
except Exception as e:
|
| 49 |
+
logger.error(f"Error evaluating expression: {e}")
|
| 50 |
+
return {
|
| 51 |
+
'error': str(e),
|
| 52 |
+
'expression': expression,
|
| 53 |
+
'result': None
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
def _clean_expression(self, expression: str) -> str:
|
| 57 |
+
"""
|
| 58 |
+
Clean and prepare expression for evaluation
|
| 59 |
+
"""
|
| 60 |
+
# Replace common math notation
|
| 61 |
+
replacements = {
|
| 62 |
+
'^': '**',
|
| 63 |
+
'×': '*',
|
| 64 |
+
'÷': '/',
|
| 65 |
+
'π': 'pi',
|
| 66 |
+
'e': 'E'
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
for old, new in replacements.items():
|
| 70 |
+
expression = expression.replace(old, new)
|
| 71 |
+
|
| 72 |
+
return expression
|
| 73 |
+
|
| 74 |
+
def _get_safe_namespace(self) -> Dict[str, Any]:
|
| 75 |
+
"""
|
| 76 |
+
Get safe namespace for expression evaluation
|
| 77 |
+
"""
|
| 78 |
+
safe_dict = {
|
| 79 |
+
'abs': abs, 'round': round, 'min': min, 'max': max,
|
| 80 |
+
'sum': sum, 'pow': pow, 'divmod': divmod,
|
| 81 |
+
'sin': math.sin, 'cos': math.cos, 'tan': math.tan,
|
| 82 |
+
'asin': math.asin, 'acos': math.acos, 'atan': math.atan,
|
| 83 |
+
'sinh': math.sinh, 'cosh': math.cosh, 'tanh': math.tanh,
|
| 84 |
+
'log': math.log, 'log10': math.log10, 'log2': math.log2,
|
| 85 |
+
'exp': math.exp, 'sqrt': math.sqrt, 'factorial': math.factorial,
|
| 86 |
+
'pi': math.pi, 'e': math.e, 'inf': math.inf, 'nan': math.nan,
|
| 87 |
+
'degrees': math.degrees, 'radians': math.radians,
|
| 88 |
+
'ceil': math.ceil, 'floor': math.floor,
|
| 89 |
+
}
|
| 90 |
+
safe_dict.update(self.variables)
|
| 91 |
+
return safe_dict
|
| 92 |
+
|
| 93 |
+
def solve_equation(self, equation: str, variable: str = 'x') -> Dict[str, Any]:
|
| 94 |
+
"""
|
| 95 |
+
Solve equations symbolically
|
| 96 |
+
"""
|
| 97 |
+
try:
|
| 98 |
+
# Parse equation
|
| 99 |
+
if '=' in equation:
|
| 100 |
+
left, right = equation.split('=', 1)
|
| 101 |
+
eq = sp.Eq(sp.sympify(left), sp.sympify(right))
|
| 102 |
+
else:
|
| 103 |
+
eq = sp.sympify(equation)
|
| 104 |
+
|
| 105 |
+
# Solve
|
| 106 |
+
var = sp.Symbol(variable)
|
| 107 |
+
solutions = sp.solve(eq, var)
|
| 108 |
+
|
| 109 |
+
return {
|
| 110 |
+
'equation': equation,
|
| 111 |
+
'variable': variable,
|
| 112 |
+
'solutions': [str(sol) for sol in solutions],
|
| 113 |
+
'numeric_solutions': [float(sol.evalf()) if sol.is_real else complex(sol.evalf()) for sol in solutions]
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
except Exception as e:
|
| 117 |
+
logger.error(f"Error solving equation: {e}")
|
| 118 |
+
return {
|
| 119 |
+
'error': str(e),
|
| 120 |
+
'equation': equation,
|
| 121 |
+
'solutions': []
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
def plot_function(self, expression: str, x_range: tuple = (-10, 10),
|
| 125 |
+
points: int = 1000) -> str:
|
| 126 |
+
"""
|
| 127 |
+
Plot a mathematical function and return base64 encoded image
|
| 128 |
+
"""
|
| 129 |
+
try:
|
| 130 |
+
x = sp.Symbol('x')
|
| 131 |
+
expr = sp.sympify(expression)
|
| 132 |
+
|
| 133 |
+
# Convert to numpy function
|
| 134 |
+
f = sp.lambdify(x, expr, 'numpy')
|
| 135 |
+
|
| 136 |
+
# Generate points
|
| 137 |
+
x_vals = np.linspace(x_range[0], x_range[1], points)
|
| 138 |
+
y_vals = f(x_vals)
|
| 139 |
+
|
| 140 |
+
# Create plot
|
| 141 |
+
plt.figure(figsize=(10, 6))
|
| 142 |
+
plt.plot(x_vals, y_vals, 'b-', linewidth=2)
|
| 143 |
+
plt.grid(True, alpha=0.3)
|
| 144 |
+
plt.xlabel('x')
|
| 145 |
+
plt.ylabel('f(x)')
|
| 146 |
+
plt.title(f'Plot of f(x) = {expression}')
|
| 147 |
+
|
| 148 |
+
# Convert to base64
|
| 149 |
+
buffer = io.BytesIO()
|
| 150 |
+
plt.savefig(buffer, format='png', dpi=150, bbox_inches='tight')
|
| 151 |
+
buffer.seek(0)
|
| 152 |
+
plot_data = base64.b64encode(buffer.getvalue()).decode()
|
| 153 |
+
plt.close()
|
| 154 |
+
|
| 155 |
+
return plot_data
|
| 156 |
+
|
| 157 |
+
except Exception as e:
|
| 158 |
+
logger.error(f"Error plotting function: {e}")
|
| 159 |
+
return ""
|
| 160 |
+
|
| 161 |
+
def calculate_derivative(self, expression: str, variable: str = 'x',
|
| 162 |
+
order: int = 1) -> Dict[str, Any]:
|
| 163 |
+
"""
|
| 164 |
+
Calculate derivative of an expression
|
| 165 |
+
"""
|
| 166 |
+
try:
|
| 167 |
+
var = sp.Symbol(variable)
|
| 168 |
+
expr = sp.sympify(expression)
|
| 169 |
+
|
| 170 |
+
derivative = sp.diff(expr, var, order)
|
| 171 |
+
|
| 172 |
+
return {
|
| 173 |
+
'original': expression,
|
| 174 |
+
'derivative': str(derivative),
|
| 175 |
+
'order': order,
|
| 176 |
+
'variable': variable,
|
| 177 |
+
'simplified': str(sp.simplify(derivative))
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
except Exception as e:
|
| 181 |
+
logger.error(f"Error calculating derivative: {e}")
|
| 182 |
+
return {
|
| 183 |
+
'error': str(e),
|
| 184 |
+
'original': expression
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
def calculate_integral(self, expression: str, variable: str = 'x',
|
| 188 |
+
limits: Optional[tuple] = None) -> Dict[str, Any]:
|
| 189 |
+
"""
|
| 190 |
+
Calculate integral of an expression
|
| 191 |
+
"""
|
| 192 |
+
try:
|
| 193 |
+
var = sp.Symbol(variable)
|
| 194 |
+
expr = sp.sympify(expression)
|
| 195 |
+
|
| 196 |
+
if limits:
|
| 197 |
+
# Definite integral
|
| 198 |
+
result = sp.integrate(expr, (var, limits[0], limits[1]))
|
| 199 |
+
integral_type = 'definite'
|
| 200 |
+
else:
|
| 201 |
+
# Indefinite integral
|
| 202 |
+
result = sp.integrate(expr, var)
|
| 203 |
+
integral_type = 'indefinite'
|
| 204 |
+
|
| 205 |
+
return {
|
| 206 |
+
'original': expression,
|
| 207 |
+
'integral': str(result),
|
| 208 |
+
'type': integral_type,
|
| 209 |
+
'variable': variable,
|
| 210 |
+
'limits': limits,
|
| 211 |
+
'numeric_value': float(result.evalf()) if result.is_number else None
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
+
except Exception as e:
|
| 215 |
+
logger.error(f"Error calculating integral: {e}")
|
| 216 |
+
return {
|
| 217 |
+
'error': str(e),
|
| 218 |
+
'original': expression
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
def matrix_operations(self, operation: str, *matrices) -> Dict[str, Any]:
|
| 222 |
+
"""
|
| 223 |
+
Perform matrix operations
|
| 224 |
+
"""
|
| 225 |
+
try:
|
| 226 |
+
# Convert input to sympy matrices
|
| 227 |
+
sp_matrices = []
|
| 228 |
+
for matrix in matrices:
|
| 229 |
+
if isinstance(matrix, list):
|
| 230 |
+
sp_matrices.append(sp.Matrix(matrix))
|
| 231 |
+
else:
|
| 232 |
+
sp_matrices.append(sp.sympify(matrix))
|
| 233 |
+
|
| 234 |
+
result = None
|
| 235 |
+
|
| 236 |
+
if operation == 'add' and len(sp_matrices) >= 2:
|
| 237 |
+
result = sp_matrices[0] + sp_matrices[1]
|
| 238 |
+
elif operation == 'multiply' and len(sp_matrices) >= 2:
|
| 239 |
+
result = sp_matrices[0] * sp_matrices[1]
|
| 240 |
+
elif operation == 'inverse' and len(sp_matrices) >= 1:
|
| 241 |
+
result = sp_matrices[0].inv()
|
| 242 |
+
elif operation == 'determinant' and len(sp_matrices) >= 1:
|
| 243 |
+
result = sp_matrices[0].det()
|
| 244 |
+
elif operation == 'transpose' and len(sp_matrices) >= 1:
|
| 245 |
+
result = sp_matrices[0].T
|
| 246 |
+
elif operation == 'eigenvalues' and len(sp_matrices) >= 1:
|
| 247 |
+
result = sp_matrices[0].eigenvals()
|
| 248 |
+
|
| 249 |
+
return {
|
| 250 |
+
'operation': operation,
|
| 251 |
+
'result': str(result) if result is not None else None,
|
| 252 |
+
'matrices_count': len(sp_matrices)
|
| 253 |
+
}
|
| 254 |
+
|
| 255 |
+
except Exception as e:
|
| 256 |
+
logger.error(f"Error in matrix operation: {e}")
|
| 257 |
+
return {
|
| 258 |
+
'error': str(e),
|
| 259 |
+
'operation': operation
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
+
def statistics_calculations(self, data: List[float], operation: str) -> Dict[str, Any]:
|
| 263 |
+
"""
|
| 264 |
+
Perform statistical calculations
|
| 265 |
+
"""
|
| 266 |
+
try:
|
| 267 |
+
data = np.array(data)
|
| 268 |
+
result = None
|
| 269 |
+
|
| 270 |
+
if operation == 'mean':
|
| 271 |
+
result = np.mean(data)
|
| 272 |
+
elif operation == 'median':
|
| 273 |
+
result = np.median(data)
|
| 274 |
+
elif operation == 'std':
|
| 275 |
+
result = np.std(data)
|
| 276 |
+
elif operation == 'var':
|
| 277 |
+
result = np.var(data)
|
| 278 |
+
elif operation == 'min':
|
| 279 |
+
result = np.min(data)
|
| 280 |
+
elif operation == 'max':
|
| 281 |
+
result = np.max(data)
|
| 282 |
+
elif operation == 'sum':
|
| 283 |
+
result = np.sum(data)
|
| 284 |
+
elif operation == 'range':
|
| 285 |
+
result = np.max(data) - np.min(data)
|
| 286 |
+
|
| 287 |
+
return {
|
| 288 |
+
'operation': operation,
|
| 289 |
+
'result': float(result) if result is not None else None,
|
| 290 |
+
'data_size': len(data),
|
| 291 |
+
'data_preview': data[:5].tolist() if len(data) > 5 else data.tolist()
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
except Exception as e:
|
| 295 |
+
logger.error(f"Error in statistics calculation: {e}")
|
| 296 |
+
return {
|
| 297 |
+
'error': str(e),
|
| 298 |
+
'operation': operation
|
| 299 |
+
}
|
| 300 |
+
|
| 301 |
+
def unit_conversion(self, value: float, from_unit: str, to_unit: str) -> Dict[str, Any]:
|
| 302 |
+
"""
|
| 303 |
+
Convert between different units
|
| 304 |
+
"""
|
| 305 |
+
# Basic unit conversion factors (could be expanded)
|
| 306 |
+
conversions = {
|
| 307 |
+
# Length
|
| 308 |
+
('m', 'cm'): 100,
|
| 309 |
+
('m', 'mm'): 1000,
|
| 310 |
+
('m', 'km'): 0.001,
|
| 311 |
+
('cm', 'm'): 0.01,
|
| 312 |
+
('mm', 'm'): 0.001,
|
| 313 |
+
('km', 'm'): 1000,
|
| 314 |
+
('ft', 'm'): 0.3048,
|
| 315 |
+
('in', 'cm'): 2.54,
|
| 316 |
+
|
| 317 |
+
# Weight
|
| 318 |
+
('kg', 'g'): 1000,
|
| 319 |
+
('g', 'kg'): 0.001,
|
| 320 |
+
('lb', 'kg'): 0.453592,
|
| 321 |
+
('kg', 'lb'): 2.20462,
|
| 322 |
+
|
| 323 |
+
# Temperature (special handling needed)
|
| 324 |
+
# Time
|
| 325 |
+
('h', 'min'): 60,
|
| 326 |
+
('min', 's'): 60,
|
| 327 |
+
('h', 's'): 3600,
|
| 328 |
+
('day', 'h'): 24,
|
| 329 |
+
}
|
| 330 |
+
|
| 331 |
+
try:
|
| 332 |
+
if (from_unit, to_unit) in conversions:
|
| 333 |
+
result = value * conversions[(from_unit, to_unit)]
|
| 334 |
+
elif (to_unit, from_unit) in conversions:
|
| 335 |
+
result = value / conversions[(to_unit, from_unit)]
|
| 336 |
+
else:
|
| 337 |
+
return {
|
| 338 |
+
'error': f"Conversion from {from_unit} to {to_unit} not supported",
|
| 339 |
+
'value': value
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
+
return {
|
| 343 |
+
'original_value': value,
|
| 344 |
+
'original_unit': from_unit,
|
| 345 |
+
'converted_value': result,
|
| 346 |
+
'converted_unit': to_unit,
|
| 347 |
+
'conversion_factor': result / value if value != 0 else None
|
| 348 |
+
}
|
| 349 |
+
|
| 350 |
+
except Exception as e:
|
| 351 |
+
logger.error(f"Error in unit conversion: {e}")
|
| 352 |
+
return {
|
| 353 |
+
'error': str(e),
|
| 354 |
+
'value': value
|
| 355 |
+
}
|
| 356 |
+
|
| 357 |
+
def set_variable(self, name: str, value: Any) -> bool:
|
| 358 |
+
"""
|
| 359 |
+
Set a variable for use in calculations
|
| 360 |
+
"""
|
| 361 |
+
try:
|
| 362 |
+
self.variables[name] = value
|
| 363 |
+
logger.info(f"Set variable {name} = {value}")
|
| 364 |
+
return True
|
| 365 |
+
except Exception as e:
|
| 366 |
+
logger.error(f"Error setting variable: {e}")
|
| 367 |
+
return False
|
| 368 |
+
|
| 369 |
+
def get_variables(self) -> Dict[str, Any]:
|
| 370 |
+
"""
|
| 371 |
+
Get all stored variables
|
| 372 |
+
"""
|
| 373 |
+
return self.variables.copy()
|
| 374 |
+
|
| 375 |
+
def clear_variables(self) -> bool:
|
| 376 |
+
"""
|
| 377 |
+
Clear all stored variables
|
| 378 |
+
"""
|
| 379 |
+
try:
|
| 380 |
+
self.variables.clear()
|
| 381 |
+
logger.info("Cleared all variables")
|
| 382 |
+
return True
|
| 383 |
+
except Exception as e:
|
| 384 |
+
logger.error(f"Error clearing variables: {e}")
|
| 385 |
+
return False
|
| 386 |
+
|
| 387 |
+
def format_result_for_llm(self, result: Dict[str, Any]) -> str:
|
| 388 |
+
"""
|
| 389 |
+
Format calculation results for LLM consumption
|
| 390 |
+
"""
|
| 391 |
+
if 'error' in result:
|
| 392 |
+
return f"Error: {result['error']}"
|
| 393 |
+
|
| 394 |
+
if 'result' in result:
|
| 395 |
+
return f"Result: {result['result']}\nExpression: {result.get('expression', 'N/A')}"
|
| 396 |
+
|
| 397 |
+
# Handle other result types
|
| 398 |
+
formatted_parts = []
|
| 399 |
+
for key, value in result.items():
|
| 400 |
+
if key not in ['error'] and value is not None:
|
| 401 |
+
formatted_parts.append(f"{key.title()}: {value}")
|
| 402 |
+
|
| 403 |
+
return "\n".join(formatted_parts) if formatted_parts else "No result to display"
|
| 404 |
+
|
tools/file_processor.py
ADDED
|
@@ -0,0 +1,349 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import PyPDF2
|
| 3 |
+
import docx
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import json
|
| 6 |
+
import csv
|
| 7 |
+
from typing import List, Dict, Any, Optional
|
| 8 |
+
import logging
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from config.settings import Settings
|
| 11 |
+
|
| 12 |
+
logging.basicConfig(level=logging.INFO)
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
class FileProcessor:
|
| 16 |
+
def __init__(self):
|
| 17 |
+
self.supported_extensions = {
|
| 18 |
+
'.txt': self._process_text,
|
| 19 |
+
'.pdf': self._process_pdf,
|
| 20 |
+
'.docx': self._process_docx,
|
| 21 |
+
'.doc': self._process_docx,
|
| 22 |
+
'.csv': self._process_csv,
|
| 23 |
+
'.xlsx': self._process_excel,
|
| 24 |
+
'.xls': self._process_excel,
|
| 25 |
+
'.json': self._process_json,
|
| 26 |
+
'.py': self._process_code,
|
| 27 |
+
'.js': self._process_code,
|
| 28 |
+
'.html': self._process_code,
|
| 29 |
+
'.css': self._process_code,
|
| 30 |
+
'.md': self._process_text,
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
def process_file(self, file_path: str) -> Dict[str, Any]:
|
| 34 |
+
"""
|
| 35 |
+
Process a file and extract its content
|
| 36 |
+
"""
|
| 37 |
+
try:
|
| 38 |
+
file_path = Path(file_path)
|
| 39 |
+
|
| 40 |
+
if not file_path.exists():
|
| 41 |
+
return {'error': f'File not found: {file_path}'}
|
| 42 |
+
|
| 43 |
+
# Check file size
|
| 44 |
+
file_size = file_path.stat().st_size / (1024 * 1024) # MB
|
| 45 |
+
if file_size > Settings.MAX_FILE_SIZE_MB:
|
| 46 |
+
return {'error': f'File too large: {file_size:.1f}MB (max: {Settings.MAX_FILE_SIZE_MB}MB)'}
|
| 47 |
+
|
| 48 |
+
extension = file_path.suffix.lower()
|
| 49 |
+
|
| 50 |
+
if extension not in self.supported_extensions:
|
| 51 |
+
return {'error': f'Unsupported file type: {extension}'}
|
| 52 |
+
|
| 53 |
+
# Process the file
|
| 54 |
+
processor = self.supported_extensions[extension]
|
| 55 |
+
content = processor(file_path)
|
| 56 |
+
|
| 57 |
+
return {
|
| 58 |
+
'filename': file_path.name,
|
| 59 |
+
'extension': extension,
|
| 60 |
+
'size_mb': file_size,
|
| 61 |
+
'content': content,
|
| 62 |
+
'metadata': self._extract_metadata(file_path)
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
except Exception as e:
|
| 66 |
+
logger.error(f"Error processing file {file_path}: {e}")
|
| 67 |
+
return {'error': str(e)}
|
| 68 |
+
|
| 69 |
+
def _process_text(self, file_path: Path) -> str:
|
| 70 |
+
"""
|
| 71 |
+
Process plain text files
|
| 72 |
+
"""
|
| 73 |
+
try:
|
| 74 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
| 75 |
+
return f.read()
|
| 76 |
+
except UnicodeDecodeError:
|
| 77 |
+
# Try with different encoding
|
| 78 |
+
with open(file_path, 'r', encoding='latin-1') as f:
|
| 79 |
+
return f.read()
|
| 80 |
+
|
| 81 |
+
def _process_pdf(self, file_path: Path) -> str:
|
| 82 |
+
"""
|
| 83 |
+
Process PDF files
|
| 84 |
+
"""
|
| 85 |
+
try:
|
| 86 |
+
text_content = []
|
| 87 |
+
with open(file_path, 'rb') as f:
|
| 88 |
+
pdf_reader = PyPDF2.PdfReader(f)
|
| 89 |
+
|
| 90 |
+
for page_num, page in enumerate(pdf_reader.pages):
|
| 91 |
+
try:
|
| 92 |
+
text = page.extract_text()
|
| 93 |
+
if text.strip():
|
| 94 |
+
text_content.append(f"--- Page {page_num + 1} ---\n{text}")
|
| 95 |
+
except Exception as e:
|
| 96 |
+
logger.warning(f"Error extracting page {page_num + 1}: {e}")
|
| 97 |
+
continue
|
| 98 |
+
|
| 99 |
+
return "\n\n".join(text_content)
|
| 100 |
+
|
| 101 |
+
except Exception as e:
|
| 102 |
+
logger.error(f"Error processing PDF: {e}")
|
| 103 |
+
return f"Error processing PDF: {str(e)}"
|
| 104 |
+
|
| 105 |
+
def _process_docx(self, file_path: Path) -> str:
|
| 106 |
+
"""
|
| 107 |
+
Process Word documents
|
| 108 |
+
"""
|
| 109 |
+
try:
|
| 110 |
+
doc = docx.Document(file_path)
|
| 111 |
+
paragraphs = []
|
| 112 |
+
|
| 113 |
+
for paragraph in doc.paragraphs:
|
| 114 |
+
if paragraph.text.strip():
|
| 115 |
+
paragraphs.append(paragraph.text)
|
| 116 |
+
|
| 117 |
+
# Also extract tables
|
| 118 |
+
for table in doc.tables:
|
| 119 |
+
table_data = []
|
| 120 |
+
for row in table.rows:
|
| 121 |
+
row_data = [cell.text.strip() for cell in row.cells]
|
| 122 |
+
table_data.append(" | ".join(row_data))
|
| 123 |
+
|
| 124 |
+
if table_data:
|
| 125 |
+
paragraphs.append("\n--- Table ---\n" + "\n".join(table_data))
|
| 126 |
+
|
| 127 |
+
return "\n\n".join(paragraphs)
|
| 128 |
+
|
| 129 |
+
except Exception as e:
|
| 130 |
+
logger.error(f"Error processing DOCX: {e}")
|
| 131 |
+
return f"Error processing DOCX: {str(e)}"
|
| 132 |
+
|
| 133 |
+
def _process_csv(self, file_path: Path) -> str:
    """Summarize a CSV file: dimensions, column names, preview, dtypes, stats.

    Returns a multi-line report string; on any failure, an error string.
    """
    try:
        frame = pd.read_csv(file_path)

        # describe() is only meaningful when numeric columns exist
        numeric_columns = frame.select_dtypes(include=['number']).columns
        stats_block = (
            frame.describe().to_string()
            if len(numeric_columns) > 0
            else "No numeric columns"
        )

        report = [
            "CSV File Analysis:",
            f"Rows: {len(frame)}",
            f"Columns: {len(frame.columns)}",
            f"Column Names: {', '.join(frame.columns.tolist())}",
            "",
            "First 5 rows:",
            frame.head().to_string(),
            "",
            "Data Types:",
            frame.dtypes.to_string(),
            "",
            "Basic Statistics:",
            stats_block,
        ]

        return "\n".join(report)

    except Exception as exc:
        logger.error(f"Error processing CSV: {exc}")
        return f"Error processing CSV: {str(exc)}"
|
| 162 |
+
|
| 163 |
+
def _process_excel(self, file_path: Path) -> str:
    """Summarize every sheet of an Excel workbook (dimensions, columns, preview)."""
    try:
        workbook = pd.ExcelFile(file_path)

        lines = [f"Excel File: {file_path.name}"]
        lines.append(f"Sheets: {', '.join(workbook.sheet_names)}")

        for sheet in workbook.sheet_names:
            frame = pd.read_excel(file_path, sheet_name=sheet)

            lines.append(f"\n--- Sheet: {sheet} ---")
            lines.append(f"Rows: {len(frame)}, Columns: {len(frame.columns)}")
            lines.append(f"Columns: {', '.join(frame.columns.tolist())}")
            lines.append("\nFirst 3 rows:")
            lines.append(frame.head(3).to_string())

        return "\n".join(lines)

    except Exception as exc:
        logger.error(f"Error processing Excel: {exc}")
        return f"Error processing Excel: {str(exc)}"
|
| 187 |
+
|
| 188 |
+
def _process_json(self, file_path: Path) -> str:
    """
    Process JSON files.

    Loads the file (assumed UTF-8) and returns a human-readable summary:
    key/item counts plus a pretty-printed, possibly truncated rendering.

    Args:
        file_path: Path to the JSON file.

    Returns:
        A formatted summary string, or an error message string on failure.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        if isinstance(data, dict):
            # Serialize exactly once, then truncate based on the length of
            # the serialized text itself. The previous check used
            # len(str(data)) — the Python repr (single quotes, True/None)
            # whose length differs from the JSON text — so truncation could
            # trigger at the wrong threshold; it also serialized twice.
            formatted = json.dumps(data, indent=2, ensure_ascii=False)
            if len(formatted) > 2000:
                formatted = formatted[:2000] + "..."
            content_parts = [
                f"JSON Object with {len(data)} keys:",
                f"Keys: {', '.join(data.keys())}",
                "",
                "Content (formatted):",
                formatted
            ]
        elif isinstance(data, list):
            content_parts = [
                f"JSON Array with {len(data)} items",
                # Conditional is lazy, so data[0] is never touched when empty
                f"First item type: {type(data[0]).__name__}" if data else "Empty array",
                "",
                "Content (first 3 items):",
                json.dumps(data[:3], indent=2, ensure_ascii=False)
            ]
        else:
            # Scalar root (str / number / bool / null)
            content_parts = [
                f"JSON {type(data).__name__}:",
                str(data)
            ]

        return "\n".join(content_parts)

    except Exception as e:
        logger.error(f"Error processing JSON: {e}")
        return f"Error processing JSON: {str(e)}"
|
| 224 |
+
|
| 225 |
+
def _process_code(self, file_path: Path) -> str:
    """Read a source-code file and prepend a lightweight analysis header.

    Delegates the actual reading to _process_text, then reports line counts
    and a rough line-count-based complexity estimate before the raw content.
    """
    try:
        source = self._process_text(file_path)

        all_lines = source.split('\n')
        code_lines = [ln for ln in all_lines if ln.strip()]

        # Crude heuristic: >100 non-empty lines = High, >50 = Medium
        if len(code_lines) > 100:
            complexity = 'High'
        elif len(code_lines) > 50:
            complexity = 'Medium'
        else:
            complexity = 'Low'

        report = [
            "Code File Analysis:",
            f"Language: {file_path.suffix[1:].upper()}",
            f"Total lines: {len(all_lines)}",
            f"Non-empty lines: {len(code_lines)}",
            f"Estimated complexity: {complexity}",
            "",
            "Content:",
            source
        ]

        return "\n".join(report)

    except Exception as exc:
        logger.error(f"Error processing code file: {exc}")
        return f"Error processing code file: {str(exc)}"
|
| 252 |
+
|
| 253 |
+
def _extract_metadata(self, file_path: Path) -> Dict[str, Any]:
    """Collect basic filesystem metadata for a file.

    Returns size, creation/modification timestamps (epoch seconds from
    stat), extension and stem name; an empty dict if stat fails.
    """
    try:
        info = file_path.stat()
    except Exception as exc:
        logger.error(f"Error extracting metadata: {exc}")
        return {}
    return {
        'size_bytes': info.st_size,
        'created': info.st_ctime,
        'modified': info.st_mtime,
        'extension': file_path.suffix,
        'name': file_path.stem
    }
|
| 269 |
+
|
| 270 |
+
def process_multiple_files(self, file_paths: List[str]) -> List[Dict[str, Any]]:
    """Run process_file over each path and collect the results in input order."""
    return [self.process_file(path) for path in file_paths]
|
| 279 |
+
|
| 280 |
+
def extract_key_information(self, content: str, file_type: str) -> Dict[str, Any]:
    """Derive quick statistics from already-processed file content.

    Always reports word/char/line counts and the file type; adds a count
    of numeric tokens for tabular files, and function/class names (first
    10 of each) for Python sources. Returns {'error': ...} on failure.
    """
    try:
        info = {
            'word_count': len(content.split()),
            'char_count': len(content),
            'line_count': len(content.split('\n')),
            'file_type': file_type
        }

        if file_type in ('.csv', '.xlsx', '.xls'):
            import re
            # How many integer-looking tokens appear in the summary text
            info['numeric_values_found'] = len(re.findall(r'\d+', content))
        elif file_type in ('.py', '.js', '.html', '.css'):
            import re
            if file_type == '.py':
                # Keep only the first 10 of each to bound the summary size
                info['functions'] = re.findall(r'def\s+(\w+)', content)[:10]
                info['classes'] = re.findall(r'class\s+(\w+)', content)[:10]

        return info

    except Exception as exc:
        logger.error(f"Error extracting key information: {exc}")
        return {'error': str(exc)}
|
| 313 |
+
|
| 314 |
+
def save_processed_content(self, content: str, output_path: str) -> bool:
    """Write processed text to output_path as UTF-8.

    Returns True on success; logs and returns False on any failure
    (best-effort save, never raises to the caller).
    """
    try:
        with open(output_path, 'w', encoding='utf-8') as sink:
            sink.write(content)
        logger.info(f"Saved processed content to: {output_path}")
        return True
    except Exception as exc:
        logger.error(f"Error saving content: {exc}")
        return False
|
| 326 |
+
|
| 327 |
+
def get_supported_formats(self) -> List[str]:
    """Return the file extensions this processor can handle."""
    return [*self.supported_extensions]
|
| 332 |
+
|
| 333 |
+
def format_file_summary_for_llm(self, file_result: Dict[str, Any]) -> str:
    """Render a processed-file result dict as compact text for the LLM.

    Error results become a one-line message; otherwise the filename, type
    and size are followed by the content, truncated to 1000 characters.
    """
    if 'error' in file_result:
        return f"Error processing file: {file_result['error']}"

    body = file_result['content']
    if len(body) > 1000:
        body = body[:1000] + "..."

    return "\n".join([
        f"File: {file_result['filename']}",
        f"Type: {file_result['extension']}",
        f"Size: {file_result['size_mb']:.2f} MB",
        "",
        "Content Summary:",
        body
    ])
|
tools/web_search.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from duckduckgo_search import DDGS
|
| 2 |
+
import requests
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
import logging
|
| 5 |
+
from typing import List, Dict, Any
|
| 6 |
+
from config.settings import Settings
|
| 7 |
+
|
| 8 |
+
logging.basicConfig(level=logging.INFO)
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
class WebSearchTool:
    """DuckDuckGo-backed search helper: web, news, image and quick-fact lookups,
    plus page-content extraction and LLM-friendly formatting."""

    def __init__(self):
        self.ddgs = DDGS()
        self.session = requests.Session()
        # Browser-like UA so sites are less likely to block content fetches
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })

    def search(self, query: str, max_results: int = Settings.MAX_SEARCH_RESULTS) -> List[Dict[str, Any]]:
        """Run a DuckDuckGo text search and normalize the hits.

        Returns a list of {'title', 'url', 'snippet', 'source'} dicts;
        an empty list on any failure.
        """
        try:
            hits = [
                {
                    'title': hit.get('title', ''),
                    'url': hit.get('href', ''),
                    'snippet': hit.get('body', ''),
                    'source': 'DuckDuckGo'
                }
                for hit in self.ddgs.text(query, max_results=max_results)
            ]
            logger.info(f"Found {len(hits)} search results for: {query}")
            return hits
        except Exception as exc:
            logger.error(f"Error searching web: {exc}")
            return []

    def get_page_content(self, url: str, max_chars: int = 5000) -> str:
        """Download a page and return its visible text, capped at max_chars.

        Scripts and styles are stripped; whitespace is collapsed to single
        spaces. Failures return an "Error: ..." string rather than raising.
        """
        try:
            response = self.session.get(url, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')

            # Drop non-visible markup before extracting text
            for tag in soup(["script", "style"]):
                tag.decompose()

            raw = soup.get_text()

            # Normalize whitespace: strip lines, split runs, drop empties
            stripped = (ln.strip() for ln in raw.splitlines())
            fragments = (piece.strip() for ln in stripped for piece in ln.split("  "))
            text = ' '.join(frag for frag in fragments if frag)

            if len(text) > max_chars:
                text = text[:max_chars] + "..."

            return text

        except Exception as exc:
            logger.error(f"Error extracting content from {url}: {exc}")
            return f"Error: Could not extract content from {url}"

    def search_and_summarize(self, query: str, include_content: bool = False) -> str:
        """Search and format the hits (optionally with page previews) for the LLM."""
        hits = self.search(query)

        if not hits:
            return "No search results found."

        lines = [f"Search results for: {query}\n"]

        for rank, hit in enumerate(hits, 1):
            lines.append(f"{rank}. **{hit['title']}**")
            lines.append(f"   URL: {hit['url']}")
            lines.append(f"   Summary: {hit['snippet']}")

            # Fetch full page text only for the two highest-ranked hits
            if include_content and rank <= 2:
                page_text = self.get_page_content(hit['url'])
                if page_text and not page_text.startswith("Error:"):
                    lines.append(f"   Content Preview: {page_text[:500]}...")

            lines.append("")

        return "\n".join(lines)

    def search_news(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
        """Run a DuckDuckGo news search and normalize the articles.

        Returns dicts with 'title', 'url', 'snippet', 'source', 'date' and
        'type' == 'news'; an empty list on failure.
        """
        try:
            articles = [
                {
                    'title': item.get('title', ''),
                    'url': item.get('url', ''),
                    'snippet': item.get('body', ''),
                    'source': item.get('source', ''),
                    'date': item.get('date', ''),
                    'type': 'news'
                }
                for item in self.ddgs.news(query, max_results=max_results)
            ]
            logger.info(f"Found {len(articles)} news results for: {query}")
            return articles
        except Exception as exc:
            logger.error(f"Error searching news: {exc}")
            return []

    def search_images(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
        """Run a DuckDuckGo image search and normalize the matches.

        Returns dicts with 'title', 'url', 'thumbnail', 'source' and
        'type' == 'image'; an empty list on failure.
        """
        try:
            images = [
                {
                    'title': item.get('title', ''),
                    'url': item.get('image', ''),
                    'thumbnail': item.get('thumbnail', ''),
                    'source': item.get('source', ''),
                    'type': 'image'
                }
                for item in self.ddgs.images(query, max_results=max_results)
            ]
            logger.info(f"Found {len(images)} image results for: {query}")
            return images
        except Exception as exc:
            logger.error(f"Error searching images: {exc}")
            return []

    def quick_fact_search(self, query: str) -> str:
        """Try DuckDuckGo instant answers first, then fall back to web search."""
        try:
            answers = self.ddgs.answers(query)
            if answers:
                return f"Quick Fact: {answers[0].get('text', '')}"

            # No instant answer available — use the top regular result
            fallback = self.search(query, max_results=2)
            if fallback:
                return f"From search: {fallback[0]['snippet']}"

            return "No quick facts found."

        except Exception as exc:
            logger.error(f"Error in quick fact search: {exc}")
            return "Error retrieving quick facts."

    def research_topic(self, topic: str) -> Dict[str, Any]:
        """Gather general results, recent news and follow-up queries for a topic.

        Best-effort: on failure, returns whatever was collected so far
        (possibly all-empty lists) rather than raising.
        """
        research_data = {
            'topic': topic,
            'general_info': [],
            'news': [],
            'related_queries': []
        }

        try:
            research_data['general_info'] = self.search(topic, max_results=5)
            research_data['news'] = self.search_news(topic, max_results=3)
            research_data['related_queries'] = [
                f"{topic} definition",
                f"{topic} examples",
                f"{topic} applications",
                f"latest {topic} developments"
            ]
            return research_data

        except Exception as exc:
            logger.error(f"Error researching topic {topic}: {exc}")
            return research_data

    def format_research_for_llm(self, research_data: Dict[str, Any]) -> str:
        """Render a research_topic() result as sectioned text for the LLM."""
        out = [f"Research Results for: {research_data['topic']}\n"]

        if research_data['general_info']:
            out.append("## General Information:")
            for idx, hit in enumerate(research_data['general_info'], 1):
                out.append(f"{idx}. {hit['title']}")
                out.append(f"   {hit['snippet']}\n")

        if research_data['news']:
            out.append("## Recent News:")
            for idx, article in enumerate(research_data['news'], 1):
                out.append(f"{idx}. {article['title']}")
                out.append(f"   {article['snippet']}")
                if article.get('date'):
                    out.append(f"   Date: {article['date']}\n")

        return "\n".join(out)
|