faisalsns committed on
Commit
b1f00a0
·
1 Parent(s): d6a4e24

Initial commit for the ai-reasoning-copilot

Browse files
README.md CHANGED
@@ -11,3 +11,41 @@ short_description: AI Reasoning Copilot
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
+
15
+
16
+ # 🧠 Open Source Reasoning Copilot
17
+
18
An AI reasoning assistant that pairs local tools and memory with LLM access via OpenRouter.
19
+
20
+ ## Features
21
+
22
+ - 🤖 **Online LLM Integration** - Works with OpenRouter
23
+ - 🔍 **Web Search** - Real-time information retrieval
24
+ - 🧮 **Advanced Calculator** - Symbolic math, calculus, statistics
25
+ - 📁 **Document Processing** - PDF, Word, Excel, CSV, JSON, code files
26
+ - 🧠 **Memory System** - Conversation history and context awareness
27
+ - 🎯 **Reasoning Tools** - Chain-of-thought, problem decomposition
28
+ - 📊 **Data Visualization** - Plots and charts
29
- 🔒 **Privacy First** - Documents, memory, and tools run locally (LLM queries are sent to OpenRouter)
30
+
31
+ ## Usage Examples
32
+
33
+ ### Reasoning & Problem Solving
34
+ - "Help me analyze the pros and cons of remote work"
35
+ - "Walk me through solving this logic puzzle step by step"
36
+ - "What are the implications of AI in healthcare?"
37
+
38
+ ### Research & Information
39
+ - "What are the latest developments in quantum computing?"
40
+ - "Research the history of the Roman Empire"
41
+ - "Find current information about climate change policies"
42
+
43
+ ### Mathematics & Calculations
44
+ - "Solve the equation x^2 + 5x - 6 = 0"
45
+ - "Calculate the derivative of x^3 + 2x^2 - 5x + 1"
46
+ - "Plot the function y = sin(x) + cos(2x)"
47
+
48
+ ### Document Analysis
49
+ - Upload PDFs, Word docs, spreadsheets
50
+ - "Summarize this research paper"
51
+ - "Extract key insights from this data"
app.py ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import logging
3
+ import os
4
+ from typing import List, Tuple, Any, Optional
5
+ import json
6
+ import threading
7
+ import time
8
+ from datetime import datetime
9
+
10
+ # Import our custom modules
11
+ #from models.llm_handler import LLMHandler
12
+ from models.llm_handler import HuggingFaceLLMHandler, OpenRouterLLMHandler
13
+
14
+ from models.vector_store import VectorStore
15
+ from tools.web_search import WebSearchTool
16
+ from tools.calculator import CalculatorTool
17
+ from tools.file_processor import FileProcessor
18
+ from memory.conversation import ConversationMemory
19
+ from config.settings import Settings
20
+
21
# Setup logging: everything goes both to a file under Settings.LOGS_DIR and
# to the console. Importing config.settings above already ran
# Settings.ensure_directories(), so LOGS_DIR exists before FileHandler opens it.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(os.path.join(Settings.LOGS_DIR, 'copilot.log')),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)
31
+
32
class ReasoningCopilot:
    """Orchestrates the LLM, vector retrieval, web search, calculator,
    file processing and conversation memory behind the Gradio UI.

    Changes from the previous revision: the raw debug ``print()`` calls around
    the LLM request bypassed the configured logging handlers/levels — they are
    now ``logger.debug`` calls — and dead commented-out handler code was removed.
    """

    def __init__(self):
        logger.info("Initializing Reasoning Copilot...")

        # Initialize components (OpenRouter is the active LLM backend).
        self.llm = OpenRouterLLMHandler()
        self.vector_store = VectorStore()
        self.web_search = WebSearchTool()
        self.calculator = CalculatorTool()
        self.file_processor = FileProcessor()
        self.memory = ConversationMemory()

        # State variables, toggled from the UI settings panel.
        self.current_model = Settings.DEFAULT_MODEL
        self.reasoning_mode = "balanced"  # balanced, creative, analytical
        self.use_web_search = True
        self.use_vector_search = True

        logger.info("Reasoning Copilot initialized successfully!")

    def process_query(self, user_input: str, chat_history: List[Tuple[str, str]],
                      use_tools: bool = True) -> Tuple[List[Tuple[str, str]], str]:
        """Answer one user message.

        Optionally enriches the prompt with vector-store context, web-search
        results and calculator output, asks the LLM, and records the exchange.
        Returns the updated chat history plus an empty string (clears the
        input textbox in the UI).
        """
        try:
            if not user_input.strip():
                return chat_history, ""

            logger.info(f"Processing query: {user_input[:100]}...")

            context = ""
            tools_output = ""

            if use_tools:
                # Pull relevant context from uploaded documents (best effort).
                if self.use_vector_search:
                    try:
                        context = self.vector_store.get_relevant_context(user_input)
                    except Exception as e:
                        logger.warning(f"Vector search failed: {e}")
                        context = ""

                # Cheap keyword heuristics decide tool use — no extra LLM
                # round-trip (a prior analyze_reasoning_task call here caused
                # multi-minute delays per query).
                if self.use_web_search and self._should_use_web_search_simple(user_input):
                    try:
                        search_results = self.web_search.search_and_summarize(user_input)
                        tools_output += f"\n--- Web Search Results ---\n{search_results}\n"
                    except Exception as e:
                        logger.warning(f"Web search failed: {e}")

                if self._should_use_calculator_simple(user_input):
                    try:
                        calc_result = self._handle_calculation(user_input)
                        if calc_result:
                            tools_output += f"\n--- Calculation Results ---\n{calc_result}\n"
                    except Exception as e:
                        logger.warning(f"Calculator failed: {e}")

            # Main LLM call; timed through the logging framework instead of
            # print() so the output respects configured handlers/levels.
            start_time = time.time()
            response = self.llm.generate_response(user_input, context, tools_output)
            logger.debug(f"generate_response took {time.time() - start_time:.2f} seconds")

            # Persist the exchange; a storage failure must not lose the reply.
            try:
                self.memory.add_exchange(user_input, response, {
                    'used_tools': use_tools,
                    'reasoning_mode': self.reasoning_mode
                })
            except Exception as e:
                logger.warning(f"Memory storage failed: {e}")

            chat_history.append((user_input, response))

            try:
                self.llm.add_to_history(user_input, response)
            except Exception as e:
                logger.warning(f"LLM history update failed: {e}")

            return chat_history, ""

        except Exception as e:
            logger.error(f"Error processing query: {e}")
            error_response = f"I apologize, but I encountered an error: {str(e)}"
            chat_history.append((user_input, error_response))
            return chat_history, ""

    def _should_use_web_search_simple(self, query: str) -> bool:
        """Return True when *query* contains a recency/lookup keyword."""
        web_search_indicators = [
            'current', 'latest', 'recent', 'news', 'today', 'now',
            'what happened', 'update', 'price', 'weather', 'stock'
        ]

        query_lower = query.lower()
        return any(indicator in query_lower for indicator in web_search_indicators)

    def _should_use_calculator_simple(self, query: str) -> bool:
        """Return True when *query* looks mathematical (keyword or operator)."""
        calc_indicators = [
            'calculate', 'compute', 'solve', '+', '-', '*', '/', '=',
            'math', 'equation', 'derivative', 'integral', 'plot'
        ]

        query_lower = query.lower()
        return any(indicator in query_lower for indicator in calc_indicators)

    def _handle_calculation(self, query: str) -> str:
        """Extract and evaluate math found in *query* via CalculatorTool.

        Returns a formatted result string, or "" when nothing usable is found.
        """
        try:
            import re

            # Equation solving: "solve ... = ..." with at least one operator.
            if '=' in query and any(op in query for op in ['+', '-', '*', '/']):
                if 'solve' in query.lower():
                    equation = re.search(r'([^=]+=[^=]+)', query)
                    if equation:
                        result = self.calculator.solve_equation(equation.group(1))
                        return self.calculator.format_result_for_llm(result)

            # Otherwise look for a bare arithmetic expression to evaluate.
            expr_pattern = r'([0-9+\-*/().\s]+(?:[+\-*/][0-9+\-*/().\s]+)*)'
            expressions = re.findall(expr_pattern, query)

            for exp in expressions:
                if len(exp.strip()) > 3:  # skip trivial matches (lone numbers)
                    result = self.calculator.evaluate_expression(exp.strip())
                    return self.calculator.format_result_for_llm(result)

            return ""

        except Exception as e:
            logger.error(f"Error in calculation handling: {e}")
            return ""

    def upload_files(self, files: List[Any]) -> str:
        """Process uploaded files and index their text in the vector store.

        Returns a per-file status report for the UI.
        """
        try:
            if not files:
                return "No files uploaded."

            results = []
            documents_to_add = []

            for file in files:
                # Gradio file objects expose .name; fall back to str() paths.
                file_path = file.name if hasattr(file, 'name') else str(file)

                file_result = self.file_processor.process_file(file_path)

                if 'error' not in file_result:
                    content = file_result['content']
                    metadata = {
                        'filename': file_result['filename'],
                        'type': 'uploaded_file',
                        'source': file_result['filename'],
                        'upload_time': datetime.now().isoformat()
                    }
                    documents_to_add.append((content, metadata))
                    results.append(f"✓ Processed: {file_result['filename']}")
                else:
                    results.append(f"✗ Error processing {file_path}: {file_result['error']}")

            # Batch-insert everything that parsed successfully.
            if documents_to_add:
                contents = [doc[0] for doc in documents_to_add]
                metadata_list = [doc[1] for doc in documents_to_add]

                success = self.vector_store.add_documents(contents, metadata_list)
                if success:
                    results.append(f"\n✓ Added {len(documents_to_add)} documents to knowledge base.")
                else:
                    results.append("\n✗ Failed to add documents to knowledge base.")

            return "\n".join(results)

        except Exception as e:
            logger.error(f"Error uploading files: {e}")
            return f"Error uploading files: {str(e)}"

    def change_model(self, model_name: str) -> str:
        """Switch the active LLM model; returns a human-readable status line."""
        try:
            if self.llm.switch_model(model_name):
                self.current_model = model_name
                return f"✓ Switched to model: {model_name}"
            else:
                return f"✗ Failed to switch to model: {model_name}"
        except Exception as e:
            return f"✗ Error changing model: {str(e)}"

    def get_system_status(self) -> str:
        """Build the markdown status report shown in the UI status box."""
        try:
            available_models = self.llm.get_available_models()
            vector_stats = self.vector_store.get_collection_stats()
            memory_stats = self.memory.get_session_statistics()
            memory_usage = self.memory.get_memory_usage()

            status_info = f"""
🤖 **Reasoning Copilot Status**

**Current Model:** {self.current_model}
**Available Models:** {len(available_models)} ({', '.join(available_models[:3])}{'...' if len(available_models) > 3 else ''})

**Knowledge Base:**
- Documents: {vector_stats.get('total_documents', 0)}
- Collection: {vector_stats.get('collection_name', 'N/A')}

**Session Memory:**
- Exchanges: {memory_stats.get('total_exchanges', 0)}
- Topics: {len(memory_stats.get('topics_discussed', []))}
- Memory Size: {memory_usage.get('memory_file_size_kb', 0):.1f} KB

**Tools Status:**
- Web Search: {'✓ Enabled' if self.use_web_search else '✗ Disabled'}
- Vector Search: {'✓ Enabled' if self.use_vector_search else '✗ Disabled'}
- Calculator: ✓ Available
- File Processor: ✓ Available

**Reasoning Mode:** {self.reasoning_mode.title()}
""".strip()

            return status_info

        except Exception as e:
            logger.error(f"Error getting system status: {e}")
            return f"Error getting system status: {str(e)}"

    def clear_conversation(self) -> Tuple[List, str]:
        """Reset both LLM history and persistent memory; returns (history, msg)."""
        try:
            self.llm.clear_history()
            self.memory.clear_memory()
            return [], "✓ Conversation cleared successfully."
        except Exception as e:
            return [], f"✗ Error clearing conversation: {str(e)}"

    def export_conversation(self, format_type: str = "markdown") -> str:
        """Export the conversation via ConversationMemory ('markdown'/'json'/'text')."""
        try:
            return self.memory.export_conversation(format_type)
        except Exception as e:
            return f"Error exporting conversation: {str(e)}"
320
+
321
def create_gradio_interface():
    """
    Create the Gradio interface.

    Builds a four-tab Blocks app (Chat / Knowledge Base / Calculator /
    Memory & Export) around a single ReasoningCopilot instance and wires all
    UI events to its methods. Returns the (not yet launched) Blocks object.
    """
    # Initialize the copilot — constructs all model/tool handlers up front.
    copilot = ReasoningCopilot()

    # Define the main interface
    with gr.Blocks(
        theme=gr.themes.Soft(),
        title="🧠 Open Source Reasoning Copilot",
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .chat-container {
            height: 600px !important;
        }
        """
    ) as interface:

        gr.Markdown("""
        # 🧠 Open Source Reasoning Copilot

        A powerful AI assistant that combines local LLMs with advanced reasoning capabilities, web search, calculations, and document processing - all running locally with zero cost!
        """)

        with gr.Tab("💬 Chat"):
            with gr.Row():
                with gr.Column(scale=3):
                    chatbot = gr.Chatbot(
                        height=500,
                        label="Conversation",
                        elem_classes=["chat-container"]
                    )

                    with gr.Row():
                        msg = gr.Textbox(
                            placeholder="Ask me anything! I can help with reasoning, research, calculations, and more...",
                            label="Your Message",
                            scale=4
                        )
                        send_btn = gr.Button("Send", variant="primary", scale=1)

                    with gr.Row():
                        clear_btn = gr.Button("Clear Chat", variant="secondary")
                        # Per-message switch: when off, process_query skips all tools.
                        use_tools = gr.Checkbox(label="Use Tools", value=True)

                with gr.Column(scale=1):
                    gr.Markdown("### 🛠️ Quick Actions")

                    status_btn = gr.Button("📊 System Status", variant="secondary")
                    status_output = gr.Textbox(
                        label="Status",
                        max_lines=15,
                        interactive=False
                    )

                    gr.Markdown("### ⚙️ Settings")

                    # NOTE(review): model choices are hard-coded OpenRouter ids;
                    # keep in sync with OpenRouterLLMHandler's supported models.
                    model_dropdown = gr.Dropdown(
                        choices=["mistralai/mistral-7b-instruct", "meta-llama/llama-3-70b-instruct", "google/gemini-2.0-flash-exp:free", "huggingfaceh4/zephyr-7b-beta"],
                        value="mistralai/mistral-7b-instruct",
                        label="Model"
                    )

                    reasoning_mode = gr.Radio(
                        choices=["balanced", "creative", "analytical"],
                        value="balanced",
                        label="Reasoning Mode"
                    )

                    web_search_toggle = gr.Checkbox(
                        label="Enable Web Search",
                        value=True
                    )

                    vector_search_toggle = gr.Checkbox(
                        label="Enable Vector Search",
                        value=True
                    )

        with gr.Tab("📁 Knowledge Base"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Upload Documents")
                    file_upload = gr.Files(
                        label="Upload Files",
                        file_types=[".txt", ".pdf", ".docx", ".csv", ".xlsx", ".json", ".py", ".js", ".html", ".md"]
                    )
                    upload_btn = gr.Button("Process Files", variant="primary")
                    upload_status = gr.Textbox(
                        label="Upload Status",
                        max_lines=10,
                        interactive=False
                    )

                with gr.Column():
                    gr.Markdown("### Knowledge Base Info")
                    kb_info = gr.Textbox(
                        label="Knowledge Base Statistics",
                        max_lines=10,
                        interactive=False
                    )
                    refresh_kb_btn = gr.Button("Refresh Info")

        with gr.Tab("🧮 Calculator"):
            with gr.Row():
                with gr.Column():
                    calc_input = gr.Textbox(
                        label="Mathematical Expression",
                        placeholder="e.g., 2*3 + 5, solve x^2 - 4 = 0, derivative of x^2 + 3x"
                    )
                    calc_btn = gr.Button("Calculate", variant="primary")
                    calc_output = gr.Textbox(
                        label="Result",
                        max_lines=10,
                        interactive=False
                    )

                with gr.Column():
                    gr.Markdown("""
                    ### Supported Operations
                    - Basic arithmetic: +, -, *, /, ^
                    - Functions: sin, cos, tan, log, sqrt
                    - Equation solving: solve x^2 + 2x - 3 = 0
                    - Calculus: derivative, integral
                    - Matrix operations
                    - Statistics
                    """)

        with gr.Tab("📊 Memory & Export"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Conversation Memory")
                    memory_info = gr.Textbox(
                        label="Session Information",
                        max_lines=10,
                        interactive=False
                    )
                    refresh_memory_btn = gr.Button("Refresh Memory Info")

                with gr.Column():
                    gr.Markdown("### Export Options")
                    export_format = gr.Radio(
                        choices=["markdown", "json", "text"],
                        value="markdown",
                        label="Export Format"
                    )
                    export_btn = gr.Button("Export Conversation", variant="primary")
                    export_output = gr.Textbox(
                        label="Exported Conversation",
                        max_lines=15,
                        interactive=False
                    )

        # Event handlers — thin closures over the shared copilot instance.
        def respond(message, history, use_tools_flag):
            return copilot.process_query(message, history, use_tools_flag)

        def clear_chat():
            return copilot.clear_conversation()

        def get_status():
            return copilot.get_system_status()

        def upload_files_handler(files):
            return copilot.upload_files(files)

        def change_model_handler(model):
            return copilot.change_model(model)

        def export_handler(format_type):
            return copilot.export_conversation(format_type)

        def get_kb_info():
            stats = copilot.vector_store.get_collection_stats()
            return f"Documents: {stats.get('total_documents', 0)}\nCollection: {stats.get('collection_name', 'N/A')}"

        def get_memory_info():
            return copilot.memory.get_conversation_summary()

        def update_settings(mode, web_search, vector_search):
            # Mutates copilot state in place; takes effect on the next query.
            copilot.reasoning_mode = mode
            copilot.use_web_search = web_search
            copilot.use_vector_search = vector_search
            return "Settings updated!"

        # Wire up the events. Enter in the textbox and the Send button share
        # the same handler; both clear the textbox via the second output.
        msg.submit(respond, [msg, chatbot, use_tools], [chatbot, msg])
        send_btn.click(respond, [msg, chatbot, use_tools], [chatbot, msg])
        clear_btn.click(clear_chat, outputs=[chatbot, msg])

        status_btn.click(get_status, outputs=status_output)
        model_dropdown.change(change_model_handler, inputs=model_dropdown, outputs=status_output)

        upload_btn.click(upload_files_handler, inputs=file_upload, outputs=upload_status)
        refresh_kb_btn.click(get_kb_info, outputs=kb_info)

        calc_btn.click(
            lambda expr: copilot.calculator.format_result_for_llm(
                copilot.calculator.evaluate_expression(expr)
            ),
            inputs=calc_input,
            outputs=calc_output
        )

        export_btn.click(export_handler, inputs=export_format, outputs=export_output)
        refresh_memory_btn.click(get_memory_info, outputs=memory_info)

        # Settings updates
        # NOTE(review): only reasoning_mode.change triggers update_settings;
        # toggling web/vector search alone is not applied until the radio
        # changes — confirm whether the toggles should also be wired.
        reasoning_mode.change(
            update_settings,
            inputs=[reasoning_mode, web_search_toggle, vector_search_toggle],
            outputs=status_output
        )

    return interface
539
+
540
if __name__ == "__main__":
    logger.info("Starting Reasoning Copilot...")

    # The previous startup hints told the user to run Ollama ("ollama serve" /
    # "ollama pull phi3:mini"), but the app now talks to OpenRouter via
    # OpenRouterLLMHandler — point users at the right prerequisite instead.
    logger.info("Using OpenRouter — ensure your OpenRouter API credentials are configured (see models/llm_handler.py)")

    # Create and launch the interface
    interface = create_gradio_interface()

    interface.launch(
        server_port=Settings.GRADIO_PORT,
        # NOTE(review): hard-coded True overrides Settings.GRADIO_SHARE (False)
        # — confirm which is intended for deployment.
        share=True,
        server_name="0.0.0.0",  # Allow external access
        show_error=True,
    )
config/settings.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ load_dotenv()
5
+
6
class Settings:
    """
    Central application configuration as plain class attributes.

    load_dotenv() above makes .env variables available via os.environ for any
    module that wants them; none are read directly in this class.
    """

    # Model Configuration
    # NOTE(review): these defaults describe a local Ollama setup, while app.py
    # instantiates OpenRouterLLMHandler — confirm which backend reads them.
    DEFAULT_MODEL = "phi3:mini"
    EMBEDDING_MODEL = "nomic-embed-text"
    OLLAMA_BASE_URL = "http://localhost:11434"

    # Vector Database (ChromaDB on-disk persistence)
    CHROMA_PERSIST_DIR = "./chroma_db"
    COLLECTION_NAME = "knowledge_base"

    # UI Configuration
    GRADIO_PORT = 7860
    # NOTE(review): app.py currently hard-codes share=True, ignoring this flag.
    GRADIO_SHARE = False

    # Tool Configuration
    MAX_SEARCH_RESULTS = 5
    CODE_EXECUTION_TIMEOUT = 30  # presumably seconds — confirm against the executor
    MAX_FILE_SIZE_MB = 50

    # Memory Configuration
    MAX_CONVERSATION_HISTORY = 20  # exchanges kept in short-term memory
    CONTEXT_WINDOW_SIZE = 4096

    # Reasoning Configuration
    MAX_REASONING_STEPS = 10
    TEMPERATURE = 0.7
    MAX_TOKENS = 2048

    # File Paths
    UPLOAD_DIR = "./uploads"
    LOGS_DIR = "./logs"

    # Create directories if they don't exist
    @classmethod
    def ensure_directories(cls):
        """Create every directory the app writes to (idempotent)."""
        os.makedirs(cls.CHROMA_PERSIST_DIR, exist_ok=True)
        os.makedirs(cls.UPLOAD_DIR, exist_ok=True)
        os.makedirs(cls.LOGS_DIR, exist_ok=True)

# Initialize directories on import so loggers/stores can open files immediately.
Settings.ensure_directories()
memory/conversation.py ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import uuid
3
+ from datetime import datetime
4
+ from typing import List, Dict, Any, Optional
5
+ import logging
6
+ from pathlib import Path
7
+ from config.settings import Settings
8
+
9
# NOTE(review): basicConfig at import time is a no-op when the application has
# already configured logging (app.py does), but it can hijack the root logger
# if this module is imported first — consider removing it from library code.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
11
+
12
+ class ConversationMemory:
13
    def __init__(self):
        # Each instance is a fresh session persisted to its own JSON file
        # under Settings.LOGS_DIR.
        self.session_id = str(uuid.uuid4())
        self.memory_file = Path(Settings.LOGS_DIR) / f"conversation_{self.session_id}.json"
        # Rolling window of recent exchanges (bounded in add_exchange by
        # Settings.MAX_CONVERSATION_HISTORY).
        self.short_term_memory = []
        # Arbitrary key/value context mirrored into memory_structure['context'].
        self.current_context = {}
        # In-memory reasoning trace (capped at 50 in add_reasoning_step).
        self.reasoning_history = []

        # Initialize memory structure — the single dict serialized to disk.
        # NOTE(review): 'user_preferences' is written here but never read or
        # updated anywhere in this module.
        self.memory_structure = {
            'session_id': self.session_id,
            'created_at': datetime.now().isoformat(),
            'conversations': [],
            'context': {},
            'reasoning_chains': [],
            'user_preferences': {},
            'topics_discussed': []
        }

        # Persist immediately so the session file always exists on disk.
        self._save_memory()
+
33
+ def add_exchange(self, user_input: str, assistant_response: str,
34
+ metadata: Optional[Dict[str, Any]] = None) -> bool:
35
+ """
36
+ Add a conversation exchange to memory
37
+ """
38
+ try:
39
+ exchange = {
40
+ 'id': str(uuid.uuid4()),
41
+ 'timestamp': datetime.now().isoformat(),
42
+ 'user_input': user_input,
43
+ 'assistant_response': assistant_response,
44
+ 'metadata': metadata or {}
45
+ }
46
+
47
+ # Add to short-term memory
48
+ self.short_term_memory.append(exchange)
49
+
50
+ # Add to persistent memory
51
+ self.memory_structure['conversations'].append(exchange)
52
+
53
+ # Keep short-term memory limited
54
+ if len(self.short_term_memory) > Settings.MAX_CONVERSATION_HISTORY:
55
+ self.short_term_memory = self.short_term_memory[-Settings.MAX_CONVERSATION_HISTORY:]
56
+
57
+ # Extract and store topics
58
+ self._extract_topics(user_input)
59
+
60
+ # Save to file
61
+ self._save_memory()
62
+
63
+ logger.info(f"Added exchange to memory: {exchange['id']}")
64
+ return True
65
+
66
+ except Exception as e:
67
+ logger.error(f"Error adding exchange to memory: {e}")
68
+ return False
69
+
70
+ def add_reasoning_step(self, step: str, step_type: str, result: Any = None) -> bool:
71
+ """
72
+ Add a reasoning step to the reasoning history
73
+ """
74
+ try:
75
+ reasoning_step = {
76
+ 'id': str(uuid.uuid4()),
77
+ 'timestamp': datetime.now().isoformat(),
78
+ 'step': step,
79
+ 'type': step_type,
80
+ 'result': str(result) if result is not None else None
81
+ }
82
+
83
+ self.reasoning_history.append(reasoning_step)
84
+ self.memory_structure['reasoning_chains'].append(reasoning_step)
85
+
86
+ # Keep reasoning history limited
87
+ if len(self.reasoning_history) > 50:
88
+ self.reasoning_history = self.reasoning_history[-50:]
89
+
90
+ self._save_memory()
91
+ return True
92
+
93
+ except Exception as e:
94
+ logger.error(f"Error adding reasoning step: {e}")
95
+ return False
96
+
97
+ def update_context(self, key: str, value: Any) -> bool:
98
+ """
99
+ Update the current context
100
+ """
101
+ try:
102
+ self.current_context[key] = value
103
+ self.memory_structure['context'][key] = value
104
+ self._save_memory()
105
+ return True
106
+ except Exception as e:
107
+ logger.error(f"Error updating context: {e}")
108
+ return False
109
+
110
+ def get_context(self, key: Optional[str] = None) -> Any:
111
+ """
112
+ Get context information
113
+ """
114
+ if key:
115
+ return self.current_context.get(key)
116
+ return self.current_context.copy()
117
+
118
+ def get_recent_exchanges(self, count: int = 5) -> List[Dict[str, Any]]:
119
+ """
120
+ Get recent conversation exchanges
121
+ """
122
+ return self.short_term_memory[-count:] if count <= len(self.short_term_memory) else self.short_term_memory
123
+
124
+ def get_conversation_summary(self) -> str:
125
+ """
126
+ Generate a summary of the conversation
127
+ """
128
+ if not self.short_term_memory:
129
+ return "No conversation history available."
130
+
131
+ summary_parts = [
132
+ f"Session ID: {self.session_id}",
133
+ f"Exchanges: {len(self.memory_structure['conversations'])}",
134
+ f"Topics discussed: {', '.join(self.memory_structure['topics_discussed'][-5:])}",
135
+ "",
136
+ "Recent exchanges:"
137
+ ]
138
+
139
+ # Add recent exchanges
140
+ for exchange in self.short_term_memory[-3:]:
141
+ timestamp = datetime.fromisoformat(exchange['timestamp']).strftime("%H:%M:%S")
142
+ summary_parts.append(f"[{timestamp}] User: {exchange['user_input'][:100]}...")
143
+ summary_parts.append(f"[{timestamp}] Assistant: {exchange['assistant_response'][:100]}...")
144
+ summary_parts.append("")
145
+
146
+ return "\n".join(summary_parts)
147
+
148
+ def search_memory(self, query: str, search_type: str = 'all') -> List[Dict[str, Any]]:
149
+ """
150
+ Search through memory for relevant information
151
+ """
152
+ results = []
153
+ query_lower = query.lower()
154
+
155
+ try:
156
+ if search_type in ['all', 'conversations']:
157
+ # Search conversations
158
+ for exchange in self.memory_structure['conversations']:
159
+ if (query_lower in exchange['user_input'].lower() or
160
+ query_lower in exchange['assistant_response'].lower()):
161
+ results.append({
162
+ 'type': 'conversation',
163
+ 'content': exchange,
164
+ 'relevance_score': self._calculate_relevance(query, exchange)
165
+ })
166
+
167
+ if search_type in ['all', 'reasoning']:
168
+ # Search reasoning history
169
+ for step in self.memory_structure['reasoning_chains']:
170
+ if query_lower in step['step'].lower():
171
+ results.append({
172
+ 'type': 'reasoning',
173
+ 'content': step,
174
+ 'relevance_score': self._calculate_relevance(query, step)
175
+ })
176
+
177
+ # Sort by relevance
178
+ results.sort(key=lambda x: x['relevance_score'], reverse=True)
179
+ return results[:10] # Top 10 results
180
+
181
+ except Exception as e:
182
+ logger.error(f"Error searching memory: {e}")
183
+ return []
184
+
185
+ def _extract_topics(self, text: str) -> None:
186
+ """
187
+ Extract topics from user input (simple keyword-based)
188
+ """
189
+ try:
190
+ # Simple topic extraction - can be enhanced with NLP
191
+ keywords = [
192
+ 'programming', 'coding', 'python', 'javascript', 'web', 'ai', 'machine learning',
193
+ 'data', 'analysis', 'math', 'science', 'physics', 'chemistry', 'biology',
194
+ 'history', 'literature', 'writing', 'business', 'finance', 'economics',
195
+ 'health', 'medicine', 'technology', 'research', 'education', 'design'
196
+ ]
197
+
198
+ text_lower = text.lower()
199
+ found_topics = [keyword for keyword in keywords if keyword in text_lower]
200
+
201
+ for topic in found_topics:
202
+ if topic not in self.memory_structure['topics_discussed']:
203
+ self.memory_structure['topics_discussed'].append(topic)
204
+
205
+ # Keep topics list manageable
206
+ if len(self.memory_structure['topics_discussed']) > 20:
207
+ self.memory_structure['topics_discussed'] = self.memory_structure['topics_discussed'][-20:]
208
+
209
+ except Exception as e:
210
+ logger.error(f"Error extracting topics: {e}")
211
+
212
+ def _calculate_relevance(self, query: str, item: Dict[str, Any]) -> float:
213
+ """
214
+ Calculate relevance score for search results
215
+ """
216
+ try:
217
+ query_words = set(query.lower().split())
218
+
219
+ if 'user_input' in item:
220
+ # Conversation item
221
+ text = f"{item['user_input']} {item['assistant_response']}".lower()
222
+ else:
223
+ # Reasoning item
224
+ text = item['step'].lower()
225
+
226
+ text_words = set(text.split())
227
+
228
+ # Simple relevance scoring
229
+ common_words = query_words.intersection(text_words)
230
+ if not query_words:
231
+ return 0.0
232
+
233
+ return len(common_words) / len(query_words)
234
+
235
+ except Exception as e:
236
+ logger.error(f"Error calculating relevance: {e}")
237
+ return 0.0
238
+
239
+ def _save_memory(self) -> bool:
240
+ """
241
+ Save memory to file
242
+ """
243
+ try:
244
+ with open(self.memory_file, 'w', encoding='utf-8') as f:
245
+ json.dump(self.memory_structure, f, indent=2, ensure_ascii=False)
246
+ return True
247
+ except Exception as e:
248
+ logger.error(f"Error saving memory: {e}")
249
+ return False
250
+
251
def load_session(self, session_id: str) -> bool:
    """
    Restore a previously saved session from its JSON log file.

    Replaces the in-memory structure, then rebuilds the derived caches:
    short-term memory (last MAX_CONVERSATION_HISTORY exchanges), the
    saved context, and up to the 50 most recent reasoning steps.
    Returns True on success, False when the file is missing or unreadable.
    """
    try:
        session_file = Path(Settings.LOGS_DIR) / f"conversation_{session_id}.json"
        if not session_file.exists():
            logger.warning(f"Session file not found: {session_file}")
            return False

        with open(session_file, 'r', encoding='utf-8') as fh:
            loaded = json.load(fh)

        self.memory_structure = loaded
        self.session_id = session_id
        self.memory_file = session_file

        # Rebuild the in-memory caches from the persisted structure.
        self.short_term_memory = loaded['conversations'][-Settings.MAX_CONVERSATION_HISTORY:]
        self.current_context = loaded.get('context', {})
        self.reasoning_history = loaded.get('reasoning_chains', [])[-50:]

        logger.info(f"Loaded session: {session_id}")
        return True

    except Exception as e:
        logger.error(f"Error loading session: {e}")
        return False
284
+
285
def export_conversation(self, format_type: str = 'json') -> str:
    """
    Render the session's conversation log in one of three formats.

    *format_type* may be 'json' (pretty-printed full structure), 'text'
    (plain transcript), or 'markdown' (one heading per exchange). Any
    other value yields an "Unsupported format" message; failures yield
    an error message string.
    """
    try:
        if format_type == 'json':
            return json.dumps(self.memory_structure, indent=2, ensure_ascii=False)

        conversations = self.memory_structure['conversations']

        if format_type == 'text':
            out = [
                f"Conversation Export - Session {self.session_id}",
                f"Created: {self.memory_structure['created_at']}",
                f"Total Exchanges: {len(conversations)}",
                "=" * 50,
                "",
            ]
            for exchange in conversations:
                stamp = datetime.fromisoformat(exchange['timestamp']).strftime("%Y-%m-%d %H:%M:%S")
                out += [
                    f"[{stamp}]",
                    f"User: {exchange['user_input']}",
                    f"Assistant: {exchange['assistant_response']}",
                    "-" * 30,
                    "",
                ]
            return "\n".join(out)

        if format_type == 'markdown':
            out = [
                f"# Conversation Export",
                f"**Session ID:** {self.session_id}",
                f"**Created:** {self.memory_structure['created_at']}",
                f"**Total Exchanges:** {len(conversations)}",
                "",
            ]
            for idx, exchange in enumerate(conversations, 1):
                stamp = datetime.fromisoformat(exchange['timestamp']).strftime("%Y-%m-%d %H:%M:%S")
                out += [
                    f"## Exchange {idx}",
                    f"*{stamp}*",
                    f"**User:** {exchange['user_input']}",
                    f"**Assistant:** {exchange['assistant_response']}",
                    "",
                ]
            return "\n".join(out)

        return f"Unsupported format: {format_type}"

    except Exception as e:
        logger.error(f"Error exporting conversation: {e}")
        return f"Error exporting conversation: {str(e)}"
337
+
338
def get_session_statistics(self) -> Dict[str, Any]:
    """
    Summarize the current session: exchange/word counts, average words
    per side, elapsed duration, topics, and reasoning-step count.

    Returns {'error': ...} when there are no conversations or on failure.
    """
    try:
        conversations = self.memory_structure['conversations']
        if not conversations:
            return {'error': 'No conversations in this session'}

        user_words = sum(len(c['user_input'].split()) for c in conversations)
        assistant_words = sum(len(c['assistant_response'].split()) for c in conversations)

        # Duration is only meaningful with at least two exchanges.
        duration = None
        if len(conversations) > 1:
            first = datetime.fromisoformat(conversations[0]['timestamp'])
            last = datetime.fromisoformat(conversations[-1]['timestamp'])
            duration = str(last - first)

        count = len(conversations)
        return {
            'session_id': self.session_id,
            'total_exchanges': count,
            'total_user_words': user_words,
            'total_assistant_words': assistant_words,
            'average_user_words': user_words / count if conversations else 0,
            'average_assistant_words': assistant_words / count if conversations else 0,
            'session_duration': duration,
            'topics_discussed': self.memory_structure.get('topics_discussed', []),
            'reasoning_steps': len(self.memory_structure.get('reasoning_chains', [])),
            'created_at': self.memory_structure['created_at'],
        }

    except Exception as e:
        logger.error(f"Error getting session statistics: {e}")
        return {'error': str(e)}
374
+
375
def clear_memory(self, keep_context: bool = False) -> bool:
    """
    Reset conversation state for this session.

    Empties the short-term and reasoning caches, optionally keeps the
    accumulated context, preserves user preferences, and persists the
    freshly reset structure. Returns True on success.
    """
    try:
        self.short_term_memory.clear()
        self.reasoning_history.clear()
        if not keep_context:
            self.current_context.clear()

        self.memory_structure = {
            'session_id': self.session_id,
            'created_at': datetime.now().isoformat(),
            'conversations': [],
            'context': self.current_context if keep_context else {},
            'reasoning_chains': [],
            # Preferences deliberately survive a memory reset.
            'user_preferences': self.memory_structure.get('user_preferences', {}),
            'topics_discussed': [],
        }

        self._save_memory()
        logger.info("Cleared conversation memory")
        return True

    except Exception as e:
        logger.error(f"Error clearing memory: {e}")
        return False
404
+
405
def set_user_preference(self, key: str, value: Any) -> bool:
    """
    Store a single user preference and persist memory immediately.

    Returns True on success, False if persistence fails.
    """
    try:
        prefs = self.memory_structure.setdefault('user_preferences', {})
        prefs[key] = value
        self._save_memory()
        logger.info(f"Set user preference: {key} = {value}")
        return True

    except Exception as e:
        logger.error(f"Error setting user preference: {e}")
        return False
421
+
422
def get_user_preferences(self) -> Dict[str, Any]:
    """
    Return all stored user preferences.

    Falls back to an empty dict when none have been set yet.
    """
    return self.memory_structure.get('user_preferences', {})
427
+
428
def get_memory_usage(self) -> Dict[str, Any]:
    """
    Report sizes of the persisted memory file and the in-memory caches.

    File size is 0 when the memory file does not exist yet. Returns
    {'error': ...} if anything goes wrong.
    """
    try:
        size_bytes = self.memory_file.stat().st_size if self.memory_file.exists() else 0
        return {
            'memory_file_size_bytes': size_bytes,
            'memory_file_size_kb': size_bytes / 1024,
            'short_term_exchanges': len(self.short_term_memory),
            'total_exchanges': len(self.memory_structure['conversations']),
            'reasoning_steps': len(self.reasoning_history),
            'context_items': len(self.current_context),
            'topics_tracked': len(self.memory_structure.get('topics_discussed', [])),
        }

    except Exception as e:
        logger.error(f"Error getting memory usage: {e}")
        return {'error': str(e)}
models/llm_handler.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import ollama
2
+ import json
3
+ import logging
4
+ from typing import List, Dict, Any, Optional
5
+ from transformers import AutoTokenizer, AutoModelForCausalLM
6
+ import torch
7
+ from typing import Optional
8
+ from config.settings import Settings
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ # HuggingFace LLM Handler for Microsoft Phi-3 Mini
14
+
15
+ import requests
16
+ from typing import Optional
17
+ import requests
18
+ import os
19
+ from dotenv import load_dotenv
20
+
21
+ load_dotenv()
22
+
23
+
24
class OpenRouterLLMHandler:
    """
    Chat handler backed by the OpenRouter chat-completions HTTP API.

    The API key is taken from the OPENROUTER_API_KEY environment
    variable when set, otherwise from the ``api_key`` argument.
    """

    def __init__(self, api_key: str = "", model: str = "mistralai/mistral-7b-instruct"):
        # BUG FIX: the old code did `model = self.current_model` when model
        # was "", but `current_model` was never set anywhere, so passing an
        # empty model raised AttributeError. The dead branch is removed.
        env_key = os.getenv("OPENROUTER_API_KEY")
        self.api_key = env_key if env_key else api_key
        self.model = model
        self.model_name = model  # kept in sync by switch_model()
        self.base_url = "https://openrouter.ai/api/v1/chat/completions"
        # BUG FIX: add_to_history() appended to this attribute but it was
        # never initialized, raising AttributeError on first use.
        self.conversation_history: List[Dict[str, str]] = []
        print(f"🔌 Initialized OpenRouter handler with model: {model}")

    def generate_response(self, prompt: str, context: Optional[str] = None,
                          tools_output: Optional[str] = None) -> str:
        """
        Send the prompt (plus optional context / tool output) to OpenRouter
        and return the assistant's reply, or an error message string.
        """
        try:
            full_prompt = self._build_simple_prompt(prompt, context, tools_output)

            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            }
            payload = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": "You are a helpful AI assistant."},
                    {"role": "user", "content": full_prompt},
                ],
                "temperature": 0.7,
                "max_tokens": 200,
            }

            # BUG FIX: a timeout keeps a stalled connection from hanging
            # the caller forever.
            response = requests.post(self.base_url, headers=headers, json=payload, timeout=60)
            response.raise_for_status()
            result = response.json()

            return result["choices"][0]["message"]["content"].strip()

        except Exception as e:
            return f"Error generating response: {str(e)}"

    def _build_simple_prompt(self, user_input: str, context: Optional[str] = None,
                             tools_output: Optional[str] = None) -> str:
        """Assemble the user message; only short context/tool snippets are kept."""
        prompt_parts = []

        if context and len(context) < 300:
            prompt_parts.append(f"Context: {context}")

        if tools_output and len(tools_output) < 200:
            prompt_parts.append(f"Additional info: {tools_output}")

        prompt_parts.append(f"User query: {user_input}")
        return "\n\n".join(prompt_parts)

    def add_to_history(self, user_input: str, assistant_response: str):
        """
        Record an exchange, trimming history to the configured maximum.
        """
        self.conversation_history.append({
            'user': user_input,
            'assistant': assistant_response,
        })

        if len(self.conversation_history) > Settings.MAX_CONVERSATION_HISTORY:
            self.conversation_history = self.conversation_history[-Settings.MAX_CONVERSATION_HISTORY:]

    def clear_history(self):
        """
        Drop all recorded exchanges.
        """
        self.conversation_history = []

    def get_available_models(self) -> List[str]:
        """
        Get list of available Ollama models.

        NOTE(review): self.client is never set on this class, so this
        always falls back to [Settings.DEFAULT_MODEL] — confirm whether
        the Ollama integration should be removed or wired up.
        """
        try:
            models = self.client.list()
            return [model['name'] for model in models['models']]
        except Exception as e:
            logger.error(f"Error getting models: {e}")
            return [Settings.DEFAULT_MODEL]

    def switch_model(self, model_name: str) -> bool:
        """
        Point subsequent requests at a different OpenRouter model.
        """
        try:
            self.model = model_name
            self.model_name = model_name
            logger.info(f"Switched to model: {model_name}")
            return True
        except Exception as e:
            logger.error(f"Error switching to model {model_name}: {e}")
            return False

    def generate_embedding(self, text: str) -> List[float]:
        """
        Generate embeddings for text using Ollama.

        NOTE(review): self.client is never set, so this currently always
        returns [] via the except branch — confirm intent.
        """
        try:
            response = self.client.embeddings(
                model=Settings.EMBEDDING_MODEL,
                prompt=text,
            )
            return response['embedding']
        except Exception as e:
            logger.error(f"Error generating embedding: {e}")
            return []
152
+
153
+
154
+
155
+ # class HuggingFaceLLMHandler:
156
+ # def __init__(self):
157
+ # from transformers import AutoTokenizer, AutoModelForCausalLM
158
+ # import torch
159
+ # import psutil
160
+
161
+ # self.model_name = "microsoft/Phi-3-mini-4k-instruct"
162
+ # print("Loading model... this may take a moment on first run")
163
+
164
+ # # Choose device and dtype intelligently
165
+ # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
166
+ # torch_dtype = torch.float16 if device.type == "cuda" else torch.float32
167
+
168
+ # print(f"Using device: {device}, dtype: {torch_dtype}")
169
+ # print(f"Available RAM: {psutil.virtual_memory().available / 1e6:.2f} MB")
170
+
171
+ # # Load tokenizer
172
+ # self.tokenizer = AutoTokenizer.from_pretrained(
173
+ # self.model_name,
174
+ # trust_remote_code=True
175
+ # )
176
+
177
+ # # Load model safely
178
+ # try:
179
+ # self.model = AutoModelForCausalLM.from_pretrained(
180
+ # self.model_name,
181
+ # torch_dtype=torch_dtype,
182
+ # device_map="auto" if device.type == "cuda" else None,
183
+ # low_cpu_mem_usage=True, # Helps reduce RAM footprint during init
184
+ # trust_remote_code=True
185
+ # )
186
+
187
+ # # Explicitly move to CPU if needed
188
+ # if device.type == "cpu":
189
+ # self.model = self.model.to(device)
190
+
191
+ # print("Model loaded successfully!")
192
+
193
+ # except RuntimeError as e:
194
+ # print(f"❌ Error loading model: {e}")
195
+ # print("Tip: Try switching to a smaller model or free up RAM.")
196
+
197
+ # def generate_response(self, prompt: str, context: Optional[str] = None,
198
+ # tools_output: Optional[str] = None) -> str:
199
+ # """
200
+ # Generate response using Phi-3 - should be under 10 seconds
201
+ # """
202
+ # try:
203
+ # # Build simple prompt
204
+ # full_prompt = self._build_simple_prompt(prompt, context, tools_output)
205
+
206
+ # # Tokenize and move to same device as model
207
+ # inputs = self.tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=1024)
208
+ # inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
209
+
210
+ # # Generate
211
+ # with torch.no_grad():
212
+ # outputs = self.model.generate(
213
+ # inputs["input_ids"],
214
+ # max_new_tokens=200, # Limit response length
215
+ # temperature=0.7,
216
+ # do_sample=True,
217
+ # pad_token_id=self.tokenizer.eos_token_id,
218
+ # attention_mask=inputs["attention_mask"]
219
+ # )
220
+
221
+ # # Decode response
222
+ # response = self.tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
223
+ # return response.strip()
224
+
225
+ # except Exception as e:
226
+ # logger.error(f"Error generating response: {e}")
227
+ # return f"Error generating response: {str(e)}"
228
+
229
+ # def _build_simple_prompt(self, user_input: str, context: Optional[str] = None,
230
+ # tools_output: Optional[str] = None) -> str:
231
+ # """Simple prompt builder"""
232
+ # prompt_parts = ["You are a helpful AI assistant."]
233
+
234
+ # if context and len(context) < 300:
235
+ # prompt_parts.append(f"Context: {context}")
236
+
237
+ # if tools_output and len(tools_output) < 200:
238
+ # prompt_parts.append(f"Additional info: {tools_output}")
239
+
240
+ # prompt_parts.append(f"User: {user_input}")
241
+ # prompt_parts.append("Assistant:")
242
+
243
+ # return "\n\n".join(prompt_parts)
244
+
245
+ # def add_to_history(self, user_input: str, assistant_response: str):
246
+ # """
247
+ # Add exchange to conversation history
248
+ # """
249
+ # self.conversation_history.append({
250
+ # 'user': user_input,
251
+ # 'assistant': assistant_response
252
+ # })
253
+
254
+ # # Keep only recent history
255
+ # if len(self.conversation_history) > Settings.MAX_CONVERSATION_HISTORY:
256
+ # self.conversation_history = self.conversation_history[-Settings.MAX_CONVERSATION_HISTORY:]
257
+
258
+ # def clear_history(self):
259
+ # """
260
+ # Clear conversation history
261
+ # """
262
+ # self.conversation_history = []
263
+
264
+ # def get_available_models(self) -> List[str]:
265
+ # """
266
+ # Get list of available Ollama models
267
+ # """
268
+ # try:
269
+ # models = self.client.list()
270
+ # return [model['name'] for model in models['models']]
271
+ # except Exception as e:
272
+ # logger.error(f"Error getting models: {e}")
273
+ # return [Settings.DEFAULT_MODEL]
274
+
275
+ # def switch_model(self, model_name: str) -> bool:
276
+ # """
277
+ # Switch to a different model
278
+ # """
279
+ # try:
280
+ # # Test if model is available
281
+ # self.client.generate(model=model_name, prompt="test", options={'num_predict': 1})
282
+ # self.model_name = model_name
283
+ # logger.info(f"Switched to model: {model_name}")
284
+ # return True
285
+ # except Exception as e:
286
+ # logger.error(f"Error switching to model {model_name}: {e}")
287
+ # return False
288
+
289
+ # def generate_embedding(self, text: str) -> List[float]:
290
+ # """
291
+ # Generate embeddings for text using Ollama
292
+ # """
293
+ # try:
294
+ # response = self.client.embeddings(
295
+ # model=Settings.EMBEDDING_MODEL,
296
+ # prompt=text
297
+ # )
298
+ # return response['embedding']
299
+ # except Exception as e:
300
+ # logger.error(f"Error generating embedding: {e}")
301
+ # return []
302
+
models/vector_store.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import uuid
from datetime import datetime
from typing import List, Dict, Any, Optional

import chromadb
from chromadb.config import Settings as ChromaSettings
from sentence_transformers import SentenceTransformer

from config.settings import Settings
8
+
9
+ logging.basicConfig(level=logging.INFO)
10
+ logger = logging.getLogger(__name__)
11
+
12
class VectorStore:
    """
    ChromaDB-backed semantic store using SentenceTransformer embeddings.

    Wraps a persistent Chroma client and provides document ingestion,
    similarity search, conversation memory, and collection management.
    """

    def __init__(self):
        self.client = chromadb.PersistentClient(
            path=Settings.CHROMA_PERSIST_DIR,
            settings=ChromaSettings(anonymized_telemetry=False)
        )
        # Small, fast general-purpose sentence embedding model.
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.collection = None
        self.initialize_collection()

    def initialize_collection(self):
        """Create (or reopen) the main knowledge-base collection."""
        try:
            self.collection = self.client.get_or_create_collection(
                name=Settings.COLLECTION_NAME,
                metadata={"description": "General knowledge base for reasoning copilot"}
            )
            logger.info(f"Initialized collection: {Settings.COLLECTION_NAME}")
        except Exception as e:
            logger.error(f"Error initializing collection: {e}")
            raise

    def add_documents(self, documents: List[str], metadata: Optional[List[Dict]] = None,
                      ids: Optional[List[str]] = None) -> bool:
        """
        Embed and store *documents*; returns True on success.

        IDs default to fresh UUID4 strings and metadata to a generic
        user-upload record, one per document.
        """
        try:
            if not documents:
                return False

            if ids is None:
                ids = [str(uuid.uuid4()) for _ in documents]

            embeddings = self.embedding_model.encode(documents).tolist()

            if metadata is None:
                metadata = [{"source": "user_upload", "type": "document"} for _ in documents]

            self.collection.add(
                documents=documents,
                embeddings=embeddings,
                metadatas=metadata,
                ids=ids
            )

            logger.info(f"Added {len(documents)} documents to vector store")
            return True

        except Exception as e:
            logger.error(f"Error adding documents: {e}")
            return False

    def search_similar(self, query: str, n_results: int = 5,
                       where: Optional[Dict] = None) -> Dict[str, Any]:
        """
        Return the *n_results* nearest documents to *query*.

        The result dict holds parallel 'documents'/'metadatas'/'distances'
        lists plus a 'count'; all empty on error.
        """
        try:
            query_embedding = self.embedding_model.encode([query]).tolist()[0]

            results = self.collection.query(
                query_embeddings=[query_embedding],
                n_results=n_results,
                where=where,
                include=['documents', 'metadatas', 'distances']
            )

            docs = results['documents']
            return {
                'documents': docs[0] if docs else [],
                'metadatas': results['metadatas'][0] if results['metadatas'] else [],
                'distances': results['distances'][0] if results['distances'] else [],
                'count': len(docs[0]) if docs else 0
            }

        except Exception as e:
            logger.error(f"Error searching documents: {e}")
            return {'documents': [], 'metadatas': [], 'distances': [], 'count': 0}

    def get_relevant_context(self, query: str, max_context_length: int = 2000) -> str:
        """
        Format the top matches for *query* as LLM-ready context,
        truncated to at most *max_context_length* characters.
        """
        results = self.search_similar(query, n_results=5)

        if not results['documents']:
            return ""

        context_parts = []
        current_length = 0

        for doc, meta in zip(results['documents'], results['metadatas']):
            source = meta.get('source', 'Unknown')
            # Each snippet is capped at 500 characters of content.
            snippet = f"Source: {source}\nContent: {doc[:500]}...\n"

            if current_length + len(snippet) > max_context_length:
                break

            context_parts.append(snippet)
            current_length += len(snippet)

        return "\n---\n".join(context_parts)

    def add_conversation_memory(self, user_input: str, assistant_response: str, session_id: str):
        """Store one user/assistant exchange as a searchable document."""
        try:
            memory_doc = f"User: {user_input}\nAssistant: {assistant_response}"
            metadata = {
                "type": "conversation",
                "session_id": session_id,
                # BUG FIX: this field previously stored str(uuid.uuid4()),
                # which is not a timestamp at all; record the actual time.
                "timestamp": datetime.now().isoformat()
            }

            return self.add_documents([memory_doc], [metadata])

        except Exception as e:
            logger.error(f"Error adding conversation memory: {e}")
            return False

    def search_conversations(self, query: str, session_id: Optional[str] = None) -> List[str]:
        """
        Search stored conversation exchanges, optionally scoped to one
        session.

        NOTE(review): recent Chroma versions require multi-field filters
        to be wrapped in {"$and": [...]} — confirm this two-key `where`
        still matches when session_id is given.
        """
        where_clause = {"type": "conversation"}
        if session_id:
            where_clause["session_id"] = session_id

        results = self.search_similar(query, n_results=3, where=where_clause)
        return results['documents']

    def get_collection_stats(self) -> Dict[str, Any]:
        """Return document count and name for the active collection."""
        try:
            return {
                "total_documents": self.collection.count(),
                "collection_name": Settings.COLLECTION_NAME
            }
        except Exception as e:
            logger.error(f"Error getting collection stats: {e}")
            return {"total_documents": 0, "collection_name": "unknown"}

    def delete_documents(self, ids: List[str]) -> bool:
        """Delete documents by ID; returns True on success."""
        try:
            self.collection.delete(ids=ids)
            logger.info(f"Deleted {len(ids)} documents")
            return True
        except Exception as e:
            logger.error(f"Error deleting documents: {e}")
            return False

    def clear_collection(self) -> bool:
        """Drop and recreate the main collection, removing every document."""
        try:
            self.client.delete_collection(Settings.COLLECTION_NAME)
            self.initialize_collection()
            logger.info("Cleared all documents from collection")
            return True
        except Exception as e:
            logger.error(f"Error clearing collection: {e}")
            return False

    def create_specialized_collection(self, name: str, description: str) -> bool:
        """Create (or reopen) a domain-specific collection; True on success."""
        try:
            self.client.get_or_create_collection(
                name=name,
                metadata={"description": description}
            )
            logger.info(f"Created specialized collection: {name}")
            return True
        except Exception as e:
            logger.error(f"Error creating specialized collection: {e}")
            return False

    def switch_collection(self, name: str) -> bool:
        """Make *name* the active collection for subsequent operations."""
        try:
            self.collection = self.client.get_collection(name=name)
            logger.info(f"Switched to collection: {name}")
            return True
        except Exception as e:
            logger.error(f"Error switching to collection {name}: {e}")
            return False

    def list_collections(self) -> List[str]:
        """Return the names of all collections, or [] on error."""
        try:
            return [col.name for col in self.client.list_collections()]
        except Exception as e:
            logger.error(f"Error listing collections: {e}")
            return []
requirements.txt ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ chromadb
3
+ ollama
4
+ sentence-transformers
5
+ beautifulsoup4
6
+ requests
7
+ pandas
8
+ numpy
9
+ python-dotenv
10
+ langchain
11
+ langchain-community
12
+ PyPDF2
13
+ python-docx
14
+ openpyxl
15
+ sympy
16
+ networkx
17
+ matplotlib
18
+ plotly
19
+ duckduckgo-search
20
+ psutil
21
+ fastapi
22
+ uvicorn
23
+ websockets
24
+ torch
25
+ transformers
tools/calculator.py ADDED
@@ -0,0 +1,404 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sympy as sp
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import io
5
+ import base64
6
+ from typing import Any, Dict, List, Optional, Union
7
+ import logging
8
+ import re
9
+ import math
10
+
11
+ logging.basicConfig(level=logging.INFO)
12
+ logger = logging.getLogger(__name__)
13
+
14
+ class CalculatorTool:
15
+ def __init__(self):
16
+ self.variables = {}
17
+ self.last_result = None
18
+
19
def evaluate_expression(self, expression: str) -> Dict[str, Any]:
    """
    Evaluate a math expression, preferring sympy's symbolic engine.

    Falls back to a restricted eval() with a math-only namespace when
    sympy cannot parse the input. On success returns
    {'result', 'expression', 'type', 'formatted'} and updates
    ``self.last_result``; on failure returns
    {'error', 'expression', 'result': None}.
    """
    try:
        expression = self._clean_expression(expression)

        try:
            value = sp.sympify(expression).evalf()
            self.last_result = float(value)
            return {
                'result': float(value),
                'expression': expression,
                'type': 'symbolic',
                'formatted': str(value),
            }
        # BUG FIX: was a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt.
        except Exception:
            # SECURITY NOTE: eval() on user input is risky even with a
            # restricted namespace; inputs must be trusted/local only.
            value = eval(expression, {"__builtins__": {}}, self._get_safe_namespace())
            self.last_result = value
            return {
                'result': value,
                'expression': expression,
                'type': 'numeric',
                'formatted': str(value),
            }

    except Exception as e:
        logger.error(f"Error evaluating expression: {e}")
        return {
            'error': str(e),
            'expression': expression,
            'result': None,
        }
55
+
56
+ def _clean_expression(self, expression: str) -> str:
57
+ """
58
+ Clean and prepare expression for evaluation
59
+ """
60
+ # Replace common math notation
61
+ replacements = {
62
+ '^': '**',
63
+ '×': '*',
64
+ '÷': '/',
65
+ 'π': 'pi',
66
+ 'e': 'E'
67
+ }
68
+
69
+ for old, new in replacements.items():
70
+ expression = expression.replace(old, new)
71
+
72
+ return expression
73
+
74
+ def _get_safe_namespace(self) -> Dict[str, Any]:
75
+ """
76
+ Get safe namespace for expression evaluation
77
+ """
78
+ safe_dict = {
79
+ 'abs': abs, 'round': round, 'min': min, 'max': max,
80
+ 'sum': sum, 'pow': pow, 'divmod': divmod,
81
+ 'sin': math.sin, 'cos': math.cos, 'tan': math.tan,
82
+ 'asin': math.asin, 'acos': math.acos, 'atan': math.atan,
83
+ 'sinh': math.sinh, 'cosh': math.cosh, 'tanh': math.tanh,
84
+ 'log': math.log, 'log10': math.log10, 'log2': math.log2,
85
+ 'exp': math.exp, 'sqrt': math.sqrt, 'factorial': math.factorial,
86
+ 'pi': math.pi, 'e': math.e, 'inf': math.inf, 'nan': math.nan,
87
+ 'degrees': math.degrees, 'radians': math.radians,
88
+ 'ceil': math.ceil, 'floor': math.floor,
89
+ }
90
+ safe_dict.update(self.variables)
91
+ return safe_dict
92
+
93
def solve_equation(self, equation: str, variable: str = 'x') -> Dict[str, Any]:
    """
    Solve an equation symbolically for *variable*.

    Accepts either 'lhs = rhs' or a bare expression (treated as
    expr = 0 by sympy). Returns string and numeric solutions
    (float for real roots, complex otherwise), or an error dict.
    """
    try:
        if '=' in equation:
            lhs, rhs = equation.split('=', 1)
            parsed = sp.Eq(sp.sympify(lhs), sp.sympify(rhs))
        else:
            parsed = sp.sympify(equation)

        symbol = sp.Symbol(variable)
        roots = sp.solve(parsed, symbol)

        return {
            'equation': equation,
            'variable': variable,
            'solutions': [str(root) for root in roots],
            'numeric_solutions': [
                float(root.evalf()) if root.is_real else complex(root.evalf())
                for root in roots
            ],
        }

    except Exception as e:
        logger.error(f"Error solving equation: {e}")
        return {
            'error': str(e),
            'equation': equation,
            'solutions': [],
        }
123
+
124
def plot_function(self, expression: str, x_range: tuple = (-10, 10),
                  points: int = 1000) -> str:
    """
    Render f(x) over *x_range* and return the PNG as a base64 string.

    Samples *points* evenly spaced x values. Returns "" on failure.
    """
    try:
        symbol = sp.Symbol('x')
        func = sp.lambdify(symbol, sp.sympify(expression), 'numpy')

        xs = np.linspace(x_range[0], x_range[1], points)
        ys = func(xs)

        plt.figure(figsize=(10, 6))
        plt.plot(xs, ys, 'b-', linewidth=2)
        plt.grid(True, alpha=0.3)
        plt.xlabel('x')
        plt.ylabel('f(x)')
        plt.title(f'Plot of f(x) = {expression}')

        # Encode the figure as base64 PNG, then release it to avoid
        # accumulating open matplotlib figures.
        buffer = io.BytesIO()
        plt.savefig(buffer, format='png', dpi=150, bbox_inches='tight')
        buffer.seek(0)
        encoded = base64.b64encode(buffer.getvalue()).decode()
        plt.close()

        return encoded

    except Exception as e:
        logger.error(f"Error plotting function: {e}")
        return ""
160
+
161
def calculate_derivative(self, expression: str, variable: str = 'x',
                         order: int = 1) -> Dict[str, Any]:
    """
    Differentiate *expression* `order` times with respect to *variable*.

    Returns both the raw and simplified derivative as strings, or an
    error dict on failure.
    """
    try:
        symbol = sp.Symbol(variable)
        derivative = sp.diff(sp.sympify(expression), symbol, order)

        return {
            'original': expression,
            'derivative': str(derivative),
            'order': order,
            'variable': variable,
            'simplified': str(sp.simplify(derivative)),
        }

    except Exception as e:
        logger.error(f"Error calculating derivative: {e}")
        return {
            'error': str(e),
            'original': expression,
        }
186
+
187
def calculate_integral(self, expression: str, variable: str = 'x',
                       limits: Optional[tuple] = None) -> Dict[str, Any]:
    """
    Integrate *expression* with respect to *variable*.

    With *limits* = (a, b) this computes the definite integral;
    otherwise the indefinite antiderivative. 'numeric_value' is filled
    only when the symbolic result is a plain number.
    """
    try:
        symbol = sp.Symbol(variable)
        expr = sp.sympify(expression)

        if limits:
            antiderivative = sp.integrate(expr, (symbol, limits[0], limits[1]))
            kind = 'definite'
        else:
            antiderivative = sp.integrate(expr, symbol)
            kind = 'indefinite'

        return {
            'original': expression,
            'integral': str(antiderivative),
            'type': kind,
            'variable': variable,
            'limits': limits,
            'numeric_value': float(antiderivative.evalf()) if antiderivative.is_number else None,
        }

    except Exception as e:
        logger.error(f"Error calculating integral: {e}")
        return {
            'error': str(e),
            'original': expression,
        }
220
+
221
def matrix_operations(self, operation: str, *matrices) -> Dict[str, Any]:
    """
    Dispatch a named matrix operation over the given matrices.

    Binary ops ('add', 'multiply') use the first two matrices; unary
    ops ('inverse', 'determinant', 'transpose', 'eigenvalues') use the
    first. Unknown operations yield result=None; failures yield an
    error dict.
    """
    try:
        # Lists become sympy Matrices; anything else goes through sympify.
        parsed = [
            sp.Matrix(m) if isinstance(m, list) else sp.sympify(m)
            for m in matrices
        ]

        outcome = None
        if operation == 'add' and len(parsed) >= 2:
            outcome = parsed[0] + parsed[1]
        elif operation == 'multiply' and len(parsed) >= 2:
            outcome = parsed[0] * parsed[1]
        elif operation == 'inverse' and len(parsed) >= 1:
            outcome = parsed[0].inv()
        elif operation == 'determinant' and len(parsed) >= 1:
            outcome = parsed[0].det()
        elif operation == 'transpose' and len(parsed) >= 1:
            outcome = parsed[0].T
        elif operation == 'eigenvalues' and len(parsed) >= 1:
            outcome = parsed[0].eigenvals()

        return {
            'operation': operation,
            'result': str(outcome) if outcome is not None else None,
            'matrices_count': len(parsed),
        }

    except Exception as e:
        logger.error(f"Error in matrix operation: {e}")
        return {
            'error': str(e),
            'operation': operation,
        }
261
+
262
+ def statistics_calculations(self, data: List[float], operation: str) -> Dict[str, Any]:
263
+ """
264
+ Perform statistical calculations
265
+ """
266
+ try:
267
+ data = np.array(data)
268
+ result = None
269
+
270
+ if operation == 'mean':
271
+ result = np.mean(data)
272
+ elif operation == 'median':
273
+ result = np.median(data)
274
+ elif operation == 'std':
275
+ result = np.std(data)
276
+ elif operation == 'var':
277
+ result = np.var(data)
278
+ elif operation == 'min':
279
+ result = np.min(data)
280
+ elif operation == 'max':
281
+ result = np.max(data)
282
+ elif operation == 'sum':
283
+ result = np.sum(data)
284
+ elif operation == 'range':
285
+ result = np.max(data) - np.min(data)
286
+
287
+ return {
288
+ 'operation': operation,
289
+ 'result': float(result) if result is not None else None,
290
+ 'data_size': len(data),
291
+ 'data_preview': data[:5].tolist() if len(data) > 5 else data.tolist()
292
+ }
293
+
294
+ except Exception as e:
295
+ logger.error(f"Error in statistics calculation: {e}")
296
+ return {
297
+ 'error': str(e),
298
+ 'operation': operation
299
+ }
300
+
301
+ def unit_conversion(self, value: float, from_unit: str, to_unit: str) -> Dict[str, Any]:
302
+ """
303
+ Convert between different units
304
+ """
305
+ # Basic unit conversion factors (could be expanded)
306
+ conversions = {
307
+ # Length
308
+ ('m', 'cm'): 100,
309
+ ('m', 'mm'): 1000,
310
+ ('m', 'km'): 0.001,
311
+ ('cm', 'm'): 0.01,
312
+ ('mm', 'm'): 0.001,
313
+ ('km', 'm'): 1000,
314
+ ('ft', 'm'): 0.3048,
315
+ ('in', 'cm'): 2.54,
316
+
317
+ # Weight
318
+ ('kg', 'g'): 1000,
319
+ ('g', 'kg'): 0.001,
320
+ ('lb', 'kg'): 0.453592,
321
+ ('kg', 'lb'): 2.20462,
322
+
323
+ # Temperature (special handling needed)
324
+ # Time
325
+ ('h', 'min'): 60,
326
+ ('min', 's'): 60,
327
+ ('h', 's'): 3600,
328
+ ('day', 'h'): 24,
329
+ }
330
+
331
+ try:
332
+ if (from_unit, to_unit) in conversions:
333
+ result = value * conversions[(from_unit, to_unit)]
334
+ elif (to_unit, from_unit) in conversions:
335
+ result = value / conversions[(to_unit, from_unit)]
336
+ else:
337
+ return {
338
+ 'error': f"Conversion from {from_unit} to {to_unit} not supported",
339
+ 'value': value
340
+ }
341
+
342
+ return {
343
+ 'original_value': value,
344
+ 'original_unit': from_unit,
345
+ 'converted_value': result,
346
+ 'converted_unit': to_unit,
347
+ 'conversion_factor': result / value if value != 0 else None
348
+ }
349
+
350
+ except Exception as e:
351
+ logger.error(f"Error in unit conversion: {e}")
352
+ return {
353
+ 'error': str(e),
354
+ 'value': value
355
+ }
356
+
357
+ def set_variable(self, name: str, value: Any) -> bool:
358
+ """
359
+ Set a variable for use in calculations
360
+ """
361
+ try:
362
+ self.variables[name] = value
363
+ logger.info(f"Set variable {name} = {value}")
364
+ return True
365
+ except Exception as e:
366
+ logger.error(f"Error setting variable: {e}")
367
+ return False
368
+
369
+ def get_variables(self) -> Dict[str, Any]:
370
+ """
371
+ Get all stored variables
372
+ """
373
+ return self.variables.copy()
374
+
375
+ def clear_variables(self) -> bool:
376
+ """
377
+ Clear all stored variables
378
+ """
379
+ try:
380
+ self.variables.clear()
381
+ logger.info("Cleared all variables")
382
+ return True
383
+ except Exception as e:
384
+ logger.error(f"Error clearing variables: {e}")
385
+ return False
386
+
387
+ def format_result_for_llm(self, result: Dict[str, Any]) -> str:
388
+ """
389
+ Format calculation results for LLM consumption
390
+ """
391
+ if 'error' in result:
392
+ return f"Error: {result['error']}"
393
+
394
+ if 'result' in result:
395
+ return f"Result: {result['result']}\nExpression: {result.get('expression', 'N/A')}"
396
+
397
+ # Handle other result types
398
+ formatted_parts = []
399
+ for key, value in result.items():
400
+ if key not in ['error'] and value is not None:
401
+ formatted_parts.append(f"{key.title()}: {value}")
402
+
403
+ return "\n".join(formatted_parts) if formatted_parts else "No result to display"
404
+
tools/file_processor.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import PyPDF2
3
+ import docx
4
+ import pandas as pd
5
+ import json
6
+ import csv
7
+ from typing import List, Dict, Any, Optional
8
+ import logging
9
+ from pathlib import Path
10
+ from config.settings import Settings
11
+
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
+
15
+ class FileProcessor:
16
+ def __init__(self):
17
+ self.supported_extensions = {
18
+ '.txt': self._process_text,
19
+ '.pdf': self._process_pdf,
20
+ '.docx': self._process_docx,
21
+ '.doc': self._process_docx,
22
+ '.csv': self._process_csv,
23
+ '.xlsx': self._process_excel,
24
+ '.xls': self._process_excel,
25
+ '.json': self._process_json,
26
+ '.py': self._process_code,
27
+ '.js': self._process_code,
28
+ '.html': self._process_code,
29
+ '.css': self._process_code,
30
+ '.md': self._process_text,
31
+ }
32
+
33
+ def process_file(self, file_path: str) -> Dict[str, Any]:
34
+ """
35
+ Process a file and extract its content
36
+ """
37
+ try:
38
+ file_path = Path(file_path)
39
+
40
+ if not file_path.exists():
41
+ return {'error': f'File not found: {file_path}'}
42
+
43
+ # Check file size
44
+ file_size = file_path.stat().st_size / (1024 * 1024) # MB
45
+ if file_size > Settings.MAX_FILE_SIZE_MB:
46
+ return {'error': f'File too large: {file_size:.1f}MB (max: {Settings.MAX_FILE_SIZE_MB}MB)'}
47
+
48
+ extension = file_path.suffix.lower()
49
+
50
+ if extension not in self.supported_extensions:
51
+ return {'error': f'Unsupported file type: {extension}'}
52
+
53
+ # Process the file
54
+ processor = self.supported_extensions[extension]
55
+ content = processor(file_path)
56
+
57
+ return {
58
+ 'filename': file_path.name,
59
+ 'extension': extension,
60
+ 'size_mb': file_size,
61
+ 'content': content,
62
+ 'metadata': self._extract_metadata(file_path)
63
+ }
64
+
65
+ except Exception as e:
66
+ logger.error(f"Error processing file {file_path}: {e}")
67
+ return {'error': str(e)}
68
+
69
+ def _process_text(self, file_path: Path) -> str:
70
+ """
71
+ Process plain text files
72
+ """
73
+ try:
74
+ with open(file_path, 'r', encoding='utf-8') as f:
75
+ return f.read()
76
+ except UnicodeDecodeError:
77
+ # Try with different encoding
78
+ with open(file_path, 'r', encoding='latin-1') as f:
79
+ return f.read()
80
+
81
+ def _process_pdf(self, file_path: Path) -> str:
82
+ """
83
+ Process PDF files
84
+ """
85
+ try:
86
+ text_content = []
87
+ with open(file_path, 'rb') as f:
88
+ pdf_reader = PyPDF2.PdfReader(f)
89
+
90
+ for page_num, page in enumerate(pdf_reader.pages):
91
+ try:
92
+ text = page.extract_text()
93
+ if text.strip():
94
+ text_content.append(f"--- Page {page_num + 1} ---\n{text}")
95
+ except Exception as e:
96
+ logger.warning(f"Error extracting page {page_num + 1}: {e}")
97
+ continue
98
+
99
+ return "\n\n".join(text_content)
100
+
101
+ except Exception as e:
102
+ logger.error(f"Error processing PDF: {e}")
103
+ return f"Error processing PDF: {str(e)}"
104
+
105
+ def _process_docx(self, file_path: Path) -> str:
106
+ """
107
+ Process Word documents
108
+ """
109
+ try:
110
+ doc = docx.Document(file_path)
111
+ paragraphs = []
112
+
113
+ for paragraph in doc.paragraphs:
114
+ if paragraph.text.strip():
115
+ paragraphs.append(paragraph.text)
116
+
117
+ # Also extract tables
118
+ for table in doc.tables:
119
+ table_data = []
120
+ for row in table.rows:
121
+ row_data = [cell.text.strip() for cell in row.cells]
122
+ table_data.append(" | ".join(row_data))
123
+
124
+ if table_data:
125
+ paragraphs.append("\n--- Table ---\n" + "\n".join(table_data))
126
+
127
+ return "\n\n".join(paragraphs)
128
+
129
+ except Exception as e:
130
+ logger.error(f"Error processing DOCX: {e}")
131
+ return f"Error processing DOCX: {str(e)}"
132
+
133
+ def _process_csv(self, file_path: Path) -> str:
134
+ """
135
+ Process CSV files
136
+ """
137
+ try:
138
+ df = pd.read_csv(file_path)
139
+
140
+ # Basic info about the CSV
141
+ info_parts = [
142
+ f"CSV File Analysis:",
143
+ f"Rows: {len(df)}",
144
+ f"Columns: {len(df.columns)}",
145
+ f"Column Names: {', '.join(df.columns.tolist())}",
146
+ "",
147
+ "First 5 rows:",
148
+ df.head().to_string(),
149
+ "",
150
+ "Data Types:",
151
+ df.dtypes.to_string(),
152
+ "",
153
+ "Basic Statistics:",
154
+ df.describe().to_string() if len(df.select_dtypes(include=['number']).columns) > 0 else "No numeric columns"
155
+ ]
156
+
157
+ return "\n".join(info_parts)
158
+
159
+ except Exception as e:
160
+ logger.error(f"Error processing CSV: {e}")
161
+ return f"Error processing CSV: {str(e)}"
162
+
163
+ def _process_excel(self, file_path: Path) -> str:
164
+ """
165
+ Process Excel files
166
+ """
167
+ try:
168
+ # Read all sheets
169
+ excel_file = pd.ExcelFile(file_path)
170
+ content_parts = [f"Excel File: {file_path.name}"]
171
+ content_parts.append(f"Sheets: {', '.join(excel_file.sheet_names)}")
172
+
173
+ for sheet_name in excel_file.sheet_names:
174
+ df = pd.read_excel(file_path, sheet_name=sheet_name)
175
+
176
+ content_parts.append(f"\n--- Sheet: {sheet_name} ---")
177
+ content_parts.append(f"Rows: {len(df)}, Columns: {len(df.columns)}")
178
+ content_parts.append(f"Columns: {', '.join(df.columns.tolist())}")
179
+ content_parts.append("\nFirst 3 rows:")
180
+ content_parts.append(df.head(3).to_string())
181
+
182
+ return "\n".join(content_parts)
183
+
184
+ except Exception as e:
185
+ logger.error(f"Error processing Excel: {e}")
186
+ return f"Error processing Excel: {str(e)}"
187
+
188
+ def _process_json(self, file_path: Path) -> str:
189
+ """
190
+ Process JSON files
191
+ """
192
+ try:
193
+ with open(file_path, 'r', encoding='utf-8') as f:
194
+ data = json.load(f)
195
+
196
+ # Format JSON for better readability
197
+ if isinstance(data, dict):
198
+ content_parts = [
199
+ f"JSON Object with {len(data)} keys:",
200
+ f"Keys: {', '.join(data.keys())}",
201
+ "",
202
+ "Content (formatted):",
203
+ json.dumps(data, indent=2, ensure_ascii=False)[:2000] + "..." if len(str(data)) > 2000 else json.dumps(data, indent=2, ensure_ascii=False)
204
+ ]
205
+ elif isinstance(data, list):
206
+ content_parts = [
207
+ f"JSON Array with {len(data)} items",
208
+ f"First item type: {type(data[0]).__name__}" if data else "Empty array",
209
+ "",
210
+ "Content (first 3 items):",
211
+ json.dumps(data[:3], indent=2, ensure_ascii=False)
212
+ ]
213
+ else:
214
+ content_parts = [
215
+ f"JSON {type(data).__name__}:",
216
+ str(data)
217
+ ]
218
+
219
+ return "\n".join(content_parts)
220
+
221
+ except Exception as e:
222
+ logger.error(f"Error processing JSON: {e}")
223
+ return f"Error processing JSON: {str(e)}"
224
+
225
+ def _process_code(self, file_path: Path) -> str:
226
+ """
227
+ Process code files
228
+ """
229
+ try:
230
+ content = self._process_text(file_path)
231
+
232
+ # Add some analysis
233
+ lines = content.split('\n')
234
+ non_empty_lines = [line for line in lines if line.strip()]
235
+
236
+ analysis_parts = [
237
+ f"Code File Analysis:",
238
+ f"Language: {file_path.suffix[1:].upper()}",
239
+ f"Total lines: {len(lines)}",
240
+ f"Non-empty lines: {len(non_empty_lines)}",
241
+ f"Estimated complexity: {'High' if len(non_empty_lines) > 100 else 'Medium' if len(non_empty_lines) > 50 else 'Low'}",
242
+ "",
243
+ "Content:",
244
+ content
245
+ ]
246
+
247
+ return "\n".join(analysis_parts)
248
+
249
+ except Exception as e:
250
+ logger.error(f"Error processing code file: {e}")
251
+ return f"Error processing code file: {str(e)}"
252
+
253
+ def _extract_metadata(self, file_path: Path) -> Dict[str, Any]:
254
+ """
255
+ Extract file metadata
256
+ """
257
+ try:
258
+ stat = file_path.stat()
259
+ return {
260
+ 'size_bytes': stat.st_size,
261
+ 'created': stat.st_ctime,
262
+ 'modified': stat.st_mtime,
263
+ 'extension': file_path.suffix,
264
+ 'name': file_path.stem
265
+ }
266
+ except Exception as e:
267
+ logger.error(f"Error extracting metadata: {e}")
268
+ return {}
269
+
270
+ def process_multiple_files(self, file_paths: List[str]) -> List[Dict[str, Any]]:
271
+ """
272
+ Process multiple files
273
+ """
274
+ results = []
275
+ for file_path in file_paths:
276
+ result = self.process_file(file_path)
277
+ results.append(result)
278
+ return results
279
+
280
+ def extract_key_information(self, content: str, file_type: str) -> Dict[str, Any]:
281
+ """
282
+ Extract key information from processed content
283
+ """
284
+ try:
285
+ key_info = {
286
+ 'word_count': len(content.split()),
287
+ 'char_count': len(content),
288
+ 'line_count': len(content.split('\n')),
289
+ 'file_type': file_type
290
+ }
291
+
292
+ # Type-specific extraction
293
+ if file_type in ['.csv', '.xlsx', '.xls']:
294
+ # Extract numerical data mentions
295
+ import re
296
+ numbers = re.findall(r'\d+', content)
297
+ key_info['numeric_values_found'] = len(numbers)
298
+
299
+ elif file_type in ['.py', '.js', '.html', '.css']:
300
+ # Extract function/class names for code files
301
+ import re
302
+ if file_type == '.py':
303
+ functions = re.findall(r'def\s+(\w+)', content)
304
+ classes = re.findall(r'class\s+(\w+)', content)
305
+ key_info['functions'] = functions[:10] # First 10
306
+ key_info['classes'] = classes[:10]
307
+
308
+ return key_info
309
+
310
+ except Exception as e:
311
+ logger.error(f"Error extracting key information: {e}")
312
+ return {'error': str(e)}
313
+
314
+ def save_processed_content(self, content: str, output_path: str) -> bool:
315
+ """
316
+ Save processed content to a file
317
+ """
318
+ try:
319
+ with open(output_path, 'w', encoding='utf-8') as f:
320
+ f.write(content)
321
+ logger.info(f"Saved processed content to: {output_path}")
322
+ return True
323
+ except Exception as e:
324
+ logger.error(f"Error saving content: {e}")
325
+ return False
326
+
327
+ def get_supported_formats(self) -> List[str]:
328
+ """
329
+ Get list of supported file formats
330
+ """
331
+ return list(self.supported_extensions.keys())
332
+
333
+ def format_file_summary_for_llm(self, file_result: Dict[str, Any]) -> str:
334
+ """
335
+ Format file processing results for LLM consumption
336
+ """
337
+ if 'error' in file_result:
338
+ return f"Error processing file: {file_result['error']}"
339
+
340
+ summary_parts = [
341
+ f"File: {file_result['filename']}",
342
+ f"Type: {file_result['extension']}",
343
+ f"Size: {file_result['size_mb']:.2f} MB",
344
+ "",
345
+ "Content Summary:",
346
+ file_result['content'][:1000] + "..." if len(file_result['content']) > 1000 else file_result['content']
347
+ ]
348
+
349
+ return "\n".join(summary_parts)
tools/web_search.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from duckduckgo_search import DDGS
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import logging
5
+ from typing import List, Dict, Any
6
+ from config.settings import Settings
7
+
8
+ logging.basicConfig(level=logging.INFO)
9
+ logger = logging.getLogger(__name__)
10
+
11
+ class WebSearchTool:
12
+ def __init__(self):
13
+ self.ddgs = DDGS()
14
+ self.session = requests.Session()
15
+ self.session.headers.update({
16
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
17
+ })
18
+
19
+ def search(self, query: str, max_results: int = Settings.MAX_SEARCH_RESULTS) -> List[Dict[str, Any]]:
20
+ """
21
+ Search the web using DuckDuckGo
22
+ """
23
+ try:
24
+ results = []
25
+ search_results = self.ddgs.text(query, max_results=max_results)
26
+
27
+ for result in search_results:
28
+ results.append({
29
+ 'title': result.get('title', ''),
30
+ 'url': result.get('href', ''),
31
+ 'snippet': result.get('body', ''),
32
+ 'source': 'DuckDuckGo'
33
+ })
34
+
35
+ logger.info(f"Found {len(results)} search results for: {query}")
36
+ return results
37
+
38
+ except Exception as e:
39
+ logger.error(f"Error searching web: {e}")
40
+ return []
41
+
42
+ def get_page_content(self, url: str, max_chars: int = 5000) -> str:
43
+ """
44
+ Extract text content from a web page
45
+ """
46
+ try:
47
+ response = self.session.get(url, timeout=10)
48
+ response.raise_for_status()
49
+
50
+ soup = BeautifulSoup(response.content, 'html.parser')
51
+
52
+ # Remove script and style elements
53
+ for script in soup(["script", "style"]):
54
+ script.decompose()
55
+
56
+ # Get text content
57
+ text = soup.get_text()
58
+
59
+ # Clean up whitespace
60
+ lines = (line.strip() for line in text.splitlines())
61
+ chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
62
+ text = ' '.join(chunk for chunk in chunks if chunk)
63
+
64
+ # Limit length
65
+ if len(text) > max_chars:
66
+ text = text[:max_chars] + "..."
67
+
68
+ return text
69
+
70
+ except Exception as e:
71
+ logger.error(f"Error extracting content from {url}: {e}")
72
+ return f"Error: Could not extract content from {url}"
73
+
74
+ def search_and_summarize(self, query: str, include_content: bool = False) -> str:
75
+ """
76
+ Search and format results for LLM consumption
77
+ """
78
+ results = self.search(query)
79
+
80
+ if not results:
81
+ return "No search results found."
82
+
83
+ summary_parts = [f"Search results for: {query}\n"]
84
+
85
+ for i, result in enumerate(results, 1):
86
+ summary_parts.append(f"{i}. **{result['title']}**")
87
+ summary_parts.append(f" URL: {result['url']}")
88
+ summary_parts.append(f" Summary: {result['snippet']}")
89
+
90
+ if include_content and i <= 2: # Only get content for top 2 results
91
+ content = self.get_page_content(result['url'])
92
+ if content and not content.startswith("Error:"):
93
+ summary_parts.append(f" Content Preview: {content[:500]}...")
94
+
95
+ summary_parts.append("")
96
+
97
+ return "\n".join(summary_parts)
98
+
99
+ def search_news(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
100
+ """
101
+ Search for news articles
102
+ """
103
+ try:
104
+ results = []
105
+ news_results = self.ddgs.news(query, max_results=max_results)
106
+
107
+ for result in news_results:
108
+ results.append({
109
+ 'title': result.get('title', ''),
110
+ 'url': result.get('url', ''),
111
+ 'snippet': result.get('body', ''),
112
+ 'source': result.get('source', ''),
113
+ 'date': result.get('date', ''),
114
+ 'type': 'news'
115
+ })
116
+
117
+ logger.info(f"Found {len(results)} news results for: {query}")
118
+ return results
119
+
120
+ except Exception as e:
121
+ logger.error(f"Error searching news: {e}")
122
+ return []
123
+
124
+ def search_images(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
125
+ """
126
+ Search for images
127
+ """
128
+ try:
129
+ results = []
130
+ image_results = self.ddgs.images(query, max_results=max_results)
131
+
132
+ for result in image_results:
133
+ results.append({
134
+ 'title': result.get('title', ''),
135
+ 'url': result.get('image', ''),
136
+ 'thumbnail': result.get('thumbnail', ''),
137
+ 'source': result.get('source', ''),
138
+ 'type': 'image'
139
+ })
140
+
141
+ logger.info(f"Found {len(results)} image results for: {query}")
142
+ return results
143
+
144
+ except Exception as e:
145
+ logger.error(f"Error searching images: {e}")
146
+ return []
147
+
148
+ def quick_fact_search(self, query: str) -> str:
149
+ """
150
+ Quick search for factual information
151
+ """
152
+ try:
153
+ # Try to get instant answer first
154
+ instant_answer = self.ddgs.answers(query)
155
+ if instant_answer:
156
+ return f"Quick Fact: {instant_answer[0].get('text', '')}"
157
+
158
+ # Fall back to regular search
159
+ results = self.search(query, max_results=2)
160
+ if results:
161
+ return f"From search: {results[0]['snippet']}"
162
+
163
+ return "No quick facts found."
164
+
165
+ except Exception as e:
166
+ logger.error(f"Error in quick fact search: {e}")
167
+ return "Error retrieving quick facts."
168
+
169
+ def research_topic(self, topic: str) -> Dict[str, Any]:
170
+ """
171
+ Comprehensive research on a topic
172
+ """
173
+ research_data = {
174
+ 'topic': topic,
175
+ 'general_info': [],
176
+ 'news': [],
177
+ 'related_queries': []
178
+ }
179
+
180
+ try:
181
+ # General search
182
+ general_results = self.search(topic, max_results=5)
183
+ research_data['general_info'] = general_results
184
+
185
+ # News search
186
+ news_results = self.search_news(topic, max_results=3)
187
+ research_data['news'] = news_results
188
+
189
+ # Generate related queries
190
+ related_queries = [
191
+ f"{topic} definition",
192
+ f"{topic} examples",
193
+ f"{topic} applications",
194
+ f"latest {topic} developments"
195
+ ]
196
+ research_data['related_queries'] = related_queries
197
+
198
+ return research_data
199
+
200
+ except Exception as e:
201
+ logger.error(f"Error researching topic {topic}: {e}")
202
+ return research_data
203
+
204
+ def format_research_for_llm(self, research_data: Dict[str, Any]) -> str:
205
+ """
206
+ Format research data for LLM consumption
207
+ """
208
+ formatted_parts = [f"Research Results for: {research_data['topic']}\n"]
209
+
210
+ if research_data['general_info']:
211
+ formatted_parts.append("## General Information:")
212
+ for i, result in enumerate(research_data['general_info'], 1):
213
+ formatted_parts.append(f"{i}. {result['title']}")
214
+ formatted_parts.append(f" {result['snippet']}\n")
215
+
216
+ if research_data['news']:
217
+ formatted_parts.append("## Recent News:")
218
+ for i, result in enumerate(research_data['news'], 1):
219
+ formatted_parts.append(f"{i}. {result['title']}")
220
+ formatted_parts.append(f" {result['snippet']}")
221
+ if result.get('date'):
222
+ formatted_parts.append(f" Date: {result['date']}\n")
223
+
224
+ return "\n".join(formatted_parts)