File size: 12,653 Bytes
fd06b5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
import os
from pprint import pprint
import requests
from langchain_core.tools import tool
from vector_store import get_vector_store
try:
    from ddgs import DDGS
except ImportError:
    DDGS = None
try:
    from docling.document_converter import DocumentConverter
except ImportError:
    DocumentConverter = None

# Weather Tools
@tool
def get_current_weather(city: str) -> dict:
    """Get the current weather for a specific city. Returns temperature, condition, etc."""
    api_key = os.getenv("OPENWEATHERMAP_API_KEY")
    if not api_key:
        return {"error": "Weather API key not configured."}

    # Pass the query string via `params` so requests URL-encodes it.
    # Interpolating `city` directly into the URL broke for names with
    # spaces or non-ASCII characters (e.g. "New York", "São Paulo").
    url = "http://api.openweathermap.org/data/2.5/weather"
    params = {"q": city, "appid": api_key, "units": "metric"}
    try:
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            return response.json()
        # Non-200: surface the API's own error body to the caller.
        return {"error": f"API Error: {response.text}"}
    except Exception as e:
        # Network/timeout errors become a structured error, never an exception.
        return {"error": str(e)}

@tool
def get_weather_forecast(city: str) -> dict:
    """Get the 5-day weather forecast for a city. Useful for checking future weather."""
    api_key = os.getenv("OPENWEATHERMAP_API_KEY")
    if not api_key:
        return {"error": "Weather API key not configured."}

    # Pass the query string via `params` so requests URL-encodes it.
    # Interpolating `city` directly into the URL broke for names with
    # spaces or non-ASCII characters.
    url = "http://api.openweathermap.org/data/2.5/forecast"
    params = {"q": city, "appid": api_key, "units": "metric"}
    try:
        response = requests.get(url, params=params, timeout=10)
        if response.status_code == 200:
            return response.json()
        # Non-200: surface the API's own error body to the caller.
        return {"error": f"API Error: {response.text}"}
    except Exception as e:
        # Network/timeout errors become a structured error, never an exception.
        return {"error": str(e)}

@tool
def schedule_meeting(title: str, description: str, start_time: str, end_time: str, participants: str, location: str = "") -> str:
    """
    Schedule a meeting in the database.
    
    Args:
        title: Meeting title
        description: Meeting description (can include weather info)
        start_time: Start time in format 'YYYY-MM-DD HH:MM:SS'
        end_time: End time in format 'YYYY-MM-DD HH:MM:SS'
        participants: Comma-separated list of participant names
        location: Meeting location
        
    Returns:
        Success or error message
    """
    try:
        from datetime import datetime

        from database import engine
        from models import Meeting
        from sqlmodel import Session

        # SQLite wants real datetime objects, not the raw strings we receive.
        time_format = "%Y-%m-%d %H:%M:%S"
        new_meeting = Meeting(
            title=title,
            description=description,
            location=location,
            start_time=datetime.strptime(start_time, time_format),
            end_time=datetime.strptime(end_time, time_format),
            participants=participants,
        )

        with Session(engine) as session:
            session.add(new_meeting)
            session.commit()
            # Refresh so the DB-assigned primary key is available below.
            session.refresh(new_meeting)

        return (
            f"✅ Meeting scheduled successfully! ID: {new_meeting.id}, "
            f"Title: {title}, Time: {start_time} to {end_time}"
        )

    except Exception as e:
        # Bad date strings, missing deps, and DB failures all land here.
        return f"❌ Failed to schedule meeting: {e}"

@tool
def cancel_meetings(date_filter: str = "all", meeting_ids: str = "") -> str:
    """
    Cancel/delete meetings from the database.
    
    Deletion is permanent (rows are removed, not flagged). If `meeting_ids`
    is non-empty it takes precedence and `date_filter` is ignored.
    
    Args:
        date_filter: Filter for which meetings to cancel - "all", "today", "tomorrow", or specific date "YYYY-MM-DD"
        meeting_ids: Optional comma-separated list of specific meeting IDs to cancel (e.g., "1,2,3")
        
    Returns:
        Success message with count of cancelled meetings
    """
    try:
        # Imports are deliberately local: the broad except below converts a
        # missing dependency into an error string instead of crashing the tool.
        from database import engine
        from sqlmodel import Session, select
        from models import Meeting
        from datetime import datetime, timedelta
        
        with Session(engine) as session:
            # Build query based on filters
            if meeting_ids:
                # Cancel specific meeting IDs (ValueError from int() on bad
                # input is caught by the outer except).
                ids = [int(id.strip()) for id in meeting_ids.split(",")]
                meetings = session.exec(select(Meeting).where(Meeting.id.in_(ids))).all()
            else:
                # Cancel by date filter
                # NOTE(review): Meeting.start_time appears to be a datetime
                # column but is compared against date objects below — confirm
                # the DB backend coerces this consistently.
                if date_filter == "today":
                    today = datetime.now().date()
                    meetings = session.exec(
                        select(Meeting).where(
                            (Meeting.start_time >= today) & 
                            (Meeting.start_time < today + timedelta(days=1))
                        )
                    ).all()
                elif date_filter == "tomorrow":
                    tomorrow = (datetime.now() + timedelta(days=1)).date()
                    meetings = session.exec(
                        select(Meeting).where(
                            (Meeting.start_time >= tomorrow) & 
                            (Meeting.start_time < tomorrow + timedelta(days=1))
                        )
                    ).all()
                elif date_filter == "all":
                    meetings = session.exec(select(Meeting)).all()
                else:
                    # Try parsing as specific date ("YYYY-MM-DD"); half-open
                    # range [date, date + 1 day) covers that whole day.
                    try:
                        target_date = datetime.strptime(date_filter, "%Y-%m-%d").date()
                        meetings = session.exec(
                            select(Meeting).where(
                                (Meeting.start_time >= target_date) & 
                                (Meeting.start_time < target_date + timedelta(days=1))
                            )
                        ).all()
                    except ValueError:
                        return f"❌ Invalid date format: {date_filter}. Use 'today', 'tomorrow', 'all', or 'YYYY-MM-DD'"
            
            if not meetings:
                return f"No meetings found to cancel for filter: {date_filter}"
            
            # Snapshot titles/times BEFORE deleting, while the ORM objects
            # are still attached to the session.
            cancelled_titles = [f"'{m.title}' at {m.start_time}" for m in meetings]
            for meeting in meetings:
                session.delete(meeting)
            
            session.commit()
            
            return f"✅ Cancelled {len(meetings)} meeting(s):\n" + "\n".join(f"  • {title}" for title in cancelled_titles)
            
    except Exception as e:
        return f"❌ Failed to cancel meetings: {e}"

# Web Tools
@tool
def duckduckgo_search(query: str) -> str:
    """Perform a DuckDuckGo search and return relevant results."""
    if not DDGS:
        return "DuckDuckGo Search library not installed. Install with: pip install ddgs"
    try:
        with DDGS() as ddgs:
            # Use better search parameters for more relevant results
            results = list(ddgs.text(
                query,
                region='wt-wt',  # Global results
                safesearch='moderate',
                timelimit='y',   # Last year for fresher results
                max_results=5
            ))

            if not results:
                return "No search results found."

            # Format results with better structure
            formatted = []
            for i, result in enumerate(results, 1):
                title = result.get('title', 'No title')
                body = result.get('body', 'No description')
                url = result.get('href', 'No URL')

                # Truncate snippets so the tool output stays token-friendly
                if len(body) > 300:
                    body = body[:297] + "..."

                formatted.append(f"**Result {i}: {title}**\n{body}\nSource: {url}")
            # Fixed: removed leftover debug print() that dumped the full
            # results to stdout on every call.
            return "\n\n".join(formatted)
    except Exception as e:
        # Truncate the error so a huge traceback message can't flood the caller
        return f"Search failed: {str(e)[:200]}"

# Document Tools
@tool
def read_document_with_docling(file_path: str) -> str:
    """Read and parse a PDF or Text document using Docling to extract text."""
    if DocumentConverter is None:
        return "Docling library not installed."
    try:
        # Convert the file and hand back its content as Markdown text.
        conversion = DocumentConverter().convert(file_path)
        return conversion.document.export_to_markdown()
    except Exception as exc:
        return f"Error reading document: {exc}"

@tool
def ingest_document_to_vector_store(file_path: str, document_id: str, is_temporary: bool = True) -> str:
    """
    Ingest a document into the vector store for semantic search.
    First parses the document, then chunks and embeds it into ChromaDB.
    
    Args:
        file_path: Path to the document file (PDF or text)
        document_id: Unique identifier for this document
        is_temporary: If True, stores in memory (session only). If False, stores to disk.
        
    Returns:
        Status message with number of chunks created
    """
    try:
        # First parse the document
        if not DocumentConverter:
            return "Docling library not installed."
        
        # Configure lightweight pipeline - no vision models, faster processing
        try:
            from docling.datamodel.base_models import InputFormat
            from docling.datamodel.pipeline_options import PdfPipelineOptions
            from docling.document_converter import PdfFormatOption
            
            pipeline_options = PdfPipelineOptions()
            # OCR is DISABLED here for speed — assumes PDFs have an embedded
            # text layer; scanned/image-only PDFs will yield little or no text.
            pipeline_options.do_ocr = False
            pipeline_options.do_table_structure = False  # Disable table detection (slow)
            # Disable slow enrichment features
            pipeline_options.do_picture_classification = False
            pipeline_options.do_picture_description = False
            pipeline_options.do_code_enrichment = False
            pipeline_options.do_formula_enrichment = False
            pipeline_options.generate_picture_images = False
            
            converter = DocumentConverter(
                format_options={
                    InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
                }
            )
        except Exception as config_error:
            # Fallback to simple converter if advanced options fail
            # (e.g. older docling versions without these pipeline knobs)
            print(f"⚠️ Using simple converter due to: {config_error}")
            converter = DocumentConverter()
        
        result = converter.convert(file_path)
        document_text = result.document.export_to_markdown()
        
        # Ingest into vector store
        # Use temporary store for uploads by default, unless specified otherwise
        vector_store = get_vector_store(is_persistent=not is_temporary)
        
        # chunk_size/chunk_overlap are in characters here — TODO confirm
        # against vector_store.ingest_document's contract
        num_chunks = vector_store.ingest_document(
            document_text=document_text,
            document_id=document_id,
            metadata={"file_path": file_path},
            chunk_size=500,
            chunk_overlap=50
        )
        
        store_type = "temporary (in-memory)" if is_temporary else "persistent (disk)"
        return f"Successfully ingested document '{document_id}' into {store_type} vector store. Created {num_chunks} chunks."
    
    except Exception as e:
        return f"Document ingestion failed: {e}"


@tool
def search_vector_store(query: str, document_id: str = "", top_k: int = 3, search_type: str = "persistent") -> str:
    """
    Search the vector store for relevant document chunks.
    
    Args:
        query: Search query text
        document_id: Optional specific document to search within (empty string searches all documents)
        top_k: Number of top results to return (default: 3)
        search_type: "persistent" (default) or "temporary" (for uploaded files)
        
    Returns:
        Formatted search results with similarity scores
    """
    try:
        # Anything other than "persistent" selects the temporary store.
        store = get_vector_store(is_persistent=(search_type == "persistent"))

        hits = store.similarity_search(
            query=query,
            top_k=top_k,
            document_id=document_id or None,  # empty string -> search all docs
        )

        if not hits:
            return f"No relevant documents found in {search_type} vector store."

        # Assemble one text section per hit, then join them.
        sections = [f"{search_type.capitalize()} Vector Store Search Results:\n"]
        for rank, (chunk_text, score, metadata) in enumerate(hits, start=1):
            sections.append(
                f"Result {rank} (Similarity: {score:.3f}):\n"
                f"{chunk_text}\n"
                f"[Document: {metadata.get('document_id', 'unknown')}]\n"
            )
        return "\n".join(sections) + "\n"

    except Exception as e:
        return f"Vector store search failed: {e}"