File size: 10,308 Bytes
368277b
da3a984
368277b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da3a984
 
 
368277b
 
 
 
 
 
 
 
 
 
 
 
9685fdc
368277b
b58981e
 
 
368277b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
from typing import List, Dict, Any
from smolagents import CodeAgent, InferenceClientModel, tool
import os
import json
import sys
from pathlib import Path
from datetime import datetime


def find_chrome_bookmarks_file():
    """
    Locate Chrome's Bookmarks file for the current platform.

    Returns:
        Absolute path to the Bookmarks file as a string.

    Raises:
        FileNotFoundError: If the file is not at the default profile location.
    """
    home = Path.home()
    platform = sys.platform

    # Default ("Default" profile) locations per OS; anything that is not
    # Windows or macOS is treated as Linux.
    if platform.startswith("win"):
        profile_dir = home / "AppData" / "Local" / "Google" / "Chrome" / "User Data" / "Default"
    elif platform.startswith("darwin"):
        profile_dir = home / "Library" / "Application Support" / "Google" / "Chrome" / "Default"
    else:
        profile_dir = home / ".config" / "google-chrome" / "Default"

    bookmarks_path = profile_dir / "Bookmarks"
    if not bookmarks_path.exists():
        raise FileNotFoundError(f"Cannot find Chrome Bookmarks file at: {bookmarks_path}")
    return str(bookmarks_path)


def find_folder_by_name(node, target_name):
    """
    Depth-first search of a bookmark tree for a folder with a given name.

    Args:
        node: A bookmark tree node (dict); non-dict values are ignored.
        target_name: Exact folder name to look for.

    Returns:
        The first matching folder dict (pre-order), or None if absent.
    """
    if not isinstance(node, dict):
        return None
    if node.get("type") == "folder" and node.get("name") == target_name:
        return node
    # Recurse into children and return the first truthy hit, if any.
    hits = (find_folder_by_name(child, target_name) for child in node.get("children", []))
    return next((hit for hit in hits if hit), None)


def extract_bookmarks_from_folder(node, bookmark_list):
    """
    Recursively collect every 'url' entry under a bookmark tree node.

    Args:
        node: A bookmark tree node (dict); non-dict values are ignored.
        bookmark_list: List mutated in place; one dict appended per bookmark.
    """
    if not isinstance(node, dict):
        return

    node_type = node.get("type")
    if node_type == "url":
        # Leaf bookmark: keep a normalized subset of its fields.
        bookmark_list.append(
            {
                "title": node.get("name", ""),
                "url": node.get("url", ""),
                "date_added": node.get("date_added", ""),
                "date_modified": node.get("date_modified", ""),
                "id": node.get("id", ""),
            }
        )
    elif node_type == "folder":
        # Descend into the folder's children.
        for child in node.get("children", []):
            extract_bookmarks_from_folder(child, bookmark_list)


def get_cache_file_path():
    """Return the bookmark cache file path, creating the data/ folder if needed."""
    cache_dir = Path("data")
    cache_dir.mkdir(exist_ok=True)
    cache_path = cache_dir / "ai_bookmarks_cache.json"
    return str(cache_path)


def load_cache():
    """
    Load the bookmark cache from its JSON file.

    Returns:
        The cached dict, or the default {"bookmarks": [], "last_updated": None}
        when the file is missing or unreadable.
    """
    cache_file = get_cache_file_path()
    try:
        with open(cache_file, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # No cache yet: silently fall through to the default, matching the
        # previous os.path.exists() check.
        pass
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
        # Narrowed from a bare `except Exception`: only I/O and parse
        # failures are expected here; anything else should surface as a bug.
        print(f"Error loading cache: {e}")
    return {"bookmarks": [], "last_updated": None}


def save_cache(cache_data):
    """
    Write the bookmark cache dict to its JSON file.

    Args:
        cache_data: JSON-serializable cache payload.

    Returns:
        True on success, False if writing failed.
    """
    destination = get_cache_file_path()
    try:
        with open(destination, "w", encoding="utf-8") as out:
            json.dump(cache_data, out, indent=2, ensure_ascii=False)
    except Exception as e:
        # Best-effort persistence: report the failure, signal it to the caller.
        print(f"Error saving cache: {e}")
        return False
    return True


@tool
def update_ai_bookmarks_cache() -> Dict[str, Any]:
    """
    Extracts bookmarks from Chrome's 'AI ressources' folder and saves them to the data/ai_bookmarks_cache.json file.
    This creates a local cache that avoids direct interaction with Chrome's raw JSON file for subsequent operations.

    Returns:
        Dictionary with update status and bookmark count.
    """
    try:
        # Read Chrome's bookmarks database.
        source_path = find_chrome_bookmarks_file()
        with open(source_path, "r", encoding="utf-8") as f:
            chrome_data = json.load(f)

        # Look for the target folder under each bookmark root in turn.
        roots = chrome_data.get("roots", {})
        ai_folder = None
        for root_key in ("bookmark_bar", "other", "synced"):
            root = roots.get(root_key)
            if root is None:
                continue
            ai_folder = find_folder_by_name(root, "AI ressources")
            if ai_folder:
                break

        if not ai_folder:
            return {"status": "error", "message": "AI ressources folder not found in bookmarks"}

        # Flatten the folder subtree into a list of bookmark dicts.
        bookmarks: List[Dict[str, Any]] = []
        extract_bookmarks_from_folder(ai_folder, bookmarks)

        # Attach cache metadata and persist.
        payload = {
            "bookmarks": bookmarks,
            "last_updated": datetime.now().isoformat(),
            "folder_name": "AI ressources",
            "total_count": len(bookmarks),
        }
        if not save_cache(payload):
            return {"status": "error", "message": "Failed to save cache"}

        return {
            "status": "success",
            "message": f"Successfully updated cache with {len(bookmarks)} bookmarks",
            "count": len(bookmarks),
        }

    except Exception as e:
        # Tool boundary: report any failure as a structured error result.
        return {"status": "error", "message": f"Error updating cache: {str(e)}"}


@tool
def get_latest_ai_bookmarks(n: int = 10) -> List[Dict[str, Any]]:
    """
    Gets the n latest bookmarks from the AI ressources cache.

    Args:
        n: Number of latest bookmarks to return (default: 10)

    Returns:
        List of the latest bookmarks with metadata.
    """
    all_bookmarks = load_cache().get("bookmarks", [])
    if not all_bookmarks:
        return []

    def added_timestamp(entry):
        # date_added is expected to be a numeric string — TODO confirm format.
        return int(entry.get("date_added", "0"))

    try:
        ordered = sorted(all_bookmarks, key=added_timestamp, reverse=True)
    except (ValueError, TypeError):
        # Unsortable timestamps: keep the cached order as-is.
        ordered = all_bookmarks

    return ordered[:n]


@tool
def search_ai_bookmarks(query: str) -> List[Dict[str, Any]]:
    """
    Search AI ressources bookmarks for entries matching a query.

    Args:
        query: Search term to find in bookmark titles or URLs.

    Returns:
        List of matching bookmarks.
    """
    bookmarks = load_cache().get("bookmarks", [])
    if not bookmarks:
        return []

    # Case-insensitive substring match against title or URL.
    needle = query.lower()
    return [
        entry
        for entry in bookmarks
        if needle in entry.get("title", "").lower() or needle in entry.get("url", "").lower()
    ]


@tool
def get_bookmark_statistics() -> Dict[str, Any]:
    """
    Gets statistics about the AI ressources bookmarks cache.

    Returns:
        Dictionary with total_count, last_updated, top_domains (up to five
        (domain, count) pairs, most frequent first) and unique_domains.
        Returns {"total_count": 0, "last_updated": None} for an empty cache.
    """
    # Hoisted out of the per-bookmark loop (was re-imported every iteration).
    from urllib.parse import urlparse

    cache = load_cache()
    bookmarks = cache.get("bookmarks", [])

    if not bookmarks:
        return {"total_count": 0, "last_updated": None}

    # Count bookmarks per domain; unparsable URLs are skipped rather than
    # failing the whole statistics call.
    domains: Dict[str, int] = {}
    for bookmark in bookmarks:
        url = bookmark.get("url", "")
        try:
            domain = urlparse(url).netloc
        except (ValueError, AttributeError):
            continue
        domains[domain] = domains.get(domain, 0) + 1

    # Five most frequent domains (stable sort keeps insertion order on ties).
    top_domains = sorted(domains.items(), key=lambda item: item[1], reverse=True)[:5]

    return {
        "total_count": len(bookmarks),
        "last_updated": cache.get("last_updated"),
        "top_domains": top_domains,
        "unique_domains": len(domains),
    }


@tool
def get_all_ai_bookmarks() -> List[Dict[str, Any]]:
    """
    Gets all bookmarks from the AI ressources cache.

    Returns:
        List of all cached bookmarks.
    """
    cached = load_cache()
    bookmarks = cached.get("bookmarks", [])
    return bookmarks


@tool
def filter_bookmarks_by_domain(domain: str) -> List[Dict[str, Any]]:
    """
    Filters AI ressources bookmarks by domain.

    Args:
        domain: Domain name to filter by (e.g., 'github.com')

    Returns:
        List of bookmarks whose URL host contains the given domain
        (case-insensitive substring match).
    """
    # Hoisted out of the per-bookmark loop (was re-imported every iteration).
    from urllib.parse import urlparse

    bookmarks = load_cache().get("bookmarks", [])
    if not bookmarks:
        return []

    domain_lower = domain.lower()
    filtered_bookmarks = []
    for bookmark in bookmarks:
        url = bookmark.get("url", "")
        try:
            host = urlparse(url).netloc.lower()
        except (ValueError, AttributeError):
            # Skip entries whose URL cannot be parsed.
            continue
        if domain_lower in host:
            filtered_bookmarks.append(bookmark)

    return filtered_bookmarks


@tool
def get_cache_info() -> Dict[str, Any]:
    """
    Gets information about the bookmark cache file.

    Returns:
        Dictionary with cache file information.
    """
    cache_path = get_cache_file_path()
    cache = load_cache()

    details = {
        "cache_file_path": cache_path,
        "cache_exists": os.path.exists(cache_path),
        "last_updated": cache.get("last_updated"),
        "bookmark_count": len(cache.get("bookmarks", [])),
        "folder_name": cache.get("folder_name", "Unknown"),
    }

    # Filesystem metadata is only available when the cache file exists.
    if os.path.exists(cache_path):
        file_stat = os.stat(cache_path)
        details["file_size_bytes"] = file_stat.st_size
        details["file_modified"] = datetime.fromtimestamp(file_stat.st_mtime).isoformat()

    return details


# Instantiate the Bookmarks CodeAgent with enhanced tools
# Module-level side effect: constructed at import time, so importing this
# module requires HF_TOKEN to be set (os.environ[...] raises KeyError if not).
bookmarks_agent = CodeAgent(
    model=InferenceClientModel(
        provider="nebius",
        token=os.environ["HF_TOKEN"],
    ),
    # Cache-backed bookmark tools defined above in this module.
    tools=[
        update_ai_bookmarks_cache,
        get_latest_ai_bookmarks,
        search_ai_bookmarks,
        get_bookmark_statistics,
        get_all_ai_bookmarks,
        filter_bookmarks_by_domain,
        get_cache_info,
    ],
    name="bookmarks_agent",
    description="Specialized agent for Chrome bookmarks operations, focusing on AI ressources folder. Extracts bookmarks from Chrome and caches them in data/ai_bookmarks_cache.json to avoid direct interaction with Chrome's raw JSON. Provides search, filtering, statistics, and cache management for AI-related bookmarks.",
    max_steps=10,
    # Imports the agent's generated code is allowed to use.
    additional_authorized_imports=["json", "datetime", "urllib.parse", "pathlib"],
    # Reduce verbosity
    stream_outputs=False,
    max_print_outputs_length=300,
)