File size: 5,008 Bytes
81598c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
"""

LangGraph tools for the OpenMark agent.

Each tool hits either ChromaDB (semantic) or Neo4j (graph) or both.

"""

from langchain_core.tools import tool
from openmark.embeddings.factory import get_embedder
from openmark.stores import chroma as chroma_store
from openmark.stores import neo4j_store

# Module-level cache: the embedder is created on first use and reused afterwards.
_embedder = None


def _get_embedder():
    """Return the shared embedder, building it with ``get_embedder()`` on first call."""
    global _embedder
    # Fast path: a previous call already populated the cache.
    if _embedder is not None:
        return _embedder
    _embedder = get_embedder()
    return _embedder


@tool
def search_semantic(query: str, n: int = 10) -> str:
    """

    Search bookmarks by semantic meaning using vector similarity.

    Use this for natural language queries like 'RAG tools', 'LangGraph tutorials', etc.

    Returns top N most relevant bookmarks.

    """
    # NOTE(review): @tool (langchain_core) is documented to use the docstring as
    # the tool description given to the model, so its wording is left as-is.
    hits = chroma_store.search(query, _get_embedder(), n=n)
    if not hits:
        return "No results found."

    entries = []
    for hit in hits:
        # Each hit renders as two lines: ranked title with its category,
        # then the URL followed by both numeric scores.
        headline = f"{hit['rank']}. [{hit['category']}] {hit['title']}"
        detail = f"   {hit['url']} (similarity: {hit['similarity']}, score: {hit['score']})"
        entries.append(f"{headline}\n{detail}")
    return "\n".join(entries)


@tool
def search_by_category(category: str, query: str = "", n: int = 15) -> str:
    """

    Find bookmarks in a specific category, optionally filtered by semantic query.

    Categories: RAG & Vector Search, Agent Development, LangChain / LangGraph,

    MCP & Tool Use, Context Engineering, AI Tools & Platforms, GitHub Repos & OSS,

    Learning & Courses, YouTube & Video, Web Development, Cloud & Infrastructure,

    Data Science & ML, Knowledge Graphs & Neo4j, Career & Jobs, LLM Fine-tuning,

    Finance & Crypto, Design & UI/UX, News & Articles, Entertainment & Other

    """
    # NOTE(review): @tool (langchain_core) is documented to use the docstring as
    # the tool description given to the model, so its wording is left as-is.

    # When no query text is supplied, the category name itself is the search text.
    search_text = query or category
    matches = chroma_store.search(search_text, _get_embedder(), n=n, category=category)
    if not matches:
        return f"No bookmarks found in category '{category}'."

    # One two-line entry per match: "rank. title" then the indented URL.
    listing = "\n".join(
        f"{match['rank']}. {match['title']}\n   {match['url']}" for match in matches
    )
    return f"Category '{category}' — top results:\n" + listing


@tool
def find_by_tag(tag: str) -> str:
    """

    Find all bookmarks tagged with a specific tag using the knowledge graph.

    Returns bookmarks ordered by quality score.

    """
    # NOTE(review): @tool (langchain_core) is documented to use the docstring as
    # the tool description given to the model, so its wording is left as-is.
    tagged = neo4j_store.find_by_tag(tag, limit=20)  # passes limit=20 to the store
    if not tagged:
        return f"No bookmarks found with tag '{tag}'."

    bullets = []
    for bookmark in tagged:
        # Bullet line with the title, then the URL and score on an indented line.
        bullets.append(
            f"- {bookmark['title']}\n  {bookmark['url']} (score: {bookmark['score']})"
        )
    return f"Bookmarks tagged '{tag}':\n" + "\n".join(bullets)


@tool
def find_similar_bookmarks(url: str) -> str:
    """

    Find bookmarks semantically similar to a given URL.

    Uses SIMILAR_TO edges in the knowledge graph (built from embedding neighbors).

    """
    # NOTE(review): @tool (langchain_core) is documented to use the docstring as
    # the tool description given to the model, so its wording is left as-is.
    neighbours = neo4j_store.find_similar(url, limit=10)  # passes limit=10 to the store
    if not neighbours:
        return f"No similar bookmarks found for {url}."

    # Similarity is rendered with three decimal places.
    body = "\n".join(
        f"- {item['title']}\n  {item['url']} (similarity: {item['similarity']:.3f})"
        for item in neighbours
    )
    return "Similar bookmarks:\n" + body


@tool
def explore_tag_cluster(tag: str) -> str:
    """

    Explore the knowledge graph around a tag — find related tags and their bookmarks.

    Traverses CO_OCCURS_WITH edges (2 hops) to discover connected topics.

    Great for discovering what else you know about a topic.

    """
    # NOTE(review): @tool (langchain_core) is documented to use the docstring as
    # the tool description given to the model, so its wording is left as-is.
    cluster = neo4j_store.find_tag_cluster(tag, hops=2, limit=25)
    if not cluster:
        return f"No cluster found for tag '{tag}'."

    rendered = []
    for node in cluster:
        # Prefix each bookmark with the tag it was reached through (via_tag).
        rendered.append(f"- [{node['via_tag']}] {node['title']}\n  {node['url']}")
    return f"Knowledge cluster around '{tag}':\n" + "\n".join(rendered)


@tool
def get_stats() -> str:
    """

    Get statistics about the OpenMark knowledge base.

    Shows total bookmarks, tags, categories in both ChromaDB and Neo4j.

    """
    # NOTE(review): @tool (langchain_core) is documented to use the docstring as
    # the tool description given to the model, so its wording is left as-is.
    vector_stats = chroma_store.get_stats()
    graph_stats = neo4j_store.get_stats()

    # Missing counters fall back to 0 so the report always has every row.
    report = [
        "OpenMark Knowledge Base Stats:",
        f"  ChromaDB vectors:   {vector_stats.get('total', 0)}",
        f"  Neo4j bookmarks:    {graph_stats.get('bookmarks', 0)}",
        f"  Neo4j tags:         {graph_stats.get('tags', 0)}",
        f"  Neo4j categories:   {graph_stats.get('categories', 0)}",
    ]
    return "\n".join(report)


@tool
def run_cypher(cypher: str) -> str:
    """

    Run a raw Cypher query against the Neo4j knowledge graph.

    Use for advanced graph traversals. Example:

    MATCH (b:Bookmark)-[:TAGGED]->(t:Tag) WHERE t.name='rag' RETURN b.title, b.url LIMIT 10

    At most 20 rows are returned; a trailing note reports how many were omitted.

    """
    # SECURITY NOTE(review): the query text is passed to neo4j_store.query()
    # unmodified, so whatever the caller (an LLM agent) writes is executed —
    # including write/delete statements unless the store or the database user
    # restricts them. Confirm the connection is read-only or add a guard there.
    max_rows = 20
    try:
        rows = neo4j_store.query(cypher)
        if not rows:
            return "Query returned no results."
        lines = [str(row) for row in rows[:max_rows]]
        # Make truncation explicit so the caller never mistakes a capped
        # listing for the complete result set.
        omitted = len(rows) - max_rows
        if omitted > 0:
            lines.append(
                f"... ({omitted} more rows not shown; add a LIMIT or narrow the query)"
            )
        return "\n".join(lines)
    except Exception as e:
        # Deliberately broad: this is the tool boundary, and any failure is
        # reported back as text instead of propagating to the caller.
        return f"Cypher error: {e}"


# The tools defined above, listed in definition order.
# NOTE(review): presumably imported by the agent setup to register the tools —
# confirm at the import site.
ALL_TOOLS = [
    # Vector search via chroma_store
    search_semantic,
    search_by_category,
    # Graph lookups via neo4j_store
    find_by_tag,
    find_similar_bookmarks,
    explore_tag_cluster,
    # Store statistics and raw Cypher access
    get_stats,
    run_cypher,
]