File size: 6,115 Bytes
7cae5d6
8cc9517
 
 
 
7cae5d6
8cc9517
7cae5d6
719390c
8cc9517
 
 
3d40769
719390c
8cc9517
 
 
3d40769
 
8cc9517
719390c
 
 
 
 
 
8cc9517
3d40769
719390c
3d40769
719390c
 
8cc9517
 
 
3d40769
719390c
 
8cc9517
7cae5d6
719390c
8cc9517
 
 
 
 
 
719390c
8cc9517
 
 
 
719390c
 
8cc9517
 
 
 
 
 
 
 
 
 
 
7cae5d6
8cc9517
719390c
8cc9517
 
 
719390c
 
8cc9517
 
 
 
 
 
 
 
 
719390c
8cc9517
719390c
8cc9517
 
 
719390c
 
8cc9517
 
719390c
8cc9517
 
719390c
8cc9517
719390c
 
 
8cc9517
 
 
 
 
 
 
 
 
 
 
 
 
 
719390c
8cc9517
719390c
8cc9517
 
 
 
 
 
 
719390c
 
8cc9517
 
719390c
8cc9517
 
 
 
 
 
 
 
 
719390c
8cc9517
 
719390c
8cc9517
 
719390c
8cc9517
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719390c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import os
import sqlite3
import json
from pymilvus import MilvusClient, FieldSchema, CollectionSchema, DataType
import google.generativeai as genai

from tools.tool_registry import get_all_tools

# --- On-disk locations for persisted state ---
# Everything lives in a "data" directory one level above this module's directory.
DATA_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, "data")
)
SQLITE_DB_PATH = os.path.join(DATA_DIR, "tools.metadata.db")
MILVUS_DATA_PATH = os.path.join(DATA_DIR, "milvus_lite.db")

# --- Embedding model and vector-store settings ---
EMBEDDING_MODEL_NAME = "gemini-embedding-exp-03-07"
# Dimension declared for the Milvus "embedding" vector field.
# NOTE(review): presumably must equal the vector length the model above returns — confirm.
EMBEDDING_DIM = 3072
MILVUS_COLLECTION_NAME = "tool_embeddings"


def initialize_system():
    """
    Bootstrap the tool metadata store, the vector store, and the recommender.

    Steps, in order: make sure the data directory exists, create the SQLite
    table and add any registered tools to it, build the Milvus collection from
    the SQLite contents, and finally construct a ``DirectToolRecommender`` on
    top of both stores. The function is intended to be safe to run repeatedly.

    Returns:
        A ``(tool_definitions, tool_recommender)`` tuple: the value returned
        by ``get_all_tools()`` and the recommender instance.
    """
    print("--- Starting System Initialization (Final Version) ---")
    os.makedirs(DATA_DIR, exist_ok=True)

    # Step 1: SQLite goes first, because the Milvus step below reads the tool
    # rows back out of it.
    _init_sqlite_db()
    registered_tools = get_all_tools()
    _sync_tools_to_sqlite(registered_tools)

    # Step 2: build the vector store from what is now in SQLite.
    vector_client = _init_milvus_and_sync_embeddings()

    # Step 3: wire both stores into the recommender.
    # NOTE(review): the import is local rather than at module top, presumably
    # to avoid a circular import — confirm before moving it.
    from core.tool_recommender import DirectToolRecommender

    recommender = DirectToolRecommender(
        milvus_client=vector_client,
        sqlite_db_path=SQLITE_DB_PATH,
    )

    print("--- System Initialization Complete ---")
    return registered_tools, recommender


def _init_sqlite_db():
    """Initializes the SQLite database and creates the tools table if it doesn't exist."""
    print(f"SQLite DB Path: {SQLITE_DB_PATH}")
    with sqlite3.connect(SQLITE_DB_PATH) as conn:
        cursor = conn.cursor()
        cursor.execute(
            """

            CREATE TABLE IF NOT EXISTS tools (

                id INTEGER PRIMARY KEY AUTOINCREMENT,

                name TEXT UNIQUE NOT NULL,

                description TEXT NOT NULL,

                parameters TEXT NOT NULL

            )

        """
        )
        conn.commit()
    print("SQLite DB table verified.")


def _sync_tools_to_sqlite(tools_definitions):
    """Syncs tool definitions into the SQLite database."""
    print("Syncing tool metadata to SQLite...")
    with sqlite3.connect(SQLITE_DB_PATH) as conn:
        cursor = conn.cursor()
        for tool in tools_definitions:
            cursor.execute("SELECT id FROM tools WHERE name = ?", (tool.name,))
            if cursor.fetchone() is None:
                cursor.execute(
                    "INSERT INTO tools (name, description, parameters) VALUES (?, ?, ?)",
                    (tool.name, tool.description, json.dumps(tool.args)),
                )
                print(f"  - Added new tool to SQLite: {tool.name}")
        conn.commit()
    print("SQLite sync complete.")


def _init_milvus_and_sync_embeddings():
    """Open Milvus Lite, rebuild the tool collection from scratch, and fill it.

    Any existing collection named ``MILVUS_COLLECTION_NAME`` is dropped and
    recreated with an INT64 primary key ``id`` and a float-vector field
    ``embedding`` of ``EMBEDDING_DIM`` dimensions, indexed with AUTOINDEX / L2.
    Embeddings are then synced in and the collection is loaded.

    Returns:
        The ``MilvusClient`` connected to ``MILVUS_DATA_PATH``.
    """
    print(f"Milvus Lite Data Path: {MILVUS_DATA_PATH}")
    milvus = MilvusClient(uri=MILVUS_DATA_PATH)
    collection = MILVUS_COLLECTION_NAME

    # Start from a clean slate on every startup so the stored vectors always
    # match the configured dimension and the current tool data.
    stale_collection_present = milvus.has_collection(collection_name=collection)
    if stale_collection_present:
        milvus.drop_collection(collection_name=collection)
        print("Found old Milvus collection. Dropped it to rebuild.")

    print(
        f"Creating Milvus collection '{MILVUS_COLLECTION_NAME}' with dimension {EMBEDDING_DIM}..."
    )
    id_field = FieldSchema(name="id", dtype=DataType.INT64, is_primary=True)
    vector_field = FieldSchema(
        name="embedding", dtype=DataType.FLOAT_VECTOR, dim=EMBEDDING_DIM
    )
    milvus.create_collection(
        collection_name=collection,
        schema=CollectionSchema([id_field, vector_field]),
    )

    vector_index = milvus.prepare_index_params()
    vector_index.add_index(
        field_name="embedding",
        index_type="AUTOINDEX",
        metric_type="L2",
    )
    milvus.create_index(collection_name=collection, index_params=vector_index)
    print("Milvus collection and index created successfully.")

    # The collection is empty at this point; populate it before loading.
    _sync_tool_embeddings_to_milvus(milvus)

    milvus.load_collection(collection_name=collection)
    return milvus


def _sync_tool_embeddings_to_milvus(milvus_client):
    """Generates and syncs tool description embeddings to Milvus Lite."""
    print("Syncing tool embeddings to Milvus...")
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        print("Error: GEMINI_API_KEY not found.")
        return
    genai.configure(api_key=api_key)

    with sqlite3.connect(SQLITE_DB_PATH) as conn:
        cursor = conn.cursor()
        cursor.execute("SELECT id, description FROM tools")
        all_tools_in_db = cursor.fetchall()

    if not all_tools_in_db:
        print("Error: No tools found in SQLite to sync.")
        return

    print(f"Found {len(all_tools_in_db)} tools from SQLite, generating embeddings...")
    docs_to_embed = [tool[1] for tool in all_tools_in_db]

    print(f"Using embedding model: {EMBEDDING_MODEL_NAME}")
    result = genai.embed_content(
        model=EMBEDDING_MODEL_NAME,
        content=docs_to_embed,
        task_type="retrieval_document",
    )

    embeddings = result["embedding"]
    tool_ids_to_insert = [tool[0] for tool in all_tools_in_db]

    data_to_insert = [
        {"id": tool_id, "embedding": embedding}
        for tool_id, embedding in zip(tool_ids_to_insert, embeddings)
    ]

    milvus_client.insert(collection_name=MILVUS_COLLECTION_NAME, data=data_to_insert)
    print(f"Successfully inserted {len(data_to_insert)} new embeddings into Milvus.")