# File size: 1,940 Bytes
# de38977
 
 
 
 
 
 
 
 
 
 
# 82b35ca
# de38977
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import time
from langchain_nebius import NebiusEmbeddings
from pydantic import SecretStr
from pymilvus import MilvusClient
# --- Configuration ---------------------------------------------------------
# Milvus endpoint; a local instance is used when MILVUS_URI is not set.
MILVUS_URI = os.environ.get("MILVUS_URI", "http://localhost:19530")
COLLECTION_NAME = "my_rag_collection"

# Ingestion-related settings. They are not referenced by the code visible in
# this file; presumably shared with a companion ingestion script - verify
# before changing or removing them.
DOCUMENT_DIR = "data/"
EMBEDDING_DIMENSION = 4096
TEXT_MAX_LENGTH = 65000
CHUNK_SIZE = 100
BATCH_SIZE = 5

# Shared Milvus client. MILVUS_API_KEY is forwarded as the token and is None
# when the variable is not set.
milvus_client = MilvusClient(
    uri=MILVUS_URI,
    token=os.environ.get("MILVUS_API_KEY"),
)

# Embedding client for the Nebius endpoint. NEBIUS_API_KEY is used when it is
# set; otherwise the value of OPENAI_API_KEY (possibly None) is used instead.
embedding_model = NebiusEmbeddings(
    api_key=SecretStr(
        os.environ.get("NEBIUS_API_KEY", os.environ.get("OPENAI_API_KEY"))
    ),
    model="Qwen/Qwen3-Embedding-8B",
    base_url="https://api.studio.nebius.ai/v1",
)

def generate_embedding(text):
    """Return the embedding that ``embedding_model.embed_query`` yields for ``text``."""
    vector = embedding_model.embed_query(text)
    return vector

def verify_insertion(
    query="What are competence standards and their purpose?",
    limit=3,
):
    """Print collection statistics and run a sample search against Milvus.

    The function prints the raw result of ``get_collection_stats`` for
    ``COLLECTION_NAME``, embeds ``query`` with ``generate_embedding`` and
    searches the collection, then prints the distance, the first 300
    characters of the ``text`` field and the ``metadata`` field of every hit.
    When the search yields no hits an explicit message is printed instead of
    an empty result list.

    Args:
        query: Text to embed and search for. Defaults to the built-in test
            question, so existing zero-argument calls behave as before.
        limit: Maximum number of hits requested from Milvus. Defaults to 3.

    Returns:
        None. All output goes to stdout.
    """
    stats = milvus_client.get_collection_stats(COLLECTION_NAME)
    print(f"Collection stats: {stats}")

    # Embed the query text and search the collection with that single vector.
    query_embedding = generate_embedding(query)
    search_results = milvus_client.search(
        collection_name=COLLECTION_NAME,
        data=[query_embedding],
        limit=limit,
        output_fields=["text", "metadata"],
    )

    # Only one query vector is sent, so only the first hit list is relevant.
    # Guard the index in case the client returns an empty outer list.
    hits = search_results[0] if search_results else []

    print(f"\nTest search results for '{query}':")
    if not hits:
        # Without this message an empty result set looks like a silent success.
        print("No results returned; the collection may be empty.")
        return

    for position, hit in enumerate(hits, start=1):
        entity = hit['entity']
        print(f"Result {position}:")
        print(f"  Score: {hit['distance']:.4f}")
        print(f"  Text preview: {entity['text'][:300]}...")
        print(f"  Metadata: {entity['metadata']}")
        print("-" * 50)

def main():
    """Run the test search and print the total elapsed time in seconds."""
    # perf_counter() is a monotonic clock, so the measured duration is not
    # affected by system clock adjustments the way time.time() can be.
    start_time = time.perf_counter()
    print("=" * 60)

    print("\n Starting test search")
    print("=" * 30)
    verify_insertion()

    elapsed_time = time.perf_counter() - start_time
    print(f"\nTotal execution time: {elapsed_time:.2f} seconds")


if __name__ == "__main__":
    main()