# File size: 1,588 Bytes
# Revision: 565e754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import uuid
from typing import Literal, Any

import pandas as pd
from qdrant_client import models

from src.config import qdrant_client


def qdrant_create_index(
        index_name: str, 
        dim: int, 
        distance: Literal["cosine", "euclid", "manhattan"],
    ):
    distance_mode = None
    match distance:
        case "cosine":
            distance_mode = models.Distance.COSINE
        case "euclid":
            distance_mode = models.Distance.EUCLID
        case "manhattan":
            distance_mode = models.Distance.MANHATTAN
        case _:
            return ValueError(distance)
        
    return qdrant_client.create_collection(
        collection_name=index_name,
        vectors_config=models.VectorParams(
            size=dim, 
            distance=distance_mode,
        )
    )


def qdrant_insert(df: pd.DataFrame, index_name: str) -> Any:
    """
    Upsert every row of ``df`` into the ``index_name`` collection.

    df.columns == ["doc_id", "text", "vector"]

    Each row becomes one point: a freshly generated UUID4 string as the
    point id, the row's ``vector`` converted to a list, and a payload
    holding the row's ``doc_id`` and ``text``.

    Returns whatever ``qdrant_client.upsert`` returns.
    """
    points = []
    for record in df.itertuples(index=False):
        chunk_id = str(uuid.uuid4())  # unique id of the chunk
        chunk_vector = list(record.vector)  # vector of the chunk
        chunk_payload = {
            "doc_id": record.doc_id,  # <--- link to PostgreSQL
            "text": record.text,
        }
        points.append(
            models.PointStruct(
                id=chunk_id,
                vector=chunk_vector,
                payload=chunk_payload,
            )
        )

    result = qdrant_client.upsert(collection_name=index_name, points=points)
    return result


def qdrant_search(index_name: str, vector: list, limit: int = 5) -> list:
    """
    Query the ``index_name`` collection for the points nearest to ``vector``.

    At most ``limit`` results are requested (5 by default). The value
    returned by ``qdrant_client.query_points`` is passed back unchanged.

    NOTE(review): qdrant-client documents ``query_points`` as returning a
    response object whose hits live under ``.points`` rather than a plain
    list -- confirm whether the ``list`` annotation matches what callers get.
    """
    response = qdrant_client.query_points(
        query=vector,
        collection_name=index_name,
        limit=limit,
    )
    return response