Spaces:
Sleeping
Sleeping
fahmiaziz98
committed on
Commit
·
4897e02
1
Parent(s):
1773955
remove numpy, scipy
Browse files
- requirements.txt +0 -2
- vectordb/milvus_client.py +0 -0
- vectordb/utils.py +0 -77
requirements.txt
CHANGED
|
@@ -1,7 +1,5 @@
|
|
| 1 |
fastapi[standard]==0.116.2
|
| 2 |
uvicorn==0.35.0
|
| 3 |
-
numpy==2.0.2
|
| 4 |
-
scipy==1.16.3
|
| 5 |
torch==2.8.0
|
| 6 |
sentence-transformers==5.1.1
|
| 7 |
loguru==0.7.3
|
|
|
|
| 1 |
fastapi[standard]==0.116.2
|
| 2 |
uvicorn==0.35.0
|
|
|
|
|
|
|
| 3 |
torch==2.8.0
|
| 4 |
sentence-transformers==5.1.1
|
| 5 |
loguru==0.7.3
|
vectordb/milvus_client.py
DELETED
|
File without changes
|
vectordb/utils.py
DELETED
|
@@ -1,77 +0,0 @@
|
|
| 1 |
-
import numpy as np
|
| 2 |
-
from scipy.sparse import csr_matrix
|
| 3 |
-
from typing import Dict, List
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
def convert_sparse_to_csr(sparse_dict: Dict[str, List]) -> csr_matrix:
|
| 7 |
-
"""
|
| 8 |
-
Convert sparse embedding to scipy CSR matrix
|
| 9 |
-
|
| 10 |
-
API format: {"indices": [10, 25, 42], "values": [0.85, 0.62, 0.91]}
|
| 11 |
-
Milvus format: scipy.sparse.csr_matrix with shape (1, max_dimension)
|
| 12 |
-
|
| 13 |
-
Args:
|
| 14 |
-
sparse_dict: Dictionary with 'indices' and 'values'
|
| 15 |
-
|
| 16 |
-
Returns:
|
| 17 |
-
scipy CSR matrix
|
| 18 |
-
"""
|
| 19 |
-
indices = sparse_dict["indices"]
|
| 20 |
-
values = sparse_dict["values"]
|
| 21 |
-
|
| 22 |
-
max_dim = max(indices) + 1 if indices else 1
|
| 23 |
-
|
| 24 |
-
# Create CSR matrix
|
| 25 |
-
# Shape: (1, max_dim) because this is a single vector
|
| 26 |
-
row_indices = [0] * len(indices) # All entries are in row 0
|
| 27 |
-
col_indices = indices
|
| 28 |
-
|
| 29 |
-
sparse_matrix = csr_matrix(
|
| 30 |
-
(values, (row_indices, col_indices)),
|
| 31 |
-
shape=(1, max_dim)
|
| 32 |
-
)
|
| 33 |
-
|
| 34 |
-
return sparse_matrix
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
def batch_convert_sparse_to_csr(sparse_list: List[Dict[str, List]]) -> csr_matrix:
|
| 38 |
-
"""
|
| 39 |
-
Convert batch of sparse embeddings to single CSR matrix
|
| 40 |
-
|
| 41 |
-
Args:
|
| 42 |
-
sparse_list: List of sparse dicts
|
| 43 |
-
|
| 44 |
-
Returns:
|
| 45 |
-
scipy CSR matrix with shape (batch_size, max_dim)
|
| 46 |
-
"""
|
| 47 |
-
if not sparse_list:
|
| 48 |
-
return csr_matrix((0, 0))
|
| 49 |
-
|
| 50 |
-
max_dim = 0
|
| 51 |
-
for sparse_dict in sparse_list:
|
| 52 |
-
if sparse_dict["indices"]:
|
| 53 |
-
max_dim = max(max_dim, max(sparse_dict["indices"]) + 1)
|
| 54 |
-
|
| 55 |
-
if max_dim == 0:
|
| 56 |
-
max_dim = 30000 # Default vocab size for SPLADE
|
| 57 |
-
|
| 58 |
-
# Build row indices, column indices, and values
|
| 59 |
-
row_indices = []
|
| 60 |
-
col_indices = []
|
| 61 |
-
values = []
|
| 62 |
-
|
| 63 |
-
for row_idx, sparse_dict in enumerate(sparse_list):
|
| 64 |
-
indices = sparse_dict["indices"]
|
| 65 |
-
vals = sparse_dict["values"]
|
| 66 |
-
|
| 67 |
-
row_indices.extend([row_idx] * len(indices))
|
| 68 |
-
col_indices.extend(indices)
|
| 69 |
-
values.extend(vals)
|
| 70 |
-
|
| 71 |
-
# Create CSR matrix
|
| 72 |
-
sparse_matrix = csr_matrix(
|
| 73 |
-
(values, (row_indices, col_indices)),
|
| 74 |
-
shape=(len(sparse_list), max_dim)
|
| 75 |
-
)
|
| 76 |
-
|
| 77 |
-
return sparse_matrix
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|