fahmiaziz98 committed on
Commit
4897e02
·
1 Parent(s): 1773955

remove numpy, scipy

Browse files
requirements.txt CHANGED
@@ -1,7 +1,5 @@
1
  fastapi[standard]==0.116.2
2
  uvicorn==0.35.0
3
- numpy==2.0.2
4
- scipy==1.16.3
5
  torch==2.8.0
6
  sentence-transformers==5.1.1
7
  loguru==0.7.3
 
1
  fastapi[standard]==0.116.2
2
  uvicorn==0.35.0
 
 
3
  torch==2.8.0
4
  sentence-transformers==5.1.1
5
  loguru==0.7.3
vectordb/milvus_client.py DELETED
File without changes
vectordb/utils.py DELETED
@@ -1,77 +0,0 @@
1
- import numpy as np
2
- from scipy.sparse import csr_matrix
3
- from typing import Dict, List
4
-
5
-
6
- def convert_sparse_to_csr(sparse_dict: Dict[str, List]) -> csr_matrix:
7
- """
8
- Convert sparse embedding to scipy CSR matrix
9
-
10
- API format: {"indices": [10, 25, 42], "values": [0.85, 0.62, 0.91]}
11
- Milvus format: scipy.sparse.csr_matrix with shape (1, max_dimension)
12
-
13
- Args:
14
- sparse_dict: Dictionary with 'indices' and 'values'
15
-
16
- Returns:
17
- scipy CSR matrix
18
- """
19
- indices = sparse_dict["indices"]
20
- values = sparse_dict["values"]
21
-
22
- max_dim = max(indices) + 1 if indices else 1
23
-
24
- # Create CSR matrix
25
- # Shape: (1, max_dim) karena ini single vector
26
- row_indices = [0] * len(indices) # Semua di row 0
27
- col_indices = indices
28
-
29
- sparse_matrix = csr_matrix(
30
- (values, (row_indices, col_indices)),
31
- shape=(1, max_dim)
32
- )
33
-
34
- return sparse_matrix
35
-
36
-
37
- def batch_convert_sparse_to_csr(sparse_list: List[Dict[str, List]]) -> csr_matrix:
38
- """
39
- Convert batch of sparse embeddings to single CSR matrix
40
-
41
- Args:
42
- sparse_list: List of sparse dicts
43
-
44
- Returns:
45
- scipy CSR matrix with shape (batch_size, max_dim)
46
- """
47
- if not sparse_list:
48
- return csr_matrix((0, 0))
49
-
50
- max_dim = 0
51
- for sparse_dict in sparse_list:
52
- if sparse_dict["indices"]:
53
- max_dim = max(max_dim, max(sparse_dict["indices"]) + 1)
54
-
55
- if max_dim == 0:
56
- max_dim = 30000 # Default vocab size for SPLADE
57
-
58
- # Build row indices, column indices, and values
59
- row_indices = []
60
- col_indices = []
61
- values = []
62
-
63
- for row_idx, sparse_dict in enumerate(sparse_list):
64
- indices = sparse_dict["indices"]
65
- vals = sparse_dict["values"]
66
-
67
- row_indices.extend([row_idx] * len(indices))
68
- col_indices.extend(indices)
69
- values.extend(vals)
70
-
71
- # Create CSR matrix
72
- sparse_matrix = csr_matrix(
73
- (values, (row_indices, col_indices)),
74
- shape=(len(sparse_list), max_dim)
75
- )
76
-
77
- return sparse_matrix