GodsDevProject committed on
Commit
9109e81
·
verified ·
1 Parent(s): 6d8f721

Create ingest/cluster.py

Browse files
Files changed (1) hide show
  1. ingest/cluster.py +33 -0
ingest/cluster.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict
2
+
3
def build_embeddings(results: List[Dict], model_name: str = "all-MiniLM-L6-v2"):
    """Encode each result's ``"snippet"`` text with a sentence-transformers model.

    This is a best-effort step: it never raises. Any failure (the optional
    ``sentence_transformers`` package is missing, the model cannot be loaded,
    encoding fails, or ``results`` is malformed) is logged and reported to the
    caller as ``None``.

    Args:
        results: Result dicts. Only the ``"snippet"`` key is read; a missing
            or ``None`` snippet is encoded as the empty string.
        model_name: Name passed to ``SentenceTransformer``. Defaults to the
            model that was previously hard-coded.

    Returns:
        The value returned by ``model.encode`` for the list of snippets (one
        entry per result, in input order), or ``None`` when ``results`` is
        empty or embeddings could not be produced.
    """
    if not results:
        # Nothing to encode: do not pay for loading the model.
        return None

    import logging

    log = logging.getLogger(__name__)

    try:
        # Imported lazily so the module stays importable without the
        # optional dependency installed.
        from sentence_transformers import SentenceTransformer

        # ``or ""`` also covers an explicit ``"snippet": None``, which
        # ``.get("snippet", "")`` would have passed through as None.
        texts = [r.get("snippet") or "" for r in results]
        model = SentenceTransformer(model_name)
        return model.encode(texts, show_progress_bar=False)
    except ImportError:
        log.warning("sentence-transformers is not available; skipping embeddings")
    except Exception:
        # Deliberately broad: embedding is optional and must not break ingest.
        log.exception("Failed to build embeddings with model %r", model_name)
    return None
11
+
12
+
13
def cluster_embeddings(results: List[Dict], embeddings):
    """Group results by their ``"source"`` field.

    The previous implementation also built a FAISS ``IndexFlatL2`` from
    ``embeddings`` but never queried it. That made the function return ``{}``
    whenever ``faiss`` was not installed or rejected the array, even though
    the returned grouping does not depend on the index. The dead index
    construction has been removed.

    Args:
        results: Result dicts; each must provide ``"source"`` and ``"title"``.
        embeddings: Output of the embedding step. Only checked against
            ``None`` (meaning embeddings were unavailable); it is not
            otherwise used by the grouping.

    Returns:
        A dict mapping each source to a list of
        ``{"title": <title>, "index": <position in results>}`` entries in
        input order. Returns ``{}`` when ``embeddings`` is ``None``,
        ``results`` is empty, or a result is malformed (missing key,
        non-dict entry, unhashable source).
    """
    if embeddings is None or not results:
        return {}

    clusters = {}
    try:
        for i, r in enumerate(results):
            clusters.setdefault(r["source"], []).append({
                "title": r["title"],
                "index": i,
            })
    except (KeyError, TypeError):
        # Keep the original all-or-nothing contract for malformed input,
        # but leave a trace instead of failing silently.
        import logging

        logging.getLogger(__name__).warning(
            "Malformed result while grouping by source; returning no clusters",
            exc_info=True,
        )
        return {}
    return clusters