GodsDevProject committed on
Commit
93aa9e2
·
verified ·
1 Parent(s): 293c2b8

Create ingest/cluster.py

Browse files
Files changed (1) hide show
  1. ingest/cluster.py +17 -8
ingest/cluster.py CHANGED
@@ -1,20 +1,29 @@
1
  from sentence_transformers import SentenceTransformer
2
- import faiss
3
- import numpy as np
4
 
5
  model = SentenceTransformer("all-MiniLM-L6-v2")
6
 
7
def semantic_cluster(results):
    """Embed each result's title and snippet and return the raw embeddings.

    Args:
        results: Iterable of mappings; each item's ``"title"`` and
            ``"snippet"`` values are joined with a single space to form the
            text that is embedded.

    Returns:
        ``None`` when ``results`` yields no items. Otherwise a dict with:

        * ``"points"``: the embeddings converted with ``.tolist()``, one
          row per result.
        * ``"labels"``: ``0 .. len(results) - 1``. These are simply each
          result's position, not computed cluster assignments.
    """
    texts = [r["title"] + " " + r["snippet"] for r in results]
    if not texts:
        return None

    # The previous version also built a FAISS IndexFlatL2 over these
    # embeddings, but the index was never queried or returned, so that dead
    # work has been dropped.
    embeddings = model.encode(texts)

    return {
        "points": embeddings.tolist(),
        "labels": list(range(len(results))),
    }
 
 
 
 
 
 
 
 
 
 
1
  from sentence_transformers import SentenceTransformer
2
+ import faiss, numpy as np
3
+ import plotly.graph_objects as go
4
 
5
  model = SentenceTransformer("all-MiniLM-L6-v2")
6
 
7
def semantic_cluster_plot(results):
    """Build a Plotly scatter plot from the results' sentence embeddings.

    Args:
        results: Iterable of mappings; each item's ``"title"`` and
            ``"snippet"`` values are joined with a single space to form the
            text that is embedded. The ``"title"`` is also used as the
            per-point hover text.

    Returns:
        A ``plotly.graph_objects.Figure``. It is empty when ``results``
        yields no items; otherwise it holds one marker per result and is
        titled "Semantic Document Clusters".
    """
    texts = [r["title"] + " " + r["snippet"] for r in results]
    if not texts:
        # Nothing to embed: return an empty figure so the return type is
        # the same on every path.
        return go.Figure()

    # The previous version also built a FAISS IndexFlatL2 over these
    # embeddings, but the index was never queried or returned, so that dead
    # work has been dropped.
    embeddings = model.encode(texts)

    # Simple 2D "projection": just the first two embedding coordinates.
    # This is not a real dimensionality reduction (no PCA/UMAP/t-SNE).
    # NOTE(review): the original comment says "for HF safety" — presumably
    # to avoid extra dependencies on Hugging Face hosting; confirm.
    x, y = embeddings[:, 0], embeddings[:, 1]

    fig = go.Figure(
        data=go.Scatter(
            x=x,
            y=y,
            mode="markers",
            text=[r["title"] for r in results],
            marker=dict(size=8),
        )
    )
    fig.update_layout(title="Semantic Document Clusters")
    return fig