shimaa22 committed on
Commit
60d219b
·
verified ·
1 Parent(s): b833da4

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -42
app.py DELETED
@@ -1,42 +0,0 @@
1
- import pandas as pd
2
- import faiss
3
- import pickle
4
- from sentence_transformers import SentenceTransformer
5
- import numpy as np
6
- import gzip
7
-
8
- # ===== CONFIG =====
9
- CSV_PATH = "data.csv"
10
- INDEX_PATH = "faiss.index"
11
- META_PATH = "metadata.pkl.gz"
12
-
13
- # ===== LOAD DATA =====
14
- df = pd.read_csv(CSV_PATH)
15
- texts = df["text"].astype(str).tolist()
16
- statuses = df["status"].astype(str).tolist()
17
-
18
- model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
19
-
20
- # ===== EMBEDDING BATCH =====
21
- embeddings = []
22
- batch_size = 64
23
-
24
- for i in range(0, len(texts), batch_size):
25
- batch_texts = texts[i:i+batch_size]
26
- batch_emb = model.encode(batch_texts, convert_to_numpy=True)
27
- embeddings.append(batch_emb)
28
-
29
- embeddings = np.vstack(embeddings)
30
-
31
- # ===== FAISS =====
32
- dim = embeddings.shape[1]
33
- index = faiss.IndexFlatL2(dim)
34
- index.add(embeddings.astype("float32"))
35
-
36
- # ===== SAVE =====
37
- faiss.write_index(index, INDEX_PATH)
38
-
39
- with gzip.open(META_PATH, "wb") as f:
40
- pickle.dump({"texts": texts, "statuses": statuses}, f)
41
-
42
- print(f"✅ Index built with {len(texts)} records")