Delete app.py
Browse files
app.py
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
import gzip
import pickle

import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

# Build a FAISS index over the "text" column of a CSV file and store the
# texts/statuses alongside it as gzip-compressed pickle metadata.

# ===== CONFIG =====
CSV_PATH = "data.csv"
INDEX_PATH = "faiss.index"
META_PATH = "metadata.pkl.gz"

# ===== LOAD DATA =====
df = pd.read_csv(CSV_PATH)
# NOTE(review): astype(str) turns missing cells into the literal string
# "nan", which is then embedded like any other text -- confirm this is
# intended, or drop/fill missing rows upstream.
texts = df["text"].astype(str).tolist()
statuses = df["status"].astype(str).tolist()

# Fail early with a clear message: with no rows, stacking the per-batch
# embeddings below would raise an opaque "need at least one array" error
# only after the model has been loaded.
if not texts:
    raise ValueError(f"{CSV_PATH} contains no rows; nothing to index")

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# ===== EMBEDDING BATCH =====
batch_size = 64

# Encode in fixed-size slices and stack the per-batch arrays row-wise so
# that row i of `embeddings` corresponds to texts[i].
embeddings = np.vstack([
    model.encode(texts[start:start + batch_size], convert_to_numpy=True)
    for start in range(0, len(texts), batch_size)
])

# ===== FAISS =====
dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)
# The vectors are cast to float32 before being added to the index.
index.add(embeddings.astype("float32"))

# ===== SAVE =====
faiss.write_index(index, INDEX_PATH)

# NOTE: the metadata is pickled; only load this file from a trusted source,
# since unpickling executes arbitrary code.
with gzip.open(META_PATH, "wb") as f:
    pickle.dump({"texts": texts, "statuses": statuses}, f)

print(f"✅ Index built with {len(texts)} records")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|