"""Build a FAISS vector index from newline-delimited documents.

Reads data/data.txt (one document per non-blank line), embeds each line
with a SentenceTransformer model, stores the vectors in a flat L2 FAISS
index, and writes both the index (vector.index) and the raw document
texts (documents.txt) to disk so retrieval code can map hits back to
their source lines.
"""
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import os  # NOTE(review): currently unused here — confirm before removing

# Load embedding model (downloads on first run, then cached locally).
print("Loading embedding model...")
model = SentenceTransformer("all-MiniLM-L6-v2")

# Load training data: one document per non-blank line.
data_path = "data/data.txt"
with open(data_path, "r", encoding="utf-8") as f:
    documents = [line.strip() for line in f if line.strip()]
print(f"Loaded {len(documents)} documents")

# Create embeddings for all documents in one batch call.
print("Creating embeddings...")
embeddings = model.encode(documents)

# Create FAISS index. FAISS requires contiguous float32 vectors;
# asarray avoids an extra copy when encode() already returns float32.
vectors = np.asarray(embeddings, dtype=np.float32)
dimension = vectors.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(vectors)

# Save index and documents. Row i of documents.txt corresponds to
# vector i in the index, so the two files must stay in sync.
faiss.write_index(index, "vector.index")
with open("documents.txt", "w", encoding="utf-8") as f:
    f.writelines(doc + "\n" for doc in documents)
print("Vector database created successfully!")