Spaces:
Sleeping
Sleeping
| import faiss | |
| import numpy as np | |
def build_and_save_index(embedding_file, index_file):
    """
    Build a cosine-similarity FAISS index from saved embeddings and store it on disk.

    The embeddings are loaded from a ``.npy`` file, converted to a C-contiguous
    float32 matrix, L2-normalized in place, added to a ``faiss.IndexFlatIP``
    index, and the index is written with ``faiss.write_index``.

    Args:
        embedding_file: Path of the ``.npy`` file holding the embedding matrix
            (one row per vector; the second axis is the embedding dimension).
        index_file: Path the FAISS index is written to.

    Returns:
        None. Progress and failures are reported with ``print``; exceptions
        raised along the way are caught and reported, not propagated.
    """
    try:
        print(f"Loading embeddings from '{embedding_file}'...")
        # normalize_L2 operates in place on the raw buffer, so the array must
        # be C-contiguous float32. A plain astype() keeps the stored layout,
        # which would leave a Fortran-ordered file non-contiguous.
        embeddings = np.ascontiguousarray(np.load(embedding_file), dtype=np.float32)
        print(f"Embeddings loaded. Shape: {embeddings.shape}")

        # Fail early with a clear message instead of an opaque error from
        # FAISS or from indexing shape[1] further down.
        if embeddings.ndim != 2:
            print(f"ERROR: Expected a 2-D embedding matrix, got shape {embeddings.shape}.")
            return

        # Scaling every vector to unit length makes the inner product used by
        # IndexFlatIP equal to cosine similarity.
        print("Normalizing embeddings to unit length...")
        faiss.normalize_L2(embeddings)

        embedding_dimension = embeddings.shape[1]
        print(f"Building FAISS IndexFlatIP with dimension {embedding_dimension}...")
        index = faiss.IndexFlatIP(embedding_dimension)

        # Add the normalized embeddings to the index.
        index.add(embeddings)
        print(f"Successfully added {index.ntotal} vectors to the index.")

        print(f"Saving index to '{index_file}'...")
        faiss.write_index(index, index_file)
        print("Index saved successfully!")
    except FileNotFoundError:
        print(f"ERROR: The file '{embedding_file}' was not found.")
    except Exception as e:
        # Deliberate catch-all boundary for this script: report and return.
        print(f"AN UNEXPECTED ERROR OCCURRED: {e}")
if __name__ == '__main__':
    # Script entry point: build the index from the default embeddings file
    # and write it to the default index path.
    embedding_filename, index_filename = 'location_embeddings.npy', 'location_index.faiss'
    build_and_save_index(
        embedding_file=embedding_filename,
        index_file=index_filename,
    )