Spaces:

georgeek
/

HF-LLM-Intent-Detection

Sleeping

HF-LLM-Intent-Detection / src /Z_test_X_model.py

Transfer

5ecde30 11 months ago

1.4 kB

	from sentence_transformers import SentenceTransformer
	from A_Preprocess import load_pdf_data
	from E_Model_utils import get_embeddings
	import numpy as np
	import faiss

	# --- Data loading ---------------------------------------------------
	# Absolute path to the cleaned intents file (machine-specific location).
	data_file_path = r'C:\Users\serban.tica\Documents\tobi_llm_intent_recognition\data\Pager_Intents_Cleaned.csv'
	data = load_pdf_data(data_file_path)

	# Pull the 'utterance' column out as a plain Python list of sentences.
	utterance_column = data['utterance']
	sentences = utterance_column.tolist()

	# --- Embedding ------------------------------------------------------
	# Short model name; reused further down to name the FAISS index file.
	model_name = 'all-MiniLM-L6-v2'
	model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

	# Embed every sentence with the project helper and report the result shape.
	embeddings = get_embeddings(model, sentences)
	print(f'Embeddings shape: {embeddings.shape}.')

	# --- Persist the embeddings as a FAISS index --------------------------

	# Cast to float32 before handing the vectors to FAISS.
	embeddings = np.array(embeddings).astype(np.float32)

	# Flat index using L2 distance, sized to the embedding dimensionality
	# (second axis of the embeddings array).
	embedding_dim = embeddings.shape[1]
	index = faiss.IndexFlatL2(embedding_dim)
	index.add(embeddings)

	# The index file is named after the model and written to the current
	# working directory.
	index_path = f"{model_name}_faiss.index"
	faiss.write_index(index, index_path)

	# Read the index straight back from disk; the statements that follow
	# query this reloaded copy.
	index = faiss.read_index(index_path)

	# Query the index: embed a free-text utterance and find its 5 nearest
	# neighbours among the indexed sentences.
	#
	# Previously the query text was assigned and then immediately overwritten
	# with embeddings[0], so the text was never embedded and the search always
	# returned the neighbours of the first corpus sentence.
	query_text = 'cat am de platit la factura'

	# Embed the query with the same helper and model used for the corpus so
	# both live in the same vector space. The helper is called with a
	# one-element list, mirroring the list-of-sentences call used for the corpus.
	query_embedding = np.array(get_embeddings(model, [query_text])).astype('float32')

	# index.search takes a 2-D (n_queries, dim) array; force a single row in
	# case the helper returns a 1-D vector for a single sentence.
	query_embedding = query_embedding.reshape(1, -1)

	D, I = index.search(query_embedding, 5) # D: distances, I: indices
	print("Indices of nearest neighbors:", I)
	print("Distances of nearest neighbors:", D)




	#print(embeddings[:10])