Spaces:

georgeek
/

HF-LLM-Intent-Detection

Sleeping

App Files Files Community

HF-LLM-Intent-Detection / src /F_Load_embeddings_index_and search.py

georgeek

Transfer

5ecde30 11 months ago

raw

history blame contribute delete

3.29 kB

	import faiss
	import pandas as pd
	import numpy as np
	from sentence_transformers import SentenceTransformer
	import time

	# Intent lookup script.
	#
	# Stages, in order: load a prebuilt FAISS index, load a SentenceTransformer
	# model, embed one example query, search the index for its k nearest
	# neighbours, load the intents CSV, and print the intent stored on the row
	# of the closest neighbour together with a confidence score.
	#
	# Every stage prints its outcome and the time elapsed since start-up. A
	# failing stage prints its error and stops the script with exit status 1,
	# because every later stage depends on the objects created by earlier ones.

	# Start the timer
	start_time = time.time()


	def _elapsed():
	    """Return the number of seconds elapsed since ``start_time``."""
	    return time.time() - start_time


	def _abort(message):
	    """Print *message* and terminate the script with exit status 1."""
	    print(message)
	    raise SystemExit(1)


	# Load the FAISS index
	index_path = "embeddings/multilingual-e5-small_vector_db.index"
	# Alternative index built with another model:
	#index_path = "embeddings/all-MiniLM-L6-v2_vector_db.index"

	try:
	    index = faiss.read_index(index_path)
	except Exception as e:
	    _abort(f"Error loading FAISS index: {e} - Time passed: {_elapsed():.2f} seconds")
	else:
	    print(f"FAISS index loaded successfully from {index_path} - Time passed: {_elapsed():.2f} seconds")

	# Load the model (local_files_only=True: no download is attempted)
	try:
	    model = SentenceTransformer('intfloat/multilingual-e5-small', local_files_only=True)
	    # Alternative model:
	    # model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
	except Exception as e:
	    _abort(f"Error loading model: {e} - Time passed: {_elapsed():.2f} seconds")
	else:
	    print(f"Model loaded successfully - Time passed: {_elapsed():.2f} seconds")

	# Example new text (a single query; the list form allows several at once)
	new_text = ["Cat am de plata"]
	print(f'The text is: {new_text} - Time passed: {_elapsed():.2f} seconds')

	# Generate embeddings for the new text
	try:
	    new_embeddings = model.encode(new_text)
	except Exception as e:
	    _abort(f"Error generating embeddings: {e} - Time passed: {_elapsed():.2f} seconds")
	else:
	    print(f"Generated embeddings for new text: - Time passed: {_elapsed():.2f} seconds")

	# Convert new embeddings to a contiguous float32 array before searching
	try:
	    new_embeddings = np.ascontiguousarray(new_embeddings, dtype='float32')
	except Exception as e:
	    _abort(f"Error converting embeddings to float32: {e} - Time passed: {_elapsed():.2f} seconds")
	else:
	    print(f"Converted new embeddings to float32: - Time passed: {_elapsed():.2f} seconds")

	# Perform similarity search
	k = 5  # Number of nearest neighbors to retrieve
	try:
	    D, I = index.search(new_embeddings, k)  # D: distances, I: indices
	except Exception as e:
	    _abort(f"Error performing similarity search: {e} - Time passed: {_elapsed():.2f} seconds")
	else:
	    print(f"Similarity search results: Indices - {I}, Distances - {D} - Time passed: {_elapsed():.2f} seconds")

	# Load the CSV file
	# NOTE(review): this is an absolute path on one developer's Windows machine;
	# anywhere else the load below fails and the script exits - confirm the
	# intended location of the data file.
	csv_file_path = r'C:\Users\serban.tica\Documents\tobi_llm_intent_recognition\data\Pager_Intents_Cleaned.csv'
	try:
	    data = pd.read_csv(csv_file_path)
	except Exception as e:
	    _abort(f"Error loading CSV file: {e}")
	else:
	    print(f"CSV file loaded successfully from {csv_file_path}")

	# Map the closest neighbour back to its intent label.
	# Assumes row i of the CSV corresponds to vector i of the index - TODO confirm.
	try:
	    intents = data['intent'].tolist()
	except KeyError:
	    _abort(f"Error retrieving intent: column 'intent' not found in {csv_file_path}")

	best_index = int(I[0][0])
	# FAISS reports -1 when it has no neighbour to return; without this check a
	# negative value would silently select a row counted from the end of the list.
	if not 0 <= best_index < len(intents):
	    _abort(
	        f"Error retrieving intent: index {best_index} is outside the "
	        f"{len(intents)} rows loaded from {csv_file_path}"
	    )

	intent = intents[best_index]
	distance = D[0][0]

	# Turn the distance into a score where 0 distance gives 1.0.
	# NOTE(review): only meaningful if the index returns non-negative distances
	# (e.g. L2); verify the metric the index was built with.
	similarity = 1 / (1 + distance)

	# Output labels are Romanian: "Identified intent" / "Confidence level".
	print(f"Intenția identificată: {intent}")
	print(f"Nivel de încredere: {similarity:.4f}- Time passed: {_elapsed():.2f} seconds")