# SearchWithLLM / work.py
# RiH-137Rishi's picture
# Upload 7 files
# e386454 verified
import json
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
## loading the pre-trained model
model = SentenceTransformer('all-MiniLM-L6-v2')

## loading the JSON embeddings data
## JSON text is UTF-8; stating the encoding keeps the read independent of the
## platform's locale default (e.g. cp1252 on Windows), which would otherwise
## mis-decode or reject non-ASCII characters in the file.
with open('embeddings_data.json', 'r', encoding='utf-8') as file:
    courses_data = json.load(file)

## extracting the embeddings and titles
## Each record is read through its 'embedding' and 'title' keys; the two lists
## below are index-aligned (position i in both refers to the same course).
## NOTE(review): presumably the stored embeddings were produced with the same
## model loaded above -- confirm, otherwise the similarities are meaningless.
course_embeddings = [np.array(course['embedding']) for course in courses_data]
course_titles = [course['title'] for course in courses_data]
## query function
def get_query_embedding(query):
    """Encode ``query`` with the module-level sentence-transformer ``model``.

    Args:
        query: The text handed unchanged to ``model.encode``.

    Returns:
        The result of ``model.encode(query, convert_to_tensor=True)``.
    """
    embedding = model.encode(query, convert_to_tensor=True)
    return embedding
## function to add relevance factors
def add_relevance_factors(similarities, indices, relevance_factor=0.2,
                          keyword="deep learning", titles=None):
    """Re-rank candidate courses, boosting those whose title mentions a keyword.

    Each candidate's score is its similarity plus ``relevance_factor`` when
    ``keyword`` occurs (case-insensitively) in the candidate's title.

    Args:
        similarities: Indexable collection of similarity scores, addressed by
            course index.
        indices: Iterable of course indices to re-rank.
        relevance_factor: Bonus added to the score of a matching course.
            Defaults to 0.2.
        keyword: Substring looked for in each title. Defaults to
            "deep learning".
        titles: Indexable collection of course titles, addressed by course
            index. Defaults to the module-level ``course_titles``.

    Returns:
        A list with the elements of ``indices`` ordered by boosted score,
        highest first. Candidates with equal scores keep their input order.
    """
    if titles is None:
        # Fall back to the module-level titles so existing two-argument
        # callers behave exactly as before.
        titles = course_titles

    # Lower-case the keyword once rather than on every iteration.
    needle = keyword.lower()

    ## tuple of index and score
    enhanced_scores = []
    for idx in indices:
        curriculum_match = 1 if needle in titles[idx].lower() else 0
        enhanced_score = similarities[idx] + relevance_factor * curriculum_match
        enhanced_scores.append((idx, enhanced_score))

    # list.sort is stable, including with reverse=True, so ties keep input order.
    enhanced_scores.sort(key=lambda pair: pair[1], reverse=True)
    return [idx for idx, _ in enhanced_scores]
## user query sample
user_query = "machine learning courses with deep learning"

## converting query into embedding
query_embedding = get_query_embedding(user_query)

## calculating the similarity
## The embedding is a torch tensor and may live on a GPU; np.array() cannot
## read device memory and raises TypeError there, so copy it to the host
## explicitly. reshape(1, -1) gives the single-row 2-D input that
## cosine_similarity expects.
query_vector = query_embedding.detach().cpu().numpy().reshape(1, -1)
cosine_similarities = cosine_similarity(query_vector, course_embeddings)

## getting the top 5 similar courses
top_k = 5
## argsort is ascending: take the last top_k entries, then reverse to get
## the most similar course first.
top_indices = cosine_similarities[0].argsort()[-top_k:][::-1]
top_indices_with_relevance = add_relevance_factors(cosine_similarities[0], top_indices)

## displaying the results
## The order follows the relevance-boosted score, while the number printed is
## the raw cosine similarity.
for i in top_indices_with_relevance:
    print(f"Title: {course_titles[i]}")
    print(f"Cosine Similarity: {cosine_similarities[0][i]:.4f}")
    print("-" * 50)