Spaces:
Sleeping
Sleeping
| import json | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import numpy as np | |
| import gradio as gr | |
# Sentence-embedding model; search_courses() calls model.encode() on each query.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Course records with their stored embeddings.
# The encoding is stated explicitly so that decoding the JSON file does not
# depend on the platform's default locale encoding.
# NOTE(review): assumes the file was written as UTF-8 (the JSON default) -- confirm.
with open('./final_data_with_embeddings.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
| # Function to perform the search | |
def search_courses(user_query, top_k=4):
    """Return the courses most similar to ``user_query`` as a Markdown string.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the ``'embedding'`` value of every entry in the
    module-level ``data``. The best-scoring entries are rendered as numbered
    Markdown sections.

    Args:
        user_query: Free-text search query.
        top_k: Maximum number of courses to return (defaults to 4).

    Returns:
        A Markdown string with one numbered section per matching course,
        a short prompt message when the query is empty or only whitespace,
        or an empty string when there is nothing to rank.

    Raises:
        KeyError: If an entry lacks ``'embedding'`` or one of the displayed
            course fields.
    """
    # An empty query has no meaningful embedding; ask for input instead of
    # returning arbitrary "matches".
    if not user_query or not user_query.strip():
        return "Please enter a query to search for courses."

    # Nothing to rank (also avoids handing an empty matrix to scikit-learn).
    if not data or top_k <= 0:
        return ""

    query_embedding = model.encode(user_query)

    # Score every stored embedding in a single batched call rather than one
    # cosine_similarity call per entry.
    # NOTE(review): assumes `data` is a sequence of dicts whose 'embedding'
    # values all have the same length -- confirm against the JSON file.
    embeddings = np.array([entry['embedding'] for entry in data])
    scores = cosine_similarity([query_embedding], embeddings)[0]

    # A stable sort on the negated scores yields descending order while
    # keeping equally-scored entries in their original file order.
    best_indices = np.argsort(-scores, kind="stable")[:top_k]

    results = []
    for rank, idx in enumerate(best_indices, 1):
        det = data[int(idx)]
        results.append(
            f"{rank}. "
            f"**Category**: {det['Course Category']}\n\n"
            f"**Course Name**: {det['Course Name']}\n\n"
            f"**Course URL**: {det['Course Url']}\n\n"
            f"**Description**: {det['Course Description']}\n\n"
        )
    return "\n\n\n".join(results)
# Gradio UI: one text box in, Markdown out, backed by search_courses().
_interface_options = {
    "fn": search_courses,
    "inputs": "text",
    "outputs": "markdown",
    "title": "Course Search with Sentence Transformers",
    "description": "Enter a query to find the top 4 most similar courses.",
}
iface = gr.Interface(**_interface_options)

# Start the Gradio app.
iface.launch()