Spaces:
Sleeping
Sleeping
| from sentence_transformers import SentenceTransformer | |
| import pandas as pd | |
| import torch | |
# Load the pre-trained sentence-embedding model. The same model is used for
# the course descriptions below and for search queries.
model = SentenceTransformer('distilbert-base-nli-stsb-mean-tokens')

# Load scraped courses data
courses_df = pd.read_csv("courses_data.csv")

# Encode course descriptions.
# read_csv yields NaN for empty cells (and may parse numeric-looking cells as
# numbers), neither of which the model can encode. Normalise to strings for
# encoding only; the 'description' column itself is left untouched.
courses_df['embedding'] = (
    courses_df['description']
    .fillna('')
    .astype(str)
    .apply(lambda text: model.encode(text, convert_to_tensor=True))
)
def search_courses(query, top_k=5):
    """Return the courses whose descriptions are most similar to *query*.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against every tensor stored in
    ``courses_df['embedding']``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of courses to return. Values larger than the
            number of available courses are clamped; values below 1 yield
            an empty list.

    Returns:
        A list of dicts with the keys ``'title'``, ``'description'`` and
        ``'curriculum'``, ordered from most to least similar.
    """
    # torch.topk raises when k exceeds the number of scores, and torch.stack
    # raises on an empty list, so clamp k and bail out early when there is
    # nothing to rank.
    k = min(top_k, len(courses_df))
    if k <= 0:
        return []

    query_embedding = model.encode(query, convert_to_tensor=True)
    course_embeddings = torch.stack(courses_df['embedding'].tolist())

    # Give the query an explicit leading batch dimension and name the
    # reduction axis, rather than relying on implicit 1-D -> 2-D broadcasting.
    # NOTE(review): assumes each stored embedding is a 1-D tensor - confirm.
    cosine_scores = torch.nn.functional.cosine_similarity(
        query_embedding.unsqueeze(0), course_embeddings, dim=1
    )
    top_results = torch.topk(cosine_scores, k=k)

    results = []
    for idx in top_results.indices.tolist():
        course = courses_df.iloc[idx]
        results.append({
            'title': course['title'],
            'description': course['description'],
            'curriculum': course['curriculum'],
        })
    return results