File size: 988 Bytes
64aa0bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from sentence_transformers import SentenceTransformer
import pandas as pd
import torch

# Load pre-trained model for sentence embedding
# NOTE(review): this checkpoint is reportedly marked deprecated on its model
# card; confirm before relying on it. Left unchanged to keep results stable.
model = SentenceTransformer('distilbert-base-nli-stsb-mean-tokens')

# Load scraped courses data
courses_df = pd.read_csv("courses_data.csv")

# Encode course descriptions in one batched call instead of row by row, so
# the model can batch the work internally.
# Missing descriptions (NaN after read_csv) are encoded as empty strings so a
# single blank cell does not abort the whole load; the 'description' column
# itself is left untouched.
_descriptions = courses_df['description'].fillna('').astype(str).tolist()
# Skip the model call entirely when there is nothing to encode.
_embeddings = (
    model.encode(_descriptions, convert_to_tensor=True) if _descriptions else []
)
# Store one tensor per row so each course keeps its own embedding vector.
courses_df['embedding'] = list(_embeddings)

def search_courses(query, top_k=5):
    """Return the courses whose descriptions best match ``query``.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the vectors stored in
    ``courses_df['embedding']``.

    Args:
        query: Search text passed to ``model.encode``.
        top_k: Maximum number of courses to return. Values larger than the
            number of courses are clamped; values <= 0 yield no results.

    Returns:
        A list of dicts with keys ``'title'``, ``'description'`` and
        ``'curriculum'``, ordered from highest to lowest similarity. The
        list is empty when there are no courses or ``top_k`` is not positive.
    """
    # torch.topk raises when k exceeds the number of scores and torch.stack
    # raises on an empty list, so bound k up front and bail out early.
    k = min(top_k, len(courses_df))
    if k <= 0:
        return []

    query_embedding = model.encode(query, convert_to_tensor=True)
    course_embeddings = torch.stack(courses_df['embedding'].tolist())
    # The query vector broadcasts against every course row; similarity is
    # taken along dim=1 (the function's default, spelled out for clarity).
    cosine_scores = torch.nn.functional.cosine_similarity(
        query_embedding, course_embeddings, dim=1
    )
    top_results = torch.topk(cosine_scores, k=k)

    # topk returns indices sorted by descending score; keep that order.
    ranked_rows = (courses_df.iloc[i] for i in top_results.indices.tolist())
    return [
        {
            'title': course['title'],
            'description': course['description'],
            'curriculum': course['curriculum'],
        }
        for course in ranked_rows
    ]