File size: 2,182 Bytes
9d9d2f1
 
f1fe906
9d9d2f1
 
f1fe906
 
 
 
 
fe96be4
f1fe906
7af238c
9d9d2f1
f1fe906
9d9d2f1
f1fe906
9d9d2f1
fe96be4
f1fe906
9d9d2f1
 
f1fe906
7af238c
9d9d2f1
f1fe906
 
9d9d2f1
 
 
fe96be4
f1fe906
9d9d2f1
 
 
 
 
5e56944
9d9d2f1
f1fe906
9d9d2f1
 
f1fe906
9d9d2f1
 
fe96be4
9d9d2f1
 
 
 
 
f1fe906
9d9d2f1
 
 
 
 
 
 
 
fe96be4
f1fe906
5e56944
f1fe906
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr

# Startup: everything below runs once, at import time, so that each call to
# search_courses() only has to embed the incoming query.
print("Starting the application...")

# Load the course catalogue. The relative path is resolved against the
# current working directory.
print("Loading dataset...")
df = pd.read_csv('courses.csv')  # courses.csv
print(f"Dataset loaded. Number of rows: {df.shape[0]}")

# Load a pre-trained sentence transformer model
print("Loading Sentence Transformer model...")
model = SentenceTransformer('all-MiniLM-L6-v2')
print("Model loaded successfully.")

# Build one searchable text field per course (title + description + keywords)
# and embed it.
# Missing cells are replaced with "" before concatenating: adding a string to
# NaN yields NaN, so a single empty cell would otherwise turn the whole
# combined text of that row into NaN and a float would be passed to the
# encoder instead of text.
print("Generating embeddings for courses...")
df['combined_text'] = (
    df['title'].fillna('')
    + " "
    + df['description'].fillna('')
    + " "
    + df['keywords'].fillna('')
)
course_embeddings = model.encode(df['combined_text'].tolist(), convert_to_tensor=True)
print("Embeddings generated successfully.")

def search_courses(user_query: str, top_k: int = 5) -> list:
    """Return the courses whose embedded text is most similar to a query.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the precomputed ``course_embeddings``.

    Args:
        user_query: Free-text search string.
        top_k: Maximum number of courses to return. Defaults to 5. Fewer
            results are returned when the dataset has fewer rows.

    Returns:
        A list of ``{"title": ..., "description": ...}`` dicts ordered from
        most to least similar. The list is empty when the query is ``None``,
        empty or whitespace-only, or when ``top_k`` is not positive.
    """
    print(f"Received query: {user_query}")

    # Nothing meaningful to embed: skip the model call instead of ranking
    # courses against an empty string.
    if not user_query or not user_query.strip():
        print("Found 0 results.")
        return []

    # A zero top_k would make the slice below select every row, so bail out
    # explicitly for non-positive values.
    if top_k <= 0:
        print("Found 0 results.")
        return []

    # Encode the user query
    query_embedding = model.encode(user_query, convert_to_tensor=True)

    # Compute similarities between the query and each course embedding.
    # cosine_similarity works on 2-D numpy arrays, hence the reshape of the
    # single query vector into a one-row matrix.
    print("Calculating cosine similarities...")
    query_matrix = query_embedding.cpu().detach().numpy().reshape(1, -1)
    course_matrix = course_embeddings.cpu().detach().numpy()
    scores = cosine_similarity(query_matrix, course_matrix)[0]

    # argsort is ascending; reverse it and keep the first top_k positions to
    # get the best matches, highest score first.
    top_matches = scores.argsort()[::-1][:top_k]

    # Retrieve top matching courses
    rows = (df.iloc[i] for i in top_matches)
    results = [
        {"title": row["title"], "description": row["description"]}
        for row in rows
    ]
    print(f"Found {len(results)} results.")
    return results

# Gradio function for user interaction
def gradio_search(query):
    """Gradio callback: pass the submitted text to ``search_courses``.

    Returns whatever ``search_courses`` returns, unchanged.
    """
    return search_courses(query)

# Describe the web UI: a single text input wired to gradio_search, with the
# returned list rendered as JSON.
print("Setting up Gradio interface...")
_interface_settings = dict(
    fn=gradio_search,
    inputs="text",
    outputs="json",
    title="Smart Course Search",
    description="Find the most relevant courses based on your query.",
)
iface = gr.Interface(**_interface_settings)

# Start serving the interface.
print("Launching the app...")
iface.launch()

# Debug: reached once launch() has returned.
print("Application launched successfully.")