# app.py - Smart Course Search: semantic search over courses.csv with a Gradio UI.
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr

# Debug: Print start of application
print("Starting the application...")

# Load the dataset from the same directory
print("Loading dataset...")
df = pd.read_csv('courses.csv')  # Assuming courses.csv is in the same directory as app.py
print(f"Dataset loaded. Number of rows: {df.shape[0]}")

# Load a pre-trained sentence transformer model
print("Loading Sentence Transformer model...")
model = SentenceTransformer('all-MiniLM-L6-v2')
print("Model loaded successfully.")

# Create a combined column for embedding (title + description + keywords).
# pandas propagates NaN through string concatenation, so a single missing
# cell would turn the whole combined text into NaN and hand a float to
# model.encode. Missing cells are therefore replaced with "" and every value
# is cast to str before joining. The original columns in df are not modified.
print("Generating embeddings for courses...")
title_text = df['title'].fillna('').astype(str)
description_text = df['description'].fillna('').astype(str)
keywords_text = df['keywords'].fillna('').astype(str)
df['combined_text'] = title_text + " " + description_text + " " + keywords_text
course_embeddings = model.encode(df['combined_text'].tolist(), convert_to_tensor=True)
print("Embeddings generated successfully.")

def search_courses(user_query, top_k=5):
    """Return the courses most similar to a free-text query.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against the precomputed ``course_embeddings``; the best
    matches are then looked up in ``df``.

    Args:
        user_query: Text entered by the user.
        top_k: Maximum number of courses to return. Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        A list of ``{"title": ..., "description": ...}`` dicts ordered from
        most to least similar. The list is empty when the query is
        ``None``/blank or when ``top_k`` is not positive.
    """
    print(f"Received query: {user_query}")

    # A blank query carries no meaning to rank against, and a non-positive
    # top_k would make the slice below misbehave (e.g. [:-1] drops only the
    # last item), so both cases short-circuit to "no results".
    if not user_query or not user_query.strip() or top_k <= 0:
        return []

    # Encode the user query
    query_embedding = model.encode(user_query, convert_to_tensor=True)

    # Compute cosine similarities between the query and each course embedding
    print("Calculating cosine similarities...")
    similarities = cosine_similarity(
        query_embedding.cpu().detach().numpy().reshape(1, -1),
        course_embeddings.cpu().detach().numpy()
    )

    # argsort is ascending, so reverse it and keep the first top_k indices;
    # slicing naturally copes with datasets smaller than top_k.
    top_matches = similarities.argsort()[0][::-1][:top_k]

    # Retrieve top matching courses
    results = []
    for i in top_matches:
        row = df.iloc[i]
        results.append({"title": row["title"], "description": row["description"]})
    print(f"Found {len(results)} results.")
    return results

# Define Gradio function for user interaction
def gradio_search(query):
    """Gradio callback: pass the query text straight to ``search_courses``.

    Returns whatever ``search_courses`` returns, unchanged.
    """
    return search_courses(query)

# Set up Gradio interface
print("Setting up Gradio interface...")
iface = gr.Interface(
    fn=gradio_search,
    inputs="text",
    outputs="json",
    title="Smart Course Search",
    description="Find the most relevant courses based on your query.",
)

# Launch the app (for local testing or deploying in Hugging Face Spaces).
# Guarded so that importing this module (for example to call search_courses
# from another script) does not start a web server as a side effect.
if __name__ == "__main__":
    print("Launching the app...")
    # NOTE: when run as a plain script, launch() blocks the main thread while
    # the server is running (Gradio's default prevent_thread_lock=False), so
    # the statement after it is reached only once launch() hands control back.
    iface.launch()
    print("iface.launch() returned.")