Spaces:
Sleeping
Sleeping
File size: 2,335 Bytes
9d9d2f1 f1fe906 9d9d2f1 f1fe906 7af238c 9d9d2f1 f1fe906 9d9d2f1 f1fe906 9d9d2f1 f1fe906 9d9d2f1 f1fe906 7af238c 9d9d2f1 f1fe906 9d9d2f1 f1fe906 9d9d2f1 5e56944 9d9d2f1 f1fe906 9d9d2f1 f1fe906 9d9d2f1 f1fe906 9d9d2f1 f1fe906 5e56944 f1fe906 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
# Debug: Print start of application
print("Starting the application...")

# Load the course catalog; courses.csv must sit next to app.py.
print("Loading dataset...")
df = pd.read_csv('courses.csv')
print(f"Dataset loaded. Number of rows: {df.shape[0]}")

# Load a pre-trained sentence transformer model for embedding text.
print("Loading Sentence Transformer model...")
model = SentenceTransformer('all-MiniLM-L6-v2')
print("Model loaded successfully.")

# Build one combined text column (title + description + keywords) to embed.
# fillna("") guards against missing cells: pandas string concatenation with
# NaN yields NaN for the whole row, which would crash model.encode() when it
# receives a non-string element.
print("Generating embeddings for courses...")
df['combined_text'] = (
    df['title'].fillna("") + " "
    + df['description'].fillna("") + " "
    + df['keywords'].fillna("")
)
course_embeddings = model.encode(df['combined_text'].tolist(), convert_to_tensor=True)
print("Embeddings generated successfully.")
def search_courses(user_query, top_k=5):
    """Return the courses most semantically similar to a free-text query.

    Parameters
    ----------
    user_query : str
        The user's search text.
    top_k : int, optional
        Maximum number of results to return (default 5, preserving the
        original behavior). If fewer courses exist, all are returned.

    Returns
    -------
    list[dict]
        Dicts with "title" and "description" keys, ordered from most to
        least similar.
    """
    print(f"Received query: {user_query}")
    # Embed the query into the same vector space as the course embeddings.
    query_embedding = model.encode(user_query, convert_to_tensor=True)

    # Cosine similarity between the query (1, dim) and all courses (n, dim)
    # yields a (1, n) similarity matrix.
    print("Calculating cosine similarities...")
    similarities = cosine_similarity(
        query_embedding.cpu().detach().numpy().reshape(1, -1),
        course_embeddings.cpu().detach().numpy()
    )

    # argsort is ascending, so take the last top_k indices and reverse them
    # to get most-similar-first order. A short tail slice handles n < top_k.
    top_matches = similarities.argsort()[0][-top_k:][::-1]

    results = [
        {"title": df.iloc[i]["title"], "description": df.iloc[i]["description"]}
        for i in top_matches
    ]
    print(f"Found {len(results)} results.")
    return results
def gradio_search(query):
    """Gradio entry point: delegate the query to search_courses unchanged."""
    return search_courses(query)
# Assemble the Gradio UI configuration and wire it to the search function.
print("Setting up Gradio interface...")
interface_config = dict(
    fn=gradio_search,
    inputs="text",
    outputs="json",
    title="Smart Course Search",
    description="Find the most relevant courses based on your query.",
)
iface = gr.Interface(**interface_config)

# Start the web app (works locally and on Hugging Face Spaces).
print("Launching the app...")
iface.launch()

# Debug: mark that startup completed.
print("Application launched successfully.")
|