# AnalyticsVidhya / app.py
# adityasajja6's picture
# Update app.py
# bb3fc71 verified
import gradio as gr
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import torch
# Load the fine-tuned model from Hugging Face
model = SentenceTransformer("adityasajja6/fine_tuned_mpnet_model")

# Load the cleaned courses dataset
courses_df = pd.read_csv('cleaned_analytics_vidhya_courses.csv')

# Load the precomputed course embeddings from the .pt file.
# map_location places them on the device the model runs on, so a file that
# was saved on a GPU machine still loads on a CPU-only host, and the tensors
# can be compared directly with query embeddings produced by the model.
# NOTE(review): torch.load unpickles the file - only load trusted artifacts.
# NOTE(review): presumably row i of the embeddings corresponds to row i of
# courses_df - confirm against the script that generated the .pt file.
course_embeddings = torch.load('course_embeddings.pt', map_location=model.device)
# Define the search function
def search_courses(query, top_k=5):
    """Return the courses most semantically similar to ``query``.

    The query is embedded with the module-level ``model`` and scored by
    cosine similarity against the precomputed ``course_embeddings``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of results to return. Values larger than the
            number of available course embeddings are clamped to that
            number; values below 1 yield an empty list.

    Returns:
        A list of ``(title, link, score)`` tuples in the order produced by
        ``topk`` (highest similarity first), where ``title`` and ``link``
        come from the ``'Title'`` and ``'Link'`` columns of ``courses_df``
        and ``score`` is the cosine similarity rounded to four decimals.
    """
    # Create embedding for the query
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Row 0 of the similarity matrix: one score per course embedding
    similarities = util.pytorch_cos_sim(query_embedding, course_embeddings)[0]

    # topk raises if k exceeds the number of scores, so clamp it first
    k = min(top_k, similarities.shape[0])
    if k <= 0:
        return []

    top_results = similarities.topk(k=k)

    # Extract the titles, links, and similarity scores of the top results
    results = []
    for idx, score in zip(top_results.indices.tolist(), top_results.values.tolist()):
        row = courses_df.iloc[idx]  # positional lookup, done once per result
        results.append((row['Title'], row['Link'], round(score, 4)))
    return results
# Define the Gradio callback that formats search results as Markdown
def gradio_search(query):
    """Run a course search and render the results as Markdown text.

    Args:
        query: Text entered in the search box.

    Returns:
        One entry per match in the form
        ``<title> (Score: <score>) - [Link](<url>)``, with entries separated
        by a blank line. If ``query`` is empty, ``None`` or only whitespace,
        a short prompt asking for a query is returned instead and no search
        is performed.
    """
    # A blank query has nothing meaningful to rank against; skip the search
    if not query or not query.strip():
        return "Please enter a search query."

    results = search_courses(query)
    return "\n\n".join(
        f"{title} (Score: {score}) - [Link]({link})"
        for title, link, score in results
    )
# Assemble the UI: a single text box feeds gradio_search, whose return value
# is rendered in a Markdown component.
_query_box = gr.Textbox(label="Search for courses")
_results_view = gr.Markdown(label="Top Matching Courses")

interface = gr.Interface(
    title="Smart Course Search Tool",
    description="Enter a query to find the most relevant courses from Analytics Vidhya's free courses.",
    fn=gradio_search,
    inputs=_query_box,
    outputs=_results_view,
)

# Only launch the app when this file is executed as a script
if __name__ == "__main__":
    interface.launch()