import gradio as gr
from sentence_transformers import SentenceTransformer, util
import pandas as pd
import torch

# Load the fine-tuned model from Hugging Face
model = SentenceTransformer("adityasajja6/fine_tuned_mpnet_model")

# Load the cleaned courses dataset
courses_df = pd.read_csv('cleaned_analytics_vidhya_courses.csv')

# Load the precomputed course embeddings from the .pt file.
# map_location places them on the model's device so they can be compared with
# query embeddings regardless of the device they were saved from.
course_embeddings = torch.load('course_embeddings.pt', map_location=model.device)


def search_courses(query, top_k=5):
    """Return the courses whose embeddings are most similar to *query*.

    Args:
        query: Search text to encode with ``model``.
        top_k: Maximum number of results to return. Values larger than the
            number of available embeddings are clamped; values <= 0 yield
            an empty list.

    Returns:
        A list of ``(title, link, similarity_score)`` tuples ordered from
        most to least similar, with the score rounded to 4 decimal places.
        The list is empty when ``query`` is ``None`` or a blank string.
    """
    # Nothing meaningful to search for: skip encoding entirely.
    if query is None or (isinstance(query, str) and not query.strip()):
        return []

    # Create embedding for the query
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Compute similarity scores between the query and every course embedding
    similarities = util.pytorch_cos_sim(query_embedding, course_embeddings)[0]

    # topk raises if k exceeds the number of candidates, so clamp it first.
    k = min(top_k, similarities.shape[0])
    if k <= 0:
        return []

    top_results = similarities.topk(k=k)

    # topk already returns the scores alongside the indices, so there is no
    # need to index back into ``similarities`` for each hit.
    results = []
    for idx, score in zip(top_results.indices.tolist(),
                          top_results.values.tolist()):
        row = courses_df.iloc[idx]
        results.append((row['Title'], row['Link'], round(score, 4)))
    return results


def gradio_search(query):
    """Format the top matches for *query* as Markdown for the Gradio UI.

    Each result is rendered as ``"<title> (Score: <score>) - [Link](<link>)"``
    and results are separated by blank lines. When the search yields no
    results, a short hint is returned instead of an empty string.
    """
    results = search_courses(query)
    if not results:
        return "No matching courses found. Please enter a search query."

    return "\n\n".join(
        f"{title} (Score: {score}) - [Link]({link})"
        for title, link, score in results
    )


# Create a Gradio interface
interface = gr.Interface(
    fn=gradio_search,
    inputs=gr.Textbox(label="Search for courses"),
    outputs=gr.Markdown(label="Top Matching Courses"),
    title="Smart Course Search Tool",
    description="Enter a query to find the most relevant courses from Analytics Vidhya's free courses.",
)

# Launch the interface
if __name__ == "__main__":
    interface.launch()