# metechmohit's picture
# Update app.py
# fd87433 verified
import pandas as pd
import requests
import streamlit as st
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import os
import requests
from bs4 import BeautifulSoup
def scrape_courses_from_page(url, timeout=30):
    """Scrape the course cards found on a single listing page.

    The page is fetched with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``. Every ``<header class="course-card__img-container">``
    that contains an ``<img class="course-card__img">`` yields one course.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait on the server before giving up, so a
            stalled connection cannot hang the app indefinitely.

    Returns:
        A list of dicts with the keys ``'title'`` (the image ``alt`` text),
        ``'image_url'`` (the image ``src``) and ``'course_link'`` (an
        absolute URL). Cards without an image or without a usable link are
        skipped.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    site_root = 'https://courses.analyticsvidhya.com'

    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    courses = []
    # Extract course title, image, and course link
    for course_card in soup.find_all('header', class_='course-card__img-container'):
        img_tag = course_card.find('img', class_='course-card__img')
        if not img_tag:
            continue

        # The card's link is the closest <a> that precedes the header.
        link_tag = course_card.find_previous('a')
        course_link = link_tag.get('href') if link_tag else None
        if not course_link:
            # No anchor (or an anchor without href): nothing to link to.
            continue
        if not course_link.startswith('http'):
            course_link = site_root + course_link

        courses.append({
            'title': img_tag.get('alt'),
            'image_url': img_tag.get('src'),
            'course_link': course_link,
        })
    return courses
def scrape_courses_from_all_pages(base_url, total_pages):
    """Scrape pages 1..``total_pages`` of a listing and combine the results.

    Each page URL is formed as ``{base_url}?page={n}`` and handed to
    ``scrape_courses_from_page``; pages are visited in ascending order.

    Args:
        base_url: Listing URL without the ``page`` query parameter.
        total_pages: Number of pages to visit, counted from 1.

    Returns:
        A ``pandas.DataFrame`` with one row per scraped course.
    """
    page_urls = (
        f"{base_url}?page={number}" for number in range(1, total_pages + 1)
    )
    collected = [
        course
        for page_url in page_urls
        for course in scrape_courses_from_page(page_url)
    ]
    return pd.DataFrame(collected)
# Listing page to scrape and the number of paginated pages to walk
base_url = "https://courses.analyticsvidhya.com/collections/courses"
total_pages = 8  # Assuming there are 8 pages of courses

# Scrape only when no cached CSV exists; later runs reuse the file
if not os.path.exists("scraped.csv"):
    courses_df = scrape_courses_from_all_pages(base_url, total_pages)
    courses_df.to_csv("scraped.csv", index=False)

# Load scraped courses data. A card whose image had no alt text is stored
# with an empty title, which pandas reads back as NaN and the encoder cannot
# embed, so those rows are dropped. The index is reset so that row positions
# match the positions of the vectors added to FAISS below.
df = pd.read_csv("scraped.csv").dropna(subset=['title']).reset_index(drop=True)

# Load the Hugging Face embedding model
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Generate one embedding per course title (same order as df)
course_embeddings = model.encode(df['title'].tolist())

# Create FAISS index for similarity search
dimension = course_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)  # Using L2 distance
# FAISS expects contiguous float32 input; make that explicit
index.add(np.ascontiguousarray(course_embeddings, dtype='float32'))
def search_courses(query, top_k=6):
    """Return the courses whose titles are closest to ``query``.

    The query is embedded with the module-level ``model`` and looked up in
    the module-level FAISS ``index``; matching rows are read from ``df``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of courses to return (default 6).

    Returns:
        A list of dicts with the keys ``'title'``, ``'image_url'``,
        ``'course_link'`` and ``'score'``, ordered by descending score.
        ``'score'`` is ``1 - distance`` as reported by FAISS; it can be
        negative and is only meaningful for ranking. If anything fails,
        the error is shown with ``st.error`` and an empty list is returned.
    """
    try:
        # Generate the embedding for the query
        query_embedding = model.encode([query])

        # Never request more neighbours than the index holds: FAISS pads
        # missing results with index -1, and df.iloc[-1] would silently
        # return the last course instead of failing.
        neighbour_count = min(top_k, index.ntotal)
        if neighbour_count <= 0:
            return []

        distances, indices = index.search(
            np.ascontiguousarray(query_embedding, dtype='float32'),
            neighbour_count,
        )

        # Collect the results based on the indices returned by FAISS
        results = []
        for idx, distance in zip(indices[0], distances[0]):
            if idx < 0:
                # Padding entry, not a real match
                continue
            course = df.iloc[idx]
            results.append({
                'title': course['title'],
                'image_url': course['image_url'],
                'course_link': course['course_link'],
                'score': 1 - distance,  # Smaller distance -> higher score
            })
        return sorted(results, key=lambda x: x['score'], reverse=True)
    except Exception as e:
        # UI boundary: report the problem to the user instead of crashing
        st.error(f"An error occurred in search_courses: {str(e)}")
        return []
def display_search_results(result_list):
    """Render search results in Streamlit.

    For every result the course image, a level-3 heading with the title and
    a green "View Course" button linking to the course (opened in a new
    tab) are written to the page. When ``result_list`` is empty, a "no
    results" message is written instead.

    Args:
        result_list: Iterable of dicts with the keys ``'title'``,
            ``'image_url'`` and ``'course_link'``.
    """
    # Function-scope import keeps this block self-contained.
    import html

    if not result_list:
        st.write("No results found. Please try a different query.")
        return

    button_style = (
        "background-color:#4CAF50; border:none; color:white; "
        "padding:10px 20px; text-align:center; text-decoration:none; "
        "display:inline-block; font-size:16px; margin:4px 2px; "
        "cursor:pointer; border-radius:5px;"
    )

    for item in result_list:
        course_title = item['title']
        course_image = item['image_url']
        course_link = item['course_link']

        st.image(course_image, use_column_width=True)
        st.write(f"### {course_title}")

        # The link comes from scraped data and is rendered with
        # unsafe_allow_html=True, so escape it before placing it inside
        # the href attribute to prevent markup injection.
        safe_link = html.escape(str(course_link), quote=True)
        button_html = (
            "\n"
            f'<a href="{safe_link}" target="_blank">\n'
            f'<button style="{button_style}">\n'
            "View Course\n"
            "</button>\n"
            "</a>\n"
        )
        st.markdown(button_html, unsafe_allow_html=True)
# Streamlit UI: page title, banner image, tagline and a divider
st.title("Analytics Vidhya Free Courses🔍")
st.image("cc.jpg")
st.markdown("#### 🔍🌐 Get the most appropriate course as per your learning requirement.")
st.markdown("<hr style='border:1px solid #eee;'>", unsafe_allow_html=True)

# Keyword box and the button that triggers a search
query = st.text_input(
    "Enter course related keywords...",
    help="Type in a keyword to find related free courses",
    placeholder="e.g., machine learning, data science, python",
)
search_button = st.button("Search")

# Search results section: a search runs only when the button was pressed
# and the keyword box is not empty; otherwise the user is prompted.
if not (search_button and query):
    st.write("Please enter a search query to find relevant courses.")
else:
    st.write("Collecting courses......")
    result_list = search_courses(query)
    if not result_list:
        st.write("No results found. Please try a different keyword.")
    else:
        st.markdown(f"### Top results for: {query}")
        display_search_results(result_list)

# Footer: centred grey credit line
st.markdown(
    "<p style='text-align:center; color:grey;'>Made by @metechmohit </p>",
    unsafe_allow_html=True,
)