Spaces:
Build error
Build error
| import pandas as pd | |
| import requests | |
| import streamlit as st | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| import numpy as np | |
| import os | |
| import requests | |
| from bs4 import BeautifulSoup | |
| # Function to scrape courses from a single page using BeautifulSoup | |
def scrape_courses_from_page(url):
    """Scrape the course cards found on a single listing page.

    Args:
        url: Address of the listing page to download.

    Returns:
        A list of dicts, one per course card that has both an image tag and
        a link, with the keys ``'title'`` (the image's ``alt`` text),
        ``'image_url'`` (the image's ``src``) and ``'course_link'`` (an
        absolute URL). ``'title'`` and ``'image_url'`` are ``None`` when the
        corresponding attribute is absent from the image tag.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched,
            including when the request times out.
    """
    # Imported locally so this function does not depend on a file-level import.
    from urllib.parse import urljoin

    site_root = 'https://courses.analyticsvidhya.com'

    # Without a timeout a stalled connection would block the app indefinitely.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')

    courses = []
    course_cards = soup.find_all('header', class_='course-card__img-container')
    for course_card in course_cards:
        img_tag = course_card.find('img', class_='course-card__img')
        if img_tag is None:
            continue

        # The course link is read from the nearest <a> that precedes the card
        # header in the document. Cards without such a tag, or whose tag has
        # no href, are skipped instead of raising AttributeError.
        link_tag = course_card.find_previous('a')
        href = link_tag.get('href') if link_tag is not None else None
        if not href:
            continue

        courses.append({
            'title': img_tag.get('alt'),
            'image_url': img_tag.get('src'),
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones (with or without a leading slash) against the site root.
            'course_link': urljoin(site_root, href),
        })
    return courses
| # Function to scrape across multiple pages using BeautifulSoup | |
def scrape_courses_from_all_pages(base_url, total_pages):
    """Scrape listing pages 1..total_pages and combine them into one table.

    Each page is requested as ``{base_url}?page={n}`` and handed to
    ``scrape_courses_from_page``.

    Args:
        base_url: Listing URL without the ``page`` query parameter.
        total_pages: Number of pages to fetch, starting from page 1.

    Returns:
        A ``pandas.DataFrame`` with the columns ``title``, ``image_url`` and
        ``course_link``, one row per scraped course. The columns are present
        even when no course was found.
    """
    columns = ['title', 'image_url', 'course_link']

    all_courses = []
    for page_num in range(1, total_pages + 1):
        all_courses.extend(scrape_courses_from_page(f"{base_url}?page={page_num}"))

    # Pinning the columns keeps the schema stable when nothing was scraped;
    # otherwise the frame has no columns at all and later column lookups fail.
    return pd.DataFrame(all_courses, columns=columns)
# Where the course listing lives and how many listing pages to walk
base_url = "https://courses.analyticsvidhya.com/collections/courses"
total_pages = 8  # Assuming there are 8 pages of courses

# Scrape only when no CSV from an earlier run is on disk; an existing file is
# reused as-is.
if not os.path.exists("scraped.csv"):
    courses_df = scrape_courses_from_all_pages(base_url, total_pages)
    courses_df.to_csv("scraped.csv", index=False)

# The course table is always read back from the CSV, whether it was written
# just now or already existed.
df = pd.read_csv("scraped.csv")

# Sentence-embedding model loaded by its Hugging Face identifier
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# One embedding vector per course title, in the same order as the rows of df
course_titles = df['title'].tolist()
course_embeddings = model.encode(course_titles)

# FAISS index over the title embeddings, compared by L2 distance
dimension = course_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(course_embeddings))
| # Function to search courses using embeddings | |
def search_courses(query, top_k=6):
    """Return the courses whose titles are closest to ``query``.

    The query is embedded with the module-level ``model`` and looked up in
    the module-level FAISS ``index``; matching rows are read from ``df``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of courses to return (default 6).

    Returns:
        A list of dicts with the keys ``'title'``, ``'image_url'``,
        ``'course_link'`` and ``'score'``, ordered by descending score.
        ``'score'`` is ``1 - distance`` for the distance FAISS reports; it is
        only used for ordering here.
        NOTE(review): IndexFlatL2 is documented to report squared L2
        distances, so the score can be negative - confirm before showing it.
        An empty list is returned when the index is empty or when an error
        occurs; errors are reported through ``st.error`` instead of raised.
    """
    try:
        # Asking FAISS for more neighbours than it stores makes it pad the
        # result with index -1, which df.iloc would map to the last row.
        k = min(top_k, index.ntotal)
        if k <= 0:
            return []

        query_embedding = model.encode([query])
        distances, indices = index.search(np.array(query_embedding), k)

        results = []
        for idx, distance in zip(indices[0], distances[0]):
            if idx < 0:
                # Defensive: skip any padding entry that still slips through.
                continue
            course = df.iloc[idx]
            results.append({
                'title': course['title'],
                'image_url': course['image_url'],
                'course_link': course['course_link'],
                'score': 1 - distance,  # smaller distance -> larger score
            })
        return sorted(results, key=lambda x: x['score'], reverse=True)
    except Exception as e:
        # UI boundary: show the failure in the page rather than crash the app.
        st.error(f"An error occurred in search_courses: {str(e)}")
        return []
| # Function to display search results in Streamlit | |
def display_search_results(result_list):
    """Render search results in the Streamlit page.

    For every result the course image, its title as a heading and a
    "View Course" button linking to the course are written to the page.
    When ``result_list`` is empty a "no results" message is shown instead.

    Args:
        result_list: Iterable of dicts with the keys ``'title'``,
            ``'image_url'`` and ``'course_link'``.
    """
    # Imported locally so this function does not depend on a file-level import.
    import html

    if not result_list:
        st.write("No results found. Please try a different query.")
        return

    for item in result_list:
        course_title = item['title']
        course_image = item['image_url']
        course_link = item['course_link']

        st.image(course_image, use_column_width=True)
        st.write(f"### {course_title}")

        # The link comes from scraped page content and is rendered as raw
        # HTML, so it is escaped to keep it inside the href attribute.
        safe_link = html.escape(str(course_link), quote=True)
        button_html = (
            "\n"
            f'<a href="{safe_link}" target="_blank">\n'
            '<button style="background-color:#4CAF50; border:none; color:white; padding:10px 20px; text-align:center; text-decoration:none; display:inline-block; font-size:16px; margin:4px 2px; cursor:pointer; border-radius:5px;">\n'
            "View Course\n"
            "</button>\n"
            "</a>\n"
        )
        st.markdown(button_html, unsafe_allow_html=True)
# ---- Page header ----
st.title("Analytics Vidhya Free Courses🔍")
st.image("cc.jpg")
st.markdown("#### 🔍🌐 Get the most appropriate course as per your learning requirement.")
st.markdown("<hr style='border:1px solid #eee;'>", unsafe_allow_html=True)

# ---- Search controls ----
query = st.text_input(
    "Enter course related keywords...",
    placeholder="e.g., machine learning, data science, python",
    help="Type in a keyword to find related free courses",
)
search_button = st.button("Search")

# ---- Results ----
# A search runs only when the button was pressed AND the text box is not
# empty; in every other case the prompt below is shown.
if not search_button or not query:
    st.write("Please enter a search query to find relevant courses.")
else:
    st.write("Collecting courses......")
    result_list = search_courses(query)
    if not result_list:
        st.write("No results found. Please try a different keyword.")
    else:
        st.markdown(f"### Top results for: {query}")
        display_search_results(result_list)

# ---- Footer ----
st.markdown(
    "<p style='text-align:center; color:grey;'>Made by @metechmohit </p>",
    unsafe_allow_html=True,
)