Spaces:
Sleeping
Sleeping
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
# Default listing page; pass a different ``url`` to scrape another page with the same markup.
_ALL_FREE_COURSES_URL = "https://courses.analyticsvidhya.com/pages/all-free-courses"

# Fixed column order so that an empty scrape still yields a well-formed table/CSV header.
_COURSE_COLUMNS = ["title", "duration", "lessons", "rating", "price"]


def _split_duration_lessons(text: str) -> tuple:
    """Split text such as "2 Hours 10 Lessons" into ``(duration, lessons)``.

    When the " Hours" marker is absent the duration is unknown, so ``None`` is
    returned for it and the whole text is kept as the lessons value.
    """
    head, marker, tail = text.partition(" Hours")
    if not marker:
        return None, text
    return head + marker, tail.strip()


def scrape_courses(url: str = _ALL_FREE_COURSES_URL, timeout: float = 30) -> pd.DataFrame:
    """Scrape course cards from a listing page into a DataFrame.

    Args:
        url: Page to download. Defaults to the Analytics Vidhya free-courses page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with the columns ``title``, ``duration``, ``lessons``,
        ``rating`` and ``price`` -- one row per ``<a class="card-link">`` element
        that contains a title. ``duration``/``lessons`` are ``None`` when the
        card has no duration paragraph. The columns are present even when no
        cards are found.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    # requests applies no timeout by default, so without one a stalled server
    # would block this call forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty table.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    courses = []
    for course_card in soup.find_all("a", class_="card-link"):
        # find() returns None when the tag is missing; a card without a title
        # carries nothing usable, so skip it instead of crashing.
        title_tag = course_card.find("h2", class_="card-text")
        if title_tag is None:
            continue

        # Duration and lessons share one paragraph (if available).
        info_tag = course_card.find("p", class_="fs-14")
        if info_tag is None:
            duration, lessons = None, None
        else:
            duration, lessons = _split_duration_lessons(info_tag.get_text(strip=True))

        # Assuming rating is in a span with class "rating".
        rating_tag = course_card.find("span", class_="rating")
        # Assuming price is in a span with class "price"; courses without one are treated as free.
        price_tag = course_card.find("span", class_="price")

        courses.append({
            'title': title_tag.get_text(strip=True),
            'duration': duration,
            'lessons': lessons,
            'rating': rating_tag.get_text(strip=True) if rating_tag else "No rating",
            'price': price_tag.get_text(strip=True) if price_tag else "Free",
        })

    return pd.DataFrame(courses, columns=_COURSE_COLUMNS)
def main() -> None:
    """Scrape the course listing, print a preview, and write it to courses_data.csv."""
    scraped = scrape_courses()
    print(scraped)  # Preview the scraped data
    scraped.to_csv("courses_data.csv", index=False)


# Run the scraper and save the data to a CSV, but only when executed as a script.
if __name__ == "__main__":
    main()