# Import required libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import gradio as gr
# Function to scrape a single post
def scrape_post(link):
    """Fetch one post page and extract its metadata.

    Args:
        link: Absolute URL of the post to scrape.

    Returns:
        dict with keys 'title', 'content', 'featured_img' (str, '' when
        the element is absent), 'category' and 'tags' (lists of str).

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Timeout prevents one hung connection from stalling the whole crawl;
    # raise_for_status surfaces 404/500 pages instead of parsing them.
    response = requests.get(link, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Guard every lookup: individual posts may lack any of these elements,
    # and select_one returns None in that case.
    title_el = soup.select_one('h1')
    content_el = soup.select_one('section.entry')
    img_el = soup.select_one('img.entry-image')
    return {
        'title': title_el.text.strip() if title_el else '',
        'content': content_el.text.strip() if content_el else '',
        'featured_img': img_el['src'] if img_el and img_el.has_attr('src') else '',
        'category': [a.text for a in soup.select('.entry-cat a')],
        'tags': [a.text for a in soup.select('.entry-tag a')],
    }
# Function to scrape multiple pages
def scrape_unwinnable_movies():
    """Crawl every page of Unwinnable's Movies/TV category.

    Follows the 'a.next' pagination link until it disappears, scraping
    each post found along the way via scrape_post().

    Returns:
        pandas.DataFrame with one row per post (columns: title, content,
        featured_img, category, tags); empty DataFrame if no posts.
    """
    base_url = 'https://unwinnable.com/category/sections/movies-tv/'
    next_page = base_url
    all_posts = []
    seen_pages = set()  # guard against a pagination cycle looping forever
    while next_page and next_page not in seen_pages:
        seen_pages.add(next_page)
        response = requests.get(next_page, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # Only follow anchors that actually carry an href attribute.
        post_links = [a['href'] for a in soup.select('.entry-title a')
                      if a.has_attr('href')]
        for link in post_links:
            all_posts.append(scrape_post(link))
        # Advance to the next page; .get() avoids a KeyError when the
        # 'next' anchor exists but has no href.
        next_page_elem = soup.select_one('a.next')
        next_page = next_page_elem.get('href') if next_page_elem else None
    return pd.DataFrame(all_posts)
# Gradio Interface function
def scrape_and_display():
    """Run the scraper and return its DataFrame for the Gradio output.

    Returns the value directly instead of ``gr.DataFrame.update(value=...)``:
    the update-dict API was deprecated in Gradio 3.x and removed in
    Gradio 4, while a plain return value works in both.
    """
    return scrape_unwinnable_movies()
# Create Gradio Interface (no inputs: the button simply triggers the scrape).
demo = gr.Interface(
    fn=scrape_and_display,
    inputs=[],
    outputs="dataframe",
    title="Unwinnable Movies Scraper",
)

# Launch only when executed as a script, so importing this module
# (e.g. for tests or reuse of the scraper functions) has no side effects.
if __name__ == "__main__":
    demo.launch()