# scrape1 / app.py
# 13ze's picture
# Update app.py
# 94cefa6 verified
# Import required libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import gradio as gr
# Function to scrape a single post
def scrape_post(link, timeout=30):
    """Download one post page and extract its main fields.

    Args:
        link: URL of the post page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        A dict with the keys ``title``, ``content``, ``featured_img``,
        ``category`` and ``tags``. ``title`` and ``content`` are stripped
        text (``''`` when the element is absent), ``featured_img`` is the
        image's ``src`` attribute (``None`` when the image or the attribute
        is absent), and ``category`` / ``tags`` are lists of link texts
        (possibly empty).

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    response = requests.get(link, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a post.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    def stripped_text(selector):
        # select_one returns None when nothing matches; do not dereference it.
        node = soup.select_one(selector)
        return node.text.strip() if node is not None else ''

    image = soup.select_one('img.entry-image')
    featured_img = image.get('src') if image is not None else None

    return {
        'title': stripped_text('h1'),
        'content': stripped_text('section.entry'),
        'featured_img': featured_img,
        'category': [a.text for a in soup.select('.entry-cat a')],
        'tags': [a.text for a in soup.select('.entry-tag a')],
    }
# Function to scrape multiple pages
def scrape_unwinnable_movies(max_pages=None, timeout=30):
    """Crawl the Unwinnable movies/TV category and scrape every listed post.

    Starting from the category's first listing page, each post linked from
    an ``.entry-title`` anchor is scraped with ``scrape_post``; the crawl
    then follows the ``a.next`` link until there is none.

    Args:
        max_pages: Optional upper bound on the number of listing pages to
            visit. ``None`` (the default) follows pagination to the end.
        timeout: Seconds to wait for each listing-page request.

    Returns:
        A ``pandas.DataFrame`` with one row per post and the columns
        ``title``, ``content``, ``featured_img``, ``category``, ``tags``.
        The columns are present even when no post was found.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status while fetching a listing page.
    """
    from urllib.parse import urljoin

    base_url = 'https://unwinnable.com/category/sections/movies-tv/'
    columns = ['title', 'content', 'featured_img', 'category', 'tags']

    all_posts = []
    visited = set()
    next_page = base_url
    # Stop on a missing "next" link, and also when it points at a page that
    # was already crawled, which would otherwise loop forever.
    while next_page and next_page not in visited:
        if max_pages is not None and len(visited) >= max_pages:
            break
        visited.add(next_page)

        response = requests.get(next_page, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # `a[href]` skips anchors without a target; urljoin resolves
        # relative links against the page they were found on.
        for anchor in soup.select('.entry-title a[href]'):
            all_posts.append(scrape_post(urljoin(next_page, anchor['href'])))

        next_page_elem = soup.select_one('a.next')
        next_href = next_page_elem.get('href') if next_page_elem is not None else None
        next_page = urljoin(next_page, next_href) if next_href else None

    # Explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(all_posts, columns=columns)
# Gradio Interface function
def scrape_and_display():
    """Run the scraper and return its result for the Gradio dataframe output.

    Returns:
        The ``pandas.DataFrame`` produced by ``scrape_unwinnable_movies``.
    """
    # Return the DataFrame itself: the component-level `update()` helper
    # (`gr.DataFrame.update`) no longer exists in Gradio 4, and a plain
    # DataFrame is accepted by the "dataframe" output.
    return scrape_unwinnable_movies()
# Create Gradio Interface
demo = gr.Interface(
    fn=scrape_and_display,
    inputs=[],
    outputs="dataframe",
    title="Unwinnable Movies Scraper",
)
# Launch the app only when this file is executed as a script, so that
# importing the module does not start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()