# Import required libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import gradio as gr
# Function to scrape a single post
def scrape_post(link):
    """Fetch one post page and extract its metadata.

    Args:
        link: Absolute URL of the post to scrape.

    Returns:
        dict with keys 'title', 'content', 'featured_img' (str, '' when
        the element is absent), 'category' and 'tags' (lists of str).

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Timeout prevents one hung connection from stalling the whole crawl;
    # raise_for_status surfaces 404/500 pages instead of parsing them.
    response = requests.get(link, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Guard every lookup: individual posts may lack any of these elements,
    # and select_one returns None in that case.
    title_el = soup.select_one('h1')
    content_el = soup.select_one('section.entry')
    img_el = soup.select_one('img.entry-image')
    return {
        'title': title_el.text.strip() if title_el else '',
        'content': content_el.text.strip() if content_el else '',
        'featured_img': img_el['src'] if img_el and img_el.has_attr('src') else '',
        'category': [a.text for a in soup.select('.entry-cat a')],
        'tags': [a.text for a in soup.select('.entry-tag a')],
    }
# Function to scrape multiple pages
def scrape_unwinnable_movies():
    """Crawl every page of Unwinnable's Movies/TV category.

    Follows the 'a.next' pagination link until it disappears, scraping
    each post found along the way via scrape_post().

    Returns:
        pandas.DataFrame with one row per post (columns: title, content,
        featured_img, category, tags); empty DataFrame if no posts.
    """
    base_url = 'https://unwinnable.com/category/sections/movies-tv/'
    next_page = base_url
    all_posts = []
    seen_pages = set()  # guard against a pagination cycle looping forever
    while next_page and next_page not in seen_pages:
        seen_pages.add(next_page)
        response = requests.get(next_page, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # Only follow anchors that actually carry an href attribute.
        post_links = [a['href'] for a in soup.select('.entry-title a')
                      if a.has_attr('href')]
        for link in post_links:
            all_posts.append(scrape_post(link))
        # Advance to the next page; .get() avoids a KeyError when the
        # 'next' anchor exists but has no href.
        next_page_elem = soup.select_one('a.next')
        next_page = next_page_elem.get('href') if next_page_elem else None
    return pd.DataFrame(all_posts)
# Gradio Interface function
def scrape_and_display():
    """Run the scraper and return its DataFrame for the Gradio output.

    Returns the value directly instead of ``gr.DataFrame.update(value=...)``:
    the update-dict API was deprecated in Gradio 3.x and removed in
    Gradio 4, while a plain return value works in both.
    """
    return scrape_unwinnable_movies()
# Create Gradio Interface (no inputs: the button simply triggers the scrape).
demo = gr.Interface(
    fn=scrape_and_display,
    inputs=[],
    outputs="dataframe",
    title="Unwinnable Movies Scraper",
)

# Launch only when executed as a script, so importing this module
# (e.g. for tests or reuse of the scraper functions) has no side effects.
if __name__ == "__main__":
    demo.launch()