# BookScrapper / app.py
# HarshitaSuri's picture
# Update app.py
# db43731 verified
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from cryptography.fernet import Fernet
import gradio as gr
# URL template for one catalogue page; scrape_page fills the `{}` placeholder
# with the page number via str.format.
BASE_URL = "http://books.toscrape.com/catalogue/page-{}.html"
# Scrape a single page
def scrape_page(page):
    """Scrape one catalogue page and return the books listed on it.

    Args:
        page: Page number substituted into ``BASE_URL``.

    Returns:
        A list of dicts with ``"Title"``, ``"Price"`` and ``"Rating"`` keys,
        one per ``article.product_pod`` element. The list is empty when the
        request fails or the server does not answer with HTTP 200. A field
        that cannot be found in the markup is reported as ``"N/A"``.
    """
    url = BASE_URL.format(page)
    try:
        # A timeout keeps a stalled connection from blocking the app forever.
        response = requests.get(
            url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10
        )
    except requests.RequestException:
        # Treat a network failure like a non-200 answer: no books for this page.
        return []
    if response.status_code != 200:
        return []

    # Parse the raw bytes so BeautifulSoup detects the document's declared
    # charset. response.text falls back to ISO-8859-1 when the Content-Type
    # header carries no charset, which garbles the "£" in prices.
    soup = BeautifulSoup(response.content, "html.parser")

    books = []
    for book in soup.select("article.product_pod"):
        title_link = book.select_one("h3 a")
        title = title_link.get("title", "N/A") if title_link else "N/A"

        price_tag = book.select_one("p.price_color")
        price = price_tag.text if price_tag else "N/A"

        # The first <p> carries classes like ["star-rating", "Three"];
        # the second entry is the rating word.
        first_p = book.p
        rating_classes = first_p.get("class", []) if first_p else []
        rating = rating_classes[1] if len(rating_classes) > 1 else "N/A"

        books.append({"Title": title, "Price": price, "Rating": rating})
    return books
# Scrape multiple pages
def scrape_books(pages=1):
    """Scrape the first ``pages`` catalogue pages into a DataFrame.

    Args:
        pages: Number of catalogue pages to fetch, starting at page 1.
            Integral floats (e.g. ``3.0``) are accepted and truncated.

    Returns:
        A DataFrame with ``Title``, ``Price`` and ``Rating`` columns. The
        columns exist even when nothing was scraped, so callers can index
        ``df["Title"]`` on an empty result without a ``KeyError``.
    """
    # range() rejects floats; coerce in case a UI widget delivers 3.0.
    last_page = int(pages)

    all_books = []
    for page in range(1, last_page + 1):
        all_books.extend(scrape_page(page))
        if page < last_page:
            time.sleep(1)  # ethical scraping: pause between requests only

    return pd.DataFrame(all_books, columns=["Title", "Price", "Rating"])
# Mask titles (original title removed)
def mask_titles_only(df):
    """Return a copy of ``df`` whose ``Title`` values are masked.

    Each string title is reduced to its first character followed by
    ``"***"``. The placeholder ``"N/A"`` and non-string values (such as
    missing data) pass through unchanged, and an empty title becomes
    ``"***"``.

    Args:
        df: DataFrame of scraped books. It is not modified.

    Returns:
        A new DataFrame with masked titles. When ``df`` has no ``Title``
        column (for example an empty scrape result) the copy is returned
        as is.
    """

    def _mask(title):
        # Leave the placeholder and non-string cells (NaN/None) untouched.
        if not isinstance(title, str) or title == "N/A":
            return title
        # Slicing instead of title[0] avoids an IndexError on "".
        return title[:1] + "***"

    df_masked = df.copy()
    if "Title" in df_masked.columns:
        df_masked["Title"] = df_masked["Title"].apply(_mask)
    return df_masked
# Encrypt CSV
def encrypt_csv(df):
    """Write ``df`` as a Fernet-encrypted CSV and return its path and key.

    A fresh Fernet key is generated on every call. The DataFrame is
    serialised to CSV without the index, encrypted, and written to
    ``books_encrypted.csv`` in the current working directory.

    Args:
        df: DataFrame to serialise and encrypt.

    Returns:
        A ``(file_path, key)`` tuple, where ``key`` is the Fernet key
        decoded to ``str``.
    """
    secret = Fernet.generate_key()
    payload = df.to_csv(index=False).encode()
    target = "books_encrypted.csv"
    with open(target, "wb") as out:
        out.write(Fernet(secret).encrypt(payload))
    return target, secret.decode()
# Gradio interface function
def app_interface(pages, security_option):
    """Scrape books and prepare a downloadable file for the Gradio UI.

    Args:
        pages: Number of catalogue pages to scrape.
        security_option: ``"Mask Titles"`` masks the titles before saving,
            ``"Encrypt CSV"`` saves an encrypted CSV, and any other value
            saves the data as a plain CSV.

    Returns:
        A ``(dataframe, file_path, message)`` tuple for the three output
        components. In the encrypt case the message includes the key.
    """
    scraped = scrape_books(pages)

    # The encrypted export has its own writer and a key to report back.
    if security_option == "Encrypt CSV":
        token_path, secret = encrypt_csv(scraped)
        return scraped, token_path, f"Encrypted CSV ready to download. Key: {secret}"

    # Masked and plain exports differ only in table, file name and message.
    if security_option == "Mask Titles":
        table = mask_titles_only(scraped)
        out_path = "books_masked.csv"
        note = "Masked CSV ready to download."
    else:
        table = scraped
        out_path = "books.csv"
        note = "CSV ready to download."

    table.to_csv(out_path, index=False)
    return table, out_path, note
# Gradio App
# Input widgets: how many pages to scrape and how to protect the export.
page_count_input = gr.Slider(1, 50, step=1, label="Number of Pages to Scrape", value=1)
security_input = gr.Radio(["None", "Mask Titles", "Encrypt CSV"], label="Security Option")

# Output widgets, in the order app_interface returns its values.
books_table = gr.Dataframe(label="Books Data")
download_file = gr.File(label="Download CSV File")
info_box = gr.Textbox(label="Info / Encryption Key")

iface = gr.Interface(
    fn=app_interface,
    inputs=[page_count_input, security_input],
    outputs=[books_table, download_file, info_box],
    title="Secure Book Scraper",
    description="Scrape book details from 'Books to Scrape'. Mask book titles or encrypt the CSV for secure storage.",
)

# Start the web UI as soon as the module is executed.
iface.launch()