Spaces:

kgboom
/

scrapey

Sleeping

kgboom committed on Jul 26, 2024

Commit

065bbe3

verified ·

1 Parent(s): fe48522

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,9 +2,8 @@ import streamlit as st
 import requests
 from bs4 import BeautifulSoup
 import scrapy
-from scrapy.crawler import CrawlerProcess
 import pandas as pd
-import io
 # Streamlit app title
 st.title("Web Scraping with Streamlit")
@@ -23,19 +22,8 @@ def scrape_with_beautifulsoup(url):
     quotes = soup.find_all('span', class_='text')
     return [quote.text for quote in quotes]
-# Scrapy spider to scrape book titles and prices
-class BookSpider(scrapy.Spider):
-    name = 'bookspider'
-    start_urls = [url]
-    def parse(self, response):
-        for book in response.css('article.product_pod'):
-            yield {
-                'title': book.css('h3 a::attr(title)').get(),
-                'price': book.css('p.price_color::text').get()
-            }
-# Scrapy function to scrape booksdef scrape_with_scrapy(url):
     def parse(response):
         books = []
         for book in response.css('article.product_pod'):

 import requests
 from bs4 import BeautifulSoup
 import scrapy
+from scrapy.http import TextResponse
 import pandas as pd
 # Streamlit app title
 st.title("Web Scraping with Streamlit")
     quotes = soup.find_all('span', class_='text')
     return [quote.text for quote in quotes]
+# Scrapy function to scrape books
+def scrape_with_scrapy(url):
     def parse(response):
         books = []
         for book in response.css('article.product_pod'):