Update app.py
Browse files
app.py
CHANGED
|
@@ -2,9 +2,8 @@ import streamlit as st
|
|
| 2 |
import requests
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
import scrapy
|
| 5 |
-
from scrapy.
|
| 6 |
import pandas as pd
|
| 7 |
-
import io
|
| 8 |
|
| 9 |
# Streamlit app title
|
| 10 |
st.title("Web Scraping with Streamlit")
|
|
@@ -23,19 +22,8 @@ def scrape_with_beautifulsoup(url):
|
|
| 23 |
quotes = soup.find_all('span', class_='text')
|
| 24 |
return [quote.text for quote in quotes]
|
| 25 |
|
| 26 |
-
# Scrapy
|
| 27 |
-
|
| 28 |
-
name = 'bookspider'
|
| 29 |
-
start_urls = [url]
|
| 30 |
-
|
| 31 |
-
def parse(self, response):
|
| 32 |
-
for book in response.css('article.product_pod'):
|
| 33 |
-
yield {
|
| 34 |
-
'title': book.css('h3 a::attr(title)').get(),
|
| 35 |
-
'price': book.css('p.price_color::text').get()
|
| 36 |
-
}
|
| 37 |
-
|
| 38 |
-
# Scrapy function to scrape booksdef scrape_with_scrapy(url):
|
| 39 |
def parse(response):
|
| 40 |
books = []
|
| 41 |
for book in response.css('article.product_pod'):
|
|
|
|
| 2 |
import requests
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
import scrapy
|
| 5 |
+
from scrapy.http import TextResponse
|
| 6 |
import pandas as pd
|
|
|
|
| 7 |
|
| 8 |
# Streamlit app title
|
| 9 |
st.title("Web Scraping with Streamlit")
|
|
|
|
| 22 |
quotes = soup.find_all('span', class_='text')
|
| 23 |
return [quote.text for quote in quotes]
|
| 24 |
|
| 25 |
+
# Scrapy function to scrape books
|
| 26 |
+
def scrape_with_scrapy(url):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
def parse(response):
|
| 28 |
books = []
|
| 29 |
for book in response.css('article.product_pod'):
|