dataScrapper / app.py
Dedisson's picture
requirements.txt
8d6fb2b verified
raw
history blame contribute delete
762 Bytes
import gradio as gr
import requests
from bs4 import BeautifulSoup
import pandas as pd
def scrape_site(url, timeout=10):
    """Fetch *url* and return the text of every ``<h2>`` element on the page.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may wait indefinitely on an unresponsive
            host, which would freeze the caller.

    Returns:
        A list with the whitespace-stripped text of each ``<h2>`` tag found.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")
    return [heading.text.strip() for heading in soup.find_all("h2")]
def clean_data(data):
    """Normalise a list of titles and remove duplicates.

    Each title is lower-cased and stripped of surrounding whitespace; the
    first occurrence of every distinct normalised title is kept, in the
    original order.

    Args:
        data: Iterable of title strings.

    Returns:
        A list of the unique, normalised titles.
    """
    frame = pd.DataFrame(data, columns=["title"])
    normalised = frame["title"].str.lower().str.strip()
    unique_titles = normalised.drop_duplicates()
    return unique_titles.tolist()
def chatbot(message, history):
    """Handle one chat message and return the reply text.

    Recognises the command ``scrape <URL>``: the page is downloaded with
    ``scrape_site``, its titles are normalised with ``clean_data`` and at
    most ten of them are returned, one per line. Any other message (or a
    ``scrape`` command without a URL) gets the usage hint.

    Args:
        message: Text typed by the user.
        history: Earlier conversation turns passed in by the chat
            interface; not used here.

    Returns:
        The reply string to display in the chat.
    """
    usage = "Tape: scrape URL"
    max_titles = 10

    # Split into "<command> <argument>". This requires the command to be the
    # whole first word (so "scraper ..." is rejected) and an argument to be
    # present (so a bare "scrape" no longer becomes a bogus URL).
    parts = message.split(maxsplit=1)
    if len(parts) != 2 or parts[0].lower() != "scrape":
        return usage
    url = parts[1].strip()

    try:
        raw_titles = scrape_site(url)
    except requests.RequestException as exc:
        # Report network/URL problems in the chat instead of crashing the
        # handler. User-facing replies stay in French, like the usage hint.
        return f"Erreur lors de la récupération de {url} : {exc}"

    titles = clean_data(raw_titles)
    if not titles:
        # An empty string would show up as a blank chat reply.
        return "Aucun titre trouvé."
    return "\n".join(titles[:max_titles])
# Build the Gradio chat UI, using chatbot() as the handler for each message.
demo = gr.ChatInterface(fn=chatbot)
# Start the app. NOTE(review): this runs at import time as well, because
# there is no `if __name__ == "__main__":` guard around it.
demo.launch()