Dedisson committed on
Commit
8d6fb2b
·
verified ·
1 Parent(s): 17961a6

requirements.txt

Browse files

gradio
requests
beautifulsoup4
pandas

Files changed (1) hide show
  1. app.py +33 -0
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import pandas as pd
5
+
6
+
7
def scrape_site(url, timeout=10):
    """Fetch a page and return the text of its ``<h2>`` headings.

    Args:
        url: Address of the page to download with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, which would hang the
            chat handler on an unresponsive host.

    Returns:
        A list with the whitespace-stripped text of every ``<h2>`` element,
        in document order.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged when the
            request fails (invalid URL, connection error, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")
    return [heading.text.strip() for heading in soup.find_all("h2")]
12
+
13
+
14
def clean_data(data):
    """Normalise scraped titles and remove repeats.

    Every title is lower-cased and stripped of surrounding whitespace;
    rows that are identical after that normalisation are then dropped.

    Args:
        data: Iterable of title strings.

    Returns:
        The normalised, de-duplicated titles as a plain list.
    """
    frame = pd.DataFrame(data, columns=["title"])
    normalised = frame.assign(title=frame["title"].str.lower().str.strip())
    return normalised.drop_duplicates()["title"].tolist()
19
+
20
+
21
def chatbot(message, history):
    """Answer a chat message; ``scrape <URL>`` lists headings from that page.

    Args:
        message: Text typed by the user.
        history: Previous conversation turns supplied by the chat UI
            (not used).

    Returns:
        Up to ten cleaned ``<h2>`` titles from the page, one per line, when
        the message is ``scrape`` followed by a URL; otherwise the usage
        hint.
    """
    # Split once on whitespace so that only the leading word is treated as
    # the command. Removing "scrape " with str.replace would also delete any
    # later occurrence, and a bare "scrape" would be fetched as the URL.
    parts = message.split(maxsplit=1)
    if len(parts) != 2 or parts[0] != "scrape":
        return "Tape: scrape URL"

    url = parts[1].strip()
    titles = clean_data(scrape_site(url))
    return "\n".join(titles[:10])
30
+
31
+
32
# Chat UI whose replies are produced by `chatbot`.
demo = gr.ChatInterface(fn=chatbot)

# Launch the interface as soon as this file is executed.
demo.launch()