Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,8 @@ from bs4 import BeautifulSoup
|
|
| 3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 4 |
import gradio as gr
|
| 5 |
import torch
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Web scraping
|
| 8 |
def scrape_website(url):
|
|
@@ -11,10 +13,15 @@ def scrape_website(url):
|
|
| 11 |
content = ' '.join([p.text for p in soup.find_all('p')])
|
| 12 |
return content
|
| 13 |
|
| 14 |
-
# Store data
|
| 15 |
stored_data = {}
|
| 16 |
def store_data(url, content):
|
| 17 |
stored_data[url] = content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# Conversational AI with a smaller model
|
| 20 |
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
|
|
@@ -24,6 +31,12 @@ model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
|
|
| 24 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 25 |
model.to(device)
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
def generate_response(input_text):
|
| 28 |
input_ids = tokenizer.encode(input_text + tokenizer.eos_token, return_tensors='pt').to(device)
|
| 29 |
response_ids = model.generate(input_ids, max_length=50, pad_token_id=tokenizer.eos_token_id)
|
|
@@ -31,17 +44,28 @@ def generate_response(input_text):
|
|
| 31 |
return response
|
| 32 |
|
| 33 |
def chatbot_response(user_input):
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
if url in stored_data:
|
| 37 |
content = stored_data[url]
|
|
|
|
| 38 |
else:
|
| 39 |
content = scrape_website(url)
|
| 40 |
-
store_data(url, content)
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
# Interface
|
| 47 |
def chat_interface(user_input):
|
|
|
|
| 3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 4 |
import gradio as gr
|
| 5 |
import torch
|
| 6 |
+
import re
|
| 7 |
+
import os
|
| 8 |
|
| 9 |
# Web scraping
|
| 10 |
def scrape_website(url):
|
|
|
|
| 13 |
content = ' '.join([p.text for p in soup.find_all('p')])
|
| 14 |
return content
|
| 15 |
|
| 16 |
+
# Store data and save to a file
|
| 17 |
# In-memory cache of scraped page text, keyed by the URL it came from.
stored_data = {}


def store_data(url, content):
    """Cache page text under its URL and write it to a text file.

    The file is created in the current working directory and overwritten
    if it already exists.

    Args:
        url: Address the content came from. Used as the cache key and as
            the basis for the file name.
        content: Text to cache and to write to the file.

    Returns:
        The name of the file the content was written to.

    Raises:
        OSError: If the file cannot be created or written.
    """
    stored_data[url] = content

    # Drop the scheme and flatten path separators, as the file name is
    # derived directly from the URL.
    stem = url.replace("https://", "").replace("http://", "").replace("/", "_")
    # The URL originates from chat input, so neutralise everything that is
    # not safe in a file name (query characters, backslashes, colons, ...).
    stem = re.sub(r"[^A-Za-z0-9._-]", "_", stem)
    # Common filesystems cap a name at 255 bytes; keep room for the suffix.
    # Very long URLs that share their first 200 characters map to one file.
    filename = stem[:200] + ".txt"

    # Explicit UTF-8 so non-ASCII page text does not depend on the locale.
    with open(filename, "w", encoding="utf-8") as file:
        file.write(content)
    return filename
|
| 25 |
|
| 26 |
# Conversational AI with a smaller model
|
| 27 |
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
|
|
|
|
| 31 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 32 |
model.to(device)
|
| 33 |
|
| 34 |
+
# Function to extract URLs from text
|
| 35 |
+
# An http(s) scheme followed by everything up to the next whitespace.
_URL_PATTERN = re.compile(r'(https?://\S+)')
# Sentence punctuation and quotes that commonly trail a URL in running text.
_TRAILING_PUNCTUATION = ".,;:!?'\""


def extract_urls(text):
    """Return the http/https URLs found in ``text``, in order of appearance.

    Punctuation that belongs to the surrounding sentence rather than to the
    URL is trimmed from the end of each match: sentence punctuation, quotes,
    and a closing parenthesis that has no opening partner inside the URL.
    Matches that consist of a scheme only are dropped.

    Args:
        text: Free-form text to search.

    Returns:
        A list of URL strings; empty when ``text`` contains none.
    """
    urls = []
    for candidate in _URL_PATTERN.findall(text):
        url = candidate
        # Trim repeatedly: punctuation and an unbalanced ")" can alternate,
        # e.g. "(see https://example.com/x)." ends in ")." after matching.
        while True:
            trimmed = url.rstrip(_TRAILING_PUNCTUATION)
            # Keep balanced parentheses such as ".../wiki/Foo_(bar)".
            if trimmed.endswith(")") and trimmed.count(")") > trimmed.count("("):
                trimmed = trimmed[:-1]
            if trimmed == url:
                break
            url = trimmed
        # Skip matches with nothing left after the scheme ("https://...").
        if url.partition("://")[2]:
            urls.append(url)
    return urls
|
| 39 |
+
|
| 40 |
def generate_response(input_text):
|
| 41 |
input_ids = tokenizer.encode(input_text + tokenizer.eos_token, return_tensors='pt').to(device)
|
| 42 |
response_ids = model.generate(input_ids, max_length=50, pad_token_id=tokenizer.eos_token_id)
|
|
|
|
| 44 |
return response
|
| 45 |
|
| 46 |
def chatbot_response(user_input):
    """Answer a chat message, pulling in the text of any pages it links to.

    Each URL found in ``user_input`` is resolved to page text (taken from
    the ``stored_data`` cache when present, otherwise fetched with
    ``scrape_website``), written to a file through ``store_data``, and
    appended to the prompt passed to ``generate_response``.

    Args:
        user_input: The raw message typed by the user.

    Returns:
        The generated reply, followed by one
        "You can download the data here: <file>" line per URL when the
        message contained any. Without URLs the reply is returned as is,
        with no trailing newline.
    """
    file_links = []
    prompt = user_input

    for url in extract_urls(user_input):
        # Reuse cached text; only scrape a URL the first time it is seen.
        content = stored_data[url] if url in stored_data else scrape_website(url)
        # store_data is called for cached URLs too because its return value
        # is the file name shown to the user.
        filename = store_data(url, content)
        file_links.append(f"You can download the data here: {filename}")
        # NOTE(review): the whole page text is added to the prompt, while
        # generate_response calls model.generate with max_length=50 --
        # confirm long pages are handled there.
        prompt += " " + content

    response = generate_response(prompt)

    # Joining the reply with the links avoids a dangling newline when the
    # message contained no URLs.
    return "\n".join([response, *file_links])
|
| 69 |
|
| 70 |
# Interface
|
| 71 |
def chat_interface(user_input):
|