ashutoshsharma58 committed on
Commit
ba7993e
·
verified ·
1 Parent(s): ab7fede

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -8
app.py CHANGED
@@ -3,6 +3,8 @@ from bs4 import BeautifulSoup
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
  import gradio as gr
5
  import torch
 
 
6
 
7
  # Web scraping
8
  def scrape_website(url):
@@ -11,10 +13,15 @@ def scrape_website(url):
11
  content = ' '.join([p.text for p in soup.find_all('p')])
12
  return content
13
 
14
- # Store data
15
  stored_data = {}
16
  def store_data(url, content):
17
  stored_data[url] = content
 
 
 
 
 
18
 
19
  # Conversational AI with a smaller model
20
  tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
@@ -24,6 +31,12 @@ model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
24
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
25
  model.to(device)
26
 
 
 
 
 
 
 
27
  def generate_response(input_text):
28
  input_ids = tokenizer.encode(input_text + tokenizer.eos_token, return_tensors='pt').to(device)
29
  response_ids = model.generate(input_ids, max_length=50, pad_token_id=tokenizer.eos_token_id)
@@ -31,17 +44,28 @@ def generate_response(input_text):
31
  return response
32
 
33
  def chatbot_response(user_input):
34
- if user_input.startswith('http'):
35
- url = user_input
 
 
 
 
 
36
  if url in stored_data:
37
  content = stored_data[url]
 
38
  else:
39
  content = scrape_website(url)
40
- store_data(url, content)
41
- return "I've fetched the data from the website. How can I help you with it?"
42
- else:
43
- response = generate_response(user_input)
44
- return response
 
 
 
 
 
45
 
46
  # Interface
47
  def chat_interface(user_input):
 
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
  import gradio as gr
5
  import torch
6
+ import re
7
+ import os
8
 
9
  # Web scraping
10
  def scrape_website(url):
 
13
  content = ' '.join([p.text for p in soup.find_all('p')])
14
  return content
15
 
16
+ # Store data and save to a file
17
  stored_data = {}
18
  def store_data(url, content):
19
  stored_data[url] = content
20
+ # Save content to a file
21
+ filename = url.replace("https://", "").replace("http://", "").replace("/", "_") + ".txt"
22
+ with open(filename, "w") as file:
23
+ file.write(content)
24
+ return filename
25
 
26
  # Conversational AI with a smaller model
27
  tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
 
31
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
32
  model.to(device)
33
 
34
+ # Function to extract URLs from text
35
# Compiled once at import time rather than on every call.
_URL_PATTERN = re.compile(r'(https?://\S+)')

# Characters that usually belong to the surrounding sentence, not the URL.
_TRAILING_PUNCTUATION = ".,;:!?'\""

# Closing brackets are stripped only when the URL holds no matching opener,
# so links such as ".../wiki/Foo_(bar)" keep their final parenthesis.
_CLOSING_BRACKETS = {")": "(", "]": "[", "}": "{", ">": "<"}


def _strip_trailing_punctuation(url):
    """Remove sentence punctuation and unmatched closers from the end of *url*."""
    while url:
        last = url[-1]
        if last in _TRAILING_PUNCTUATION:
            url = url[:-1]
        elif (last in _CLOSING_BRACKETS
              and url.count(last) > url.count(_CLOSING_BRACKETS[last])):
            url = url[:-1]
        else:
            break
    return url


def extract_urls(text):
    """Return the http(s) URLs found in *text*, in order of appearance.

    Args:
        text: Free-form user text; ``None`` or an empty string yields ``[]``.

    Returns:
        A list of URL strings. Trailing sentence punctuation and unmatched
        closing brackets are removed from each match. Matches that are no
        longer a URL after trimming (for example ``"https://."``) are dropped.
    """
    if not text:
        return []
    candidates = (
        _strip_trailing_punctuation(match)
        for match in _URL_PATTERN.findall(text)
    )
    return [url for url in candidates if _URL_PATTERN.fullmatch(url)]
39
+
40
  def generate_response(input_text):
41
  input_ids = tokenizer.encode(input_text + tokenizer.eos_token, return_tensors='pt').to(device)
42
  response_ids = model.generate(input_ids, max_length=50, pad_token_id=tokenizer.eos_token_id)
 
44
  return response
45
 
46
  def chatbot_response(user_input):
47
+ # Extract URLs from the input
48
+ urls = extract_urls(user_input)
49
+
50
+ file_links = []
51
+
52
+ # Scrape content from URLs, save to file, and append content to user input
53
+ for url in urls:
54
  if url in stored_data:
55
  content = stored_data[url]
56
+ filename = store_data(url, content)
57
  else:
58
  content = scrape_website(url)
59
+ filename = store_data(url, content)
60
+ file_links.append(f"You can download the data here: {filename}")
61
+ user_input += " " + content
62
+
63
+ # Generate response based on the combined input
64
+ response = generate_response(user_input)
65
+
66
+ # Combine the chatbot response with file links
67
+ full_response = response + "\n" + "\n".join(file_links)
68
+ return full_response
69
 
70
  # Interface
71
  def chat_interface(user_input):