kinely committed on
Commit
477440f
·
verified ·
1 Parent(s): bb56a4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -14
app.py CHANGED
@@ -1,41 +1,52 @@
1
  import requests
2
  from bs4 import BeautifulSoup
 
 
 
 
 
 
3
 
 
4
  url = "https://aspireec.com/"
5
  response = requests.get(url)
6
  soup = BeautifulSoup(response.text, 'html.parser')
 
7
  # Extract data (e.g., headlines, paragraphs, etc.)
8
  content = soup.find_all('p') # Example: extracting paragraphs
9
- website_data = [p.text for p in content]
10
- import json
11
 
 
12
  with open('website_data.json', 'w') as file:
13
  json.dump(website_data, file)
14
- from sentence_transformers import SentenceTransformer
15
 
 
16
  model = SentenceTransformer('all-MiniLM-L6-v2')
17
  embeddings = model.encode(website_data)
18
- import faiss
19
- import numpy as np
20
 
21
  # Create FAISS index
22
  dimension = embeddings.shape[1]
23
  index = faiss.IndexFlatL2(dimension)
24
  index.add(np.array(embeddings))
25
- query = "What is the website about?"
26
- query_embedding = model.encode([query])
27
- distances, indices = index.search(np.array(query_embedding), k=1)
28
- best_match = website_data[indices[0][0]]
29
- from transformers import pipeline
30
 
 
31
  summarizer = pipeline("summarization", model="google/flan-t5-base")
32
- answer = summarizer(best_match)
33
- print(answer)
34
- import streamlit as st
35
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  st.title("Website Chatbot")
37
 
38
  user_input = st.text_input("Ask me anything about the website:")
39
  if user_input:
40
- response = get_answer(user_input) # Function to query data
41
  st.write(response)
 
1
  import requests
2
  from bs4 import BeautifulSoup
3
+ import json
4
+ import numpy as np
5
+ import faiss
6
+ from sentence_transformers import SentenceTransformer
7
+ from transformers import pipeline
8
+ import streamlit as st
9
 
10
# Step 1: Scrape Website Data
url = "https://aspireec.com/"
# timeout= prevents the app from hanging forever on an unreachable host;
# raise_for_status() surfaces HTTP errors (4xx/5xx) instead of silently
# parsing an error page as if it were real content.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Extract data (e.g., headlines, paragraphs, etc.)
content = soup.find_all('p')  # Example: extracting paragraphs
website_data = [p.text.strip() for p in content if p.text.strip()]

# Save the extracted content to a JSON file.
# encoding='utf-8' + ensure_ascii=False keep any non-ASCII website text
# human-readable in the file instead of \uXXXX escapes.
with open('website_data.json', 'w', encoding='utf-8') as file:
    json.dump(website_data, file, ensure_ascii=False)

# Step 2: Create Embeddings and FAISS Index
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(website_data)

# Create FAISS index (exact L2 nearest-neighbour search over the paragraphs)
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings))

# Step 3: Summarization Model
summarizer = pipeline("summarization", model="google/flan-t5-base")
 
 
 
34
 
35
# Step 4: Define the `get_answer` Function
def get_answer(query):
    """Answer *query* by retrieving the closest scraped paragraph and summarizing it.

    Encodes the query with the shared SentenceTransformer model, finds its
    nearest neighbour in the FAISS index, then runs the summarization
    pipeline over that paragraph.

    Returns the summary text, or a fallback message when the index has no
    usable match.
    """
    # Encode the query into the same embedding space as website_data
    query_embedding = model.encode([query])
    distances, indices = index.search(np.array(query_embedding), k=1)
    best_idx = int(indices[0][0])
    # FAISS returns id -1 when the index holds fewer than k vectors; without
    # this guard, website_data[-1] would silently return the LAST paragraph.
    if best_idx < 0 or best_idx >= len(website_data):
        return "Sorry, I could not find any relevant content on the website."
    best_match = website_data[best_idx]
    # Generate a summarized response (do_sample=False keeps it deterministic)
    summarized_response = summarizer(best_match, max_length=50, min_length=10, do_sample=False)
    return summarized_response[0]['summary_text']
45
+
46
# Step 5: Streamlit Chatbot UI
st.title("Website Chatbot")

# text_input returns "" until the user submits something; only then do we
# run retrieval + summarization.
question = st.text_input("Ask me anything about the website:")
if question:
    answer = get_answer(question)  # retrieve the best paragraph and summarize it
    st.write(answer)