Spaces:

kinely
/

chatbot

Sleeping

App Files Files Community

kinely committed on Nov 28, 2024

Commit

477440f

verified ·

1 Parent(s): bb56a4a

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -14

app.py CHANGED Viewed

@@ -1,41 +1,52 @@
 import requests
 from bs4 import BeautifulSoup
 url = "https://aspireec.com/"
 response = requests.get(url)
 soup = BeautifulSoup(response.text, 'html.parser')
 # Extract data (e.g., headlines, paragraphs, etc.)
 content = soup.find_all('p')  # Example: extracting paragraphs
-website_data = [p.text for p in content]
-import json
 with open('website_data.json', 'w') as file:
     json.dump(website_data, file)
-from sentence_transformers import SentenceTransformer
 model = SentenceTransformer('all-MiniLM-L6-v2')
 embeddings = model.encode(website_data)
-import faiss
-import numpy as np
 # Create FAISS index
 dimension = embeddings.shape[1]
 index = faiss.IndexFlatL2(dimension)
 index.add(np.array(embeddings))
-query = "What is the website about?"
-query_embedding = model.encode([query])
-distances, indices = index.search(np.array(query_embedding), k=1)
-best_match = website_data[indices[0][0]]
-from transformers import pipeline
 summarizer = pipeline("summarization", model="google/flan-t5-base")
-answer = summarizer(best_match)
-print(answer)
-import streamlit as st
 st.title("Website Chatbot")
 user_input = st.text_input("Ask me anything about the website:")
 if user_input:
-    response = get_answer(user_input)  # Function to query data
     st.write(response)

 import requests
 from bs4 import BeautifulSoup
+import json
+import numpy as np
+import faiss
+from sentence_transformers import SentenceTransformer
+from transformers import pipeline
+import streamlit as st
+# Step 1: Scrape Website Data
 url = "https://aspireec.com/"
 response = requests.get(url)
 soup = BeautifulSoup(response.text, 'html.parser')
 # Extract data (e.g., headlines, paragraphs, etc.)
 content = soup.find_all('p')  # Example: extracting paragraphs
+website_data = [p.text.strip() for p in content if p.text.strip()]
+# Save the extracted content to a JSON file
 with open('website_data.json', 'w') as file:
     json.dump(website_data, file)
+# Step 2: Create Embeddings and FAISS Index
 model = SentenceTransformer('all-MiniLM-L6-v2')
 embeddings = model.encode(website_data)
 # Create FAISS index
 dimension = embeddings.shape[1]
 index = faiss.IndexFlatL2(dimension)
 index.add(np.array(embeddings))
+# Step 3: Summarization Model
 summarizer = pipeline("summarization", model="google/flan-t5-base")
+# Step 4: `get_answer(query)` returns a summary of the single scraped paragraph whose embedding is nearest to the query's
+def get_answer(query):  # query: the text typed into the Streamlit input box below
+    # Embed the query with the same SentenceTransformer that encoded `website_data`, so it is comparable with the indexed vectors
+    query_embedding = model.encode([query])  # wrapped in a one-item list, so the search result has a single row
+    distances, indices = index.search(np.array(query_embedding), k=1)  # k=1: nearest entry only; `distances` is not used afterwards
+    # Row 0 belongs to the only query and column 0 is its closest entry; that value is used as a position in `website_data`
+    best_match = website_data[indices[0][0]]
+    # Summarize the matched paragraph with the summarization pipeline; sampling is switched off via do_sample=False
+    summarized_response = summarizer(best_match, max_length=50, min_length=10, do_sample=False)
+    return summarized_response[0]['summary_text']  # take the text of the first (only) result returned by the pipeline
+# Step 5: Streamlit Chatbot UI
 st.title("Website Chatbot")
 user_input = st.text_input("Ask me anything about the website:")
 if user_input:
+    response = get_answer(user_input)  # Query the FAISS index and summarize the response
     st.write(response)