"""Website chatbot: scrape a site's paragraphs, index them with FAISS,
and answer questions by summarizing the closest-matching paragraph.
Runs as a Streamlit app (``streamlit run <this file>``)."""

import json

import faiss
import numpy as np
import requests
import streamlit as st
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
from transformers import pipeline

URL = "https://aspireec.com/"
DATA_FILE = "website_data.json"


# ---- Step 1: scrape website data (cached so Streamlit reruns are cheap) ----
@st.cache_data
def scrape_website(url):
    """Fetch *url* and return its non-empty <p> paragraph texts.

    Raises requests.RequestException (incl. HTTPError/Timeout) on failure.
    """
    # Timeout so the app cannot hang forever on a dead host;
    # raise_for_status surfaces 4xx/5xx instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    texts = [p.text.strip() for p in soup.find_all("p") if p.text.strip()]
    # Persist a copy for offline inspection/debugging; explicit UTF-8 so
    # scraped web text round-trips on any platform.
    with open(DATA_FILE, "w", encoding="utf-8") as file:
        json.dump(texts, file, ensure_ascii=False)
    return texts


# ---- Steps 2-3: load models once per process, not once per rerun ----
@st.cache_resource
def load_models():
    """Load the embedding model and summarizer (expensive; cached)."""
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    summarizer = pipeline("summarization", model="google/flan-t5-base")
    return embedder, summarizer


@st.cache_resource
def build_index(texts):
    """Embed *texts* (a hashable sequence of str) and return a FAISS L2 index."""
    embedder, _ = load_models()
    # FAISS indexes require float32 vectors.
    vectors = np.asarray(embedder.encode(list(texts)), dtype="float32")
    faiss_index = faiss.IndexFlatL2(vectors.shape[1])
    faiss_index.add(vectors)
    return faiss_index


website_data = scrape_website(URL)
if not website_data:
    # Guard: embedding an empty list would crash on embeddings.shape[1]
    # with an opaque error; fail with a clear message instead.
    st.error("No paragraph content found on the website.")
    st.stop()

model, summarizer = load_models()
index = build_index(tuple(website_data))  # tuple -> hashable cache key


# ---- Step 4: retrieval + summarization ----
def get_answer(query):
    """Return a summary of the website paragraph closest to *query*."""
    query_embedding = np.asarray(model.encode([query]), dtype="float32")
    _distances, indices = index.search(query_embedding, k=1)
    best_match = website_data[indices[0][0]]
    summarized = summarizer(best_match, max_length=50, min_length=10, do_sample=False)
    return summarized[0]["summary_text"]


# ---- Step 5: Streamlit chatbot UI ----
st.title("Website Chatbot")
user_input = st.text_input("Ask me anything about the website:")
if user_input:
    # Query the FAISS index and summarize the best-matching paragraph.
    st.write(get_answer(user_input))