"""Website chatbot: scrape a site's paragraphs, index them with FAISS,
and answer questions by summarizing the closest-matching paragraph.
Runs as a Streamlit app (``streamlit run <this file>``)."""

import json

import faiss
import numpy as np
import requests
import streamlit as st
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
from transformers import pipeline

URL = "https://aspireec.com/"
DATA_FILE = "website_data.json"


# ---- Step 1: scrape website data (cached so Streamlit reruns are cheap) ----
@st.cache_data
def scrape_website(url):
    """Fetch *url* and return its non-empty <p> paragraph texts.

    Raises requests.RequestException (incl. HTTPError/Timeout) on failure.
    """
    # Timeout so the app cannot hang forever on a dead host;
    # raise_for_status surfaces 4xx/5xx instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    texts = [p.text.strip() for p in soup.find_all("p") if p.text.strip()]
    # Persist a copy for offline inspection/debugging; explicit UTF-8 so
    # scraped web text round-trips on any platform.
    with open(DATA_FILE, "w", encoding="utf-8") as file:
        json.dump(texts, file, ensure_ascii=False)
    return texts


# ---- Steps 2-3: load models once per process, not once per rerun ----
@st.cache_resource
def load_models():
    """Load the embedding model and summarizer (expensive; cached)."""
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    summarizer = pipeline("summarization", model="google/flan-t5-base")
    return embedder, summarizer


@st.cache_resource
def build_index(texts):
    """Embed *texts* (a hashable sequence of str) and return a FAISS L2 index."""
    embedder, _ = load_models()
    # FAISS indexes require float32 vectors.
    vectors = np.asarray(embedder.encode(list(texts)), dtype="float32")
    faiss_index = faiss.IndexFlatL2(vectors.shape[1])
    faiss_index.add(vectors)
    return faiss_index


website_data = scrape_website(URL)
if not website_data:
    # Guard: embedding an empty list would crash on embeddings.shape[1]
    # with an opaque error; fail with a clear message instead.
    st.error("No paragraph content found on the website.")
    st.stop()

model, summarizer = load_models()
index = build_index(tuple(website_data))  # tuple -> hashable cache key


# ---- Step 4: retrieval + summarization ----
def get_answer(query):
    """Return a summary of the website paragraph closest to *query*."""
    query_embedding = np.asarray(model.encode([query]), dtype="float32")
    _distances, indices = index.search(query_embedding, k=1)
    best_match = website_data[indices[0][0]]
    summarized = summarizer(best_match, max_length=50, min_length=10, do_sample=False)
    return summarized[0]["summary_text"]


# ---- Step 5: Streamlit chatbot UI ----
st.title("Website Chatbot")
user_input = st.text_input("Ask me anything about the website:")
if user_input:
    # Query the FAISS index and summarize the best-matching paragraph.
    st.write(get_answer(user_input))