# Website chatbot: scrape a site, embed its paragraphs into a FAISS index,
# and answer user questions via nearest-neighbor retrieval + summarization.
import requests
from bs4 import BeautifulSoup
import json
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import streamlit as st
# Step 1: Scrape Website Data
url = "https://aspireec.com/"
# Bound the request and fail fast on HTTP errors instead of silently
# indexing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Extract every non-empty <p> paragraph as one retrievable passage.
content = soup.find_all('p')
website_data = [p.text.strip() for p in content if p.text.strip()]

# Persist the extracted content so it can be inspected or reused later.
# ensure_ascii=False keeps non-ASCII text readable in the JSON file.
with open('website_data.json', 'w', encoding='utf-8') as file:
    json.dump(website_data, file, ensure_ascii=False)
# Step 2: Create Embeddings and FAISS Index
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(website_data)

# IndexFlatL2 does exact L2 (Euclidean) nearest-neighbor search.
# FAISS requires float32 input, so cast explicitly rather than relying
# on the encoder's output dtype.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.asarray(embeddings, dtype=np.float32))

# Step 3: Summarization Model
summarizer = pipeline("summarization", model="google/flan-t5-base")
# Step 4: Define the `get_answer` Function
def get_answer(query, k=1):
    """Answer *query* by retrieving the closest passage and summarizing it.

    Args:
        query: Free-text user question.
        k: Number of nearest passages to retrieve (backward-compatible
           default of 1; only the single best match is summarized).

    Returns:
        A short summary string of the best-matching website passage.
    """
    # Embed the query into the same vector space as the indexed passages;
    # FAISS expects float32 query vectors.
    query_embedding = model.encode([query])
    distances, indices = index.search(
        np.asarray(query_embedding, dtype=np.float32), k=k
    )

    # indices[0][0] is the row of the closest passage in website_data.
    best_match = website_data[indices[0][0]]

    # do_sample=False makes the summary deterministic for a given passage.
    summarized_response = summarizer(
        best_match, max_length=50, min_length=10, do_sample=False
    )
    return summarized_response[0]['summary_text']
# Step 5: Streamlit Chatbot UI
st.title("Website Chatbot")
user_input = st.text_input("Ask me anything about the website:")
if user_input:
    # Retrieve the closest passage from the FAISS index and display its
    # summarized form as the chatbot's reply.
    response = get_answer(user_input)
    st.write(response)