# Page header captured together with this file (not part of the program):
#   Munwar92's picture
#   Update app.py
#   310713e verified
import streamlit as st
import requests
from bs4 import BeautifulSoup
import speech_recognition as sr
from gtts import gTTS
import os
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline
# Scrape website data
def scrape_website(url, timeout=10):
    """Download a web page and return the text contained in its HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a host that
            never answers.

    Returns:
        The text extracted from the page by BeautifulSoup, or an empty
        string when the page cannot be retrieved (connection problem,
        timeout, malformed URL, or an HTTP error status).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of scraping an error page.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Failed to fetch {url}: {exc}")
        return ""

    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.get_text()
# Function to create embeddings
@st.cache_resource
def _load_embedding_model(model_name):
    """Build the SentenceTransformer for *model_name* once and reuse it."""
    return SentenceTransformer(model_name)


def create_embeddings(texts, model_name='paraphrase-MiniLM-L6-v2'):
    """Encode *texts* with a sentence-embedding model.

    The model is obtained through a cached loader, so it is constructed only
    the first time it is needed instead of on every call (Streamlit re-runs
    the whole script on each user interaction, which previously rebuilt the
    model every time).

    Args:
        texts: Strings to encode.
        model_name: Name of the SentenceTransformer model to use.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for *texts*; the
        caller treats it as a sequence with one embedding per input text,
        in the same order.
    """
    model = _load_embedding_model(model_name)
    return model.encode(texts)
# Use Faiss for similarity search
def search(query, documents, k=1):
    """Return the documents closest to *query* by L2 distance of embeddings.

    Args:
        query: Text to look for.
        documents: List of candidate texts.
        k: Maximum number of documents to return.

    Returns:
        A list of at most ``k`` entries of *documents*, nearest first. When
        the search cannot be carried out, the list instead holds a single
        human-readable message ("No documents available for searching.",
        "No embeddings available for searching." or "Error during search:
        ...") so that callers always receive a list of strings.
    """
    if not documents:  # Nothing to search through
        return ["No documents available for searching."]

    # One batch: row 0 is the query, the remaining rows are the documents.
    # FAISS works on float32 matrices, so normalise the dtype up front.
    embeddings = np.asarray(create_embeddings([query] + documents), dtype=np.float32)
    query_embedding = np.ascontiguousarray(embeddings[:1])  # keep shape (1, dim)
    doc_embeddings = np.ascontiguousarray(embeddings[1:])
    if doc_embeddings.shape[0] == 0:  # Ensure embeddings are non-empty
        return ["No embeddings available for searching."]

    index = faiss.IndexFlatL2(doc_embeddings.shape[1])  # L2 distance for similarity
    index.add(doc_embeddings)

    # Asking for more neighbours than there are indexed vectors makes FAISS
    # pad the result with -1, which Python would silently read as "last
    # document". Cap k and reject negative indices below.
    k = min(k, len(documents))
    try:
        _, indices = index.search(query_embedding, k)
    except Exception as e:
        return [f"Error during search: {str(e)}"]

    return [documents[i] for i in indices[0] if 0 <= i < len(documents)]
# Function for Text-to-Speech
def text_to_speech(text):
    """Synthesize *text* with gTTS and play it in the Streamlit page.

    The audio is still saved to ``response.mp3`` in the working directory,
    but it is now played through an ``st.audio`` widget instead of the
    Windows-only ``start`` shell command, so playback happens in the user's
    browser and works regardless of the server's operating system.

    Args:
        text: The sentence(s) to speak. Empty or whitespace-only text is
            skipped because there is nothing to synthesize.
    """
    if not text or not text.strip():
        return

    tts = gTTS(text)
    tts.save("response.mp3")

    # Hand the raw bytes to Streamlit so the player does not depend on the
    # file staying around after this call.
    with open("response.mp3", "rb") as audio_file:
        st.audio(audio_file.read(), format="audio/mp3")
# Function for Speech-to-Text
def speech_to_text(timeout=None, phrase_time_limit=None):
    """Record one utterance from the microphone and transcribe it.

    Args:
        timeout: Maximum seconds to wait for speech to start; ``None`` waits
            indefinitely (the previous behaviour).
        phrase_time_limit: Maximum seconds to record once speech has
            started; ``None`` means no limit.

    Returns:
        The recognized text, or an empty string when nothing was heard in
        time, the audio could not be understood, or the recognition service
        could not be reached.
    """
    recognizer = sr.Recognizer()

    try:
        with sr.Microphone() as source:
            print("Listening...")
            audio = recognizer.listen(
                source, timeout=timeout, phrase_time_limit=phrase_time_limit
            )
    except sr.WaitTimeoutError:
        print("No speech detected before the timeout.")
        return ""

    # Recognition runs after the microphone has been released.
    try:
        query = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Could not understand the audio.")
        return ""
    except sr.RequestError as exc:
        print(f"Speech recognition request failed: {exc}")
        return ""

    print(f"User: {query}")
    return query
# Function to generate responses using Hugging Face GPT model
@st.cache_resource
def _load_text_generator(model_name):
    """Build the text-generation pipeline for *model_name* once and reuse it."""
    return pipeline("text-generation", model=model_name)


def generate_response(query, max_new_tokens=50):
    """Continue *query* with the GPT-2 text-generation pipeline.

    The pipeline is obtained through a cached loader so the model is not
    re-created on every call.

    Args:
        query: Prompt text to continue.
        max_new_tokens: Number of tokens to generate after the prompt.

    Returns:
        The ``generated_text`` field of the first (and only) returned
        sequence.
    """
    generator = _load_text_generator("gpt2")
    response = generator(
        query,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        # Prompts built from a whole web page can exceed the model's maximum
        # length; "hole" trims the left of the input so that there is still
        # room for the requested new tokens.
        handle_long_generation="hole",
    )
    return response[0]['generated_text']
# Main Streamlit function
_SOURCE_URL = "https://www.sbbusba.edu.pk/"  # Example URL


def _answer_query(query):
    """Answer *query* from the website content and show/speak the reply."""
    web_content = scrape_website(_SOURCE_URL)

    # Only search when the page yielded some text.
    relevant_data = search(query, [web_content]) if web_content else []

    if not (relevant_data and isinstance(relevant_data[0], str)):
        st.write("Bot: Sorry, I couldn't find any relevant data.")
        return

    # NOTE(review): the prompt contains only the retrieved page text, not the
    # user's question - confirm that this is intended.
    response = generate_response(f"Based on the content of the website: {relevant_data[0]}")
    st.write("Bot: " + response)
    text_to_speech(response)  # Convert the text response to speech


def main():
    """Render the chatbot page and handle one interaction.

    The user picks "Text" or "Voice" mode. In Text mode the question comes
    from a text box; in Voice mode it is recorded after the "Start
    Listening" button is pressed. Both modes then go through the same
    scrape -> search -> generate -> speak path, with the same guards against
    missing website content or empty search results.
    """
    st.title("Custom Multilingual Chatbot")
    mode = st.selectbox("Choose Mode", ["Text", "Voice"])

    if mode == "Text":
        user_input = st.text_input("Ask me anything:")
        if user_input:
            _answer_query(user_input)
    elif mode == "Voice":
        if st.button("Start Listening"):
            query = speech_to_text()  # Listen and convert to text
            if query:
                _answer_query(query)
            else:
                st.write("Bot: Sorry, I couldn't hear anything.")
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()