"""Streamlit chatbot that answers questions from scraped website content.

Pipeline: download a page, split its text into short passages, pick the
passage closest to the user's question with a FAISS nearest-neighbour
search over sentence embeddings, and let GPT-2 continue a prompt built
from that passage and the question. The answer is shown in the UI and
also spoken through gTTS.
"""

import logging
import os
import subprocess
import sys
from typing import List, Sequence

import faiss
import numpy as np
import requests
import speech_recognition as sr
import streamlit as st
from bs4 import BeautifulSoup
from gtts import gTTS
from sentence_transformers import SentenceTransformer
from transformers import pipeline

logger = logging.getLogger(__name__)

SOURCE_URL = "https://www.sbbusba.edu.pk/"  # Example URL
REQUEST_TIMEOUT_SECONDS = 10
# Passages are kept short so that one passage plus the question normally
# stays well inside GPT-2's limited context window.
PASSAGE_MAX_CHARS = 500
AUDIO_FILE = "response.mp3"
NO_DATA_MESSAGE = "Bot: Sorry, I couldn't find any relevant data."


# Scrape website data
def scrape_website(url: str, timeout: float = REQUEST_TIMEOUT_SECONDS) -> str:
    """Download *url* and return the visible text of the page.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The text extracted from the HTML by BeautifulSoup.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx response (so an error page is never treated as content).
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.get_text()


@st.cache_resource
def _load_embedding_model() -> SentenceTransformer:
    """Load the sentence-embedding model once and reuse it across reruns."""
    return SentenceTransformer('paraphrase-MiniLM-L6-v2')


@st.cache_resource
def _load_generator():
    """Load the GPT-2 text-generation pipeline once and reuse it."""
    return pipeline("text-generation", model="gpt2")


# Function to create embeddings
def create_embeddings(texts: Sequence[str]):
    """Return one embedding vector per entry of *texts*.

    Args:
        texts: Strings to embed.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for *texts*
        (one row per input string).
    """
    return _load_embedding_model().encode(texts)


# Use Faiss for similarity search
def search(query: str, documents: Sequence[str], k: int = 1) -> List[str]:
    """Return up to *k* documents closest to *query* by L2 distance.

    Args:
        query: Text to look up.
        documents: Candidate texts to search through.
        k: Maximum number of documents to return.

    Returns:
        The matching documents, nearest first. An empty list is returned
        when there is nothing to search, when ``k`` is not positive, or
        when the FAISS search fails (the failure is logged). Diagnostic
        messages are never returned as if they were documents.
    """
    if not documents:
        return []

    # FAISS pads the result with -1 when asked for more neighbours than it
    # has vectors; clamping k avoids requesting them in the first place.
    k = min(k, len(documents))
    if k <= 0:
        return []

    # FAISS expects C-contiguous float32 matrices.
    vectors = np.ascontiguousarray(
        np.asarray(create_embeddings([query, *documents]), dtype=np.float32)
    )
    query_vector = vectors[:1]
    doc_vectors = vectors[1:]

    try:
        index = faiss.IndexFlatL2(doc_vectors.shape[1])  # L2 distance for similarity
        index.add(doc_vectors)
        _, neighbours = index.search(query_vector, k)
    except Exception:  # best effort: a failed search must not crash the UI
        logger.exception("Error during search")
        return []

    # The lower bound matters: -1 would otherwise select documents[-1].
    return [documents[i] for i in neighbours[0] if 0 <= i < len(documents)]


def _play_audio_file(path: str) -> None:
    """Open *path* with the operating system's default application."""
    try:
        if sys.platform.startswith("win"):
            os.startfile(path)  # Windows-only API
        elif sys.platform == "darwin":
            subprocess.run(["open", path], check=False)
        else:
            subprocess.run(["xdg-open", path], check=False)
    except OSError:
        # No opener available (e.g. a headless server); the text answer
        # has already been shown, so playback failure is not fatal.
        logger.exception("Could not play %s", path)


# Function for Text-to-Speech
def text_to_speech(text: str) -> None:
    """Synthesize *text* to ``response.mp3`` and play it.

    Args:
        text: Text to speak. Empty or whitespace-only text is ignored,
            because there is nothing to synthesize.
    """
    if not text or not text.strip():
        return
    gTTS(text).save(AUDIO_FILE)
    _play_audio_file(AUDIO_FILE)


# Function for Speech-to-Text
def speech_to_text() -> str:
    """Record one utterance from the microphone and return its transcript.

    Returns:
        The text recognized by the Google Web Speech API.

    Raises:
        sr.UnknownValueError: If the speech could not be understood.
        sr.RequestError: If the recognition service could not be reached.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
    # Recognition only needs the captured audio, so the microphone is
    # released before the network call.
    query = recognizer.recognize_google(audio)
    print(f"User: {query}")
    return query


# Function to generate responses using Hugging Face GPT model
def generate_response(query: str) -> str:
    """Continue the prompt *query* with GPT-2.

    Args:
        query: Prompt text handed to the model.

    Returns:
        The ``generated_text`` of the single returned sequence, i.e. the
        prompt followed by up to 50 newly generated tokens.
    """
    generator = _load_generator()
    response = generator(query, max_new_tokens=50, num_return_sequences=1)
    return response[0]['generated_text']


def _split_into_passages(text: str, max_chars: int = PASSAGE_MAX_CHARS) -> List[str]:
    """Split *text* into whitespace-normalized passages of at most *max_chars*."""
    passages: List[str] = []
    current: List[str] = []
    current_len = 0
    for raw_line in text.splitlines():
        line = " ".join(raw_line.split())
        if not line:
            continue
        # A single very long line is cut into pieces that fit on their own.
        for start in range(0, len(line), max_chars):
            piece = line[start:start + max_chars]
            if current and current_len + 1 + len(piece) > max_chars:
                passages.append(" ".join(current))
                current, current_len = [], 0
            current_len += len(piece) + (1 if current_len else 0)
            current.append(piece)
    if current:
        passages.append(" ".join(current))
    return passages


def _answer_query(query: str) -> None:
    """Answer *query* from the website content; show and speak the result."""
    try:
        web_content = scrape_website(SOURCE_URL)
    except requests.RequestException as exc:
        st.error(f"Could not load website content: {exc}")
        return

    relevant_data = search(query, _split_into_passages(web_content))
    if not relevant_data:
        st.write(NO_DATA_MESSAGE)
        return

    # The question is part of the prompt; without it the generated text
    # would not depend on what the user asked.
    prompt = (
        f"Based on the content of the website: {relevant_data[0]}\n"
        f"Question: {query}\nAnswer:"
    )
    response = generate_response(prompt)
    # generate_response returns the full text, so drop the echoed prompt
    # and present only the generated continuation.
    if response.startswith(prompt):
        response = response[len(prompt):].strip()
    if not response:
        st.write(NO_DATA_MESSAGE)
        return

    st.write("Bot: " + response)
    text_to_speech(response)  # Convert the text response to speech


# Main Streamlit function
def main() -> None:
    """Render the chatbot UI and dispatch to text or voice input."""
    st.title("Custom Multilingual Chatbot")

    mode = st.selectbox("Choose Mode", ["Text", "Voice"])

    if mode == "Text":
        user_input = st.text_input("Ask me anything:")
        if user_input:
            _answer_query(user_input)

    elif mode == "Voice":
        if st.button("Start Listening"):
            try:
                query = speech_to_text()  # Listen and convert to text
            except sr.UnknownValueError:
                st.write("Bot: Sorry, I couldn't understand the audio.")
            except sr.RequestError as exc:
                st.error(f"Speech recognition service error: {exc}")
            except OSError as exc:
                # Presumably raised when no input device is available.
                st.error(f"Microphone not available: {exc}")
            else:
                _answer_query(query)


if __name__ == "__main__":
    main()