# Custom multilingual chatbot: scrapes a website, retrieves relevant text via
# FAISS similarity search, and answers with GPT-2 (Streamlit UI, text or voice).
import os
import subprocess
import sys

import faiss
import numpy as np
import requests
import speech_recognition as sr
import streamlit as st
from bs4 import BeautifulSoup
from gtts import gTTS
from sentence_transformers import SentenceTransformer
from transformers import pipeline
| # Scrape website data | |
| def scrape_website(url): | |
| response = requests.get(url) | |
| soup = BeautifulSoup(response.text, 'html.parser') | |
| text = soup.get_text() | |
| return text | |
| # Function to create embeddings | |
| def create_embeddings(texts): | |
| model = SentenceTransformer('paraphrase-MiniLM-L6-v2') | |
| embeddings = model.encode(texts) | |
| return embeddings | |
| # Use Faiss for similarity search | |
| def search(query, documents, k=1): | |
| if not documents: # Ensure documents are not empty | |
| return ["No documents available for searching."] | |
| embeddings = create_embeddings([query] + documents) | |
| query_embedding = embeddings[0] | |
| doc_embeddings = np.stack(embeddings[1:]) | |
| if doc_embeddings.shape[0] == 0: # Ensure embeddings are non-empty | |
| return ["No embeddings available for searching."] | |
| index = faiss.IndexFlatL2(doc_embeddings.shape[1]) # L2 distance for similarity | |
| index.add(doc_embeddings) | |
| # Perform search | |
| try: | |
| D, I = index.search(np.array([query_embedding]), k) | |
| return [documents[i] for i in I[0] if i < len(documents)] # Ensure indices are valid | |
| except Exception as e: | |
| return [f"Error during search: {str(e)}"] | |
| # Function for Text-to-Speech | |
| def text_to_speech(text): | |
| tts = gTTS(text) | |
| tts.save("response.mp3") | |
| os.system("start response.mp3") # For Windows, use "start", on Linux or macOS use "open" | |
| # Function for Speech-to-Text | |
| def speech_to_text(): | |
| recognizer = sr.Recognizer() | |
| with sr.Microphone() as source: | |
| print("Listening...") | |
| audio = recognizer.listen(source) | |
| query = recognizer.recognize_google(audio) | |
| print(f"User: {query}") | |
| return query | |
| # Function to generate responses using Hugging Face GPT model | |
| def generate_response(query): | |
| generator = pipeline("text-generation", model="gpt2") | |
| response = generator(query, max_new_tokens=50, num_return_sequences=1) | |
| return response[0]['generated_text'] | |
| # Main Streamlit function | |
| def main(): | |
| st.title("Custom Multilingual Chatbot") | |
| mode = st.selectbox("Choose Mode", ["Text", "Voice"]) | |
| if mode == "Text": | |
| user_input = st.text_input("Ask me anything:") | |
| if user_input: | |
| url = "https://www.sbbusba.edu.pk/" # Example URL | |
| web_content = scrape_website(url) | |
| relevant_data = [] # Initialize with a default value | |
| if web_content: # Ensure web content is not empty | |
| relevant_data = search(user_input, [web_content]) | |
| if relevant_data and isinstance(relevant_data[0], str): | |
| response = generate_response(f"Based on the content of the website: {relevant_data[0]}") | |
| st.write("Bot: " + response) | |
| text_to_speech(response) | |
| else: | |
| st.write("Bot: Sorry, I couldn't find any relevant data.") | |
| elif mode == "Voice": | |
| if st.button("Start Listening"): | |
| query = speech_to_text() # Listen and convert to text | |
| url = "https://www.sbbusba.edu.pk/" # Example URL | |
| web_content = scrape_website(url) | |
| relevant_data = search(query, [web_content]) | |
| response = generate_response(f"Based on the content of the website: {relevant_data[0]}") | |
| st.write("Bot: " + response) | |
| text_to_speech(response) # Convert the text response to speech | |
| if __name__ == "__main__": | |
| main() | |