# Streamlit multilingual chatbot: scrapes a website, retrieves relevant
# passages with FAISS similarity search, and answers via a Hugging Face
# text-generation model, with optional speech input/output.
import streamlit as st
import requests
from bs4 import BeautifulSoup
import speech_recognition as sr
from gtts import gTTS
import os
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline
# Scrape website data
def scrape_website(url, timeout=10):
    """Fetch *url* and return its visible text content.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the HTTP response (default 10) so the
            Streamlit app cannot hang forever on a dead host.

    Returns:
        The page text with HTML tags stripped, or "" if the request fails
        (callers already treat empty content as "nothing to search").
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Surface 4xx/5xx instead of silently parsing an error page.
        response.raise_for_status()
    except requests.RequestException:
        return ""
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.get_text()
# Function to create embeddings
def create_embeddings(texts):
    """Encode *texts* into dense sentence embeddings.

    The SentenceTransformer model is loaded once and memoized on the function
    object, so repeated calls (every Streamlit rerun) do not re-initialize it.

    Args:
        texts: List of strings to embed.

    Returns:
        A numpy array of shape (len(texts), embedding_dim).
    """
    model = getattr(create_embeddings, "_model", None)
    if model is None:
        # First call: load and cache the model (expensive operation).
        model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
        create_embeddings._model = model
    return model.encode(texts)
# Use Faiss for similarity search
def search(query, documents, k=1):
    """Return up to *k* documents most similar to *query*.

    Embeds the query together with the documents and ranks by L2 distance
    with a flat FAISS index.

    Args:
        query: The user's question.
        documents: List of document strings to search over.
        k: Maximum number of hits to return (clamped to len(documents)).

    Returns:
        A list of matching document strings, or a single-element list with a
        diagnostic message when there is nothing to search / an error occurs.
    """
    if not documents:  # Ensure documents are not empty
        return ["No documents available for searching."]
    embeddings = create_embeddings([query] + documents)
    # FAISS requires float32 input; coerce defensively.
    query_embedding = np.asarray(embeddings[0], dtype=np.float32)
    doc_embeddings = np.asarray(np.stack(embeddings[1:]), dtype=np.float32)
    if doc_embeddings.shape[0] == 0:  # Ensure embeddings are non-empty
        return ["No embeddings available for searching."]
    index = faiss.IndexFlatL2(doc_embeddings.shape[1])  # L2 distance for similarity
    index.add(doc_embeddings)
    # FAISS pads missing hits with index -1; never request more than exist.
    k = min(k, len(documents))
    try:
        _, indices = index.search(query_embedding.reshape(1, -1), k)
        # 0 <= i guards against -1 padding, which would otherwise wrap to
        # documents[-1] and return an unrelated document.
        return [documents[i] for i in indices[0] if 0 <= i < len(documents)]
    except Exception as e:
        return [f"Error during search: {str(e)}"]
# Function for Text-to-Speech
def text_to_speech(text):
    """Synthesize *text* to speech (response.mp3) and play it with the OS default player.

    Uses a platform dispatch instead of a hard-coded Windows-only
    os.system("start ...") shell string, so the app also works on macOS/Linux.
    """
    import platform
    import subprocess

    tts = gTTS(text)
    tts.save("response.mp3")
    system = platform.system()
    if system == "Windows":
        # os.startfile avoids spawning a shell; Windows-only API.
        os.startfile("response.mp3")
    elif system == "Darwin":
        subprocess.Popen(["open", "response.mp3"])
    else:
        subprocess.Popen(["xdg-open", "response.mp3"])
# Function for Speech-to-Text
def speech_to_text():
    """Listen on the default microphone and return the recognized text.

    Returns:
        The transcribed query string, or "" when the audio could not be
        understood or the recognition service was unreachable (the original
        code let these exceptions crash the app).
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
    try:
        query = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Could not understand the audio.")
        return ""
    except sr.RequestError as e:
        print(f"Speech recognition service error: {e}")
        return ""
    print(f"User: {query}")
    return query
# Function to generate responses using Hugging Face GPT model
def generate_response(query):
    """Generate a continuation of *query* with GPT-2.

    The text-generation pipeline is built once and memoized on the function
    object — rebuilding it on every call reloads the whole model.

    Args:
        query: Prompt text to continue.

    Returns:
        The generated text (prompt included, as the pipeline returns it).
    """
    generator = getattr(generate_response, "_generator", None)
    if generator is None:
        generator = pipeline("text-generation", model="gpt2")
        generate_response._generator = generator
    response = generator(query, max_new_tokens=50, num_return_sequences=1)
    return response[0]['generated_text']
# Main Streamlit function
WEBSITE_URL = "https://www.sbbusba.edu.pk/"  # knowledge source for the chatbot


def _answer(query):
    """Scrape the site, retrieve text relevant to *query*, and show/speak a reply.

    Shared by the Text and Voice modes; the voice branch previously duplicated
    this pipeline without the empty-content guards the text branch had.
    """
    web_content = scrape_website(WEBSITE_URL)
    relevant_data = []  # default when the site yields no content
    if web_content:  # Ensure web content is not empty
        relevant_data = search(query, [web_content])
    if relevant_data and isinstance(relevant_data[0], str):
        response = generate_response(f"Based on the content of the website: {relevant_data[0]}")
        st.write("Bot: " + response)
        text_to_speech(response)  # Convert the text response to speech
    else:
        st.write("Bot: Sorry, I couldn't find any relevant data.")


def main():
    """Streamlit entry point: text or voice chat over scraped website content."""
    st.title("Custom Multilingual Chatbot")
    mode = st.selectbox("Choose Mode", ["Text", "Voice"])
    if mode == "Text":
        user_input = st.text_input("Ask me anything:")
        if user_input:
            _answer(user_input)
    elif mode == "Voice":
        if st.button("Start Listening"):
            query = speech_to_text()  # Listen and convert to text
            if query:
                _answer(query)
            else:
                st.write("Bot: Sorry, I couldn't hear you.")


if __name__ == "__main__":
    main()