File size: 3,670 Bytes
7dd7b7f
9970661
 
 
 
 
 
 
 
 
9bb1fd1
9970661
 
 
 
 
 
9bb1fd1
9970661
 
 
 
 
dab76fd
9970661
 
e98c9e1
 
 
9970661
 
 
e98c9e1
 
 
9970661
 
 
e98c9e1
 
 
 
 
 
 
9970661
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
094de79
9970661
 
094de79
9970661
7dd7b7f
 
9970661
 
dab76fd
9970661
 
 
d59637c
 
310713e
 
 
d59637c
 
 
 
 
 
 
 
c676825
 
9970661
 
 
 
 
 
 
 
 
 
 
dab76fd
7dd7b7f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import os
import subprocess
import sys

import faiss
import numpy as np
import requests
import speech_recognition as sr
import streamlit as st
from bs4 import BeautifulSoup
from gtts import gTTS
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Scrape website data
def scrape_website(url, timeout=10):
    """Download *url* and return the page's visible text.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float, optional
        Seconds to wait for the HTTP response (default 10). Without a
        timeout, ``requests.get`` can block the Streamlit app forever.

    Returns
    -------
    str
        Text extracted from the page's HTML.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status — better to fail
        loudly than to embed an error page's text.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # separator/strip keep adjacent tags' words from fusing together,
    # which would pollute the embeddings built from this text
    return soup.get_text(separator=' ', strip=True)

# Function to create embeddings
# Cache the model at module level: the original re-instantiated
# SentenceTransformer (a multi-MB model load) on every single call.
_EMBEDDING_MODEL = None


def create_embeddings(texts):
    """Encode *texts* into dense vectors with MiniLM.

    Parameters
    ----------
    texts : list[str]
        The strings to embed.

    Returns
    -------
    numpy.ndarray
        One embedding row per input string.
    """
    global _EMBEDDING_MODEL
    if _EMBEDDING_MODEL is None:
        _EMBEDDING_MODEL = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    return _EMBEDDING_MODEL.encode(texts)

# Use Faiss for similarity search
def search(query, documents, k=1):
    """Return up to *k* documents most similar to *query* (L2 distance).

    Parameters
    ----------
    query : str
        The user's question.
    documents : list[str]
        Candidate documents to rank.
    k : int, optional
        Number of results wanted (default 1); clamped to ``len(documents)``.

    Returns
    -------
    list[str]
        Matching documents, or a single human-readable message string when
        there is nothing to search / the search fails (callers display
        ``result[0]`` directly, so errors are returned as data).
    """
    if not documents:  # Ensure documents are not empty
        return ["No documents available for searching."]

    embeddings = create_embeddings([query] + documents)
    # faiss requires contiguous float32 input; be explicit rather than
    # relying on the encoder's default dtype
    query_embedding = np.asarray([embeddings[0]], dtype=np.float32)
    doc_embeddings = np.asarray(embeddings[1:], dtype=np.float32)

    if doc_embeddings.shape[0] == 0:  # Ensure embeddings are non-empty
        return ["No embeddings available for searching."]

    index = faiss.IndexFlatL2(doc_embeddings.shape[1])  # L2 distance for similarity
    index.add(doc_embeddings)

    # Asking for more neighbors than exist makes faiss pad with -1 ids;
    # clamp k so we never request impossible results.
    k = min(k, len(documents))

    try:
        _, ids = index.search(query_embedding, k)
        # Guard BOTH bounds: faiss uses -1 for "no result", and a negative
        # Python index would silently wrap to the end of the list.
        return [documents[i] for i in ids[0] if 0 <= i < len(documents)]
    except Exception as e:
        # Deliberate best-effort: surface the failure as a displayable string.
        return [f"Error during search: {str(e)}"]

# Function for Text-to-Speech
def text_to_speech(text, filename="response.mp3"):
    """Synthesize *text* with gTTS, save it, and play it with the OS default app.

    Parameters
    ----------
    text : str
        The sentence to speak.
    filename : str, optional
        Output MP3 path (default "response.mp3", as before).
    """
    tts = gTTS(text)
    tts.save(filename)
    # The original ran `os.system("start response.mp3")`, which only works
    # on Windows and goes through a shell string. Dispatch per platform and
    # avoid the shell entirely.
    if sys.platform.startswith("win"):
        os.startfile(filename)  # Windows "open with associated app"
    elif sys.platform == "darwin":
        subprocess.run(["open", filename], check=False)
    else:
        subprocess.run(["xdg-open", filename], check=False)

# Function for Speech-to-Text
def speech_to_text():
    """Listen on the default microphone and return the recognized text.

    Returns
    -------
    str
        The transcribed utterance, or an empty string when the audio could
        not be understood or the Google recognition service was unreachable.
        (The original let those exceptions propagate and crash the app on
        any unrecognized utterance.)
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
    # Recognition happens outside the mic context: it is a slow network
    # call and does not need the device held open.
    try:
        query = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Could not understand audio.")
        return ""
    except sr.RequestError as e:
        print(f"Speech recognition service error: {e}")
        return ""
    print(f"User: {query}")
    return query

# Function to generate responses using Hugging Face GPT model
# Cache the pipeline at module level: the original rebuilt it (a full
# model load) on every single chat turn.
_TEXT_GENERATOR = None


def generate_response(query):
    """Generate a GPT-2 continuation of *query*.

    Parameters
    ----------
    query : str
        Prompt to continue.

    Returns
    -------
    str
        The generated text (prompt included, as `text-generation` returns it).
    """
    global _TEXT_GENERATOR
    if _TEXT_GENERATOR is None:
        _TEXT_GENERATOR = pipeline("text-generation", model="gpt2")
    response = _TEXT_GENERATOR(query, max_new_tokens=50, num_return_sequences=1)
    return response[0]['generated_text']


# Main Streamlit function
def main():
    """Streamlit entry point: text or voice chat grounded in a scraped site.

    Scrapes a fixed university website, retrieves the content most relevant
    to the user's question, and answers with GPT-2 (spoken aloud as well).
    """
    st.title("Custom Multilingual Chatbot")

    mode = st.selectbox("Choose Mode", ["Text", "Voice"])
    url = "https://www.sbbusba.edu.pk/"  # Example URL

    if mode == "Text":
        user_input = st.text_input("Ask me anything:")
        if user_input:
            # BUG FIX: in the original, everything below was dedented out of
            # `if user_input:`, so `web_content` was referenced before
            # assignment (NameError) whenever the text box was empty.
            web_content = scrape_website(url)
            relevant_data = []  # default when the page came back empty
            if web_content:  # Ensure web content is not empty
                relevant_data = search(user_input, [web_content])

            if relevant_data and isinstance(relevant_data[0], str):
                response = generate_response(
                    f"Based on the content of the website: {relevant_data[0]}"
                )
                st.write("Bot: " + response)
                text_to_speech(response)
            else:
                st.write("Bot: Sorry, I couldn't find any relevant data.")

    elif mode == "Voice":
        if st.button("Start Listening"):
            query = speech_to_text()  # Listen and convert to text
            if not query:
                st.write("Bot: Sorry, I didn't catch that.")
                return
            web_content = scrape_website(url)
            relevant_data = search(query, [web_content])

            # Guard like the Text branch does, instead of indexing
            # relevant_data[0] unconditionally.
            if relevant_data and isinstance(relevant_data[0], str):
                response = generate_response(
                    f"Based on the content of the website: {relevant_data[0]}"
                )
                st.write("Bot: " + response)
                text_to_speech(response)  # Convert the text response to speech
            else:
                st.write("Bot: Sorry, I couldn't find any relevant data.")

# Launch the Streamlit app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()