|
|
import streamlit as st |
|
|
from googletrans import Translator |
|
|
from langdetect import detect |
|
|
import time |
|
|
import warnings |
|
|
import os |
|
|
|
|
|
|
|
|
# Silence noisy library warnings (googletrans/langdetect emit deprecation
# notices that would clutter the Streamlit log).
warnings.filterwarnings("ignore")

# Streamlit page chrome: wide layout with the sidebar open by default.
# Must be the first Streamlit call in the script.
st.set_page_config(
    page_title="GUVI Multilingual Chatbot",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)
|
|
|
|
|
|
|
|
|
|
|
# Single shared googletrans client, reused by translate_text() for every call.
translator = Translator()
|
|
|
|
|
|
|
|
@st.cache_resource
def load_guvi_dataset():
    """Load the local GUVI Q&A knowledge base into a dict.

    The dataset file ("GUVI dataset.txt") alternates question and answer
    lines.  Questions are lower-cased so lookups in generate_response()
    are case-insensitive.

    Returns:
        dict[str, str]: lower-cased question -> answer.  Empty when the
        dataset file is missing.
    """
    qa_pairs = {}
    try:
        with open("GUVI dataset.txt", "r", encoding="utf-8") as file:
            # Drop blank/whitespace-only lines so separator lines cannot
            # shift the question/answer pairing off by one.
            lines = [line.strip() for line in file if line.strip()]
        for i in range(0, len(lines) - 1, 2):
            qa_pairs[lines[i].lower()] = lines[i + 1]
    except FileNotFoundError:
        # Fix: the message previously named a different file ("guvi.txt")
        # than the one actually opened above.
        st.error("GUVI dataset ('GUVI dataset.txt') not found. Using GPT-only responses.")
    return qa_pairs
|
|
|
|
|
|
|
|
# Knowledge base loaded once at import time (memoized by st.cache_resource).
qa_pairs = load_guvi_dataset()
|
|
|
|
|
|
|
|
# Display name -> ISO 639-1 code for every language offered in the sidebar.
language_map = dict(
    English="en",
    Hindi="hi",
    Tamil="ta",
    Telugu="te",
    Kannada="kn",
    Malayalam="ml",
    Bengali="bn",
    Marathi="mr",
)
|
|
|
|
|
|
|
|
def detect_language(text):
    """Best-effort language detection via langdetect.

    Args:
        text: Raw user input to classify.

    Returns:
        str: ISO 639-1 language code, or "en" when detection fails
        (langdetect raises e.g. on empty or too-short input).
    """
    try:
        return detect(text)
    except Exception:
        # Fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt.  Fall back to English on any detection error.
        return "en"
|
|
|
|
|
|
|
|
def translate_text(text, target_lang, source_lang='auto'):
    """Translate *text* into *target_lang*, returning it unchanged on failure.

    A no-op when the declared source language already matches the target.
    Translation errors are surfaced as a Streamlit warning and the
    original text is returned untouched.
    """
    if source_lang != target_lang:
        try:
            result = translator.translate(text, src=source_lang, dest=target_lang)
        except Exception as e:
            st.warning(f"Translation error: {e}. Returning original text.")
        else:
            return result.text
    return text
|
|
|
|
|
|
|
|
def generate_response(prompt):
    """Generate an English response for an (English) prompt.

    Looks the prompt up in the GUVI Q&A knowledge base first; otherwise
    falls back to the seq2seq chat model.

    Args:
        prompt: English-language user message.

    Returns:
        str: The response text.
    """
    lower_prompt = prompt.lower()
    if lower_prompt in qa_pairs:
        return qa_pairs[lower_prompt]

    # Fix: `models` and `torch` are never defined or imported anywhere in
    # this file, so any prompt outside the dataset crashed with NameError.
    # Guard the model path and degrade to a polite fallback instead.
    try:
        tokenizer = models["chat_tokenizer"]
        chat_model = models["chat_model"]
        no_grad = torch.no_grad
    except NameError:
        return ("I'm sorry, I don't have an answer for that yet. "
                "Please ask me about GUVI courses, careers, or support.")

    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)

    # Beam-search decoding; no_grad avoids building autograd state at inference.
    with no_grad():
        outputs = chat_model.generate(
            **inputs,
            max_length=200,
            num_beams=5,
            early_stopping=True,
            temperature=0.7
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Render the Streamlit chat UI and drive one request/response turn.

    Flow per user message: detect the input language, translate the prompt
    to English, generate a response, translate it back to the language
    chosen in the sidebar, and append both turns to
    ``st.session_state.messages`` so history survives Streamlit reruns.
    """
    # Page-wide CSS: light background, card-style chat bubbles, blue accents
    # on the select box and text input.
    st.markdown("""
    <style>
    .stApp {
        background-color: #f5f5f5;
    }
    .chat-container {
        background-color: white;
        border-radius: 10px;
        padding: 20px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        margin-bottom: 20px;
    }
    .user-message {
        background-color: #e3f2fd;
        padding: 10px;
        border-radius: 10px;
        margin-bottom: 10px;
    }
    .bot-message {
        background-color: #f5f5f5;
        padding: 10px;
        border-radius: 10px;
        margin-bottom: 10px;
    }
    .stSelectbox > div > div {
        border: 1px solid #2196F3 !important;
    }
    .stTextInput > div > div > input {
        border: 1px solid #2196F3 !important;
    }
    </style>
    """, unsafe_allow_html=True)

    # Header / welcome copy.
    st.title("GUVI Multilingual Chatbot 🤖")
    st.markdown("""
    Welcome to the GUVI Multilingual Chatbot! This assistant can help you with:
    - Course information and recommendations
    - Career guidance and mentorship
    - Technical support
    - General queries about GUVI platform

    **Available in multiple Indian languages!**
    """)

    # Sidebar: output-language selector (defaults to English, index 0)
    # plus a short "about" blurb.
    st.sidebar.title("Settings")
    selected_language = st.sidebar.selectbox(
        "Select your preferred language:",
        list(language_map.keys()),
        index=0
    )

    st.sidebar.markdown("---")
    st.sidebar.markdown("### About")
    st.sidebar.markdown("""
    This chatbot is powered by:
    - OpenAI GPT
    - Google Translator
    - GUVI's custom knowledge base

    Developed for GUVI's multilingual learners.
    """)

    # Initialise the conversation history on the first run of the session.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the stored history so the transcript persists across reruns.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Handle a newly submitted message (walrus: None/empty means no input).
    if prompt := st.chat_input("Type your message here..."):

        st.session_state.messages.append({"role": "user", "content": prompt})

        # Language of the typed prompt vs. the language the user wants
        # replies in (sidebar choice).
        input_lang = detect_language(prompt)
        target_lang = language_map[selected_language]

        with st.chat_message("user"):
            st.markdown(prompt)

        with st.spinner("Thinking..."):
            # Normalise the prompt to English before response generation,
            # since the knowledge base / model are English-only.
            if input_lang != "en":
                translated_prompt = translate_text(prompt, "en", input_lang)
            else:
                translated_prompt = prompt

            response = generate_response(translated_prompt)

            # Translate the answer back into the user's chosen language.
            if target_lang != "en":
                final_response = translate_text(response, target_lang, "en")
            else:
                final_response = response

            # Small artificial delay so the spinner is visible.
            time.sleep(0.5)

        with st.chat_message("assistant"):
            st.markdown(final_response)

        st.session_state.messages.append({"role": "assistant", "content": final_response})
|
|
|
|
|
# Script entry point (Streamlit re-executes the file top-to-bottom on each rerun).
if __name__ == "__main__":
    main()