import os
import shutil
import time

import kagglehub
import pandas as pd
import streamlit as st
from kagglehub import KaggleDatasetAdapter
from sklearn.model_selection import train_test_split

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DataFrameLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Load the support-ticket dataset from Kaggle into a pandas DataFrame.
# file_path is a placeholder -- point it at the CSV inside the dataset
# that you actually want to load.
file_path = "tickets.csv"
df = kagglehub.load_dataset(
    KaggleDatasetAdapter.PANDAS,
    "tobiasbueck/multilingual-customer-support-tickets",
    file_path,
)
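
# Optional peek at the columns the pipeline relies on (illustrative only;
# assumes the file exposes 'language', 'body', and 'answer' columns):
# print(df[['language', 'body', 'answer']].head())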

# Keep only English-language tickets.
df = df[df['language'] == 'en']

# Drop rows whose body or answer is not a string (e.g. NaN).
non_string_body = df[~df['body'].apply(lambda x: isinstance(x, str))].index
non_string_answers = df[~df['answer'].apply(lambda x: isinstance(x, str))].index
non_string_ids = non_string_body.union(non_string_answers)
df = df.drop(index=non_string_ids)

# Fuse each ticket and its resolution into one retrievable document.
df['q_and_a'] = 'Question: ' + df['body'] + ' Answer: ' + df['answer']

# 80/10/10 train/validation/test split.
df_train, df_holdout = train_test_split(df, test_size=0.2, random_state=42)
df_val, df_test = train_test_split(df_holdout, test_size=0.5, random_state=42)

# Rebuild the Chroma index from scratch on every run.
persist_directory = './chroma_db'
shutil.rmtree(persist_directory, ignore_errors=True)

# Wrap each q_and_a string in a LangChain Document.
loader = DataFrameLoader(df_train, page_content_column="q_and_a")
documents = loader.load()
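
# Optional: inspect one wrapped document before indexing (illustrative only):
# print(documents[0].page_content[:200])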

# The OpenAI key is read from the environment; the embedding model must
# exist before the vector store can be built.
openai_api_key = os.getenv("openai_token")
embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Embed the training documents and persist them in Chroma.
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embedding,
    persist_directory=persist_directory,
)
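
# Optional sanity check before wiring the store into a chain (a minimal
# sketch; the query string is only an illustrative example):
# for doc in vectordb.similarity_search("How do I reset my password?", k=2):
#     print(doc.page_content[:120])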

# Chat model used to compose answers from the retrieved tickets.
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model_name=llm_name, temperature=0.7,
                 openai_api_key=openai_api_key)

# Retrieval-augmented QA: the 5 most similar past tickets are passed to
# the LLM as context for each query.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(search_kwargs={"k": 5})
)
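
# Quick offline smoke test (a sketch; the sample question is hypothetical,
# not taken from the dataset):
# print(qa_chain({"query": "My order arrived damaged. What should I do?"})['result'])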

# Stream the answer word by word so the UI feels responsive.
def response_generator(prompt):
    response = qa_chain({"query": prompt})['result']
    for word in response.split():
        yield word + " "
        time.sleep(0.05)

st.title("Technical Support Chatbot")

# Initialise the chat history once per session.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay prior messages on every Streamlit rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle a new user question.
if prompt := st.chat_input("Enter your question here"):
    # Record and display the user's message.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate, stream, and record the assistant's reply.
    with st.chat_message("assistant"):
        response = st.write_stream(response_generator(prompt))
    st.session_state.messages.append({"role": "assistant", "content": response})