# app.py — "Chat" Hugging Face Space by rockerritesh (commit 55743e6, verified)
# TF-IDF retrieval chatbot over a user-uploaded text file.
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import textwrap
# Streamlit sidebar for file upload and chunk size slider
st.sidebar.title("Upload your text file")
# `uploaded_file` is Streamlit's UploadedFile (a bytes file-like object) once
# the user picks a .txt file, and None before that; the rest of the app only
# runs when it is set.
uploaded_file = st.sidebar.file_uploader("Choose a text file", type=["txt"])
# Slider for chunk size selection
# Chunk width in characters used to split the document into retrievable pieces
# (passed to textwrap.wrap below).
chunk_size = st.sidebar.slider("Select chunk size", min_value=100, max_value=500, step=100, value=300)
if uploaded_file:
    # --- Corpus preparation -------------------------------------------------
    # Streamlit's uploader yields raw bytes; decode defensively so a file that
    # is not valid UTF-8 shows replacement characters instead of crashing the
    # app with UnicodeDecodeError.
    text_data = uploaded_file.read().decode("utf-8", errors="replace")

    # Split the document into fixed-width character chunks; each chunk is one
    # retrievable "response" for the chatbot.
    sentences = textwrap.wrap(text_data, chunk_size)

    if not sentences:
        # An empty or whitespace-only file would make TfidfVectorizer.fit
        # raise ValueError ("empty vocabulary"); fail gracefully instead.
        st.sidebar.warning("The uploaded file contains no text.")
        st.stop()

    # Fit TF-IDF on the chunks once per rerun; keep the document-term matrix
    # sparse — cosine_similarity works on sparse input directly.
    vectorizer = TfidfVectorizer().fit(sentences)
    vectors = vectorizer.transform(sentences)

    def get_top_responses(user_query, top_n=5):
        """Return the top_n corpus chunks most similar to user_query.

        Similarity is cosine similarity in TF-IDF space; results are ordered
        from most to least similar. If the corpus has fewer than top_n chunks,
        all chunks are returned.
        """
        user_vector = vectorizer.transform([user_query])
        similarities = cosine_similarity(user_vector, vectors).flatten()
        # argsort is ascending: take the last top_n indices, then reverse so
        # the best match comes first.
        top_indices = similarities.argsort()[-top_n:][::-1]
        return [sentences[i] for i in top_indices]

    # --- Chat UI ------------------------------------------------------------
    st.title("TF-IDF Chatbot")

    # Persist the conversation across Streamlit reruns.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Chat input box
    user_input = st.chat_input("Ask me anything")

    # Handle user input: store the user message, then one bot message per
    # retrieved chunk.
    if user_input:
        st.session_state.messages.append({"role": "user", "content": user_input})
        for response in get_top_responses(user_input):
            st.session_state.messages.append({"role": "bot", "content": response})

    # Re-render the full history on every rerun (Streamlit re-executes the
    # whole script on each interaction).
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])