# app.py — "Chat" Hugging Face Space by rockerritesh (commit 55743e6, verified)
# TF-IDF retrieval chatbot over a user-uploaded text file.
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import textwrap
# Streamlit sidebar for file upload and chunk size slider
st.sidebar.title("Upload your text file")
# `uploaded_file` is Streamlit's UploadedFile (a bytes file-like object) once
# the user picks a .txt file, and None before that; the rest of the app only
# runs when it is set.
uploaded_file = st.sidebar.file_uploader("Choose a text file", type=["txt"])
# Slider for chunk size selection
# Chunk width in characters used to split the document into retrievable pieces
# (passed to textwrap.wrap below).
chunk_size = st.sidebar.slider("Select chunk size", min_value=100, max_value=500, step=100, value=300)
if uploaded_file:
    # --- Corpus preparation -------------------------------------------------
    # Streamlit's uploader yields raw bytes; decode defensively so a file that
    # is not valid UTF-8 shows replacement characters instead of crashing the
    # app with UnicodeDecodeError.
    text_data = uploaded_file.read().decode("utf-8", errors="replace")

    # Split the document into fixed-width character chunks; each chunk is one
    # retrievable "response" for the chatbot.
    sentences = textwrap.wrap(text_data, chunk_size)

    if not sentences:
        # An empty or whitespace-only file would make TfidfVectorizer.fit
        # raise ValueError ("empty vocabulary"); fail gracefully instead.
        st.sidebar.warning("The uploaded file contains no text.")
        st.stop()

    # Fit TF-IDF on the chunks once per rerun; keep the document-term matrix
    # sparse — cosine_similarity works on sparse input directly.
    vectorizer = TfidfVectorizer().fit(sentences)
    vectors = vectorizer.transform(sentences)

    def get_top_responses(user_query, top_n=5):
        """Return the top_n corpus chunks most similar to user_query.

        Similarity is cosine similarity in TF-IDF space; results are ordered
        from most to least similar. If the corpus has fewer than top_n chunks,
        all chunks are returned.
        """
        user_vector = vectorizer.transform([user_query])
        similarities = cosine_similarity(user_vector, vectors).flatten()
        # argsort is ascending: take the last top_n indices, then reverse so
        # the best match comes first.
        top_indices = similarities.argsort()[-top_n:][::-1]
        return [sentences[i] for i in top_indices]

    # --- Chat UI ------------------------------------------------------------
    st.title("TF-IDF Chatbot")

    # Persist the conversation across Streamlit reruns.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Chat input box
    user_input = st.chat_input("Ask me anything")

    # Handle user input: store the user message, then one bot message per
    # retrieved chunk.
    if user_input:
        st.session_state.messages.append({"role": "user", "content": user_input})
        for response in get_top_responses(user_input):
            st.session_state.messages.append({"role": "bot", "content": response})

    # Re-render the full history on every rerun (Streamlit re-executes the
    # whole script on each interaction).
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])