# NOTE(review): the following lines are web-page scrape residue (Hugging Face
# Space status, commit hashes, and the line-number gutter), not source code.
# Commented out so the file parses; kept for provenance.
# Spaces: Sleeping / Sleeping / File size: 2,237 Bytes
# 5311203 55743e6 5311203 55743e6 819112e 5311203 55743e6 5311203 819112e 7c2d6e5 55743e6 819112e 55743e6 819112e
# (line-number gutter 1..63 removed)
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import textwrap
# --- Sidebar: file upload and chunking configuration -------------------------
st.sidebar.title("Upload your text file")
uploaded_file = st.sidebar.file_uploader("Choose a text file", type=["txt"])

# Chunk size controls retrieval granularity: each chunk is one candidate answer.
chunk_size = st.sidebar.slider("Select chunk size", min_value=100, max_value=500, step=100, value=300)

if uploaded_file:
    # Decode the uploaded bytes as UTF-8 text.
    text_data = uploaded_file.read().decode("utf-8")

    # Split the text into fixed-width chunks of at most `chunk_size` characters.
    sentences = textwrap.wrap(text_data, chunk_size)

    # Guard: an empty/whitespace-only file yields no chunks, and fitting
    # TfidfVectorizer on an empty corpus raises "empty vocabulary".
    if not sentences:
        st.warning("The uploaded file contains no text.")
        st.stop()

    # Fit TF-IDF on the chunks; keep the document matrix sparse throughout.
    vectorizer = TfidfVectorizer().fit(sentences)
    vectors = vectorizer.transform(sentences)  # sparse (n_chunks, n_terms)

    def get_top_responses(user_query, top_n=5):
        """Return the top_n chunks most similar to user_query by TF-IDF cosine.

        Results are ordered most-similar first. If there are fewer than
        top_n chunks, all chunks are returned.
        """
        user_vector = vectorizer.transform([user_query])
        # cosine_similarity handles sparse inputs directly; flatten the 1-row result.
        similarities = cosine_similarity(user_vector, vectors).flatten()
        # argsort is ascending, so take the last top_n indices and reverse
        # to get descending similarity order.
        top_indices = similarities.argsort()[-top_n:][::-1]
        return [sentences[i] for i in top_indices]

    # --- Chat UI -------------------------------------------------------------
    st.title("TF-IDF Chatbot")

    # Persist conversation across Streamlit reruns.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    user_input = st.chat_input("Ask me anything")

    if user_input:
        # Record the user turn, then one bot turn per retrieved chunk.
        st.session_state.messages.append({"role": "user", "content": user_input})
        for response in get_top_responses(user_input):
            st.session_state.messages.append({"role": "bot", "content": response})

    # Render the accumulated conversation.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])