Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import textwrap | |
# Sidebar controls: the document to index and the chunk width used to split it.
with st.sidebar:
    st.title("Upload your text file")
    uploaded_file = st.file_uploader("Choose a text file", type=["txt"])
    # Chunk width in characters, selectable from 100 to 500 in steps of 100.
    chunk_size = st.slider(
        "Select chunk size",
        min_value=100,
        max_value=500,
        step=100,
        value=300,
    )
# Everything below needs the uploaded document, so it only runs once a file
# has been provided.
if uploaded_file:
    # getvalue() returns the whole buffer regardless of the stream position.
    # "utf-8-sig" drops a leading BOM, and errors="replace" keeps a file with
    # stray non-UTF-8 bytes from crashing the app.
    text_data = uploaded_file.getvalue().decode("utf-8-sig", errors="replace")

    # textwrap.wrap collapses whitespace and yields chunks of at most
    # chunk_size characters.
    sentences = textwrap.wrap(text_data, chunk_size)
    if not sentences:
        st.warning("The uploaded file is empty; upload a file that contains text.")
        st.stop()

    # Fit the TF-IDF model and vectorize the chunks in one pass; the result
    # stays sparse.
    vectorizer = TfidfVectorizer()
    try:
        vectors = vectorizer.fit_transform(sentences)
    except ValueError:
        # Raised when no usable tokens remain (e.g. punctuation-only text).
        st.error("The uploaded file has no indexable words.")
        st.stop()

    def get_top_responses(user_query, top_n=5):
        """Return the chunks most similar to ``user_query``.

        Args:
            user_query: Free-text question typed by the user.
            top_n: Maximum number of chunks to return.

        Returns:
            Chunks from ``sentences`` ordered from most to least similar by
            TF-IDF cosine similarity. Chunks with zero similarity are
            omitted, so the list may be shorter than ``top_n`` or empty.
        """
        if top_n <= 0:
            # A slice of [-0:] would select every chunk, so bail out early.
            return []
        # Vectorize the query with the fitted vocabulary (result is sparse).
        user_vector = vectorizer.transform([user_query])
        similarities = cosine_similarity(user_vector, vectors).flatten()
        # argsort is ascending: take the last top_n and reverse for best-first.
        top_indices = similarities.argsort()[-top_n:][::-1]
        # Skip chunks that share no terms with the query; they are not answers.
        return [sentences[i] for i in top_indices if similarities[i] > 0]

    # Streamlit chat elements
    st.title("TF-IDF Chatbot")

    # Chat history lives in session state so it survives script reruns.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Chat input box
    user_input = st.chat_input("Ask me anything")

    if user_input:
        # Store the user message in the session
        st.session_state.messages.append({"role": "user", "content": user_input})
        responses = get_top_responses(user_input)
        if not responses:
            # Always answer, even when nothing in the document matches.
            responses = ["I couldn't find anything relevant to that in the uploaded file."]
        # Store each matching chunk as its own bot message
        st.session_state.messages.extend(
            {"role": "bot", "content": response} for response in responses
        )

    # Display the chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])