rockerritesh committed on
Commit
819112e
·
verified ·
1 Parent(s): 26a25b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -46
app.py CHANGED
@@ -1,53 +1,59 @@
1
  import streamlit as st
2
  from sklearn.feature_extraction.text import TfidfVectorizer
3
  from sklearn.metrics.pairwise import cosine_similarity
4
- import requests
5
 
6
# Load the text file from the URL.
url = "http://llm.sumityadav.com.np/bio.txt"
# Bound the wait so a stalled server cannot hang the app, and fail loudly on
# HTTP errors so an error page is never indexed as if it were the corpus.
response = requests.get(url, timeout=10)
response.raise_for_status()
text_data = response.text

# Split the text into '##'-delimited passages for easier querying. Blank
# passages are dropped because they carry no terms and could otherwise be
# returned as an (empty) answer.
sentences = [part.strip() for part in text_data.split('##') if part.strip()]

# Fit the TF-IDF vocabulary and vectorize the passages in one pass.
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(sentences)  # Don't convert to array, keep it sparse
17
-
18
def get_response(user_query):
    """Return the entry of ``sentences`` most similar to ``user_query``.

    The query is transformed with the module-level ``vectorizer`` and
    compared against ``vectors`` by cosine similarity; the sentence at the
    highest-scoring position is returned.
    """
    # The query is wrapped in a list because transform() expects an iterable
    # of documents; the result is kept sparse.
    query_vector = vectorizer.transform([user_query])

    # One row of scores, one column per sentence.
    scores = cosine_similarity(query_vector, vectors)

    # argmax() works on the flattened array, which for a single-row result
    # is the sentence index.
    best_match = scores.argmax()
    return sentences[best_match]
28
-
29
# Page heading for the chat view.
st.title("TF-IDF Chatbot")

# The conversation is kept in session state; create it on first use.
if "messages" not in st.session_state:
    st.session_state.messages = []
history = st.session_state.messages

# Chat input box; yields a falsy value when nothing was submitted.
user_input = st.chat_input("Ask me anything")

if user_input:
    # Record the user's turn first, then the bot's reply to it.
    history.append({"role": "user", "content": user_input})
    history.append({"role": "bot", "content": get_response(user_input)})

# Render every stored turn, oldest first.
for entry in history:
    with st.chat_message(entry["role"]):
        st.write(entry["content"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from sklearn.feature_extraction.text import TfidfVectorizer
3
  from sklearn.metrics.pairwise import cosine_similarity
 
4
 
5
# Streamlit sidebar for file upload
st.sidebar.title("Upload your text file")
uploaded_file = st.sidebar.file_uploader("Choose a text file", type=["txt"])

# Corpus state read by get_top_responses(). It stays empty until an uploaded
# file has been decoded and indexed successfully.
sentences = []
vectorizer = None
vectors = None


def get_top_responses(user_query, top_n=5):
    """Return up to ``top_n`` corpus lines ranked by similarity to the query.

    Args:
        user_query: Free-text question typed by the user.
        top_n: Maximum number of lines to return.

    Returns:
        Lines from ``sentences`` in descending cosine-similarity order.
        Lines whose similarity is zero are left out, so the list may be
        shorter than ``top_n`` or empty. An empty list is also returned
        when ``top_n`` is not positive or no corpus has been indexed.
    """
    # A slice of [-0:] would select the whole array, so reject non-positive
    # limits up front.
    if top_n <= 0 or vectorizer is None:
        return []

    # Transform the query with the fitted vocabulary; the result stays sparse.
    user_vector = vectorizer.transform([user_query])

    # One similarity score per corpus line.
    similarities = cosine_similarity(user_vector, vectors).flatten()

    # Highest scores first, capped at top_n.
    ranked = similarities.argsort()[::-1][:top_n]

    # A zero score means the line shares no terms with the query.
    return [sentences[i] for i in ranked if similarities[i] > 0]


if uploaded_file:
    try:
        # getvalue() returns the whole buffer regardless of the current read
        # position, unlike read().
        text_data = uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text.")
    else:
        # splitlines() also handles '\r\n' endings; blank lines are dropped
        # because they contain no terms to match against.
        sentences = [
            line.strip() for line in text_data.splitlines() if line.strip()
        ]
        try:
            vectorizer = TfidfVectorizer()
            vectors = vectorizer.fit_transform(sentences)  # Keep it sparse
        except ValueError:
            # Raised by TfidfVectorizer when the text yields no vocabulary
            # (for example an empty file).
            vectorizer = None
            vectors = None
            st.error("The uploaded file contains no indexable text.")

if vectorizer is not None:
    # Streamlit chat elements
    st.title("TF-IDF Chatbot")

    # Chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Chat input box
    user_input = st.chat_input("Ask me anything")

    # Handle user input
    if user_input:
        # Store the user message in the session
        st.session_state.messages.append({"role": "user", "content": user_input})

        # Get the top bot responses, with an explicit reply when nothing matches
        responses = get_top_responses(user_input)
        if not responses:
            responses = ["No matching lines found in the uploaded text."]

        # Store the bot responses in the session
        for response in responses:
            st.session_state.messages.append({"role": "bot", "content": response})

    # Display the chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])