harshith1411 committed on
Commit
2c24a01
·
verified ·
1 Parent(s): efd2bac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -55
app.py CHANGED
@@ -1,93 +1,132 @@
1
  import streamlit as st
2
  import os
3
- from langchain_openai import ChatOpenAI, OpenAIEmbeddings
4
- from langchain_community.document_loaders import TextLoader, PyPDFLoader
5
- from langchain_community.text_splitter import RecursiveCharacterTextSplitter
6
- from langchain.vectorstores import FAISS
7
- from langchain.prompts import ChatPromptTemplate
8
- from langchain_core.output_parsers import StrOutputParser
9
- import tempfile
10
 
11
  os.environ["OPENAI_API_KEY"] = "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"
12
 
13
def load_vectorstore(file_path):
    """Build (or load a cached) FAISS vector store for the given document.

    If a "faiss_index" directory already exists on disk it is loaded and
    returned immediately; otherwise the file at *file_path* is loaded,
    split into overlapping chunks, embedded, indexed, and the index is
    persisted to "faiss_index".

    NOTE(review): the cache is keyed only on the directory existing, so
    uploading a *different* document still returns the previously indexed
    one — looks like a staleness bug; confirm intended behavior.
    """
    if os.path.exists("faiss_index"):
        embeddings = OpenAIEmbeddings()
        # allow_dangerous_deserialization is required by FAISS.load_local
        # for its pickle-backed metadata; acceptable only because this
        # index was written by this same app.
        return FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)

    # Choose a loader from the file extension; anything non-PDF is read as plain text.
    file_ext = file_path.split('.')[-1].lower()
    if file_ext == 'pdf':
        loader = PyPDFLoader(file_path)
    else:
        loader = TextLoader(file_path)

    docs = loader.load()
    # 1000-char chunks with 200-char overlap so content spanning a chunk
    # boundary remains retrievable from either side.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)

    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_documents(splits, embeddings)
    vectorstore.save_local("faiss_index")  # persist for reuse on later reruns
    return vectorstore
32
-
33
def get_rag_chain(vectorstore):
    """Assemble a retrieval-augmented LCEL chain over *vectorstore*.

    The chain retrieves the top-4 chunks for an incoming question, fills
    them into the prompt template, queries gpt-4o-mini at temperature 0,
    and parses the model reply to a plain string.
    """
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

    prompt = ChatPromptTemplate.from_template("Context: {context}. Question: {question}. Answer using context only.")

    # LCEL pipeline: the dict fans the input out — the retriever supplies
    # {context}; the identity lambda passes the raw question through as
    # {question}.
    chain = (
        {"context": retriever, "question": lambda x: x}
        | prompt
        | llm
        | StrOutputParser()
    )
    return chain
46
 
47
# --- Streamlit UI (pre-refactor version of the app) ---
st.title("🧠 Dynamic RAG Chatbot")
st.markdown("Upload PDF/TXT β†’ Ask ANY question!")

uploaded_file = st.file_uploader("πŸ“€ Upload PDF or TXT", type=['pdf', 'txt'])

if uploaded_file is not None:
    # Persist the upload to a real temp file because the document loaders
    # require a filesystem path, not a file-like object.
    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{uploaded_file.name.split('.')[-1]}") as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        file_path = tmp_file.name

    st.success(f"βœ… Loaded: {uploaded_file.name}")

    # Index the document and stash the RAG chain in session state so it
    # survives Streamlit's rerun-on-interaction model.
    with st.spinner("πŸ”„ Indexing..."):
        vectorstore = load_vectorstore(file_path)
        chain = get_rag_chain(vectorstore)
        st.session_state.chain = chain
        st.session_state.ready = True
        st.session_state.doc_name = uploaded_file.name

if 'ready' in st.session_state and st.session_state.ready:
    st.success(f"πŸš€ Ready! Document: {st.session_state.doc_name}")

    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the chat history on every rerun.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if query := st.chat_input("πŸ’¬ Ask about your document..."):
        st.session_state.messages.append({"role": "user", "content": query})
        with st.chat_message("user"):
            st.markdown(query)

        with st.chat_message("assistant"):
            with st.spinner("Searching..."):
                response = st.session_state.chain.invoke(query)
            st.markdown(response)

        st.session_state.messages.append({"role": "assistant", "content": response})

    # Clears only the message history; the indexed document stays loaded.
    if st.button("πŸ—‘οΈ Clear Chat"):
        st.session_state.messages = []
        st.rerun()

else:
    st.info("πŸ‘† Upload PDF/TXT to start chatting!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import os
import re

# SECURITY FIX: the previous revision hardcoded a live OpenAI API key in
# source control. Any key that was ever committed here is compromised and
# must be revoked at platform.openai.com. The key is now read from the
# environment instead of being embedded in the file.
if not os.environ.get("OPENAI_API_KEY"):
    st.error("OPENAI_API_KEY is not set. Export it (or add it to Streamlit secrets) before starting the app.")
    st.stop()

from langchain_openai import ChatOpenAI  # NOTE(review): unused in this version; kept so other modules importing it keep working
from openai import OpenAI
import tempfile

# Module-level client shared by all requests; picks up OPENAI_API_KEY
# from the environment automatically.
client = OpenAI()
13
def simple_split(text, chunk_size=1000):
    """Split *text* into chunks of at most ~chunk_size characters.

    Splitting happens on sentence boundaries (., !, ?) so chunks stay
    readable. A single sentence longer than *chunk_size* becomes its own
    (oversized) chunk rather than being cut mid-sentence.

    Fixes over the previous version:
    - sentences keep their original terminator (the old code re-glued
      every sentence with ". ", turning "Really?" into "Really. ");
    - empty input returns [] (the old code returned ["."]);
    - chunk length is tracked with a running counter instead of
      concatenating strings just to measure them.

    Args:
        text: the document text to split.
        chunk_size: soft upper bound on chunk length, in characters.

    Returns:
        list[str]: non-empty, stripped chunks in document order.
    """
    # Lookbehind split keeps the sentence-ending punctuation attached to
    # its sentence instead of discarding it.
    sentences = re.split(r'(?<=[.!?])\s+', text)

    chunks = []
    current_parts = []  # sentences accumulated for the chunk being built
    current_len = 0     # running length incl. joining spaces

    for sentence in sentences:
        if not sentence:
            continue  # drop empty fragments (e.g. from empty input)
        # +1 accounts for the space that will join this sentence on.
        if current_len + len(sentence) + 1 < chunk_size:
            current_parts.append(sentence)
            current_len += len(sentence) + 1
        else:
            if current_parts:
                chunks.append(" ".join(current_parts).strip())
            current_parts = [sentence]
            current_len = len(sentence) + 1

    if current_parts:
        chunks.append(" ".join(current_parts).strip())

    return chunks
31
+
32
def dynamic_rag(query, document_content):
    """Answer *query* from *document_content* with a minimal, dependency-free RAG loop.

    The document is chunked, every chunk is scored by naive keyword
    overlap with the query, and the three best chunks are handed to
    gpt-4o-mini as the only allowed context.
    """
    chunks = simple_split(document_content)

    # Rank chunks by how many query words they share — plain keyword
    # matching, no embeddings and no external retrieval library.
    query_words = set(query.lower().split())
    scored = [
        (len(query_words.intersection(set(chunk.lower().split()))), chunk)
        for chunk in chunks
    ]
    scored.sort(reverse=True, key=lambda pair: pair[0])
    context = "\n".join(text for _, text in scored[:3])

    prompt = f"""Use ONLY this context from document:

{context}

Question: {query}

Answer using context only:"""

    # Deterministic completion (temperature 0) from the shared client.
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    return completion.choices[0].message.content
62
 
63
st.title("🧠 Dynamic RAG Chatbot")
st.markdown("**Paste text or upload β†’ Ask ANY question!**")

# Input options: file upload on the left, free-text paste on the right.
col1, col2 = st.columns(2)

with col1:
    uploaded_file = st.file_uploader("πŸ“€ Upload TXT", type='txt')

with col2:
    pasted_text = st.text_area("πŸ“ Or paste text here", height=150)

document_content = ""

# An uploaded file takes priority over pasted text when both are given.
if uploaded_file is not None:
    # assumes the upload is UTF-8 text — TODO confirm other encodings are out of scope
    content = uploaded_file.read().decode('utf-8')
    document_content = content
    st.success("βœ… TXT loaded!")
elif pasted_text:
    document_content = pasted_text
    st.success("βœ… Text loaded!")

if document_content:
    # Keep the document in session state so it survives Streamlit reruns.
    st.session_state.document_content = document_content
    st.success("πŸš€ Chatbot ready! Ask about your text.")

if 'document_content' in st.session_state:
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Chat history — replayed on every rerun.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Chat input
    if query := st.chat_input("πŸ’¬ Ask about your document..."):
        st.session_state.messages.append({"role": "user", "content": query})
        with st.chat_message("user"):
            st.markdown(query)

        with st.chat_message("assistant"):
            with st.spinner("πŸ” Searching document..."):
                response = dynamic_rag(query, st.session_state.document_content)
            st.markdown(response)

        st.session_state.messages.append({"role": "assistant", "content": response})

    # Clears only the message history; the loaded document remains.
    if st.button("πŸ—‘οΈ Clear Chat"):
        st.session_state.messages = []
        st.rerun()

else:
    st.info("πŸ‘† **Paste text or upload TXT to start chatting!**")
    st.markdown("""
**Test example:**
```
Skills: Python, DSA, AI/ML
Projects: RAG Chatbot (live demo)
LeetCode: 300 problems solved
```
Ask: "What projects?" β†’ Perfect answer!
""")

st.sidebar.markdown("### πŸ› οΈ Pure Python RAG")
# NOTE(review): the bullets below render in the main page, not the
# sidebar — if they belong under the sidebar heading, they should use
# st.sidebar.markdown; confirm intent.
st.markdown("β€’ Custom text splitter")
st.markdown("β€’ Keyword similarity")
st.markdown("β€’ OpenAI GPT-4o-mini")
st.markdown("β€’ Dynamic input")