Files changed (1) hide show
  1. app.py +55 -60
app.py CHANGED
@@ -2,33 +2,38 @@ import streamlit as st
2
  import tempfile
3
 
4
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
5
- from langchain_text_splitters import RecursiveCharacterTextSplitter
6
- from langchain_community.embeddings import HuggingFaceEmbeddings
7
- from langchain_community.vectorstores import FAISS
 
8
  from langchain.chains import RetrievalQA
9
- from langchain.prompts import PromptTemplate
10
- from langchain_community.llms import HuggingFacePipeline
11
 
12
  from transformers import pipeline
13
 
 
 
 
 
 
 
14
 
15
  # -------------------------------
16
- # Load Documents (FIXED - NO 403)
17
  # -------------------------------
18
  def load_documents(uploaded_files):
19
  documents = []
20
 
21
  for file in uploaded_files:
22
- # ✅ SAFE TEMP FILE (main fix)
23
- with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
24
- tmp_file.write(file.read())
25
- file_path = tmp_file.name
26
 
27
- # Load document
28
  if file.name.endswith(".pdf"):
29
- loader = PyPDFLoader(file_path)
30
  else:
31
- loader = TextLoader(file_path)
32
 
33
  documents.extend(loader.load())
34
 
@@ -40,93 +45,83 @@ def load_documents(uploaded_files):
40
  # -------------------------------
41
def split_documents(documents):
    """Split loaded documents into overlapping chunks.

    Args:
        documents: The documents to split, as collected by the loaders.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for ``documents`` with a chunk size of 800 and an overlap of 100.
    """
    chunking_options = {"chunk_size": 800, "chunk_overlap": 100}
    return RecursiveCharacterTextSplitter(**chunking_options).split_documents(documents)
47
 
48
 
49
  # -------------------------------
50
- # Create Vector Store
51
  # -------------------------------
52
def create_vectorstore(chunks):
    """Build a FAISS vector store from document chunks.

    Args:
        chunks: The split documents to embed and index.

    Returns:
        The store produced by ``FAISS.from_documents`` using the
        ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.
    """
    return FAISS.from_documents(
        chunks,
        HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
    )
57
 
58
 
59
  # -------------------------------
60
- # Load LLM (LIGHT + NO TOKEN)
61
  # -------------------------------
 
62
def load_llm():
    """Create the LangChain LLM wrapper around a local transformers pipeline.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text2text-generation`` pipeline
        for ``google/flan-t5-small``.
    """
    generation_settings = {
        "model": "google/flan-t5-small",  # best balance
        "max_length": 512,
        "temperature": 0.3,
    }
    text2text = pipeline("text2text-generation", **generation_settings)
    return HuggingFacePipeline(pipeline=text2text)
70
 
71
 
72
  # -------------------------------
73
- # Build QA Chain (Better Prompt)
74
  # -------------------------------
75
def build_qa(vectorstore):
    """Assemble the retrieval question-answering chain.

    Args:
        vectorstore: Vector store exposing ``as_retriever``; the chain
            retrieves the top 3 matches from it for each question.

    Returns:
        The chain built by ``RetrievalQA.from_chain_type`` with a custom
        prompt that asks the model to answer only from the given context.
    """
    language_model = load_llm()

    qa_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template="""
Use the following context to answer the question.
If the answer is not in the context, say "Answer not found in document".

Context:
{context}

Question:
{question}

Answer:
""",
    )

    top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    return RetrievalQA.from_chain_type(
        llm=language_model,
        retriever=top_k_retriever,
        chain_type_kwargs={"prompt": qa_prompt},
    )
 
101
 
102
 
103
  # -------------------------------
104
- # Streamlit UI
105
  # -------------------------------
106
# Page setup. The emoji in the user-facing strings below were mis-encoded
# (mojibake) and are restored to the intended characters.
st.set_page_config(page_title="RAG Chatbot", layout="wide")
st.title("📄 Chat with Your Documents (RAG)")

uploaded_files = st.file_uploader(
    "Upload PDF or TXT files (Max ~10MB recommended)",
    accept_multiple_files=True
)

if uploaded_files:
    # Build the whole pipeline (load -> split -> embed -> QA chain) for the
    # uploaded files before accepting a question.
    with st.spinner("Processing documents..."):
        docs = load_documents(uploaded_files)
        chunks = split_documents(docs)
        vectorstore = create_vectorstore(chunks)
        qa_chain = build_qa(vectorstore)

    st.success("✅ Documents processed successfully!")

    # The question box lives inside the upload branch so that qa_chain is
    # always defined by the time a query is run.
    query = st.text_input("Ask a question from your documents")

    if query:
        with st.spinner("Thinking..."):
            try:
                result = qa_chain.run(query)
                st.write("### 📌 Answer:")
                st.write(result)
            except Exception as exc:
                # Top-level UI boundary: report the failure in the page
                # instead of crashing the script run.
                st.error(f"Error: {exc}")
 
2
  import tempfile
3
 
4
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.llms import HuggingFacePipeline
9
  from langchain.chains import RetrievalQA
 
 
10
 
11
  from transformers import pipeline
12
 
13
# -------------------------------
# Page Config
# -------------------------------
# set_page_config is issued before any other Streamlit call in this script.
# The emoji below were mis-encoded (mojibake) and are restored here.
st.set_page_config(page_title="RAG Chatbot", layout="wide")
st.title("📄 Chat with Your Documents (RAG)")
st.write("🚀 App started successfully")
19
 
20
  # -------------------------------
21
+ # Load Documents (FIXED)
22
  # -------------------------------
23
  def load_documents(uploaded_files):
24
  documents = []
25
 
26
  for file in uploaded_files:
27
+ # Save file safely using temp file
28
+ with tempfile.NamedTemporaryFile(delete=False, suffix=file.name) as tmp:
29
+ tmp.write(file.getbuffer())
30
+ temp_path = tmp.name
31
 
32
+ # Load based on type
33
  if file.name.endswith(".pdf"):
34
+ loader = PyPDFLoader(temp_path)
35
  else:
36
+ loader = TextLoader(temp_path)
37
 
38
  documents.extend(loader.load())
39
 
 
45
  # -------------------------------
46
def split_documents(documents):
    """Break loaded documents into overlapping chunks.

    Args:
        documents: The documents to split, as collected by the loaders.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents`` for
        ``documents`` using a chunk size of 500 and an overlap of 50.
    """
    splitter_settings = {"chunk_size": 500, "chunk_overlap": 50}
    return RecursiveCharacterTextSplitter(**splitter_settings).split_documents(documents)
52
 
53
 
54
  # -------------------------------
55
+ # Cached Embeddings (IMPORTANT)
56
  # -------------------------------
57
@st.cache_resource
def get_embeddings():
    """Return the embedding model used for indexing.

    Decorated with ``st.cache_resource`` so Streamlit caches the returned
    object.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for
        ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
    return embedding_model
62
+
63
+
64
+ # -------------------------------
65
+ # Create Vector Store
66
+ # -------------------------------
67
def create_vectorstore(chunks):
    """Index document chunks in a FAISS vector store.

    Args:
        chunks: The split documents to embed and index.

    Returns:
        The store produced by ``FAISS.from_documents`` using the embeddings
        from ``get_embeddings()``.
    """
    return FAISS.from_documents(chunks, get_embeddings())
70
 
71
 
72
  # -------------------------------
73
+ # Cached LLM (IMPORTANT)
74
  # -------------------------------
75
@st.cache_resource
def load_llm():
    """Create the LangChain LLM wrapper around a local transformers pipeline.

    Decorated with ``st.cache_resource`` so Streamlit caches the returned
    object.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text2text-generation`` pipeline
        for ``google/flan-t5-small``.
    """
    pipe = pipeline(
        # FLAN-T5 is an encoder-decoder (seq2seq) model, so it must be served
        # through the "text2text-generation" task; the causal-LM
        # "text-generation" task cannot load a T5 checkpoint.
        "text2text-generation",
        model="google/flan-t5-small",  # lightweight model
        max_length=256,
    )
    return HuggingFacePipeline(pipeline=pipe)
83
 
84
 
85
  # -------------------------------
86
+ # Build QA Chain
87
  # -------------------------------
88
def build_qa(vectorstore):
    """Assemble the retrieval question-answering chain.

    Args:
        vectorstore: Vector store exposing ``as_retriever``; its default
            retriever is used.

    Returns:
        The chain built by ``RetrievalQA.from_chain_type``, configured not to
        return source documents.
    """
    # Keyword arguments are evaluated left to right, so the LLM is still
    # loaded before the retriever is created.
    return RetrievalQA.from_chain_type(
        llm=load_llm(),
        retriever=vectorstore.as_retriever(),
        return_source_documents=False,
    )
98
 
99
 
100
  # -------------------------------
101
+ # UI - Upload
102
  # -------------------------------
 
 
 
103
# The emoji in the user-facing strings below were mis-encoded (mojibake) and
# are restored to the intended characters.
uploaded_files = st.file_uploader(
    "Upload PDF or TXT files",
    accept_multiple_files=True
)

if uploaded_files:
    # Build the whole pipeline (load -> split -> embed -> QA chain) for the
    # uploaded files before accepting a question.
    with st.spinner("📄 Processing documents..."):
        docs = load_documents(uploaded_files)
        chunks = split_documents(docs)
        vectorstore = create_vectorstore(chunks)
        qa_chain = build_qa(vectorstore)

    st.success("✅ Documents ready!")

    # -------------------------------
    # User Query
    # -------------------------------
    # Kept inside the upload branch so that qa_chain is always defined by the
    # time a query is run.
    query = st.text_input("💬 Ask a question from your documents")

    if query:
        with st.spinner("🤖 Generating answer..."):
            result = qa_chain.run(query)

        st.markdown("### 🧠 Answer:")
        st.write(result)