Files changed (1) hide show
  1. app.py +35 -98
app.py CHANGED
@@ -1,48 +1,31 @@
1
  import streamlit as st
2
- import tempfile
3
- import os
4
 
 
5
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
6
- from langchain.text_splitter import RecursiveCharacterTextSplitter
7
- from langchain.embeddings import HuggingFaceEmbeddings
8
- from langchain.vectorstores import FAISS
9
- from langchain.llms import HuggingFacePipeline
10
- from langchain.chains import RetrievalQA
11
- from langchain.prompts import PromptTemplate
12
 
13
- from transformers.pipelines import pipeline
 
14
 
15
- # -------------------------------
16
- # Page Config
17
- # -------------------------------
18
- st.set_page_config(page_title="RAG Chatbot", layout="wide")
19
- st.title("📄 Chat with Your Documents (RAG)")
20
- st.write("🚀 App started successfully")
21
 
22
  # -------------------------------
23
  # Load Documents
24
  # -------------------------------
25
  def load_documents(uploaded_files):
26
  documents = []
27
-
28
  for file in uploaded_files:
29
- file_extension = os.path.splitext(file.name)[1]
30
-
31
- with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp:
32
- tmp.write(file.getbuffer())
33
- temp_path = tmp.name
34
-
35
- try:
36
- if file_extension.lower() == ".pdf":
37
- loader = PyPDFLoader(temp_path)
38
- else:
39
- loader = TextLoader(temp_path)
40
-
41
- documents.extend(loader.load())
42
 
43
- except Exception as e:
44
- st.error(f"❌ Error loading file: {e}")
 
 
45
 
 
46
  return documents
47
 
48
 
@@ -57,112 +40,66 @@ def split_documents(documents):
57
  return splitter.split_documents(documents)
58
 
59
 
60
- # -------------------------------
61
- # Cached Embeddings
62
- # -------------------------------
63
- @st.cache_resource
64
- def get_embeddings():
65
- return HuggingFaceEmbeddings(
66
- model_name="sentence-transformers/all-MiniLM-L6-v2"
67
- )
68
-
69
-
70
  # -------------------------------
71
  # Create Vector Store
72
  # -------------------------------
73
  def create_vectorstore(chunks):
74
- embeddings = get_embeddings()
 
 
75
  return FAISS.from_documents(chunks, embeddings)
76
 
77
 
78
  # -------------------------------
79
- # Cached LLM
80
  # -------------------------------
81
- @st.cache_resource
82
  def load_llm():
83
  pipe = pipeline(
84
- "text2text-generation",
85
- model="google/flan-t5-small",
86
- max_length=256
87
  )
88
  return HuggingFacePipeline(pipeline=pipe)
89
 
90
 
91
  # -------------------------------
92
- # Custom Prompt (IMPORTANT)
93
- # -------------------------------
94
- prompt_template = """
95
- Use the following context to answer the question clearly.
96
-
97
- Context:
98
- {context}
99
-
100
- Question:
101
- {question}
102
-
103
- Answer:
104
- """
105
-
106
- PROMPT = PromptTemplate(
107
- template=prompt_template,
108
- input_variables=["context", "question"]
109
- )
110
-
111
-
112
- # -------------------------------
113
- # Build QA Chain
114
  # -------------------------------
115
  def build_qa(vectorstore):
116
  llm = load_llm()
117
-
118
- retriever = vectorstore.as_retriever(
119
- search_kwargs={"k": 3} # 🔥 improves answer quality
120
- )
121
 
122
  qa = RetrievalQA.from_chain_type(
123
  llm=llm,
124
- retriever=retriever,
125
- chain_type_kwargs={"prompt": PROMPT},
126
- return_source_documents=False
127
  )
128
-
129
  return qa
130
 
131
 
132
  # -------------------------------
133
- # UI - Upload
134
  # -------------------------------
 
 
 
135
  uploaded_files = st.file_uploader(
136
  "Upload PDF or TXT files",
137
  accept_multiple_files=True
138
  )
139
 
140
  if uploaded_files:
141
- with st.spinner("📄 Processing documents..."):
142
  docs = load_documents(uploaded_files)
143
-
144
- if not docs:
145
- st.error("❌ No valid documents loaded.")
146
- st.stop()
147
-
148
  chunks = split_documents(docs)
149
  vectorstore = create_vectorstore(chunks)
150
  qa_chain = build_qa(vectorstore)
151
 
152
- st.success("Documents ready!")
153
 
154
- # -------------------------------
155
- # User Query
156
- # -------------------------------
157
- query = st.text_input("💬 Ask a question from your documents")
158
 
159
  if query:
160
- with st.spinner("🤖 Generating answer..."):
161
- try:
162
- result = qa_chain.run(query)
163
-
164
- st.markdown("### 🧠 Answer:")
165
- st.write(result)
166
-
167
- except Exception as e:
168
- st.error(f"❌ Error generating answer: {e}")
 
1
  import streamlit as st
 
 
2
 
3
+ # ✅ Correct imports (new structure)
4
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
5
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
6
+ from langchain_community.embeddings import HuggingFaceEmbeddings
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain_community.llms import HuggingFacePipeline
 
 
9
 
10
+ from langchain.chains import RetrievalQA
11
+ from transformers import pipeline
12
 
 
 
 
 
 
 
13
 
14
  # -------------------------------
15
  # Load Documents
16
  # -------------------------------
17
  def load_documents(uploaded_files):
18
  documents = []
 
19
  for file in uploaded_files:
20
+ with open(file.name, "wb") as f:
21
+ f.write(file.getbuffer())
 
 
 
 
 
 
 
 
 
 
 
22
 
23
+ if file.name.endswith(".pdf"):
24
+ loader = PyPDFLoader(file.name)
25
+ else:
26
+ loader = TextLoader(file.name)
27
 
28
+ documents.extend(loader.load())
29
  return documents
30
 
31
 
 
40
  return splitter.split_documents(documents)
41
 
42
 
 
 
 
 
 
 
 
 
 
 
43
  # -------------------------------
44
  # Create Vector Store
45
  # -------------------------------
46
  def create_vectorstore(chunks):
47
+ embeddings = HuggingFaceEmbeddings(
48
+ model_name="sentence-transformers/all-MiniLM-L6-v2"
49
+ )
50
  return FAISS.from_documents(chunks, embeddings)
51
 
52
 
53
  # -------------------------------
54
+ # Load Local LLM (FREE)
55
  # -------------------------------
 
56
  def load_llm():
57
  pipe = pipeline(
58
+ "text2text-generation", # ✅ FIXED
59
+ model="google/flan-t5-base",
60
+ max_length=512
61
  )
62
  return HuggingFacePipeline(pipeline=pipe)
63
 
64
 
65
  # -------------------------------
66
+ # Build QA Chain (default prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  # -------------------------------
68
  def build_qa(vectorstore):
69
  llm = load_llm()
70
+ retriever = vectorstore.as_retriever()
 
 
 
71
 
72
  qa = RetrievalQA.from_chain_type(
73
  llm=llm,
74
+ retriever=retriever
 
 
75
  )
 
76
  return qa
77
 
78
 
79
  # -------------------------------
80
+ # Streamlit UI
81
  # -------------------------------
82
# Page setup; kept as the first Streamlit calls of the script.
st.set_page_config(page_title="RAG Chatbot", layout="wide")
st.title("📄 Chat with Your Documents (RAG)")

uploaded_files = st.file_uploader(
    "Upload PDF or TXT files",
    # Restrict the picker to the formats the loaders can actually parse.
    type=["pdf", "txt"],
    accept_multiple_files=True,
)

if uploaded_files:
    # Streamlit reruns this script on every interaction (including each
    # submitted question). Rebuild the index only when the set of uploads
    # changes; otherwise reuse the chain kept in the session state.
    upload_key = tuple((f.name, f.size) for f in uploaded_files)

    if st.session_state.get("qa_upload_key") != upload_key:
        with st.spinner("Processing documents..."):
            try:
                docs = load_documents(uploaded_files)
                chunks = split_documents(docs)
            except Exception as exc:  # UI boundary: report instead of a traceback
                st.error(f"Error loading file: {exc}")
                st.stop()

            # Nothing to index (e.g. empty or image-only files): stop before
            # the vector store is asked to embed an empty list.
            if not chunks:
                st.error("No readable text was found in the uploaded files.")
                st.stop()

            vectorstore = create_vectorstore(chunks)
            st.session_state["qa_chain"] = build_qa(vectorstore)
            st.session_state["qa_upload_key"] = upload_key

    qa_chain = st.session_state["qa_chain"]

    st.success("Documents ready!")

    query = st.text_input("Ask a question from your documents")

    if query:
        with st.spinner("Generating answer..."):
            try:
                # `invoke` replaces the deprecated `Chain.run`; RetrievalQA
                # takes the question under "query" and answers under "result".
                result = qa_chain.invoke({"query": query})["result"]
            except Exception as exc:  # UI boundary: report instead of a traceback
                st.error(f"Error generating answer: {exc}")
            else:
                st.write("### Answer:")
                st.write(result)