Files changed (1) hide show
  1. app.py +30 -52
app.py CHANGED
@@ -1,42 +1,31 @@
1
  import streamlit as st
2
- import tempfile
3
 
 
4
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
5
- from langchain.text_splitter import RecursiveCharacterTextSplitter
6
- from langchain.embeddings import HuggingFaceEmbeddings
7
- from langchain.vectorstores import FAISS
8
- from langchain.llms import HuggingFacePipeline
9
- from langchain.chains import RetrievalQA
10
 
 
11
  from transformers import pipeline
12
 
13
- # -------------------------------
14
- # Page Config
15
- # -------------------------------
16
- st.set_page_config(page_title="RAG Chatbot", layout="wide")
17
- st.title("πŸ“„ Chat with Your Documents (RAG)")
18
- st.write("πŸš€ App started successfully")
19
 
20
  # -------------------------------
21
- # Load Documents (FIXED)
22
  # -------------------------------
23
  def load_documents(uploaded_files):
24
  documents = []
25
-
26
  for file in uploaded_files:
27
- # Save file safely using temp file
28
- with tempfile.NamedTemporaryFile(delete=False, suffix=file.name) as tmp:
29
- tmp.write(file.getbuffer())
30
- temp_path = tmp.name
31
 
32
- # Load based on type
33
  if file.name.endswith(".pdf"):
34
- loader = PyPDFLoader(temp_path)
35
  else:
36
- loader = TextLoader(temp_path)
37
 
38
  documents.extend(loader.load())
39
-
40
  return documents
41
 
42
 
@@ -51,39 +40,30 @@ def split_documents(documents):
51
  return splitter.split_documents(documents)
52
 
53
 
54
- # -------------------------------
55
- # Cached Embeddings (IMPORTANT)
56
- # -------------------------------
57
- @st.cache_resource
58
- def get_embeddings():
59
- return HuggingFaceEmbeddings(
60
- model_name="sentence-transformers/all-MiniLM-L6-v2"
61
- )
62
-
63
-
64
  # -------------------------------
65
  # Create Vector Store
66
  # -------------------------------
67
  def create_vectorstore(chunks):
68
- embeddings = get_embeddings()
 
 
69
  return FAISS.from_documents(chunks, embeddings)
70
 
71
 
72
  # -------------------------------
73
- # Cached LLM (IMPORTANT)
74
  # -------------------------------
75
- @st.cache_resource
76
  def load_llm():
77
  pipe = pipeline(
78
- "text-generation",
79
- model="google/flan-t5-small", # lightweight model
80
- max_length=256
81
  )
82
  return HuggingFacePipeline(pipeline=pipe)
83
 
84
 
85
  # -------------------------------
86
- # Build QA Chain
87
  # -------------------------------
88
  def build_qa(vectorstore):
89
  llm = load_llm()
@@ -91,37 +71,35 @@ def build_qa(vectorstore):
91
 
92
  qa = RetrievalQA.from_chain_type(
93
  llm=llm,
94
- retriever=retriever,
95
- return_source_documents=False
96
  )
97
  return qa
98
 
99
 
100
  # -------------------------------
101
- # UI - Upload
102
  # -------------------------------
 
 
 
103
  uploaded_files = st.file_uploader(
104
  "Upload PDF or TXT files",
105
  accept_multiple_files=True
106
  )
107
 
108
  if uploaded_files:
109
- with st.spinner("πŸ“„ Processing documents..."):
110
  docs = load_documents(uploaded_files)
111
  chunks = split_documents(docs)
112
  vectorstore = create_vectorstore(chunks)
113
  qa_chain = build_qa(vectorstore)
114
 
115
- st.success("βœ… Documents ready!")
116
 
117
- # -------------------------------
118
- # User Query
119
- # -------------------------------
120
- query = st.text_input("πŸ’¬ Ask a question from your documents")
121
 
122
  if query:
123
- with st.spinner("πŸ€– Generating answer..."):
124
  result = qa_chain.run(query)
125
-
126
- st.markdown("### 🧠 Answer:")
127
- st.write(result)
 
1
  import streamlit as st
 
2
 
3
+ # ✅ Correct imports (new structure)
4
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
5
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
6
+ from langchain_community.embeddings import HuggingFaceEmbeddings
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain_community.llms import HuggingFacePipeline
 
9
 
10
+ from langchain.chains import RetrievalQA
11
  from transformers import pipeline
12
 
 
 
 
 
 
 
13
 
14
  # -------------------------------
15
+ # Load Documents
16
  # -------------------------------
17
def load_documents(uploaded_files):
    """Load every uploaded file into one flat list of documents.

    Each upload is copied to a private temporary file, parsed with
    ``PyPDFLoader`` (for ``.pdf``, case-insensitive) or ``TextLoader``
    (anything else), and the temporary file is removed afterwards.

    Args:
        uploaded_files: Iterable of upload objects exposing ``.name`` and
            ``.getbuffer()`` (as returned by ``st.file_uploader``).

    Returns:
        A list with the documents of all uploads, in upload order. Empty
        input yields an empty list.
    """
    import os
    import tempfile

    documents = []
    for file in uploaded_files:
        # Only the extension of the client-supplied name is reused. Writing
        # to ``file.name`` directly would drop the upload into the working
        # directory, where it could overwrite application files or collide
        # with another session's upload of the same name.
        suffix = os.path.splitext(file.name)[1].lower()

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(file.getbuffer())
            temp_path = tmp.name

        try:
            if suffix == ".pdf":
                loader = PyPDFLoader(temp_path)
            else:
                loader = TextLoader(temp_path)

            loaded = loader.load()
        finally:
            # The loader has finished reading (or failed); never leave the
            # temporary copy behind.
            os.remove(temp_path)

        # Report the user's filename as the source instead of the throwaway
        # temp path (assumes loader output exposes a ``metadata`` dict, as
        # LangChain documents do).
        for doc in loaded:
            doc.metadata["source"] = file.name

        documents.extend(loaded)

    return documents
30
 
31
 
 
40
  return splitter.split_documents(documents)
41
 
42
 
 
 
 
 
 
 
 
 
 
 
43
  # -------------------------------
44
  # Create Vector Store
45
  # -------------------------------
46
@st.cache_resource
def _load_embeddings():
    """Build the sentence-embedding model once and reuse it across reruns."""
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )


def create_vectorstore(chunks):
    """Embed ``chunks`` and index them in an in-memory FAISS store.

    Args:
        chunks: Sequence of split documents to index.

    Returns:
        The FAISS vector store built from ``chunks``.

    Raises:
        ValueError: If ``chunks`` is empty (for example, the uploads
            contained no extractable text), since there is nothing to index.
    """
    if not chunks:
        raise ValueError(
            "No text chunks to index; the uploaded files contained no "
            "extractable text."
        )

    # Streamlit re-executes the script on every interaction, so the model is
    # fetched through a cached loader instead of being rebuilt on each call.
    embeddings = _load_embeddings()
    return FAISS.from_documents(chunks, embeddings)
51
 
52
 
53
  # -------------------------------
54
+ # Load Local LLM (FREE)
55
  # -------------------------------
 
56
@st.cache_resource
def load_llm():
    """Return the local FLAN-T5 model wrapped for use in LangChain chains.

    The result is cached with ``st.cache_resource`` so the model weights are
    loaded once per server process rather than on every Streamlit rerun.

    Returns:
        A ``HuggingFacePipeline`` around a ``text2text-generation`` pipeline
        for ``google/flan-t5-base`` with ``max_length=512``.
    """
    pipe = pipeline(
        # FLAN-T5 is an encoder-decoder model, so it needs the text2text
        # task rather than plain "text-generation".
        "text2text-generation",
        model="google/flan-t5-base",
        max_length=512,
    )
    return HuggingFacePipeline(pipeline=pipe)
63
 
64
 
65
  # -------------------------------
66
+ # Build QA Chain (with strict prompt)
67
  # -------------------------------
68
  def build_qa(vectorstore):
69
  llm = load_llm()
 
71
 
72
  qa = RetrievalQA.from_chain_type(
73
  llm=llm,
74
+ retriever=retriever
 
75
  )
76
  return qa
77
 
78
 
79
  # -------------------------------
80
+ # Streamlit UI
81
  # -------------------------------
82
st.set_page_config(page_title="RAG Chatbot", layout="wide")
st.title("📄 Chat with Your Documents (RAG)")

uploaded_files = st.file_uploader(
    "Upload PDF or TXT files",
    accept_multiple_files=True
)

if uploaded_files:
    # Streamlit re-runs this script on every interaction (including each new
    # question). Rebuild the index only when the set of uploads changes and
    # otherwise reuse the chain kept in the session state.
    upload_key = tuple((f.name, f.size) for f in uploaded_files)

    if st.session_state.get("qa_upload_key") != upload_key:
        with st.spinner("Processing documents..."):
            docs = load_documents(uploaded_files)
            chunks = split_documents(docs)
            vectorstore = create_vectorstore(chunks)
            st.session_state["qa_chain"] = build_qa(vectorstore)
            st.session_state["qa_upload_key"] = upload_key

    qa_chain = st.session_state["qa_chain"]

    st.success("Documents ready!")

    query = st.text_input("Ask a question from your documents")

    if query:
        with st.spinner("Generating answer..."):
            # ``invoke`` replaces the deprecated ``Chain.run``; RetrievalQA
            # takes the question under "query" and answers under "result".
            result = qa_chain.invoke({"query": query})["result"]
        st.write("### Answer:")
        st.write(result)