Files changed (1) hide show
  1. app.py +127 -103
app.py CHANGED
@@ -1,103 +1,127 @@
1
- import streamlit as st
2
- from langchain_community.document_loaders import PyPDFLoader, TextLoader
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- from langchain.embeddings import HuggingFaceEmbeddings
5
- from langchain.vectorstores import FAISS
6
- from langchain.llms import HuggingFacePipeline
7
- from langchain.chains import RetrievalQA
8
-
9
- from transformers import pipeline
10
-
11
- # -------------------------------
12
- # Load Documents
13
- # -------------------------------
14
- def load_documents(uploaded_files):
15
- documents = []
16
- for file in uploaded_files:
17
- with open(file.name, "wb") as f:
18
- f.write(file.getbuffer())
19
-
20
- if file.name.endswith(".pdf"):
21
- loader = PyPDFLoader(file.name)
22
- else:
23
- loader = TextLoader(file.name)
24
-
25
- documents.extend(loader.load())
26
- return documents
27
-
28
-
29
- # -------------------------------
30
- # Split Documents
31
- # -------------------------------
32
- def split_documents(documents):
33
- splitter = RecursiveCharacterTextSplitter(
34
- chunk_size=500,
35
- chunk_overlap=50
36
- )
37
- return splitter.split_documents(documents)
38
-
39
-
40
- # -------------------------------
41
- # Create Vector Store
42
- # -------------------------------
43
- def create_vectorstore(chunks):
44
- embeddings = HuggingFaceEmbeddings(
45
- model_name="sentence-transformers/all-MiniLM-L6-v2"
46
- )
47
- return FAISS.from_documents(chunks, embeddings)
48
-
49
-
50
- # -------------------------------
51
- # Load Local LLM (FREE)
52
- # -------------------------------
53
- def load_llm():
54
- pipe = pipeline(
55
- "text-generation",
56
- model="google/flan-t5-base",
57
- max_length=512
58
- )
59
- return HuggingFacePipeline(pipeline=pipe)
60
-
61
-
62
- # -------------------------------
63
- # Build QA Chain
64
- # -------------------------------
65
- def build_qa(vectorstore):
66
- llm = load_llm()
67
- retriever = vectorstore.as_retriever()
68
-
69
- qa = RetrievalQA.from_chain_type(
70
- llm=llm,
71
- retriever=retriever,
72
- return_source_documents=False
73
- )
74
- return qa
75
-
76
-
77
- # -------------------------------
78
- # Streamlit UI
79
- # -------------------------------
80
- st.set_page_config(page_title="RAG Chatbot", layout="wide")
81
- st.title("πŸ“„ Chat with Your Documents (RAG)")
82
-
83
- uploaded_files = st.file_uploader(
84
- "Upload PDF or TXT files",
85
- accept_multiple_files=True
86
- )
87
-
88
- if uploaded_files:
89
- with st.spinner("Processing documents..."):
90
- docs = load_documents(uploaded_files)
91
- chunks = split_documents(docs)
92
- vectorstore = create_vectorstore(chunks)
93
- qa_chain = build_qa(vectorstore)
94
-
95
- st.success("Documents ready!")
96
-
97
- query = st.text_input("Ask a question from your documents")
98
-
99
- if query:
100
- with st.spinner("Generating answer..."):
101
- result = qa_chain.run(query)
102
- st.write("### Answer:")
103
- st.write(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import tempfile
3
+
4
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.llms import HuggingFacePipeline
9
+ from langchain.chains import RetrievalQA
10
+
11
+ from transformers import pipeline
12
+
13
+ # -------------------------------
14
+ # Page Config
15
+ # -------------------------------
16
# Configure the page and render the static header. The emoji literals were
# mis-encoded (UTF-8 bytes read in the wrong code page) and are restored here.
st.set_page_config(page_title="RAG Chatbot", layout="wide")
st.title("📄 Chat with Your Documents (RAG)")
st.write("🚀 App started successfully")
19
+
20
+ # -------------------------------
21
+ # Load Documents (FIXED)
22
+ # -------------------------------
23
def load_documents(uploaded_files):
    """Load every uploaded file into one flat list of documents.

    Each upload is written to a temporary file because the loaders used here
    are constructed from a filesystem path. The temporary file is removed as
    soon as its loader has finished, so uploads do not pile up on disk.

    Args:
        uploaded_files: Iterable of uploaded-file objects exposing ``name``
            and ``getbuffer()``.

    Returns:
        list: The documents returned by the loaders, in upload order.
    """
    import os  # local import: only needed for suffix parsing and cleanup

    documents = []

    for file in uploaded_files:
        # Use only the extension as the suffix; passing the full upload name
        # would embed arbitrary user text in the temp path.
        extension = os.path.splitext(file.name)[1].lower()

        # delete=False so the file can be closed and then reopened by the
        # loader by path; removal is handled explicitly below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp:
            tmp.write(file.getbuffer())
            temp_path = tmp.name

        try:
            # Extension was lowercased, so ".PDF" is routed to the PDF loader too.
            if extension == ".pdf":
                loader = PyPDFLoader(temp_path)
            else:
                loader = TextLoader(temp_path)

            documents.extend(loader.load())
        finally:
            # Always clean up, even when the loader raises on a bad file.
            os.unlink(temp_path)

    return documents
41
+
42
+
43
+ # -------------------------------
44
+ # Split Documents
45
+ # -------------------------------
46
def split_documents(documents):
    """Split documents into 500-character chunks with a 50-character overlap.

    Args:
        documents: Documents to split, as accepted by
            ``RecursiveCharacterTextSplitter.split_documents``.

    Returns:
        The chunked documents produced by the splitter.
    """
    chunker = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    pieces = chunker.split_documents(documents)
    return pieces
52
+
53
+
54
+ # -------------------------------
55
+ # Cached Embeddings (IMPORTANT)
56
+ # -------------------------------
57
@st.cache_resource
def get_embeddings():
    """Return the sentence-transformers embedding model used for indexing.

    Wrapped in ``st.cache_resource`` so the model object is reused instead of
    being constructed again on each call.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)
62
+
63
+
64
+ # -------------------------------
65
+ # Create Vector Store
66
+ # -------------------------------
67
def create_vectorstore(chunks):
    """Build a FAISS vector store from ``chunks`` using the shared embeddings.

    Args:
        chunks: Chunked documents to embed and index.

    Returns:
        The FAISS vector store created from the chunks.
    """
    return FAISS.from_documents(chunks, get_embeddings())
70
+
71
+
72
+ # -------------------------------
73
+ # Cached LLM (IMPORTANT)
74
+ # -------------------------------
75
@st.cache_resource
def load_llm():
    """Load the local FLAN-T5 model and wrap it for use in LangChain.

    FLAN-T5 is an encoder-decoder (sequence-to-sequence) model, so it has to
    be served through the ``text2text-generation`` pipeline task. The
    ``text-generation`` task is for decoder-only (causal) models and cannot
    load a T5 checkpoint.

    Returns:
        HuggingFacePipeline: LangChain LLM wrapper around the pipeline.
    """
    pipe = pipeline(
        "text2text-generation",
        model="google/flan-t5-small",  # lightweight model
        max_length=256,
    )
    return HuggingFacePipeline(pipeline=pipe)
83
+
84
+
85
+ # -------------------------------
86
+ # Build QA Chain
87
+ # -------------------------------
88
def build_qa(vectorstore):
    """Assemble a RetrievalQA chain over ``vectorstore``.

    Args:
        vectorstore: Vector store whose ``as_retriever()`` supplies the
            retriever for the chain.

    Returns:
        The RetrievalQA chain, configured not to return source documents.
    """
    return RetrievalQA.from_chain_type(
        llm=load_llm(),
        retriever=vectorstore.as_retriever(),
        return_source_documents=False,
    )
98
+
99
+
100
+ # -------------------------------
101
+ # UI - Upload
102
+ # -------------------------------
103
uploaded_files = st.file_uploader(
    "Upload PDF or TXT files",
    accept_multiple_files=True
)

if uploaded_files:
    # Streamlit re-executes this script on every widget interaction, including
    # each new question. Key the built chain on the current uploads so the
    # documents are only re-read, re-split and re-embedded when the set of
    # files actually changes.
    upload_key = tuple((f.name, f.size) for f in uploaded_files)

    if st.session_state.get("qa_upload_key") != upload_key:
        with st.spinner("📄 Processing documents..."):
            docs = load_documents(uploaded_files)
            chunks = split_documents(docs)
            # No chunks means no text was extracted (e.g. an image-only PDF);
            # skip index construction, which cannot handle an empty list.
            qa_chain = build_qa(create_vectorstore(chunks)) if chunks else None

        st.session_state["qa_chain"] = qa_chain
        st.session_state["qa_upload_key"] = upload_key

    qa_chain = st.session_state["qa_chain"]

    if qa_chain is None:
        st.error("No readable text was found in the uploaded files.")
        st.stop()

    st.success("✅ Documents ready!")

    # -------------------------------
    # User Query
    # -------------------------------
    query = st.text_input("💬 Ask a question from your documents")

    # Ignore empty and whitespace-only input instead of querying the model.
    if query and query.strip():
        with st.spinner("🤖 Generating answer..."):
            result = qa_chain.run(query)

        st.markdown("### 🧠 Answer:")
        st.write(result)