Files changed (2) hide show
  1. app.py +103 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.llms import HuggingFacePipeline
7
+ from langchain.chains import RetrievalQA
8
+
9
+ from transformers import pipeline
10
+
11
+ # -------------------------------
12
+ # Load Documents
13
+ # -------------------------------
14
def load_documents(uploaded_files):
    """Load every uploaded file into a flat list of LangChain documents.

    Each upload is spooled to a private temporary file so the path-based
    loaders can read it, and that temporary file is removed once loading
    finishes.

    Args:
        uploaded_files: Iterable of upload objects exposing ``name`` and
            ``getbuffer()`` (as returned by ``st.file_uploader``).

    Returns:
        list: Documents produced by the loaders, in upload order. Empty when
        ``uploaded_files`` is empty.
    """
    import os
    import tempfile

    documents = []
    for file in uploaded_files:
        # Only the extension of the client-supplied name is trusted. Writing
        # to ``file.name`` directly would let an upload overwrite files in
        # the working directory (for example this script itself).
        suffix = os.path.splitext(os.path.basename(file.name))[1].lower()

        # delete=False so the file can be closed before a loader reopens it
        # (reopening an open NamedTemporaryFile fails on Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(file.getbuffer())
            tmp_path = tmp.name

        try:
            if suffix == ".pdf":
                loader = PyPDFLoader(tmp_path)
            else:
                loader = TextLoader(tmp_path)
            loaded = loader.load()
        finally:
            # Always clean up, even when a loader raises on a bad file.
            os.remove(tmp_path)

        # Report the name the user uploaded rather than the temporary path.
        for doc in loaded:
            doc.metadata["source"] = file.name

        documents.extend(loaded)
    return documents
27
+
28
+
29
+ # -------------------------------
30
+ # Split Documents
31
+ # -------------------------------
32
def split_documents(documents):
    """Break documents into overlapping chunks for embedding.

    Uses a recursive character splitter configured for 500-character chunks
    with a 50-character overlap between neighbouring chunks.

    Args:
        documents: Documents to split.

    Returns:
        The chunks produced by the splitter's ``split_documents``.
    """
    chunker = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    pieces = chunker.split_documents(documents)
    return pieces
38
+
39
+
40
+ # -------------------------------
41
+ # Create Vector Store
42
+ # -------------------------------
43
def create_vectorstore(chunks):
    """Embed the chunks and index them in an in-memory FAISS store.

    Args:
        chunks: Non-empty sequence of document chunks to embed.

    Returns:
        The FAISS vector store built from ``chunks``.

    Raises:
        ValueError: If ``chunks`` is empty. Without this check the embedding
            model would be loaded first and FAISS would then fail with an
            opaque indexing error, because there is no vector to size the
            index from.
    """
    if not chunks:
        raise ValueError(
            "Cannot build a vector store from zero chunks; "
            "no text was extracted from the documents."
        )

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(chunks, embeddings)
48
+
49
+
50
+ # -------------------------------
51
+ # Load Local LLM (FREE)
52
+ # -------------------------------
53
def load_llm():
    """Create the local Hugging Face LLM wrapped for use in LangChain.

    Returns:
        HuggingFacePipeline: LangChain wrapper around a transformers pipeline
        running ``google/flan-t5-base`` with ``max_length=512``.
    """
    # FLAN-T5 is an encoder-decoder (seq2seq) model, so it must be served by
    # the "text2text-generation" task. "text-generation" expects a causal LM
    # and cannot load a T5 checkpoint.
    pipe = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        max_length=512,
    )
    return HuggingFacePipeline(pipeline=pipe)
60
+
61
+
62
+ # -------------------------------
63
+ # Build QA Chain
64
+ # -------------------------------
65
def build_qa(vectorstore):
    """Assemble a retrieval question-answering chain over a vector store.

    Args:
        vectorstore: Vector store whose ``as_retriever()`` supplies context.

    Returns:
        A ``RetrievalQA`` chain using the LLM from ``load_llm()``, configured
        not to return source documents.
    """
    language_model = load_llm()
    doc_retriever = vectorstore.as_retriever()
    return RetrievalQA.from_chain_type(
        llm=language_model,
        retriever=doc_retriever,
        return_source_documents=False,
    )
75
+
76
+
77
+ # -------------------------------
78
+ # Streamlit UI
79
+ # -------------------------------
80
st.set_page_config(page_title="RAG Chatbot", layout="wide")
st.title("📄 Chat with Your Documents (RAG)")

uploaded_files = st.file_uploader(
    "Upload PDF or TXT files",
    accept_multiple_files=True
)

if uploaded_files:
    # Streamlit re-executes this whole script on every widget interaction,
    # including each submitted question. Keep the built chain in session
    # state and rebuild it only when the set of uploaded files changes, so
    # documents are not re-embedded and models are not reloaded per query.
    upload_key = tuple((f.name, f.size) for f in uploaded_files)

    if st.session_state.get("qa_upload_key") != upload_key:
        with st.spinner("Processing documents..."):
            docs = load_documents(uploaded_files)
            chunks = split_documents(docs)
            # An empty chunk list (e.g. a scanned PDF without a text layer)
            # cannot be indexed; skip the build and report it below.
            built_chain = None
            if chunks:
                vectorstore = create_vectorstore(chunks)
                built_chain = build_qa(vectorstore)

        if built_chain is None:
            st.error("No readable text was found in the uploaded files.")
            st.stop()

        st.session_state["qa_chain"] = built_chain
        st.session_state["qa_upload_key"] = upload_key

    qa_chain = st.session_state["qa_chain"]

    st.success("Documents ready!")

    query = st.text_input("Ask a question from your documents")

    if query:
        with st.spinner("Generating answer..."):
            result = qa_chain.run(query)
        st.write("### Answer:")
        st.write(result)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.35.0
2
+ langchain==0.2.15
3
+ langchain-community==0.2.15
4
+ langchain-text-splitters==0.2.2
5
+ faiss-cpu==1.8.0
6
+ sentence-transformers==2.7.0
7
+ transformers==4.41.2
8
+ pypdf==4.2.0
9
+ torch==2.3.0