Files changed (1) hide show
  1. app.py +39 -21
app.py CHANGED
@@ -1,19 +1,20 @@
1
  import streamlit as st
2
  import os
3
 
4
- # ✅ Imports
5
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
6
  from langchain_text_splitters import RecursiveCharacterTextSplitter
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
  from langchain_community.vectorstores import FAISS
 
9
  from langchain.chains import RetrievalQA
 
 
10
 
11
  from transformers import pipeline
12
- from langchain_community.llms import HuggingFacePipeline
13
 
14
 
15
  # -------------------------------
16
- # Load Documents (SAFE PATH)
17
  # -------------------------------
18
  def load_documents(uploaded_files):
19
  documents = []
@@ -35,48 +36,67 @@ def load_documents(uploaded_files):
35
 
36
 
37
  # -------------------------------
38
- # Split Documents
39
  # -------------------------------
40
def split_documents(documents):
    """Break the loaded documents into overlapping chunks.

    Args:
        documents: The documents to split; handed unchanged to the
            splitter's ``split_documents`` method.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter`` configured
        with ``chunk_size=500`` and ``chunk_overlap=50``.
    """
    chunker = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=500)
    chunks = chunker.split_documents(documents)
    return chunks
46
 
47
 
48
  # -------------------------------
49
- # Create Vector Store (LOCAL)
50
  # -------------------------------
51
def create_vectorstore(chunks):
    """Embed the chunks and index them in a FAISS vector store.

    Args:
        chunks: Document chunks to index; passed to ``FAISS.from_documents``.

    Returns:
        The FAISS vector store built from ``chunks``.
    """
    # The original author notes this model works without an access token.
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    embedder = HuggingFaceEmbeddings(model_name=model_id)
    store = FAISS.from_documents(chunks, embedder)
    return store
56
 
57
 
58
  # -------------------------------
59
- # Load LOCAL LLM (VERY LIGHT)
60
  # -------------------------------
61
def load_llm():
    """Create the text2text generation pipeline and wrap it for LangChain.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text2text-generation``
        pipeline for ``sshleifer/tiny-t5`` with ``max_length=256``.
    """
    # The original author describes this model as very light and usable
    # without authentication.
    generation_options = {
        "model": "sshleifer/tiny-t5",
        "max_length": 256,
    }
    generator = pipeline("text2text-generation", **generation_options)
    return HuggingFacePipeline(pipeline=generator)
68
 
69
 
70
  # -------------------------------
71
- # Build QA Chain
72
  # -------------------------------
73
def build_qa(vectorstore):
    """Assemble a RetrievalQA chain over the given vector store.

    Args:
        vectorstore: Store whose ``as_retriever()`` supplies the retriever.

    Returns:
        The chain returned by ``RetrievalQA.from_chain_type`` for the LLM
        from ``load_llm()`` and the store's default retriever.
    """
    # Arguments are evaluated left to right, so the LLM is still loaded
    # before the retriever is created.
    return RetrievalQA.from_chain_type(
        llm=load_llm(),
        retriever=vectorstore.as_retriever(),
    )
81
 
82
 
@@ -103,10 +123,8 @@ if uploaded_files:
103
# Ask for a question and, once one is entered, show the chain's answer.
query = st.text_input("Ask a question from your documents")

if query:
    with st.spinner("Generating answer..."):
        # Any failure while answering or rendering is reported in the UI
        # rather than surfacing as a traceback.
        try:
            answer = qa_chain.run(query)
            st.write("### Answer:")
            st.write(answer)
        except Exception as err:
            st.error(str(err))
 
1
  import streamlit as st
2
  import os
3
 
 
4
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
5
  from langchain_text_splitters import RecursiveCharacterTextSplitter
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain_community.vectorstores import FAISS
8
+
9
  from langchain.chains import RetrievalQA
10
+ from langchain.prompts import PromptTemplate
11
+ from langchain_community.llms import HuggingFacePipeline
12
 
13
  from transformers import pipeline
 
14
 
15
 
16
  # -------------------------------
17
+ # Load Documents (SAFE)
18
  # -------------------------------
19
  def load_documents(uploaded_files):
20
  documents = []
 
36
 
37
 
38
  # -------------------------------
39
+ # Split Documents (BETTER CHUNKS)
40
  # -------------------------------
41
def split_documents(documents):
    """Break the loaded documents into overlapping chunks.

    Args:
        documents: The documents to split; handed unchanged to the
            splitter's ``split_documents`` method.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter`` configured
        with ``chunk_size=800`` and ``chunk_overlap=100``.
    """
    chunker = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=800)
    chunks = chunker.split_documents(documents)
    return chunks
47
 
48
 
49
  # -------------------------------
50
+ # Create Vector Store (embeddings + FAISS)
51
  # -------------------------------
52
def create_vectorstore(chunks):
    """Embed the chunks and index them in a FAISS vector store.

    Args:
        chunks: Document chunks to index; passed to ``FAISS.from_documents``.

    Returns:
        The FAISS vector store built from ``chunks``.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    embedder = HuggingFaceEmbeddings(model_name=model_id)
    store = FAISS.from_documents(chunks, embedder)
    return store
57
 
58
 
59
  # -------------------------------
60
+ # LLM (Balanced quality + speed)
61
  # -------------------------------
62
def load_llm():
    """Create the text2text generation pipeline and wrap it for LangChain.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text2text-generation``
        pipeline for ``google/flan-t5-small``.
    """
    pipe = pipeline(
        "text2text-generation",
        model="google/flan-t5-small",  # chosen by the author as usable without a token
        max_length=512,
        # ``temperature`` only takes effect in sampling mode. Without
        # ``do_sample=True`` generation is greedy and transformers warns
        # that the temperature setting is ignored.
        do_sample=True,
        temperature=0.3,
    )
    return HuggingFacePipeline(pipeline=pipe)
70
 
71
 
72
  # -------------------------------
73
+ # Build QA Chain (custom prompt)
74
  # -------------------------------
75
def build_qa(vectorstore):
    """Assemble a RetrievalQA chain that answers from retrieved context.

    Args:
        vectorstore: Store whose ``as_retriever`` supplies the retriever;
            the search is configured with ``k=3``.

    Returns:
        The chain returned by ``RetrievalQA.from_chain_type`` for the LLM
        from ``load_llm()``, the retriever, and the custom prompt.
    """
    language_model = load_llm()

    # Same text as the original triple-quoted template, including the
    # leading and trailing newline, spelled as adjacent string literals.
    template_text = (
        "\n"
        "Use the following context to answer the question.\n"
        'If the answer is not in the context, say "Answer not found in document".\n'
        "\n"
        "Context:\n"
        "{context}\n"
        "\n"
        "Question:\n"
        "{question}\n"
        "\n"
        "Answer:\n"
    )
    qa_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template_text,
    )

    top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    return RetrievalQA.from_chain_type(
        llm=language_model,
        retriever=top_k_retriever,
        chain_type_kwargs={"prompt": qa_prompt},
    )
101
 
102
 
 
123
# Ask for a question and, once one is entered, show the chain's answer.
query = st.text_input("Ask a question from your documents")

if query:
    # Answering can fail at runtime (model loading, retrieval, generation).
    # Report the failure in the UI instead of letting the exception surface
    # as a raw traceback.
    try:
        with st.spinner("Thinking..."):
            result = qa_chain.run(query)
    except Exception as exc:
        st.error(str(exc))
    else:
        st.write("### 📌 Answer:")
        st.write(result)