Files changed (1) hide show
  1. app.py +21 -19
app.py CHANGED
@@ -1,11 +1,10 @@
1
  import streamlit as st
2
- import os
3
 
4
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
5
  from langchain_text_splitters import RecursiveCharacterTextSplitter
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain_community.vectorstores import FAISS
8
-
9
  from langchain.chains import RetrievalQA
10
  from langchain.prompts import PromptTemplate
11
  from langchain_community.llms import HuggingFacePipeline
@@ -14,17 +13,18 @@ from transformers import pipeline
14
 
15
 
16
  # -------------------------------
17
- # Load Documents (SAFE)
18
  # -------------------------------
19
  def load_documents(uploaded_files):
20
  documents = []
21
 
22
  for file in uploaded_files:
23
- file_path = os.path.join("/tmp", file.name)
24
-
25
- with open(file_path, "wb") as f:
26
- f.write(file.getbuffer())
27
 
 
28
  if file.name.endswith(".pdf"):
29
  loader = PyPDFLoader(file_path)
30
  else:
@@ -36,7 +36,7 @@ def load_documents(uploaded_files):
36
 
37
 
38
  # -------------------------------
39
- # Split Documents (BETTER CHUNKS)
40
  # -------------------------------
41
  def split_documents(documents):
42
  splitter = RecursiveCharacterTextSplitter(
@@ -47,7 +47,7 @@ def split_documents(documents):
47
 
48
 
49
  # -------------------------------
50
- # Embeddings
51
  # -------------------------------
52
  def create_vectorstore(chunks):
53
  embeddings = HuggingFaceEmbeddings(
@@ -57,12 +57,12 @@ def create_vectorstore(chunks):
57
 
58
 
59
  # -------------------------------
60
- # LLM (Balanced quality + speed)
61
  # -------------------------------
62
  def load_llm():
63
  pipe = pipeline(
64
  "text2text-generation",
65
- model="google/flan-t5-small", # BEST without token
66
  max_length=512,
67
  temperature=0.3
68
  )
@@ -70,7 +70,7 @@ def load_llm():
70
 
71
 
72
  # -------------------------------
73
- # Prompt (VERY IMPORTANT)
74
  # -------------------------------
75
  def build_qa(vectorstore):
76
  llm = load_llm()
@@ -101,13 +101,13 @@ def build_qa(vectorstore):
101
 
102
 
103
  # -------------------------------
104
- # UI
105
  # -------------------------------
106
  st.set_page_config(page_title="RAG Chatbot", layout="wide")
107
  st.title("📄 Chat with Your Documents (RAG)")
108
 
109
  uploaded_files = st.file_uploader(
110
- "Upload PDF or TXT files",
111
  accept_multiple_files=True
112
  )
113
 
@@ -118,13 +118,15 @@ if uploaded_files:
118
  vectorstore = create_vectorstore(chunks)
119
  qa_chain = build_qa(vectorstore)
120
 
121
- st.success("✅ Documents ready!")
122
 
123
  query = st.text_input("Ask a question from your documents")
124
 
125
  if query:
126
  with st.spinner("Thinking..."):
127
- result = qa_chain.run(query)
128
-
129
- st.write("### 📌 Answer:")
130
- st.write(result)
 
 
 
1
  import streamlit as st
2
+ import tempfile
3
 
4
  from langchain_community.document_loaders import PyPDFLoader, TextLoader
5
  from langchain_text_splitters import RecursiveCharacterTextSplitter
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain_community.vectorstores import FAISS
 
8
  from langchain.chains import RetrievalQA
9
  from langchain.prompts import PromptTemplate
10
  from langchain_community.llms import HuggingFacePipeline
 
13
 
14
 
15
  # -------------------------------
16
+ # Load Documents (FIXED - NO 403)
17
  # -------------------------------
18
  def load_documents(uploaded_files):
19
  documents = []
20
 
21
  for file in uploaded_files:
22
+ # ✅ SAFE TEMP FILE (main fix)
23
+ with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
24
+ tmp_file.write(file.read())
25
+ file_path = tmp_file.name
26
 
27
+ # Load document
28
  if file.name.endswith(".pdf"):
29
  loader = PyPDFLoader(file_path)
30
  else:
 
36
 
37
 
38
  # -------------------------------
39
+ # Split Documents
40
  # -------------------------------
41
  def split_documents(documents):
42
  splitter = RecursiveCharacterTextSplitter(
 
47
 
48
 
49
  # -------------------------------
50
+ # Create Vector Store
51
  # -------------------------------
52
  def create_vectorstore(chunks):
53
  embeddings = HuggingFaceEmbeddings(
 
57
 
58
 
59
  # -------------------------------
60
+ # Load LLM (LIGHT + NO TOKEN)
61
  # -------------------------------
62
  def load_llm():
63
  pipe = pipeline(
64
  "text2text-generation",
65
+ model="google/flan-t5-small", # best balance
66
  max_length=512,
67
  temperature=0.3
68
  )
 
70
 
71
 
72
  # -------------------------------
73
+ # Build QA Chain (Better Prompt)
74
  # -------------------------------
75
  def build_qa(vectorstore):
76
  llm = load_llm()
 
101
 
102
 
103
  # -------------------------------
104
+ # Streamlit UI
105
  # -------------------------------
106
  st.set_page_config(page_title="RAG Chatbot", layout="wide")
107
  st.title("📄 Chat with Your Documents (RAG)")
108
 
109
  uploaded_files = st.file_uploader(
110
+ "Upload PDF or TXT files (Max ~10MB recommended)",
111
  accept_multiple_files=True
112
  )
113
 
 
118
  vectorstore = create_vectorstore(chunks)
119
  qa_chain = build_qa(vectorstore)
120
 
121
+ st.success("✅ Documents processed successfully!")
122
 
123
  query = st.text_input("Ask a question from your documents")
124
 
125
  if query:
126
  with st.spinner("Thinking..."):
127
+ try:
128
+ result = qa_chain.run(query)
129
+ st.write("### 📌 Answer:")
130
+ st.write(result)
131
+ except Exception as e:
132
+ st.error(f"Error: {str(e)}")