Spaces:

Muthuraja18
/

Chatbot

Sleeping

App Files Community

Update app.py

by Muthuraja18 - opened Apr 26

base: refs/heads/main

←

from: refs/pr/9

Discussion Files changed

+21

-19

Files changed (1) hide show

app.py +21 -19

app.py CHANGED Viewed

@@ -1,11 +1,10 @@
 import streamlit as st
-import os
 from langchain_community.document_loaders import PyPDFLoader, TextLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.chains import RetrievalQA
 from langchain.prompts import PromptTemplate
 from langchain_community.llms import HuggingFacePipeline
@@ -14,17 +13,18 @@ from transformers import pipeline
 # -------------------------------
-# Load Documents (SAFE)
 # -------------------------------
 def load_documents(uploaded_files):
     documents = []
     for file in uploaded_files:
-        file_path = os.path.join("/tmp", file.name)
-        with open(file_path, "wb") as f:
-            f.write(file.getbuffer())
         if file.name.endswith(".pdf"):
             loader = PyPDFLoader(file_path)
         else:
@@ -36,7 +36,7 @@ def load_documents(uploaded_files):
 # -------------------------------
-# Split Documents (BETTER CHUNKS)
 # -------------------------------
 def split_documents(documents):
     splitter = RecursiveCharacterTextSplitter(
@@ -47,7 +47,7 @@ def split_documents(documents):
 # -------------------------------
-# Embeddings
 # -------------------------------
 def create_vectorstore(chunks):
     embeddings = HuggingFaceEmbeddings(
@@ -57,12 +57,12 @@ def create_vectorstore(chunks):
 # -------------------------------
-# LLM (Balanced quality + speed)
 # -------------------------------
 def load_llm():
     pipe = pipeline(
         "text2text-generation",
-        model="google/flan-t5-small",   # BEST without token
         max_length=512,
         temperature=0.3
     )
@@ -70,7 +70,7 @@ def load_llm():
 # -------------------------------
-# Prompt (VERY IMPORTANT)
 # -------------------------------
 def build_qa(vectorstore):
     llm = load_llm()
@@ -101,13 +101,13 @@ def build_qa(vectorstore):
 # -------------------------------
-# UI
 # -------------------------------
 st.set_page_config(page_title="RAG Chatbot", layout="wide")
 st.title("📄 Chat with Your Documents (RAG)")
 uploaded_files = st.file_uploader(
-    "Upload PDF or TXT files",
     accept_multiple_files=True
 )
@@ -118,13 +118,15 @@ if uploaded_files:
         vectorstore = create_vectorstore(chunks)
         qa_chain = build_qa(vectorstore)
-    st.success("✅ Documents ready!")
     query = st.text_input("Ask a question from your documents")
     if query:
         with st.spinner("Thinking..."):
-            result = qa_chain.run(query)
-            st.write("### 📌 Answer:")
-            st.write(result)

 import streamlit as st
+import tempfile
 from langchain_community.document_loaders import PyPDFLoader, TextLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.chains import RetrievalQA
 from langchain.prompts import PromptTemplate
 from langchain_community.llms import HuggingFacePipeline
 # -------------------------------
+# Load Documents (FIXED - NO 403)
 # -------------------------------
 def load_documents(uploaded_files):
     documents = []
     for file in uploaded_files:
+        # ✅ SAFE TEMP FILE (main fix)
+        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
+            tmp_file.write(file.read())
+            file_path = tmp_file.name
+        # Load document
         if file.name.endswith(".pdf"):
             loader = PyPDFLoader(file_path)
         else:
 # -------------------------------
+# Split Documents
 # -------------------------------
 def split_documents(documents):
     splitter = RecursiveCharacterTextSplitter(
 # -------------------------------
+# Create Vector Store
 # -------------------------------
 def create_vectorstore(chunks):
     embeddings = HuggingFaceEmbeddings(
 # -------------------------------
+# Load LLM (LIGHT + NO TOKEN)
 # -------------------------------
 def load_llm():
     pipe = pipeline(
         "text2text-generation",
+        model="google/flan-t5-small",   # best balance
         max_length=512,
         temperature=0.3
     )
 # -------------------------------
+# Build QA Chain (Better Prompt)
 # -------------------------------
 def build_qa(vectorstore):
     llm = load_llm()
 # -------------------------------
+# Streamlit UI
 # -------------------------------
 st.set_page_config(page_title="RAG Chatbot", layout="wide")
 st.title("📄 Chat with Your Documents (RAG)")
 uploaded_files = st.file_uploader(
+    "Upload PDF or TXT files (Max ~10MB recommended)",
     accept_multiple_files=True
 )
         vectorstore = create_vectorstore(chunks)
         qa_chain = build_qa(vectorstore)
+    st.success("✅ Documents processed successfully!")
     query = st.text_input("Ask a question from your documents")
     if query:
         with st.spinner("Thinking..."):
+            try:
+                result = qa_chain.run(query)
+                st.write("### 📌 Answer:")
+                st.write(result)
+            except Exception as e:
+                st.error(f"Error: {str(e)}")