Spaces:

Muthuraja18
/

Chatbot

Sleeping

App Files Files Community

Update app.py

#10

by Muthuraja18 - opened Apr 26

base: refs/heads/main

←

from: refs/pr/10

Discussion Files changed

+55

-60

Files changed (1) hide show

app.py +55 -60

app.py CHANGED Viewed

@@ -2,33 +2,38 @@ import streamlit as st
 import tempfile
 from langchain_community.document_loaders import PyPDFLoader, TextLoader
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS
 from langchain.chains import RetrievalQA
-from langchain.prompts import PromptTemplate
-from langchain_community.llms import HuggingFacePipeline
 from transformers import pipeline
 # -------------------------------
-# Load Documents (FIXED - NO 403)
 # -------------------------------
 def load_documents(uploaded_files):
     documents = []
     for file in uploaded_files:
-        # ✅ SAFE TEMP FILE (main fix)
-        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
-            tmp_file.write(file.read())
-            file_path = tmp_file.name
-        # Load document
         if file.name.endswith(".pdf"):
-            loader = PyPDFLoader(file_path)
         else:
-            loader = TextLoader(file_path)
         documents.extend(loader.load())
@@ -40,93 +45,83 @@ def load_documents(uploaded_files):
 # -------------------------------
 def split_documents(documents):
     splitter = RecursiveCharacterTextSplitter(
-        chunk_size=800,
-        chunk_overlap=100
     )
     return splitter.split_documents(documents)
 # -------------------------------
-# Create Vector Store
 # -------------------------------
-def create_vectorstore(chunks):
-    embeddings = HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-MiniLM-L6-v2"
     )
     return FAISS.from_documents(chunks, embeddings)
 # -------------------------------
-# Load LLM (LIGHT + NO TOKEN)
 # -------------------------------
 def load_llm():
     pipe = pipeline(
-        "text2text-generation",
-        model="google/flan-t5-small",   # best balance
-        max_length=512,
-        temperature=0.3
     )
     return HuggingFacePipeline(pipeline=pipe)
 # -------------------------------
-# Build QA Chain (Better Prompt)
 # -------------------------------
 def build_qa(vectorstore):
     llm = load_llm()
-    prompt_template = """
-    Use the following context to answer the question.
-    If the answer is not in the context, say "Answer not found in document".
-    Context:
-    {context}
-    Question:
-    {question}
-    Answer:
-    """
-    PROMPT = PromptTemplate(
-        template=prompt_template,
-        input_variables=["context", "question"]
-    )
-    return RetrievalQA.from_chain_type(
         llm=llm,
-        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
-        chain_type_kwargs={"prompt": PROMPT}
     )
 # -------------------------------
-# Streamlit UI
 # -------------------------------
-st.set_page_config(page_title="RAG Chatbot", layout="wide")
-st.title("📄 Chat with Your Documents (RAG)")
 uploaded_files = st.file_uploader(
-    "Upload PDF or TXT files (Max ~10MB recommended)",
     accept_multiple_files=True
 )
 if uploaded_files:
-    with st.spinner("Processing documents..."):
         docs = load_documents(uploaded_files)
         chunks = split_documents(docs)
         vectorstore = create_vectorstore(chunks)
         qa_chain = build_qa(vectorstore)
-    st.success("✅ Documents processed successfully!")
-    query = st.text_input("Ask a question from your documents")
     if query:
-        with st.spinner("Thinking..."):
-            try:
-                result = qa_chain.run(query)
-                st.write("### 📌 Answer:")
-                st.write(result)
-            except Exception as e:
-                st.error(f"Error: {str(e)}")

 import tempfile
 from langchain_community.document_loaders import PyPDFLoader, TextLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.llms import HuggingFacePipeline
 from langchain.chains import RetrievalQA
 from transformers import pipeline
+# -------------------------------
+# Page Config
+# -------------------------------
+st.set_page_config(page_title="RAG Chatbot", layout="wide")
+st.title("📄 Chat with Your Documents (RAG)")
+st.write("🚀 App started successfully")
 # -------------------------------
+# Load Documents (FIXED)
 # -------------------------------
 def load_documents(uploaded_files):
     documents = []
     for file in uploaded_files:
+        # Save file safely using temp file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=file.name) as tmp:
+            tmp.write(file.getbuffer())
+            temp_path = tmp.name
+        # Load based on type
         if file.name.endswith(".pdf"):
+            loader = PyPDFLoader(temp_path)
         else:
+            loader = TextLoader(temp_path)
         documents.extend(loader.load())
 # -------------------------------
 def split_documents(documents):
     splitter = RecursiveCharacterTextSplitter(
+        chunk_size=500,
+        chunk_overlap=50
     )
     return splitter.split_documents(documents)
 # -------------------------------
+# Cached Embeddings (IMPORTANT)
 # -------------------------------
+@st.cache_resource
+def get_embeddings():
+    return HuggingFaceEmbeddings(
         model_name="sentence-transformers/all-MiniLM-L6-v2"
     )
+# -------------------------------
+# Create Vector Store
+# -------------------------------
+def create_vectorstore(chunks):
+    embeddings = get_embeddings()
     return FAISS.from_documents(chunks, embeddings)
 # -------------------------------
+# Cached LLM (IMPORTANT)
 # -------------------------------
+@st.cache_resource
 def load_llm():
     pipe = pipeline(
+        "text-generation",
+        model="google/flan-t5-small",  # lightweight model
+        max_length=256
     )
     return HuggingFacePipeline(pipeline=pipe)
 # -------------------------------
+# Build QA Chain
 # -------------------------------
 def build_qa(vectorstore):
     llm = load_llm()
+    retriever = vectorstore.as_retriever()
+    qa = RetrievalQA.from_chain_type(
         llm=llm,
+        retriever=retriever,
+        return_source_documents=False
     )
+    return qa
 # -------------------------------
+# UI - Upload
 # -------------------------------
 uploaded_files = st.file_uploader(
+    "Upload PDF or TXT files",
     accept_multiple_files=True
 )
 if uploaded_files:
+    with st.spinner("📄 Processing documents..."):
         docs = load_documents(uploaded_files)
         chunks = split_documents(docs)
         vectorstore = create_vectorstore(chunks)
         qa_chain = build_qa(vectorstore)
+    st.success("✅ Documents ready!")
+    # -------------------------------
+    # User Query
+    # -------------------------------
+    query = st.text_input("💬 Ask a question from your documents")
     if query:
+        with st.spinner("🤖 Generating answer..."):
+            result = qa_chain.run(query)
+        st.markdown("### 🧠 Answer:")
+        st.write(result)