# RAG_contract / app.py
# Provenance: Hugging Face Space by Aizaz96 — commit f94cbac ("Update app.py", verified).
# NOTE: the lines above were scraped Spaces page chrome, not Python code;
# they are preserved here as comments so the file parses.
# Standard library
import os

# Third-party
import pdfplumber
import streamlit as st
import transformers

from langchain.chains import LLMChain, RetrievalQA
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
# Use Groq LLM from langchain-community.
# NOTE(review): recent langchain-community releases do not ship a `Groq` LLM
# class — the official integration is `langchain_groq.ChatGroq`. Confirm the
# pinned langchain-community version actually provides this import.
from langchain_community.llms import Groq
# Load PDFs from a folder using pdfplumber
def load_pdfs_from_folder(folder_path):
    """Load every PDF in *folder_path* into LangChain ``Document`` objects.

    Args:
        folder_path: Directory scanned (non-recursively) for ``*.pdf`` files.

    Returns:
        list[Document]: One Document per PDF, with the concatenated text of
        all pages as ``page_content`` and the filename under
        ``metadata["source"]``.
    """
    docs = []
    # sorted() makes ingestion order deterministic across filesystems
    # (os.listdir order is otherwise arbitrary).
    for fname in sorted(os.listdir(folder_path)):
        if not fname.endswith(".pdf"):
            continue
        full_path = os.path.join(folder_path, fname)
        pages = []
        with pdfplumber.open(full_path) as pdf:
            for page in pdf.pages:
                # extract_text() returns None for image-only pages.
                pages.append(page.extract_text() or "")
        # "".join avoids the quadratic cost of repeated string +=.
        docs.append(Document(page_content="".join(pages), metadata={"source": fname}))
    return docs
# Answer a question via Groq LLM (fallback)
def ask_groq(question):
    """Answer *question* directly with the Groq LLM, without retrieval context.

    Used as a fallback when the vector store returns no relevant documents.

    Args:
        question: The raw user question string.

    Returns:
        str: The model's answer text.
    """
    # SECURITY FIX: the original code embedded the live Groq API key as a
    # literal in source AND used that literal as the st.secrets lookup key.
    # The key must live in .streamlit/secrets.toml under GROQ_API_KEY —
    # and the leaked key should be revoked/rotated in the Groq console.
    llm = Groq(api_key=st.secrets["GROQ_API_KEY"], model="mixtral-8x7b-32768")
    return llm.predict(question)
def _load_uploaded_pdfs(uploaded_files):
    """Convert Streamlit ``UploadedFile`` objects into LangChain Documents.

    The original code called an undefined ``load_uploaded_pdfs`` (a guaranteed
    NameError on the button path) — this private helper supplies the missing
    implementation. pdfplumber accepts file-like objects, so the uploads are
    parsed in memory without writing to disk.
    """
    docs = []
    for uploaded in uploaded_files:
        pages = []
        with pdfplumber.open(uploaded) as pdf:
            for page in pdf.pages:
                # extract_text() returns None for image-only pages.
                pages.append(page.extract_text() or "")
        docs.append(
            Document(page_content="".join(pages), metadata={"source": uploaded.name})
        )
    return docs


def main():
    """Streamlit entry point: RAG over uploaded PDFs with a Groq-only fallback."""
    st.set_page_config(page_title="RAG Chatbot with Groq", layout="centered")
    st.title("📚 RAG Chatbot with Groq Fallback")

    uploaded_files = st.file_uploader(
        "Upload one or more PDF files", type=["pdf"], accept_multiple_files=True
    )
    question = st.text_input("Ask a question:")
    process_btn = st.button("Process and Answer")

    if process_btn and uploaded_files and question:
        with st.spinner("📂 Loading documents..."):
            docs = _load_uploaded_pdfs(uploaded_files)

        # Chunk the documents so each embedding stays within model limits.
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = text_splitter.split_documents(docs)

        # Embed and index the chunks for similarity search.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        db = FAISS.from_documents(chunks, embeddings)
        retriever = db.as_retriever(search_kwargs={"k": 3})

        # Fall back to the bare LLM when retrieval finds nothing relevant.
        relevant_docs = retriever.get_relevant_documents(question)
        if not relevant_docs:
            st.warning("❓ No relevant documents found. Asking Groq LLM...")
            answer = ask_groq(question)
        else:
            # SECURITY FIX: read the API key from secrets.toml (GROQ_API_KEY);
            # the original hard-coded the live key literal in source.
            llm = Groq(api_key=st.secrets["GROQ_API_KEY"], model="mixtral-8x7b-32768")
            qa = RetrievalQA.from_chain_type(
                llm=llm, retriever=retriever, return_source_documents=True
            )
            result = qa({"query": question})
            answer = result["result"]

        st.success("💬 Answer:")
        st.write(answer)


if __name__ == "__main__":
    main()