Spaces:
Sleeping
Sleeping
| import os | |
| import streamlit as st | |
| from dotenv import load_dotenv | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_groq import ChatGroq | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_core.output_parsers import StrOutputParser | |
| import tempfile | |
# Load environment variables via python-dotenv before anything reads os.environ.
load_dotenv()

# Page config: title, icon and layout are collected in one mapping and
# handed to Streamlit in a single call.
_PAGE_SETTINGS = {
    "page_title": "RAG Chatbot",
    "page_icon": "π",
    "layout": "centered",
}
st.set_page_config(**_PAGE_SETTINGS)

# Page heading followed by a one-line usage hint.
st.title("π RAG Chatbot")
st.write("Upload a PDF and ask questions about it!")
# Initialize session state: seed each key only when it is absent, so values
# already stored in st.session_state are left untouched.
for _state_key, _initial_value in (("chat_history", []), ("qa_chain", None)):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# Sidebar: PDF upload and construction of the question-answering chain.

# Prompt text handed to ChatPromptTemplate; {context} and {question} are the
# two inputs supplied by the chain built in _build_qa_chain().
_PROMPT_TEMPLATE = (
    "\n"
    "Answer the question based on the context below.\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Answer:\n"
)


def format_docs(docs):
    """Join the ``page_content`` of each document, separated by blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


def _load_pdf_chunks(pdf_bytes):
    """Load a PDF from raw bytes and split it into text chunks.

    PyPDFLoader is given a filesystem path, so the bytes are first written to
    a temporary file. That file is created with ``delete=False`` and is
    therefore removed explicitly once loading has finished (or failed).

    Args:
        pdf_bytes: Complete contents of the uploaded PDF.

    Returns:
        The list produced by ``RecursiveCharacterTextSplitter.split_documents``
        (chunk_size=500, chunk_overlap=50); empty when no text was extracted.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(pdf_bytes)
        tmp_path = tmp.name
    try:
        documents = PyPDFLoader(tmp_path).load()
    finally:
        # Nothing else deletes the file for us, so do it here to avoid
        # leaking one temp file per processed upload.
        os.unlink(tmp_path)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    )
    return splitter.split_documents(documents)


def _build_qa_chain(chunks, api_key):
    """Index ``chunks`` in FAISS and return the retrieval + LLM chain.

    Args:
        chunks: Non-empty list of split documents to embed and index.
        api_key: Groq API key passed to ``ChatGroq``.

    Returns:
        A runnable that takes the question string, fills the prompt with the
        retrieved context and returns the model's answer as a string.
    """
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    retriever = FAISS.from_documents(chunks, embeddings).as_retriever()
    llm = ChatGroq(
        model="llama-3.1-8b-instant",
        temperature=0.5,
        api_key=api_key,
    )
    prompt = ChatPromptTemplate.from_template(_PROMPT_TEMPLATE)
    return (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )


with st.sidebar:
    st.header("Upload PDF")
    uploaded_file = st.file_uploader("Choose a PDF", type="pdf")
    if uploaded_file:
        # Streamlit re-executes this script on every interaction (including
        # each chat message). Remember which upload was already indexed so the
        # PDF is not re-embedded on every rerun.
        # NOTE(review): file_id is expected on recent Streamlit versions;
        # name and size act as the fallback identity - confirm for the
        # deployed version.
        file_key = (
            getattr(uploaded_file, "file_id", None),
            uploaded_file.name,
            uploaded_file.size,
        )
        if st.session_state.get("processed_file_key") != file_key:
            api_key = os.environ.get("GROQ_API_KEY")
            if not api_key:
                # Report the missing key instead of crashing with a KeyError.
                st.error(
                    "GROQ_API_KEY is not set. "
                    "Add it to the environment or a .env file."
                )
            else:
                # NOTE(review): the spinner/success icons below look
                # mis-encoded in this copy of the file; kept as found.
                with st.spinner("Processing PDF... β³"):
                    # getvalue() returns the full upload regardless of the
                    # current stream position.
                    chunks = _load_pdf_chunks(uploaded_file.getvalue())
                    if chunks:
                        st.session_state.qa_chain = _build_qa_chain(
                            chunks, api_key
                        )
                        st.session_state.processed_file_key = file_key
                        st.session_state.chunk_count = len(chunks)
                if not chunks:
                    # An empty chunk list cannot be indexed; report it rather
                    # than letting index construction fail.
                    st.error("No extractable text was found in this PDF.")
        if st.session_state.get("processed_file_key") == file_key:
            st.success(f"β {st.session_state.chunk_count} chunks ready!")
# Chat history display: replay every stored message in its own chat bubble,
# in the order the messages were appended.
for _entry in st.session_state.chat_history:
    _role, _content = _entry["role"], _entry["content"]
    with st.chat_message(_role):
        st.write(_content)
# Chat input: a submitted question is answered only when a chain has been
# built from an uploaded PDF; otherwise the user is asked to upload one.
question = st.chat_input("Ask a question about your PDF...")
if question:
    _chain = st.session_state.qa_chain
    if _chain is not None:
        # Show the user's message and record it in the history.
        with st.chat_message("user"):
            st.write(question)
        st.session_state.chat_history.append(
            {"role": "user", "content": question}
        )

        # Run the chain on the question, then show and record the answer.
        with st.chat_message("assistant"):
            with st.spinner("Thinking... β³"):
                answer = _chain.invoke(question)
            st.write(answer)
        st.session_state.chat_history.append(
            {"role": "assistant", "content": answer}
        )
    else:
        st.warning("Please upload a PDF first!")