# app.py
"""Streamlit RAG app: upload a PDF, index it in FAISS, answer queries via Groq.

Pipeline: PDF upload -> PyPDFLoader -> recursive chunking -> SentenceTransformer
embeddings in a FAISS store -> similarity search to build a grounded prompt for
a Groq-hosted chat model.
"""

import os
import tempfile
from io import BytesIO  # NOTE(review): currently unused; kept from original file

import streamlit as st
from groq import Groq
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS


# Load PDF (Corrected)
def load_pdf(uploaded_file):
    """Load an uploaded PDF into a list of LangChain documents.

    The uploaded bytes are written to a temporary file because PyPDFLoader
    accepts a filesystem path, not a file-like object.

    Returns the loaded documents, or None on failure (the error is shown in
    the Streamlit UI).
    """
    temp_file_path = None
    try:
        bytes_data = uploaded_file.getvalue()
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(bytes_data)
            temp_file_path = temp_file.name
        loader = PyPDFLoader(temp_file_path)
        return loader.load()
    except Exception as e:
        st.error(f"Error loading PDF: {e}")
        return None
    finally:
        # Clean up on BOTH success and failure paths; the original removed the
        # temp file only on success, leaking it whenever loading raised.
        if temp_file_path is not None and os.path.exists(temp_file_path):
            os.remove(temp_file_path)


# Chunking (with error handling)
def chunk_text(documents):
    """Split documents into overlapping chunks suitable for embedding.

    Returns the chunk list, or None on failure (error shown in the UI).
    """
    try:
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200
        )
        return text_splitter.split_documents(documents)
    except Exception as e:
        st.error(f"Error chunking text: {e}")
        return None


# Embeddings and Vectorstore (with error handling)
def create_embeddings_and_store(chunks):
    """Embed chunks with a SentenceTransformer model and index them in FAISS.

    Returns the FAISS vector store, or None on failure (error shown in the UI).
    """
    try:
        # Or other suitable model
        embeddings = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
        return FAISS.from_documents(chunks, embeddings)
    except Exception as e:
        st.error(f"Error creating embeddings: {e}")
        return None


# Groq interaction (with more robust error handling and correct secret access)
def query_groq(query, db):
    """Answer *query* with Groq, grounding the prompt in chunks similar to it.

    Reads the API key from st.secrets["GROQ_API_KEY"] (Hugging Face / Streamlit
    secrets). Returns the model's answer text, or None on failure (error shown
    in the UI).
    """
    try:
        groq_api_key = st.secrets["GROQ_API_KEY"]  # Use st.secrets for Hugging Face secrets
        docs = db.similarity_search(query)  # Similarity search
        context = "\n".join([doc.page_content for doc in docs])
        client = Groq(api_key=groq_api_key)  # Pass the secret to the client
        prompt = f"""Use the following context to answer the question: {query}\n\nContext:\n{context}"""
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            # Or other suitable open-source model compatible with Groq
            model="llama-3.3-70b-versatile",
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        st.error(f"Error querying Groq: {e}")
        return None


# Streamlit app
st.title("RAG Application")

uploaded_file = st.file_uploader("Upload PDF", type="pdf")
if uploaded_file is not None:
    with st.spinner("Processing PDF..."):
        documents = load_pdf(uploaded_file)
    if documents:  # Check if PDF loaded successfully
        chunks = chunk_text(documents)
        if chunks:  # Check if chunks were created successfully
            db = create_embeddings_and_store(chunks)
            if db:  # Check if embeddings were created successfully
                st.success("PDF processed!")
                query = st.text_area("Enter your query")
                if st.button("Submit"):
                    if query:
                        with st.spinner("Querying..."):
                            answer = query_groq(query, db)
                        if answer:  # Check if query was successful
                            st.write(answer)
                    else:
                        st.warning("Please enter a query.")