"""Streamlit RAG demo: upload a PDF, index it with FAISS, and query it via Groq.

Run with ``streamlit run <this file>``.

Install dependencies from a shell first (``!pip`` magics are only valid inside
a notebook cell, not in a Python script):

    pip install streamlit langchain chromadb unstructured faiss-cpu sentence_transformers PyPDF2 pypdf groq
    pip install -U langchain-community

The Groq API key must be supplied through the ``GROQ_API_KEY`` environment
variable (e.g. ``export GROQ_API_KEY=...``). Never commit the key to source
control; any key that was previously hard-coded in this file should be
considered leaked and be revoked.
"""

import contextlib
import hashlib
import os
import tempfile

import streamlit as st
from groq import Groq
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS


def load_pdf(uploaded_file):
    """Load a PDF into a list of LangChain documents.

    Args:
        uploaded_file: Either a filesystem path (``str`` / ``os.PathLike``) or
            a Streamlit ``UploadedFile`` (an in-memory, ``BytesIO``-like
            object exposing ``getvalue()``).

    Returns:
        The list of loaded documents, or ``None`` if loading failed (the
        error is reported to the UI via ``st.error``).
    """
    tmp_path = None
    try:
        if isinstance(uploaded_file, (str, os.PathLike)):
            return PyPDFLoader(str(uploaded_file)).load()

        # PyPDFLoader needs a real path, but Streamlit uploads live in memory,
        # so spill the bytes to a temporary file first. delete=False lets the
        # loader reopen the file by name on every platform (incl. Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(uploaded_file.getvalue())
            tmp_path = tmp.name
        return PyPDFLoader(tmp_path).load()
    except Exception as e:
        st.error(f"Error loading PDF: {e}")
        return None
    finally:
        # Best-effort cleanup of the temporary copy.
        if tmp_path is not None:
            with contextlib.suppress(OSError):
                os.remove(tmp_path)


def chunk_text(documents):
    """Split documents into overlapping chunks suitable for embedding.

    Args:
        documents: Documents as returned by ``load_pdf``.

    Returns:
        The list of chunked documents, or ``None`` if splitting failed (the
        error is reported via ``st.error``).
    """
    try:
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200
        )
        return text_splitter.split_documents(documents)
    except Exception as e:
        st.error(f"Error chunking text: {e}")
        return None


def create_embeddings_and_store(chunks):
    """Embed the chunks and build a FAISS vector store from them.

    Args:
        chunks: Chunked documents as returned by ``chunk_text``.

    Returns:
        The FAISS vector store, or ``None`` if embedding/indexing failed (the
        error is reported via ``st.error``).
    """
    try:
        embeddings = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
        return FAISS.from_documents(chunks, embeddings)
    except Exception as e:
        st.error(f"Error creating embeddings: {e}")
        return None


def query_groq(query, db):
    """Answer ``query`` with a Groq chat model, grounded in chunks from ``db``.

    Args:
        query: The user's question.
        db: Vector store exposing ``similarity_search(query)``.

    Returns:
        The model's answer text, or ``None`` if the API key is missing or the
        request failed (the error is reported via ``st.error``).
    """
    try:
        # Validate the key *before* building the client: the client
        # constructor itself raises when no key is available, which would
        # otherwise make this friendlier message unreachable.
        api_key = os.environ.get("GROQ_API_KEY")
        if not api_key:
            st.error("GROQ_API_KEY environment variable is not set.")
            return None

        docs = db.similarity_search(query)
        context = "\n".join(doc.page_content for doc in docs)

        client = Groq(api_key=api_key)
        prompt = f"""Use the following context to answer the question: {query}\n\nContext:\n{context}"""
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",  # Or other suitable open-source model
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        st.error(f"Error querying Groq: {e}")
        return None


def _build_vectorstore(uploaded_file):
    """Run load -> chunk -> embed for one upload; return the store or ``None``."""
    documents = load_pdf(uploaded_file)
    if not documents:
        return None
    chunks = chunk_text(documents)
    if not chunks:
        return None
    return create_embeddings_and_store(chunks)


# Streamlit app
st.title("RAG Application")

uploaded_file = st.file_uploader("Upload PDF", type="pdf")

if uploaded_file is not None:
    # Streamlit re-executes this script on every widget interaction, so cache
    # the index in session state (keyed by content hash) to avoid re-parsing
    # and re-embedding the same PDF each time "Submit" is clicked.
    file_key = hashlib.sha256(uploaded_file.getvalue()).hexdigest()
    if st.session_state.get("rag_file_key") == file_key:
        db = st.session_state.get("rag_db")
    else:
        with st.spinner("Processing PDF..."):
            db = _build_vectorstore(uploaded_file)
        # Only remember successful builds so a failed upload is retried.
        if db is not None:
            st.session_state["rag_file_key"] = file_key
            st.session_state["rag_db"] = db

    if db is not None:
        st.success("PDF processed!")

        query = st.text_area("Enter your query")
        if st.button("Submit"):
            if query:
                with st.spinner("Querying..."):
                    answer = query_groq(query, db)
                if answer:  # Check if query was successful
                    st.write(answer)
            else:
                st.warning("Please enter a query.")