"""Streamlit RAG app: answer questions about an uploaded PDF.

Pipeline: extract PDF text -> chunk into overlapping word windows ->
embed with a SentenceTransformer -> FAISS L2 similarity search ->
answer the question with a Groq chat completion over the top chunks.
"""

import os
import tempfile

import faiss
import numpy as np
import streamlit as st
from groq import Groq
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer

# Bug fix: the original passed the bare (undefined) name GROQ_API_KEY to
# os.environ.get, raising NameError at startup. The env-var name is a string.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Heavyweight resources created once at module load:
# 384-dim MiniLM sentence embedder and the Groq API client.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
client = Groq(api_key=GROQ_API_KEY)


def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    *pdf_file* may be a path or a file-like object (PdfReader accepts both).
    ``extract_text()`` can return None for image-only pages, so substitute
    "" to keep the join from raising TypeError.
    """
    reader = PdfReader(pdf_file)
    return "\n".join(page.extract_text() or "" for page in reader.pages)


def chunk_text(text, chunk_size=500, overlap=50):
    """Split *text* into chunks of *chunk_size* words, consecutive chunks
    sharing *overlap* words.

    Raises ValueError if chunk_size <= overlap (the window would never
    advance).
    """
    if chunk_size <= overlap:
        raise ValueError("chunk_size must be greater than overlap")
    words = text.split()
    step = chunk_size - overlap
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), step)]


def embed_chunks(chunks):
    """Embed each chunk; returns an array of shape (len(chunks), dim)."""
    return embed_model.encode(chunks)


def create_faiss_index(embeddings):
    """Build an exact (brute-force) L2 FAISS index over a 2-D float array."""
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index


def query_groq(query, context):
    """Ask the Groq chat model to answer *query* grounded in *context*."""
    messages = [
        {"role": "system", "content": "You are a helpful assistant that answers questions based on the given context."},
        {"role": "user", "content": f"Context: {context}\n\nQuestion: {query}"},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content


# --- Streamlit UI -----------------------------------------------------------
st.title("RAG PDF Q&A App (Groq + FAISS)")

uploaded_file = st.file_uploader("Upload your PDF", type="pdf")
query = st.text_input("Ask a question about the PDF")

if uploaded_file:
    # PdfReader accepts the uploaded file-like object directly, so the
    # original NamedTemporaryFile(delete=False) round-trip (which also
    # leaked the temp file) is unnecessary.
    text = extract_text_from_pdf(uploaded_file)
    chunks = chunk_text(text)
    embeddings = embed_chunks(chunks)
    index = create_faiss_index(np.array(embeddings))

    if query:
        query_embedding = embed_model.encode([query])
        # Clamp k: asking FAISS for more neighbours than stored vectors
        # yields -1 indices, which would break the chunk lookup below.
        k = min(3, len(chunks))
        distances, indices = index.search(np.array(query_embedding), k=k)
        context = "\n\n".join(chunks[i] for i in indices[0])
        response = query_groq(query, context)
        st.subheader("Answer:")
        st.write(response)