"""Streamlit app: upload a PDF, index its text with FAISS, and answer questions via Groq."""

import os

import faiss
import numpy as np
import streamlit as st
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq

# Raises KeyError at startup when the environment variable is not set.
GROQ_API_KEY = os.environ['GROQ_API_KEY']

# Initialize Groq client
client = Groq(api_key=GROQ_API_KEY)

# Initialize embedder
embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# App UI
st.set_page_config(page_title="🧠 RAG Chat with PDF", layout="wide")
st.title("📄 Chat with your PDF")


def read_pdf(file):
    """Return the concatenated text of all pages of a PDF.

    Args:
        file: Anything ``PdfReader`` accepts (here: the Streamlit upload).

    Returns:
        The page texts joined with newlines. Pages that yield no text
        contribute an empty string.
    """
    reader = PdfReader(file)
    # Joining with a newline keeps the last word of one page from fusing
    # with the first word of the next; `or ""` guards against a page whose
    # extraction yields None instead of a string.
    return "\n".join(page.extract_text() or "" for page in reader.pages)


def chunk_text(text, max_length=500):
    """Split text into chunks of at most ``max_length`` whitespace-separated words.

    Args:
        text: The text to split.
        max_length: Maximum number of words per chunk; must be >= 1.

    Returns:
        A list of chunk strings (empty when ``text`` contains no words).

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")
    words = text.split()
    return [
        ' '.join(words[start:start + max_length])
        for start in range(0, len(words), max_length)
    ]


def create_faiss_index(chunks):
    """Embed the chunks and store the vectors in a flat L2 FAISS index.

    Args:
        chunks: Non-empty list of text chunks.

    Returns:
        A tuple ``(index, embeddings)``: the populated ``faiss.IndexFlatL2``
        and the float32 embedding matrix that was added to it.

    Raises:
        ValueError: If ``chunks`` is empty (there is no dimension to build
            the index from).
    """
    if not chunks:
        raise ValueError("cannot build an index from an empty list of chunks")
    # FAISS works on float32 matrices; make the dtype explicit.
    embeddings = np.array(embedder.encode(chunks), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings


def search_index(index, query, chunks, top_k=3):
    """Return the chunks whose embeddings are closest to the query.

    Args:
        index: FAISS index built from ``chunks``.
        query: The question text.
        chunks: The chunk list the index was built from (same order).
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks, in the order returned by the index search.
    """
    # Never ask for more neighbours than there are chunks: FAISS pads the
    # result with -1 labels, and chunks[-1] would silently return the last
    # chunk as a bogus hit.
    k = min(top_k, len(chunks))
    if k < 1:
        return []
    query_embedding = np.array(embedder.encode([query]), dtype=np.float32)
    _distances, labels = index.search(query_embedding, k)
    return [chunks[i] for i in labels[0] if 0 <= i < len(chunks)]


# File uploader
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

if uploaded_file:
    with st.spinner("📖 Reading and processing PDF..."):
        text = read_pdf(uploaded_file)
        chunks = chunk_text(text)

    if not chunks:
        # Nothing to index, so there is nothing to answer questions from.
        st.error("No extractable text was found in this PDF.")
        st.stop()

    with st.spinner("📖 Reading and processing PDF..."):
        index, _embeddings = create_faiss_index(chunks)
    st.success("✅ PDF processed. You can now ask questions!")

    query = st.text_input("Ask a question from the PDF:")

    if query:
        with st.spinner("🔍 Retrieving context..."):
            context_chunks = search_index(index, query, chunks, top_k=3)
            # Retrieved context first, then the user's question.
            prompt = "\n".join(context_chunks) + f"\n\nQuestion: {query}"

        with st.spinner("🤖 Getting answer from Groq..."):
            response = client.chat.completions.create(
                messages=[
                    {"role": "user", "content": prompt}
                ],
                model="llama-3.3-70b-versatile"
            )
            answer = response.choices[0].message.content

        st.markdown(f"**Answer:** {answer}")