# app.py
"""Streamlit RAG question-answering app.

Pipeline: upload a PDF -> extract text -> split into word chunks ->
embed with sentence-transformers -> index in FAISS -> retrieve top
chunks for a user query -> answer with a Groq-hosted LLM.
"""

import os

import faiss
import numpy as np
import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# --- SETUP GROQ ---
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# --- LOAD EMBEDDING MODEL ---
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# --- STREAMLIT UI ---
st.set_page_config(page_title="RAG App with Groq", layout="wide")
st.title("📄🧠 RAG-Based Question Answering App")


def chunk_text(text, chunk_size=500):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words.

    Returns a list of strings; empty input yields an empty list.
    """
    words = text.split()
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]


# --- UPLOAD PDF ---
uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

if uploaded_file:
    reader = PdfReader(uploaded_file)
    # extract_text() may return None for image-only/scanned pages; the
    # original `text += page.extract_text()` crashed with TypeError there.
    text = "".join((page.extract_text() or "") for page in reader.pages)

    # --- CHUNKING ---
    chunks = chunk_text(text)
    if not chunks:
        # Guard: an empty chunk list would make embeddings[0] raise IndexError.
        st.warning("No extractable text found in this PDF.")
        st.stop()

    # --- VECTORIZE CHUNKS ---
    # FAISS requires float32 input; cast explicitly rather than relying on
    # the encoder's output dtype.
    embeddings = np.asarray(embedder.encode(chunks), dtype="float32")
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    st.success("✅ Document uploaded and indexed!")

    # --- USER QUERY ---
    user_query = st.text_input("Ask something about the document")
    if user_query:
        query_embedding = np.asarray(embedder.encode([user_query]), dtype="float32")
        # Clamp k to the number of stored vectors: asking FAISS for more
        # neighbors than exist returns -1 indices, which the original code
        # silently mapped to chunks[-1] (the wrong chunk).
        k = min(3, len(chunks))
        _, I = index.search(query_embedding, k=k)
        retrieved_chunks = "\n\n".join(chunks[i] for i in I[0] if i != -1)

        # --- GROQ COMPLETION ---
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": "You are a helpful assistant who answers based on the provided context."},
                {"role": "user", "content": f"Context: {retrieved_chunks}\n\nQuestion: {user_query}"},
            ],
        )
        st.subheader("📢 Answer")
        st.write(response.choices[0].message.content)