"""Streamlit RAG app: upload a PDF, chunk + embed it, answer questions via GROQ LLaMA 3."""

import os

import streamlit as st
import PyPDF2
import faiss
import numpy as np
import requests
from sentence_transformers import SentenceTransformer

# NOTE(review): `tiktoken` was imported here but never used anywhere in the file;
# removed to drop the dead dependency.


@st.cache_resource(show_spinner=False)
def _load_embedding_model():
    """Load the sentence-embedding model once per server process.

    Streamlit re-executes this script on every UI interaction; without
    `st.cache_resource` the model would be re-downloaded/re-loaded each rerun.
    """
    return SentenceTransformer("all-MiniLM-L6-v2")


embedding_model = _load_embedding_model()

# GROQ API configuration (🔐 loaded securely from environment variable)
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
LLAMA3_MODEL = "llama3-8b-8192"

# Hard cap on how long we wait for the GROQ API (seconds) so the UI never hangs.
REQUEST_TIMEOUT = 60


def load_pdf(pdf_file):
    """Extract and return the text of every page of *pdf_file*.

    `extract_text()` returns None for pages with no extractable text (e.g.
    scanned images), so each page falls back to "".  Pages are joined with a
    newline so the last word of one page is not fused with the first word of
    the next (the chunker splits on whitespace).
    """
    reader = PyPDF2.PdfReader(pdf_file)
    return "\n".join((page.extract_text() or "") for page in reader.pages)


def chunk_text(text, chunk_size=500):
    """Split *text* into word chunks of at most *chunk_size* words each.

    Returns an empty list for empty/whitespace-only input.
    """
    words = text.split()
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]


def get_embeddings(chunks):
    """Return a (len(chunks), dim) embedding matrix for the text chunks."""
    return embedding_model.encode(chunks)


def create_faiss_index(embeddings):
    """Build a flat L2 FAISS index over the given embedding matrix."""
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(np.array(embeddings))
    return index


def search_index(index, query, chunks, top_k=3):
    """Return up to *top_k* chunks most similar to *query*.

    Clamps *top_k* to the number of stored chunks and drops the -1 sentinel
    indices FAISS emits when fewer than *top_k* neighbours exist; the original
    code would have silently mapped -1 to the last chunk.
    """
    k = min(top_k, len(chunks))
    if k == 0:
        return []
    q_embed = embedding_model.encode([query])
    _, indices = index.search(np.array(q_embed), k)
    return [chunks[i] for i in indices[0] if i >= 0]


def generate_answer(prompt):
    """Send *prompt* to the GROQ chat-completions API and return the reply text.

    Returns an explanatory string when the API key is missing.
    Raises `requests.exceptions.HTTPError` on non-2xx responses and
    `requests.exceptions.Timeout` if the API does not answer in time
    (both are subclasses of `requests.exceptions.RequestException`).
    """
    if not GROQ_API_KEY:
        return "🚫 GROQ API key not found. Please set it in environment variables."
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    data = {
        "model": LLAMA3_MODEL,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    }
    # timeout added: without it a stalled API call blocks the Streamlit worker forever.
    response = requests.post(GROQ_URL, headers=headers, json=data, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]


def main():
    """Streamlit UI: upload/process a PDF, then answer questions about it."""
    st.set_page_config("RAG App", layout="centered")
    st.title("📄 PDF QA App with LLaMA 3 & GROQ")

    uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
    if uploaded_file and st.button("Process PDF"):
        with st.spinner("Processing..."):
            text = load_pdf(uploaded_file)
            chunks = chunk_text(text)
            if not chunks:
                # Scanned/empty PDFs yield no extractable text; embedding an
                # empty list would crash downstream, so bail out early.
                st.error("❌ No extractable text found in this PDF.")
            else:
                embeddings = get_embeddings(chunks)
                index = create_faiss_index(embeddings)
                st.session_state.chunks = chunks
                st.session_state.index = index
                st.success("✅ PDF processed and indexed.")

    if "index" in st.session_state:
        query = st.text_input("Ask a question about the PDF:")
        if st.button("Get Answer"):
            if not query.strip():
                # Guard: original code ran a search + LLM call on an empty query.
                st.warning("Please enter a question first.")
                return
            with st.spinner("Thinking..."):
                top_chunks = search_index(
                    st.session_state.index, query, st.session_state.chunks
                )
                context = "\n\n".join(top_chunks)
                prompt = (
                    f"Use the following context to answer the question:\n\n"
                    f"{context}\n\nQuestion: {query}"
                )
                try:
                    answer = generate_answer(prompt)
                    st.markdown("### 🧠 Answer:")
                    st.write(answer)
                except requests.exceptions.RequestException as e:
                    # Broadened from HTTPError so timeouts/connection failures
                    # surface as a friendly error instead of a traceback.
                    st.error(f"❌ API Error: {e}")


if __name__ == "__main__":
    main()