"""Streamlit RAG chatbot: upload a PDF, index it with FAISS, answer via Groq."""

import streamlit as st
import os
import faiss
import numpy as np
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from groq import Groq

# Read the key from the environment (e.g. Hugging Face Secrets) so it never
# lands in source control.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")


def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    PyPDF2's ``extract_text()`` returns ``None`` for pages with no
    extractable text (e.g. scanned images); guard with ``or ""`` so we
    don't raise ``TypeError`` on concatenation. ``join`` avoids the
    quadratic ``+=`` string build.
    """
    reader = PdfReader(pdf_file)
    return "".join(page.extract_text() or "" for page in reader.pages)


def split_text_into_chunks(text, chunk_size=500, chunk_overlap=100):
    """Split *text* into overlapping chunks suitable for embedding."""
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return splitter.split_text(text)


def store_embeddings(chunks):
    """Embed *chunks* with a MiniLM sentence-transformer and index them in FAISS."""
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_texts(chunks, embedding_model)


def query_groq(prompt):
    """Send *prompt* as a single user message to Groq and return the reply text."""
    client = Groq(api_key=GROQ_API_KEY)
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        # BUG FIX: Groq's model id is "llama3-70b-8192" (no hyphen after
        # "llama"); the original "llama-3-70b-8192" is rejected with a
        # model_not_found error.
        model="llama3-70b-8192",
    )
    return response.choices[0].message.content


def ask_question(query, vectorstore):
    """Answer *query* via RAG: retrieve the top-3 similar chunks, then ask Groq."""
    docs = vectorstore.similarity_search(query, k=3)
    context = "\n\n".join(doc.page_content for doc in docs)
    full_prompt = f"Context:\n{context}\n\nQuestion: {query}"
    return query_groq(full_prompt)


st.set_page_config(page_title="RAG PDF Chatbot", layout="centered")
st.title("📄 RAG Chatbot (PDF + FAISS + Groq API)")

uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

if uploaded_file:
    # Streamlit re-runs this whole script on every widget interaction, so
    # without caching the PDF would be re-read and re-embedded on every
    # question. Keep the index in session_state, keyed by file name, so
    # each upload is embedded exactly once.
    if st.session_state.get("indexed_file") != uploaded_file.name:
        with st.spinner("Reading and indexing..."):
            text = extract_text_from_pdf(uploaded_file)
            chunks = split_text_into_chunks(text)
            if not chunks:
                # e.g. a scanned/image-only PDF: fail with a clear message
                # instead of an opaque FAISS error.
                st.error("No extractable text found in this PDF.")
                st.stop()
            st.session_state["vectorstore"] = store_embeddings(chunks)
            st.session_state["indexed_file"] = uploaded_file.name
        st.success("PDF indexed!")

    query = st.text_input("Ask something about the document:")
    if query:
        with st.spinner("Generating answer..."):
            response = ask_question(query, st.session_state["vectorstore"])
        st.markdown(f"**Answer:** {response}")