import os

import streamlit as st
from groq import Groq
from PyPDF2 import PdfReader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

# Read the Groq API key from the environment
# (on Hugging Face Spaces, set RAG_API as a repository secret)
GROQ_API_KEY = os.getenv("RAG_API")
if not GROQ_API_KEY:
    st.error("Missing RAG_API environment variable (your Groq API key).")
    st.stop()

# Initialize the Groq client
groq_client = Groq(api_key=GROQ_API_KEY)

# Load the sentence-embedding model used for indexing and retrieval
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


# Extract the text of every page in the uploaded PDF
def extract_text_from_pdf(uploaded_file):
    reader = PdfReader(uploaded_file)
    text = ""
    for page in reader.pages:
        page_text = page.extract_text()
        if page_text:
            text += page_text + "\n"  # newline keeps page breaks from gluing words together
    return text


# Split the raw text into overlapping chunks and wrap them as Documents
def chunk_text(text):
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_text(text)
    return [Document(page_content=chunk) for chunk in chunks]


# Build a FAISS vector index over the document chunks
def create_faiss_index(documents):
    return FAISS.from_documents(documents, embedding_model)


# Retrieve the k chunks most similar to the query
def search_faiss_index(query, index, k=3):
    return index.similarity_search(query, k=k)


# Generate an answer with a Groq-hosted model, grounded in the retrieved chunks
def generate_answer(query, context_chunks):
    context = "\n".join(doc.page_content for doc in context_chunks)
    prompt = f"Answer the following question based on the context:\n\n{context}\n\nQuestion: {query}"
    response = groq_client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.1-8b-instant",  # current Llama model name on Groq
    )
    return response.choices[0].message.content


# Streamlit UI
st.title("📄 RAG-based PDF QA App (Groq + FAISS)")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file:
    with st.spinner("Reading and processing document..."):
        raw_text = extract_text_from_pdf(uploaded_file)
        documents = chunk_text(raw_text)
        vector_index = create_faiss_index(documents)
    st.success("Document processed and indexed successfully!")

    question = st.text_input("Ask a question based on the uploaded document:")
    if question:
        with st.spinner("Searching and generating answer..."):
            related_chunks = search_faiss_index(question, vector_index)
            answer = generate_answer(question, related_chunks)
        st.subheader("📌 Answer:")
        st.write(answer)
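
# --- Optional improvement: cache the index across reruns (a sketch) ---
# Streamlit re-executes this script from the top on every interaction, so the
# block above re-reads the PDF and rebuilds the FAISS index each time the user
# types a question. Below is a minimal sketch of one way around that, using
# Streamlit's st.cache_resource keyed on the uploaded file's bytes. The helper
# name build_index_cached is hypothetical and is not wired into the UI above.
from io import BytesIO


@st.cache_resource
def build_index_cached(file_bytes: bytes):
    """Build the FAISS index once per unique uploaded file and reuse it."""
    text = extract_text_from_pdf(BytesIO(file_bytes))
    return create_faiss_index(chunk_text(text))

# To adopt it, replace the indexing block inside `if uploaded_file:` with:
#     vector_index = build_index_cached(uploaded_file.getvalue())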