"""Streamlit RAG demo: answer questions about an uploaded PDF.

Pipeline: extract the PDF text, split it into fixed-size character chunks,
embed the chunks with a SentenceTransformer model, store them in a FAISS
L2 index, retrieve the chunk closest to the user's query, and ask a Groq
chat model to answer the query using that chunk as context.
"""

import os

import faiss
import PyPDF2
import streamlit as st
from groq import Groq
from sentence_transformers import SentenceTransformer

# Number of characters per chunk when splitting the document text.
CHUNK_SIZE = 500


@st.cache_resource
def _load_embedding_model():
    """Load the sentence-embedding model once and reuse it across reruns."""
    return SentenceTransformer('all-MiniLM-L6-v2')


def _extract_text(pdf_reader):
    """Return the concatenated text of every page in ``pdf_reader``."""
    # A page may yield no text (e.g. a scanned image); treat that as empty
    # instead of failing on ``str + None``.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def _split_into_chunks(text, size=CHUNK_SIZE):
    """Split ``text`` into consecutive pieces of at most ``size`` characters."""
    return [text[start:start + size] for start in range(0, len(text), size)]


def _build_prompt(context, question):
    """Combine the retrieved context and the user's question into one prompt."""
    return (
        "Answer the question using the context below.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}"
    )


# Streamlit App
st.title("RAG Application with Groq and FAISS")

# Initialize Groq client; fail with a readable message instead of a traceback
# when the API key is missing.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    st.error("GROQ_API_KEY environment variable is not set.")
    st.stop()
client = Groq(api_key=api_key)

# Load embedding model (cached, so Streamlit reruns do not reload it).
embedding_model = _load_embedding_model()

# Initialize FAISS index; take the dimension from the model so the two
# cannot drift apart if the model name is changed.
dimension = embedding_model.get_sentence_embedding_dimension()
index = faiss.IndexFlatL2(dimension)

# PDF Upload
uploaded_file = st.file_uploader("Upload a PDF Document", type=["pdf"])

if uploaded_file:
    # Extract text from PDF
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    text = _extract_text(pdf_reader)

    # Without extractable text there is nothing to embed or retrieve, and
    # searching an empty FAISS index would return the invalid id -1.
    if not text.strip():
        st.warning("No extractable text was found in this PDF.")
        st.stop()

    # Split text into chunks
    chunks = _split_into_chunks(text)
    st.write(f"Document split into {len(chunks)} chunks.")

    # Generate embeddings and store in FAISS
    embeddings = embedding_model.encode(chunks)
    index.add(embeddings)
    st.success("Embeddings created and stored in FAISS.")

    # Query and Response
    user_query = st.text_input("Enter your query:")
    if user_query:
        query_embedding = embedding_model.encode([user_query])
        _, indices = index.search(query_embedding, k=1)
        retrieved_chunk = chunks[indices[0][0]]

        # Use Groq API for completion. The prompt must carry the user's
        # question as well as the retrieved context; sending the chunk alone
        # gives the model nothing to answer.
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": _build_prompt(retrieved_chunk, user_query),
                }
            ],
            model="llama3-8b-8192",
        )
        response = chat_completion.choices[0].message.content
        st.write("**Response:**")
        st.write(response)