chunks / app.py
zeeshan4801's picture
Create app.py
cdb5969 verified
import os
import streamlit as st
import PyPDF2
from sentence_transformers import SentenceTransformer
import faiss
from groq import Groq
# Retrieval settings (values match the original hard-coded behaviour).
CHUNK_SIZE = 500  # characters per chunk
TOP_K = 1  # number of chunks retrieved for each query


def _extract_text(pdf_reader):
    """Return the concatenated text of every page in *pdf_reader*."""
    # `or ""` keeps the join safe when a page yields no text
    # (e.g. image-only / scanned pages).
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def _split_into_chunks(text, size=CHUNK_SIZE):
    """Split *text* into consecutive pieces of at most *size* characters."""
    return [text[start:start + size] for start in range(0, len(text), size)]


def _build_prompt(context, question):
    """Combine the retrieved context and the user's question into one prompt."""
    return (
        "Answer the question using only the context below. "
        "If the context does not contain the answer, say so.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}"
    )


# Initialize Groq client (API key is read from the GROQ_API_KEY env variable)
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Load embedding model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize FAISS index (exact L2 search over the chunk embeddings)
dimension = 384  # Dimension of embeddings
index = faiss.IndexFlatL2(dimension)

# Streamlit App
st.title("RAG Application with Groq and FAISS")

# PDF Upload
uploaded_file = st.file_uploader("Upload a PDF Document", type=["pdf"])

if uploaded_file:
    # Extract text from PDF
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    text = _extract_text(pdf_reader)

    if not text.strip():
        # Nothing to embed: adding an empty batch to FAISS and indexing into
        # an empty chunk list would both fail, so stop here with a message.
        st.warning("No extractable text was found in the uploaded PDF.")
    else:
        # Split text into chunks
        chunks = _split_into_chunks(text)
        st.write(f"Document split into {len(chunks)} chunks.")

        # Generate embeddings and store in FAISS
        embeddings = embedding_model.encode(chunks)
        index.add(embeddings)
        st.success("Embeddings created and stored in FAISS.")

        # Query and Response
        user_query = st.text_input("Enter your query:")
        question = user_query.strip() if user_query else ""

        if question:
            query_embedding = embedding_model.encode([question])

            # Never ask for more neighbours than there are stored chunks.
            k = min(TOP_K, len(chunks))
            _, indices = index.search(query_embedding, k=k)

            # Skip any negative (placeholder) index defensively.
            retrieved_chunks = [chunks[i] for i in indices[0] if i >= 0]
            context = "\n\n".join(retrieved_chunks)

            # Use Groq API for completion. The prompt carries BOTH the
            # retrieved context and the user's question; sending the chunk
            # alone gives the model nothing to answer.
            chat_completion = client.chat.completions.create(
                messages=[
                    {"role": "user", "content": _build_prompt(context, question)}
                ],
                model="llama3-8b-8192",
            )
            response = chat_completion.choices[0].message.content

            st.write("**Response:**")
            st.write(response)