# document_rag / app.py
# Source: FurqanIshaq's Hugging Face Space — commit 7cd855e ("Create app.py", verified)
# app.py
import os
import faiss
import streamlit as st
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq
import numpy as np
# --- SETUP GROQ ---
# os.getenv returns None silently if GROQ_API_KEY is unset; the client is
# still constructed and only fails at request time — ensure the env var is set.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
# --- LOAD EMBEDDING MODEL ---
# Small sentence-embedding model; downloaded on first run and cached
# locally by sentence-transformers. Loaded once at module import.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# --- STREAMLIT UI ---
# set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="RAG App with Groq", layout="wide")
st.title("πŸ“„πŸ§  RAG-Based Question Answering App")
# --- UPLOAD PDF ---
# file_uploader returns an UploadedFile (file-like object) after the user
# uploads, or None on first render — later sections re-check this guard.
uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
if uploaded_file:
    # --- EXTRACT TEXT FROM PDF ---
    reader = PdfReader(uploaded_file)
    # extract_text() returns None (or "") for pages with no extractable text,
    # e.g. scanned images — substitute "" so concatenation never raises.
    # "".join avoids the quadratic cost of repeated `text +=` on large PDFs.
    text = "".join(page.extract_text() or "" for page in reader.pages)
def chunk_text(text, chunk_size=500):
words = text.split()
return [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
if uploaded_file:
    # --- VECTORIZE CHUNKS --- (re-check the upload guard; `text` exists here)
    chunks = chunk_text(text)
    if not chunks:
        # Scanned / image-only PDFs yield no text; fail loudly instead of
        # crashing below on an empty embedding matrix.
        st.error("No extractable text found in this PDF.")
        st.stop()
    # encode() returns a 2-D float array with one row per chunk.
    embeddings = embedder.encode(chunks)
    dim = embeddings.shape[1]  # embedding dimensionality
    index = faiss.IndexFlatL2(dim)  # exact (brute-force) L2 nearest-neighbour search
    index.add(np.array(embeddings))
    st.success("βœ… Document uploaded and indexed!")
if uploaded_file:
    # --- USER QUERY ---
    user_query = st.text_input("Ask something about the document")
    if user_query:
        # Embed the query the same way the chunks were embedded.
        query_embedding = embedder.encode([user_query])
        # Never request more neighbours than vectors in the index: FAISS pads
        # missing results with -1, which Python would interpret as "the last
        # chunk" when used as a list index. Clamp k and filter -1 defensively.
        k = min(3, len(chunks))
        _, I = index.search(np.array(query_embedding), k=k)
        retrieved_chunks = "\n\n".join(chunks[i] for i in I[0] if i >= 0)
        # --- GROQ COMPLETION ---
        # Retrieved chunks are passed as context; the model answers only from them.
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": "You are a helpful assistant who answers based on the provided context."},
                {"role": "user", "content": f"Context: {retrieved_chunks}\n\nQuestion: {user_query}"},
            ],
        )
        st.subheader("πŸ“’ Answer")
        st.write(response.choices[0].message.content)