# rag-based-app / app.py
# taha-18's picture
# Update app.py
# 7e94702 verified
app_code = '''
import os
import streamlit as st
import tempfile
import fitz # PyMuPDF
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from groq import Groq
# Get API Key securely from Hugging Face Secrets
os.environ["GROQ_API_KEY"] = st.secrets["GROQ_API_KEY"]
# Initialize Groq client
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))


# Load sentence transformer model
@st.cache_resource
def _load_embed_model():
    """Return the sentence-embedding model, loading it only once.

    Streamlit re-executes this script on every widget interaction;
    caching the model as a resource avoids reloading it on each rerun.
    """
    return SentenceTransformer("all-MiniLM-L6-v2")


embed_model = _load_embed_model()
# Text Chunking
def chunk_text(text, chunk_size=500, overlap=100):
    """Split text into fixed-size character windows that overlap.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk.
        overlap: Number of characters shared by consecutive chunks.

    Returns:
        A list of substrings in document order; empty when text is empty.
        The final chunk may be shorter than chunk_size.

    Raises:
        ValueError: If overlap is not smaller than chunk_size, because the
            window would never advance.
    """
    step = chunk_size - overlap
    if step <= 0:
        raise ValueError("overlap must be smaller than chunk_size")
    # Each window starts `step` characters after the previous one.
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]
# PDF Text Extraction
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page in an uploaded PDF.

    Args:
        uploaded_file: A binary file-like object whose read() returns the
            PDF bytes (here, the value returned by st.file_uploader).

    Returns:
        The text of all pages joined in page order.
    """
    pdf_bytes = uploaded_file.read()
    # Parse straight from memory so no temporary file is left on disk,
    # and use the context manager so the document is always closed.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
# Build FAISS index
def build_faiss_index(chunks):
    """Embed text chunks and store them in an exact L2 FAISS index.

    Args:
        chunks: Non-empty list of strings to embed.

    Returns:
        A (index, embeddings) tuple: the populated faiss.IndexFlatL2 and
        the embedding matrix that was added to it.

    Raises:
        ValueError: If chunks is empty, since there is then no embedding
            dimension to size the index with.
    """
    if not chunks:
        raise ValueError("cannot build an index from zero chunks")
    # FAISS works on C-contiguous float32 matrices; convert explicitly
    # (a no-op when the encoder already returns that layout).
    embeddings = np.ascontiguousarray(embed_model.encode(chunks), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings
# Retrieve top-k chunks
def retrieve_chunks(query, chunks, index, embeddings, top_k=3):
    """Return the chunks whose embeddings are nearest to the query.

    Args:
        query: The user's question.
        chunks: The text chunks, in the order they were added to index.
        index: FAISS index built over the chunk embeddings.
        embeddings: Unused; kept so existing callers keep working.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to top_k chunks in the order FAISS returns them; an empty list
        when the index holds no vectors.
    """
    # Never ask for more neighbours than the index holds: FAISS pads missing
    # results with label -1, which would silently select chunks[-1].
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []
    query_vec = np.ascontiguousarray(embed_model.encode([query]), dtype=np.float32)
    _distances, labels = index.search(query_vec, k)
    return [chunks[i] for i in labels[0] if i >= 0]
# Ask Groq with context
def ask_groq(query, context, model="llama-3-70b-8192"):
    """Ask the Groq chat model a question grounded in retrieved context.

    Args:
        query: The user's question.
        context: Retrieved text placed ahead of the question in the prompt.
        model: Groq model identifier used for the completion.

    Returns:
        The message content of the first choice in the completion response.
    """
    # NOTE(review): confirm the default model id is one Groq currently serves.
    prompt = f"Context:\\n{context}\\n\\nQuestion: {query}"
    completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )
    return completion.choices[0].message.content
# Streamlit App UI
st.title("🧠 PDF Q&A with RAG (Groq + FAISS)")
uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
query = st.text_input("Ask a question about the PDF")

if uploaded_file:
    st.info("Processing PDF...")
    full_text = extract_text_from_pdf(uploaded_file)
    chunks = chunk_text(full_text)
    if not chunks:
        # No text to index (e.g. a PDF with no text layer): tell the user
        # rather than passing an empty list on to build_faiss_index.
        st.error("No extractable text was found in this PDF.")
    else:
        index, embeddings = build_faiss_index(chunks)
        st.success("PDF processed. Ask a question now.")
        # Retrieval and generation run only once a question has been typed.
        if query:
            st.info("Retrieving relevant information...")
            top_chunks = retrieve_chunks(query, chunks, index, embeddings)
            # A blank line between chunks keeps them distinct in the prompt.
            context = "\\n\\n".join(top_chunks)
            response = ask_groq(query, context)
            st.markdown("### 💬 Answer")
            st.write(response)
'''
# Write the generated Streamlit app to disk. The source contains emoji, so
# the encoding is pinned to UTF-8 instead of relying on the locale default,
# which can fail to encode them on some platforms.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)