# RAG_AI / app.py
# Author: ahmedumeraziz — "Update app.py" (commit 486ea50, verified)
import streamlit as st
import PyPDF2
import tiktoken
import faiss
import numpy as np
import os
from sentence_transformers import SentenceTransformer
import requests
# Load embedding model.
# Sentence-level embedder used both to index PDF chunks and to encode user
# queries; created once at import time so Streamlit reruns reuse the model.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# GROQ API configuration (key loaded from environment, never hard-coded).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # may be None; checked in generate_answer()
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"  # OpenAI-compatible endpoint
LLAMA3_MODEL = "llama3-8b-8192"  # model id sent in the chat-completions payload
# Extract text from PDF
# Extract text from PDF
def load_pdf(pdf_file):
    """Extract and concatenate the text of every page of *pdf_file*.

    Args:
        pdf_file: A path or binary file-like object readable by PyPDF2
            (e.g. a Streamlit ``UploadedFile``).

    Returns:
        str: All page text joined into a single string. Pages with no
        extractable text contribute nothing.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # extract_text() can return None (e.g. scanned/image-only pages);
    # coerce to "" so str.join doesn't raise TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Chunk text
# Chunk text
def chunk_text(text, chunk_size=500):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words.

    Args:
        text: The full document text.
        chunk_size: Maximum number of words per chunk.

    Returns:
        list[str]: Space-joined word chunks; empty list for empty/whitespace text.
    """
    tokens = text.split()
    chunks = []
    for start in range(0, len(tokens), chunk_size):
        chunks.append(' '.join(tokens[start:start + chunk_size]))
    return chunks
# Generate embeddings
# Generate embeddings
def get_embeddings(chunks):
    """Encode each text chunk with the module-level SentenceTransformer model.

    Args:
        chunks: Sequence of text chunks to embed.

    Returns:
        The embedding matrix produced by ``embedding_model.encode``.
    """
    vectors = embedding_model.encode(chunks)
    return vectors
# Create FAISS index
# Create FAISS index
def create_faiss_index(embeddings):
    """Build an in-memory exact-L2 FAISS index over the embedding matrix.

    Args:
        embeddings: 2-D array-like of shape (n_chunks, dim).

    Returns:
        faiss.IndexFlatL2: Index containing every row of *embeddings*.
    """
    # FAISS requires a C-contiguous float32 matrix. SentenceTransformer
    # usually returns float32 already, but coerce defensively so other
    # array-likes also work.
    matrix = np.ascontiguousarray(embeddings, dtype="float32")
    index = faiss.IndexFlatL2(matrix.shape[1])
    index.add(matrix)
    return index
# Search index
# Search index
def search_index(index, query, chunks, top_k=3):
    """Return the *top_k* chunks most similar to *query*.

    Args:
        index: FAISS index built over the chunk embeddings.
        query: The user's question text.
        chunks: The original text chunks, in the order they were indexed.
        top_k: Number of nearest neighbours to retrieve.

    Returns:
        list[str]: The retrieved chunks, nearest first.
    """
    query_vec = np.asarray(embedding_model.encode([query]))
    _, neighbor_ids = index.search(query_vec, top_k)
    return [chunks[idx] for idx in neighbor_ids[0]]
# Generate answer using GROQ
# Generate answer using GROQ
def generate_answer(prompt):
    """Send *prompt* to the GROQ chat-completions API and return the answer text.

    Args:
        prompt: The fully assembled user prompt (context + question).

    Returns:
        str: The model's reply, or a human-readable error string when the
        API key is not configured.

    Raises:
        requests.exceptions.HTTPError: On non-2xx responses.
        requests.exceptions.Timeout: If the request exceeds the timeout.
    """
    if not GROQ_API_KEY:
        return "🚫 GROQ API key not found. Please set it in environment variables."
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    data = {
        "model": LLAMA3_MODEL,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    }
    # timeout prevents a stalled API call from hanging the Streamlit
    # worker indefinitely (requests has no default timeout).
    response = requests.post(GROQ_URL, headers=headers, json=data, timeout=60)
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
# Streamlit UI
def main():
st.set_page_config("RAG App", layout="centered")
st.title("πŸ“„ PDF QA App with LLaMA 3 & GROQ")
uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
if uploaded_file and st.button("Process PDF"):
with st.spinner("Processing..."):
text = load_pdf(uploaded_file)
chunks = chunk_text(text)
embeddings = get_embeddings(chunks)
index = create_faiss_index(embeddings)
st.session_state.chunks = chunks
st.session_state.index = index
st.success("βœ… PDF processed and indexed.")
if "index" in st.session_state:
query = st.text_input("Ask a question about the PDF:")
if st.button("Get Answer"):
with st.spinner("Thinking..."):
top_chunks = search_index(st.session_state.index, query, st.session_state.chunks)
context = "\n\n".join(top_chunks)
prompt = f"Use the following context to answer the question:\n\n{context}\n\nQuestion: {query}"
try:
answer = generate_answer(prompt)
st.markdown("### 🧠 Answer:")
st.write(answer)
except requests.exceptions.HTTPError as e:
st.error(f"❌ API Error: {e}")
if __name__ == "__main__":
main()