Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import PyPDF2 | |
| import tiktoken | |
| import faiss | |
| import numpy as np | |
| import os | |
| from sentence_transformers import SentenceTransformer | |
| import requests | |
# Load the sentence-embedding model once at import time; it is shared by
# every request handled by this process.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# GROQ API configuration (key loaded securely from an environment variable;
# GROQ_API_KEY is None when the variable is unset — checked in generate_answer).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
LLAMA3_MODEL = "llama3-8b-8192"
# Extract text from PDF
def load_pdf(pdf_file):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        pdf_file: A path or binary file-like object readable by PyPDF2.

    Returns:
        str: The text of all pages joined together. Pages with no
        extractable text contribute an empty string.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # PyPDF2's extract_text() can return None for image-only/scanned pages;
    # without the `or ""` guard, str.join raises TypeError on such PDFs.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Chunk text
def chunk_text(text, chunk_size=500, overlap=0):
    """Split text into whitespace-tokenized chunks of at most chunk_size words.

    Args:
        text: Source text; an empty or whitespace-only string yields [].
        chunk_size: Maximum number of words per chunk (must be > 0).
        overlap: Number of words shared between consecutive chunks,
            0 <= overlap < chunk_size. Defaults to 0, which reproduces the
            original non-overlapping behavior exactly.

    Returns:
        list[str]: The chunks, each a single-space-joined run of words.

    Raises:
        ValueError: If chunk_size or overlap is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")
    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for i in range(0, len(words), step):
        chunks.append(' '.join(words[i:i + chunk_size]))
        # Stop once a chunk reaches the end of the text, so an overlap never
        # produces a trailing chunk fully contained in the previous one.
        if i + chunk_size >= len(words):
            break
    return chunks
# Generate embeddings
def get_embeddings(chunks):
    """Encode each text chunk into a dense vector using the shared model."""
    vectors = embedding_model.encode(chunks)
    return vectors
# Create FAISS index
def create_faiss_index(embeddings):
    """Build an exact L2 (Euclidean) FAISS index over the given embeddings.

    Args:
        embeddings: 2-D array-like of shape (n_chunks, dim).

    Returns:
        faiss.IndexFlatL2: Index containing every row of `embeddings`.
    """
    # FAISS only accepts contiguous float32 matrices; asarray/ascontiguousarray
    # coerce when needed and avoid a copy when the input already complies.
    vectors = np.ascontiguousarray(np.asarray(embeddings, dtype=np.float32))
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index
# Search index
def search_index(index, query, chunks, top_k=3):
    """Return up to top_k chunks most similar to `query` (smallest L2 distance).

    Args:
        index: FAISS index built over the embeddings of `chunks`.
        query: Natural-language question to embed and search for.
        chunks: The text chunks in the same order they were indexed.
        top_k: Maximum number of chunks to return.

    Returns:
        list[str]: Up to top_k matching chunks, best match first.
    """
    if not chunks:
        return []
    q_embed = embedding_model.encode([query])
    # Asking FAISS for more neighbors than the index holds pads the result
    # with -1, which would silently alias chunks[-1]; clamp k and filter.
    k = min(top_k, len(chunks))
    _, indices = index.search(np.asarray(q_embed, dtype=np.float32), k)
    return [chunks[i] for i in indices[0] if i != -1]
# Generate answer using GROQ
def generate_answer(prompt):
    """Send `prompt` to the GROQ chat-completions API and return the reply text.

    Args:
        prompt: Full user prompt (context plus question).

    Returns:
        str: The model's answer, or a human-readable error message when the
        API key is not configured.

    Raises:
        requests.exceptions.HTTPError: On non-2xx API responses (caught by
            the caller in main()).
        requests.exceptions.Timeout: If the API does not respond in time.
    """
    if not GROQ_API_KEY:
        # Original message contained mojibake; plain text renders reliably.
        return "GROQ API key not found. Please set it in environment variables."
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }
    data = {
        "model": LLAMA3_MODEL,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    }
    # A timeout prevents a stalled API call from hanging the Streamlit UI forever.
    response = requests.post(GROQ_URL, headers=headers, json=data, timeout=60)
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
# Streamlit UI
def main():
    """Streamlit app: upload a PDF, index it, then answer questions about it."""
    st.set_page_config("RAG App", layout="centered")
    # Original titles/labels contained mojibake emoji; replaced with plain text.
    st.title("PDF QA App with LLaMA 3 & GROQ")

    uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
    if uploaded_file and st.button("Process PDF"):
        with st.spinner("Processing..."):
            text = load_pdf(uploaded_file)
            chunks = chunk_text(text)
            embeddings = get_embeddings(chunks)
            index = create_faiss_index(embeddings)
            # Persist across Streamlit reruns so questions can be asked later.
            st.session_state.chunks = chunks
            st.session_state.index = index
        st.success("PDF processed and indexed.")

    if "index" in st.session_state:
        query = st.text_input("Ask a question about the PDF:")
        if st.button("Get Answer"):
            # Guard: clicking with an empty box would run the whole
            # retrieval + LLM pipeline on a blank question.
            if not query.strip():
                st.warning("Please enter a question first.")
                return
            with st.spinner("Thinking..."):
                top_chunks = search_index(
                    st.session_state.index, query, st.session_state.chunks
                )
                context = "\n\n".join(top_chunks)
                prompt = (
                    "Use the following context to answer the question:\n\n"
                    f"{context}\n\nQuestion: {query}"
                )
                try:
                    answer = generate_answer(prompt)
                    st.markdown("### Answer:")
                    st.write(answer)
                except requests.exceptions.HTTPError as e:
                    st.error(f"API Error: {e}")
# Script entry point: launch the Streamlit app when run directly.
if __name__ == "__main__":
    main()