docs_RAG_app / app.py
Mehak900's picture
Update app.py
593fe27 verified
import streamlit as st
import os
from groq import Groq
from PyPDF2 import PdfReader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
# Set your Groq API key directly (recommended for Hugging Face Spaces)
# NOTE(review): if the RAG_API env var is unset this is None and the Groq
# client constructor / first API call will fail — confirm the Space secret
# is configured.
GROQ_API_KEY = os.getenv("RAG_API") # Use your custom environment variable name
# Initialize Groq client
groq_client = Groq(api_key=GROQ_API_KEY)
# Load embedding model
# Sentence-transformers MiniLM model used for all chunk/query embeddings below.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Function to extract text from PDF
def extract_text_from_pdf(uploaded_file):
    """Extract and concatenate the text of every page in an uploaded PDF.

    Args:
        uploaded_file: A binary file-like object (e.g. Streamlit's
            ``UploadedFile``) that ``PyPDF2.PdfReader`` can read.

    Returns:
        str: Text of all pages concatenated in order. Pages with no
        extractable text (e.g. scanned images) are skipped, so the result
        may be an empty string.
    """
    reader = PdfReader(uploaded_file)
    # extract_text() can return None or "" for image-only pages; filter
    # those out, and build the result with one join instead of repeated
    # string concatenation (which is quadratic in the worst case).
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(text for text in page_texts if text)
# Function to split text into chunks
def chunk_text(text):
    """Split *text* into overlapping ~500-char chunks, wrapped as Documents.

    Args:
        text: Raw document text to split.

    Returns:
        list[Document]: One LangChain ``Document`` per chunk, with a
        50-character overlap between consecutive chunks.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    return [Document(page_content=piece) for piece in splitter.split_text(text)]
# Create FAISS vector index
def create_faiss_index(documents):
    """Build a FAISS vector store over *documents*.

    Uses the module-level ``embedding_model`` to embed each document.

    Args:
        documents: Iterable of LangChain ``Document`` objects.

    Returns:
        A ``FAISS`` vector store ready for similarity search.
    """
    index = FAISS.from_documents(documents, embedding_model)
    return index
# Search similar chunks
def search_faiss_index(query, index, k=3):
    """Return the *k* chunks from *index* most similar to *query*.

    Args:
        query: Natural-language question to embed and search with.
        index: A ``FAISS`` vector store built by ``create_faiss_index``.
        k: Number of nearest chunks to return (default 3).

    Returns:
        list[Document]: The top-*k* matching document chunks.
    """
    matches = index.similarity_search(query, k=k)
    return matches
# Generate answer using Groq model
def generate_answer(query, context_chunks):
    """Answer *query* with the Groq chat model, grounded in retrieved context.

    Args:
        query: The user's question.
        context_chunks: Iterable of ``Document`` chunks whose text is
            concatenated into the prompt as context.

    Returns:
        str: The model's answer text.
    """
    # Stitch the retrieved chunks into a single context string.
    context = "\n".join(doc.page_content for doc in context_chunks)
    prompt = f"""Answer the following question based on the context:\n\n{context}\n\nQuestion: {query}"""
    # Single-turn chat completion against the module-level Groq client.
    completion = groq_client.chat.completions.create(
        model="llama-3.1-8b-instant",  # ✅ Correct current model name on Groq
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
# Streamlit UI
st.title("πŸ“„ RAG-based PDF QA App (Groq + FAISS)")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file:
    # Streamlit re-runs this entire script on every interaction (each
    # question, each widget change). Without caching, the PDF would be
    # re-extracted, re-chunked, and re-embedded on every rerun. Cache the
    # expensive extract -> chunk -> embed pipeline in session_state and
    # rebuild only when a different file is uploaded.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("indexed_file") != file_key:
        with st.spinner("Reading and processing document..."):
            raw_text = extract_text_from_pdf(uploaded_file)
            documents = chunk_text(raw_text)
            st.session_state["vector_index"] = create_faiss_index(documents)
            st.session_state["indexed_file"] = file_key
        st.success("Document processed and indexed successfully!")
    vector_index = st.session_state["vector_index"]

    question = st.text_input("Ask a question based on the uploaded document:")
    if question:
        with st.spinner("Searching and generating answer..."):
            related_chunks = search_faiss_index(question, vector_index)
            answer = generate_answer(question, related_chunks)
        st.subheader("πŸ“Œ Answer:")
        st.write(answer)