docs_RAG_app / app.py
Mehak900's picture
Update app.py
593fe27 verified
import streamlit as st
import os
from groq import Groq
from PyPDF2 import PdfReader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
# Set your Groq API key directly (recommended for Hugging Face Spaces)
# NOTE(review): if the RAG_API env var is unset this is None and the Groq
# client constructor / first API call will fail — confirm the Space secret
# is configured.
GROQ_API_KEY = os.getenv("RAG_API") # Use your custom environment variable name
# Initialize Groq client
groq_client = Groq(api_key=GROQ_API_KEY)
# Load embedding model
# Sentence-transformers MiniLM model used for all chunk/query embeddings below.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Function to extract text from PDF
def extract_text_from_pdf(uploaded_file):
    """Extract and concatenate the text of every page in an uploaded PDF.

    Args:
        uploaded_file: A binary file-like object (e.g. Streamlit's
            ``UploadedFile``) that ``PyPDF2.PdfReader`` can read.

    Returns:
        str: Text of all pages concatenated in order. Pages with no
        extractable text (e.g. scanned images) are skipped, so the result
        may be an empty string.
    """
    reader = PdfReader(uploaded_file)
    # extract_text() can return None or "" for image-only pages; filter
    # those out, and build the result with one join instead of repeated
    # string concatenation (which is quadratic in the worst case).
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(text for text in page_texts if text)
# Function to split text into chunks
def chunk_text(text):
    """Split *text* into overlapping ~500-char chunks, wrapped as Documents.

    Args:
        text: Raw document text to split.

    Returns:
        list[Document]: One LangChain ``Document`` per chunk, with a
        50-character overlap between consecutive chunks.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    return [Document(page_content=piece) for piece in splitter.split_text(text)]
# Create FAISS vector index
def create_faiss_index(documents):
    """Build a FAISS vector store over *documents*.

    Uses the module-level ``embedding_model`` to embed each document.

    Args:
        documents: Iterable of LangChain ``Document`` objects.

    Returns:
        A ``FAISS`` vector store ready for similarity search.
    """
    index = FAISS.from_documents(documents, embedding_model)
    return index
# Search similar chunks
def search_faiss_index(query, index, k=3):
    """Return the *k* chunks from *index* most similar to *query*.

    Args:
        query: Natural-language question to embed and search with.
        index: A ``FAISS`` vector store built by ``create_faiss_index``.
        k: Number of nearest chunks to return (default 3).

    Returns:
        list[Document]: The top-*k* matching document chunks.
    """
    matches = index.similarity_search(query, k=k)
    return matches
# Generate answer using Groq model
def generate_answer(query, context_chunks):
    """Answer *query* with the Groq chat model, grounded in retrieved context.

    Args:
        query: The user's question.
        context_chunks: Iterable of ``Document`` chunks whose text is
            concatenated into the prompt as context.

    Returns:
        str: The model's answer text.
    """
    # Stitch the retrieved chunks into a single context string.
    context = "\n".join(doc.page_content for doc in context_chunks)
    prompt = f"""Answer the following question based on the context:\n\n{context}\n\nQuestion: {query}"""
    # Single-turn chat completion against the module-level Groq client.
    completion = groq_client.chat.completions.create(
        model="llama-3.1-8b-instant",  # ✅ Correct current model name on Groq
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
# Streamlit UI
st.title("πŸ“„ RAG-based PDF QA App (Groq + FAISS)")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file:
    # Streamlit re-runs this entire script on every interaction (each
    # question, each widget change). Without caching, the PDF would be
    # re-extracted, re-chunked, and re-embedded on every rerun. Cache the
    # expensive extract -> chunk -> embed pipeline in session_state and
    # rebuild only when a different file is uploaded.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("indexed_file") != file_key:
        with st.spinner("Reading and processing document..."):
            raw_text = extract_text_from_pdf(uploaded_file)
            documents = chunk_text(raw_text)
            st.session_state["vector_index"] = create_faiss_index(documents)
            st.session_state["indexed_file"] = file_key
        st.success("Document processed and indexed successfully!")
    vector_index = st.session_state["vector_index"]

    question = st.text_input("Ask a question based on the uploaded document:")
    if question:
        with st.spinner("Searching and generating answer..."):
            related_chunks = search_faiss_index(question, vector_index)
            answer = generate_answer(question, related_chunks)
        st.subheader("πŸ“Œ Answer:")
        st.write(answer)