Document-app / app.py
Ayesha003's picture
Update app.py
5624321 verified
import os
import faiss
import numpy as np
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.document_loaders import PyPDFLoader
from sentence_transformers import SentenceTransformer
from groq import Groq
# Initialize Groq API Client
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)


# Load sentence-transformer model (open-source embedding model)
@st.cache_resource
def _load_embedding_model():
    """Load the sentence-transformer embedding model once per server process.

    Streamlit re-executes this script from the top on every widget
    interaction; without caching, the model would be re-instantiated on
    each rerun. ``st.cache_resource`` keeps a single shared instance.
    """
    return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


embedding_model = _load_embedding_model()
# Function to process PDF and create embeddings
def process_pdf(pdf_file):
    """Load a PDF, split it into chunks, embed them, and build a FAISS index.

    Args:
        pdf_file: Path to the PDF file, passed to ``PyPDFLoader``.

    Returns:
        A ``(texts, index)`` tuple: the list of chunk strings and the
        ``faiss.IndexFlatL2`` holding one embedding per chunk, in the same
        order as ``texts``.

    Raises:
        ValueError: If no text chunks could be produced from the PDF
            (for example, a scanned document with no text layer).

    Side effects:
        Writes the index to the file ``"faiss_index"`` in the working
        directory.
    """
    loader = PyPDFLoader(pdf_file)
    documents = loader.load()

    # Chunk text
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
    chunks = text_splitter.split_documents(documents)

    # Generate embeddings
    texts = [chunk.page_content for chunk in chunks]
    if not texts:
        # Without this guard, ``embeddings.shape[1]`` below fails with an
        # opaque IndexError because there is nothing to embed.
        raise ValueError("No extractable text was found in the PDF.")
    embeddings = embedding_model.encode(texts, convert_to_numpy=True)
    # FAISS expects a C-contiguous float32 matrix.
    embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

    # Store embeddings in FAISS
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    faiss.write_index(index, "faiss_index")
    return texts, index
# Function to search in FAISS
def search_faiss(query, texts, index, top_k=3):
    """Return the text chunks whose embeddings are nearest to the query.

    Args:
        query: The question string to embed and look up.
        texts: Chunk strings, positionally aligned with the vectors in ``index``.
        index: The FAISS index to search.
        top_k: Number of neighbours to request from the index.

    Returns:
        A list of chunk strings in the order the index returned them;
        entries whose id is -1 are skipped.
    """
    query_vector = embedding_model.encode([query], convert_to_numpy=True)
    _distances, neighbour_ids = index.search(query_vector, top_k)

    matches = []
    for chunk_id in neighbour_ids[0]:
        # An id of -1 marks an empty result slot, so there is no chunk to return.
        if chunk_id == -1:
            continue
        matches.append(texts[chunk_id])
    return matches
# Function to retrieve response from Groq API
def get_groq_response(query):
    """Send ``query`` as a single user message to Groq and return the reply text.

    Args:
        query: The full prompt string to send as the user message.

    Returns:
        The message content of the first choice in the chat completion.
    """
    user_message = {"role": "user", "content": query}
    completion = client.chat.completions.create(
        messages=[user_message],
        model="llama-3.3-70b-versatile",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# Streamlit UI
st.title("RAG-Based PDF Chatbot")
uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
if uploaded_file:
    # Streamlit re-executes this script on every widget interaction (including
    # each new question), so the processed document is kept in session state
    # and rebuilt only when a different file is uploaded.
    pdf_bytes = uploaded_file.getvalue()
    upload_key = (uploaded_file.name, hash(pdf_bytes))
    if st.session_state.get("processed_upload_key") != upload_key:
        with open("uploaded_document.pdf", "wb") as f:
            f.write(pdf_bytes)
        st.session_state["processed_document"] = process_pdf("uploaded_document.pdf")
        st.session_state["processed_upload_key"] = upload_key
    st.success("PDF uploaded successfully!")
    texts, index = st.session_state["processed_document"]

    query = st.text_input("Ask a question about the document:")
    if query:
        # Retrieve the most relevant chunks and pass them to the LLM as context.
        docs = search_faiss(query, texts, index)
        context = " ".join(docs)
        final_query = f"Context: {context} \n\n Question: {query}"
        response = get_groq_response(final_query)
        st.write("Response:", response)