Spaces:
Sleeping
Sleeping
File size: 2,399 Bytes
f43f81e 946e611 f43f81e f122bc0 f43f81e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import os
import faiss
import numpy as np
import streamlit as st
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq
# --- Application setup (runs once at import time) ---

# Read the Groq API key from the environment; raises KeyError immediately if it
# is missing so the app fails fast rather than at first query time.
GROQ_API_KEY = os.environ['GROQ_API_KEY']

# Initialize Groq client used for chat completions.
client = Groq(api_key=GROQ_API_KEY)

# Initialize the sentence embedder (fully-qualified model id to avoid hub ambiguity).
# embedder = SentenceTransformer('all-MiniLM-L6-v2')
embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# App UI — titles restored from mojibake ("π§"/"π" were UTF-8 emoji bytes
# decoded with the wrong codec).
st.set_page_config(page_title="🧠 RAG Chat with PDF", layout="wide")
st.title("📄 Chat with your PDF")
# Function to read PDF
def read_pdf(file):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        file: A path or binary file-like object accepted by ``PyPDF2.PdfReader``
            (here, the Streamlit upload buffer).

    Returns:
        The text of all pages joined into a single string. Pages with no
        extractable text (e.g. scanned images) contribute nothing.
    """
    reader = PdfReader(file)
    text = ""
    for page in reader.pages:
        # extract_text() returns None for pages without a text layer;
        # `or ""` prevents `str += None` from raising TypeError.
        text += page.extract_text() or ""
    return text
# Function to chunk text
def chunk_text(text, max_length=500):
    """Split *text* into chunks of at most ``max_length`` whitespace-separated words.

    Args:
        text: The source string; an empty string yields an empty list.
        max_length: Maximum number of words per chunk (default 500).

    Returns:
        A list of strings, each the space-join of up to ``max_length`` words.
    """
    words = text.split()
    chunks = []
    for start in range(0, len(words), max_length):
        chunks.append(' '.join(words[start:start + max_length]))
    return chunks
# Function to embed and create FAISS index
def create_faiss_index(chunks):
    """Embed the text chunks and load them into a flat L2 FAISS index.

    Args:
        chunks: List of text chunks to embed with the module-level ``embedder``.

    Returns:
        A ``(index, embeddings)`` pair: the populated ``faiss.IndexFlatL2``
        and the raw embedding matrix (one row per chunk).
    """
    embeddings = embedder.encode(chunks)
    # Index dimensionality must match the embedding width.
    vector_dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(vector_dim)
    index.add(np.array(embeddings))
    return index, embeddings
# Function to search from index
def search_index(index, query, chunks, top_k=3):
    """Return the ``top_k`` chunks most similar (by L2 distance) to *query*.

    Args:
        index: A populated FAISS index built over ``chunks``.
        query: The user's question, embedded with the module-level ``embedder``.
        chunks: The chunk list in the same order the index was built from.
        top_k: Number of nearest chunks to return (default 3).

    Returns:
        The nearest chunks, closest first.
    """
    query_vector = embedder.encode([query])
    # search() returns (distances, ids); only the neighbor ids are needed here.
    _, neighbor_ids = index.search(np.array(query_vector), top_k)
    return [chunks[idx] for idx in neighbor_ids[0]]
# File uploader
# --- Main Streamlit flow ---
# Emoji string literals restored from mojibake; the success message on the
# original lines 54-55 was a single call split by the extraction.
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")
if uploaded_file:
    with st.spinner("📄 Reading and processing PDF..."):
        text = read_pdf(uploaded_file)
        chunks = chunk_text(text)
        # Guard: an image-only PDF yields no chunks, and indexing an empty
        # embedding matrix would fail downstream.
        if not chunks:
            st.error("No extractable text found in this PDF.")
            st.stop()
        index, embeddings = create_faiss_index(chunks)
    st.success("✅ PDF processed. You can now ask questions!")

    query = st.text_input("Ask a question from the PDF:")
    if query:
        with st.spinner("🔍 Retrieving context..."):
            context_chunks = search_index(index, query, chunks, top_k=3)
        # Simple RAG prompt: retrieved context followed by the question.
        prompt = "\n".join(context_chunks) + f"\n\nQuestion: {query}"
        with st.spinner("🤖 Getting answer from Groq..."):
            response = client.chat.completions.create(
                messages=[
                    {"role": "user", "content": prompt}
                ],
                model="llama-3.3-70b-versatile",
            )
        answer = response.choices[0].message.content
        st.markdown(f"**Answer:** {answer}")