File size: 2,399 Bytes
f43f81e
 
 
 
 
 
 
 
946e611
f43f81e
 
 
 
 
 
 
 
 
 
f122bc0
f43f81e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
import faiss
import numpy as np
import streamlit as st
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq

# Fail fast with a KeyError if the key is missing rather than sending
# an empty credential to the API.
GROQ_API_KEY = os.environ['GROQ_API_KEY']

# Initialize Groq client (used for chat completion calls below).
client = Groq(api_key=GROQ_API_KEY)


@st.cache_resource
def _load_embedder():
    """Load the sentence-embedding model once per server process.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the model would be re-loaded from disk on each rerun.
    """
    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


embedder = _load_embedder()

# App UI
st.set_page_config(page_title="🧠 RAG Chat with PDF", layout="wide")
st.title("πŸ“„ Chat with your PDF")

# Function to read PDF
def read_pdf(file):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        file: A file path or binary file-like object accepted by PdfReader
              (e.g. the Streamlit upload buffer).

    Returns:
        str: All extracted page text joined together. Pages with no
        extractable text (e.g. scanned images) contribute nothing.
    """
    reader = PdfReader(file)
    pages = []
    for page in reader.pages:
        # extract_text() may return None for image-only or empty pages;
        # the original `text += None` would raise TypeError here.
        pages.append(page.extract_text() or "")
    # join once instead of quadratic `+=` string concatenation
    return "".join(pages)

# Function to chunk text
def chunk_text(text, max_length=500):
    """Split *text* into chunks of at most *max_length* whitespace-separated words.

    Args:
        text: The raw document text.
        max_length: Maximum number of words per chunk.

    Returns:
        list[str]: Word chunks in document order (empty list for empty text).
    """
    words = text.split()
    chunks = []
    start = 0
    while start < len(words):
        chunks.append(" ".join(words[start:start + max_length]))
        start += max_length
    return chunks

# Function to embed and create FAISS index
def create_faiss_index(chunks):
    """Embed *chunks* and build an exact (flat) L2 FAISS index over them.

    Args:
        chunks: List of text chunks to index.

    Returns:
        tuple: (faiss index, embedding matrix) — embeddings are returned so
        the caller can reuse them without re-encoding.
    """
    vectors = embedder.encode(chunks)
    n_dims = vectors.shape[1]
    idx = faiss.IndexFlatL2(n_dims)
    idx.add(np.array(vectors))
    return idx, vectors

# Function to search from index
def search_index(index, query, chunks, top_k=3):
    """Return up to *top_k* chunks most similar to *query*.

    Args:
        index: FAISS index built over the embeddings of *chunks*.
        query: The user's question.
        chunks: The text chunks the index was built from (same order).
        top_k: Maximum number of chunks to retrieve.

    Returns:
        list[str]: The retrieved chunks, nearest first (possibly fewer
        than *top_k*, and empty when *chunks* is empty).
    """
    # Never ask FAISS for more neighbours than there are vectors: it pads
    # missing results with index -1, and chunks[-1] would silently return
    # the last chunk (duplicated) instead of failing.
    k = min(top_k, len(chunks))
    if k <= 0:
        return []
    query_embedding = embedder.encode([query])
    _, ids = index.search(np.array(query_embedding), k)
    return [chunks[i] for i in ids[0] if i >= 0]

# File uploader
# Main app flow: upload -> index -> ask -> retrieve -> answer.
# NOTE: Streamlit reruns this whole script on every interaction, so the PDF
# is re-read and re-indexed on each rerun (no session_state caching here).
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

if uploaded_file:
    with st.spinner("πŸ“– Reading and processing PDF..."):
        # Extract raw text, split into word chunks, embed and index them.
        text = read_pdf(uploaded_file)
        chunks = chunk_text(text)
        index, embeddings = create_faiss_index(chunks)
    st.success("βœ… PDF processed. You can now ask questions!")

    query = st.text_input("Ask a question from the PDF:")

    if query:
        with st.spinner("πŸ” Retrieving context..."):
            # Retrieve the 3 nearest chunks and prepend them to the question
            # as the prompt context.
            context_chunks = search_index(index, query, chunks, top_k=3)
            prompt = "\n".join(context_chunks) + f"\n\nQuestion: {query}"

        with st.spinner("πŸ€– Getting answer from Groq..."):
            # Single-turn completion: the retrieved context plus the question
            # is sent as one user message.
            response = client.chat.completions.create(
                messages=[
                    {"role": "user", "content": prompt}
                ],
                model="llama-3.3-70b-versatile"
            )
            answer = response.choices[0].message.content
            st.markdown(f"**Answer:** {answer}")