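# NOTE(review): this header and the bare line numbers that follow appear to be
# residue from a web file viewer (size banner, revision ids, line-number
# gutter) rather than program code; kept as comments so the file parses.
# File size: 3,223 Bytes
# Revision ids: 43f0f9a, 433f762, 43f0f9a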
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import os
import gradio as gr
import faiss
import numpy as np

from groq import Groq
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer

# -----------------------
# Load models
# -----------------------

# Sentence-embedding model, loaded once at import time under the name
# "all-MiniLM-L6-v2".
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Groq API client. The key is read from the GROQ_API_KEY environment variable;
# os.environ.get() yields None when that variable is not set.
groq_client = Groq(
    api_key=os.environ.get("GROQ_API_KEY")
  # NOTE(review): alternative key source left commented out by the author
  # (presumably a notebook secrets helper) — confirm whether it can be removed.
  # api_key=userdata.get('RAG')
)

# -----------------------
# Global storage
# -----------------------

# FAISS index over the embeddings of the current document's chunks.
# Stays None until create_faiss_index() has built one.
faiss_index = None
# Text chunks in the order their embeddings were added to faiss_index, so a
# position returned by a FAISS search can be used as an index into this list.
document_chunks = []

# -----------------------
# Helper functions
# -----------------------

def read_pdf(file):
    """Return the text of every page of a PDF as a single string.

    Args:
        file: The PDF source, passed unchanged to ``PdfReader``.

    Returns:
        The extracted text of all pages concatenated in page order with no
        separator between pages. A page whose extraction yields a falsy
        value contributes an empty string.
    """
    reader = PdfReader(file)
    # `or ""` is defensive: should extract_text() ever yield None for a page,
    # plain concatenation would raise TypeError. join() also avoids
    # rebuilding the string once per page.
    return "".join(page.extract_text() or "" for page in reader.pages)


def chunk_text(text, chunk_size=500, overlap=50):
    """Split *text* into fixed-size chunks that overlap their neighbours.

    Consecutive chunks start ``chunk_size - overlap`` characters apart, so
    each chunk repeats the last ``overlap`` characters of the previous one.
    Chunking stops as soon as a chunk reaches the end of the text, which
    avoids emitting a trailing chunk made only of already-seen overlap.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared between consecutive chunks;
            must be smaller than ``chunk_size``.

    Returns:
        A list of chunks in document order; empty when *text* is empty.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size`` (the window would never advance).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    step = chunk_size - overlap
    text_length = len(text)
    chunks = []

    for start in range(0, text_length, step):
        end = start + chunk_size
        chunks.append(text[start:end])
        # This chunk already covers the tail of the text; another window
        # would only repeat part of it.
        if end >= text_length:
            break

    return chunks


def create_faiss_index(chunks):
    """Embed *chunks* and rebuild the module-level FAISS index from them.

    Replaces the ``faiss_index`` and ``document_chunks`` globals. Both are
    assigned only after the new index has been built, so a failure while
    embedding or indexing leaves the previous pair untouched and in sync.

    Args:
        chunks: Non-empty iterable of text chunks to index.

    Raises:
        ValueError: If *chunks* is empty; there would be no embedding
            dimension to size the index with.
    """
    global faiss_index, document_chunks

    chunk_list = list(chunks)
    if not chunk_list:
        raise ValueError("Cannot build a search index from zero text chunks.")

    # The index is fed a contiguous float32 matrix.
    embeddings = np.ascontiguousarray(
        embedding_model.encode(chunk_list), dtype="float32"
    )

    dimension = embeddings.shape[1]
    new_index = faiss.IndexFlatL2(dimension)
    new_index.add(embeddings)

    # Publish index and chunk list together so search results always map
    # onto the chunks that were actually indexed.
    faiss_index = new_index
    document_chunks = chunk_list


def search_chunks(query, top_k=3):
    """Return the stored chunks closest to *query* in the FAISS index.

    Args:
        query: The question text to embed and search with.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks from ``document_chunks``, in the order FAISS
        returned them. Empty when no index has been built, no chunks are
        stored, or ``top_k`` is not positive.
    """
    if faiss_index is None or not document_chunks:
        return []

    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, which as a list index would silently return the last
    # chunk.
    neighbour_count = min(top_k, len(document_chunks))
    if neighbour_count <= 0:
        return []

    query_embedding = np.ascontiguousarray(
        embedding_model.encode([query]), dtype="float32"
    )
    _, indices = faiss_index.search(query_embedding, neighbour_count)

    # Keep only positions that are valid for the stored chunk list.
    return [
        document_chunks[idx]
        for idx in indices[0]
        if 0 <= idx < len(document_chunks)
    ]


def ask_groq(context, question):
    """Ask the Groq chat model to answer *question* from *context*.

    Args:
        context: Source text inserted under the "Text:" heading of the prompt.
        question: The question inserted under the "Question:" heading.

    Returns:
        The message content of the first choice in the completion response.
    """
    user_message = {
        "role": "user",
        "content": f"""
Use the text below to answer the question.

Text:
{context}

Question:
{question}
""",
    }

    completion = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[user_message],
    )

    first_choice = completion.choices[0]
    return first_choice.message.content


# -----------------------
# Gradio functions
# -----------------------

def upload_pdf(file):
    """Read an uploaded PDF, chunk its text and build the search index.

    Args:
        file: The value supplied by the PDF upload component, or ``None``
            when nothing has been uploaded.

    Returns:
        A status message for the UI: a prompt to upload when *file* is
        ``None``, a notice when the PDF yields no text, or a confirmation
        once the index has been built.
    """
    if file is None:
        return "Please upload a PDF first."

    text = read_pdf(file)
    # Without any text there is nothing meaningful to embed or index, so
    # report that to the user instead of failing further down the pipeline.
    if not text or not text.strip():
        return "No extractable text was found in this PDF."

    create_faiss_index(chunk_text(text))

    return "PDF processed. You can now ask questions."


def answer_question(question):
    """Answer *question* using the chunks of the currently indexed PDF.

    Args:
        question: The question text typed by the user.

    Returns:
        The model's answer, or a short instruction message when no PDF has
        been indexed yet or the question is blank.
    """
    if faiss_index is None:
        return "Please upload a PDF first."

    # A blank question has nothing to retrieve against and would only spend
    # an LLM request, so stop here.
    if not question or not question.strip():
        return "Please enter a question."

    relevant_chunks = search_chunks(question)
    context = "\n".join(relevant_chunks)

    return ask_groq(context, question)


# -----------------------
# Gradio UI
# -----------------------


# Build the UI. Components are created inside the Blocks context in the order
# they are written here: upload controls first, then the question/answer area.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Question Answer App")

    # Upload section: file picker, trigger button and a status line.
    pdf_file = gr.File(label="Upload PDF")
    upload_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status", lines=1)

    # Question input.
    question = gr.Textbox(
        label="Your Question",
        lines=2,
        placeholder="Type your question here"
    )

    ask_btn = gr.Button("Ask Question")

    # Answer output.
    answer = gr.Textbox(
        label="Answer",
        lines=10
    )

    # Wire the buttons: upload_pdf's return value goes to the status box,
    # answer_question's return value goes to the answer box.
    upload_btn.click(upload_pdf, inputs=pdf_file, outputs=status)
    ask_btn.click(answer_question, inputs=question, outputs=answer)

# Start the app when this file is executed (runs at module top level).
demo.launch()