File size: 3,979 Bytes
9675ff7
bf17205
9675ff7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e182395
9675ff7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d081b1b
 
 
 
 
 
 
 
9675ff7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import os
# os.environ["GROQ_API_KEY"] = "YOUR_GROQ_API_KEY"
# from google.colab import userdata
# GROQ_API_KEY=userdata.get('rag-based')
import gradio as gr
from groq import Groq

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# -----------------------------
# Environment Setup
# -----------------------------

# The Groq API key is taken from the "Rag_based" environment variable
# rather than being hard-coded in this file.
client = Groq(api_key=os.getenv("Rag_based"))

# -----------------------------
# Global Variables
# -----------------------------

# FAISS index built by process_document(); stays None until a PDF has been
# processed, which ask_question() uses to detect "nothing uploaded yet".
vector_db = None

# -----------------------------
# Embedding Model
# -----------------------------

# Created once at import time and passed to FAISS whenever an index is built.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# -----------------------------
# Document Processing Function
# -----------------------------

def process_document(pdf_file):
    """Index an uploaded PDF so that questions can be answered from it.

    The PDF is loaded with ``PyPDFLoader``, split into overlapping text
    chunks, and stored in a FAISS index built with the module-level
    ``embedding_model``. On success the module-level ``vector_db`` is
    replaced with the new index; on any failure it is left untouched.

    Args:
        pdf_file: The upload to index. Either a filesystem path (``str``) or
            an object exposing that path as ``.name`` (the form this function
            originally expected from the ``gr.File`` input). ``None`` means
            nothing was uploaded.

    Returns:
        str: A status message for the UI -- a prompt to upload a file, a
        success message including the number of chunks, or a message
        starting with ``"Error processing document:"``.
    """
    global vector_db

    if pdf_file is None:
        return "Please upload a PDF Document first."

    try:
        # Accept a plain path as well as a file wrapper, so the function does
        # not depend on which of the two the caller hands over.
        pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

        # Load PDF
        documents = PyPDFLoader(pdf_path).load()

        # Chunking: 1000-character chunks with 200 characters of overlap.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        chunks = text_splitter.split_documents(documents)

        # Nothing to index (e.g. a PDF without a text layer) -- say so
        # explicitly instead of handing FAISS an empty list.
        if not chunks:
            return (
                "Error processing document: no extractable text was found "
                "in this PDF."
            )

        # Create FAISS vector database; assigned only once building succeeded.
        vector_db = FAISS.from_documents(chunks, embedding_model)

        return f"Document processed successfully. {len(chunks)} chunks of your document created. Now, proceed to ask your question ahead."

    except Exception as e:
        # UI boundary: report the failure as text rather than raising.
        return f"Error processing document: {str(e)}"


# -----------------------------
# Question Answering Function
# -----------------------------

def ask_question(question):
    """Answer a question using only the content of the processed PDF.

    The four chunks most similar to the question are retrieved from the
    module-level ``vector_db`` and placed in a prompt that instructs the
    model to answer from that context only. The prompt is sent to the Groq
    chat completions API through the module-level ``client``.

    Args:
        question: The user's question as text. ``None``, empty, or
            whitespace-only input is rejected before any retrieval or API
            call is made.

    Returns:
        str: The model's answer, or a message for the UI -- a prompt to
        enter a question, a prompt to process a PDF first, or a message
        starting with ``"Error generating answer:"``.
    """
    # Validate the input first: a blank question would otherwise be embedded
    # and sent to the LLM, spending an API call on nothing.
    if not question or not question.strip():
        return "Please enter a question."

    if vector_db is None:
        return "Please upload and process a PDF document first."

    try:
        # Retrieve relevant chunks
        docs = vector_db.similarity_search(question, k=4)

        context = "\n\n".join(doc.page_content for doc in docs)

        prompt = f"""
You are a helpful assistant. Answer the question ONLY using the following context.
If the answer is not in the context, say "I could not find the answer in the provided context."

Context:
{context}

Question:
{question}

Answer clearly and based only on the provided context.
"""

        # Groq LLM call
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "user", "content": prompt}
            ],
            model="llama-3.3-70b-versatile",
        )

        return chat_completion.choices[0].message.content

    except Exception as e:
        # UI boundary: report the failure as text rather than raising.
        return f"Error generating answer: {str(e)}"


# -----------------------------
# Gradio Interface
# -----------------------------

with gr.Blocks() as demo:
    # Page header. Components render in the order they are created, so the
    # statement order below is the page layout.
    gr.Markdown("# 📄 PDF Document Assistant Developed by Asif Jamal")
    gr.Markdown("Upload a PDF document and ask questions about its content.")

    # Step 1: upload a PDF and index it via process_document().
    pdf_input = gr.File(label="Upload PDF Document")
    process_button = gr.Button("Click to Process Document")
    process_output = gr.Textbox(label="Processing Status")
    process_button.click(fn=process_document, inputs=pdf_input, outputs=process_output)

    # Step 2: ask a question about the indexed document via ask_question().
    gr.Markdown("## Ask Questions")
    question_input = gr.Textbox(label="Enter your question.")
    ask_button = gr.Button("Click to Proceed")
    answer_output = gr.Textbox(label="Answer", lines=10)
    ask_button.click(fn=ask_question, inputs=question_input, outputs=answer_output)

    # Footer.
    gr.Markdown(
        """
        ---
        © 2026 AI Document Assistant  
        Developed by Asif Jamal
        """
    )

# Start the web app.
demo.launch()