import gradio as gr
import torch
import faiss
import numpy as np
import logging
import time
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
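# Assumed dependencies (PyPI names):
#   pip install gradio torch faiss-cpu transformers sentence-transformers pypdf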

# =====================================================
# LOGGING CONFIGURATION
# =====================================================

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

logger.info("Starting application...")

# =====================================================
# DEVICE CONFIG
# =====================================================

DEVICE = "cpu"
torch.set_num_threads(4)
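# CPU-only inference: capping intra-op threads at 4 keeps latency predictable
# on small shared hosts; tune this to the cores actually available.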

# =====================================================
# LOAD EMBEDDING MODEL
# =====================================================

logger.info("Loading embedding model...")
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
logger.info("Embedding model loaded.")

# =====================================================
# LOAD PHI-3 MODEL
# =====================================================

MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

logger.info("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

logger.info("Loading Phi-3 model (CPU optimized)...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True
)
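# Sizing note (rough estimate): float32 weights for a ~3.8B-parameter model
# need around 15 GB of RAM; loading in bfloat16 would roughly halve that if
# the host supports it.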

model.to(DEVICE)
model.eval()

logger.info("Model loaded successfully.")

# =====================================================
# GLOBAL STORAGE
# =====================================================

chunks = []
faiss_index = None

# =====================================================
# PDF PROCESSING
# =====================================================

def process_pdf(file):
    global chunks, faiss_index

    if file is None:
        return "⚠ Please select a PDF file first."

    logger.info("Processing PDF...")

    # gr.File may hand back a plain path string or a tempfile wrapper
    # depending on the Gradio version; fall back to .name when present.
    reader = PdfReader(file.name if hasattr(file, "name") else file)
    full_text = ""

    for page in reader.pages:
        text = page.extract_text()
        if text:
            full_text += text + "\n"

    if not full_text.strip():
        return "❌ Could not extract text from PDF."

    # Chunking: fixed-size 400-character windows with no overlap.
    # Simple, but it can split sentences mid-way; overlap or sentence-aware
    # splitting would improve retrieval quality.
    chunk_size = 400
    chunks = [
        full_text[i:i+chunk_size]
        for i in range(0, len(full_text), chunk_size)
    ]

    logger.info(f"Created {len(chunks)} chunks.")

    # Embeddings (cast to float32, which FAISS expects)
    embeddings = embed_model.encode(chunks, convert_to_numpy=True)
    embeddings = np.asarray(embeddings, dtype="float32")

    dimension = embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(embeddings)
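
    # IndexFlatL2 performs exact (brute-force) L2 search, which is fine at
    # this scale; for cosine similarity one could L2-normalize the
    # embeddings and use faiss.IndexFlatIP instead.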

    logger.info("FAISS index built successfully.")

    return f"✅ PDF processed successfully ({len(chunks)} chunks created)."

# =====================================================
# CHAT FUNCTION
# =====================================================

def generate_answer(message, history):
    global chunks, faiss_index

    if faiss_index is None:
        return "⚠ Please upload and process a PDF first."

    logger.info(f"Received question: {message}")

    start_time = time.time()

    # Step 1: Embed Query
    query_embedding = embed_model.encode([message], convert_to_numpy=True)

    # Step 2: Retrieve the top chunks (cap k at the index size; FAISS pads
    # missing results with -1, which we filter out)
    k = min(2, faiss_index.ntotal)
    distances, indices = faiss_index.search(query_embedding, k=k)

    retrieved_context = "\n\n".join(
        chunks[i] for i in indices[0] if i >= 0
    )

    logger.info("Retrieved relevant context.")

    # Step 3: Create structured prompt
    prompt = f"""
<|system|>
You are a professional AI assistant.
Provide clear, structured, intelligent answers.
Keep answers concise but informative.
If information is missing in context, say so.
<|end|>

<|user|>
Context:
{retrieved_context}

Question:
{message}
<|end|>

<|assistant|>
"""

    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.5,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            use_cache=True
        )

    # Decode only the newly generated tokens. With skip_special_tokens=True
    # the <|assistant|> marker is stripped from the decoded text, so
    # splitting on it would return the whole prompt as well as the answer.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    answer = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    elapsed = time.time() - start_time
    logger.info(f"Response generated in {elapsed:.2f} seconds.")

    return answer

# =====================================================
# GRADIO UI
# =====================================================

with gr.Blocks(theme=gr.themes.Soft()) as demo:

    gr.Markdown("# 🤖 Smart RAG Assistant")
    gr.Markdown("Upload a PDF and chat intelligently using Phi-3 Mini.")

    with gr.Row():

        with gr.Column(scale=1):
            pdf_file = gr.File(label="Upload PDF")
            upload_btn = gr.Button("Process PDF")
            status = gr.Markdown()

        with gr.Column(scale=3):
            # Tuple-style history (the long-standing Gradio default; newer
            # releases prefer type="messages")
            chatbot = gr.Chatbot(height=600)
            msg = gr.Textbox(
                placeholder="Ask something about the document..."
            )
            clear = gr.Button("Clear Chat")

    upload_btn.click(
        process_pdf,
        inputs=pdf_file,
        outputs=status
    )

    def respond(message, chat_history):
        answer = generate_answer(message, chat_history)
        chat_history.append((message, answer))
        return "", chat_history

    msg.submit(
        respond,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot]
    )

    clear.click(lambda: [], None, chatbot)

demo.launch()