File size: 13,623 Bytes
464b72a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
import os
import re
import unicodedata
from pathlib import Path
from typing import List

from dotenv import load_dotenv
import google.generativeai as genai
from huggingface_hub import InferenceClient

# Load variables from a .env file into the environment before any getenv call.
load_dotenv()

# Configure the Gemini SDK once at import time, but only when a key is present.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)

# Module-level RAG state, shared and rebound by the functions in this module.
vectordb = None  # FAISS vector store; None until created or loaded from disk
retriever = None  # retriever built from vectordb with search_kwargs {"k": 4}
embeddings = None  # embedding model, set by initialize_embeddings()
rag_initialized = False  # True once a vector store has been created or loaded
uploaded_documents = []  # sorted names of the *.pdf files found in RAG_DATA_DIR
last_index_mtime = None  # mtime of index.faiss at the last save/load, or None

# On-disk locations: rag_data/ lives in the parent of this file's directory.
RAG_DATA_DIR = Path(__file__).resolve().parent.parent / "rag_data"
FAISS_INDEX_PATH = RAG_DATA_DIR / "faiss_index"
# Lower-case phrase searched for in model answers to detect the
# "not enough information" reply that the RAG prompt asks for.
INSUFFICIENT_CONTEXT_MARKER = "i don't have enough information in the documents"


def initialize_embeddings():
    """Return the shared multilingual embedding model, building it on first use.

    The model is cached in the module-level ``embeddings`` variable, so later
    calls return the existing instance without loading anything again.
    """
    global embeddings

    if embeddings is None:
        print("Loading multilingual embedding model...")
        # Imported lazily: the dependency is only needed when a model is built.
        from langchain_huggingface import HuggingFaceEmbeddings

        model_id = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
        embeddings = HuggingFaceEmbeddings(
            model_name=model_id,
            encode_kwargs={"normalize_embeddings": True},
        )
        print("Embedding model loaded.")

    return embeddings


def clean_text(text: str) -> str:
    """Clean and normalize text for embedding.

    Processing steps:
      1. Apply NFKC normalization (e.g. ligatures become plain letters).
      2. Drop characters in the Unicode categories So, Cn, Cc, Cf and Cs,
         except whitespace, which is kept so it still separates words.
      3. Collapse every whitespace run into one space and strip the ends.

    Args:
        text: Raw text, typically the content of one document chunk.

    Returns:
        The cleaned text, or ``""`` when ``text`` is not a string or is blank.
    """
    if not isinstance(text, str) or not text.strip():
        return ""

    normalized_text = unicodedata.normalize("NFKC", text)

    dropped_categories = {"So", "Cn", "Cc", "Cf", "Cs"}
    # Newlines, tabs and carriage returns are category "Cc". They must survive
    # this filter, otherwise words on adjacent lines get glued together
    # ("hello\nworld" -> "helloworld") before whitespace is collapsed below.
    kept_chars = [
        char for char in normalized_text
        if char.isspace() or unicodedata.category(char) not in dropped_categories
    ]

    return re.sub(r"\s+", " ", "".join(kept_chars)).strip()


def load_and_process_pdf(pdf_path: str) -> List[dict]:
    """Read the PDF at ``pdf_path`` and split its pages into overlapping chunks.

    Pages are loaded with ``PyPDFLoader`` and split by a
    ``RecursiveCharacterTextSplitter`` configured with ``chunk_size=300`` and
    ``chunk_overlap=80``.

    Returns:
        The list produced by ``split_documents``.
        NOTE(review): the annotation says ``List[dict]``, but callers in this
        module read ``.page_content`` / ``.metadata`` attributes from the
        items -- presumably LangChain document objects; confirm.
    """
    from langchain_community.document_loaders import PyPDFLoader
    from langchain_text_splitters import RecursiveCharacterTextSplitter

    print(f"Loading PDF: {pdf_path}")

    pages = PyPDFLoader(pdf_path).load()
    chunker = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=80)
    pieces = chunker.split_documents(pages)

    print(f"Loaded {len(pages)} pages, created {len(pieces)} chunks.")
    return pieces


def create_vector_store(chunks: List) -> bool:
    """Embed ``chunks`` and add them to the module's FAISS vector store.

    Each chunk's ``page_content`` is passed through ``clean_text``; chunks that
    end up empty are skipped together with their metadata. If no store exists
    yet a new one is created, otherwise the new vectors are merged into the
    existing store. On success the retriever is rebuilt, the index is saved to
    disk and the list of uploaded documents is refreshed.

    Args:
        chunks: Objects exposing ``page_content`` and ``metadata`` attributes.

    Returns:
        ``True`` when the store was created or updated, ``False`` when no chunk
        had usable text after cleaning.
    """
    global vectordb, retriever, rag_initialized

    from langchain_community.vectorstores import FAISS

    initialize_embeddings()

    raw_texts = [chunk.page_content for chunk in chunks]
    raw_metadatas = [chunk.metadata for chunk in chunks]

    # Clean every text and keep only the (text, metadata) pairs that still
    # carry content, so both lists stay aligned.
    cleaned_pairs = [
        (clean_text(raw), meta) for raw, meta in zip(raw_texts, raw_metadatas)
    ]
    kept_pairs = [(body, meta) for body, meta in cleaned_pairs if body]

    if not kept_pairs:
        print("No valid texts after cleaning.")
        return False

    kept_texts = [body for body, _meta in kept_pairs]
    kept_metadatas = [meta for _body, meta in kept_pairs]

    print(f"Processing {len(kept_texts)} text chunks for embedding...")

    fresh_index = FAISS.from_texts(kept_texts, embeddings, metadatas=kept_metadatas)
    if vectordb is None:
        vectordb = fresh_index
    else:
        vectordb.merge_from(fresh_index)

    retriever = vectordb.as_retriever(search_kwargs={"k": 4})
    rag_initialized = True

    save_vector_store()
    _sync_uploaded_documents()

    print("Vector store created/updated successfully.")
    return True


def save_vector_store():
    """Persist the in-memory FAISS store to ``FAISS_INDEX_PATH``.

    Creates ``RAG_DATA_DIR`` if needed and records the saved index file's
    modification time in ``last_index_mtime``. Does nothing when no vector
    store exists.
    """
    global vectordb, last_index_mtime

    if vectordb is not None:
        RAG_DATA_DIR.mkdir(parents=True, exist_ok=True)
        target_dir = str(FAISS_INDEX_PATH)
        vectordb.save_local(target_dir)
        # Remember the on-disk timestamp so later checks can tell whether the
        # index file has changed since this save.
        last_index_mtime = _get_index_mtime()
        print(f"Vector store saved to {FAISS_INDEX_PATH}.")


def load_vector_store() -> bool:
    """Load the FAISS index from ``FAISS_INDEX_PATH`` into module state.

    On success the vector store, retriever, ``rag_initialized`` flag,
    ``last_index_mtime`` and the uploaded-documents list are all refreshed.

    Returns:
        ``True`` when the index was loaded; ``False`` when the index directory
        does not exist or loading raised (the error is printed, not re-raised).
    """
    global vectordb, retriever, rag_initialized, last_index_mtime

    index_dir = FAISS_INDEX_PATH
    if index_dir.exists():
        try:
            from langchain_community.vectorstores import FAISS

            initialize_embeddings()
            # NOTE(review): allow_dangerous_deserialization=True opts in to
            # loading serialized data from this directory -- presumably safe
            # only while the directory is written solely by this application.
            vectordb = FAISS.load_local(
                str(index_dir),
                embeddings,
                allow_dangerous_deserialization=True,
            )
            retriever = vectordb.as_retriever(search_kwargs={"k": 4})
            rag_initialized = True
            last_index_mtime = _get_index_mtime()
            _sync_uploaded_documents()
            print("Loaded existing vector store from disk.")
            return True
        except Exception as load_error:
            print(f"Failed to load vector store: {load_error}")

    return False


def rag_answer(question: str) -> dict:
    """Answer ``question`` from the indexed documents, with direct-AI fallback.

    Flow:
      1. If an index exists on disk and the in-memory store is missing, or the
         on-disk index file is newer than the one last saved/loaded, (re)load it.
      2. With no usable store, or when retrieval returns nothing, answer via
         ``_ask_gemini_directly``.
      3. Otherwise build a context-restricted prompt from the top 4 matches and
         send it to Gemini when ``GEMINI_API_KEY`` is set, then to Hugging Face
         if Gemini is not configured or raised.
      4. If the model's reply is empty or reports insufficient context, answer
         via ``_ask_gemini_directly`` instead.

    Returns:
        A dict with the keys ``answer`` (str), ``source`` (``"rag"``,
        ``"gemini"`` for any direct answer, or ``"error"``), ``context_found``
        (bool) and ``relevance_score`` (score of the first retrieved document,
        ``0.0`` when nothing was retrieved).
    """
    global retriever, vectordb, last_index_mtime

    outcome = {
        "answer": "",
        "source": "none",
        "context_found": False,
        "relevance_score": 0.0,
    }

    def _answer_without_context() -> dict:
        # Direct answers are labelled "gemini" regardless of which backend
        # _ask_gemini_directly ended up using.
        outcome["answer"] = _ask_gemini_directly(question)
        outcome["source"] = "gemini"
        return outcome

    def _accept_or_fall_back(candidate: str) -> dict:
        # Keep the context-based reply unless the model said the context was
        # not enough (or returned nothing).
        if _is_insufficient_context_answer(candidate):
            print("RAG context not sufficient. Falling back to direct AI answer.")
            return _answer_without_context()
        outcome["answer"] = candidate
        outcome["source"] = "rag"
        return outcome

    if FAISS_INDEX_PATH.exists():
        disk_mtime = _get_index_mtime()
        store_missing = not rag_initialized or retriever is None
        index_is_newer = bool(
            disk_mtime and last_index_mtime and disk_mtime > last_index_mtime
        )
        if store_missing or index_is_newer:
            load_vector_store()

    if not rag_initialized or retriever is None:
        return _answer_without_context()

    matches = vectordb.similarity_search_with_score(question, k=4)

    if not matches:
        print(f"No documents found for question: {question}")
        return _answer_without_context()

    top_score = matches[0][1]
    outcome["relevance_score"] = float(top_score)

    print(f"\nQuestion: {question}")
    print(f"Retrieved {len(matches)} documents:")
    for rank, (doc, score) in enumerate(matches, start=1):
        preview = doc.page_content[:100].replace("\n", " ")
        print(f"  [{rank}] Score: {score:.3f} - {preview}...")

    print(f"Using RAG with relevance score: {top_score}")

    context = "\n\n".join(doc.page_content for doc, _score in matches)
    outcome["context_found"] = True

    prompt = (
        "You are a helpful assistant. Answer the question based ONLY on the following "
        "context from the PDF document. If the context doesn't contain enough information "
        "to answer the question, say \"I don't have enough information in the documents to "
        "answer this question.\"\n\n"
        "Context from PDF:\n"
        f"{context}\n\n"
        f"Question: {question}\n\n"
        "Answer (in English):"
    )

    try:
        if os.getenv("GEMINI_API_KEY"):
            try:
                gemini = genai.GenerativeModel("models/gemini-2.5-flash")
                reply = gemini.generate_content(prompt)
                return _accept_or_fall_back((reply.text or "").strip())
            except Exception as gemini_error:
                # Any Gemini failure falls through to the Hugging Face path.
                detail = str(gemini_error)
                print(f"Gemini error in RAG: {detail[:200]}...")
                if "429" in detail or "quota" in detail.lower():
                    print("Gemini quota exceeded. Using Hugging Face for RAG.")

        print("Using Hugging Face for RAG answer...")
        return _accept_or_fall_back(_ask_huggingface_free(prompt).strip())

    except Exception as e:
        print(f"All RAG generation failed: {e}")
        outcome["answer"] = "Sorry, unable to generate answer. Please try again later."
        outcome["source"] = "error"

    return outcome


def _ask_huggingface_free(prompt: str) -> str:
    """Send ``prompt`` as a single user message to the Hugging Face chat API.

    ``Qwen/Qwen2.5-72B-Instruct`` is tried first; if that call raises,
    ``microsoft/Phi-3-mini-4k-instruct`` is tried as a backup. The value of the
    ``HF_API_TOKEN`` environment variable (``None`` when unset) is passed as
    the client token.

    Returns:
        The message content of the first choice in the model's response.

    Raises:
        Exception: when the client cannot be created, or when both models
            fail; in the latter case the message carries the primary model's
            error text.
    """
    api_token = os.getenv("HF_API_TOKEN")

    try:
        client = InferenceClient(token=api_token)
    except Exception as e:
        raise Exception(f"Failed to create Hugging Face client: {e}")

    conversation = [{"role": "user", "content": prompt}]

    def _complete(model_id: str) -> str:
        # Same request settings for both models; only the model id changes.
        reply = client.chat_completion(
            messages=conversation,
            model=model_id,
            max_tokens=500,
            temperature=0.7,
        )
        return reply.choices[0].message.content

    try:
        print("Calling Hugging Face API (Qwen2.5-72B-Instruct)...")
        return _complete("Qwen/Qwen2.5-72B-Instruct")
    except Exception as primary_error:
        primary_detail = str(primary_error)
        print(f"Hugging Face primary model error: {primary_error}")

        try:
            print("Trying backup model (Microsoft Phi-3)...")
            return _complete("microsoft/Phi-3-mini-4k-instruct")
        except Exception as backup_error:
            print(f"Backup model also failed: {backup_error}")
            raise Exception(f"All HF models failed: {primary_detail}")


def _ask_gemini_directly(question: str) -> str:
    """Answer ``question`` without document context.

    Gemini is used when ``GEMINI_API_KEY`` is set; if it is not set or the
    Gemini call raises, the same prompt is sent to Hugging Face via
    ``_ask_huggingface_free``.

    Args:
        question: The user's question, inserted verbatim into the prompt.

    Returns:
        The model's answer, or an apology message when both services failed.
        That message states why Gemini produced no answer (no key configured,
        quota exceeded, or another request failure) followed by the Hugging
        Face error. This function does not raise for service failures.
    """
    prompt = (
        "Answer the following question helpfully and accurately:\n\n"
        f"Question: {question}\n\n"
        "Answer:"
    )

    # Tracks why Gemini did not answer so the final message is accurate rather
    # than always blaming the quota.
    gemini_failure = "Gemini API key not configured"

    if os.getenv("GEMINI_API_KEY"):
        try:
            model = genai.GenerativeModel("models/gemini-2.5-flash")
            response = model.generate_content(prompt)
            return response.text
        except Exception as gemini_error:
            error_msg = str(gemini_error)
            print(f"Gemini API error: {error_msg[:200]}...")

            if "429" in error_msg or "quota" in error_msg.lower():
                gemini_failure = "Gemini quota exceeded"
                print("Gemini quota exceeded. Switching to Hugging Face.")
            else:
                gemini_failure = "Gemini request failed"
                print("Gemini error. Switching to Hugging Face.")
    else:
        print("No Gemini API key, using Hugging Face.")

    try:
        print("Using Hugging Face for direct answer...")
        return _ask_huggingface_free(prompt)
    except Exception as hf_error:
        print(f"Hugging Face error: {hf_error}")
        return (
            "Sorry, both AI services are unavailable. "
            f"{gemini_failure}, and Hugging Face error: {hf_error}"
        )


def get_rag_status() -> dict:
    """Report the current state of the RAG system.

    If the system is not initialized but an index directory exists on disk, a
    load is attempted first. The uploaded-documents list is refreshed before
    the report is built.

    Returns:
        A dict with the keys ``initialized``, ``documents_count``,
        ``documents``, ``has_embeddings`` and ``has_vector_store``.
    """
    if not rag_initialized:
        if FAISS_INDEX_PATH.exists():
            load_vector_store()

    _sync_uploaded_documents()

    status = {}
    status["initialized"] = rag_initialized
    status["documents_count"] = len(uploaded_documents)
    status["documents"] = uploaded_documents
    status["has_embeddings"] = embeddings is not None
    status["has_vector_store"] = vectordb is not None
    return status


def clear_rag_data():
    """Reset the in-memory RAG state and delete the saved index directory.

    The vector store, retriever, initialized flag, uploaded-documents list and
    recorded index mtime are reset; ``FAISS_INDEX_PATH`` is removed recursively
    when it exists. The embedding model is left loaded.

    Returns:
        Always ``True``.
    """
    global vectordb, retriever, rag_initialized, uploaded_documents, last_index_mtime

    vectordb = retriever = None
    rag_initialized = False
    uploaded_documents = []
    last_index_mtime = None

    index_dir = FAISS_INDEX_PATH
    if index_dir.exists():
        from shutil import rmtree

        rmtree(index_dir)

    print("RAG data cleared.")
    return True


def _get_index_mtime():
    """Return the modification time of ``index.faiss`` in the index directory.

    Returns ``None`` when that file does not exist.
    """
    candidate = FAISS_INDEX_PATH / "index.faiss"
    return candidate.stat().st_mtime if candidate.exists() else None


def _is_insufficient_context_answer(answer_text: str) -> bool:
    """Tell whether a model reply means "the context was not enough".

    An empty or missing reply counts as insufficient. Otherwise the reply is
    stripped, lower-cased and searched for ``INSUFFICIENT_CONTEXT_MARKER``.
    """
    if answer_text:
        return INSUFFICIENT_CONTEXT_MARKER in answer_text.strip().lower()
    return True


def _sync_uploaded_documents():
    """Refresh ``uploaded_documents`` from the PDF files in ``RAG_DATA_DIR``.

    The list is set to the sorted names of the regular files matching
    ``*.pdf`` directly inside the directory, or to an empty list when the
    directory does not exist.
    """
    global uploaded_documents

    if RAG_DATA_DIR.exists():
        pdf_names = [
            entry.name for entry in RAG_DATA_DIR.glob("*.pdf") if entry.is_file()
        ]
        pdf_names.sort()
        uploaded_documents = pdf_names
    else:
        uploaded_documents = []


def rebuild_vector_store_from_pdfs() -> bool:
    """Discard the in-memory store and rebuild it from the PDFs in ``RAG_DATA_DIR``.

    Every PDF listed by ``_sync_uploaded_documents`` is loaded and chunked; a
    PDF whose processing raises is skipped with a printed message. All chunks
    are then passed to ``create_vector_store`` in one call.

    Returns:
        ``True`` when the store was rebuilt; ``False`` when there are no PDFs,
        no chunks could be produced, or ``create_vector_store`` reported failure.
        Note that the in-memory store has already been reset by the time the
        "no chunks" case is detected.
    """
    global vectordb, retriever, rag_initialized

    _sync_uploaded_documents()
    if not uploaded_documents:
        print("No PDFs found in rag_data to rebuild vector store.")
        return False

    initialize_embeddings()

    # Start from a clean slate so create_vector_store builds a new index
    # instead of merging into the previous one.
    vectordb = retriever = None
    rag_initialized = False

    collected = []
    for pdf_name in uploaded_documents:
        source_path = RAG_DATA_DIR / pdf_name
        try:
            collected.extend(load_and_process_pdf(str(source_path)))
        except Exception as e:
            print(f"Skipping PDF '{pdf_name}' due to processing error: {e}")

    if not collected:
        print("No chunks generated from PDFs. Rebuild aborted.")
        return False

    rebuilt = create_vector_store(collected)
    if rebuilt:
        print(f"Rebuilt vector store from {len(uploaded_documents)} PDF(s).")
    return rebuilt