File size: 7,929 Bytes
cd266a5
9466a37
 
 
 
 
cd266a5
 
ebbd49e
e97699c
9f0da7b
41ac7b0
25140b4
41ac7b0
ebbd49e
9466a37
9f0da7b
1242abb
 
 
 
 
 
 
fbd4778
9466a37
fbd4778
50ab09a
0c81fa1
cd266a5
 
 
 
 
 
6718956
fbd4778
 
 
43b802c
fea3890
d7aaa8f
a610ce4
fbd4778
fea3890
cd266a5
fbd4778
92fc472
fbd4778
92fc472
41ac7b0
92fc472
 
197e569
92fc472
6d7ba5b
fbd4778
9466a37
fbd4778
f384f96
197e569
9466a37
 
fbd4778
386cde6
 
 
f384f96
9466a37
 
 
 
fbd4778
9466a37
f384f96
 
fbd4778
9466a37
fbd4778
 
 
9466a37
fea3890
 
41ac7b0
235a5b5
 
9466a37
235a5b5
 
9466a37
 
 
235a5b5
9466a37
235a5b5
9466a37
235a5b5
9466a37
235a5b5
 
fbd4778
 
9466a37
fbd4778
 
 
 
9466a37
fbd4778
 
 
 
 
 
 
 
28eda6f
235a5b5
fbd4778
235a5b5
fbd4778
235a5b5
 
 
 
 
fbd4778
fe9b982
fbd4778
fe9b982
 
 
 
 
f384f96
fe9b982
 
 
 
 
fea3890
 
 
fe9b982
fbd4778
fe9b982
 
 
cd6e69b
c7133f4
fe9b982
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25140b4
f4366a1
fe9b982
9466a37
f4366a1
743f89e
fe9b982
386cde6
fea3890
9466a37
fbd4778
 
 
fea3890
43cd83d
f384f96
fea3890
 
43cd83d
197e569
 
fea3890
c91d8df
197e569
 
c91d8df
197e569
f384f96
197e569
fbd4778
197e569
9466a37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
"""
qa.py — Phi-2 FAST + ReRank (with FULL Reasoning Mode)
-------------------------------------------------------
✅ Semantic retrieval (FAISS + cosine re-rank + neighbor-fill)
✅ Smart factual mode
✅ Deep reasoning mode (ChatGPT-like)
"""

import os
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

print("✅ qa.py (Phi-2 FAST + ReRank + Full Reasoning) loaded from:", __file__)

api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    print("❌ OPENAI_API_KEY not found in environment!")
else:
    print("✅ OPENAI_API_KEY loaded successfully (length:", len(api_key), ")")


# ==========================================================
# 1️⃣ Cache Setup
# ==========================================================
CACHE_DIR = "/tmp/hf_cache"
os.makedirs(CACHE_DIR, exist_ok=True)
os.environ.update({
    "HF_HOME": CACHE_DIR,
    "TRANSFORMERS_CACHE": CACHE_DIR,
    "HF_DATASETS_CACHE": CACHE_DIR,
    "HF_MODULES_CACHE": CACHE_DIR
})

# ==========================================================
# 2️⃣ Embedding Model
# ==========================================================
try:
    # Preferred embedder: small, fast e5 variant (expects "query:"/"passage:" prefixes).
    _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
    print("✅ Loaded embedding model: intfloat/e5-small-v2")
except Exception as e:
    # Best-effort fallback so the service still starts if e5 can't be fetched.
    print(f"⚠️ Embedding load failed ({e}), using MiniLM fallback")
    _query_model = SentenceTransformer(
        "sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR
    )

# ==========================================================
# 3️⃣ GPT-4o Model Setup (OpenAI API)
# ==========================================================
from openai import OpenAI

# GPT-4o via the OpenAI API; the key is read from the environment at import time.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
MODEL_NAME = "gpt-4o"

print(f"✅ Connected to OpenAI GPT model: {MODEL_NAME}")

# ==========================================================
# 4️⃣ Prompts
# ==========================================================
# Factual mode: answers must come ONLY from the retrieved context.
# Placeholders {context} and {query} are filled via str.format() in
# generate_answer().
STRICT_PROMPT = (
    "You are an enterprise documentation assistant.\n"
    "Use ONLY the CONTEXT below to answer the QUESTION clearly and factually.\n"
    "If the answer isn’t in the document, reply exactly:\n"
    "'I don't know based on the provided document.'\n\n"
    "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
)

# Reasoning mode: chain-of-thought style — context first, world knowledge
# second, with the same exact fallback sentence to keep "unknown" detection
# consistent across both modes.
REASONING_PROMPT = (
    "You are an expert enterprise assistant capable of deep reasoning.\n"
    "Think step by step before answering. Use the CONTEXT below first, but also apply your world knowledge logically.\n"
    "Explain your reasoning concisely if it helps clarity.\n"
    "Avoid hallucination — if the document does not include the answer, say:\n"
    "'I don't know based on the provided document.'\n\n"
    "Context:\n{context}\n\nQuestion: {query}\nLet's reason this out carefully:\nAnswer:"
)

# ==========================================================
# 5️⃣ Retrieval — FAISS + Re-rank + Neighbor Fill
# ==========================================================
def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
                    min_similarity: float = 0.6, candidate_multiplier: int = 3):
    """Retrieve the most relevant chunks for *query*.

    Pipeline: FAISS ANN over-fetch (top_k * candidate_multiplier candidates),
    exact cosine-similarity re-rank, similarity-threshold filter, then an
    optional neighbor fill (adjacent chunks) for context continuity.

    Args:
        query: Natural-language question.
        index: FAISS index built over the chunk embeddings.
        chunks: Text chunks aligned row-for-row with the index.
        top_k: Maximum number of chunks to return.
        min_similarity: Minimum cosine similarity for a candidate to survive.
        candidate_multiplier: Over-fetch factor for the initial ANN search.

    Returns:
        List of chunk strings (possibly empty on error or no matches).
    """
    if not index or not chunks:
        return []

    try:
        # e5 models expect the "query: " prefix on queries.
        q_emb = _query_model.encode(
            [f"query: {query.strip()}"], convert_to_numpy=True, normalize_embeddings=True
        )[0]

        # Initial FAISS search (over-fetch so the re-ranker has choices).
        distances, indices = index.search(
            np.array([q_emb]).astype("float32"), top_k * candidate_multiplier
        )

        # Dedup preserving order. FIX: FAISS pads missing results with -1,
        # which previously survived dedup and indexed chunks[-1] (the wrong
        # chunk); drop any index outside the valid range.
        candidate_indices = [
            int(i) for i in dict.fromkeys(indices[0]) if 0 <= i < len(chunks)
        ]
        if not candidate_indices:
            return []

        # Re-rank candidates by exact cosine similarity against the query.
        doc_embs = _query_model.encode(
            [f"passage: {chunks[i]}" for i in candidate_indices],
            convert_to_numpy=True,
            normalize_embeddings=True,
        )
        sims = cosine_similarity([q_emb], doc_embs)[0]
        ranked = sorted(zip(candidate_indices, sims), key=lambda x: x[1], reverse=True)

        # Keep only sufficiently similar candidates, capped at top_k.
        filtered = [idx for idx, sim in ranked if sim >= min_similarity][:top_k]

        # Neighbor fill: pad with adjacent chunks so the context stays
        # contiguous when the threshold removed too many candidates.
        if len(filtered) < top_k:
            expanded = set(filtered)
            for idx in filtered:
                for neighbor in (idx - 1, idx + 1):
                    if 0 <= neighbor < len(chunks):
                        expanded.add(neighbor)
                    if len(expanded) >= top_k:
                        break
                if len(expanded) >= top_k:
                    break
            filtered = sorted(expanded)[:top_k]

        return [chunks[i] for i in filtered]

    except Exception as e:
        # Best-effort retrieval: log and fall back to "nothing found".
        print(f"⚠️ Retrieval error: {e}")
        return []

# ==========================================================
# 6️⃣ Answer Generation (GPT-4o with Full Reasoning)
# ==========================================================
# FIX: this section previously re-imported OpenAI and re-initialized `client`
# and `MODEL_NAME` — an exact duplicate of section 3 above. The redundant
# re-initialization has been removed; the objects defined in section 3
# remain in effect for generate_answer() below.

def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
    """Generate an answer with GPT-4o from retrieved context chunks.

    Args:
        query: User question.
        retrieved_chunks: Chunk strings from retrieve_chunks().
        reasoning_mode: False → strict factual mode (fast, low temperature);
            True → reasoning-rich mode (longer, more explanatory).

    Returns:
        The model's answer text, or a fallback message when no context was
        retrieved or the API call fails.
    """
    if not retrieved_chunks:
        return "Sorry, I couldn’t find relevant information in the document."

    # Tag each chunk so the model (and debugging humans) can cite sources.
    context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
    prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(
        context=context, query=query
    )

    # FIX: the old system message said "If reasoning_mode is enabled ..." —
    # but the model never sees that Python flag. Spell out the active mode
    # explicitly instead.
    system_msg = (
        "You are an expert enterprise documentation assistant. "
        "Answer questions precisely using the provided context. "
        + (
            "Provide deeper explanations and step-by-step logic. "
            if reasoning_mode
            else "Keep answers concise and strictly factual. "
        )
        + "If the document lacks information, respond exactly: "
        "'I don't know based on the provided document.'"
    )

    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": system_msg},
                {"role": "user", "content": prompt},
            ],
            # Reasoning mode trades determinism for exploration and length.
            temperature=0.6 if reasoning_mode else 0.2,
            max_tokens=600 if reasoning_mode else 350,
            top_p=0.95,
        )
        return response.choices[0].message.content.strip()

    except Exception as e:
        print(f"⚠️ GPT-4o generation failed: {e}")
        return "⚠️ Error: Could not generate an answer."


# ==========================================================
# 7️⃣ Local Test
# ==========================================================
if __name__ == "__main__":
    from vectorstore import build_faiss_index

    dummy_chunks = [
        "Step 1: Open the dashboard and navigate to reports.",
        "Step 2: Click 'Export' to download a CSV summary.",
        "Step 3: Review the generated report in your downloads folder.",
        "Appendix: Communication user creation steps are explained later in this guide."
    ]
    embeddings = [
        _query_model.encode([f"passage: {c}"], convert_to_numpy=True, normalize_embeddings=True)[0]
        for c in dummy_chunks
    ]
    index = build_faiss_index(embeddings)

    query = "How do I create a communication user?"
    retrieved = retrieve_chunks(query, index, dummy_chunks)
    print("🔍 Retrieved:", retrieved)
    print("💬 Answer:", generate_answer(query, retrieved, reasoning_mode=True))