"""
qa.py — Phi-2 FAST + ReRank (with FULL Reasoning Mode)
-------------------------------------------------------
✅ Semantic retrieval (FAISS + cosine re-rank + neighbor-fill)
✅ Smart factual mode
✅ Deep reasoning mode (ChatGPT-like)
"""
import os

import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

print("✅ qa.py (GPT-4o FAST + ReRank + Full Reasoning) loaded from:", __file__)

api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    print("❌ OPENAI_API_KEY not found in environment!")
else:
    print(f"✅ OPENAI_API_KEY loaded successfully (length: {len(api_key)})")
# ==========================================================
# 1️⃣ Cache Setup
# ==========================================================
# NOTE: these env vars only redirect Hugging Face downloads if they are set
# before the libraries initialize; the explicit cache_folder argument passed
# to SentenceTransformer below is what actually guarantees it here.
CACHE_DIR = "/tmp/hf_cache"
os.makedirs(CACHE_DIR, exist_ok=True)
os.environ.update({
    "HF_HOME": CACHE_DIR,
    "TRANSFORMERS_CACHE": CACHE_DIR,
    "HF_DATASETS_CACHE": CACHE_DIR,
    "HF_MODULES_CACHE": CACHE_DIR,
})
# ==========================================================
# 2️⃣ Embedding Model
# ==========================================================
try:
    _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
    print("✅ Loaded embedding model: intfloat/e5-small-v2")
except Exception as e:
    print(f"⚠️ Embedding load failed ({e}), using MiniLM fallback")
    _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
# ==========================================================
# 3️⃣ GPT-4o Model Setup (OpenAI API)
# ==========================================================
from openai import OpenAI
MODEL_NAME = "gpt-4o"
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
print(f"✅ Connected to OpenAI GPT model: {MODEL_NAME}")
# ==========================================================
# 4️⃣ Prompts
# ==========================================================
STRICT_PROMPT = (
    "You are an enterprise documentation assistant.\n"
    "Use ONLY the CONTEXT below to answer the QUESTION clearly and factually.\n"
    "If the answer isn’t in the document, reply exactly:\n"
    "'I don't know based on the provided document.'\n\n"
    "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
)
REASONING_PROMPT = (
    "You are an expert enterprise assistant capable of deep reasoning.\n"
    "Think step by step before answering. Use the CONTEXT below first, but also apply your world knowledge logically.\n"
    "Explain your reasoning concisely if it helps clarity.\n"
    "Avoid hallucination — if the document does not include the answer, say:\n"
    "'I don't know based on the provided document.'\n\n"
    "Context:\n{context}\n\nQuestion: {query}\nLet's reason this out carefully:\nAnswer:"
)
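# Illustrative only: both templates are filled the same way, e.g.
#   prompt = STRICT_PROMPT.format(context="[Chunk 1] ...", query="How do I export a report?")
# generate_answer() below performs exactly this formatting step.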
# ==========================================================
# 5️⃣ Retrieval — FAISS + Re-rank + Neighbor Fill
# ==========================================================
def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
                    min_similarity: float = 0.6, candidate_multiplier: int = 3):
    """Re-rank FAISS candidates and optionally fill with neighbors for context continuity."""
    if index is None or not chunks:
        return []
    try:
        q_emb = _query_model.encode(
            [f"query: {query.strip()}"], convert_to_numpy=True, normalize_embeddings=True
        )[0]
        # Initial FAISS search (over-fetch candidates for re-ranking)
        distances, indices = index.search(np.array([q_emb]).astype("float32"), top_k * candidate_multiplier)
        # Dedup while preserving order; FAISS pads with -1 when it returns
        # fewer hits than requested, so drop those sentinels.
        candidate_indices = [i for i in dict.fromkeys(indices[0]) if i >= 0]
        if not candidate_indices:
            return []
        # Re-rank by cosine similarity against freshly encoded passages
        doc_embs = _query_model.encode(
            [f"passage: {chunks[i]}" for i in candidate_indices],
            convert_to_numpy=True,
            normalize_embeddings=True,
        )
        sims = cosine_similarity([q_emb], doc_embs)[0]
        ranked = sorted(zip(candidate_indices, sims), key=lambda x: x[1], reverse=True)
        # Keep only candidates above the similarity floor
        filtered = [idx for idx, sim in ranked if sim >= min_similarity]
        if len(filtered) > top_k:
            filtered = filtered[:top_k]
        # Neighbor fill: pad with adjacent chunks to preserve continuity
        if len(filtered) < top_k:
            expanded = set(filtered)
            for idx in filtered:
                for neighbor in (idx - 1, idx + 1):
                    if 0 <= neighbor < len(chunks):
                        expanded.add(neighbor)
                    if len(expanded) >= top_k:
                        break
                if len(expanded) >= top_k:
                    break
            filtered = sorted(expanded)[:top_k]
        return [chunks[i] for i in filtered]
    except Exception as e:
        print(f"⚠️ Retrieval error: {e}")
        return []
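# --- Illustrative sketch (not part of vectorstore.py) ---------------------
# retrieve_chunks assumes the index performs inner-product search over
# L2-normalized embeddings, so raw FAISS scores already behave like cosine
# similarities. A minimal index of that kind could look like this
# (build_inner_product_index is a hypothetical helper, shown for clarity):
#
#   import faiss
#   def build_inner_product_index(embeddings) -> "faiss.Index":
#       mat = np.asarray(embeddings, dtype="float32")
#       idx = faiss.IndexFlatIP(mat.shape[1])  # IP == cosine on unit vectors
#       idx.add(mat)
#       return idx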
# ==========================================================
# 6️⃣ Answer Generation (GPT-4o with Full Reasoning)
# ==========================================================
# Reuses the OpenAI client and MODEL_NAME configured in section 3 above.
def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
    """
    Generate an answer with GPT-4o.
    - reasoning_mode=False → strict factual mode (fast)
    - reasoning_mode=True  → reasoning-rich mode (longer, more explanatory)
    """
    if not retrieved_chunks:
        return "Sorry, I couldn’t find relevant information in the document."
    # Format context with chunk tags so the model can refer to sections
    context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
    prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(
        context=context, query=query
    )
    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an expert enterprise documentation assistant. "
                        "Answer questions precisely using the provided context. "
                        "If reasoning mode is enabled, provide deeper explanations and step-by-step logic. "
                        "If the document lacks information, respond exactly: "
                        "'I don't know based on the provided document.'"
                    ),
                },
                {"role": "user", "content": prompt},
            ],
            temperature=0.6 if reasoning_mode else 0.2,
            max_tokens=600 if reasoning_mode else 350,
            top_p=0.95,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"⚠️ GPT-4o generation failed: {e}")
        return "⚠️ Error: Could not generate an answer."
# ==========================================================
# 7️⃣ Local Test
# ==========================================================
if __name__ == "__main__":
from vectorstore import build_faiss_index
dummy_chunks = [
"Step 1: Open the dashboard and navigate to reports.",
"Step 2: Click 'Export' to download a CSV summary.",
"Step 3: Review the generated report in your downloads folder.",
"Appendix: Communication user creation steps are explained later in this guide."
]
embeddings = [
_query_model.encode([f"passage: {c}"], convert_to_numpy=True, normalize_embeddings=True)[0]
for c in dummy_chunks
]
index = build_faiss_index(embeddings)
query = "How do I create a communication user?"
retrieved = retrieve_chunks(query, index, dummy_chunks)
print("🔍 Retrieved:", retrieved)
print("💬 Answer:", generate_answer(query, retrieved, reasoning_mode=True))