"""
qa.py — GPT-4o FAST + ReRank (with FULL Reasoning Mode)
--------------------------------------------------------
✅ Semantic retrieval (FAISS + cosine re-rank + neighbor-fill)
✅ Smart factual mode
✅ Deep reasoning mode (ChatGPT-like)
"""

import os

import numpy as np
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

print("✅ qa.py (GPT-4o FAST + ReRank + Full Reasoning) loaded from:", __file__)

api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    print("❌ OPENAI_API_KEY not found in environment!")
else:
    print("✅ OPENAI_API_KEY loaded successfully (length:", len(api_key), ")")
CACHE_DIR = "/tmp/hf_cache"
os.makedirs(CACHE_DIR, exist_ok=True)
os.environ.update({
    "HF_HOME": CACHE_DIR,
    "TRANSFORMERS_CACHE": CACHE_DIR,
    "HF_DATASETS_CACHE": CACHE_DIR,
    "HF_MODULES_CACHE": CACHE_DIR,
})
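
# e5-family embedders are asymmetric: they expect "query: " / "passage: "
# prefixes at encode time (applied in retrieve_chunks below). The MiniLM
# fallback simply treats those prefixes as ordinary text.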
try:
    _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
    print("✅ Loaded embedding model: intfloat/e5-small-v2")
except Exception as e:
    print(f"⚠️ Embedding load failed ({e}), using MiniLM fallback")
    _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)

MODEL_NAME = "gpt-4o"
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

print(f"✅ OpenAI client initialized for model: {MODEL_NAME}")

STRICT_PROMPT = (
    "You are an enterprise documentation assistant.\n"
    "Use ONLY the CONTEXT below to answer the QUESTION clearly and factually.\n"
    "If the answer isn’t in the document, reply exactly:\n"
    "'I don't know based on the provided document.'\n\n"
    "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
)

REASONING_PROMPT = (
    "You are an expert enterprise assistant capable of deep reasoning.\n"
    "Think step by step before answering. Use the CONTEXT below first, but also apply your world knowledge logically.\n"
    "Explain your reasoning concisely if it helps clarity.\n"
    "Avoid hallucination — if the document does not include the answer, say:\n"
    "'I don't know based on the provided document.'\n\n"
    "Context:\n{context}\n\nQuestion: {query}\nLet's reason this out carefully:\nAnswer:"
)
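

# Retrieval pipeline: FAISS over-fetches top_k * candidate_multiplier
# candidates, an exact cosine re-rank orders them, a similarity floor drops
# weak matches, and adjacent chunks are pulled in when too few survive.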
def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5,
                    min_similarity: float = 0.6, candidate_multiplier: int = 3):
    """Re-rank FAISS candidates and optionally fill with neighbors for context continuity."""
    if not index or not chunks:
        return []

    try:
        q_emb = _query_model.encode(
            [f"query: {query.strip()}"], convert_to_numpy=True, normalize_embeddings=True
        )[0]

        # Over-fetch candidates, then drop duplicates and FAISS's -1 padding
        # (returned when k exceeds the number of indexed vectors).
        distances, indices = index.search(np.array([q_emb]).astype("float32"), top_k * candidate_multiplier)
        candidate_indices = [i for i in dict.fromkeys(indices[0]) if i >= 0]

        # Exact cosine re-rank of the candidate passages.
        doc_embs = _query_model.encode(
            [f"passage: {chunks[i]}" for i in candidate_indices],
            convert_to_numpy=True,
            normalize_embeddings=True,
        )
        sims = cosine_similarity([q_emb], doc_embs)[0]
        ranked = sorted(zip(candidate_indices, sims), key=lambda x: x[1], reverse=True)

        # Keep only sufficiently similar chunks, capped at top_k.
        filtered = [idx for idx, sim in ranked if sim >= min_similarity]
        if len(filtered) > top_k:
            filtered = filtered[:top_k]

        # Neighbor-fill: pad thin results with adjacent chunks so the context
        # reads continuously.
        if len(filtered) < top_k:
            expanded = set(filtered)
            for idx in filtered:
                for neighbor in [idx - 1, idx + 1]:
                    if 0 <= neighbor < len(chunks):
                        expanded.add(neighbor)
                    if len(expanded) >= top_k:
                        break
                if len(expanded) >= top_k:
                    break
            filtered = sorted(expanded)[:top_k]

        return [chunks[i] for i in filtered]

    except Exception as e:
        print(f"⚠️ Retrieval error: {e}")
        return []
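
# Illustrative call (hypothetical values): a lower min_similarity loosens the
# floor for fuzzy queries, e.g.
#   retrieve_chunks("export a report", index, chunks, top_k=3, min_similarity=0.45)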


def generate_answer(query: str, retrieved_chunks: list, reasoning_mode: bool = False):
    """
    Generates answers using GPT-4o.
    - reasoning_mode=False → strict factual mode (fast)
    - reasoning_mode=True  → reasoning-rich mode (longer, more explanatory)
    """
    if not retrieved_chunks:
        return "Sorry, I couldn’t find relevant information in the document."

    # Label each chunk so the model can ground its answer in the context.
    context = "\n".join(f"[Chunk {i+1}] {chunk.strip()}" for i, chunk in enumerate(retrieved_chunks))
    prompt = (REASONING_PROMPT if reasoning_mode else STRICT_PROMPT).format(
        context=context, query=query
    )

    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an expert enterprise documentation assistant. "
                        "Answer questions precisely using the provided context. "
                        "If reasoning_mode is enabled, provide deeper explanations and step-by-step logic. "
                        "If the document lacks information, respond exactly: "
                        "'I don't know based on the provided document.'"
                    ),
                },
                {"role": "user", "content": prompt},
            ],
            # Reasoning mode trades determinism for depth: higher temperature
            # and a larger completion budget.
            temperature=0.6 if reasoning_mode else 0.2,
            max_tokens=600 if reasoning_mode else 350,
            top_p=0.95,
        )

        text = response.choices[0].message.content.strip()
        return text

    except Exception as e:
        print(f"⚠️ GPT-4o generation failed: {e}")
        return "⚠️ Error: Could not generate an answer."


if __name__ == "__main__":
    from vectorstore import build_faiss_index

    dummy_chunks = [
        "Step 1: Open the dashboard and navigate to reports.",
        "Step 2: Click 'Export' to download a CSV summary.",
        "Step 3: Review the generated report in your downloads folder.",
        "Appendix: Communication user creation steps are explained later in this guide.",
    ]
    embeddings = [
        _query_model.encode([f"passage: {c}"], convert_to_numpy=True, normalize_embeddings=True)[0]
        for c in dummy_chunks
    ]
    index = build_faiss_index(embeddings)

    query = "How do I create a communication user?"
    retrieved = retrieve_chunks(query, index, dummy_chunks)
    print("🔍 Retrieved:", retrieved)
    print("💬 Answer:", generate_answer(query, retrieved, reasoning_mode=True))