"""
qa.py — Retrieval + Generation (Phi-2 Fast Reasoning)
-----------------------------------------------------
Uses:
 - intfloat/e5-small-v2 for embeddings
 - microsoft/phi-2 as main LLM (fast, strong reasoning)
 - Fallback: google/flan-t5-base (used if Phi-2 fails to load)
Optimized for CPU inference (Hugging Face Spaces / Streamlit)
"""

import os
import numpy as np
from sentence_transformers import SentenceTransformer

print("✅ qa.py (Phi-2 optimized) loaded from:", __file__)

# ==========================================================
# 1️⃣ Cache Setup
# ==========================================================
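# Hosted platforms such as Hugging Face Spaces may not allow writes to the
# default cache location, so all Hugging Face downloads are redirected to
# /tmp. These variables must be set before any model is downloaded.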
CACHE_DIR = "/tmp/hf_cache"
os.makedirs(CACHE_DIR, exist_ok=True)
os.environ.update({
    "HF_HOME": CACHE_DIR,
    "TRANSFORMERS_CACHE": CACHE_DIR,
    "HF_DATASETS_CACHE": CACHE_DIR,
    "HF_MODULES_CACHE": CACHE_DIR
})

# ==========================================================
# 2️⃣ Embedding Model
# ==========================================================
try:
    _query_model = SentenceTransformer("intfloat/e5-small-v2", cache_folder=CACHE_DIR)
    print("✅ Loaded embedding model: intfloat/e5-small-v2")
except Exception as e:
    print(f"⚠️ Fallback to MiniLM due to {e}")
    _query_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", cache_folder=CACHE_DIR)
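
# Note: E5 is an asymmetric model. Indexed text should be embedded with a
# "passage: " prefix and queries with "query: " (see the intfloat/e5 model
# card); the MiniLM fallback simply treats these prefixes as plain text.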

# ==========================================================
# 3️⃣ Phi-2 LLM Setup
# ==========================================================
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

try:
    MODEL_NAME = "microsoft/phi-2"
    print(f"✅ Loading LLM: {MODEL_NAME}")
    _tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
    _model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        cache_dir=CACHE_DIR,
        torch_dtype="auto",
        low_cpu_mem_usage=True,
    )
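    # Note: torch_dtype="auto" keeps the checkpoint's dtype (float16 for
    # phi-2); on CPU-only hosts, forcing torch.float32 is a common tweak,
    # since half precision is poorly supported on CPU.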
    _answer_model = pipeline(
        "text-generation",
        model=_model,
        tokenizer=_tokenizer,
        device=-1,
        max_new_tokens=250,
        do_sample=False,
    )
    print("✅ Phi-2 generation pipeline ready.")
except Exception as e:
    print(f"⚠️ Phi-2 load failed: {e}")
    try:
        # Fall back to the smaller seq2seq model named in the module
        # docstring; it also runs on CPU and shares the same call interface.
        _answer_model = pipeline(
            "text2text-generation",
            model="google/flan-t5-base",
            device=-1,
        )
        _tokenizer = _answer_model.tokenizer
        print("✅ Fallback pipeline ready: google/flan-t5-base")
    except Exception as fallback_error:
        print(f"⚠️ Fallback load failed: {fallback_error}")
        _answer_model = None

# ==========================================================
# 4️⃣ Prompt Template
# ==========================================================
PROMPT_TEMPLATE = (
    "You are an expert assistant for enterprise document understanding.\n"
    "Use ONLY the context below to answer the question clearly and factually.\n"
    "If the context doesn’t contain the answer, reply: "
    "'I don't know based on the provided document.'\n\n"
    "Context:\n{context}\n\nQuestion: {query}\nAnswer:"
)
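
# A rendered prompt looks like this (illustrative values):
#
#   Context:
#   Step 2: Click 'Export' to download a CSV summary.
#
#   Question: How do I export a report?
#   Answer: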

# ==========================================================
# 5️⃣ Retrieval Function
# ==========================================================
def retrieve_chunks(query: str, index, chunks: list, top_k: int = 5):
    """Fast FAISS retrieval with E5 embeddings."""
    if index is None or not chunks:
        return []

    try:
        # E5 expects a "query: " prefix on search queries (and "passage: "
        # on indexed text), per the intfloat/e5 model card.
        q_emb = _query_model.encode(
            [f"query: {query.strip()}"],
            convert_to_numpy=True,
            normalize_embeddings=True,
        )[0]
        # Over-fetch so neighbor merging below still yields enough hits.
        _distances, indices = index.search(np.array([q_emb]).astype("float32"), top_k * 2)

        # Merge each hit with its immediate neighbors for continuity.
        selected = set()
        for idx in indices[0]:
            if idx < 0:  # FAISS pads results with -1 when the index is small
                continue
            for i in range(max(0, idx - 1), min(len(chunks), idx + 2)):
                selected.add(i)

        # Return the merged chunks in document order.
        return [chunks[i] for i in sorted(selected)]
    except Exception as e:
        print(f"⚠️ Retrieval error: {e}")
        return []

# ==========================================================
# 6️⃣ Answer Generation Function
# ==========================================================
def generate_answer(query: str, retrieved_chunks: list):
    """Generate grounded answers using Phi-2."""
    if not retrieved_chunks:
        return "Sorry, I couldn’t find relevant information in the document."

    context = "\n".join(chunk.strip() for chunk in retrieved_chunks)
    prompt = PROMPT_TEMPLATE.format(context=context, query=query)

    try:
        result = _answer_model(
            prompt,
            max_new_tokens=250,
            do_sample=False,
            pad_token_id=_tokenizer.eos_token_id,
        )
        # The text-generation pipeline returns the prompt plus the
        # completion; keep only the newly generated part.
        text = result[0]["generated_text"]
        if text.startswith(prompt):
            text = text[len(prompt):]
        return text.strip()
    except Exception as e:
        print(f"⚠️ Generation failed: {e}")
        return "⚠️ Error: Could not generate an answer at the moment."

# ==========================================================
# 7️⃣ Local Test (optional)
# ==========================================================
if __name__ == "__main__":
    from vectorstore import build_faiss_index
    dummy_chunks = [
        "Step 1: Open the dashboard and navigate to reports.",
        "Step 2: Click 'Export' to download a CSV summary.",
        "Step 3: Review the generated report in your downloads folder."
    ]

    embeddings = [
        _query_model.encode([f"passage: {chunk}"], convert_to_numpy=True, normalize_embeddings=True)[0]
        for chunk in dummy_chunks
    ]
    index = build_faiss_index(embeddings)
    query = "What are the steps to export a report?"
    retrieved = retrieve_chunks(query, index, dummy_chunks)
    print("🔍 Retrieved:", retrieved)
    print("💬 Answer:", generate_answer(query, retrieved))