# NLP-RAG / test.py
# Provenance: synced backend Docker context from GitHub main (commit c27a4e3, verified).
import os
# Cap BLAS/OpenMP thread pools to a single thread. These must be set BEFORE
# any numeric library that reads them is imported (they are read once at
# library load time) — presumably to avoid thread oversubscription when the
# embedding/rerank models run; confirm against deployment constraints.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
import sys
import traceback
from datetime import datetime
from dotenv import load_dotenv
# Project-local modules: config, vector store access, retrieval pipeline,
# and the registry of chunking techniques produced at ingest time.
from config_loader import cfg
from data.vector_db import get_index_by_name
from retriever.retriever import HybridRetriever
from retriever.processor import ChunkProcessor
from data.ingest import CHUNKING_TECHNIQUES
def generate_retrieval_report(all_results, queries, output_file="retrieval_report.md"):
    """
    Generate a Markdown document summarizing the retrieved chunks for each
    query, chunking technique, and retrieval strategy.

    Args:
        all_results: mapping of query index -> {"<technique> + <strategy>":
            {"chunks": list[str], "score": float}}, as built by main().
        queries: list of query strings; the keys of all_results index into it.
        output_file: path of the Markdown report to write (UTF-8).

    Side effects:
        Writes the report to ``output_file`` and prints its location.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Accumulate fragments and join once at the end — avoids the quadratic
    # cost of repeated `content +=` on a growing string.
    parts = [f"# Retrieval Testing Report\n\n*Generated:* {timestamp}\n\n"]
    parts.append("## Test Queries\n\n")
    for i, q in enumerate(queries, 1):
        parts.append(f"{i}. {q}\n")
    parts.append("\n## Retrieval Results by Query\n\n")
    for q_idx, q_results in all_results.items():
        parts.append(f"### Query {q_idx + 1}: {queries[q_idx]}\n\n")
        for tech_strat_key, chunks_data in q_results.items():
            parts.append(f"#### Strategy & Technique: {tech_strat_key}\n\n")
            chunks = chunks_data.get('chunks', [])
            # A missing score is reported as 0 rather than failing the report.
            score = chunks_data.get('score', 0)
            parts.append(f"**ChunkScore:** {score:.4f} | **Chunks retrieved:** {len(chunks)}\n\n")
            if not chunks:
                parts.append("*No chunks retrieved.*\n\n")
            else:
                for i, chunk in enumerate(chunks, 1):
                    parts.append(f"**[Chunk {i}]** ({len(chunk)} chars):\n")
                    parts.append(f"```text\n{chunk}\n```\n\n")
        # Horizontal rule separating queries.
        parts.append("---\n\n")
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("".join(parts))
    print(f"\nRetrieval report saved to: {output_file}")
def main():
    """
    Evaluate retrieval quality over every (chunking technique, retrieval
    strategy) combination for a fixed set of CBT-related test queries,
    then write a Markdown report via generate_retrieval_report().

    Raises:
        RuntimeError: if PINECONE_API_KEY is not present in the environment.
    """
    # Load environment variables
    load_dotenv()
    pinecone_key = os.getenv("PINECONE_API_KEY")
    if not pinecone_key:
        raise RuntimeError("PINECONE_API_KEY not found in environment variables")

    test_queries = [
        "What is cognitive behavior therapy and how does it work?",
        "I feel like a complete failure because I made a mistake at work today. Everyone must think I am incompetent, and I will probably get fired. I just want to hide.",
        "No matter what I do, my anxiety will not go away. I am constantly worried about the future and avoid social situations because of it.",
        "I have been feeling really down lately and have no energy. It feels like nothing will ever get better and there is no point in trying."
    ]

    # TECHNIQUES_TO_EVALUATE = ["fixed", "semantic", "markdown", "page"]
    # Use all 7 chunking techniques from ingest.py
    CHUNKING_TECHNIQUES_FILTERED = CHUNKING_TECHNIQUES
    print(f"Testing all {len(CHUNKING_TECHNIQUES_FILTERED)} chunking techniques:")
    for tech in CHUNKING_TECHNIQUES_FILTERED:
        print(f" - {tech['name']}: {tech['description']}")

    # Cartesian product of these strategies with the chunking techniques
    # defines the evaluation grid.
    RETRIEVAL_STRATEGIES = [
        {"mode": "semantic", "use_mmr": False, "label": "semantic-no-mmr"},
        {"mode": "semantic", "use_mmr": True, "label": "semantic-with-mmr"},
        {"mode": "hybrid", "use_mmr": False, "label": "hybrid-no-mmr"},
        {"mode": "hybrid", "use_mmr": True, "label": "hybrid-with-mmr"},
        {"mode": "bm25", "use_mmr": False, "label": "bm25-no-mmr"},
    ]

    # Heavy model loads happen once, up front, outside the query loop.
    print("Initializing ChunkProcessor to load Embedding Model...")
    proc = ChunkProcessor(model_name=cfg.processing['embedding_model'], verbose=False)
    print("Initializing HybridRetriever...")
    retriever = HybridRetriever(
        embed_model=proc.encoder,
        rerank_model_name='jinaai/jina-reranker-v1-tiny-en',
        verbose=False
    )

    all_query_results = {}
    for query_idx, query in enumerate(test_queries):
        print(f"\n{'='*80}")
        print(f"PROCESSING QUERY {query_idx + 1}/{len(test_queries)}: {query}")
        print(f"{'='*80}")
        query_results = {}

        # Connect to the single index where all techniques are stored with metadata differentiation
        index_name = "cbt-book-recursive"
        try:
            index = get_index_by_name(pinecone_key, index_name)
            stats = index.describe_index_stats()
            # An empty index is tolerated so sparse (bm25) retrieval can
            # still be exercised.
            if stats.get('total_vector_count', 0) == 0:
                print(f" [!] Warning: Index {index_name} is empty. Proceeding for sparse test.")
        except Exception as e:
            # Connection failure skips this query but lets the run continue.
            print(f" [X] Failed to connect to index {index_name}: {e}")
            continue

        for technique in CHUNKING_TECHNIQUES_FILTERED:
            technique_name = technique['name']
            for strategy in RETRIEVAL_STRATEGIES:
                result_key = f"{technique_name} + {strategy['label']}"
                print(f"\nEvaluating: {result_key}")
                try:
                    context_chunks, chunk_score = retriever.search(
                        query=query,
                        index=index,
                        mode=strategy['mode'],
                        rerank_strategy="cross-encoder",
                        use_mmr=strategy['use_mmr'],
                        top_k=25,
                        final_k=4,
                        technique_name=technique_name,
                        verbose=False,
                        test=True
                    )
                    query_results[result_key] = {
                        'chunks': context_chunks,
                        'score': chunk_score
                    }
                    print(f" -> Retrieved {len(context_chunks)} chunks (Score: {chunk_score:.4f})")
                except Exception as e:
                    # A failing combination is logged and skipped; the grid
                    # evaluation continues with the remaining combinations.
                    print(f" -> Error retrieving for {result_key}: {e}")
        all_query_results[query_idx] = query_results

    # Generate isolated retrieval test report
    generate_retrieval_report(all_query_results, test_queries)


if __name__ == '__main__':
    main()