SanskarModi committed on
Commit
4b0e0a6
·
1 Parent(s): 2cfed75

added evaluation

Browse files
backend/app/evaluation/ablation.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Ablation study for AtlasRAG retrieval."""
2
+
3
+ from app.evaluation.metrics import coverage, diversity, recall_at_k
4
+ from app.evaluation.test_queries import TEST_QUERIES
5
+ from app.evaluation.utils import extract_pages
6
+ from app.retrieval.retrieve import hybrid_graph_search
7
+ from app.retrieval.vector_store import vector_search
8
+
9
+
10
def _report_metrics(name: str, pages: list[int], expected: set[int]) -> None:
    """Print one labelled metrics section (Recall@5, coverage, diversity).

    Args:
        name: Section heading, e.g. ``"VECTOR ONLY"``.
        pages: Page numbers retrieved for the query, in rank order.
        expected: Ground-truth page numbers for the query.
    """
    print(name)
    print(f"Recall@5: {recall_at_k(pages, expected):.2f}")
    print(f"Coverage: {coverage(pages)}")
    print(f"Diversity: {diversity(pages):.2f}\n")


def run_ablation() -> None:
    """Run the ablation study: vector-only vs vector+graph retrieval.

    For every query in ``TEST_QUERIES``, retrieves the top-5 chunks with
    both strategies and prints the same metric block for each, so the two
    configurations can be compared side by side on stdout.
    """
    print("\n=== AtlasRAG Ablation Study ===\n")

    for item in TEST_QUERIES:
        query = item["query"]
        expected = item["expected_pages"]

        print("-" * 70)
        print(f"Query: {query}\n")

        # Same top_k for both arms so the metrics are directly comparable.
        vector_pages = extract_pages(vector_search(query, top_k=5))
        hybrid_pages = extract_pages(hybrid_graph_search(query, top_k=5))

        # Shared helper avoids the previously duplicated print logic.
        _report_metrics("VECTOR ONLY", vector_pages, expected)
        _report_metrics("VECTOR + GRAPH", hybrid_pages, expected)

    print("Ablation complete.\n")


if __name__ == "__main__":
    run_ablation()
backend/app/evaluation/compare_baseline.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Compare Vector Search vs Hybrid Graph-RAG."""
2
+
3
+ from app.evaluation.metrics import coverage, diversity, recall_at_k
4
+ from app.evaluation.test_queries import TEST_QUERIES
5
+ from app.evaluation.utils import extract_pages
6
+ from app.retrieval.retrieve import hybrid_graph_search
7
+ from app.retrieval.vector_store import vector_search
8
+
9
+
10
def _print_block(
    *,
    name: str,
    pages: list[int],
    expected: set[int],
) -> None:
    """Emit one labelled metrics section for a retrieval run.

    Args:
        name: Section heading for the retrieval method.
        pages: Retrieved page numbers, in rank order.
        expected: Ground-truth page numbers for the query.
    """
    section = [
        name,
        f"Pages: {pages}",
        f"Recall@5: {recall_at_k(pages, expected):.2f}",
        f"Coverage: {coverage(pages)}",
        f"Diversity: {diversity(pages):.2f}",
    ]
    for row in section:
        print(row)
    print()
22
+
23
+
24
def run_comparison() -> None:
    """Compare plain vector search against hybrid Graph-RAG retrieval.

    Iterates over ``TEST_QUERIES``, retrieves the top-5 chunks with each
    method, and prints a metrics block per method via ``_print_block``.
    """
    print("\n=== AtlasRAG Retrieval Comparison ===\n")

    for case in TEST_QUERIES:
        question = case["query"]
        gold_pages = case["expected_pages"]
        question_kind = case["type"]

        print("-" * 70)
        print(f"Query ({question_kind}): {question}")
        print(f"Expected pages: {sorted(gold_pages)}\n")

        # Insertion order of this dict fixes the print order:
        # vector first, then hybrid — matching the original layout.
        pages_by_method = {
            "VECTOR SEARCH": extract_pages(vector_search(question, top_k=5)),
            "HYBRID GRAPH-RAG": extract_pages(
                hybrid_graph_search(question, top_k=5)
            ),
        }

        for label, found_pages in pages_by_method.items():
            _print_block(name=label, pages=found_pages, expected=gold_pages)

    print("Comparison complete.\n")


if __name__ == "__main__":
    run_comparison()
backend/app/evaluation/metrics.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Evaluation metrics for retrieval quality."""
2
+
3
+ from typing import Iterable, Set
4
+
5
+
6
def recall_at_k(retrieved_pages: Iterable[int], expected_pages: Set[int]) -> float:
    """Compute Recall@K: fraction of expected pages that were retrieved.

    The previous implementation returned ``float(bool(...))`` — a binary
    hit@k signal — while being labelled (and printed as) recall. True
    recall is ``|retrieved ∩ expected| / |expected|``.

    Args:
        retrieved_pages: Page numbers retrieved (the top-K results).
        expected_pages: Ground-truth page numbers for the query.

    Returns:
        A value in [0.0, 1.0]; 0.0 when ``expected_pages`` is empty.
    """
    expected = set(expected_pages)
    if not expected:
        # No ground truth => nothing can be recalled; avoid ZeroDivisionError.
        return 0.0
    return len(set(retrieved_pages) & expected) / len(expected)
9
+
10
+
11
def coverage(retrieved_pages: Iterable[int]) -> int:
    """Count the distinct pages present in the retrieval results."""
    unique_pages = set(retrieved_pages)
    return len(unique_pages)
14
+
15
+
16
def diversity(retrieved_pages: Iterable[int]) -> float:
    """Unique-to-total ratio of retrieved pages (0.0 for empty input)."""
    page_list = list(retrieved_pages)
    # Guard the empty case explicitly to avoid dividing by zero.
    return len(set(page_list)) / len(page_list) if page_list else 0.0
backend/app/evaluation/retrievers.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Evaluation-only retrievers."""
2
+
3
+ from typing import List
4
+
5
+ from app.models.retrieval import ScoredChunk
6
+ from app.retrieval.vector_store import vector_search
7
+
8
+
9
def vector_only_search(query: str, top_k: int) -> List[ScoredChunk]:
    """Baseline retriever: delegate straight to pure vector search."""
    results: List[ScoredChunk] = vector_search(query, top_k=top_k)
    return results
backend/app/evaluation/test_queries.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Evaluation queries for AtlasRAG."""
2
+
3
# Fixed evaluation set used by both the ablation and comparison scripts.
# Each entry holds:
#   "query"          - the natural-language question to retrieve for
#   "expected_pages" - ground-truth page numbers that should appear in the
#                      top results (presumably pages of the indexed
#                      Transformer paper — TODO confirm against the corpus)
#   "type"           - coarse query category used only for display
TEST_QUERIES = [
    {
        "query": "What is scaled dot-product attention?",
        "expected_pages": {3, 4},
        "type": "localized",  # answer confined to adjacent pages
    },
    {
        "query": "How does self-attention replace recurrence and convolution?",
        "expected_pages": {1, 2, 5},
        "type": "distributed",  # evidence spread across the document
    },
    {
        "query": "Compare encoder, decoder, and encoder-decoder architectures",
        "expected_pages": {2, 3},
        "type": "comparative",  # requires contrasting multiple sections
    },
    {
        "query": "What role does positional encoding play in the Transformer model?",
        "expected_pages": {2, 6},
        "type": "distributed",
    },
]
backend/app/evaluation/utils.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility helpers for evaluation."""
2
+
3
+ from typing import Iterable
4
+
5
+ from app.models.retrieval import ScoredChunk
6
+
7
+
8
def extract_pages(results: Iterable[ScoredChunk]) -> list[int]:
    """Collect each retrieved chunk's starting page number, in result order."""
    pages: list[int] = []
    for scored in results:
        pages.append(scored.chunk.page_start)
    return pages
backend/app/utils/__init__.py DELETED
File without changes