Spaces:
Running
Running
| """Baseline RAG eval — runs with OLD pipeline (no reranking, no headers, no query expansion, sentence-aware chunking).""" | |
| import os, sys, json, time | |
| from pathlib import Path | |
# Directory that holds this script; the .env file, the src/ tree and the
# fallback data root are all resolved relative to it.
_SCRIPT_DIR = Path(__file__).resolve().parent

# python-dotenv is optional: when it is not installed the .env file is skipped.
try:
    from dotenv import load_dotenv

    load_dotenv(_SCRIPT_DIR / ".env")
except ImportError:
    pass

# Put the local src/ directory at the front of sys.path before the project
# imports that follow.
SRC_DIR = _SCRIPT_DIR / "src"
sys.path.insert(0, str(SRC_DIR))

# When NOTEBOOKLM_DATA_ROOT is unset or empty, point it at a scratch directory
# next to this script (created on demand).
if not os.environ.get("NOTEBOOKLM_DATA_ROOT"):
    _root = _SCRIPT_DIR / "tmp_eval_data"
    _root.mkdir(exist_ok=True)
    os.environ["NOTEBOOKLM_DATA_ROOT"] = str(_root)

# Force baseline settings
os.environ["NOTEBOOKLM_QUERY_EXPANSION"] = "off"
| from ingestion.chunking import sentence_aware_chunk | |
| from ingestion.embedder import embed_texts | |
| from ingestion.indexer import upsert_chunks | |
| from notebooklm_clone.notebooks import create_notebook | |
| from notebooklm_clone import retrieval as retrieval_mod | |
# The baseline runs without reranking: replace the retrieval module's _rerank
# with a pass-through that keeps the incoming candidate order. The original
# callable stays reachable through _original_rerank.
_original_rerank = retrieval_mod._rerank


def _noop_rerank(query, candidates, k):
    """Return the first ``k`` candidates in their existing order.

    ``query`` is accepted only so the signature matches the call site; it is
    not used.
    """
    top_k = candidates[:k]
    return top_k


retrieval_mod._rerank = _noop_rerank
| from notebooklm_clone.retrieval import retrieve | |
| import textwrap | |
# Fixed evaluation corpus: six short passages (Solar System overview, Earth,
# Mars, Jupiter, photosynthesis, water cycle) that the eval queries target.
# NOTE(review): no blank lines separate the passages here — confirm that is
# intended, since it may influence how the chunker splits the text.
SAMPLE_DOCUMENT = textwrap.dedent(
    """\
    The Solar System consists of the Sun and the objects that orbit it, whether
    they orbit it directly or indirectly. Of the objects that orbit the Sun
    directly, the largest are the eight planets. The four smaller inner system
    planets, Mercury, Venus, Earth, and Mars, are terrestrial planets, composed
    primarily of rock and metal. The four outer system planets are giant planets,
    being substantially more massive than the terrestrials. The two largest,
    Jupiter and Saturn, are gas giants, composed mainly of hydrogen and helium.
    The two outermost planets, Uranus and Neptune, are ice giants, composed
    mainly of substances with relatively high melting points compared with
    hydrogen and helium, called volatiles, such as water, ammonia, and methane.
    Earth is the third planet from the Sun and the only astronomical object
    known to harbor life. About 71% of Earth's surface is made up of the
    ocean, dwarfing Earth's polar ice, lakes, and rivers. The remaining 29%
    of Earth's surface is land, consisting of continents and islands.
    Mars is the fourth planet and has a thin atmosphere composed primarily of
    carbon dioxide. Mars has two small moons, Phobos and Deimos, which are
    thought to be captured asteroids. Mars is often called the "Red Planet"
    because iron oxide prevalent on its surface gives it a reddish appearance.
    Jupiter is the largest planet in the Solar System, with a mass more than
    two and a half times that of all the other planets combined. Jupiter has
    at least 95 known moons, including the four large Galilean moons discovered
    by Galileo Galilei in 1610. The Great Red Spot is a persistent high-pressure
    region in the atmosphere of Jupiter, producing an anticyclonic storm that is
    the largest in the Solar System. It has been continuously observed since 1830.
    Photosynthesis is a process used by plants and other organisms to convert
    light energy, normally from the Sun, into chemical energy that can later be
    released to fuel the organisms' activities. In most cases, oxygen is also
    released as a waste product. Most plants, algae, and cyanobacteria perform
    photosynthesis. Such organisms are called photoautotrophs.
    The water cycle, also known as the hydrological cycle, describes the
    continuous movement of water within the Earth and atmosphere. Water
    evaporates from the surface of the ocean, rises into the atmosphere,
    cools, condenses into rain or snow in clouds, and falls again to the
    surface as precipitation. About 90% of the water in the atmosphere comes
    from the evaporation of ocean water.
    """
)
# Evaluation set. Each entry holds the query text, the lowercase keyword
# fragments used to judge whether a retrieved chunk counts as relevant, and a
# short topic label used in the per-query report.
EVAL_QUERIES = [
    {
        "query": "What are the inner planets of the solar system?",
        "relevant_keywords": ["mercury", "venus", "earth", "mars", "terrestrial", "inner"],
        "topic": "Inner planets",
    },
    {
        "query": "What is the Great Red Spot?",
        "relevant_keywords": ["jupiter", "great red spot", "anticyclonic", "storm", "high-pressure"],
        "topic": "Jupiter's GRS",
    },
    {
        "query": "How does photosynthesis work?",
        "relevant_keywords": ["photosynthesis", "light energy", "chemical energy", "oxygen", "plants"],
        "topic": "Photosynthesis",
    },
    {
        "query": "Describe the water cycle.",
        "relevant_keywords": ["water cycle", "hydrological", "evaporat", "precipitation", "condens"],
        "topic": "Water cycle",
    },
    {
        "query": "What is the atmosphere of Mars like?",
        "relevant_keywords": ["mars", "atmosphere", "carbon dioxide", "thin"],
        "topic": "Mars atmosphere",
    },
    {
        "query": "Which planets are gas giants?",
        "relevant_keywords": ["jupiter", "saturn", "gas giant", "hydrogen", "helium"],
        "topic": "Gas giants",
    },
    {
        "query": "What percentage of Earth's surface is ocean?",
        "relevant_keywords": ["71%", "ocean", "earth", "surface"],
        "topic": "Earth's ocean",
    },
    {
        "query": "What moons does Mars have?",
        "relevant_keywords": ["phobos", "deimos", "mars", "moons", "asteroid"],
        "topic": "Mars moons",
    },
]
def _keyword_hit(text, keywords):
    """Return True when at least one keyword occurs in ``text``.

    Matching is a case-insensitive substring test, so a keyword fragment such
    as ``"evaporat"`` matches both "evaporates" and "evaporation".
    """
    haystack = text.lower()
    for keyword in keywords:
        if keyword.lower() in haystack:
            return True
    return False
def precision_at_k(results, keywords, k):
    """Return the fraction of the first ``k`` results that hit a keyword.

    Each result is a mapping whose ``"text"`` value is checked with
    ``_keyword_hit``. The denominator is the number of results actually
    present in the window (which may be fewer than ``k``); an empty window
    yields 0.0.
    """
    window = results[:k]
    if not window:
        return 0.0
    hits = 0
    for item in window:
        if _keyword_hit(item["text"], keywords):
            hits += 1
    return hits / len(window)
def recall_at_k(results, keywords, k, total_relevant):
    """Return keyword hits in the first ``k`` results over ``total_relevant``.

    The ratio is capped at 1.0, so more hits than ``total_relevant`` still
    reports full recall. When ``total_relevant`` is 0 the result is 1.0.
    """
    if total_relevant == 0:
        return 1.0
    matched = [item for item in results[:k] if _keyword_hit(item["text"], keywords)]
    ratio = len(matched) / total_relevant
    return min(ratio, 1.0)
def reciprocal_rank(results, keywords):
    """Return 1/rank of the first result whose text hits a keyword.

    Ranks are 1-based. Returns 0.0 when no result matches (including when
    ``results`` is empty).
    """
    first_hit_rank = next(
        (
            rank
            for rank, item in enumerate(results, start=1)
            if _keyword_hit(item["text"], keywords)
        ),
        None,
    )
    if first_hit_rank is None:
        return 0.0
    return 1.0 / first_hit_rank
def main():
    """Run the baseline retrieval evaluation and save the metrics as JSON.

    Steps:
      1. Create a fresh notebook for a throwaway eval user.
      2. Chunk ``SAMPLE_DOCUMENT`` with ``sentence_aware_chunk`` (no headers),
         embed the chunks and upsert them into the notebook's index.
      3. Run every query in ``EVAL_QUERIES`` through ``retrieve`` and record
         P@1, P@3, P@5, MRR, Recall@5 and wall-clock latency per query.
      4. Average the metrics, write the full report to
         ``tmp_eval_baseline.json`` next to this script, and print the
         aggregate metrics.
    """
    print("=== BASELINE EVAL (no reranking, no headers, no query expansion, sentence-aware chunks) ===\n")

    eval_user = "_eval_user_tmp"
    # The time suffix gives each run its own notebook name.
    nb_name = f"Baseline {time.strftime('%H%M%S')}"
    notebook = create_notebook(eval_user, nb_name)
    notebook_id = notebook["id"]
    print(f"Notebook: {notebook_id}")

    # OLD pipeline: sentence_aware_chunk, NO header
    chunks = sentence_aware_chunk(SAMPLE_DOCUMENT, 1200, 200)
    embeddings = embed_texts([c["chunk_text"] for c in chunks])
    location_hints = [
        {"start_char": c["start_char"], "end_char": c["end_char"]} for c in chunks
    ]
    summary = upsert_chunks(
        username=eval_user,
        notebook_id=notebook_id,
        source_id="eval_source_001",
        chunks=chunks,
        embeddings=embeddings,
        meta={"source_name": "sample_article.txt", "location_hints": location_hints},
    )
    print(f"Indexed {summary['chunk_count']} chunks (sentence-aware, no headers)")

    retrieval_k = 5
    # NOTE(review): recall assumes two relevant chunks per query — confirm
    # this matches how the document is actually chunked.
    assumed_relevant = 2

    results_per_query = []
    for q in EVAL_QUERIES:
        keywords = q["relevant_keywords"]
        # Time only the retrieve() call, not the metric computation.
        t0 = time.perf_counter()
        results = retrieve(eval_user, notebook_id, q["query"], k=retrieval_k)
        latency = (time.perf_counter() - t0) * 1000
        results_per_query.append({
            "topic": q["topic"],
            "P@1": precision_at_k(results, keywords, 1),
            "P@3": precision_at_k(results, keywords, 3),
            "P@5": precision_at_k(results, keywords, 5),
            "MRR": reciprocal_rank(results, keywords),
            "Recall@5": recall_at_k(results, keywords, retrieval_k, assumed_relevant),
            "latency_ms": latency,
        })

    def avg(key):
        """Mean of one metric across all queries; 0.0 when there are none."""
        if not results_per_query:
            return 0.0
        return sum(r[key] for r in results_per_query) / len(results_per_query)

    output = {
        "config": "BASELINE: sentence_aware_chunk(1200/200), no headers, no reranking, no query expansion",
        "retrieval_metrics": {
            "avg_MRR": round(avg("MRR"), 4),
            "avg_P@1": round(avg("P@1"), 4),
            # P@3 is recorded per query, so report its average alongside the others.
            "avg_P@3": round(avg("P@3"), 4),
            "avg_P@5": round(avg("P@5"), 4),
            "avg_Recall@5": round(avg("Recall@5"), 4),
            "avg_latency_ms": round(avg("latency_ms"), 1),
        },
        "per_query": results_per_query,
    }

    out_path = Path(__file__).resolve().parent / "tmp_eval_baseline.json"
    out_path.write_text(json.dumps(output, indent=2, default=str), encoding="utf-8")
    print(f"\nBaseline results saved to: {out_path}")
    print(json.dumps(output["retrieval_metrics"], indent=2))


if __name__ == "__main__":
    main()