{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Provable FastMemory Evaluation Pipeline\\n",
"\\n",
"This notebook provides the empirical proof for **FastMemory**'s latency and structural superiority over standard Vector RAG chunking.\\n",
"We will dynamically fetch real adversarial multi-document datasets (like `PatronusAI/financebench`), compile them into Action-Topology Format (ATF), and execute the Rust-based `fastmemory` parser to output the functional logic clusters directly.\\n",
"\\n",
"> **Note:** FastMemory operates by converting raw text into functional memory blocks, allowing your preferred LLM (Llama, Claude) to ingest perfectly grouped contextual hierarchies instead of disconnected semantic vector chunks."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use %pip (not !pip) so packages install into the running kernel's environment.\\n",
"%pip install -q fastmemory datasets pandas nltk\\n",
"import nltk\\n",
"# Fetch the NLTK models needed for tokenization and POS tagging in later cells.\\n",
"# Best-effort: skip silently when offline or when the models are already cached,\\n",
"# but do not swallow KeyboardInterrupt/SystemExit with a bare except.\\n",
"try:\\n",
" nltk.download('punkt', quiet=True)\\n",
" nltk.download('punkt_tab', quiet=True)\\n",
" nltk.download('averaged_perceptron_tagger_eng', quiet=True)\\n",
"except Exception:\\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import re\\n",
"import time\\n",
"import string\\n",
"from datasets import load_dataset\\n",
"import fastmemory\\n",
"import json\\n",
"from nltk.tokenize import word_tokenize\\n",
"from nltk.tag import pos_tag\\n",
"\\n",
"# Frequent function/determiner words excluded from Data_Connections candidates.\\n",
"STOP_WORDS = {\\\"this\\\", \\\"that\\\", \\\"these\\\", \\\"those\\\", \\\"when\\\", \\\"where\\\", \\\"which\\\", \\\"what\\\", \\\"there\\\", \\\"their\\\", \\\"after\\\", \\\"before\\\", \\\"will\\\", \\\"have\\\", \\\"with\\\", \\\"from\\\"}\\n",
"# NOTE(review): despite the name, this is a length + stop-word heuristic, not\\n",
"# POS-based noun extraction: it strips punctuation, then keeps lowercased words\\n",
"# longer than 4 characters that are not in STOP_WORDS.\\n",
"def extract_nouns(sentence):\\n",
" words = sentence.translate(str.maketrans('', '', string.punctuation)).split()\\n",
" return [w.lower() for w in words if len(w) > 4 and w.lower() not in STOP_WORDS]\\n",
"\\n",
"# Build one ATF markdown block per sentence and return their concatenation.\\n",
"# Each block carries an ID, an Action name derived from the first two POS-tagged\\n",
"# nouns, the raw sentence as Logic, and bracketed Data_Connections entries.\\n",
"def generate_strict_atf(sentences):\\n",
" atfs = []\\n",
" for i, s in enumerate(sentences):\\n",
" my_id = f\\\"ATF_S_{i}\\\"\\n",
" # POS-tag the sentence; keep title-cased nouns (tags NN*) longer than 2 chars.\\n",
" tagged = pos_tag(word_tokenize(s))\\n",
" nouns = [word.title() for (word, pos) in tagged if pos.startswith('NN') and len(word) > 2]\\n",
" action_name = \\\"Process_\\\" + \\\"_\\\".join(nouns[:2]) if nouns else f\\\"Parse_{i}\\\"\\n",
" \\n",
" # Strict brackets required by fastmemory parser.rs\\n",
" context_str = \\\", \\\".join([f\\\"[{n}]\\\" for n in extract_nouns(s)[:3]])\\n",
" if not context_str:\\n",
" # No usable keywords: fall back to linking the previous record (clamped at 0).\\n",
" context_str = f\\\"[Record_{max(0, i-1)}]\\\"\\n",
" \\n",
" atf = f\\\"## [ID: {my_id}]\\\\n\\\"\\n",
" atf += f\\\"**Action:** {action_name}\\\\n\\\"\\n",
" atf += f\\\"**Input:** {{Context}}\\\\n\\\"\\n",
" atf += f\\\"**Logic:** {s}\\\\n\\\"\\n",
" atf += f\\\"**Data_Connections:** {context_str}\\\\n\\\"\\n",
" atf += f\\\"**Access:** Role_Analyst\\\\n\\\"\\n",
" atf += f\\\"**Events:** Trigger_Analysis\\\\n\\\\n\\\"\\n",
" atfs.append(atf)\\n",
" return \\\"\\\".join(atfs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Empirical Execution Verification\\n",
"We will now parse live data and run `fastmemory.process_markdown()`. Wait times should be sub-0.5 seconds."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Pull a small sample of the benchmark and time fastmemory on each document.\\n",
"dataset = load_dataset(\\\"PatronusAI/financebench\\\", split=\\\"train\\\").select(range(5))\\n",
"\\n",
"for i, row in enumerate(dataset):\\n",
" # Evidence field name varies across dataset revisions; try both before giving up.\\n",
" text = str(row.get(\\\"evidence_text\\\", row.get(\\\"evidence\\\", \\\"\\\")))\\n",
" # Split on sentence-ending punctuation; drop fragments of 10 chars or fewer.\\n",
" sentences = [s.strip() for s in re.split(r'(?<=[.!?]) +', text.replace('\\\\n', ' ')) if len(s) > 10]\\n",
" if not sentences: continue\\n",
" \\n",
" markdown_atf = generate_strict_atf(sentences)\\n",
" \\n",
" # perf_counter() is monotonic and high-resolution; time.time() can jump with\\n",
" # wall-clock adjustments, making it unsuitable for short latency measurements.\\n",
" start_time = time.perf_counter()\\n",
" json_graph = fastmemory.process_markdown(markdown_atf)\\n",
" latency = time.perf_counter() - start_time\\n",
" \\n",
" try:\\n",
" data = json.loads(json_graph)\\n",
" blocks = len(data)\\n",
" except (json.JSONDecodeError, TypeError):\\n",
" # Treat unparseable parser output as zero blocks; a bare except here would\\n",
" # also hide unrelated failures such as KeyboardInterrupt.\\n",
" blocks = 0\\n",
" \\n",
" print(f\\\"Document {i+1}: Processed {len(sentences)} logic nodes into {blocks} Structural Blocks in {latency:.4f}s\\\")\\n",
" \\n",
"print(\\\"\\\\nExecution metrics successfully captured.\\\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
} |