File size: 5,216 Bytes
505f2c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Provable FastMemory Evaluation Pipeline\\n",
    "\\n",
    "This notebook provides the empirical proof for **FastMemory**'s latency and structural superiority over standard Vector RAG chunking.\\n",
    "We will dynamically fetch real adversarial multi-document datasets (like `PatronusAI/financebench`), compile them into Action-Topology Format (ATF), and execute the Rust-based `fastmemory` parser to output the functional logic clusters directly.\\n",
    "\\n",
    "> **Note:** FastMemory operates by converting raw text into functional memory blocks, allowing your preferred LLM (Llama, Claude) to ingest perfectly grouped contextual hierarchies instead of disconnected semantic vector chunks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use %pip (not !pip) so the install targets the active kernel's environment.\\n",
    "%pip install -q fastmemory datasets pandas nltk\\n",
    "import nltk\\n",
    "# Best-effort download of the NLTK models needed for tokenization/POS tagging.\\n",
    "# except Exception (not a bare except) so KeyboardInterrupt/SystemExit still\\n",
    "# propagate; a failed download surfaces later as an NLTK LookupError.\\n",
    "try:\\n",
    "    nltk.download('punkt', quiet=True)\\n",
    "    nltk.download('punkt_tab', quiet=True)\\n",
    "    nltk.download('averaged_perceptron_tagger_eng', quiet=True)\\n",
    "except Exception:\\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\\n",
    "import time\\n",
    "import string\\n",
    "from datasets import load_dataset\\n",
    "import fastmemory\\n",
    "import json\\n",
    "from nltk.tokenize import word_tokenize\\n",
    "from nltk.tag import pos_tag\\n",
    "\\n",
    "# Function words excluded from context extraction; all lowercase because they\\n",
    "# are compared against w.lower() in extract_nouns().\\n",
    "STOP_WORDS = {\\\"this\\\", \\\"that\\\", \\\"these\\\", \\\"those\\\", \\\"when\\\", \\\"where\\\", \\\"which\\\", \\\"what\\\", \\\"there\\\", \\\"their\\\", \\\"after\\\", \\\"before\\\", \\\"will\\\", \\\"have\\\", \\\"with\\\", \\\"from\\\"}\\n",
    "def extract_nouns(sentence):\\n",
    "    # NOTE(review): despite the name, this is a length/stop-word heuristic, not\\n",
    "    # POS-based noun extraction: it strips punctuation, then keeps lowercased\\n",
    "    # words longer than 4 characters that are not in STOP_WORDS.\\n",
    "    words = sentence.translate(str.maketrans('', '', string.punctuation)).split()\\n",
    "    return [w.lower() for w in words if len(w) > 4 and w.lower() not in STOP_WORDS]\\n",
    "\\n",
    "def generate_strict_atf(sentences):\\n",
    "    # Compile each sentence into one Action-Topology Format (ATF) block and\\n",
    "    # return the concatenated markdown for fastmemory.process_markdown().\\n",
    "    atfs = []\\n",
    "    for i, s in enumerate(sentences):\\n",
    "        my_id = f\\\"ATF_S_{i}\\\"\\n",
    "        # POS-tag the sentence; words tagged NN* (nouns) and longer than two\\n",
    "        # characters become candidates for the action name (first two used).\\n",
    "        tagged = pos_tag(word_tokenize(s))\\n",
    "        nouns = [word.title() for (word, pos) in tagged if pos.startswith('NN') and len(word) > 2]\\n",
    "        action_name = \\\"Process_\\\" + \\\"_\\\".join(nouns[:2]) if nouns else f\\\"Parse_{i}\\\"\\n",
    "        \\n",
    "        # Strict brackets required by fastmemory parser.rs\\n",
    "        context_str = \\\", \\\".join([f\\\"[{n}]\\\" for n in extract_nouns(s)[:3]])\\n",
    "        if not context_str:\\n",
    "            # Fallback link to the previous record (index clamped at 0) so the\\n",
    "            # Data_Connections field is never empty.\\n",
    "            context_str = f\\\"[Record_{max(0, i-1)}]\\\"\\n",
    "            \\n",
    "        atf = f\\\"## [ID: {my_id}]\\\\n\\\"\\n",
    "        atf += f\\\"**Action:** {action_name}\\\\n\\\"\\n",
    "        atf += f\\\"**Input:** {{Context}}\\\\n\\\"\\n",
    "        atf += f\\\"**Logic:** {s}\\\\n\\\"\\n",
    "        atf += f\\\"**Data_Connections:** {context_str}\\\\n\\\"\\n",
    "        atf += f\\\"**Access:** Role_Analyst\\\\n\\\"\\n",
    "        atf += f\\\"**Events:** Trigger_Analysis\\\\n\\\\n\\\"\\n",
    "        atfs.append(atf)\\n",
    "    return \\\"\\\".join(atfs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Empirical Execution Verification\\n",
    "We now parse live data and run `fastmemory.process_markdown()`. Per-document parse latency should be well under 0.5 seconds."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = load_dataset(\\\"PatronusAI/financebench\\\", split=\\\"train\\\").select(range(5))\\n",
    "\\n",
    "for i, row in enumerate(dataset):\\n",
    "    # The evidence field name varies across dataset revisions; try both.\\n",
    "    text = str(row.get(\\\"evidence_text\\\", row.get(\\\"evidence\\\", \\\"\\\")))\\n",
    "    # Split on sentence-ending punctuation; drop fragments of 10 chars or fewer.\\n",
    "    sentences = [s.strip() for s in re.split(r'(?<=[.!?]) +', text.replace('\\\\n', ' ')) if len(s) > 10]\\n",
    "    if not sentences: continue\\n",
    "        \\n",
    "    markdown_atf = generate_strict_atf(sentences)\\n",
    "    \\n",
    "    # perf_counter() is monotonic and higher-resolution than time.time(),\\n",
    "    # so the latency figure cannot go negative on clock adjustments.\\n",
    "    start_time = time.perf_counter()\\n",
    "    json_graph = fastmemory.process_markdown(markdown_atf)\\n",
    "    latency = time.perf_counter() - start_time\\n",
    "    \\n",
    "    try:\\n",
    "        data = json.loads(json_graph)\\n",
    "        blocks = len(data)\\n",
    "    except (json.JSONDecodeError, TypeError):\\n",
    "        # Narrowed from a bare except: only malformed or non-string parser\\n",
    "        # output counts as zero blocks; unexpected errors still surface.\\n",
    "        blocks = 0\\n",
    "        \\n",
    "    print(f\\\"Document {i+1}: Processed {len(sentences)} logic nodes into {blocks} Structural Blocks in {latency:.4f}s\\\")\\n",
    "    \\n",
    "print(\\\"\\\\nExecution metrics successfully captured.\\\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}