Sajil Awale commited on
Commit
f204be9
·
0 Parent(s):

Initial commit without PNG

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.dockerignore ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ *.egg
11
+
12
+ # Virtual environments
13
+ .venv/
14
+ venv/
15
+ env/
16
+ ENV/
17
+
18
+ # IDE
19
+ .vscode/
20
+ .idea/
21
+ *.swp
22
+ *.swo
23
+ *~
24
+
25
+ # Jupyter
26
+ .ipynb_checkpoints/
27
+ *.ipynb
28
+
29
+ # Environment files
30
+ .env
+ .env.local
31
+ .env.*.local
32
+
33
+ # Git
34
+ .git/
35
+ .gitignore
36
+
37
+ # Temporary files
38
+ *.log
39
+ *.sqlite
40
+ *.db
41
+ temp_data/
42
+ qdrant_db/
43
+ merchant_cache.json
44
+
45
+ # Data files (upload via UI instead)
46
+ demo_data/
47
+ data/
48
+
49
+ # Logs
50
+ logs/
51
+ *.log
52
+
53
+ # Docker
54
+ Dockerfile
55
+ docker-compose.yml
56
+ .dockerignore
57
+
58
+ # Documentation
59
+ README.md
60
+ ARCHITECTURE.md
61
+ *.md
62
+
63
+ # macOS
64
+ .DS_Store
65
+
66
+ # Notebooks (testing only)
67
+ notebooks/
.env ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ LLM_PROVIDER=google # FOR INSTRCTOR
2
+ GOOGLE_API_KEY=<your-google-api-key>
3
+ GOOGLE_APPLICATION_CREDENTIALS=/Users/sawale/gen-lang-client-0311515393-ea35b1e8872e.json
4
+ GOOGLE_GEMINI_MODEL_NAME=gemini-2.5-flash
5
+ GOOGLE_CLOUD_PROJECT=gen-lang-client-0311515393
6
+ GOOGLE_CLOUD_LOCATION=us-central1
7
+
8
+ OPENAI_API_KEY=<your-openai-api-key>
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ demo_data/*
2
+ .env*
+ *.png
Dockerfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use Python 3.12 slim image
FROM python:3.12-slim

# Set working directory
WORKDIR /app

# Install system dependencies.
#   build-essential: compiler toolchain for Python packages that build from source
#   curl:            used by the HEALTHCHECK below
# --no-install-recommends keeps optional "recommended" packages out of the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Disable Streamlit usage-statistics collection
ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false

# Copy requirements first for better caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code.
# NOTE(review): this copies everything that .dockerignore does not exclude.
# Make sure secret files such as .env are listed there so they never end up in an image layer.
COPY . .

# Create directories for data persistence
RUN mkdir -p /app/data /app/logs

# Expose Streamlit default port
EXPOSE 8501

# Health check against Streamlit's built-in health endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl --fail http://localhost:8501/_stcore/health || exit 1

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV STREAMLIT_SERVER_PORT=8501
ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0

# Run Streamlit app.
# NOTE(review): XSRF protection and CORS are disabled below and runOnSave is a
# development feature; confirm these are required by the hosting platform
# before exposing the container publicly.
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.maxUploadSize=200", "--server.maxMessageSize=200", "--server.runOnSave=true", "--server.enableXsrfProtection=false", "--server.enableCORS=false"]
README.md ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Moneyrag
3
+ emoji: 💰
4
+ colorFrom: purple
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ license: apache-2.0
9
+ short_description: Where did my money go? Chat with your bank statements
10
+ app_port: 8501
11
+ ---
12
+ # MoneyRAG - Personal Finance Transaction Analysis
13
+
14
+ AI-powered financial transaction analysis using RAG (Retrieval-Augmented Generation) with Model Context Protocol (MCP) integration.
15
+
16
+ ## Features
17
+
18
+ - **Smart CSV Ingestion**: Automatically maps any CSV format to standardized transaction schema using LLM
19
+ - **Multi-Provider Support**: Works with Google Gemini and OpenAI models
20
+ - **Merchant Enrichment**: Automatically enriches transactions with web-searched merchant information
21
+ - **Dual Storage**: SQLite for structured queries + Qdrant for semantic search
22
+ - **MCP Integration**: Leverages Model Context Protocol for tool-based agent interactions
23
+ - **Interactive UI**: Streamlit-based web interface for chat-based analysis
24
+ - **Dockerized**: Complete containerized deployment ready for production
25
+
26
+ ## Architecture
27
+
28
+ ```mermaid
29
+ %%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#fff', 'primaryBorderColor': '#333', 'primaryTextColor': '#333', 'lineColor': '#666' }}}%%
30
+
31
+ graph TD
32
+ %% --- Top Layer: Entry Point ---
33
+ subgraph UI["💻 User Interface"]
34
+ Streamlit["🌐 Streamlit Web App<br/><i>Interactive Dashboard</i>"]
35
+ end
36
+
37
+ %% --- Middle Layer: Split Processes ---
38
+
39
+ %% Left Column: Ingestion (The Write Path)
40
+ subgraph Ingestion["📥 Data Pipeline (Write)"]
41
+ direction TB
42
+ CSV["📄 CSV Upload<br/><i>Raw Data</i>"]
43
+ Mapper["🧠 LLM Mapper<br/><i>Schema Norm.</i>"]
44
+ Enrich["🔍 Web Enrich<br/><i>DuckDuckGo</i>"]
45
+
46
+ CSV --> Mapper
47
+ Mapper --> Enrich
48
+ end
49
+
50
+ %% Right Column: Intelligence (The Read Path)
51
+ subgraph Agent["🤖 AI Orchestration (Read)"]
52
+ direction TB
53
+ Brain["🧩 LangGraph Agent<br/><i>Controller</i>"]
54
+ LLM["✨ LLM Model<br/><i>Gemini / GPT-4</i>"]
55
+ Brain <-->|Inference| LLM
56
+ end
57
+
58
+ subgraph MCP["🔧 MCP Tool Server"]
59
+ direction LR
60
+ SQL_Tool["⚡ SQL Tool<br/><i>Structured</i>"]
61
+ Vector_Tool["🎯 Vector Tool<br/><i>Semantic</i>"]
62
+ end
63
+
64
+ %% --- Bottom Layer: Persistence ---
65
+ subgraph Storage["💾 Storage Layer"]
66
+ direction LR
67
+ SQLite[("🗄️ SQLite")]
68
+ Qdrant[("🔮 Qdrant")]
69
+ end
70
+
71
+ %% --- Connections & Logic ---
72
+
73
+ %% 1. User Actions
74
+ Streamlit -->|1. Upload| CSV
75
+ Streamlit -->|3. Query| Brain
76
+
77
+ %% 2. Ingestion to Storage flow
78
+ Enrich -->|2. Store| SQLite
79
+ Enrich -->|2. Embed| Qdrant
80
+
81
+ %% 3. Agent to Tools flow
82
+ Brain -->|4. Route| SQL_Tool
83
+ Brain -->|4. Route| Vector_Tool
84
+
85
+ %% 4. Tools to Storage flow (Vertical alignment matches)
86
+ SQL_Tool <-->|5. Read/Write| SQLite
87
+ Vector_Tool <-->|5. Search| Qdrant
88
+
89
+ %% 5. Return Path
90
+ Brain -.->|6. Response| Streamlit
91
+
92
+ %% --- Styling ---
93
+ classDef ui fill:#E3F2FD,stroke:#1565C0,stroke-width:2px,color:#0D47A1,rx:10,ry:10
94
+ classDef ingest fill:#E8F5E9,stroke:#2E7D32,stroke-width:2px,color:#1B5E20,rx:5,ry:5
95
+ classDef agent fill:#F3E5F5,stroke:#7B1FA2,stroke-width:2px,color:#4A148C,rx:5,ry:5
96
+ classDef mcp fill:#FFF3E0,stroke:#EF6C00,stroke-width:2px,color:#E65100,rx:5,ry:5
97
+ classDef storage fill:#ECEFF1,stroke:#455A64,stroke-width:2px,color:#263238,rx:5,ry:5
98
+
99
+ class Streamlit ui
100
+ class CSV,Mapper,Enrich ingest
101
+ class Brain,LLM agent
102
+ class SQL_Tool,Vector_Tool mcp
103
+ class SQLite,Qdrant storage
104
+
105
+ %% Curve the lines for better readability
106
+ linkStyle default interpolate basis
107
+ ```
108
+
109
+ ## Quick Start
110
+
111
+ ### Docker (Recommended)
112
+
113
+ ```bash
114
+ ./docker-run.sh
115
+ ```
116
+ Choose option 1 to build and run, then open http://localhost:8501
117
+
118
+ ### Local Development
119
+
120
+ ```bash
121
+ python -m venv .venv
122
+ source .venv/bin/activate # Windows: .venv\Scripts\activate
123
+ pip install -r requirements.txt
124
+ streamlit run app.py
125
+ ```
126
+
127
+ Open http://localhost:8501
128
+
129
+ ## Getting Started Resources
130
+
131
+ ### 📚 API Keys
132
+ - **Google Gemini**: [Get API key from Google AI Studio](https://aistudio.google.com/app/apikey)
133
+ - **OpenAI**: [Get API key from OpenAI Platform](https://platform.openai.com/api-keys)
134
+
135
+ ### 📥 Download Transaction History
136
+ - **Chase Credit Card**: [Video Guide](https://www.youtube.com/watch?v=gtAFaP9Lts8)
137
+ - **Discover Credit Card**: [Video Guide](https://www.youtube.com/watch?v=cry6-H5b0PQ)
138
+
139
+ ## Usage
140
+
141
+ 1. Enter your API key in the sidebar
142
+ 2. Upload CSV transaction files
143
+ 3. Ask questions in natural language
144
+
145
+ ### Example Questions
146
+
147
+ - "How much did I spend on restaurants last month?"
148
+ - "What are my top 5 spending categories?"
149
+ - "Show me all transactions over $100"
150
+ - "Find all Starbucks transactions"
151
+ - "Analyze my spending patterns"
152
+
153
+ ## Supported CSV Formats
154
+
155
+ MoneyRAG automatically handles different CSV formats including:
156
+ - **Chase Bank**: Negative values for spending
157
+ - **Discover**: Positive values for spending
158
+ - **Custom formats**: LLM-based column mapping
159
+
160
+ Required information (can have any column names):
161
+ - Date
162
+ - Merchant/Description
163
+ - Amount
164
+
165
+ MoneyRAG automatically handles different CSV formats:
166
+ - Chase Bank, Discover, and custom formats
167
+ - LLM-based column mapping (works with any column names)
168
+ - Required: Date, Merchant/Description, Amount
169
+
170
+ ## Configuration
171
+
172
+ **Supported Models:**
173
+ - Google: gemini-3-flash-preview, gemini-3-pro-image-preview, gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite
174
+ - OpenAI: gpt-5-mini, gpt-5-nano, gpt-4o-mini, gpt-4o
175
+
176
+ **Note:** API keys entered through UI, no environment variables needed.
177
+
+ ## Troubleshooting
+
+ ### Check container status
+ ```bash
+ docker ps
178
+ docker inspect money-rag-app | grep Health
179
+ ```
180
+
181
+ ### Reset everything
182
+ ```bash
183
+ docker-compose down -v
184
+ docker rmi money_rag-money-rag
185
+ ./docker-run.sh # Choose option 1
186
+ ```
187
+
188
+ ### MCP Server Issues
189
+ The MCP server runs as a subprocess. If you see connection errors:
190
+ 1. Check logs: `docker-compose logs -f`
191
+ 2. Verify mcp_server.py exists: `docker exec money-rag-app ls -la`
192
+
193
+ ### Permission Issues
194
+ ```bash
195
+ chmod +x docker-run.sh
196
+ sudo chown -R $USER:$USER data logs
197
+ ```
198
+
199
+ ## Production Deployment
200
+
201
+ ### Using Docker Hub
202
+
203
+ 1. **Tag and push:**
204
+ ```bash
205
+ docker tag money-rag:latest your-username/money-rag:latest
206
+ docker push your-username/money-rag:latest
207
+ ```
208
+
209
+ 2. **Pull and run on server:**
210
+ ```bash
211
+ docker pull your-username/money-rag:latest
212
+ docker run -d -p 8501:8501 your-username/money-rag:latest
213
+ ```
214
+
215
+ ### Cloud Platforms
216
+
217
+ **Google Cloud Run:**
218
+ ```bash
219
+ gcloud builds submit --tag gcr.io/PROJECT-ID/money-rag
220
+ gcloud run deploy money-rag \
221
+ --image gcr.io/PROJECT-ID/money-rag \
222
+ --platform managed \
223
+ --allow-unauthenticated
224
+ ```
225
+
226
+ **AWS ECS / Azure Container Instances:**
227
+ - Build and push to respective container registries
228
+ - Deploy using platform-specific CLI tools
229
+
230
+ ## Security Notes
231
+
232
+ ⚠️ **Important:**
233
+ - API keys are entered via UI and stored only in session state (not persisted)
234
+ - Keys are cleared when browser session ends
235
+ - Transaction data is session-based and ephemeral
236
+ - The app exports the provider API key into the process environment at runtime; never commit a `.env` file (or any other file) containing real keys
237
+ - For production, implement secure session management and authentication
238
+
239
+ ## Development
240
+
241
+ ### Hot Reload
242
+ Mount code as volume in docker-compose.yml:
243
+ ```yaml
244
+ volumes:
245
+ - ./app.py:/app/app.py
246
+ - ./money_rag.py:/app/money_rag.py
247
+ - ./mcp_server.py:/app/mcp_server.py
248
+ ```
249
+
250
+ ### Testing
251
+ ```bash
252
+ # Run unit tests (if available)
253
+ pytest tests/
254
+
255
+ # Test CSV ingestion
256
+ python -c "from money_rag import MoneyRAG; ..."
257
+ ```
258
+
259
+ ## Technologies
260
+
261
+ **Core Framework:**
262
+ - **LangChain** (>=1.2.3): Agent orchestration and tool integration
263
+ - **LangGraph** (>=1.0.6): Conversational agent with memory
264
+ - **langchain-mcp-adapters** (>=0.2.1): Model Context Protocol integration
265
+
266
+ **LLM Providers:**
267
+ - **langchain-google-genai** (>=2.0.0): Google Gemini integration
268
+ - **langchain-openai** (>=1.1.7): OpenAI GPT integration
269
+
270
+ **Storage & Search:**
271
+ - **Qdrant** (>=1.16.2): Vector database for semantic search
272
+ - **SQLite** (via SQLAlchemy >=2.0.45): Relational database for structured queries
273
+
274
+ **Tools & Services:**
275
+ - **FastMCP** (>=2.14.3): MCP server implementation
276
+ - **DuckDuckGo Search** (>=8.1.1): Web search for merchant enrichment
277
+ **Container issues:**
278
+ ```bash
279
+ docker-compose logs
280
+ docker-compose down -v # Reset everything
281
+ ./docker-run.sh # Rebuild
282
+ ```
283
+
284
+ **Permission issues:**
285
+ ```bash
286
+ chmod +x docker-run.sh
287
+ ```
288
+
289
+ ## Technologies
290
+
291
+ - **LangChain & LangGraph**: Agent orchestration
292
+ - **Google Gemini / OpenAI GPT**: LLM providers
293
+ - **Qdrant**: Vector database
294
+ - **SQLite**: Structured storage
295
+ - **FastMCP**: Model Context Protocol
296
+ - **Streamlit**: Web interface
297
+
298
+ ## Contributors
299
+
300
+ - **Sajil Awale** - [GitHub](https://github.com/AwaleSajil)
301
+ - **Simran KC** - [GitHub](https://github.com/iamsims)
302
+
303
+ ## License
304
+
305
+ MIT
__pycache__/money_rag.cpython-312.pyc ADDED
Binary file (14.2 kB). View file
 
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import asyncio
3
+ import os
4
+ from money_rag import MoneyRAG
5
+
6
st.set_page_config(page_title="MoneyRAG", layout="wide")

# Sidebar for Authentication: pick a provider/model pair and supply the API key.
with st.sidebar:
    st.header("Authentication")
    provider = st.selectbox("LLM Provider", ["Google", "OpenAI"])

    # Chat and embedding model choices offered for the selected provider.
    if provider == "Google":
        models = ["gemini-3-flash-preview", "gemini-3-pro-image-preview", "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite"]
        embeddings = ["text-embedding-004"]
    else:
        models = ["gpt-5-mini", "gpt-5-nano", "gpt-4o-mini", "gpt-4o"]
        embeddings = ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]

    model_name = st.selectbox("Choose Decoder Model", models)
    embed_name = st.selectbox("Choose Embedding Model", embeddings)
    api_key = st.text_input("API Key", type="password")

    if st.button("Authenticate"):
        if api_key:
            # The MoneyRAG instance lives in session state so it survives Streamlit reruns.
            st.session_state.rag = MoneyRAG(provider, model_name, embed_name, api_key)
            st.success("Authenticated!")
        else:
            # Previously an empty key made the button a silent no-op.
            st.warning("Please enter an API key.")

    st.divider()
    st.caption("**Contributors:**")
    st.caption("👤 [Sajil Awale](https://github.com/AwaleSajil)")
    st.caption("👤 [Simran KC](https://github.com/iamsims)")

# Main Window
st.title("MoneyRAG 💰")
st.subheader("Where is my money?")
st.markdown("""
This app helps you analyze your personal finances using AI.
Upload your bank/credit card CSV statements to chat with your data semantically.
""")

# Guides Section
col1, col2 = st.columns(2)

with col1:
    with st.expander("📚 How to get API keys"):
        st.markdown("**Google Gemini API:**")
        st.markdown("🔗 [Get API key from Google AI Studio](https://aistudio.google.com/app/apikey)")
        st.markdown("")
        st.markdown("**OpenAI API:**")
        st.markdown("🔗 [Get API key from OpenAI Platform](https://platform.openai.com/api-keys)")

with col2:
    with st.expander("📥 How to download transaction history"):
        st.markdown("**Chase Credit Card:**")
        st.video("https://www.youtube.com/watch?v=gtAFaP9Lts8")
        st.markdown("")
        st.markdown("**Discover Credit Card:**")
        st.video("https://www.youtube.com/watch?v=cry6-H5b0PQ")

# Architecture Diagram
with st.expander("🏗️ How MoneyRAG Works"):
    st.image("architecture.svg", use_container_width=True)

st.divider()

if "rag" in st.session_state:
    uploaded_files = st.file_uploader("Upload CSV transactions", accept_multiple_files=True, type=['csv'])

    if uploaded_files:
        if st.button("Ingest Data"):
            temp_paths = []
            for uploaded_file in uploaded_files:
                # The file name is supplied by the client; keep only its final
                # path component so it cannot escape the session temp directory.
                safe_name = os.path.basename(uploaded_file.name)
                if not safe_name:
                    continue
                path = os.path.join(st.session_state.rag.temp_dir, safe_name)
                with open(path, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                temp_paths.append(path)

            with st.spinner("Ingesting and vectorizing..."):
                asyncio.run(st.session_state.rag.setup_session(temp_paths))
            st.success("Data ready for chat!")

    # Chat Interface
    st.divider()
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the conversation so far (Streamlit re-executes the script on every interaction).
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("Ask about your spending..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response = asyncio.run(st.session_state.rag.chat(prompt))
                st.markdown(response)
        st.session_state.messages.append({"role": "assistant", "content": response})
else:
    st.info("Please authenticate in the sidebar to start.")
architecture.svg ADDED
docker-compose.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Compose definition for running MoneyRAG locally.
# The top-level `version` key is obsolete in the Compose Specification
# (Compose v2 ignores it and prints a warning), so it is omitted.
services:
  money-rag:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: money-rag-app
    ports:
      - "8501:8501"
    environment:
      - PYTHONUNBUFFERED=1
      - STREAMLIT_SERVER_PORT=8501
      - STREAMLIT_SERVER_ADDRESS=0.0.0.0
      # Note: API keys are entered through the Streamlit UI
      # No environment variables required
    volumes:
      # Mount data directory for persistence (optional)
      - ./data:/app/data
      # Mount logs directory
      - ./logs:/app/logs
    restart: unless-stopped
    # Overrides the HEALTHCHECK declared in the Dockerfile (curl is installed in the image).
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8501/_stcore/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s
docker-run.sh ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# MoneyRAG Docker Run Script
# Interactive helper that builds, starts, stops, tails logs for, or cleans up
# the MoneyRAG container defined in docker-compose.yml.

set -e

echo "🐳 MoneyRAG Docker Setup"
echo "========================"
echo "ℹ️  Note: API keys are entered through the web UI"
echo ""

# Check if Docker is running
if ! docker info > /dev/null 2>&1; then
    echo "❌ Docker is not running. Please start Docker Desktop."
    exit 1
fi

# Resolve the Compose command: prefer the v2 plugin ("docker compose") and
# fall back to the legacy standalone binary ("docker-compose").
if docker compose version > /dev/null 2>&1; then
    COMPOSE=(docker compose)
elif command -v docker-compose > /dev/null 2>&1; then
    COMPOSE=(docker-compose)
else
    echo "❌ Docker Compose is not installed."
    exit 1
fi

# Create data and logs directories (bind-mounted by docker-compose.yml)
mkdir -p data logs

echo ""
echo "Choose an option:"
echo "1) Build and run (first time or after code changes)"
echo "2) Run existing container"
echo "3) Stop container"
echo "4) View logs"
echo "5) Clean up (remove containers and images)"
echo ""
# -r: do not treat backslashes in the input as escape characters
read -r -p "Enter choice [1-5]: " choice

case "$choice" in
    1)
        echo "🔨 Building Docker image..."
        "${COMPOSE[@]}" build
        echo "🚀 Starting container..."
        "${COMPOSE[@]}" up -d
        echo "✅ Application is running at http://localhost:8501"
        echo "📋 View logs with: ${COMPOSE[*]} logs -f"
        ;;
    2)
        echo "🚀 Starting container..."
        "${COMPOSE[@]}" up -d
        echo "✅ Application is running at http://localhost:8501"
        ;;
    3)
        echo "🛑 Stopping container..."
        "${COMPOSE[@]}" down
        echo "✅ Container stopped"
        ;;
    4)
        echo "📋 Showing logs (Ctrl+C to exit)..."
        "${COMPOSE[@]}" logs -f
        ;;
    5)
        echo "🧹 Cleaning up..."
        # --rmi local removes the image Compose built for this project, whatever
        # name it was given (the name depends on the directory and Compose version).
        "${COMPOSE[@]}" down -v --rmi local
        echo "✅ Cleanup complete"
        ;;
    *)
        echo "❌ Invalid choice"
        exit 1
        ;;
esac
mcp_server.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastmcp import FastMCP
2
+ from langchain_qdrant import QdrantVectorStore
3
+ from qdrant_client import QdrantClient
4
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
+ from dotenv import load_dotenv
6
+ import os
7
+
8
+ import shutil
9
+
10
+ # Load environment variables (API keys, etc.)
11
+ load_dotenv()
12
+
13
+ # Define paths to your data
14
+ # For Hugging Face Spaces (Ephemeral):
15
+ # We use a temporary directory that gets wiped on restart.
16
+ # If DATA_DIR is set (e.g., by your deployment config), use it.
17
+ DATA_DIR = os.getenv("DATA_DIR", os.path.join(os.path.dirname(os.path.abspath(__file__)), "temp_data"))
18
+ QDRANT_PATH = os.path.join(DATA_DIR, "qdrant_db")
19
+ DB_PATH = os.path.join(DATA_DIR, "money_rag.db")
20
+
21
+ # Initialize the MCP Server
22
+ mcp = FastMCP("Money RAG Financial Analyst")
23
+
24
+ import sqlite3
25
+
26
def get_schema_info() -> str:
    """Get database schema information.

    Returns:
        A text listing of every table in the SQLite file at DB_PATH with its
        column names and declared types, a notice when the file does not exist
        yet, or an "Error reading schema: ..." message if reading fails.
    """
    if not os.path.exists(DB_PATH):
        return "Database file does not exist yet. Please upload data."

    conn = None
    try:
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()

        # Get all tables
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()

        schema_info = []
        for (table_name,) in tables:
            schema_info.append(f"\nTable: {table_name}")

            # PRAGMA arguments cannot be bound as parameters, so quote the
            # identifier (doubling embedded quotes) instead of pasting it raw.
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            cursor.execute(f"PRAGMA table_info({quoted_name});")
            columns = cursor.fetchall()

            schema_info.append("Columns:")
            # table_info rows are (cid, name, type, notnull, dflt_value, pk)
            for _cid, col_name, col_type, _not_null, _default_val, _pk in columns:
                schema_info.append(f" - {col_name} ({col_type})")

        return "\n".join(schema_info)
    except Exception as e:
        return f"Error reading schema: {e}"
    finally:
        # Close the connection on every path, including when a query raises.
        if conn is not None:
            conn.close()
56
+
57
+
58
@mcp.resource("schema://database/tables")
def get_database_schema() -> str:
    """Complete schema information for the money_rag database."""
    # Thin MCP resource wrapper: the actual work happens in get_schema_info().
    schema_text = get_schema_info()
    return schema_text
62
+
63
@mcp.tool()
def query_database(query: str) -> str:
    """Execute a SELECT query on the money_rag SQLite database.

    Args:
        query: The SQL SELECT query to execute

    Returns:
        Query results or error message

    Important Notes:
        - Only SELECT queries are allowed (read-only)
        - Use 'description' column for text search
        - 'amount' column: positive values = spending, negative values = payments/refunds

    Example queries:
        - Find Walmart spending: SELECT SUM(amount) FROM transactions WHERE description LIKE '%Walmart%' AND amount > 0;
        - List recent transactions: SELECT transaction_date, description, amount, category FROM transactions ORDER BY transaction_date DESC LIMIT 5;
        - Spending by category: SELECT category, SUM(amount) FROM transactions WHERE amount > 0 GROUP BY category;
    """
    # Local import: pathlib is not among this module's top-level imports.
    from pathlib import Path

    if not os.path.exists(DB_PATH):
        return "Database file does not exist yet. Please upload data."

    # Security: Only allow SELECT queries (PRAGMA is also accepted for schema inspection)
    query_upper = query.strip().upper()
    if not query_upper.startswith("SELECT") and not query_upper.startswith("PRAGMA"):
        return "Error: Only SELECT and PRAGMA queries are allowed"

    # Forbidden operations
    forbidden = ["INSERT", "UPDATE", "DELETE", "DROP", "ALTER", "CREATE", "REPLACE", "TRUNCATE", "ATTACH", "DETACH"]
    # Check for forbidden words as standalone words to avoid false positives (e.g. "update_date" column)
    # Simple check: space-surrounded or end-of-string
    if any(f" {word} " in f" {query_upper} " for word in forbidden):
        return "Error: Query contains forbidden operation. Only SELECT queries allowed."

    conn = None
    try:
        # The keyword filter above is only a first line of defence (it misses
        # tabs, newlines and writing PRAGMAs). Opening the file with mode=ro
        # makes SQLite itself reject any write.
        read_only_uri = Path(DB_PATH).resolve().as_uri() + "?mode=ro"
        conn = sqlite3.connect(read_only_uri, uri=True)
        cursor = conn.cursor()
        cursor.execute(query)
        results = cursor.fetchall()

        # Get column names to make result more readable
        column_names = [col[0] for col in cursor.description] if cursor.description else []

        if not results:
            return "No results found"

        # Format results nicely: a header line followed by one tuple per row
        formatted_results = [f"Columns: {', '.join(column_names)}"]
        formatted_results.extend(str(row) for row in results)

        return "\n".join(formatted_results)
    except sqlite3.Error as e:
        return f"Error: {str(e)}"
    finally:
        # Release the connection on every path, including query errors.
        if conn is not None:
            conn.close()
121
+
122
def get_vector_store():
    """Initialize connection to the Qdrant vector store

    Returns:
        A QdrantVectorStore bound to the "transactions" collection stored on
        disk under QDRANT_PATH.

    Raises:
        LookupError: if the "transactions" collection does not exist yet
            (nothing has been ingested into this storage directory).
    """
    # Initialize Embedding Model using Google AI Studio
    # NOTE(review): the embedding model is hard-coded here; it must match the
    # model used when the collection was written — confirm against the ingestion code.
    embeddings = GoogleGenerativeAIEmbeddings(model="text-embedding-004")

    # Connect to Qdrant (Persistent Disk Mode at specific path)
    # We ensure the directory exists so Qdrant can write to it.
    os.makedirs(QDRANT_PATH, exist_ok=True)

    client = QdrantClient(path=QDRANT_PATH)

    # Check if collection exists (it might be empty in a new ephemeral session)
    collection_names = {c.name for c in client.get_collections().collections}

    if "transactions" not in collection_names:
        # Fail with an explicit message instead of silently handing back a store
        # that points at a collection which is not there.
        client.close()
        raise LookupError(
            "Collection 'transactions' does not exist yet. Please upload data first."
        )

    return QdrantVectorStore(
        client=client,
        collection_name="transactions",
        embedding=embeddings,
    )
146
+
147
@mcp.tool()
def semantic_search(query: str, top_k: int = 5) -> str:
    """
    Search for personal financial transactions semantically.

    Use this to find spending when specific merchant names are unknown or ambiguous.
    Examples: "how much did I spend on fast food?", "subscriptions", "travel expenses".

    Args:
        query: The description or category of spending to look for.
        top_k: Number of results to return (default 5).
    """
    try:
        # Safety check: if no data has been ingested yet.
        # This must run BEFORE get_vector_store(), because that helper creates
        # QDRANT_PATH, after which a missing directory can no longer be detected.
        if not os.path.exists(QDRANT_PATH) or not os.listdir(QDRANT_PATH):
            return "No matching transactions found (Database is empty. Please upload data first)."

        vector_store = get_vector_store()

        # Guard against a zero or negative result count.
        results = vector_store.similarity_search(query, k=max(1, top_k))

        if not results:
            return "No matching transactions found."

        output = []
        for doc in results:
            # Format the output clearly for the LLM/User
            amount = doc.metadata.get('amount', 'N/A')
            date = doc.metadata.get('transaction_date', 'N/A')
            output.append(f"Date: {date} | Match: {doc.page_content} | Amount: {amount}")

        return "\n".join(output)

    except Exception as e:
        return f"Error performing search: {str(e)}"
182
+
183
+ # A helper to clear data (useful for session reset)
184
@mcp.tool()
def clear_database() -> str:
    """Clear all stored transaction data to reset the session."""
    try:
        # Wipe the whole data directory, then recreate it empty.
        data_dir_present = os.path.exists(DATA_DIR)
        if data_dir_present:
            shutil.rmtree(DATA_DIR)
        os.makedirs(DATA_DIR)
    except Exception as exc:
        return f"Error clearing database: {exc}"
    else:
        return "Database cleared successfully."
194
+
195
+ if __name__ == "__main__":
196
+ # Runs the server over stdio
197
+ mcp.run(transport="stdio")
money_rag.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ import asyncio
4
+ import pandas as pd
5
+ import sqlite3
6
+ import shutil
7
+ import tempfile
8
+ from typing import List, Optional
9
+ from dataclasses import dataclass
10
+
11
+ from langchain.chat_models import init_chat_model
12
+ from langchain_core.prompts import ChatPromptTemplate
13
+ from langchain_core.output_parsers import JsonOutputParser
14
+ from langchain_core.tools import tool
15
+ from langchain_community.utilities import SQLDatabase
16
+ from langchain_qdrant import QdrantVectorStore
17
+ from qdrant_client import QdrantClient
18
+ from qdrant_client.http.models import Distance, VectorParams
19
+ from langgraph.runtime import get_runtime
20
+ from langgraph.checkpoint.memory import InMemorySaver
21
+ from langchain.agents import create_agent
22
+ from langchain_community.tools import DuckDuckGoSearchRun
23
+ from langchain_mcp_adapters.client import MultiServerMCPClient
24
+
25
+ # Import specific embeddings
26
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
27
+ from langchain_openai import OpenAIEmbeddings
28
+
29
class MoneyRAG:
    """Session-scoped pipeline that turns transaction CSVs into a queryable agent.

    A session ingests CSV files into a SQLite table named ``transactions``,
    mirrors that table into a local Qdrant collection of the same name, and
    builds an agent whose tools come from the ``mcp_server.py`` file located
    next to this module. All session data lives in a temporary directory that
    ``cleanup()`` removes.
    """

    def __init__(self, llm_provider: str, model_name: str, embedding_model_name: str, api_key: str):
        """Configure the LLM, the embedding model and the session's temp paths.

        Args:
            llm_provider: "google" (case-insensitive) selects Google GenAI;
                any other value falls through to OpenAI.
            model_name: Chat model name passed to ``init_chat_model``.
            embedding_model_name: Embedding model name for the chosen provider.
            api_key: API key exported to the provider's environment variable.
        """
        self.llm_provider = llm_provider.lower()
        self.model_name = model_name
        self.embedding_model_name = embedding_model_name

        # Set API Keys
        # NOTE(review): these are process-wide environment variables, so two
        # sessions in one process would overwrite each other's key -- confirm
        # this is acceptable for the hosting app.
        if self.llm_provider == "google":
            os.environ["GOOGLE_API_KEY"] = api_key
            self.embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model_name)
            provider_name = "google_genai"
        else:
            os.environ["OPENAI_API_KEY"] = api_key
            self.embeddings = OpenAIEmbeddings(model=embedding_model_name)
            provider_name = "openai"

        # Initialize LLM
        self.llm = init_chat_model(
            self.model_name,
            model_provider=provider_name,
        )

        # Temporary paths for this session
        self.temp_dir = tempfile.mkdtemp()
        os.environ["DATA_DIR"] = self.temp_dir  # Harmonize with mcp_server.py
        self.db_path = os.path.join(self.temp_dir, "money_rag.db")
        self.qdrant_path = os.path.join(self.temp_dir, "qdrant_db")

        self.db: Optional[SQLDatabase] = None
        self.vector_store: Optional[QdrantVectorStore] = None
        self.agent = None
        self.mcp_client: Optional[MultiServerMCPClient] = None
        self.search_tool = DuckDuckGoSearchRun()
        self.merchant_cache = {}  # Session-based cache for merchant enrichment

    async def setup_session(self, csv_paths: List[str]):
        """Ingests CSVs and sets up DBs.

        Args:
            csv_paths: Paths of the CSV files to ingest, in order.

        Raises:
            ValueError: If ``csv_paths`` is empty, or if ingestion produced no rows.
        """
        # Fail early with the same message the empty-table check uses; without
        # this, an empty list reaches the SQL query against a table that was
        # never created and fails with an opaque "no such table" error.
        if not csv_paths:
            raise ValueError("No transactions found in database. Please ingest CSV files first.")

        for path in csv_paths:
            await self._ingest_csv(path)

        self.db = SQLDatabase.from_uri(f"sqlite:///{self.db_path}")
        self.vector_store = self._sync_to_qdrant()
        await self._init_agent()

    async def _ingest_csv(self, file_path):
        """Normalise one CSV into the ``transactions`` table.

        The LLM is asked to map the file's columns onto date / description /
        amount / category and to state the sign convention; amounts are then
        flipped when needed so that spending is stored as a positive number.
        Each distinct description is additionally enriched with a web search
        result, stored in the ``enriched_info`` column.

        Args:
            file_path: Path of the CSV file to ingest.
        """
        df = pd.read_csv(file_path)
        headers = df.columns.tolist()
        sample_data = df.head(10).to_json()

        prompt = ChatPromptTemplate.from_template("""
        Act as a financial data parser. Analyze this CSV data:
        Filename: {filename}
        Headers: {headers}
        Sample Data: {sample}

        TASK:
        1. Map the CSV columns to standard fields: date, description, amount, and category.
        2. Determine the 'sign_convention' for spending.

        RULES:
        - If the filename suggests 'Discover' credit card, spending are usually POSITIVE.
        - If the filename suggests 'Chase' credit card, spending are usually NEGATIVE.

        - Analyze the 'sign_convention' for spending (outflows):
        - Look at the sample data for known merchants or spending patterns.
        - If spending (like a restaurant or store) is NEGATIVE (e.g., -25.00), the convention is 'spending_is_negative'.
        - If spending is POSITIVE (e.g., 25.00), the convention is 'spending_is_positive'.

        OUTPUT FORMAT (JSON ONLY):
        {{
        "date_col": "column_name",
        "desc_col": "column_name",
        "amount_col": "column_name",
        "category_col": "column_name or null",
        "sign_convention": "spending_is_negative" | "spending_is_positive"
        }}
        """)

        chain = prompt | self.llm | JsonOutputParser()
        mapping = await chain.ainvoke({"headers": headers, "sample": sample_data, "filename": os.path.basename(file_path)})

        standard_df = pd.DataFrame()
        standard_df['id'] = [str(uuid.uuid4()) for _ in range(len(df))]
        standard_df['transaction_date'] = pd.to_datetime(df[mapping['date_col']])
        standard_df['description'] = df[mapping['desc_col']]

        # Store spending as positive regardless of the source's convention.
        raw_amounts = pd.to_numeric(df[mapping['amount_col']])
        if mapping['sign_convention'] == "spending_is_negative":
            standard_df['amount'] = raw_amounts * -1
        else:
            standard_df['amount'] = raw_amounts

        # Only trust the category mapping when it names a real column; the
        # model may answer null, the string "null", or a made-up header.
        category_col = mapping.get('category_col')
        if category_col and category_col in df.columns:
            standard_df['category'] = df[category_col]
        else:
            standard_df['category'] = 'Uncategorized'
        standard_df['source_file'] = os.path.basename(file_path)

        # --- Async Enrichment Step ---
        print(f" ✨ Enriching descriptions for {os.path.basename(file_path)}...")
        unique_descriptions = standard_df['description'].unique()
        sem = asyncio.Semaphore(5)  # at most 5 web searches in flight

        async def get_merchant_info(description):
            # Reuse results from earlier files in this session.
            if description in self.merchant_cache:
                return self.merchant_cache[description]

            async with sem:
                try:
                    await asyncio.sleep(0.05)  # Jitter
                    print(f" 🔍 Web searching: {description}...")
                    result = await self.search_tool.ainvoke(f"What type of business / store is '{description}'?")
                    self.merchant_cache[description] = result
                    return result
                except Exception as e:
                    # Best effort: a failed lookup is not cached, so a later
                    # file containing the same description will retry it.
                    print(f" ⚠️ Search failed for {description}: {e}")
                    return "Unknown"

        tasks = [get_merchant_info(desc) for desc in unique_descriptions]
        enrichment_results = await asyncio.gather(*tasks)

        desc_map = dict(zip(unique_descriptions, enrichment_results))
        standard_df['enriched_info'] = standard_df['description'].map(desc_map).fillna("")

        conn = sqlite3.connect(self.db_path)
        try:
            standard_df.to_sql("transactions", conn, if_exists="append", index=False)
        finally:
            # Release the connection even when the insert fails.
            conn.close()

    @staticmethod
    def _embedding_text(row) -> str:
        """Build the text that is embedded for one transaction row.

        The result is ``"<description> (<category>)"``, followed by
        ``" - <enriched_info>"`` when the enrichment is a non-empty string other
        than "Unknown". Non-string values (e.g. NaN read back from a NULL
        cell) are ignored instead of being embedded as the text "nan".
        """
        enriched = row.get('enriched_info', '')
        base_text = f"{row['description']} ({row['category']})"
        if isinstance(enriched, str) and enriched and enriched != "Unknown":
            return f"{base_text} - {enriched}"
        return base_text

    def _sync_to_qdrant(self):
        """Rebuild the Qdrant ``transactions`` collection from the SQLite table.

        Returns:
            The ``QdrantVectorStore`` holding one vector per transaction.

        Raises:
            ValueError: If the ``transactions`` table has no rows.
        """
        collection = "transactions"

        conn = sqlite3.connect(self.db_path)
        try:
            df = pd.read_sql_query("SELECT * FROM transactions", conn)
        finally:
            conn.close()

        # Check for empty dataframe (before any Qdrant files are created)
        if df.empty:
            raise ValueError("No transactions found in database. Please ingest CSV files first.")

        client = QdrantClient(path=self.qdrant_path)

        # Dynamically detect embedding dimension
        sample_embedding = self.embeddings.embed_query("test")
        embedding_dim = len(sample_embedding)

        # Drop-and-create replaces the deprecated recreate_collection() call.
        if client.collection_exists(collection):
            client.delete_collection(collection)
        client.create_collection(
            collection_name=collection,
            vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
        )

        vs = QdrantVectorStore(client=client, collection_name=collection, embedding=self.embeddings)

        # Use description + category + enrichment for vectorization
        texts = [self._embedding_text(row) for _, row in df.iterrows()]

        metadatas = df[['id', 'amount', 'category', 'transaction_date']].to_dict('records')
        for m in metadatas:
            m['transaction_date'] = str(m['transaction_date'])

        vs.add_texts(texts=texts, metadatas=metadatas)
        return vs

    async def _init_agent(self):
        """Start the MCP server subprocess and build the agent around its tools."""
        import sys  # local import: only needed to locate the running interpreter

        # 1. Initialize MCP client with absolute path to server
        server_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mcp_server.py")

        self.mcp_client = MultiServerMCPClient(
            {
                "money_rag": {
                    "transport": "stdio",
                    # Launch the server with the interpreter running this code
                    # so it sees the same installed packages; fall back to the
                    # PATH lookup when the interpreter path is unavailable.
                    "command": sys.executable or "python",
                    "args": [server_path],
                    "env": os.environ.copy(),
                }
            }
        )

        # 2. Get tools from MCP server
        mcp_tools = await self.mcp_client.get_tools()

        # 3. Define the Agent with MCP Tools
        system_prompt = (
            "You are a financial analyst. Use the provided tools to query the database "
            "and perform semantic searches. Spending is POSITIVE (>0). "
            "Always explain your findings clearly."
        )

        self.agent = create_agent(
            model=self.llm,
            tools=mcp_tools,
            system_prompt=system_prompt,
            checkpointer=InMemorySaver(),
        )

    async def chat(self, query: str):
        """Send one user message to the agent and return the reply text.

        Args:
            query: The user's question.

        Returns:
            The content of the last message in the agent's result. When that
            content is a list of blocks, the text blocks joined by newlines.
        """
        config = {"configurable": {"thread_id": "session_1"}}

        result = await self.agent.ainvoke(
            {"messages": [{"role": "user", "content": query}]},
            config,
        )

        # Extract content - handle both string and list formats
        content = result["messages"][-1].content

        # If content is a list (Gemini format), extract text from blocks
        if isinstance(content, list):
            text_parts = []
            for block in content:
                if isinstance(block, str):
                    # Message content lists may mix plain strings with dict
                    # blocks; plain strings are text and must not be dropped.
                    text_parts.append(block)
                elif isinstance(block, dict) and block.get("type") == "text":
                    text_parts.append(block.get("text", ""))
            return "\n".join(text_parts)

        # If content is already a string (OpenAI format), return as-is
        return content

    async def cleanup(self):
        """Delete temporary session files and close MCP client."""
        if self.mcp_client:
            try:
                await self.mcp_client.close()
            except Exception as e:
                print(f"Warning: Failed to close MCP client: {e}")

        # Close the embedded Qdrant client before deleting its directory.
        # NOTE(review): presumably the local-mode client keeps files/locks open
        # under temp_dir, which can make rmtree fail on some platforms -- confirm.
        store = getattr(self, "vector_store", None)
        qdrant_client = getattr(store, "client", None)
        if qdrant_client is not None:
            try:
                qdrant_client.close()
            except Exception as e:
                print(f"Warning: Failed to close Qdrant client: {e}")
            self.vector_store = None

        if os.path.exists(self.temp_dir):
            try:
                shutil.rmtree(self.temp_dir)
            except Exception as e:
                print(f"Warning: Failed to remove temp directory: {e}")
notebooks/0_test.ipynb ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "%reload_ext autoreload\n",
10
+ "%autoreload 2"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 4,
16
+ "metadata": {},
17
+ "outputs": [
18
+ {
19
+ "data": {
20
+ "text/plain": [
21
+ "True"
22
+ ]
23
+ },
24
+ "execution_count": 4,
25
+ "metadata": {},
26
+ "output_type": "execute_result"
27
+ }
28
+ ],
29
+ "source": [
30
+ "import pandas as pd\n",
31
+ "import sqlite3\n",
32
+ "from langchain.chat_models import init_chat_model\n",
33
+ "from langchain_core.prompts import ChatPromptTemplate\n",
34
+ "from langchain_core.output_parsers import JsonOutputParser\n",
35
+ "\n",
36
+ "from dotenv import load_dotenv\n",
37
+ "load_dotenv()"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": 5,
43
+ "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stderr",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "/Users/sawale/Documents/learning/money_rag/.venv/lib/python3.12/site-packages/google/cloud/aiplatform/models.py:52: FutureWarning: Support for google-cloud-storage < 3.0.0 will be removed in a future version of google-cloud-aiplatform. Please upgrade to google-cloud-storage >= 3.0.0.\n",
50
+ " from google.cloud.aiplatform.utils import gcs_utils\n"
51
+ ]
52
+ },
53
+ {
54
+ "data": {
55
+ "text/plain": [
56
+ "langchain_google_vertexai.chat_models.ChatVertexAI"
57
+ ]
58
+ },
59
+ "execution_count": 5,
60
+ "metadata": {},
61
+ "output_type": "execute_result"
62
+ }
63
+ ],
64
+ "source": [
65
+ "# Initialize the Gemini model via Vertex AI\n",
66
+ "vertex_llm = init_chat_model(\n",
67
+ " \"gemini-2.5-flash\", \n",
68
+ " model_provider=\"google_vertexai\",\n",
69
+ " project='gen-lang-client-0311515393',\n",
70
+ " location='us-central1',\n",
71
+ ")\n",
72
+ "\n",
73
+ "type(vertex_llm)\n"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 4,
79
+ "metadata": {},
80
+ "outputs": [
81
+ {
82
+ "data": {
83
+ "text/plain": [
84
+ "langchain_openai.chat_models.base.ChatOpenAI"
85
+ ]
86
+ },
87
+ "execution_count": 4,
88
+ "metadata": {},
89
+ "output_type": "execute_result"
90
+ }
91
+ ],
92
+ "source": [
93
+ "\n",
94
+ "openai_llm = init_chat_model(\n",
95
+ " \"gpt-4o\", # or \"gpt-3.5-turbo\", \"o1-preview\", etc.\n",
96
+ " model_provider=\"openai\",\n",
97
+ " api_key=\"your-api-key-here\", # Optional: can also be set via OPENAI_API_KEY env var\n",
98
+ ")\n",
99
+ "\n",
100
+ "type(openai_llm)"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": 7,
106
+ "metadata": {},
107
+ "outputs": [
108
+ {
109
+ "data": {
110
+ "text/plain": [
111
+ "{'messages': [HumanMessage(content='what is the weather in sf', additional_kwargs={}, response_metadata={}, id='6eec9126-cda3-4cdd-9f6d-f893b228c7cc'),\n",
112
+ " AIMessage(content='', additional_kwargs={'function_call': {'name': 'get_weather', 'arguments': '{\"city\": \"sf\"}'}, '__gemini_function_call_thought_signatures__': {'28adfdf3-778e-4d93-bd45-94a38f64064f': 'CtsFAY89a1+qAQDcM0PFB0w9LKN21xyUOLVEzccWGHcyKXyKAye7ST4azyzp32wcQLIbbiA4N5NSZO5Ua07pNp6Ey+dHG7MoVMoirXyDabR/IBeaesEOIg3fSaXAegxgzEjgLEmmADwAJRxvGpTNyalfZDUPkh0UcRcwGB6jqibG261Euk9G3hkXrHNBig5gBG1uoAxDxwrydV3T57wFRIxn4EEU+IqDiQRdMFsVn8ANZ4BzcpfRdNKsE/VRJ/htqEAHGeEk3jlqZRcJmkRCFHQvsRMxMI911jBH1puimK61Ah3r6P0aMtfkh4wqAENd+Oou06CG5Iz8IS1kBXNxL2/XbqdCDiSmIepKZNVmkyShmt315nPsQ1U3uM7N6415goYjW0q3IouuhiTl1Q6xwzrCfITXgBa9TL09x/CcSOIW9LgRJ6z7PX20PByOZ/rcqsdxr3lGgGSv+3P4b22mDPrrXZEHs8Hkw+1Fw4QMmFJLcj6WCXJaxlRaNR9TMlcspJGT+AtSv1X287cKqyyKGL6x536j5/vU4xDf2do4QIN/Rom0Xj7d46vUS9/B5abHSvfIErAlwDLcGDvHE3S2nC8gN4GKuTSDSm88DLhv46UwLov/NrF/6v9n7ZiqLkvHl/ByRVldSzOH3cIf+KVxVJlyXrSjjMOg0IP2HcK9kBBls0sDSdwWFcuoF7YkACguzPmcpvrHDkRZYacSv1hZB+LvDHSs7INBev0D38o4Id7fo9V0URUkDATlkvji1m7X+XuCEa0XCcN9vuDB4/z/lEfn/j6It+Cb9SQ7pCxsY4qfk39wusymtgFnRTkaK5JZM7sH+GtM+FBMbi7p/SVsFCeooaW1MshwFjBX2Bevex0HPomNYzPJoRYC7em61d5oDvrsCS5AwLTU8Ph2gjFNF8E8G7NuCfYTCsH/MtcHUIB0HXbIkzmUH0PNqLIYa/uTwd8uaxVQR7zPupoWtDc='}}, response_metadata={'is_blocked': False, 'safety_ratings': [], 'usage_metadata': {'prompt_token_count': 25, 'candidates_token_count': 5, 'total_token_count': 217, 'prompt_tokens_details': [{'modality': 1, 'token_count': 25}], 'candidates_tokens_details': [{'modality': 1, 'token_count': 5}], 'thoughts_token_count': 187, 'cached_content_token_count': 0, 'cache_tokens_details': []}, 'finish_reason': 'STOP', 'avg_logprobs': -5.900767517089844, 'model_name': 'gemini-2.5-flash'}, id='lc_run--019baf03-6dd9-73f3-a9ed-fe8c471e01df-0', tool_calls=[{'name': 'get_weather', 'args': {'city': 'sf'}, 'id': '28adfdf3-778e-4d93-bd45-94a38f64064f', 'type': 'tool_call'}], invalid_tool_calls=[], usage_metadata={'input_tokens': 25, 'output_tokens': 5, 'total_tokens': 217, 'input_token_details': {'cache_read': 0}, 
'output_token_details': {'reasoning': 187}}),\n",
113
+ " ToolMessage(content=\"It's always sunny in sf!\", name='get_weather', id='426b50b1-b985-418f-aa4a-0a76ceae5e30', tool_call_id='28adfdf3-778e-4d93-bd45-94a38f64064f'),\n",
114
+ " AIMessage(content=\"It's always sunny in sf!\\n\", additional_kwargs={}, response_metadata={'is_blocked': False, 'safety_ratings': [], 'usage_metadata': {'prompt_token_count': 229, 'candidates_token_count': 9, 'total_token_count': 238, 'prompt_tokens_details': [{'modality': 1, 'token_count': 229}], 'candidates_tokens_details': [{'modality': 1, 'token_count': 9}], 'thoughts_token_count': 0, 'cached_content_token_count': 0, 'cache_tokens_details': []}, 'finish_reason': 'STOP', 'avg_logprobs': -0.1222022639380561, 'model_name': 'gemini-2.5-flash'}, id='lc_run--019baf03-7bdd-7d31-893b-20167e884547-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 229, 'output_tokens': 9, 'total_tokens': 238, 'input_token_details': {'cache_read': 0}})]}"
115
+ ]
116
+ },
117
+ "execution_count": 7,
118
+ "metadata": {},
119
+ "output_type": "execute_result"
120
+ }
121
+ ],
122
+ "source": [
123
+ "from langchain.agents import create_agent\n",
124
+ "\n",
125
+ "def get_weather(city: str) -> str:\n",
126
+ " \"\"\"Get weather for a given city.\"\"\"\n",
127
+ " return f\"It's always sunny in {city}!\"\n",
128
+ "\n",
129
+ "agent = create_agent(\n",
130
+ " # model=\"gpt-4o\",\n",
131
+ " model=vertex_llm,\n",
132
+ " tools=[get_weather],\n",
133
+ " system_prompt=\"You are a helpful assistant\",\n",
134
+ ")\n",
135
+ "\n",
136
+ "# Run the agent\n",
137
+ "agent.invoke(\n",
138
+ " {\"messages\": [{\"role\": \"user\", \"content\": \"what is the weather in sf\"}]}\n",
139
+ ")"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": 6,
145
+ "metadata": {},
146
+ "outputs": [],
147
+ "source": [
148
+ "from typing import TypedDict\n",
149
+ "from dataclasses import dataclass\n",
150
+ "from langgraph.runtime import get_runtime\n",
151
+ "from langgraph.checkpoint.memory import InMemorySaver\n",
152
+ "from langchain_community.utilities import SQLDatabase\n",
153
+ "from langchain_core.tools import tool\n",
154
+ "from langchain.agents import create_agent\n",
155
+ "\n",
156
+ "\n",
157
+ "db = SQLDatabase.from_uri(\"sqlite:///money_rag.db\")\n",
158
+ "\n",
159
+ "@dataclass\n",
160
+ "class RuntimeContext:\n",
161
+ " db: SQLDatabase\n",
162
+ "\n",
163
+ "@tool\n",
164
+ "def execute_sql(query: str) -> str:\n",
165
+ " \"\"\"Execute a SQLite command and return results.\"\"\"\n",
166
+ " runtime = get_runtime(RuntimeContext)\n",
167
+ " db = runtime.context.db\n",
168
+ " try:\n",
169
+ " return db.run(query)\n",
170
+ " except Exception as e:\n",
171
+ " return f\"Error: {e}\"\n",
172
+ "\n",
173
+ "SYSTEM = f\"\"\"You are a careful SQLite analyst.\n",
174
+ "Rules:\n",
175
+ "- Think step-by-step.\n",
176
+ "- When you need data, call the tool `execute_sql` with ONE SELECT query.\n",
177
+ "- Read-only only; no INSERT/UPDATE/DELETE/ALTER/DROP/CREATE/REPLACE/TRUNCATE.\n",
178
+ "- Limit to 5 rows of output unless the user explicitly asks otherwise.\n",
179
+ "- If the tool returns 'Error:', revise the SQL and try again.\n",
180
+ "- Prefer explicit column lists; avoid SELECT *.\n",
181
+ "- **Spending Logic**:\n",
182
+ " - Spending is represented by POSITIVE values (> 0) in the amount column.\n",
183
+ " - NEGATIVE values (< 0) represent credit card payments or refunds and MUST NOT be included when calculating total spending or expenses.\n",
184
+ "\"\"\"\n",
185
+ "agent = create_agent(\n",
186
+ " model=vertex_llm,\n",
187
+ " tools=[execute_sql],\n",
188
+ " system_prompt=SYSTEM,\n",
189
+ " context_schema=RuntimeContext,\n",
190
+ " checkpointer=InMemorySaver(),\n",
191
+ ")"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": 7,
197
+ "metadata": {},
198
+ "outputs": [
199
+ {
200
+ "name": "stdout",
201
+ "output_type": "stream",
202
+ "text": [
203
+ "================================\u001b[1m Human Message \u001b[0m=================================\n",
204
+ "\n",
205
+ "how much total amount i spent for movie ticket in last 6 months?\n",
206
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
207
+ "Tool Calls:\n",
208
+ " execute_sql (13c234ed-8b6e-4db2-b4cd-ebff7fa65c1b)\n",
209
+ " Call ID: 13c234ed-8b6e-4db2-b4cd-ebff7fa65c1b\n",
210
+ " Args:\n",
211
+ " query: SELECT SUM(amount) FROM transactions WHERE category = 'movie ticket' AND date >= strftime('%Y-%m-%d', date('now', '-6 months')) AND amount > 0\n",
212
+ "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
213
+ "Name: execute_sql\n",
214
+ "\n",
215
+ "Error: (sqlite3.OperationalError) no such column: date\n",
216
+ "[SQL: SELECT SUM(amount) FROM transactions WHERE category = 'movie ticket' AND date >= strftime('%Y-%m-%d', date('now', '-6 months')) AND amount > 0]\n",
217
+ "(Background on this error at: https://sqlalche.me/e/20/e3q8)\n",
218
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
219
+ "Tool Calls:\n",
220
+ " execute_sql (3b514cf6-71cd-4730-b7d1-69f2547d9c56)\n",
221
+ " Call ID: 3b514cf6-71cd-4730-b7d1-69f2547d9c56\n",
222
+ " Args:\n",
223
+ " query: SELECT SUM(amount) FROM transactions WHERE category = 'movie ticket' AND transaction_date >= strftime('%Y-%m-%d', date('now', '-6 months')) AND amount > 0\n",
224
+ "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
225
+ "Name: execute_sql\n",
226
+ "\n",
227
+ "[(None,)]\n",
228
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
229
+ "Tool Calls:\n",
230
+ " execute_sql (ef8975f2-f1e5-4f71-a228-75e72c494c64)\n",
231
+ " Call ID: ef8975f2-f1e5-4f71-a228-75e72c494c64\n",
232
+ " Args:\n",
233
+ " query: SELECT category, amount, transaction_date FROM transactions WHERE category = 'movie ticket' AND amount > 0 LIMIT 5\n",
234
+ "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
235
+ "Name: execute_sql\n",
236
+ "\n",
237
+ "\n",
238
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
239
+ "Tool Calls:\n",
240
+ " execute_sql (e146482f-1c5c-438e-bf05-d6c2a9a9ef14)\n",
241
+ " Call ID: e146482f-1c5c-438e-bf05-d6c2a9a9ef14\n",
242
+ " Args:\n",
243
+ " query: SELECT id, amount, category FROM transactions LIMIT 5\n",
244
+ "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
245
+ "Name: execute_sql\n",
246
+ "\n",
247
+ "Error: (sqlite3.OperationalError) no such column: id\n",
248
+ "[SQL: SELECT id, amount, category FROM transactions LIMIT 5]\n",
249
+ "(Background on this error at: https://sqlalche.me/e/20/e3q8)\n",
250
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
251
+ "Tool Calls:\n",
252
+ " execute_sql (b736c455-b950-46a2-95c0-9ecb041c27a8)\n",
253
+ " Call ID: b736c455-b950-46a2-95c0-9ecb041c27a8\n",
254
+ " Args:\n",
255
+ " query: SELECT amount, category, date FROM spending LIMIT 5\n",
256
+ "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
257
+ "Name: execute_sql\n",
258
+ "\n",
259
+ "Error: (sqlite3.OperationalError) no such table: spending\n",
260
+ "[SQL: SELECT amount, category, date FROM spending LIMIT 5]\n",
261
+ "(Background on this error at: https://sqlalche.me/e/20/e3q8)\n",
262
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
263
+ "Tool Calls:\n",
264
+ " execute_sql (000502f1-501f-44d9-826b-2f8a7c9bc70d)\n",
265
+ " Call ID: 000502f1-501f-44d9-826b-2f8a7c9bc70d\n",
266
+ " Args:\n",
267
+ " query: SELECT amount, category, date FROM expenses LIMIT 5\n",
268
+ "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
269
+ "Name: execute_sql\n",
270
+ "\n",
271
+ "Error: (sqlite3.OperationalError) no such table: expenses\n",
272
+ "[SQL: SELECT amount, category, date FROM expenses LIMIT 5]\n",
273
+ "(Background on this error at: https://sqlalche.me/e/20/e3q8)\n",
274
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
275
+ "Tool Calls:\n",
276
+ " execute_sql (fb1bab6f-de96-42d6-b124-68f9ce80f3ed)\n",
277
+ " Call ID: fb1bab6f-de96-42d6-b124-68f9ce80f3ed\n",
278
+ " Args:\n",
279
+ " query: SELECT amount, category, date FROM transactions LIMIT 5\n",
280
+ "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
281
+ "Name: execute_sql\n",
282
+ "\n",
283
+ "Error: (sqlite3.OperationalError) no such column: date\n",
284
+ "[SQL: SELECT amount, category, date FROM transactions LIMIT 5]\n",
285
+ "(Background on this error at: https://sqlalche.me/e/20/e3q8)\n",
286
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
287
+ "Tool Calls:\n",
288
+ " execute_sql (678dea01-88eb-4bce-a2b1-48cb93c25d97)\n",
289
+ " Call ID: 678dea01-88eb-4bce-a2b1-48cb93c25d97\n",
290
+ " Args:\n",
291
+ " query: SELECT amount, category, transaction_date FROM transactions LIMIT 5\n",
292
+ "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
293
+ "Name: execute_sql\n",
294
+ "\n",
295
+ "[(231.19, 'Merchandise', '2024-10-17 00:00:00'), (16.51, 'Merchandise', '2024-10-18 00:00:00'), (146.73, 'Merchandise', '2024-10-18 00:00:00'), (-100.0, 'Awards and Rebate Credits', '2024-10-18 00:00:00'), (8.09, 'Restaurants', '2024-11-02 00:00:00')]\n",
296
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
297
+ "Tool Calls:\n",
298
+ " execute_sql (dab1dfe3-c9ac-4c25-b737-887088edc6c7)\n",
299
+ " Call ID: dab1dfe3-c9ac-4c25-b737-887088edc6c7\n",
300
+ " Args:\n",
301
+ " query: SELECT SUM(amount) FROM transactions WHERE category = 'movie ticket' AND transaction_date >= strftime('%Y-%m-%d', date('now', '-6 months')) AND amount > 0\n",
302
+ "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
303
+ "Name: execute_sql\n",
304
+ "\n",
305
+ "[(None,)]\n",
306
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
307
+ "Tool Calls:\n",
308
+ " execute_sql (f236f188-f301-400e-8f97-19f87a8717eb)\n",
309
+ " Call ID: f236f188-f301-400e-8f97-19f87a8717eb\n",
310
+ " Args:\n",
311
+ " query: SELECT DISTINCT category FROM transactions LIMIT 5\n",
312
+ "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
313
+ "Name: execute_sql\n",
314
+ "\n",
315
+ "[('Merchandise',), ('Awards and Rebate Credits',), ('Restaurants',), ('Government Services',), ('Payments and Credits',)]\n",
316
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
317
+ "Tool Calls:\n",
318
+ " execute_sql (a0bed1e3-0c0d-46f7-9852-ae0de757cba7)\n",
319
+ " Call ID: a0bed1e3-0c0d-46f7-9852-ae0de757cba7\n",
320
+ " Args:\n",
321
+ " query: SELECT DISTINCT category FROM transactions WHERE category LIKE '%movie%' LIMIT 5\n",
322
+ "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
323
+ "Name: execute_sql\n",
324
+ "\n",
325
+ "\n",
326
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
327
+ "\n",
328
+ "It seems you haven't spent any amount on movie tickets in the last 6 months, or there are no transactions categorized as 'movie ticket' in the database.\n"
329
+ ]
330
+ }
331
+ ],
332
+ "source": [
333
+ "question = \"how much total amount i spent for movie ticket in last 6 months?\"\n",
334
+ "steps = []\n",
335
+ "\n",
336
+ "for step in agent.stream(\n",
337
+ " {\"messages\": [{\"role\": \"user\", \"content\": question}]},\n",
338
+ " {\"configurable\": {\"thread_id\": \"1\"}},\n",
339
+ " stream_mode=\"values\",\n",
340
+ " context=RuntimeContext(db=db)\n",
341
+ "):\n",
342
+ " step[\"messages\"][-1].pretty_print()\n",
343
+ " steps.append(step)"
344
+ ]
345
+ },
346
+ {
347
+ "cell_type": "code",
348
+ "execution_count": 21,
349
+ "metadata": {},
350
+ "outputs": [
351
+ {
352
+ "name": "stdout",
353
+ "output_type": "stream",
354
+ "text": [
355
+ "================================\u001b[1m Human Message \u001b[0m=================================\n",
356
+ "\n",
357
+ "what transcation did you take into account for this calculation?\n",
358
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
359
+ "\n",
360
+ "I need more information to answer your question. Could you please tell me which calculation you are referring to? I don't have memory of past interactions or calculations.\n"
361
+ ]
362
+ }
363
+ ],
364
+ "source": [
365
+ "question = \"what transcation did you take into account for this calculation?\"\n",
366
+ "steps = []\n",
367
+ "\n",
368
+ "for step in agent.stream(\n",
369
+ " {\"messages\": [{\"role\": \"user\", \"content\": question}]},\n",
370
+ " {\"configurable\": {\"thread_id\": \"1\"}},\n",
371
+ " stream_mode=\"values\",\n",
372
+ " context=RuntimeContext(db=db)\n",
373
+ "):\n",
374
+ " step[\"messages\"][-1].pretty_print()\n",
375
+ " steps.append(step)"
376
+ ]
377
+ },
378
+ {
379
+ "cell_type": "code",
380
+ "execution_count": null,
381
+ "metadata": {},
382
+ "outputs": [],
383
+ "source": []
384
+ }
385
+ ],
386
+ "metadata": {
387
+ "kernelspec": {
388
+ "display_name": "money_rag",
389
+ "language": "python",
390
+ "name": "python3"
391
+ },
392
+ "language_info": {
393
+ "codemirror_mode": {
394
+ "name": "ipython",
395
+ "version": 3
396
+ },
397
+ "file_extension": ".py",
398
+ "mimetype": "text/x-python",
399
+ "name": "python",
400
+ "nbconvert_exporter": "python",
401
+ "pygments_lexer": "ipython3",
402
+ "version": "3.12.7"
403
+ }
404
+ },
405
+ "nbformat": 4,
406
+ "nbformat_minor": 2
407
+ }
notebooks/1_test_pdf_reader.ipynb ADDED
@@ -0,0 +1,956 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "%reload_ext autoreload\n",
10
+ "%autoreload 2"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "metadata": {},
17
+ "outputs": [
18
+ {
19
+ "name": "stderr",
20
+ "output_type": "stream",
21
+ "text": [
22
+ "/Users/sawale/Documents/learning/money_rag/.venv/lib/python3.12/site-packages/google/cloud/aiplatform/models.py:52: FutureWarning: Support for google-cloud-storage < 3.0.0 will be removed in a future version of google-cloud-aiplatform. Please upgrade to google-cloud-storage >= 3.0.0.\n",
23
+ " from google.cloud.aiplatform.utils import gcs_utils\n"
24
+ ]
25
+ },
26
+ {
27
+ "data": {
28
+ "text/plain": [
29
+ "True"
30
+ ]
31
+ },
32
+ "execution_count": 2,
33
+ "metadata": {},
34
+ "output_type": "execute_result"
35
+ }
36
+ ],
37
+ "source": [
38
+ "import pandas as pd\n",
39
+ "import sqlite3\n",
40
+ "from langchain.chat_models import init_chat_model\n",
41
+ "from langchain_core.prompts import ChatPromptTemplate\n",
42
+ "from langchain_core.output_parsers import JsonOutputParser\n",
43
+ "import uuid\n",
44
+ "\n",
45
+ "from langchain_qdrant import QdrantVectorStore\n",
46
+ "from qdrant_client import QdrantClient\n",
47
+ "from qdrant_client.http.models import Distance, VectorParams\n",
48
+ "from langchain_google_vertexai import VertexAIEmbeddings\n",
49
+ "\n",
50
+ "from typing import TypedDict\n",
51
+ "from dataclasses import dataclass\n",
52
+ "from langgraph.runtime import get_runtime\n",
53
+ "from langgraph.checkpoint.memory import InMemorySaver\n",
54
+ "from langchain_community.utilities import SQLDatabase\n",
55
+ "from langchain_core.tools import tool\n",
56
+ "from langchain.agents import create_agent\n",
57
+ "\n",
58
+ "\n",
59
+ "\n",
60
+ "from dotenv import load_dotenv\n",
61
+ "load_dotenv()"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": 3,
67
+ "metadata": {},
68
+ "outputs": [],
69
+ "source": [
70
+ "import os \n",
71
+ "import json\n",
72
+ "import asyncio\n",
73
+ "from langchain_community.tools import DuckDuckGoSearchRun\n",
74
+ "\n",
75
+ "# Initialize search\n",
76
+ "search_tool = DuckDuckGoSearchRun()\n",
77
+ "MERCHANT_CACHE_FILE = \"merchant_cache.json\"\n",
78
+ "\n",
79
+ "# Helper to load/save cache\n",
80
+ "def _load_cache():\n",
81
+ " if os.path.exists(MERCHANT_CACHE_FILE):\n",
82
+ " try:\n",
83
+ " with open(MERCHANT_CACHE_FILE, 'r') as f:\n",
84
+ " return json.load(f)\n",
85
+ " except:\n",
86
+ " return {}\n",
87
+ " return {}\n",
88
+ "\n",
89
+ "def _save_cache(cache):\n",
90
+ " with open(MERCHANT_CACHE_FILE, 'w') as f:\n",
91
+ " json.dump(cache, f)\n",
92
+ "\n",
93
+ "async def get_merchant_category_async(description, cache, sem):\n",
94
+ " \"\"\"Return the cached result for a description, or web-search it under the semaphore ('Unknown' on failure).\"\"\"\n",
95
+ " if description in cache:\n",
96
+ " return cache[description]\n",
97
+ " \n",
98
+ " async with sem:\n",
99
+ " try:\n",
100
+ " # Short fixed pause (not random) to space out requests slightly\n",
101
+ " await asyncio.sleep(0.05) \n",
102
+ " print(f\" 🔍 Web searching for: {description}...\")\n",
103
+ " # Run the search asynchronously\n",
104
+ " result = await search_tool.ainvoke(f\"What type of business / store is '{description}'? Whats is location\")\n",
105
+ " return result\n",
106
+ " except Exception as e:\n",
107
+ " print(f\" ⚠️ Search failed for {description}: {e}\")\n",
108
+ " return \"Unknown\"\n",
109
+ "\n",
110
+ "async def ingest_csv(file_path, llm, db_path=\"money_rag.db\"):\n",
111
+ " print(f\"📂 Processing {file_path}...\")\n",
112
+ " df = pd.read_csv(file_path)\n",
113
+ " headers = df.columns.tolist()\n",
114
+ " sample_data = df.head(10).to_json() \n",
115
+ "\n",
116
+ " # 1. LLM Mapping (Sync is fine here)\n",
117
+ " prompt = ChatPromptTemplate.from_template(\"\"\"\n",
118
+ " Act as a financial data parser. Analyze this CSV data:\n",
119
+ " Filename: {filename}\n",
120
+ " Headers: {headers}\n",
121
+ " Sample Data: {sample}\n",
122
+ "\n",
123
+ " TASK:\n",
124
+ " 1. Map the CSV columns to standard fields: date, description, amount, and category.\n",
125
+ " 2. Determine the 'sign_convention' for spending.\n",
126
+ " \n",
127
+ " RULES:\n",
128
+ " - If the filename suggests 'Discover' credit card, spending are usually POSITIVE.\n",
129
+ " - If the filename suggests 'Chase' credit card, spending are usually NEGATIVE.\n",
130
+ " \n",
131
+ " - Analyze the 'sign_convention' for spending (outflows):\n",
132
+ " - Look at the sample data for known merchants or spending patterns.\n",
133
+ " - If spending (like a restaurant or store) is NEGATIVE (e.g., -25.00), the convention is 'spending_is_negative'.\n",
134
+ " - If spending is POSITIVE (e.g., 25.00), the convention is 'spending_is_positive'.\n",
135
+ "\n",
136
+ " OUTPUT FORMAT (JSON ONLY):\n",
137
+ " {{\n",
138
+ " \"date_col\": \"column_name\",\n",
139
+ " \"desc_col\": \"column_name\",\n",
140
+ " \"amount_col\": \"column_name\",\n",
141
+ " \"category_col\": \"column_name or null\",\n",
142
+ " \"sign_convention\": \"spending_is_negative\" | \"spending_is_positive\"\n",
143
+ " }}\n",
144
+ " \"\"\")\n",
145
+ " \n",
146
+ " chain = prompt | llm | JsonOutputParser()\n",
147
+ " mapping = chain.invoke({\"headers\": headers, \"sample\": sample_data, \"filename\": file_path})\n",
148
+ "\n",
149
+ " # 2. Standardization\n",
150
+ " standard_df = pd.DataFrame()\n",
151
+ " standard_df['id'] = [str(uuid.uuid4()) for _ in range(len(df))]\n",
152
+ " standard_df['transaction_date'] = pd.to_datetime(df[mapping['date_col']])\n",
153
+ " standard_df['description'] = df[mapping['desc_col']]\n",
154
+ " \n",
155
+ " # Normalization Logic\n",
156
+ " # SELECT * so every column, including enriched_info, is loaded\n",
157
+ " if mapping['sign_convention'] == \"spending_is_negative\":\n",
158
+ " standard_df['amount'] = raw_amounts * -1\n",
159
+ " else:\n",
160
+ " standard_df['amount'] = raw_amounts\n",
161
+ " \n",
162
+ " standard_df['category'] = df[mapping['category_col']] if mapping.get('category_col') else 'Uncategorized'\n",
163
+ " standard_df['source_file'] = file_path.split(\"/\")[-1]\n",
164
+ "\n",
165
+ " # 3. --- Async Enrichment Step ---\n",
166
+ " print(\" ✨ Enriching descriptions (Async)...\")\n",
167
+ " unique_descriptions = standard_df['description'].unique()\n",
168
+ " \n",
169
+ " # Load cache once\n",
170
+ " cache = _load_cache()\n",
171
+ " \n",
172
+ " # Create a semaphore to limit concurrent web searches (e.g. 5)\n",
173
+ " sem = asyncio.Semaphore(5)\n",
174
+ " \n",
175
+ " # Create tasks for all descriptions\n",
176
+ " # Note: every unique description gets a task; cached ones return immediately without a web search\n",
177
+ " tasks = []\n",
178
+ " for desc in unique_descriptions:\n",
179
+ " # When enriched_info is present, the combined text looks like: \"MCDONALDS - Fast food chain...\"\n",
180
+ " \n",
181
+ " # Run in parallel\n",
182
+ " results = await asyncio.gather(*tasks)\n",
183
+ " \n",
184
+ " # Update cache object with new results\n",
185
+ " for desc, res in zip(unique_descriptions, results):\n",
186
+ " cache[desc] = res\n",
187
+ " \n",
188
+ " # Save cache back to disk\n",
189
+ " _save_cache(cache)\n",
190
+ " \n",
191
+ " # Map back\n",
192
+ " desc_map = dict(zip(unique_descriptions, results))\n",
193
+ " standard_df['enriched_info'] = standard_df['description'].map(desc_map).fillna(\"\")\n",
194
+ "\n",
195
+ " # 4. Save to DB\n",
196
+ " conn = sqlite3.connect(db_path)\n",
197
+ " standard_df.to_sql(\"transactions\", conn, if_exists=\"append\", index=False)\n",
198
+ " conn.close()\n",
199
+ " \n",
200
+ " print(f\"✅ Ingested {len(standard_df)} rows from {file_path.split('/')[-1]}. Logic: {mapping['sign_convention']}\")"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": 4,
206
+ "metadata": {},
207
+ "outputs": [
208
+ {
209
+ "name": "stdout",
210
+ "output_type": "stream",
211
+ "text": [
212
+ "📂 Processing /Users/sawale/Documents/learning/money_rag/demo_data/Discover-AllAvailable-20260110.csv...\n",
213
+ " ✨ Enriching descriptions (Async)...\n",
214
+ " 🔍 Web searching for: BACK MARKET BROOKLYN NY...\n",
215
+ " 🔍 Web searching for: TEMU.COM 8884958368 DE...\n",
216
+ " 🔍 Web searching for: WALMART STORE 00332 HUNTSVILLE AL...\n",
217
+ " 🔍 Web searching for: $100 STATEMENT CREDIT W 1ST PU...\n",
218
+ " 🔍 Web searching for: PY *KUNG-FU TEA AL HUNTSVILLE AL...\n",
219
+ " 🔍 Web searching for: MADISON MONTGOMERY AL...\n",
220
+ " 🔍 Web searching for: INTERNET PAYMENT - THANK YOU...\n",
221
+ " 🔍 Web searching for: GRUBHUB - UNIVERSITY OF HUNTSVILLE AL...\n",
222
+ " 🔍 Web searching for: MINT MOBILE 800-683-7392 CA...\n",
223
+ " 🔍 Web searching for: POPEYES 2577 HUNTSVILLE AL...\n",
224
+ " 🔍 Web searching for: 88 BUFFET HUNTSVILLE AL...\n",
225
+ " 🔍 Web searching for: VIET HUONG VIETNAMESE RE HUNTSVILLE AL...\n",
226
+ " 🔍 Web searching for: CASHBACK BONUS REDEMPTION PYMT/STMT CRDT...\n",
227
+ " 🔍 Web searching for: SPO*THECURRYMODERNINDIAN HUNTSVILLE AL...\n",
228
+ " 🔍 Web searching for: H&M 0273HUNTSVILLE HUNTSVILLE ALUS0273001241222182740...\n",
229
+ " 🔍 Web searching for: INDIAN BAZAAR HUNTSVILLE AL...\n",
230
+ " 🔍 Web searching for: HANDELS HOMEMADE HUNTSVI HUNTSVILLE AL...\n",
231
+ " 🔍 Web searching for: UAH COLLEGE 256-824-6170 AL...\n",
232
+ " 🔍 Web searching for: UAH COLLEGE FSF 800-346-9252 MA...\n",
233
+ " 🔍 Web searching for: CHIPOTLE 1687 NASHVILLE TN...\n",
234
+ " 🔍 Web searching for: TST*PIE TOWN TACOS - F NASHVILLE TN00153526022200965677AA...\n",
235
+ " 🔍 Web searching for: INDIAN BAZAAR HUNTSVILLE ALGOOGLE PAY ENDING IN 8984...\n",
236
+ " 🔍 Web searching for: INDIA MART HUNTSVILLE ALGOOGLE PAY ENDING IN 8984...\n",
237
+ " 🔍 Web searching for: PAYPAL *KEVDUDE1186 KEV 888-221-1161 CA...\n",
238
+ " 🔍 Web searching for: LYFT *RIDE WED 10AM 8552800278 CA...\n",
239
+ " 🔍 Web searching for: SKECHERS USA INC 1069 HUNTSVILLE AL...\n",
240
+ " 🔍 Web searching for: STORE HUNTSVILLE AL...\n",
241
+ " 🔍 Web searching for: LYFT *RIDE WED 3PM 8552800278 CA...\n",
242
+ " 🔍 Web searching for: SQ *TAQUERIA LAS ADELI HUNTSVILLE AL0002305843021411201895...\n",
243
+ " 🔍 Web searching for: UAH HUNTSVILLE DUNKIN HUNTSVILLE AL...\n",
244
+ " 🔍 Web searching for: WALMART.COM 800-925-6278 AR...\n",
245
+ " 🔍 Web searching for: WALMART.COM 8009256278 BENTONVILLE AR...\n",
246
+ " 🔍 Web searching for: TOUS LES JOURS - HUNTSVI HUNTSVILLE AL...\n",
247
+ " 🔍 Web searching for: MARSHALLS #422 HUNTSVILLE AL...\n",
248
+ " 🔍 Web searching for: ROSS STORE #2436 HUNTSVILLE AL...\n",
249
+ " 🔍 Web searching for: SPRINTAX NR TAX 8882038900 NY...\n",
250
+ " 🔍 Web searching for: USPS PO 0142460804 HUNTSVILLE AL...\n",
251
+ " 🔍 Web searching for: CHIPOTLE 1796 HUNTSVILLE ALGOOGLE PAY ENDING IN 8984...\n",
252
+ " 🔍 Web searching for: TST*POURHOUSE HUNTSVILLE AL00031984024314246667AA...\n",
253
+ " 🔍 Web searching for: TST*WOKS UP HUNTSVILLE AL00075396024313993332AA...\n",
254
+ " 🔍 Web searching for: SPIRIT AIRLINES 8014012222 FL...\n",
255
+ " 🔍 Web searching for: CHIPOTLE 1796 HUNTSVILLE AL...\n",
256
+ " 🔍 Web searching for: UAH BURSARS OFFICE HUNTSVILLE AL...\n",
257
+ " 🔍 Web searching for: STARS AND STRIKES - HUNT HUNTSVILLE AL...\n",
258
+ " 🔍 Web searching for: ROSS STORES #620 HUNTSVILLE AL...\n",
259
+ " 🔍 Web searching for: TST*KAMADO RAMEN - MID HUNTSVILLE AL00006963025030352515AA...\n",
260
+ " 🔍 Web searching for: SQ *MOM'SCLAYCO HARVEST AL0002305843022068424398...\n",
261
+ " 🔍 Web searching for: DOLLARTREE HUNTSVILLE AL...\n",
262
+ " 🔍 Web searching for: SLIM & HUSKIES NASHVILLE TN...\n",
263
+ " 🔍 Web searching for: CHIPOTLE 1392 SANTA MONICA CA...\n",
264
+ " 🔍 Web searching for: DOLLAR TREE LAS VEGAS NV...\n",
265
+ " 🔍 Web searching for: LYFT *RIDE TUE 12AM 8552800278 CA...\n",
266
+ " 🔍 Web searching for: SQ *SHIKU GCM LOS ANGELES CA0001152921515467218869...\n",
267
+ " 🔍 Web searching for: SQ *SHIKU GCM LOS ANGELES CA0001152921515467211997...\n",
268
+ " 🔍 Web searching for: WALMART STORE 05686 BURBANK CA...\n",
269
+ " 🔍 Web searching for: CAFE BELLA NEWPORT SAN DIEGO CAGOOGLE PAY ENDING IN 8984...\n",
270
+ " 🔍 Web searching for: CHIPOTLE 2883 NORTH LAS VEGNVGOOGLE PAY ENDING IN 8984...\n",
271
+ " 🔍 Web searching for: SHELL10006319007 HESPERIA CAGOOGLE PAY ENDING IN 8984...\n",
272
+ " 🔍 Web searching for: PANDA EXPRESS #1964 LAS VEGAS NV...\n",
273
+ " 🔍 Web searching for: DENNY'S #0141 QR LAS VEGAS NVGOOGLE PAY ENDING IN 8984...\n",
274
+ " 🔍 Web searching for: LAS VEGAS SOUVENIRS AND LAS VEGAS NV...\n",
275
+ " 🔍 Web searching for: CTLP*FIRST CLASS VENDI BELLGARDENS CA...\n",
276
+ " 🔍 Web searching for: SHELL12874333011 FRANKLIN TN...\n",
277
+ " 🔍 Web searching for: AMARAVATI INDIAN CUISINE BRENTWOOD TNGOOGLE PAY ENDING IN 8984...\n",
278
+ " 🔍 Web searching for: CENTRAL MARKET NASHVILLE TN...\n",
279
+ " 🔍 Web searching for: TST*PRINCES HOT CHICKE NASHVILLE TN00104605025320544723AA...\n",
280
+ " 🔍 Web searching for: TST*PRINCES HOT CHICKE NASHVILLE TN00104605025321087148AA...\n",
281
+ " 🔍 Web searching for: WALMART STORE 05616 NASHVILLE TN...\n",
282
+ " 🔍 Web searching for: PY *KUNG-FU TEA AL HUNTSVILLE ALGOOGLE PAY ENDING IN 8984...\n",
283
+ " 🔍 Web searching for: 2LEVY R&C CHATTANOOGA TNGOOGLE PAY ENDING IN 8984...\n",
284
+ "✅ Ingested 124 rows from Discover-AllAvailable-20260110.csv. Logic: spending_is_positive\n",
285
+ "📂 Processing /Users/sawale/Documents/learning/money_rag/demo_data/Chase5282_Activity20240110_20260110_20260111.CSV...\n",
286
+ " ✨ Enriching descriptions (Async)...\n",
287
+ " 🔍 Web searching for: TOUS LES JOURS - HUNTSVIL...\n",
288
+ " 🔍 Web searching for: Payment Thank You-Mobile...\n",
289
+ " 🔍 Web searching for: INDIAN BAZAAR...\n",
290
+ " 🔍 Web searching for: TST*BLUE OAK BBQ-HUNTSVI...\n",
291
+ " 🔍 Web searching for: AMC 4112 VAL BEND 18...\n",
292
+ " 🔍 Web searching for: HANDELS HOMEMADE JONES V...\n",
293
+ " 🔍 Web searching for: PAYYOURSELFBACK CREDIT...\n",
294
+ " 🔍 Web searching for: TST* HYDERABAD HOUSE...\n",
295
+ " 🔍 Web searching for: PATEL BROTHERS NASHVILLE...\n",
296
+ " 🔍 Web searching for: CITY OF HUNTSVILLE...\n",
297
+ " 🔍 Web searching for: WM SUPERCENTER #332...\n",
298
+ " 🔍 Web searching for: WAL-MART #0332...\n",
299
+ " 🔍 Web searching for: AMAZON MKTPL*OS1RI3LN3...\n",
300
+ " 🔍 Web searching for: TST* HATTIE B'S HUNTSVILL...\n",
301
+ " 🔍 Web searching for: AMAZON MKTPL*BI23Z6JR0...\n",
302
+ " 🔍 Web searching for: AMAZON MKTPL*BI9IW9OS2...\n",
303
+ " 🔍 Web searching for: AMAZON MKTPL*BI0296OJ2...\n",
304
+ " 🔍 Web searching for: AMAZON MKTPL*BB71A2881...\n",
305
+ " 🔍 Web searching for: AMAZON MKTPL*BB3FU2UQ2...\n",
306
+ " 🔍 Web searching for: AMAZON MKTPL*BI03P1OX2...\n",
307
+ " 🔍 Web searching for: AMAZON MKTPL*BB92U9QK2...\n",
308
+ " 🔍 Web searching for: AMAZON MKTPL*BB9TA14Q0...\n",
309
+ " 🔍 Web searching for: 88 BUFFET...\n",
310
+ " 🔍 Web searching for: AMAZON MKTPL*BB0DC71B1...\n",
311
+ " 🔍 Web searching for: AMAZON MKTPL*B20NN4ID0...\n",
312
+ " 🔍 Web searching for: AMAZON MKTPL*B273C1WY2...\n",
313
+ " 🔍 Web searching for: AMAZON MKTPL*B27IN41E1...\n",
314
+ " 🔍 Web searching for: AMAZON MKTPL*B250Z60P1...\n",
315
+ " 🔍 Web searching for: BEST BUY 00005140...\n",
316
+ " 🔍 Web searching for: DAVES HOT CHICKEN 1282...\n",
317
+ " 🔍 Web searching for: SQ *VIETCUISINE LLC...\n",
318
+ " 🔍 Web searching for: CHICK-FIL-A #00579...\n",
319
+ " 🔍 Web searching for: COSTCO WHSE #0356...\n",
320
+ " 🔍 Web searching for: AMAZON MKTPL*NK4AM43Q2...\n",
321
+ " 🔍 Web searching for: HUNTSVILLE FLV...\n",
322
+ " 🔍 Web searching for: AMAZON MKTPL*NM1H055K0...\n",
323
+ " 🔍 Web searching for: MAPCO EXPRESS #3403...\n",
324
+ " 🔍 Web searching for: DUNKIN #346212 Q35...\n",
325
+ " 🔍 Web searching for: CENTRAL MARKET...\n",
326
+ " 🔍 Web searching for: TARA INTERNATIONAL MARKET...\n",
327
+ " 🔍 Web searching for: BOTAN MARKET INC...\n",
328
+ " 🔍 Web searching for: AMARAVATI INDIAN CUISINE...\n",
329
+ " 🔍 Web searching for: GRUBHUB - UNIVERSITY OF A...\n",
330
+ " 🔍 Web searching for: BURGER KING #4959...\n",
331
+ " 🔍 Web searching for: PANDA EXPRESS #3013...\n",
332
+ " 🔍 Web searching for: MCDONALD'S F2431...\n",
333
+ " 🔍 Web searching for: ENDZONE COLLECTIBLES...\n",
334
+ " 🔍 Web searching for: ZIMMAD EVE* ZIMMAD JOI...\n",
335
+ " 🔍 Web searching for: SQ *SPILL COFFEE AND CREA...\n",
336
+ " 🔍 Web searching for: 10267 CAVA WHITESBURG...\n",
337
+ " 🔍 Web searching for: SPO*DRAGONSFORGECAFE...\n",
338
+ " 🔍 Web searching for: UAH BURSARS OFFICE...\n",
339
+ " 🔍 Web searching for: MARATHON PETRO42804...\n",
340
+ " 🔍 Web searching for: TST*NOTHING BUT NOODLES...\n",
341
+ " 🔍 Web searching for: VEDA INDIAN CUISINE...\n",
342
+ " 🔍 Web searching for: DOLLARTREE...\n",
343
+ " 🔍 Web searching for: TARGET 00013466...\n",
344
+ " 🔍 Web searching for: POPEYES 2577...\n",
345
+ " 🔍 Web searching for: DEORALI GROCERY...\n",
346
+ " 🔍 Web searching for: HELLO ATLANTA #33...\n",
347
+ " 🔍 Web searching for: SKY VIEW ATLANTA...\n",
348
+ " 🔍 Web searching for: STARBUCKS 25111...\n",
349
+ " 🔍 Web searching for: BP#8998205AM/PM WADE GRE...\n",
350
+ " 🔍 Web searching for: Waffle House 0857...\n",
351
+ " 🔍 Web searching for: CINEMARK 1131 BOXCON...\n",
352
+ " 🔍 Web searching for: CINEMARK 1131 RSTBAR...\n",
353
+ " 🔍 Web searching for: HOMEGOODS # 0568...\n",
354
+ " 🔍 Web searching for: ASIAN MARKET...\n",
355
+ " 🔍 Web searching for: PANDA EXPRESS #2005...\n",
356
+ " 🔍 Web searching for: STARS AND STRIKES - HUNTS...\n",
357
+ " 🔍 Web searching for: WAL-MART #332...\n",
358
+ "✅ Ingested 126 rows from Chase5282_Activity20240110_20260110_20260111.CSV. Logic: spending_is_negative\n"
359
+ ]
360
+ }
361
+ ],
362
+ "source": [
363
+ "path1 = \"/Users/sawale/Documents/learning/money_rag/demo_data/Discover-AllAvailable-20260110.csv\"\n",
364
+ "path2 = \"/Users/sawale/Documents/learning/money_rag/demo_data/Chase5282_Activity20240110_20260110_20260111.CSV\"\n",
365
+ "\n",
366
+ "# Initialize the Gemini model via Vertex AI\n",
367
+ "vertex_llm = init_chat_model(\n",
368
+ " \"gemini-2.5-flash\", \n",
369
+ " model_provider=\"google_vertexai\",\n",
370
+ " project='gen-lang-client-0311515393',\n",
371
+ " location='us-central1',\n",
372
+ ")\n",
373
+ "\n",
374
+ "# Run async functions in Jupyter\n",
375
+ "# We run them sequentially here to avoid file lock issues with SQLite if both try to write at once\n",
376
+ "# (Though SQLite handles concurrency, keeping ingestion strictly ordered is safer for the demo)\n",
377
+ "await ingest_csv(path1, vertex_llm)\n",
378
+ "await ingest_csv(path2, vertex_llm)"
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": 5,
384
+ "metadata": {},
385
+ "outputs": [
386
+ {
387
+ "data": {
388
+ "text/html": [
389
+ "<div>\n",
390
+ "<style scoped>\n",
391
+ " .dataframe tbody tr th:only-of-type {\n",
392
+ " vertical-align: middle;\n",
393
+ " }\n",
394
+ "\n",
395
+ " .dataframe tbody tr th {\n",
396
+ " vertical-align: top;\n",
397
+ " }\n",
398
+ "\n",
399
+ " .dataframe thead th {\n",
400
+ " text-align: right;\n",
401
+ " }\n",
402
+ "</style>\n",
403
+ "<table border=\"1\" class=\"dataframe\">\n",
404
+ " <thead>\n",
405
+ " <tr style=\"text-align: right;\">\n",
406
+ " <th></th>\n",
407
+ " <th>id</th>\n",
408
+ " <th>transaction_date</th>\n",
409
+ " <th>description</th>\n",
410
+ " <th>amount</th>\n",
411
+ " <th>category</th>\n",
412
+ " <th>source_file</th>\n",
413
+ " <th>enriched_info</th>\n",
414
+ " </tr>\n",
415
+ " </thead>\n",
416
+ " <tbody>\n",
417
+ " <tr>\n",
418
+ " <th>0</th>\n",
419
+ " <td>cf2f48c5-34ab-4544-b1f4-542681fd5017</td>\n",
420
+ " <td>2024-10-17 00:00:00</td>\n",
421
+ " <td>BACK MARKET BROOKLYN NY</td>\n",
422
+ " <td>231.19</td>\n",
423
+ " <td>Merchandise</td>\n",
424
+ " <td>Discover-AllAvailable-20260110.csv</td>\n",
425
+ " <td>Online Retailer in New York, NY . See BBB rati...</td>\n",
426
+ " </tr>\n",
427
+ " <tr>\n",
428
+ " <th>1</th>\n",
429
+ " <td>16913dce-52bf-43f4-853d-7bb55e09aac5</td>\n",
430
+ " <td>2024-10-18 00:00:00</td>\n",
431
+ " <td>TEMU.COM 8884958368 DE</td>\n",
432
+ " <td>16.51</td>\n",
433
+ " <td>Merchandise</td>\n",
434
+ " <td>Discover-AllAvailable-20260110.csv</td>\n",
435
+ " <td>Temu ' s business model has allowed it to beco...</td>\n",
436
+ " </tr>\n",
437
+ " <tr>\n",
438
+ " <th>2</th>\n",
439
+ " <td>5917cb10-f0e6-49fa-9987-776cee28688c</td>\n",
440
+ " <td>2024-10-18 00:00:00</td>\n",
441
+ " <td>WALMART STORE 00332 HUNTSVILLE AL</td>\n",
442
+ " <td>146.73</td>\n",
443
+ " <td>Merchandise</td>\n",
444
+ " <td>Discover-AllAvailable-20260110.csv</td>\n",
445
+ " <td>Posts Walmart Huntsville - University Drive De...</td>\n",
446
+ " </tr>\n",
447
+ " <tr>\n",
448
+ " <th>3</th>\n",
449
+ " <td>bd66a49e-5064-43ca-bdb2-94947297b6d5</td>\n",
450
+ " <td>2024-10-18 00:00:00</td>\n",
451
+ " <td>$100 STATEMENT CREDIT W 1ST PU</td>\n",
452
+ " <td>-100.00</td>\n",
453
+ " <td>Awards and Rebate Credits</td>\n",
454
+ " <td>Discover-AllAvailable-20260110.csv</td>\n",
455
+ " <td>That said, I woke up yesterday and saw a $ 100...</td>\n",
456
+ " </tr>\n",
457
+ " <tr>\n",
458
+ " <th>4</th>\n",
459
+ " <td>8b9b16d4-dd81-4ebe-820f-62fdc827a0bb</td>\n",
460
+ " <td>2024-11-02 00:00:00</td>\n",
461
+ " <td>PY *KUNG-FU TEA AL HUNTSVILLE AL</td>\n",
462
+ " <td>8.09</td>\n",
463
+ " <td>Restaurants</td>\n",
464
+ " <td>Discover-AllAvailable-20260110.csv</td>\n",
465
+ " <td>Jan 22, 2021 · Best part to me--besides the ro...</td>\n",
466
+ " </tr>\n",
467
+ " <tr>\n",
468
+ " <th>...</th>\n",
469
+ " <td>...</td>\n",
470
+ " <td>...</td>\n",
471
+ " <td>...</td>\n",
472
+ " <td>...</td>\n",
473
+ " <td>...</td>\n",
474
+ " <td>...</td>\n",
475
+ " <td>...</td>\n",
476
+ " </tr>\n",
477
+ " <tr>\n",
478
+ " <th>245</th>\n",
479
+ " <td>239f983e-2ed8-4454-90ad-0e8477bf7893</td>\n",
480
+ " <td>2025-06-18 00:00:00</td>\n",
481
+ " <td>PANDA EXPRESS #2005</td>\n",
482
+ " <td>52.87</td>\n",
483
+ " <td>Food &amp; Drink</td>\n",
484
+ " <td>Chase5282_Activity20240110_20260110_20260111.CSV</td>\n",
485
+ " <td>Aug 29, 2025 · The group's headquarters is loc...</td>\n",
486
+ " </tr>\n",
487
+ " <tr>\n",
488
+ " <th>246</th>\n",
489
+ " <td>0ecba974-5491-4ef2-ba52-bf3c37d6e854</td>\n",
490
+ " <td>2025-06-14 00:00:00</td>\n",
491
+ " <td>Payment Thank You-Mobile</td>\n",
492
+ " <td>-62.07</td>\n",
493
+ " <td>None</td>\n",
494
+ " <td>Chase5282_Activity20240110_20260110_20260111.CSV</td>\n",
495
+ " <td>Sep 25, 2025 · Thank You Mobile is not a compa...</td>\n",
496
+ " </tr>\n",
497
+ " <tr>\n",
498
+ " <th>247</th>\n",
499
+ " <td>fb966472-d77a-4723-a9e6-15e63bda4cd2</td>\n",
500
+ " <td>2025-06-12 00:00:00</td>\n",
501
+ " <td>STARS AND STRIKES - HUNTS</td>\n",
502
+ " <td>21.80</td>\n",
503
+ " <td>Entertainment</td>\n",
504
+ " <td>Chase5282_Activity20240110_20260110_20260111.CSV</td>\n",
505
+ " <td>At our Huntsville , AL location , we pride our...</td>\n",
506
+ " </tr>\n",
507
+ " <tr>\n",
508
+ " <th>248</th>\n",
509
+ " <td>b6bc2e1d-d68b-4acd-a201-46d8cf5175e5</td>\n",
510
+ " <td>2025-06-11 00:00:00</td>\n",
511
+ " <td>WAL-MART #332</td>\n",
512
+ " <td>4.47</td>\n",
513
+ " <td>Groceries</td>\n",
514
+ " <td>Chase5282_Activity20240110_20260110_20260111.CSV</td>\n",
515
+ " <td>3 days ago · Walmart Inc. is an American multi...</td>\n",
516
+ " </tr>\n",
517
+ " <tr>\n",
518
+ " <th>249</th>\n",
519
+ " <td>e896adcc-5445-4626-b14c-09ebd5c9f658</td>\n",
520
+ " <td>2025-06-11 00:00:00</td>\n",
521
+ " <td>WAL-MART #332</td>\n",
522
+ " <td>57.60</td>\n",
523
+ " <td>Groceries</td>\n",
524
+ " <td>Chase5282_Activity20240110_20260110_20260111.CSV</td>\n",
525
+ " <td>3 days ago · Walmart Inc. is an American multi...</td>\n",
526
+ " </tr>\n",
527
+ " </tbody>\n",
528
+ "</table>\n",
529
+ "<p>250 rows × 7 columns</p>\n",
530
+ "</div>"
531
+ ],
532
+ "text/plain": [
533
+ " id transaction_date \\\n",
534
+ "0 cf2f48c5-34ab-4544-b1f4-542681fd5017 2024-10-17 00:00:00 \n",
535
+ "1 16913dce-52bf-43f4-853d-7bb55e09aac5 2024-10-18 00:00:00 \n",
536
+ "2 5917cb10-f0e6-49fa-9987-776cee28688c 2024-10-18 00:00:00 \n",
537
+ "3 bd66a49e-5064-43ca-bdb2-94947297b6d5 2024-10-18 00:00:00 \n",
538
+ "4 8b9b16d4-dd81-4ebe-820f-62fdc827a0bb 2024-11-02 00:00:00 \n",
539
+ ".. ... ... \n",
540
+ "245 239f983e-2ed8-4454-90ad-0e8477bf7893 2025-06-18 00:00:00 \n",
541
+ "246 0ecba974-5491-4ef2-ba52-bf3c37d6e854 2025-06-14 00:00:00 \n",
542
+ "247 fb966472-d77a-4723-a9e6-15e63bda4cd2 2025-06-12 00:00:00 \n",
543
+ "248 b6bc2e1d-d68b-4acd-a201-46d8cf5175e5 2025-06-11 00:00:00 \n",
544
+ "249 e896adcc-5445-4626-b14c-09ebd5c9f658 2025-06-11 00:00:00 \n",
545
+ "\n",
546
+ " description amount category \\\n",
547
+ "0 BACK MARKET BROOKLYN NY 231.19 Merchandise \n",
548
+ "1 TEMU.COM 8884958368 DE 16.51 Merchandise \n",
549
+ "2 WALMART STORE 00332 HUNTSVILLE AL 146.73 Merchandise \n",
550
+ "3 $100 STATEMENT CREDIT W 1ST PU -100.00 Awards and Rebate Credits \n",
551
+ "4 PY *KUNG-FU TEA AL HUNTSVILLE AL 8.09 Restaurants \n",
552
+ ".. ... ... ... \n",
553
+ "245 PANDA EXPRESS #2005 52.87 Food & Drink \n",
554
+ "246 Payment Thank You-Mobile -62.07 None \n",
555
+ "247 STARS AND STRIKES - HUNTS 21.80 Entertainment \n",
556
+ "248 WAL-MART #332 4.47 Groceries \n",
557
+ "249 WAL-MART #332 57.60 Groceries \n",
558
+ "\n",
559
+ " source_file \\\n",
560
+ "0 Discover-AllAvailable-20260110.csv \n",
561
+ "1 Discover-AllAvailable-20260110.csv \n",
562
+ "2 Discover-AllAvailable-20260110.csv \n",
563
+ "3 Discover-AllAvailable-20260110.csv \n",
564
+ "4 Discover-AllAvailable-20260110.csv \n",
565
+ ".. ... \n",
566
+ "245 Chase5282_Activity20240110_20260110_20260111.CSV \n",
567
+ "246 Chase5282_Activity20240110_20260110_20260111.CSV \n",
568
+ "247 Chase5282_Activity20240110_20260110_20260111.CSV \n",
569
+ "248 Chase5282_Activity20240110_20260110_20260111.CSV \n",
570
+ "249 Chase5282_Activity20240110_20260110_20260111.CSV \n",
571
+ "\n",
572
+ " enriched_info \n",
573
+ "0 Online Retailer in New York, NY . See BBB rati... \n",
574
+ "1 Temu ' s business model has allowed it to beco... \n",
575
+ "2 Posts Walmart Huntsville - University Drive De... \n",
576
+ "3 That said, I woke up yesterday and saw a $ 100... \n",
577
+ "4 Jan 22, 2021 · Best part to me--besides the ro... \n",
578
+ ".. ... \n",
579
+ "245 Aug 29, 2025 · The group's headquarters is loc... \n",
580
+ "246 Sep 25, 2025 · Thank You Mobile is not a compa... \n",
581
+ "247 At our Huntsville , AL location , we pride our... \n",
582
+ "248 3 days ago · Walmart Inc. is an American multi... \n",
583
+ "249 3 days ago · Walmart Inc. is an American multi... \n",
584
+ "\n",
585
+ "[250 rows x 7 columns]"
586
+ ]
587
+ },
588
+ "execution_count": 5,
589
+ "metadata": {},
590
+ "output_type": "execute_result"
591
+ }
592
+ ],
593
+ "source": [
594
+ "\n",
595
+ "import sqlite3\n",
596
+ "import pandas as pd\n",
597
+ "\n",
598
+ "# Connect to the database\n",
599
+ "conn = sqlite3.connect(\"money_rag.db\")\n",
600
+ "\n",
601
+ "# Query all transactions\n",
602
+ "df_view = pd.read_sql_query(\"SELECT * FROM transactions\", conn)\n",
603
+ "\n",
604
+ "# Close connection\n",
605
+ "conn.close()\n",
606
+ "\n",
607
+ "# Display the data\n",
608
+ "df_view"
609
+ ]
610
+ },
611
+ {
612
+ "cell_type": "code",
613
+ "execution_count": 6,
614
+ "metadata": {},
615
+ "outputs": [],
616
+ "source": [
617
+ "# df_view[\"amount\"].sum()"
618
+ ]
619
+ },
620
+ {
621
+ "cell_type": "code",
622
+ "execution_count": 7,
623
+ "metadata": {},
624
+ "outputs": [
625
+ {
626
+ "name": "stderr",
627
+ "output_type": "stream",
628
+ "text": [
629
+ "/Users/sawale/Documents/learning/money_rag/.venv/lib/python3.12/site-packages/vertexai/_model_garden/_model_garden_models.py:278: UserWarning: This feature is deprecated as of June 24, 2025 and will be removed on June 24, 2026. For details, see https://cloud.google.com/vertex-ai/generative-ai/docs/deprecations/genai-vertexai-sdk.\n",
630
+ " warning_logs.show_deprecation_warning()\n"
631
+ ]
632
+ },
633
+ {
634
+ "name": "stdout",
635
+ "output_type": "stream",
636
+ "text": [
637
+ "✅ Synced 250 records to Qdrant at 'qdrant_db/'.\n"
638
+ ]
639
+ }
640
+ ],
641
+ "source": [
642
+ "db = SQLDatabase.from_uri(\"sqlite:///money_rag.db\")\n",
643
+ "\n",
644
+ "embeddings = VertexAIEmbeddings(model_name=\"text-embedding-005\")\n",
645
+ "\n",
646
+ "\n",
647
+ "# Initialize Qdrant with disk persistence\n",
648
+ "# \"path\" creates a local directory to store the vectors\n",
649
+ "qdrant_client = QdrantClient(path=\"qdrant_db\") \n",
650
+ "COLLECTION_NAME = \"transactions\"\n",
651
+ "\n",
652
+ "def sync_to_qdrant(db_path: str):\n",
653
+ " \"\"\"Sync the transactions table from SQLite to Qdrant vector store.\"\"\"\n",
654
+ " # Load data from the database\n",
655
+ " conn = sqlite3.connect(db_path)\n",
656
+ " # Ensure we select the new column\n",
657
+ " df = pd.read_sql_query(\"SELECT * FROM transactions\", conn)\n",
658
+ " conn.close()\n",
659
+ "\n",
660
+ " # Recreate collection to ensure clean state on re-ingestion\n",
661
+ " if qdrant_client.collection_exists(COLLECTION_NAME):\n",
662
+ " qdrant_client.delete_collection(COLLECTION_NAME)\n",
663
+ " \n",
664
+ " qdrant_client.create_collection(\n",
665
+ " collection_name=COLLECTION_NAME,\n",
666
+ " vectors_config=VectorParams(size=768, distance=Distance.COSINE),\n",
667
+ " )\n",
668
+ " \n",
669
+ " vector_store = QdrantVectorStore(\n",
670
+ " client=qdrant_client,\n",
671
+ " collection_name=COLLECTION_NAME,\n",
672
+ " embedding=embeddings,\n",
673
+ " )\n",
674
+ "\n",
675
+ " # Use description + enrichment as the main text for embedding\n",
676
+ " texts = []\n",
677
+ " for _, row in df.iterrows():\n",
678
+ " # Combine original description with the permanent enriched info\n",
679
+ " # If enriched_info is present, it looks like: \"MCDONALDS - Fast food chain...\"\n",
680
+ " enriched = row.get('enriched_info', '')\n",
681
+ " if enriched:\n",
682
+ " texts.append(f\"{row['description']} - {enriched}\")\n",
683
+ " else:\n",
684
+ " texts.append(str(row['description']))\n",
685
+ " \n",
686
+ " # Store other fields as metadata for correlation\n",
687
+ " metadatas = df[['id', 'amount', 'category', 'transaction_date']].to_dict('records')\n",
688
+ " # Convert timestamps to string for metadata compatibility\n",
689
+ " for m in metadatas:\n",
690
+ " m['transaction_date'] = str(m['transaction_date'])\n",
691
+ "\n",
692
+ " vector_store.add_texts(texts=texts, metadatas=metadatas)\n",
693
+ " print(f\"✅ Synced {len(texts)} records to Qdrant at 'qdrant_db/'.\")\n",
694
+ " return vector_store\n",
695
+ "\n",
696
+ "# Initialize store from DB\n",
697
+ "vector_store = sync_to_qdrant(\"money_rag.db\")"
698
+ ]
699
+ },
700
+ {
701
+ "cell_type": "code",
702
+ "execution_count": 8,
703
+ "metadata": {},
704
+ "outputs": [],
705
+ "source": [
706
+ "@dataclass\n",
707
+ "class RuntimeContext:\n",
708
+ " db: SQLDatabase\n",
709
+ " vector_store: QdrantVectorStore\n",
710
+ "\n",
711
+ "@tool\n",
712
+ "def execute_sql(query: str) -> str:\n",
713
+ " \"\"\"Execute a SQLite command and return results.\"\"\"\n",
714
+ " runtime = get_runtime(RuntimeContext)\n",
715
+ " db = runtime.context.db\n",
716
+ " try:\n",
717
+ " return db.run(query)\n",
718
+ " except Exception as e:\n",
719
+ " return f\"Error: {e}\"\n",
720
+ "\n",
721
+ "\n",
722
+ "@tool\n",
723
+ "def semantic_search(query: str, topk: int = 5) -> str:\n",
724
+ " \"\"\"\n",
725
+ " Search for transactions semantically when exact category or description matches are unknown.\n",
726
+ " Use this to find specific merchants or types of spending (e.g., 'streaming services' or 'fast food').\n",
727
+ " Returns a list of matching transactions with their IDs and metadata.\n",
728
+ " \"\"\"\n",
729
+ " runtime = get_runtime(RuntimeContext)\n",
730
+ " vs = runtime.context.vector_store\n",
731
+ " results = vs.similarity_search(query, k=topk)\n",
732
+ " \n",
733
+ " output = []\n",
734
+ " for doc in results:\n",
735
+ " output.append(f\"Result: {doc.page_content} | Metadata: {doc.metadata}\")\n",
736
+ " \n",
737
+ " return \"\\n\".join(output) if output else \"No semantically similar transactions found.\"\n",
738
+ "\n",
739
+ "\n",
740
+ "SYSTEM = f\"\"\"You are a sophisticated financial analyst with access to both a SQLite database and a semantic search tool.\n",
741
+ "\n",
742
+ "Workflow:\n",
743
+ "1. **Identify the Need**: If the user's request uses vague terms (e.g., \"junk food\", \"travel stuff\") or you don't know the exact category/description name, start with `semantic_search`.\n",
744
+ "2. **Semantic Discovery**:\n",
745
+ " - Call `semantic_search(query, topk=...)`. \n",
746
+ " - **Pro Tip**: Use a higher `topk` (e.g., 10 or 20) if you suspect there are many relevant transactions to find.\n",
747
+ " - **Relevance Check**: Not all results in the `topk` list may be relevant. You have permission to select only the few that match the user's intent and discard the rest.\n",
748
+ " - **Reiteration Logic**: \n",
749
+ " - If the initial results seem too narrow, or if you suspect more relevant transactions exist but were cut off, reiterate by calling `semantic_search` again with a higher `topk` (e.g., 20, 50).\n",
750
+ " - **Stop Condition**: If you found a few matching transactions and are confident that's all of them (i.e., the rest of the results are clearly irrelevant), do NOT iterate further.\n",
751
+ "3. **Filter & Extract**:\n",
752
+ " - Review the results from semantic search. Manually filter out any that aren't relevant to the user's specific intent.\n",
753
+ " - Extract the unique `id` values from the metadata of relevant results.\n",
754
+ "4. **Handling No Results**:\n",
755
+ " - If `semantic_search` returns nothing useful, or if the results are clearly not what the user asked for (e.g., user asked for \"coffee\" but results are all \"gas stations\"), STOP.\n",
756
+ " - Return a clear message: \"I couldn't find any transactions related to [topic].\"\n",
757
+ " - **DO NOT GUESS**. It is better to say you found nothing than to sum up unrelated transactions.\n",
758
+ "5. **SQL Execution**:\n",
759
+ " - Use `execute_sql` to perform the final calculation or retrieval.\n",
760
+ " - Correlate results by using the discovered IDs in your query: `SELECT SUM(amount) FROM transactions WHERE id IN ('uuid1', 'uuid2', ...)`.\n",
761
+ " - You can also use discovered merchant names if they share a common pattern.\n",
762
+ "\n",
763
+ "Rules:\n",
764
+ "- **Thinking**: Explain your plan before calling any tools.\n",
765
+ "- **Read-only**: No modifications (INSERT/UPDATE/DELETE/etc.) to the database.\n",
766
+ "- **Spending Logic**: \n",
767
+ " - Spending = POSITIVE values (> 0). \n",
768
+ " - Payments/Refunds = NEGATIVE values (< 0). EXCLUDE negative values when calculating spending.\n",
769
+ "- **SQL Formatting**: Limit results to 5 rows for non-aggregation queries. Use `SUM()` for totals.\n",
770
+ "\"\"\"\n",
771
+ "\n",
772
+ "agent = create_agent(\n",
773
+ " model=vertex_llm,\n",
774
+ " tools=[execute_sql, semantic_search],\n",
775
+ " system_prompt=SYSTEM,\n",
776
+ " context_schema=RuntimeContext,\n",
777
+ " checkpointer=InMemorySaver(),\n",
778
+ ")"
779
+ ]
780
+ },
781
+ {
782
+ "cell_type": "code",
783
+ "execution_count": 9,
784
+ "metadata": {},
785
+ "outputs": [
786
+ {
787
+ "name": "stdout",
788
+ "output_type": "stream",
789
+ "text": [
790
+ "Match: TST*WOKS UP HUNTSVILLE AL00075396024313993332AA - Take a look at these nine new businesses that have just opened or announced an opening here in the Rocket City. Check Dress Up - Huntsville in Huntsville , AL, 920 Bob Wallace Ave SW #317 on Cylex and find ☎ (256) 585-2..., contact info, ⌚ opening hours. We're posted at the WeUp location for just a couple more hours —so if you're hungry, now's the time to pull up ! We've had some amazing folks swing through today, including the owner of Ms. Juju's Kitchen and one of your favorite local barbers Mike —and YES, they're repeat customers because the food speaks for itself! Detailed info and reviews on 27 top companies and startups in Huntsville in 2026. Get the latest updates on their products, jobs, funding, investors, founders and more. Check Your Statement: Look for entries starting with \"TST\" on your debit card statement. Note the date, amount, and any additional details like a business name or code. Match with Receipts: Compare the charge to recent purchases, especially at restaurants, cafes, or bars. The amount should match your receipt or include a tip. | Metadata: {'id': '737481c4-fc9c-48a5-8cd9-9f2a02844c12', 'amount': 15.21, 'category': 'Restaurants', 'transaction_date': '2025-04-14 00:00:00', '_id': 'd461cada0bc542e6803cd9d14460ad05', '_collection_name': 'transactions'}\n",
791
+ "Match: TST*KAMADO RAMEN - MID HUNTSVILLE AL00006963025030352515AA - Kamado Ramen in Huntsville rated 4.7 out of 5 on Restaurant Guru: 896 reviews by visitors, 205 photos & 5 videos. Explore menu, check opening hours and order delivery Kamado Ramen : Spicy Miso Ramen 1022 Mid City Drive Huntsville , AL & 3414 Governors Dr. Huntsville , AL ( Mid City location and Stovehouse Food Garden Location ) Ramen is a hearty soup with quick-cooking noodles, often loaded with vegetables, meat, and egg. These two locations vary but carry similar items. Get address, phone number, hours, reviews, photos and more for Kamado Ramen Midcity | 1022 MidCity Drive, Huntsville , AL 35806, USA on usarestaurants.info Check Your Statement: Look for entries starting with \"TST\" on your debit card statement. Note the date, amount, and any additional details like a business name or code. Match with Receipts: Compare the charge to recent purchases, especially at restaurants, cafes, or bars. The amount should match your receipt or include a tip. Kamado Ramen at MidCity 1022 Mid City Drive, Huntsville , AL 35806 About Discussion 4 went 35 interested 3 shares | Metadata: {'id': 'cb6d01a0-0bab-4617-aba9-7e587b3ea32d', 'amount': 16.29, 'category': 'Restaurants', 'transaction_date': '2025-05-16 00:00:00', '_id': '9dc318e6b9e04a2f91e1a85ffbb375b8', '_collection_name': 'transactions'}\n",
792
+ "Match: AMAZON MKTPL*B27IN41E1 - In these cases, Amazon charges your payment method when each part of the order is shipped. This is why you have separate charges on your statement. Access Your Transactions to match the charge amounts and dates on your statement with the associated order number. Amazon is one of the world's most valuable brands and is one of the big five American information technology companies, along with Alphabet, Apple, Meta, and Microsoft. Some items Amazon sells include Show some love to small businesses . Donate to Kiva.org today to provide loans to local entrepreneurs and the Tripadvisor Foundation will match it, up to $150,000 USD collectively. Terms apply. Amazon mktpl ZG27L7451. General operating fund. Buildings and grounds.The UPS store 5242. General operating fund. Police department. Политика обработки персональных данных Согласие на обработку персональных данных Агентский Договор Все бренды. Мобильное приложение. Скачать в App Store Скачать в Google Play. | Metadata: {'id': 'ea13dad8-5978-442b-b4e4-7b22c226d72f', 'amount': 86.38, 'category': 'Shopping', 'transaction_date': '2025-11-24 00:00:00', '_id': '88754c75d8154f9e9912cf46cc51ade3', '_collection_name': 'transactions'}\n"
793
+ ]
794
+ }
795
+ ],
796
+ "source": [
797
+ "# # Initialize the store once\n",
798
+ "# # vector_store = sync_to_qdrant(\"money_rag.db\")\n",
799
+ "\n",
800
+ "# Test search\n",
801
+ "query = \"Where did I spend money on groceries or food?\"\n",
802
+ "results = vector_store.similarity_search(query, k=3)\n",
803
+ "\n",
804
+ "for doc in results:\n",
805
+ " print(f\"Match: {doc.page_content} | Metadata: {doc.metadata}\")"
806
+ ]
807
+ },
808
+ {
809
+ "cell_type": "code",
810
+ "execution_count": 10,
811
+ "metadata": {},
812
+ "outputs": [
813
+ {
814
+ "name": "stdout",
815
+ "output_type": "stream",
816
+ "text": [
817
+ "================================\u001b[1m Human Message \u001b[0m=================================\n",
818
+ "\n",
819
+ "ow much did i spend in haircuts\n",
820
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
821
+ "Tool Calls:\n",
822
+ " semantic_search (f8fc4ebf-658f-4c58-a163-6bc4308ae46a)\n",
823
+ " Call ID: f8fc4ebf-658f-4c58-a163-6bc4308ae46a\n",
824
+ " Args:\n",
825
+ " topk: 10.0\n",
826
+ " query: haircuts\n",
827
+ "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
828
+ "Name: semantic_search\n",
829
+ "\n",
830
+ "Result: TST*WOKS UP HUNTSVILLE AL00075396024313993332AA - Take a look at these nine new businesses that have just opened or announced an opening here in the Rocket City. Check Dress Up - Huntsville in Huntsville , AL, 920 Bob Wallace Ave SW #317 on Cylex and find ☎ (256) 585-2..., contact info, ⌚ opening hours. We're posted at the WeUp location for just a couple more hours —so if you're hungry, now's the time to pull up ! We've had some amazing folks swing through today, including the owner of Ms. Juju's Kitchen and one of your favorite local barbers Mike —and YES, they're repeat customers because the food speaks for itself! Detailed info and reviews on 27 top companies and startups in Huntsville in 2026. Get the latest updates on their products, jobs, funding, investors, founders and more. Check Your Statement: Look for entries starting with \"TST\" on your debit card statement. Note the date, amount, and any additional details like a business name or code. Match with Receipts: Compare the charge to recent purchases, especially at restaurants, cafes, or bars. The amount should match your receipt or include a tip. | Metadata: {'id': '737481c4-fc9c-48a5-8cd9-9f2a02844c12', 'amount': 15.21, 'category': 'Restaurants', 'transaction_date': '2025-04-14 00:00:00', '_id': 'd461cada0bc542e6803cd9d14460ad05', '_collection_name': 'transactions'}\n",
831
+ "Result: BEST BUY 00005140 - Visit your local Best Buy at 20290 Katy Fwy in Katy, TX for electronics, computers, appliances, cell phones, video games & more new tech. In- store pickup & free shipping. Learn about the \" Best Buy 00005140 Huntsville Al\" charge and why it appears on your credit card statement. First seen on March 20, 2023, Last updated on March 20, 2023. What is it?Also Appears on Statements As. Chkcardbest buy 00005140 huntsville al. Is it time you ditched store - bought bread and started making your own, being in total control of the ingredients and your health? You might just do that when you learn about the differences between homemade bread vs. store - bought bread. Best Buy Goes Out Of Business ?? Why Best Buy Is Tanking Hard - YouTube. Best Buy has closed 18 stores , but physical locations still key to strategy. Typically during the weekdays, AT&T store hours are from 9 AM to 9 PM on Monday through Friday. The weekend begins in a similar fashion, and the AT&T store will open at 9 AM and close at 9 PM as well for most locations . | Metadata: {'id': '112c618f-6a83-4bfd-bb37-1193a2632a80', 'amount': 816.42, 'category': 'Shopping', 'transaction_date': '2025-11-22 00:00:00', '_id': '2e1ced908f6e44e3936e32cb8352e625', '_collection_name': 'transactions'}\n",
832
+ "Result: SLIM & HUSKIES NASHVILLE TN - Slim & Husky ’s has quickly become one of Nashville ’s fastest-growing businesses , and their mission to support the Nashville community is very ... While this isn ’ t their only location around the city, Emmy Squared: The Gulch is located in the heart of Nashville . ... of experience in WordPress development , we deliver powerful, scalable WordPress development in Nashville , TN —delivering custom development and ... Whether your business is based in Nashville , or you want to give a national feel, our comfortable and spacious meeting room will provide a creative ... ... Reviews: Reviews ordered by recency and descriptiveness of user-identified themes such as wait time, length of visit, general tips, and location ... | Metadata: {'id': '536147de-9359-4206-84b5-08e316fbd784', 'amount': 16.9, 'category': 'Restaurants', 'transaction_date': '2025-05-19 00:00:00', '_id': '6f3179b1f3364948a07bd5188e95b836', '_collection_name': 'transactions'}\n",
833
+ "Result: HUNTSVILLE FLV - Huntsville FLV is a local establishment in Huntsville, AL that offers a variety of services to its customers. Specializing in providing solutions for everyday needs, Huntsville FLV aims to cater to the diverse needs of the community. Now that we've got the long-awaited Trader Joe's, Dave & Busters, Top Golf, and Cheesecake Factory in town, what is the next business you'd like to see come to Hsv? Dec 16, 2024 · Take a look at these three new businesses that have opened or are about to open here in the Huntsville area. With four unique services— Huntsville International Airport, Rail and Air Cargo, Space Port, and Jetplex Industrial Park—we deliver opportunities for businesses and travelers. Whether by land, air, or space, we link people to places and products to markets. Jan 3, 2024 · Find the best Retail Stores in Huntsville , AL. Search Huntsville , AL Retail Stores to find the top rated Retail Stores . | Metadata: {'id': 'f6171537-bbe6-4c47-9b75-81b443e2d3ad', 'amount': 3.25, 'category': 'Food & Drink', 'transaction_date': '2025-10-25 00:00:00', '_id': 'f65cd50fdc7d4d7f8d5ad2ff4e307a78', '_collection_name': 'transactions'}\n",
834
+ "Result: HUNTSVILLE FLV - Huntsville FLV is a local establishment in Huntsville, AL that offers a variety of services to its customers. Specializing in providing solutions for everyday needs, Huntsville FLV aims to cater to the diverse needs of the community. Now that we've got the long-awaited Trader Joe's, Dave & Busters, Top Golf, and Cheesecake Factory in town, what is the next business you'd like to see come to Hsv? Dec 16, 2024 · Take a look at these three new businesses that have opened or are about to open here in the Huntsville area. With four unique services— Huntsville International Airport, Rail and Air Cargo, Space Port, and Jetplex Industrial Park—we deliver opportunities for businesses and travelers. Whether by land, air, or space, we link people to places and products to markets. Jan 3, 2024 · Find the best Retail Stores in Huntsville , AL. Search Huntsville , AL Retail Stores to find the top rated Retail Stores . | Metadata: {'id': '5aad4b5a-848f-4539-9d3a-54dfce7f3ed9', 'amount': 3.25, 'category': 'Food & Drink', 'transaction_date': '2025-10-30 00:00:00', '_id': 'ed5ee1ae725b452983b42bda99cd129b', '_collection_name': 'transactions'}\n",
835
+ "Result: HUNTSVILLE FLV - Huntsville FLV is a local establishment in Huntsville, AL that offers a variety of services to its customers. Specializing in providing solutions for everyday needs, Huntsville FLV aims to cater to the diverse needs of the community. Now that we've got the long-awaited Trader Joe's, Dave & Busters, Top Golf, and Cheesecake Factory in town, what is the next business you'd like to see come to Hsv? Dec 16, 2024 · Take a look at these three new businesses that have opened or are about to open here in the Huntsville area. With four unique services— Huntsville International Airport, Rail and Air Cargo, Space Port, and Jetplex Industrial Park—we deliver opportunities for businesses and travelers. Whether by land, air, or space, we link people to places and products to markets. Jan 3, 2024 · Find the best Retail Stores in Huntsville , AL. Search Huntsville , AL Retail Stores to find the top rated Retail Stores . | Metadata: {'id': 'ea82ce94-0253-4e76-bdfa-2ca89be58129', 'amount': 3.25, 'category': 'Food & Drink', 'transaction_date': '2025-10-26 00:00:00', '_id': '6f27f08b44aa462282a2267ff5b2c339', '_collection_name': 'transactions'}\n",
836
+ "Result: LAS VEGAS SOUVENIRS AND LAS VEGAS NV - Earn $$ as a professional gift shop owner. Take your first steps and register today. Become a professional gift shop owner from the comfort of your home. Register today. amazon.com has been visited by 1M+ users in the past month Read Customer Reviews & Find Best Sellers. Free, Easy Returns On Millions Of Items. Shop Chairs, Tables, Place Mats, Chandeliers, Pendant Light Fixtures and More. faire.com has been visited by 100K+ users in the past month Empowering Independent Entrepreneurs To Chase Their Dreams, Shop From Over 100,000 Brands. Buy Wholesale Products Easily & Risk Free on Faire With Free Returns On Your First Order. Organic · Eco-Friendly · Free & Easy Returns · Small Batch Bonanza Gift Shop - The World's Largest Gift Shop located in Las Vegas , Nevada . 40,000 square feet of Vegas souvenirs, apparel, collectibles, Native American art and more since 1980. Open daily 9AM-midnight. This is a review for souvenir shops in Las Vegas , NV : \"Morris & Co is a high-end souvenir shop of luxury items like jewelry, clothing, gifts, accessories, snacks, and many other cool items like LIV nightclub accessories. M&M’s World Las Vegasis located inside a showcase mall on Las Vegas Blvd. The impressive four-level store was the first ever M&M store ! This isn’t a traditional gift shop, but that doesn’t mean you won’t find plenty of fun gifts and Las Vegas souvenirs . Guests can create their M&M design or choose from several premade, Vegas -themed designs. This is ... See full list on vegasfoodandfun.com Coca-Cola has been a staple in American homes for decades. If you’re one of the many Coca-Cola lovers, this gift shop is for you! The store features a variety of Coca-Cola and Las Vegas -themed merchandise. Many of the products sold in the Coca-Cola storeare vintage or limited edition items that can’t be purchased anywhere else. If the gifts aren’t ... 
See full list on vegasfoodandfun.com Welcome to Las Vegas has multiple gift shops around Sin City, but the store on Las Vegas Blvd inside The Venetian at Grand Canal Shoppesis the most impressive. The whole store is dedicated to celebrating fabulous Las Vegas ! You’ll find t-shirts, shot glasses, snow globes, onesies, and anything else you can imagine in Las Vegas memorabilia form. Thi... See full list on vegasfoodandfun.com Downtown Container Parkis a hub for shopping, dining, and entertainment in Downtown Las Vegas . The park is made of repurposed shipping containers, hence the unique name. The best part about visiting the Downtown Container Park is wandering from one store to another with amazing food in between. The park focuses on local retailers, so you’ll find a ... See full list on vegasfoodandfun.com Tokyo Discount is a store on Maryland Pkwy, Las Vegas , NV , that brings Japanese culture to Sin City. The discount shop has all things Tokyo including Hello Kitty, Japanese snacks, Pokemon, and Japanese decor. The downtown location makes it easy to explore Tokyo Discount during your Las Vegas trip! Whether you buy something Vegas -themed or something... See full list on vegasfoodandfun.com Local Oasis is a shop in the art district of Las Vegas that a local artist opened in 2020. The small shop sells art, gifts, and items from local brand owners. What better souvenir for your Vegas trip than a one- of -a-kind art piece? It’s important to remember this shop doesn’t have a huge selection of cheesy Las Vegas souvenirs , but that’s why many ... See full list on vegasfoodandfun.com Karma Connection is a shop that specializes in all things psychic, crystals, and natural healing. If you’re interested in those things, you’ll find this store is overflowing with possibilities! You can buy a new crystal to keep as your Las Vegas souvenir or get a personalized psychic reading. This gift shop is usually filled with like-minded people... 
See full list on vegasfoodandfun.com The Gamblers General Store has been appealing to casino lovers since 1984. The store is located on S Main Street, and it’s one of the only souvenir shops in Las Vegas that caters to people who love gambling. You’ll find an inventory that includes playing cards, customized chips, and casino memorabilia. There’s even a book section with books that te... See full list on vegasfoodandfun.com Hudson News is the best place to get a gift when entering Las Vegas via Harry Reid Airport, formerly known as McCarran International Airport. They have a lot of the same stuff you’d find in gift shops outside the airport, but it’s way more convenient! Grab a t-shirt, mugs, key chains, and several other airport-friendly last-minute gifts. You can al... See full list on vegasfoodandfun.com Bonanza Gift Shop is much more than just another souvenir shop on the Strip. It’s the world’s largest gift shop! The inventory at Bonanza doesn’t just compete with other souvenir shops. It blows those shops out of the water! Bonanza has reasonable prices and a prime location , making it a fan favorite over the years. The world’s largest gift shop ca... See full list on vegasfoodandfun.com Jan 24, 2024 · The store was established in 1984 and has since been a staple of Las Vegas as the one and only Gamblers general store. You won’t miss any gamblers’ favorite items; ranging from hand to find items to popular selections. This guide will give you all the best places to get souvenirs, the different types of gifts to expect in the shops, and even tips on how to get them back home. Get your shopping lists ready because you’ll check off everyone—and find a few fun items for your own keepsake. Just a short walk away, M&M’s Las Vegas at Showcase Mall is a four-story candy wonderland, where you can create personalized M&M’s in minutes. Whether you're a chocolate fanatic or just looking for a fun stop, these iconic stores are the sweetest attractions on the Strip! 
Because we are here \"in the heart of it\" we are able to offer you ALL the popular Las Vegas Souvenirs and Las Vegas Gift Items you will see around Vegas in the various giftshops. We continually strive to bring you what you see and want from Las Vegas in the form of Quality Products. Earn $$ as a professional gift shop owner. Take your first steps and register today. Become a professional gift shop owner from the comfort of your home. Register today. amazon.com has been visited by 1M+ users in the past month Read Customer Reviews & Find Best Sellers. Free, Easy Returns On Millions Of Items. Shop Chairs, Tables, Place Mats, Chandeliers, Pendant Light Fixtures and More. faire.com has been visited by 100K+ users in the past month Empowering Independent Entrepreneurs To Chase Their Dreams, Shop From Over 100,000 Brands. Buy Wholesale Products Easily & Risk Free on Faire With Free Returns On Your First Order. Organic · Eco-Friendly · Free & Easy Returns · Small Batch | Metadata: {'id': 'bac13cd8-cfcc-48f3-8dfa-18c3a3b2cc59', 'amount': 15.14, 'category': 'Merchandise', 'transaction_date': '2025-05-24 00:00:00', '_id': '415f62d03ac949f2b5ab56ef8cc8d105', '_collection_name': 'transactions'}\n",
837
+ "Result: UAH BURSARS OFFICE - The Bursar 's Office oversees the billing and collection of fees, adding funds to student accounts, and student refunds.Student Services Building, Room 123 301 Sparkman Drive Huntsville, AL 35899. 256.824.6226 bursar @ uah .edu. UAH Bursar Welcome to the Office of the Bursar The Bursar ’s Office manages student accounts, including the billing and collection of fees, and student refunds. Located in the Student Services… cs_ office . Dismiss.Your personal data will be processed and information from your device (cookies, unique identifiers, and other device data) may be stored by, accessed by and shared with 156 TCF vendor(s) and 81 ad partner(s), or used specifically by this site. UAH 1 199,50Цена предложения: UAH 1 199,50.Политика отмены PS Store . Меры предосторожности. О рейтингах. Searching for an Advance Auto Parts close to you? Advance has locations across North America which provide quality auto parts, advice and accessories for your car. | Metadata: {'id': 'eea3224b-1289-40bd-bf02-a54a6a7a7ce7', 'amount': 50.0, 'category': 'Education', 'transaction_date': '2025-08-21 00:00:00', '_id': '775b6f5066bd4435837a5f3c1b613aea', '_collection_name': 'transactions'}\n",
838
+ "Result: HANDELS HOMEMADE JONES V - Jan 2, 2026 · Monty Python's Life of Brian is a 1979 British surreal biblical black comedy film starring and written by the comedy group Monty Python It was directed by ... Nov 24, 2025 · In 1861, Welsh entrepreneur Pryce Pryce-Jones formed the first mail order business, an idea which would change the nature of retail. Selling Welsh flannel ... Dec 10, 2025 · Christmas music comprises a variety of genres of music regularly performed or heard around the Christmas season. Enter your address to see if Handel ' s Homemade Ice Cream - Jones Valley delivery is available to your location in Huntsville.What are some other stores nearby Handel ' s Homemade Ice Cream - Jones Valley? store locator pin Store Finder. All Our Fun-Filled Flavors. We have a collection of over 100 flavors. Below is a full list of our flavors. Be sure to check your local store to discover the flavors they are currently serving. | Metadata: {'id': '1c5b9d0f-8ecf-4d06-baa5-e6f036102eb0', 'amount': 8.18, 'category': 'Food & Drink', 'transaction_date': '2025-12-30 00:00:00', '_id': '8e207351ec57437aae0343b6c81ea434', '_collection_name': 'transactions'}\n",
839
+ "Result: SQ *SPILL COFFEE AND CREA - **Free refills of hot and iced brewed coffee and tea during same store visit. Excludes Cold Brew, Nitro Cold Brew, Iced Tea Lemonade, flavored Iced Tea and Starbucks Refreshers® base. At participating stores . Find out my IP address, public IPv4 and IPv6, IP location , ISP with additional information like the country, city, user agent, your browser and OS. osu! farm pp maps and beatmap recommendations... Square register is the POS software that’s simple, powerful, and free. Grand Tour Around Island 16 Locations plus Snorkeling. 4.9. (7,365).Donate to Kiva.org today to provide loans to local entrepreneurs and the Tripadvisor Foundation will match it, up to $150,000 USD collectively. Terms apply. | Metadata: {'id': 'b7708547-c125-427e-9e9e-f330ea66af71', 'amount': 4.39, 'category': 'Food & Drink', 'transaction_date': '2025-08-30 00:00:00', '_id': '5b48543fabe44e468e8db6844d868baf', '_collection_name': 'transactions'}\n",
840
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
841
+ "\n",
842
+ "I couldn't find any transactions related to haircuts.\n"
843
+ ]
844
+ }
845
+ ],
846
+ "source": [
847
+ "question = \"ow much did i spend in haircuts\"\n",
848
+ "steps = []\n",
849
+ "\n",
850
+ "for step in agent.stream(\n",
851
+ " {\"messages\": [{\"role\": \"user\", \"content\": question}]},\n",
852
+ " {\"configurable\": {\"thread_id\": \"1\"}},\n",
853
+ " stream_mode=\"values\",\n",
854
+ " context=RuntimeContext(db=db, vector_store=vector_store)\n",
855
+ "):\n",
856
+ " step[\"messages\"][-1].pretty_print()\n",
857
+ " steps.append(step)"
858
+ ]
859
+ },
860
+ {
861
+ "cell_type": "code",
862
+ "execution_count": null,
863
+ "metadata": {},
864
+ "outputs": [
865
+ {
866
+ "name": "stdout",
867
+ "output_type": "stream",
868
+ "text": [
869
+ "💬 Chat with your financial data! (Type 'exit' to stop)\n",
870
+ "\n",
871
+ "--------------------------------------------------\n",
872
+ "================================\u001b[1m Human Message \u001b[0m=================================\n",
873
+ "\n",
874
+ "or db won't be in github..\n",
875
+ "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
876
+ "\n",
877
+ "It seems like your last message might be incomplete or a comment about something else. Could you please let me know what you'd like me to do or if you have a question about your spending? I'm ready to help analyze your transactions.\n",
878
+ "--------------------------------------------------\n",
879
+ "\n"
880
+ ]
881
+ }
882
+ ],
883
+ "source": [
884
+ "# Requires `agent`, `RuntimeContext`, `db`, and `vector_store` from earlier cells.\n",
885
+ "# Interactive Chat Loop\n",
886
+ "print(\"💬 Chat with your financial data! (Type 'exit' to stop)\")\n",
887
+ "\n",
888
+ "while True:\n",
889
+ " try:\n",
890
+ " user_input = input(\"User: \")\n",
891
+ " if user_input.lower() in [\"exit\", \"quit\", \"q\"]:\n",
892
+ " print(\"Goodbye!\")\n",
893
+ " break\n",
894
+ " \n",
895
+ " print(\"\\n\" + \"-\"*50)\n",
896
+ " \n",
897
+ " # Stream the agent's response\n",
898
+ " for step in agent.stream(\n",
899
+ " {\"messages\": [{\"role\": \"user\", \"content\": user_input}]},\n",
900
+ " {\"configurable\": {\"thread_id\": \"1\"}}, # Keeps memory of the conversation\n",
901
+ " stream_mode=\"values\",\n",
902
+ " context=RuntimeContext(db=db, vector_store=vector_store)\n",
903
+ " ):\n",
904
+ " step[\"messages\"][-1].pretty_print()\n",
905
+ " \n",
906
+ " print(\"-\" * 50 + \"\\n\")\n",
907
+ " \n",
908
+ " except KeyboardInterrupt:\n",
909
+ " print(\"\\nGoodbye!\")\n",
910
+ " break"
911
+ ]
912
+ },
913
+ {
914
+ "cell_type": "code",
915
+ "execution_count": null,
916
+ "metadata": {},
917
+ "outputs": [],
918
+ "source": []
919
+ },
920
+ {
921
+ "cell_type": "code",
922
+ "execution_count": null,
923
+ "metadata": {},
924
+ "outputs": [],
925
+ "source": []
926
+ },
927
+ {
928
+ "cell_type": "code",
929
+ "execution_count": null,
930
+ "metadata": {},
931
+ "outputs": [],
932
+ "source": []
933
+ }
934
+ ],
935
+ "metadata": {
936
+ "kernelspec": {
937
+ "display_name": "money_rag",
938
+ "language": "python",
939
+ "name": "python3"
940
+ },
941
+ "language_info": {
942
+ "codemirror_mode": {
943
+ "name": "ipython",
944
+ "version": 3
945
+ },
946
+ "file_extension": ".py",
947
+ "mimetype": "text/x-python",
948
+ "name": "python",
949
+ "nbconvert_exporter": "python",
950
+ "pygments_lexer": "ipython3",
951
+ "version": "3.12.7"
952
+ }
953
+ },
954
+ "nbformat": 4,
955
+ "nbformat_minor": 2
956
+ }
notebooks/2_test_vectordb.ipynb ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Test MCP Server (Local Mode)\n",
8
+ "\n",
9
+ "This notebook demonstrates how to use the `mcp_server.py` tools directly within a LangChain workflow. \n",
10
+ "This simulates how an Agent would interact with your Local or Cloud MCP Server."
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "import sys\n",
20
+ "import os\n",
21
+ "import pandas as pd\n",
22
+ "from dotenv import load_dotenv\n",
23
+ "from langchain_core.tools import StructuredTool\n",
24
+ "from langchain_google_vertexai import ChatVertexAI\n",
25
+ "from langgraph.prebuilt import create_react_agent\n",
26
+ "\n",
27
+ "# 1. Setup Path to import mcp_server from parent directory\n",
28
+ "sys.path.append(\"..\")\n",
29
+ "\n",
30
+ "# 2. Setup Environment\n",
31
+ "load_dotenv()\n",
32
+ "\n",
33
+ "# POINT DATABASE TO A LOCAL FOLDER FOR THIS NOTEBOOK\n",
34
+ "# This keeps the notebook from interfering with the deployment's default paths; alternatively, point it at 'temp_data' explicitly.\n",
35
+ "# mcp_server uses the 'DATA_DIR' env var when it is set\n",
36
+ "os.environ[\"DATA_DIR\"] = os.path.abspath(\"../temp_data_notebook_test\")\n",
37
+ "print(f\"Using Data Dir: {os.environ['DATA_DIR']}\")\n",
38
+ "\n",
39
+ "# 3. Import Tools directly from Server Code\n",
40
+ "# (In a real remote scenario, you would use MCPClient instead)\n",
41
+ "from mcp_server import ingest_transactions, get_database_schema, query_database, semantic_search, clear_database\n",
42
+ "\n",
43
+ "# 4. Wrap as LangChain Tools\n",
44
+ "tools = [\n",
45
+ " StructuredTool.from_function(ingest_transactions),\n",
46
+ " StructuredTool.from_function(clear_database),\n",
47
+ " StructuredTool.from_function(get_database_schema),\n",
48
+ " StructuredTool.from_function(query_database),\n",
49
+ " StructuredTool.from_function(semantic_search),\n",
50
+ "]\n",
51
+ "\n",
52
+ "# 5. Initialize Agent\n",
53
+ "llm = ChatVertexAI(model=\"gemini-1.5-flash-001\", temperature=0)\n",
54
+ "agent_executor = create_react_agent(llm, tools)\n",
55
+ "\n",
56
+ "print(\"Agent and Tools loaded successfully!\")"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": null,
62
+ "metadata": {},
63
+ "outputs": [],
64
+ "source": [
65
+ "# --- STEP 1: LOAD & INGEST DATA ---\n",
66
+ "# We will read one of the CSVs from demo_data and send it to the 'ingest_transactions' tool.\n",
67
+ "\n",
68
+ "# Read Local File\n",
69
+ "csv_path = \"../demo_data/Chase5282_Activity20240110_20260110_20260111.CSV\"\n",
70
+ "\n",
71
+ "if os.path.exists(csv_path):\n",
72
+ " with open(csv_path, 'r') as f:\n",
73
+ " csv_content = f.read()\n",
74
+ "\n",
75
+ " print(f\"Read {len(csv_content)} bytes from CSV.\")\n",
76
+ " \n",
77
+ " # Run the Agent to Ingest\n",
78
+ " print(\"\\n--- Ingesting Data via Agent ---\")\n",
79
+ " response = agent_executor.invoke(\n",
80
+ " {\"messages\": [(\"user\", f\"Please clear the database first, then ingest this transaction data:\\n\\n{csv_content}\")]}\n",
81
+ " )\n",
82
+ " print(response[\"messages\"][-1].content)\n",
83
+ "else:\n",
84
+ " print(f\"File not found: {csv_path}\")"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": null,
90
+ "metadata": {},
91
+ "outputs": [],
92
+ "source": [
93
+ "# --- STEP 2: TEST RETRIEVAL (RAG) ---\n",
94
+ "\n",
95
+ "queries = [\n",
96
+ " \"What is the database schema?\",\n",
97
+ " \"How much did I spend at McDonalds?\",\n",
98
+ " \"Show me the top 3 most expensive transactions.\",\n",
99
+ " \"What is my total spending on Food & Drink?\" \n",
100
+ "]\n",
101
+ "\n",
102
+ "for q in queries:\n",
103
+ " print(f\"\\nUser: {q}\")\n",
104
+ " result = agent_executor.invoke({\"messages\": [(\"user\", q)]})\n",
105
+ " print(f\"AI: {result['messages'][-1].content}\")"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": 3,
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": [
114
+ "%reload_ext autoreload\n",
115
+ "%autoreload 2"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "execution_count": null,
121
+ "metadata": {},
122
+ "outputs": [],
123
+ "source": []
124
+ }
125
+ ],
126
+ "metadata": {
127
+ "kernelspec": {
128
+ "display_name": "money_rag",
129
+ "language": "python",
130
+ "name": "python3"
131
+ },
132
+ "language_info": {
133
+ "codemirror_mode": {
134
+ "name": "ipython",
135
+ "version": 3
136
+ },
137
+ "file_extension": ".py",
138
+ "mimetype": "text/x-python",
139
+ "name": "python",
140
+ "nbconvert_exporter": "python",
141
+ "pygments_lexer": "ipython3",
142
+ "version": "3.12.7"
143
+ }
144
+ },
145
+ "nbformat": 4,
146
+ "nbformat_minor": 2
147
+ }
requirements.txt ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --- Core AI Orchestration ---
2
+ langchain>=1.2.3
3
+ langchain-community>=0.4.1
4
+ langchain-core>=1.2.7
5
+ langgraph>=1.0.6
6
+ pydantic>=2.12.5
7
+
8
+ # --- Model Providers ---
9
+ # Google / Vertex AI
10
+ langchain-google-vertexai>=2.1.2
11
+ langchain-google-genai>=2.0.0
12
+ google-cloud-aiplatform>=1.133.0
13
+ google-genai>=1.57.0
14
+
15
+ # OpenAI
16
+ langchain-openai>=1.1.7
17
+ openai>=2.15.0
18
+
19
+ # --- MCP & Tooling ---
20
+ fastmcp>=2.14.3
21
+ mcp>=1.25.0
22
+ # langchain-mcp>=0.2.1
23
+ langchain-mcp-adapters>=0.2.1
24
+ duckduckgo-search>=8.1.1
25
+
26
+ # --- Vector DB & Data ---
27
+ qdrant-client>=1.16.2
28
+ langchain-qdrant>=1.1.0
29
+ sqlalchemy>=2.0.45
30
+ pandas>=2.3.3
31
+ redis>=7.1.0
32
+
33
+ # --- Infrastructure & API ---
34
+ uvicorn>=0.40.0
35
+ starlette>=0.51.0
36
+ python-dotenv>=1.2.1
37
+ httpx>=0.28.1
38
+ requests>=2.32.5
39
+ tenacity>=9.1.2
40
+
41
+ # --- UI & Web Search ---
42
+ streamlit>=1.53.0
43
+ ddgs>=9.10.0