Spaces:
Sleeping
Sleeping
Sajil Awale commited on
Commit ·
f204be9
0
Parent(s):
Initial commit without PNG
Browse files- .DS_Store +0 -0
- .dockerignore +67 -0
- .env +8 -0
- .gitattributes +36 -0
- .gitignore +2 -0
- Dockerfile +41 -0
- README.md +305 -0
- __pycache__/money_rag.cpython-312.pyc +0 -0
- app.py +103 -0
- architecture.svg +1 -0
- docker-compose.yml +28 -0
- docker-run.sh +65 -0
- mcp_server.py +197 -0
- money_rag.py +255 -0
- notebooks/0_test.ipynb +407 -0
- notebooks/1_test_pdf_reader.ipynb +956 -0
- notebooks/2_test_vectordb.ipynb +147 -0
- requirements.txt +43 -0
.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
.dockerignore
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
*.egg-info/
|
| 8 |
+
dist/
|
| 9 |
+
build/
|
| 10 |
+
*.egg
|
| 11 |
+
|
| 12 |
+
# Virtual environments
|
| 13 |
+
.venv/
|
| 14 |
+
venv/
|
| 15 |
+
env/
|
| 16 |
+
ENV/
|
| 17 |
+
|
| 18 |
+
# IDE
|
| 19 |
+
.vscode/
|
| 20 |
+
.idea/
|
| 21 |
+
*.swp
|
| 22 |
+
*.swo
|
| 23 |
+
*~
|
| 24 |
+
|
| 25 |
+
# Jupyter
|
| 26 |
+
.ipynb_checkpoints/
|
| 27 |
+
*.ipynb
|
| 28 |
+
|
| 29 |
+
# Environment files (keep secrets out of the build context and the image)
.env
.env.local
.env.*.local
|
| 32 |
+
|
| 33 |
+
# Git
|
| 34 |
+
.git/
|
| 35 |
+
.gitignore
|
| 36 |
+
|
| 37 |
+
# Temporary files
|
| 38 |
+
*.log
|
| 39 |
+
*.sqlite
|
| 40 |
+
*.db
|
| 41 |
+
temp_data/
|
| 42 |
+
qdrant_db/
|
| 43 |
+
merchant_cache.json
|
| 44 |
+
|
| 45 |
+
# Data files (upload via UI instead)
|
| 46 |
+
demo_data/
|
| 47 |
+
data/
|
| 48 |
+
|
| 49 |
+
# Logs
|
| 50 |
+
logs/
|
| 51 |
+
*.log
|
| 52 |
+
|
| 53 |
+
# Docker
|
| 54 |
+
Dockerfile
|
| 55 |
+
docker-compose.yml
|
| 56 |
+
.dockerignore
|
| 57 |
+
|
| 58 |
+
# Documentation
|
| 59 |
+
README.md
|
| 60 |
+
ARCHITECTURE.md
|
| 61 |
+
*.md
|
| 62 |
+
|
| 63 |
+
# macOS
|
| 64 |
+
.DS_Store
|
| 65 |
+
|
| 66 |
+
# Notebooks (testing only)
|
| 67 |
+
notebooks/
|
.env
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Local development settings.
# Never commit real credentials: keep this file out of version control and
# rotate any key that has already been pushed.
LLM_PROVIDER=google # FOR INSTRUCTOR
GOOGLE_API_KEY=your-google-api-key
GOOGLE_APPLICATION_CREDENTIALS=/Users/sawale/gen-lang-client-0311515393-ea35b1e8872e.json
GOOGLE_GEMINI_MODEL_NAME=gemini-2.5-flash
GOOGLE_CLOUD_PROJECT=gen-lang-client-0311515393
GOOGLE_CLOUD_LOCATION=us-central1

OPENAI_API_KEY='your-openai-api-key'
|
.gitattributes
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
demo_data/*
# Secrets and local environment settings
.env
# Images are kept out of the repository
*.png
# Build and OS artifacts
__pycache__/
.DS_Store
|
Dockerfile
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use Python 3.12 slim image
FROM python:3.12-slim

# Set working directory
WORKDIR /app

# Install system dependencies.
# curl is required by the HEALTHCHECK below; build-essential is presumably
# needed to compile Python wheels (confirm against requirements.txt).
# --no-install-recommends keeps optional packages out of the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/*


ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false

# Copy requirements first for better caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code (filtered by .dockerignore)
COPY . .

# Create directories for data persistence
RUN mkdir -p /app/data /app/logs

# Expose Streamlit default port
EXPOSE 8501

# Health check against Streamlit's built-in health endpoint
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl --fail http://localhost:8501/_stcore/health || exit 1

# Set environment variables
ENV PYTHONUNBUFFERED=1
ENV STREAMLIT_SERVER_PORT=8501
ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0

# Run Streamlit app
# NOTE(review): XSRF protection and CORS are disabled and runOnSave is enabled;
# confirm these are intended for the deployment target before exposing publicly.
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.maxUploadSize=200", "--server.maxMessageSize=200", "--server.runOnSave=true", "--server.enableXsrfProtection=false", "--server.enableCORS=false"]
|
README.md
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Moneyrag
|
| 3 |
+
emoji: 💰
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
license: apache-2.0
|
| 9 |
+
short_description: Where did my money go? Chat with your bank statements
|
| 10 |
+
app_port: 8501
|
| 11 |
+
---
|
| 12 |
+
# MoneyRAG - Personal Finance Transaction Analysis
|
| 13 |
+
|
| 14 |
+
AI-powered financial transaction analysis using RAG (Retrieval-Augmented Generation) with Model Context Protocol (MCP) integration.
|
| 15 |
+
|
| 16 |
+
## Features
|
| 17 |
+
|
| 18 |
+
- **Smart CSV Ingestion**: Automatically maps any CSV format to standardized transaction schema using LLM
|
| 19 |
+
- **Multi-Provider Support**: Works with Google Gemini and OpenAI models
|
| 20 |
+
- **Merchant Enrichment**: Automatically enriches transactions with web-searched merchant information
|
| 21 |
+
- **Dual Storage**: SQLite for structured queries + Qdrant for semantic search
|
| 22 |
+
- **MCP Integration**: Leverages Model Context Protocol for tool-based agent interactions
|
| 23 |
+
- **Interactive UI**: Streamlit-based web interface for chat-based analysis
|
| 24 |
+
- **Dockerized**: Complete containerized deployment ready for production
|
| 25 |
+
|
| 26 |
+
## Architecture
|
| 27 |
+
|
| 28 |
+
```mermaid
|
| 29 |
+
%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#fff', 'primaryBorderColor': '#333', 'primaryTextColor': '#333', 'lineColor': '#666' }}}%%
|
| 30 |
+
|
| 31 |
+
graph TD
|
| 32 |
+
%% --- Top Layer: Entry Point ---
|
| 33 |
+
subgraph UI["💻 User Interface"]
|
| 34 |
+
Streamlit["🌐 Streamlit Web App<br/><i>Interactive Dashboard</i>"]
|
| 35 |
+
end
|
| 36 |
+
|
| 37 |
+
%% --- Middle Layer: Split Processes ---
|
| 38 |
+
|
| 39 |
+
%% Left Column: Ingestion (The Write Path)
|
| 40 |
+
subgraph Ingestion["📥 Data Pipeline (Write)"]
|
| 41 |
+
direction TB
|
| 42 |
+
CSV["📄 CSV Upload<br/><i>Raw Data</i>"]
|
| 43 |
+
Mapper["🧠 LLM Mapper<br/><i>Schema Norm.</i>"]
|
| 44 |
+
Enrich["🔍 Web Enrich<br/><i>DuckDuckGo</i>"]
|
| 45 |
+
|
| 46 |
+
CSV --> Mapper
|
| 47 |
+
Mapper --> Enrich
|
| 48 |
+
end
|
| 49 |
+
|
| 50 |
+
%% Right Column: Intelligence (The Read Path)
|
| 51 |
+
subgraph Agent["🤖 AI Orchestration (Read)"]
|
| 52 |
+
direction TB
|
| 53 |
+
Brain["🧩 LangGraph Agent<br/><i>Controller</i>"]
|
| 54 |
+
LLM["✨ LLM Model<br/><i>Gemini / GPT-4</i>"]
|
| 55 |
+
Brain <-->|Inference| LLM
|
| 56 |
+
end
|
| 57 |
+
|
| 58 |
+
subgraph MCP["🔧 MCP Tool Server"]
|
| 59 |
+
direction LR
|
| 60 |
+
SQL_Tool["⚡ SQL Tool<br/><i>Structured</i>"]
|
| 61 |
+
Vector_Tool["🎯 Vector Tool<br/><i>Semantic</i>"]
|
| 62 |
+
end
|
| 63 |
+
|
| 64 |
+
%% --- Bottom Layer: Persistence ---
|
| 65 |
+
subgraph Storage["💾 Storage Layer"]
|
| 66 |
+
direction LR
|
| 67 |
+
SQLite[("🗄️ SQLite")]
|
| 68 |
+
Qdrant[("🔮 Qdrant")]
|
| 69 |
+
end
|
| 70 |
+
|
| 71 |
+
%% --- Connections & Logic ---
|
| 72 |
+
|
| 73 |
+
%% 1. User Actions
|
| 74 |
+
Streamlit -->|1. Upload| CSV
|
| 75 |
+
Streamlit -->|3. Query| Brain
|
| 76 |
+
|
| 77 |
+
%% 2. Ingestion to Storage flow
|
| 78 |
+
Enrich -->|2. Store| SQLite
|
| 79 |
+
Enrich -->|2. Embed| Qdrant
|
| 80 |
+
|
| 81 |
+
%% 3. Agent to Tools flow
|
| 82 |
+
Brain -->|4. Route| SQL_Tool
|
| 83 |
+
Brain -->|4. Route| Vector_Tool
|
| 84 |
+
|
| 85 |
+
%% 4. Tools to Storage flow (Vertical alignment matches)
|
| 86 |
+
SQL_Tool <-->|5. Read/Write| SQLite
|
| 87 |
+
Vector_Tool <-->|5. Search| Qdrant
|
| 88 |
+
|
| 89 |
+
%% 5. Return Path
|
| 90 |
+
Brain -.->|6. Response| Streamlit
|
| 91 |
+
|
| 92 |
+
%% --- Styling ---
|
| 93 |
+
classDef ui fill:#E3F2FD,stroke:#1565C0,stroke-width:2px,color:#0D47A1,rx:10,ry:10
|
| 94 |
+
classDef ingest fill:#E8F5E9,stroke:#2E7D32,stroke-width:2px,color:#1B5E20,rx:5,ry:5
|
| 95 |
+
classDef agent fill:#F3E5F5,stroke:#7B1FA2,stroke-width:2px,color:#4A148C,rx:5,ry:5
|
| 96 |
+
classDef mcp fill:#FFF3E0,stroke:#EF6C00,stroke-width:2px,color:#E65100,rx:5,ry:5
|
| 97 |
+
classDef storage fill:#ECEFF1,stroke:#455A64,stroke-width:2px,color:#263238,rx:5,ry:5
|
| 98 |
+
|
| 99 |
+
class Streamlit ui
|
| 100 |
+
class CSV,Mapper,Enrich ingest
|
| 101 |
+
class Brain,LLM agent
|
| 102 |
+
class SQL_Tool,Vector_Tool mcp
|
| 103 |
+
class SQLite,Qdrant storage
|
| 104 |
+
|
| 105 |
+
%% Curve the lines for better readability
|
| 106 |
+
linkStyle default interpolate basis
|
| 107 |
+
```
|
| 108 |
+
|
| 109 |
+
## Quick Start
|
| 110 |
+
|
| 111 |
+
### Docker (Recommended)
|
| 112 |
+
|
| 113 |
+
```bash
|
| 114 |
+
./docker-run.sh
|
| 115 |
+
```
|
| 116 |
+
Choose option 1 to build and run, then open http://localhost:8501
|
| 117 |
+
|
| 118 |
+
### Local Development
|
| 119 |
+
|
| 120 |
+
```bash
|
| 121 |
+
python -m venv .venv
|
| 122 |
+
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
| 123 |
+
pip install -r requirements.txt
|
| 124 |
+
streamlit run app.py
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
Open http://localhost:8501
|
| 128 |
+
|
| 129 |
+
## Getting Started Resources
|
| 130 |
+
|
| 131 |
+
### 📚 API Keys
|
| 132 |
+
- **Google Gemini**: [Get API key from Google AI Studio](https://aistudio.google.com/app/apikey)
|
| 133 |
+
- **OpenAI**: [Get API key from OpenAI Platform](https://platform.openai.com/api-keys)
|
| 134 |
+
|
| 135 |
+
### 📥 Download Transaction History
|
| 136 |
+
- **Chase Credit Card**: [Video Guide](https://www.youtube.com/watch?v=gtAFaP9Lts8)
|
| 137 |
+
- **Discover Credit Card**: [Video Guide](https://www.youtube.com/watch?v=cry6-H5b0PQ)
|
| 138 |
+
|
| 139 |
+
## Usage
|
| 140 |
+
|
| 141 |
+
1. Enter your API key in the sidebar
|
| 142 |
+
2. Upload CSV transaction files
|
| 143 |
+
3. Ask questions in natural language
|
| 144 |
+
|
| 145 |
+
### Example Questions
|
| 146 |
+
|
| 147 |
+
- "How much did I spend on restaurants last month?"
|
| 148 |
+
- "What are my top 5 spending categories?"
|
| 149 |
+
- "Show me all transactions over $100"
|
| 150 |
+
- "Find all Starbucks transactions"
|
| 151 |
+
- "Analyze my spending patterns"
|
| 152 |
+
|
| 153 |
+
## Supported CSV Formats
|
| 154 |
+
|
| 155 |
+
MoneyRAG automatically handles different CSV formats including:
|
| 156 |
+
- **Chase Bank**: Negative values for spending
|
| 157 |
+
- **Discover**: Positive values for spending
|
| 158 |
+
- **Custom formats**: LLM-based column mapping
|
| 159 |
+
|
| 160 |
+
Required information (can have any column names):
|
| 161 |
+
- Date
|
| 162 |
+
- Merchant/Description
|
| 163 |
+
- Amount
|
| 164 |
+
|
| 165 |
+
MoneyRAG automatically handles different CSV formats:
|
| 166 |
+
- Chase Bank, Discover, and custom formats
|
| 167 |
+
- LLM-based column mapping (works with any column names)
|
| 168 |
+
- Required: Date, Merchant/Description, Amount
|
| 169 |
+
|
| 170 |
+
## Configuration
|
| 171 |
+
|
| 172 |
+
**Supported Models:**
|
| 173 |
+
- Google: gemini-3-flash-preview, gemini-3-pro-image-preview, gemini-2.5-pro, gemini-2.5-flash, gemini-2.5-flash-lite
|
| 174 |
+
- OpenAI: gpt-5-mini, gpt-5-nano, gpt-4o-mini, gpt-4o
|
| 175 |
+
|
| 176 |
+
**Note:** API keys are entered through the UI; no environment variables are needed.
|
| 177 |
+
## Troubleshooting

### Check container health
```bash
docker ps
|
| 178 |
+
docker inspect money-rag-app | grep Health
|
| 179 |
+
```
|
| 180 |
+
|
| 181 |
+
### Reset everything
|
| 182 |
+
```bash
|
| 183 |
+
docker-compose down -v
|
| 184 |
+
docker rmi money_rag-money-rag
|
| 185 |
+
./docker-run.sh # Choose option 1
|
| 186 |
+
```
|
| 187 |
+
|
| 188 |
+
### MCP Server Issues
|
| 189 |
+
The MCP server runs as a subprocess. If you see connection errors:
|
| 190 |
+
1. Check logs: `docker-compose logs -f`
|
| 191 |
+
2. Verify mcp_server.py exists: `docker exec money-rag-app ls -la`
|
| 192 |
+
|
| 193 |
+
### Permission Issues
|
| 194 |
+
```bash
|
| 195 |
+
chmod +x docker-run.sh
|
| 196 |
+
sudo chown -R $USER:$USER data logs
|
| 197 |
+
```
|
| 198 |
+
|
| 199 |
+
## Production Deployment
|
| 200 |
+
|
| 201 |
+
### Using Docker Hub
|
| 202 |
+
|
| 203 |
+
1. **Tag and push:**
|
| 204 |
+
```bash
|
| 205 |
+
docker tag money-rag:latest your-username/money-rag:latest
|
| 206 |
+
docker push your-username/money-rag:latest
|
| 207 |
+
```
|
| 208 |
+
|
| 209 |
+
2. **Pull and run on server:**
|
| 210 |
+
```bash
|
| 211 |
+
docker pull your-username/money-rag:latest
|
| 212 |
+
docker run -d -p 8501:8501 your-username/money-rag:latest
|
| 213 |
+
```
|
| 214 |
+
|
| 215 |
+
### Cloud Platforms
|
| 216 |
+
|
| 217 |
+
**Google Cloud Run:**
|
| 218 |
+
```bash
|
| 219 |
+
gcloud builds submit --tag gcr.io/PROJECT-ID/money-rag
|
| 220 |
+
gcloud run deploy money-rag \
|
| 221 |
+
--image gcr.io/PROJECT-ID/money-rag \
|
| 222 |
+
--platform managed \
|
| 223 |
+
--allow-unauthenticated
|
| 224 |
+
```
|
| 225 |
+
|
| 226 |
+
**AWS ECS / Azure Container Instances:**
|
| 227 |
+
- Build and push to respective container registries
|
| 228 |
+
- Deploy using platform-specific CLI tools
|
| 229 |
+
|
| 230 |
+
## Security Notes
|
| 231 |
+
|
| 232 |
+
⚠️ **Important:**
|
| 233 |
+
- API keys are entered via UI and stored only in session state (not persisted)
|
| 234 |
+
- Keys are cleared when browser session ends
|
| 235 |
+
- Transaction data is session-based and ephemeral
|
| 236 |
+
- No sensitive data stored in environment variables or files
|
| 237 |
+
- For production, implement secure session management and authentication
|
| 238 |
+
|
| 239 |
+
## Development
|
| 240 |
+
|
| 241 |
+
### Hot Reload
|
| 242 |
+
Mount code as volume in docker-compose.yml:
|
| 243 |
+
```yaml
|
| 244 |
+
volumes:
|
| 245 |
+
- ./app.py:/app/app.py
|
| 246 |
+
- ./money_rag.py:/app/money_rag.py
|
| 247 |
+
- ./mcp_server.py:/app/mcp_server.py
|
| 248 |
+
```
|
| 249 |
+
|
| 250 |
+
### Testing
|
| 251 |
+
```bash
|
| 252 |
+
# Run unit tests (if available)
|
| 253 |
+
pytest tests/
|
| 254 |
+
|
| 255 |
+
# Test CSV ingestion
|
| 256 |
+
python -c "from money_rag import MoneyRAG; ..."
|
| 257 |
+
```
|
| 258 |
+
|
| 259 |
+
## Technologies
|
| 260 |
+
|
| 261 |
+
**Core Framework:**
|
| 262 |
+
- **LangChain** (>=1.2.3): Agent orchestration and tool integration
|
| 263 |
+
- **LangGraph** (>=1.0.6): Conversational agent with memory
|
| 264 |
+
- **langchain-mcp-adapters** (>=0.2.1): Model Context Protocol integration
|
| 265 |
+
|
| 266 |
+
**LLM Providers:**
|
| 267 |
+
- **langchain-google-genai** (>=2.0.0): Google Gemini integration
|
| 268 |
+
- **langchain-openai** (>=1.1.7): OpenAI GPT integration
|
| 269 |
+
|
| 270 |
+
**Storage & Search:**
|
| 271 |
+
- **Qdrant** (>=1.16.2): Vector database for semantic search
|
| 272 |
+
- **SQLite** (via SQLAlchemy >=2.0.45): Relational database for structured queries
|
| 273 |
+
|
| 274 |
+
**Tools & Services:**
|
| 275 |
+
- **FastMCP** (>=2.14.3): MCP server implementation
|
| 276 |
+
- **DuckDuckGo Search** (>=8.1.1): Web search for merchant enrichment
|
| 277 |
+
**Container issues:**
|
| 278 |
+
```bash
|
| 279 |
+
docker-compose logs
|
| 280 |
+
docker-compose down -v # Reset everything
|
| 281 |
+
./docker-run.sh # Rebuild
|
| 282 |
+
```
|
| 283 |
+
|
| 284 |
+
**Permission issues:**
|
| 285 |
+
```bash
|
| 286 |
+
chmod +x docker-run.sh
|
| 287 |
+
```
|
| 288 |
+
|
| 289 |
+
## Technologies
|
| 290 |
+
|
| 291 |
+
- **LangChain & LangGraph**: Agent orchestration
|
| 292 |
+
- **Google Gemini / OpenAI GPT**: LLM providers
|
| 293 |
+
- **Qdrant**: Vector database
|
| 294 |
+
- **SQLite**: Structured storage
|
| 295 |
+
- **FastMCP**: Model Context Protocol
|
| 296 |
+
- **Streamlit**: Web interface
|
| 297 |
+
|
| 298 |
+
## Contributors
|
| 299 |
+
|
| 300 |
+
- **Sajil Awale** - [GitHub](https://github.com/AwaleSajil)
|
| 301 |
+
- **Simran KC** - [GitHub](https://github.com/iamsims)
|
| 302 |
+
|
| 303 |
+
## License
|
| 304 |
+
|
| 305 |
+
MIT
|
__pycache__/money_rag.cpython-312.pyc
ADDED
|
Binary file (14.2 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import asyncio
|
| 3 |
+
import os
|
| 4 |
+
from money_rag import MoneyRAG
|
| 5 |
+
|
| 6 |
+
# Streamlit entry point for MoneyRAG: the sidebar collects the provider, model
# and API key; the main area handles CSV upload, ingestion and chat.
st.set_page_config(page_title="MoneyRAG", layout="wide")

# Sidebar for Authentication
with st.sidebar:
    st.header("Authentication")
    provider = st.selectbox("LLM Provider", ["Google", "OpenAI"])

    # The model and embedding choices depend on the selected provider.
    if provider == "Google":
        models = ["gemini-3-flash-preview", "gemini-3-pro-image-preview", "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite"]
        embeddings = ["text-embedding-004"]
    else:
        models = ["gpt-5-mini", "gpt-5-nano", "gpt-4o-mini", "gpt-4o"]
        embeddings = ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"]

    model_name = st.selectbox("Choose Decoder Model", models)
    embed_name = st.selectbox("Choose Embedding Model", embeddings)
    api_key = st.text_input("API Key", type="password")

    auth_button = st.button("Authenticate")
    if auth_button and api_key:
        # The MoneyRAG instance lives in session state so it survives reruns.
        st.session_state.rag = MoneyRAG(provider, model_name, embed_name, api_key)
        st.success("Authenticated!")
    elif auth_button:
        # Previously a click with an empty key did nothing and gave no feedback.
        st.error("Please enter an API key.")

    st.divider()
    st.caption("**Contributors:**")
    st.caption("👤 [Sajil Awale](https://github.com/AwaleSajil)")
    st.caption("👤 [Simran KC](https://github.com/iamsims)")

# Main Window
st.title("MoneyRAG 💰")
st.subheader("Where is my money?")
st.markdown("""
This app helps you analyze your personal finances using AI.
Upload your bank/credit card CSV statements to chat with your data semantically.
""")

# Guides Section
col1, col2 = st.columns(2)

with col1:
    with st.expander("📚 How to get API keys"):
        st.markdown("**Google Gemini API:**")
        st.markdown("🔗 [Get API key from Google AI Studio](https://aistudio.google.com/app/apikey)")
        st.markdown("")
        st.markdown("**OpenAI API:**")
        st.markdown("🔗 [Get API key from OpenAI Platform](https://platform.openai.com/api-keys)")

with col2:
    with st.expander("📥 How to download transaction history"):
        st.markdown("**Chase Credit Card:**")
        st.video("https://www.youtube.com/watch?v=gtAFaP9Lts8")
        st.markdown("")
        st.markdown("**Discover Credit Card:**")
        st.video("https://www.youtube.com/watch?v=cry6-H5b0PQ")

# Architecture Diagram
with st.expander("🏗️ How MoneyRAG Works"):
    st.image("architecture.svg", use_container_width=True)

st.divider()

if "rag" in st.session_state:
    uploaded_files = st.file_uploader("Upload CSV transactions", accept_multiple_files=True, type=['csv'])

    if uploaded_files:
        if st.button("Ingest Data"):
            temp_paths = []
            for uploaded_file in uploaded_files:
                # The file name comes from the client; keep only its final
                # component so it cannot point outside the session temp dir.
                safe_name = os.path.basename(uploaded_file.name)
                path = os.path.join(st.session_state.rag.temp_dir, safe_name)
                with open(path, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                temp_paths.append(path)

            with st.spinner("Ingesting and vectorizing..."):
                # setup_session is a coroutine; asyncio.run drives it to completion.
                asyncio.run(st.session_state.rag.setup_session(temp_paths))
            st.success("Data ready for chat!")

    # Chat Interface
    st.divider()
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the stored conversation on every rerun.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("Ask about your spending..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response = asyncio.run(st.session_state.rag.chat(prompt))
            st.markdown(response)
        st.session_state.messages.append({"role": "assistant", "content": response})
else:
    st.info("Please authenticate in the sidebar to start.")
|
architecture.svg
ADDED
|
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: '3.8'

services:
  # Single-service stack: the Streamlit application built from the local Dockerfile.
  money-rag:
    container_name: money-rag-app
    build:
      context: .
      dockerfile: Dockerfile
    restart: unless-stopped
    ports:
      - "8501:8501"
    # API keys are entered through the Streamlit UI, so none are passed in here.
    environment:
      PYTHONUNBUFFERED: "1"
      STREAMLIT_SERVER_PORT: "8501"
      STREAMLIT_SERVER_ADDRESS: "0.0.0.0"
    volumes:
      # Data directory for persistence (optional)
      - ./data:/app/data
      # Logs directory
      - ./logs:/app/logs
    # Probe Streamlit's health endpoint from inside the container.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8501/_stcore/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s
|
docker-run.sh
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash

# MoneyRAG Docker Run Script
# Interactive helper around docker-compose: build, start, stop, tail logs,
# or clean up the MoneyRAG container.

# Abort on the first failing command.
set -e

echo "🐳 MoneyRAG Docker Setup"
echo "========================"
echo "ℹ️  Note: API keys are entered through the web UI"
echo ""

# Check if Docker is running
if ! docker info > /dev/null 2>&1; then
    echo "❌ Docker is not running. Please start Docker Desktop."
    exit 1
fi

# Create data and logs directories (bind-mounted by docker-compose.yml)
mkdir -p data logs

echo ""
echo "Choose an option:"
echo "1) Build and run (first time or after code changes)"
echo "2) Run existing container"
echo "3) Stop container"
echo "4) View logs"
echo "5) Clean up (remove containers and images)"
echo ""
# -r keeps backslashes in the input literal.
read -r -p "Enter choice [1-5]: " choice

# Quote the variable so empty or whitespace input falls through to the default branch.
case "$choice" in
    1)
        echo "🔨 Building Docker image..."
        docker-compose build
        echo "🚀 Starting container..."
        docker-compose up -d
        echo "✅ Application is running at http://localhost:8501"
        echo "📋 View logs with: docker-compose logs -f"
        ;;
    2)
        echo "🚀 Starting container..."
        docker-compose up -d
        echo "✅ Application is running at http://localhost:8501"
        ;;
    3)
        echo "🛑 Stopping container..."
        docker-compose down
        echo "✅ Container stopped"
        ;;
    4)
        echo "📋 Showing logs (Ctrl+C to exit)..."
        docker-compose logs -f
        ;;
    5)
        echo "🧹 Cleaning up..."
        # --rmi local removes the image compose built for this project, whatever
        # the checkout directory is called (the old hard-coded name depended on it).
        docker-compose down -v --rmi local
        echo "✅ Cleanup complete"
        ;;
    *)
        echo "❌ Invalid choice"
        exit 1
        ;;
esac
|
mcp_server.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastmcp import FastMCP
|
| 2 |
+
from langchain_qdrant import QdrantVectorStore
|
| 3 |
+
from qdrant_client import QdrantClient
|
| 4 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
import shutil
|
| 9 |
+
|
| 10 |
+
# Load environment variables (API keys, etc.)
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
+
# Storage locations used by this server.
# DATA_DIR is taken from the environment when set (e.g. by the deployment
# config); otherwise it falls back to a "temp_data" folder next to this module.
# Per the original note, that folder is treated as ephemeral on Hugging Face
# Spaces and is wiped on restart.
_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
_DEFAULT_DATA_DIR = os.path.join(_MODULE_DIR, "temp_data")
DATA_DIR = os.getenv("DATA_DIR", _DEFAULT_DATA_DIR)
QDRANT_PATH = os.path.join(DATA_DIR, "qdrant_db")
DB_PATH = os.path.join(DATA_DIR, "money_rag.db")
|
| 20 |
+
|
| 21 |
+
# Initialize the MCP Server
|
| 22 |
+
mcp = FastMCP("Money RAG Financial Analyst")
|
| 23 |
+
|
| 24 |
+
import sqlite3
|
| 25 |
+
|
| 26 |
+
def get_schema_info() -> str:
    """Describe every table in the SQLite database at ``DB_PATH``.

    Returns:
        A newline-joined listing with a ``Table: <name>`` header per table
        followed by ``Columns:`` and one `` - <column> (<type>)`` line per
        column. If the database file does not exist, a hint to upload data
        is returned instead; if reading fails, a string starting with
        ``Error reading schema:``.
    """
    if not os.path.exists(DB_PATH):
        return "Database file does not exist yet. Please upload data."

    try:
        conn = sqlite3.connect(DB_PATH)
        try:
            cursor = conn.cursor()

            # Get all tables
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            table_names = [name for (name,) in cursor.fetchall()]

            schema_info = []
            for table_name in table_names:
                schema_info.append(f"\nTable: {table_name}")
                schema_info.append("Columns:")

                # PRAGMA arguments cannot be bound as parameters, so quote the
                # identifier; otherwise names with spaces or punctuation fail.
                quoted_name = '"' + table_name.replace('"', '""') + '"'
                cursor.execute(f"PRAGMA table_info({quoted_name});")

                # Each row is (cid, name, type, notnull, dflt_value, pk).
                for _cid, col_name, col_type, *_rest in cursor.fetchall():
                    schema_info.append(f" - {col_name} ({col_type})")
        finally:
            # Release the connection even when a query above raises.
            conn.close()

        return "\n".join(schema_info)
    except Exception as e:
        return f"Error reading schema: {e}"
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
@mcp.resource("schema://database/tables")
def get_database_schema() -> str:
    """Expose the money_rag database schema as an MCP resource.

    Returns:
        The text produced by ``get_schema_info()``, passed through unchanged.
    """
    schema_text = get_schema_info()
    return schema_text
|
| 62 |
+
|
| 63 |
+
@mcp.tool()
def query_database(query: str) -> str:
    """Execute a read-only query on the money_rag SQLite database.

    Args:
        query: The SQL SELECT (or PRAGMA) query to execute

    Returns:
        Query results or error message

    Important Notes:
    - Only SELECT and PRAGMA queries are allowed (read-only)
    - Use 'description' column for text search
    - 'amount' column: positive values = spending, negative values = payments/refunds

    Example queries:
    - Find Walmart spending: SELECT SUM(amount) FROM transactions WHERE description LIKE '%Walmart%' AND amount > 0;
    - List recent transactions: SELECT transaction_date, description, amount, category FROM transactions ORDER BY transaction_date DESC LIMIT 5;
    - Spending by category: SELECT category, SUM(amount) FROM transactions WHERE amount > 0 GROUP BY category;
    """
    if not os.path.exists(DB_PATH):
        return "Database file does not exist yet. Please upload data."

    # Security: only statements that start with SELECT or PRAGMA get through
    query_upper = query.strip().upper()
    if not query_upper.startswith(("SELECT", "PRAGMA")):
        return "Error: Only SELECT and PRAGMA queries are allowed"

    # Forbidden operations
    forbidden = {"INSERT", "UPDATE", "DELETE", "DROP", "ALTER", "CREATE", "REPLACE", "TRUNCATE", "ATTACH", "DETACH"}
    # Compare whole whitespace-separated tokens: this still avoids false
    # positives such as an "update_date" column, and also catches keywords
    # delimited by newlines or tabs rather than plain spaces.
    if forbidden.intersection(query_upper.split()):
        return "Error: Query contains forbidden operation. Only SELECT queries allowed."

    try:
        conn = sqlite3.connect(DB_PATH)
        try:
            # Defence in depth: ask SQLite itself to refuse statements that
            # would modify the database on this connection.
            conn.execute("PRAGMA query_only = ON;")

            cursor = conn.cursor()
            cursor.execute(query)
            results = cursor.fetchall()

            # Get column names to make result more readable
            column_names = [description[0] for description in cursor.description] if cursor.description else []
        finally:
            # Always release the connection, including when the query fails.
            conn.close()
    except (sqlite3.Error, sqlite3.Warning) as e:
        # sqlite3.Warning is included because some Python versions raise it
        # (instead of an Error subclass) for multi-statement input.
        return f"Error: {str(e)}"

    if not results:
        return "No results found"

    # Format results nicely: a header line, then one tuple per row
    formatted_results = [f"Columns: {', '.join(column_names)}"]
    formatted_results.extend(str(row) for row in results)
    return "\n".join(formatted_results)
|
| 121 |
+
|
| 122 |
+
def get_vector_store():
    """Open the on-disk Qdrant store and wrap its "transactions" collection.

    Returns:
        A ``QdrantVectorStore`` bound to the "transactions" collection and
        using Google Generative AI embeddings (model "text-embedding-004").

    Raises:
        ValueError: If the "transactions" collection does not exist yet,
            e.g. in a new ephemeral session before any data was ingested.
    """
    collection_name = "transactions"

    # Initialize Embedding Model using Google AI Studio
    embeddings = GoogleGenerativeAIEmbeddings(model="text-embedding-004")

    # Connect to Qdrant (persistent disk mode at a specific path).
    # We ensure the directory exists so Qdrant can write to it.
    os.makedirs(QDRANT_PATH, exist_ok=True)
    client = QdrantClient(path=QDRANT_PATH)

    # The collection may be absent in a new ephemeral session. Fail fast with
    # an actionable message rather than handing back a store bound to a
    # collection that is not there.
    existing_names = {c.name for c in client.get_collections().collections}
    if collection_name not in existing_names:
        client.close()  # do not keep the storage folder open on failure
        raise ValueError(
            f"Qdrant collection '{collection_name}' does not exist yet. Please upload data first."
        )

    return QdrantVectorStore(
        client=client,
        collection_name=collection_name,
        embedding=embeddings,
    )
|
| 146 |
+
|
| 147 |
+
@mcp.tool()
def semantic_search(query: str, top_k: int = 5) -> str:
    """
    Search for personal financial transactions semantically.

    Use this to find spending when specific merchant names are unknown or ambiguous.
    Examples: "how much did I spend on fast food?", "subscriptions", "travel expenses".

    Args:
        query: The description or category of spending to look for.
        top_k: Number of results to return (default 5). Must be at least 1.

    Returns:
        One line per match in the form
        "Date: <date> | Match: <text> | Amount: <amount>", or a message
        explaining that nothing was found or that the search failed.
    """
    if top_k < 1:
        return "Error performing search: top_k must be at least 1"

    try:
        # Safety check: if no data has been ingested yet.
        # This must run BEFORE get_vector_store(), because that helper creates
        # the storage directory and would make this check pass vacuously.
        if not os.path.exists(QDRANT_PATH) or not os.listdir(QDRANT_PATH):
            return "No matching transactions found (Database is empty. Please upload data first)."

        vector_store = get_vector_store()
        results = vector_store.similarity_search(query, k=top_k)

        if not results:
            return "No matching transactions found."

        output = []
        for doc in results:
            # Format the output clearly for the LLM/User
            amount = doc.metadata.get('amount', 'N/A')
            date = doc.metadata.get('transaction_date', 'N/A')
            output.append(f"Date: {date} | Match: {doc.page_content} | Amount: {amount}")

        return "\n".join(output)

    except Exception as e:
        return f"Error performing search: {str(e)}"
|
| 182 |
+
|
| 183 |
+
# A helper to clear data (useful for session reset)
|
| 184 |
+
@mcp.tool()
def clear_database() -> str:
    """Wipe the session's data directory so a fresh session can start.

    When ``DATA_DIR`` exists it is removed with all of its contents and then
    re-created empty; when it does not exist nothing is touched.

    Returns:
        "Database cleared successfully." on success, otherwise a message
        starting with "Error clearing database:".
    """
    try:
        data_dir_present = os.path.exists(DATA_DIR)
        if data_dir_present:
            shutil.rmtree(DATA_DIR)
            os.makedirs(DATA_DIR)
    except Exception as exc:
        return f"Error clearing database: {exc}"
    return "Database cleared successfully."
|
| 194 |
+
|
| 195 |
+
if __name__ == "__main__":
    # Script entry point: runs the MCP server over stdio, which is the
    # transport requested explicitly below.
    mcp.run(transport="stdio")
|
money_rag.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import uuid
|
| 3 |
+
import asyncio
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import sqlite3
|
| 6 |
+
import shutil
|
| 7 |
+
import tempfile
|
| 8 |
+
from typing import List, Optional
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
+
|
| 11 |
+
from langchain.chat_models import init_chat_model
|
| 12 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 13 |
+
from langchain_core.output_parsers import JsonOutputParser
|
| 14 |
+
from langchain_core.tools import tool
|
| 15 |
+
from langchain_community.utilities import SQLDatabase
|
| 16 |
+
from langchain_qdrant import QdrantVectorStore
|
| 17 |
+
from qdrant_client import QdrantClient
|
| 18 |
+
from qdrant_client.http.models import Distance, VectorParams
|
| 19 |
+
from langgraph.runtime import get_runtime
|
| 20 |
+
from langgraph.checkpoint.memory import InMemorySaver
|
| 21 |
+
from langchain.agents import create_agent
|
| 22 |
+
from langchain_community.tools import DuckDuckGoSearchRun
|
| 23 |
+
from langchain_mcp_adapters.client import MultiServerMCPClient
|
| 24 |
+
|
| 25 |
+
# Import specific embeddings
|
| 26 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
| 27 |
+
from langchain_openai import OpenAIEmbeddings
|
| 28 |
+
|
| 29 |
+
class MoneyRAG:
    """Session-scoped financial assistant built on uploaded CSV statements.

    A session ingests CSV files into a SQLite ``transactions`` table, mirrors
    the rows into a local Qdrant collection, and then answers questions via an
    agent whose tools come from the ``mcp_server.py`` MCP server. All files
    live in a per-instance temporary directory removed by ``cleanup()``.
    """

    def __init__(self, llm_provider: str, model_name: str, embedding_model_name: str, api_key: str):
        """Configure the LLM, embeddings and temporary storage for a session.

        Args:
            llm_provider: "google" selects Google GenAI; any other value is
                treated as OpenAI. Matching is case-insensitive.
            model_name: Chat model name passed to ``init_chat_model``.
            embedding_model_name: Embedding model name for the chosen provider.
            api_key: API key exported to the provider's environment variable.
        """
        self.llm_provider = llm_provider.lower()
        self.model_name = model_name
        self.embedding_model_name = embedding_model_name

        # Set API Keys
        if self.llm_provider == "google":
            os.environ["GOOGLE_API_KEY"] = api_key
            self.embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model_name)
            provider_name = "google_genai"
        else:
            os.environ["OPENAI_API_KEY"] = api_key
            self.embeddings = OpenAIEmbeddings(model=embedding_model_name)
            provider_name = "openai"

        # Initialize LLM
        self.llm = init_chat_model(
            self.model_name,
            model_provider=provider_name,
        )

        # Temporary paths for this session
        self.temp_dir = tempfile.mkdtemp()
        os.environ["DATA_DIR"] = self.temp_dir  # Harmonize with mcp_server.py
        self.db_path = os.path.join(self.temp_dir, "money_rag.db")
        self.qdrant_path = os.path.join(self.temp_dir, "qdrant_db")

        self.db: Optional[SQLDatabase] = None
        self.vector_store: Optional[QdrantVectorStore] = None
        self.agent = None
        self.mcp_client: Optional[MultiServerMCPClient] = None
        self.search_tool = DuckDuckGoSearchRun()
        self.merchant_cache = {}  # Session-based cache for merchant enrichment

    async def setup_session(self, csv_paths: List[str]):
        """Ingest the given CSV files and set up the databases and agent.

        Args:
            csv_paths: Paths of the CSV statements to ingest.

        Raises:
            ValueError: If ``csv_paths`` is empty, or if no transactions end
                up in the database.
        """
        if not csv_paths:
            # Without this guard the failure surfaces later as an obscure
            # "no such table" error while syncing to Qdrant.
            raise ValueError("No CSV files provided. Please ingest CSV files first.")

        for path in csv_paths:
            await self._ingest_csv(path)

        self.db = SQLDatabase.from_uri(f"sqlite:///{self.db_path}")
        self.vector_store = self._sync_to_qdrant()
        await self._init_agent()

    async def _ingest_csv(self, file_path):
        """Normalize one CSV file and append it to the ``transactions`` table.

        The LLM is asked to map the file's columns to date / description /
        amount / category and to report the sign convention; amounts are then
        stored so that spending is positive. Each unique description is
        enriched with a web search result (cached per session).
        """
        filename = os.path.basename(file_path)
        df = pd.read_csv(file_path)
        headers = df.columns.tolist()
        sample_data = df.head(10).to_json()

        prompt = ChatPromptTemplate.from_template("""
        Act as a financial data parser. Analyze this CSV data:
        Filename: {filename}
        Headers: {headers}
        Sample Data: {sample}

        TASK:
        1. Map the CSV columns to standard fields: date, description, amount, and category.
        2. Determine the 'sign_convention' for spending.

        RULES:
        - If the filename suggests 'Discover' credit card, spending are usually POSITIVE.
        - If the filename suggests 'Chase' credit card, spending are usually NEGATIVE.

        - Analyze the 'sign_convention' for spending (outflows):
        - Look at the sample data for known merchants or spending patterns.
        - If spending (like a restaurant or store) is NEGATIVE (e.g., -25.00), the convention is 'spending_is_negative'.
        - If spending is POSITIVE (e.g., 25.00), the convention is 'spending_is_positive'.

        OUTPUT FORMAT (JSON ONLY):
        {{
        "date_col": "column_name",
        "desc_col": "column_name",
        "amount_col": "column_name",
        "category_col": "column_name or null",
        "sign_convention": "spending_is_negative" | "spending_is_positive"
        }}
        """)

        chain = prompt | self.llm | JsonOutputParser()
        mapping = await chain.ainvoke({"headers": headers, "sample": sample_data, "filename": filename})

        standard_df = pd.DataFrame()
        standard_df['id'] = [str(uuid.uuid4()) for _ in range(len(df))]
        standard_df['transaction_date'] = pd.to_datetime(df[mapping['date_col']])
        standard_df['description'] = df[mapping['desc_col']]

        # Store spending as positive regardless of the source convention.
        raw_amounts = pd.to_numeric(df[mapping['amount_col']])
        if mapping['sign_convention'] == "spending_is_negative":
            standard_df['amount'] = raw_amounts * -1
        else:
            standard_df['amount'] = raw_amounts

        # The model may answer null, the literal string "null", or a column
        # that is not in the file; only trust a name that really exists.
        category_col = mapping.get('category_col')
        if isinstance(category_col, str) and category_col in df.columns:
            standard_df['category'] = df[category_col]
        else:
            standard_df['category'] = 'Uncategorized'
        standard_df['source_file'] = filename

        # --- Async Enrichment Step ---
        print(f" ✨ Enriching descriptions for {filename}...")
        unique_descriptions = standard_df['description'].unique()
        sem = asyncio.Semaphore(5)  # at most 5 web searches in flight

        async def get_merchant_info(description):
            if description in self.merchant_cache:
                return self.merchant_cache[description]

            async with sem:
                try:
                    await asyncio.sleep(0.05)  # Jitter
                    print(f" 🔍 Web searching: {description}...")
                    result = await self.search_tool.ainvoke(f"What type of business / store is '{description}'?")
                    self.merchant_cache[description] = result
                    return result
                except Exception as e:
                    # Best effort: a failed lookup must not abort ingestion.
                    print(f" ⚠️ Search failed for {description}: {e}")
                    return "Unknown"

        enrichment_results = await asyncio.gather(
            *(get_merchant_info(desc) for desc in unique_descriptions)
        )

        desc_map = dict(zip(unique_descriptions, enrichment_results))
        standard_df['enriched_info'] = standard_df['description'].map(desc_map).fillna("")

        conn = sqlite3.connect(self.db_path)
        try:
            standard_df.to_sql("transactions", conn, if_exists="append", index=False)
        finally:
            # Close the connection even if the write fails.
            conn.close()

    def _sync_to_qdrant(self):
        """Rebuild the Qdrant "transactions" collection from the SQLite table.

        Returns:
            The populated ``QdrantVectorStore``.

        Raises:
            ValueError: If the ``transactions`` table contains no rows.
        """
        client = QdrantClient(path=self.qdrant_path)
        collection = "transactions"

        conn = sqlite3.connect(self.db_path)
        try:
            df = pd.read_sql_query("SELECT * FROM transactions", conn)
        finally:
            conn.close()

        # Check for empty dataframe
        if df.empty:
            raise ValueError("No transactions found in database. Please ingest CSV files first.")

        # Dynamically detect embedding dimension
        embedding_dim = len(self.embeddings.embed_query("test"))

        client.recreate_collection(
            collection_name=collection,
            vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
        )

        vs = QdrantVectorStore(client=client, collection_name=collection, embedding=self.embeddings)

        # Use description + category + enrichment for vectorization
        if 'enriched_info' in df.columns:
            enriched_values = df['enriched_info']
        else:
            enriched_values = [''] * len(df)

        texts = []
        for description, category, enriched in zip(df['description'], df['category'], enriched_values):
            base_text = f"{description} ({category})"
            # Only append real text; skip empty, "Unknown" and non-string
            # placeholders (e.g. missing values).
            if isinstance(enriched, str) and enriched and enriched != "Unknown":
                texts.append(f"{base_text} - {enriched}")
            else:
                texts.append(base_text)

        metadatas = df[['id', 'amount', 'category', 'transaction_date']].to_dict('records')
        for m in metadatas:
            m['transaction_date'] = str(m['transaction_date'])

        vs.add_texts(texts=texts, metadatas=metadatas)
        return vs

    async def _init_agent(self):
        """Start the MCP client and build the agent from the server's tools."""
        import sys  # local import: only needed to locate the running interpreter

        # 1. Initialize MCP client with absolute path to server
        server_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mcp_server.py")

        self.mcp_client = MultiServerMCPClient(
            {
                "money_rag": {
                    "transport": "stdio",
                    # Launch the server with the interpreter running this
                    # process rather than whatever "python" resolves to.
                    "command": sys.executable or "python",
                    "args": [server_path],
                    "env": os.environ.copy(),
                }
            }
        )

        # 2. Get tools from MCP server
        mcp_tools = await self.mcp_client.get_tools()

        # 3. Define the Agent with MCP Tools
        system_prompt = (
            "You are a financial analyst. Use the provided tools to query the database "
            "and perform semantic searches. Spending is POSITIVE (>0). "
            "Always explain your findings clearly."
        )

        self.agent = create_agent(
            model=self.llm,
            tools=mcp_tools,
            system_prompt=system_prompt,
            checkpointer=InMemorySaver(),
        )

    @staticmethod
    def _extract_text(content):
        """Flatten a message ``content`` value into plain text.

        A string is returned as-is. For a list, plain-string blocks and the
        "text" field of ``{"type": "text"}`` blocks are joined with newlines;
        every other block is ignored.
        """
        if not isinstance(content, list):
            return content

        text_parts = []
        for block in content:
            if isinstance(block, str):
                text_parts.append(block)
            elif isinstance(block, dict) and block.get("type") == "text":
                text_parts.append(block.get("text", ""))
        return "\n".join(text_parts)

    async def chat(self, query: str):
        """Send a user message to the agent and return its final reply text.

        Args:
            query: The user's question.

        Returns:
            The content of the last message produced by the agent, flattened
            to text when it is a list of content blocks.

        Raises:
            RuntimeError: If the agent has not been initialized yet.
        """
        if self.agent is None:
            raise RuntimeError("Agent is not initialized. Call setup_session() first.")

        config = {"configurable": {"thread_id": "session_1"}}

        result = await self.agent.ainvoke(
            {"messages": [{"role": "user", "content": query}]},
            config,
        )

        # Content may be a string or a list of content blocks
        return self._extract_text(result["messages"][-1].content)

    async def cleanup(self):
        """Delete temporary session files and close MCP client."""
        if self.mcp_client:
            try:
                await self.mcp_client.close()
            except Exception as e:
                # Best effort: cleanup must still remove the temp files.
                print(f"Warning: Failed to close MCP client: {e}")

        if os.path.exists(self.temp_dir):
            try:
                shutil.rmtree(self.temp_dir)
            except Exception as e:
                print(f"Warning: Failed to remove temp directory: {e}")
|
notebooks/0_test.ipynb
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 3,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"%reload_ext autoreload\n",
|
| 10 |
+
"%autoreload 2"
|
| 11 |
+
]
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "code",
|
| 15 |
+
"execution_count": 4,
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"outputs": [
|
| 18 |
+
{
|
| 19 |
+
"data": {
|
| 20 |
+
"text/plain": [
|
| 21 |
+
"True"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
"execution_count": 4,
|
| 25 |
+
"metadata": {},
|
| 26 |
+
"output_type": "execute_result"
|
| 27 |
+
}
|
| 28 |
+
],
|
| 29 |
+
"source": [
|
| 30 |
+
"import pandas as pd\n",
|
| 31 |
+
"import sqlite3\n",
|
| 32 |
+
"from langchain.chat_models import init_chat_model\n",
|
| 33 |
+
"from langchain_core.prompts import ChatPromptTemplate\n",
|
| 34 |
+
"from langchain_core.output_parsers import JsonOutputParser\n",
|
| 35 |
+
"\n",
|
| 36 |
+
"from dotenv import load_dotenv\n",
|
| 37 |
+
"load_dotenv()"
|
| 38 |
+
]
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"execution_count": 5,
|
| 43 |
+
"metadata": {},
|
| 44 |
+
"outputs": [
|
| 45 |
+
{
|
| 46 |
+
"name": "stderr",
|
| 47 |
+
"output_type": "stream",
|
| 48 |
+
"text": [
|
| 49 |
+
"/Users/sawale/Documents/learning/money_rag/.venv/lib/python3.12/site-packages/google/cloud/aiplatform/models.py:52: FutureWarning: Support for google-cloud-storage < 3.0.0 will be removed in a future version of google-cloud-aiplatform. Please upgrade to google-cloud-storage >= 3.0.0.\n",
|
| 50 |
+
" from google.cloud.aiplatform.utils import gcs_utils\n"
|
| 51 |
+
]
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"data": {
|
| 55 |
+
"text/plain": [
|
| 56 |
+
"langchain_google_vertexai.chat_models.ChatVertexAI"
|
| 57 |
+
]
|
| 58 |
+
},
|
| 59 |
+
"execution_count": 5,
|
| 60 |
+
"metadata": {},
|
| 61 |
+
"output_type": "execute_result"
|
| 62 |
+
}
|
| 63 |
+
],
|
| 64 |
+
"source": [
|
| 65 |
+
"# Initialize the Gemini model via Vertex AI\n",
|
| 66 |
+
"vertex_llm = init_chat_model(\n",
|
| 67 |
+
" \"gemini-2.5-flash\", \n",
|
| 68 |
+
" model_provider=\"google_vertexai\",\n",
|
| 69 |
+
" project='gen-lang-client-0311515393',\n",
|
| 70 |
+
" location='us-central1',\n",
|
| 71 |
+
")\n",
|
| 72 |
+
"\n",
|
| 73 |
+
"type(vertex_llm)\n"
|
| 74 |
+
]
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"cell_type": "code",
|
| 78 |
+
"execution_count": 4,
|
| 79 |
+
"metadata": {},
|
| 80 |
+
"outputs": [
|
| 81 |
+
{
|
| 82 |
+
"data": {
|
| 83 |
+
"text/plain": [
|
| 84 |
+
"langchain_openai.chat_models.base.ChatOpenAI"
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
"execution_count": 4,
|
| 88 |
+
"metadata": {},
|
| 89 |
+
"output_type": "execute_result"
|
| 90 |
+
}
|
| 91 |
+
],
|
| 92 |
+
"source": [
|
| 93 |
+
"\n",
|
| 94 |
+
"openai_llm = init_chat_model(\n",
|
| 95 |
+
" \"gpt-4o\", # or \"gpt-3.5-turbo\", \"o1-preview\", etc.\n",
|
| 96 |
+
" model_provider=\"openai\",\n",
|
| 97 |
+
" api_key=\"your-api-key-here\", # Optional: can also be set via OPENAI_API_KEY env var\n",
|
| 98 |
+
")\n",
|
| 99 |
+
"\n",
|
| 100 |
+
"type(openai_llm)"
|
| 101 |
+
]
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"cell_type": "code",
|
| 105 |
+
"execution_count": 7,
|
| 106 |
+
"metadata": {},
|
| 107 |
+
"outputs": [
|
| 108 |
+
{
|
| 109 |
+
"data": {
|
| 110 |
+
"text/plain": [
|
| 111 |
+
"{'messages': [HumanMessage(content='what is the weather in sf', additional_kwargs={}, response_metadata={}, id='6eec9126-cda3-4cdd-9f6d-f893b228c7cc'),\n",
|
| 112 |
+
" AIMessage(content='', additional_kwargs={'function_call': {'name': 'get_weather', 'arguments': '{\"city\": \"sf\"}'}, '__gemini_function_call_thought_signatures__': {'28adfdf3-778e-4d93-bd45-94a38f64064f': 'CtsFAY89a1+qAQDcM0PFB0w9LKN21xyUOLVEzccWGHcyKXyKAye7ST4azyzp32wcQLIbbiA4N5NSZO5Ua07pNp6Ey+dHG7MoVMoirXyDabR/IBeaesEOIg3fSaXAegxgzEjgLEmmADwAJRxvGpTNyalfZDUPkh0UcRcwGB6jqibG261Euk9G3hkXrHNBig5gBG1uoAxDxwrydV3T57wFRIxn4EEU+IqDiQRdMFsVn8ANZ4BzcpfRdNKsE/VRJ/htqEAHGeEk3jlqZRcJmkRCFHQvsRMxMI911jBH1puimK61Ah3r6P0aMtfkh4wqAENd+Oou06CG5Iz8IS1kBXNxL2/XbqdCDiSmIepKZNVmkyShmt315nPsQ1U3uM7N6415goYjW0q3IouuhiTl1Q6xwzrCfITXgBa9TL09x/CcSOIW9LgRJ6z7PX20PByOZ/rcqsdxr3lGgGSv+3P4b22mDPrrXZEHs8Hkw+1Fw4QMmFJLcj6WCXJaxlRaNR9TMlcspJGT+AtSv1X287cKqyyKGL6x536j5/vU4xDf2do4QIN/Rom0Xj7d46vUS9/B5abHSvfIErAlwDLcGDvHE3S2nC8gN4GKuTSDSm88DLhv46UwLov/NrF/6v9n7ZiqLkvHl/ByRVldSzOH3cIf+KVxVJlyXrSjjMOg0IP2HcK9kBBls0sDSdwWFcuoF7YkACguzPmcpvrHDkRZYacSv1hZB+LvDHSs7INBev0D38o4Id7fo9V0URUkDATlkvji1m7X+XuCEa0XCcN9vuDB4/z/lEfn/j6It+Cb9SQ7pCxsY4qfk39wusymtgFnRTkaK5JZM7sH+GtM+FBMbi7p/SVsFCeooaW1MshwFjBX2Bevex0HPomNYzPJoRYC7em61d5oDvrsCS5AwLTU8Ph2gjFNF8E8G7NuCfYTCsH/MtcHUIB0HXbIkzmUH0PNqLIYa/uTwd8uaxVQR7zPupoWtDc='}}, response_metadata={'is_blocked': False, 'safety_ratings': [], 'usage_metadata': {'prompt_token_count': 25, 'candidates_token_count': 5, 'total_token_count': 217, 'prompt_tokens_details': [{'modality': 1, 'token_count': 25}], 'candidates_tokens_details': [{'modality': 1, 'token_count': 5}], 'thoughts_token_count': 187, 'cached_content_token_count': 0, 'cache_tokens_details': []}, 'finish_reason': 'STOP', 'avg_logprobs': -5.900767517089844, 'model_name': 'gemini-2.5-flash'}, id='lc_run--019baf03-6dd9-73f3-a9ed-fe8c471e01df-0', tool_calls=[{'name': 'get_weather', 'args': {'city': 'sf'}, 'id': '28adfdf3-778e-4d93-bd45-94a38f64064f', 'type': 'tool_call'}], invalid_tool_calls=[], usage_metadata={'input_tokens': 25, 'output_tokens': 5, 'total_tokens': 217, 'input_token_details': {'cache_read': 0}, 
'output_token_details': {'reasoning': 187}}),\n",
|
| 113 |
+
" ToolMessage(content=\"It's always sunny in sf!\", name='get_weather', id='426b50b1-b985-418f-aa4a-0a76ceae5e30', tool_call_id='28adfdf3-778e-4d93-bd45-94a38f64064f'),\n",
|
| 114 |
+
" AIMessage(content=\"It's always sunny in sf!\\n\", additional_kwargs={}, response_metadata={'is_blocked': False, 'safety_ratings': [], 'usage_metadata': {'prompt_token_count': 229, 'candidates_token_count': 9, 'total_token_count': 238, 'prompt_tokens_details': [{'modality': 1, 'token_count': 229}], 'candidates_tokens_details': [{'modality': 1, 'token_count': 9}], 'thoughts_token_count': 0, 'cached_content_token_count': 0, 'cache_tokens_details': []}, 'finish_reason': 'STOP', 'avg_logprobs': -0.1222022639380561, 'model_name': 'gemini-2.5-flash'}, id='lc_run--019baf03-7bdd-7d31-893b-20167e884547-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 229, 'output_tokens': 9, 'total_tokens': 238, 'input_token_details': {'cache_read': 0}})]}"
|
| 115 |
+
]
|
| 116 |
+
},
|
| 117 |
+
"execution_count": 7,
|
| 118 |
+
"metadata": {},
|
| 119 |
+
"output_type": "execute_result"
|
| 120 |
+
}
|
| 121 |
+
],
|
| 122 |
+
"source": [
|
| 123 |
+
"from langchain.agents import create_agent\n",
|
| 124 |
+
"\n",
|
| 125 |
+
"def get_weather(city: str) -> str:\n",
|
| 126 |
+
" \"\"\"Get weather for a given city.\"\"\"\n",
|
| 127 |
+
" return f\"It's always sunny in {city}!\"\n",
|
| 128 |
+
"\n",
|
| 129 |
+
"agent = create_agent(\n",
|
| 130 |
+
" # model=\"gpt-4o\",\n",
|
| 131 |
+
" model=vertex_llm,\n",
|
| 132 |
+
" tools=[get_weather],\n",
|
| 133 |
+
" system_prompt=\"You are a helpful assistant\",\n",
|
| 134 |
+
")\n",
|
| 135 |
+
"\n",
|
| 136 |
+
"# Run the agent\n",
|
| 137 |
+
"agent.invoke(\n",
|
| 138 |
+
" {\"messages\": [{\"role\": \"user\", \"content\": \"what is the weather in sf\"}]}\n",
|
| 139 |
+
")"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "code",
|
| 144 |
+
"execution_count": 6,
|
| 145 |
+
"metadata": {},
|
| 146 |
+
"outputs": [],
|
| 147 |
+
"source": [
|
| 148 |
+
"from typing import TypedDict\n",
|
| 149 |
+
"from dataclasses import dataclass\n",
|
| 150 |
+
"from langgraph.runtime import get_runtime\n",
|
| 151 |
+
"from langgraph.checkpoint.memory import InMemorySaver\n",
|
| 152 |
+
"from langchain_community.utilities import SQLDatabase\n",
|
| 153 |
+
"from langchain_core.tools import tool\n",
|
| 154 |
+
"from langchain.agents import create_agent\n",
|
| 155 |
+
"\n",
|
| 156 |
+
"\n",
|
| 157 |
+
"db = SQLDatabase.from_uri(\"sqlite:///money_rag.db\")\n",
|
| 158 |
+
"\n",
|
| 159 |
+
"@dataclass\n",
|
| 160 |
+
"class RuntimeContext:\n",
|
| 161 |
+
" db: SQLDatabase\n",
|
| 162 |
+
"\n",
|
| 163 |
+
"@tool\n",
|
| 164 |
+
"def execute_sql(query: str) -> str:\n",
|
| 165 |
+
" \"\"\"Execute a SQLite command and return results.\"\"\"\n",
|
| 166 |
+
" runtime = get_runtime(RuntimeContext)\n",
|
| 167 |
+
" db = runtime.context.db\n",
|
| 168 |
+
" try:\n",
|
| 169 |
+
" return db.run(query)\n",
|
| 170 |
+
" except Exception as e:\n",
|
| 171 |
+
" return f\"Error: {e}\"\n",
|
| 172 |
+
"\n",
|
| 173 |
+
"SYSTEM = f\"\"\"You are a careful SQLite analyst.\n",
|
| 174 |
+
"Rules:\n",
|
| 175 |
+
"- Think step-by-step.\n",
|
| 176 |
+
"- When you need data, call the tool `execute_sql` with ONE SELECT query.\n",
|
| 177 |
+
"- Read-only only; no INSERT/UPDATE/DELETE/ALTER/DROP/CREATE/REPLACE/TRUNCATE.\n",
|
| 178 |
+
"- Limit to 5 rows of output unless the user explicitly asks otherwise.\n",
|
| 179 |
+
"- If the tool returns 'Error:', revise the SQL and try again.\n",
|
| 180 |
+
"- Prefer explicit column lists; avoid SELECT *.\n",
|
| 181 |
+
"- **Spending Logic**:\n",
|
| 182 |
+
" - Spending is represented by POSITIVE values (> 0) in the amount column.\n",
|
| 183 |
+
" - NEGATIVE values (< 0) represent credit card payments or refunds and MUST NOT be included when calculating total spending or expenses.\n",
|
| 184 |
+
"\"\"\"\n",
|
| 185 |
+
"agent = create_agent(\n",
|
| 186 |
+
" model=vertex_llm,\n",
|
| 187 |
+
" tools=[execute_sql],\n",
|
| 188 |
+
" system_prompt=SYSTEM,\n",
|
| 189 |
+
" context_schema=RuntimeContext,\n",
|
| 190 |
+
" checkpointer=InMemorySaver(),\n",
|
| 191 |
+
")"
|
| 192 |
+
]
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"cell_type": "code",
|
| 196 |
+
"execution_count": 7,
|
| 197 |
+
"metadata": {},
|
| 198 |
+
"outputs": [
|
| 199 |
+
{
|
| 200 |
+
"name": "stdout",
|
| 201 |
+
"output_type": "stream",
|
| 202 |
+
"text": [
|
| 203 |
+
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
| 204 |
+
"\n",
|
| 205 |
+
"how much total amount i spent for movie ticket in last 6 months?\n",
|
| 206 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 207 |
+
"Tool Calls:\n",
|
| 208 |
+
" execute_sql (13c234ed-8b6e-4db2-b4cd-ebff7fa65c1b)\n",
|
| 209 |
+
" Call ID: 13c234ed-8b6e-4db2-b4cd-ebff7fa65c1b\n",
|
| 210 |
+
" Args:\n",
|
| 211 |
+
" query: SELECT SUM(amount) FROM transactions WHERE category = 'movie ticket' AND date >= strftime('%Y-%m-%d', date('now', '-6 months')) AND amount > 0\n",
|
| 212 |
+
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
| 213 |
+
"Name: execute_sql\n",
|
| 214 |
+
"\n",
|
| 215 |
+
"Error: (sqlite3.OperationalError) no such column: date\n",
|
| 216 |
+
"[SQL: SELECT SUM(amount) FROM transactions WHERE category = 'movie ticket' AND date >= strftime('%Y-%m-%d', date('now', '-6 months')) AND amount > 0]\n",
|
| 217 |
+
"(Background on this error at: https://sqlalche.me/e/20/e3q8)\n",
|
| 218 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 219 |
+
"Tool Calls:\n",
|
| 220 |
+
" execute_sql (3b514cf6-71cd-4730-b7d1-69f2547d9c56)\n",
|
| 221 |
+
" Call ID: 3b514cf6-71cd-4730-b7d1-69f2547d9c56\n",
|
| 222 |
+
" Args:\n",
|
| 223 |
+
" query: SELECT SUM(amount) FROM transactions WHERE category = 'movie ticket' AND transaction_date >= strftime('%Y-%m-%d', date('now', '-6 months')) AND amount > 0\n",
|
| 224 |
+
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
| 225 |
+
"Name: execute_sql\n",
|
| 226 |
+
"\n",
|
| 227 |
+
"[(None,)]\n",
|
| 228 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 229 |
+
"Tool Calls:\n",
|
| 230 |
+
" execute_sql (ef8975f2-f1e5-4f71-a228-75e72c494c64)\n",
|
| 231 |
+
" Call ID: ef8975f2-f1e5-4f71-a228-75e72c494c64\n",
|
| 232 |
+
" Args:\n",
|
| 233 |
+
" query: SELECT category, amount, transaction_date FROM transactions WHERE category = 'movie ticket' AND amount > 0 LIMIT 5\n",
|
| 234 |
+
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
| 235 |
+
"Name: execute_sql\n",
|
| 236 |
+
"\n",
|
| 237 |
+
"\n",
|
| 238 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 239 |
+
"Tool Calls:\n",
|
| 240 |
+
" execute_sql (e146482f-1c5c-438e-bf05-d6c2a9a9ef14)\n",
|
| 241 |
+
" Call ID: e146482f-1c5c-438e-bf05-d6c2a9a9ef14\n",
|
| 242 |
+
" Args:\n",
|
| 243 |
+
" query: SELECT id, amount, category FROM transactions LIMIT 5\n",
|
| 244 |
+
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
| 245 |
+
"Name: execute_sql\n",
|
| 246 |
+
"\n",
|
| 247 |
+
"Error: (sqlite3.OperationalError) no such column: id\n",
|
| 248 |
+
"[SQL: SELECT id, amount, category FROM transactions LIMIT 5]\n",
|
| 249 |
+
"(Background on this error at: https://sqlalche.me/e/20/e3q8)\n",
|
| 250 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 251 |
+
"Tool Calls:\n",
|
| 252 |
+
" execute_sql (b736c455-b950-46a2-95c0-9ecb041c27a8)\n",
|
| 253 |
+
" Call ID: b736c455-b950-46a2-95c0-9ecb041c27a8\n",
|
| 254 |
+
" Args:\n",
|
| 255 |
+
" query: SELECT amount, category, date FROM spending LIMIT 5\n",
|
| 256 |
+
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
| 257 |
+
"Name: execute_sql\n",
|
| 258 |
+
"\n",
|
| 259 |
+
"Error: (sqlite3.OperationalError) no such table: spending\n",
|
| 260 |
+
"[SQL: SELECT amount, category, date FROM spending LIMIT 5]\n",
|
| 261 |
+
"(Background on this error at: https://sqlalche.me/e/20/e3q8)\n",
|
| 262 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 263 |
+
"Tool Calls:\n",
|
| 264 |
+
" execute_sql (000502f1-501f-44d9-826b-2f8a7c9bc70d)\n",
|
| 265 |
+
" Call ID: 000502f1-501f-44d9-826b-2f8a7c9bc70d\n",
|
| 266 |
+
" Args:\n",
|
| 267 |
+
" query: SELECT amount, category, date FROM expenses LIMIT 5\n",
|
| 268 |
+
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
| 269 |
+
"Name: execute_sql\n",
|
| 270 |
+
"\n",
|
| 271 |
+
"Error: (sqlite3.OperationalError) no such table: expenses\n",
|
| 272 |
+
"[SQL: SELECT amount, category, date FROM expenses LIMIT 5]\n",
|
| 273 |
+
"(Background on this error at: https://sqlalche.me/e/20/e3q8)\n",
|
| 274 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 275 |
+
"Tool Calls:\n",
|
| 276 |
+
" execute_sql (fb1bab6f-de96-42d6-b124-68f9ce80f3ed)\n",
|
| 277 |
+
" Call ID: fb1bab6f-de96-42d6-b124-68f9ce80f3ed\n",
|
| 278 |
+
" Args:\n",
|
| 279 |
+
" query: SELECT amount, category, date FROM transactions LIMIT 5\n",
|
| 280 |
+
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
| 281 |
+
"Name: execute_sql\n",
|
| 282 |
+
"\n",
|
| 283 |
+
"Error: (sqlite3.OperationalError) no such column: date\n",
|
| 284 |
+
"[SQL: SELECT amount, category, date FROM transactions LIMIT 5]\n",
|
| 285 |
+
"(Background on this error at: https://sqlalche.me/e/20/e3q8)\n",
|
| 286 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 287 |
+
"Tool Calls:\n",
|
| 288 |
+
" execute_sql (678dea01-88eb-4bce-a2b1-48cb93c25d97)\n",
|
| 289 |
+
" Call ID: 678dea01-88eb-4bce-a2b1-48cb93c25d97\n",
|
| 290 |
+
" Args:\n",
|
| 291 |
+
" query: SELECT amount, category, transaction_date FROM transactions LIMIT 5\n",
|
| 292 |
+
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
| 293 |
+
"Name: execute_sql\n",
|
| 294 |
+
"\n",
|
| 295 |
+
"[(231.19, 'Merchandise', '2024-10-17 00:00:00'), (16.51, 'Merchandise', '2024-10-18 00:00:00'), (146.73, 'Merchandise', '2024-10-18 00:00:00'), (-100.0, 'Awards and Rebate Credits', '2024-10-18 00:00:00'), (8.09, 'Restaurants', '2024-11-02 00:00:00')]\n",
|
| 296 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 297 |
+
"Tool Calls:\n",
|
| 298 |
+
" execute_sql (dab1dfe3-c9ac-4c25-b737-887088edc6c7)\n",
|
| 299 |
+
" Call ID: dab1dfe3-c9ac-4c25-b737-887088edc6c7\n",
|
| 300 |
+
" Args:\n",
|
| 301 |
+
" query: SELECT SUM(amount) FROM transactions WHERE category = 'movie ticket' AND transaction_date >= strftime('%Y-%m-%d', date('now', '-6 months')) AND amount > 0\n",
|
| 302 |
+
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
| 303 |
+
"Name: execute_sql\n",
|
| 304 |
+
"\n",
|
| 305 |
+
"[(None,)]\n",
|
| 306 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 307 |
+
"Tool Calls:\n",
|
| 308 |
+
" execute_sql (f236f188-f301-400e-8f97-19f87a8717eb)\n",
|
| 309 |
+
" Call ID: f236f188-f301-400e-8f97-19f87a8717eb\n",
|
| 310 |
+
" Args:\n",
|
| 311 |
+
" query: SELECT DISTINCT category FROM transactions LIMIT 5\n",
|
| 312 |
+
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
| 313 |
+
"Name: execute_sql\n",
|
| 314 |
+
"\n",
|
| 315 |
+
"[('Merchandise',), ('Awards and Rebate Credits',), ('Restaurants',), ('Government Services',), ('Payments and Credits',)]\n",
|
| 316 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 317 |
+
"Tool Calls:\n",
|
| 318 |
+
" execute_sql (a0bed1e3-0c0d-46f7-9852-ae0de757cba7)\n",
|
| 319 |
+
" Call ID: a0bed1e3-0c0d-46f7-9852-ae0de757cba7\n",
|
| 320 |
+
" Args:\n",
|
| 321 |
+
" query: SELECT DISTINCT category FROM transactions WHERE category LIKE '%movie%' LIMIT 5\n",
|
| 322 |
+
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
| 323 |
+
"Name: execute_sql\n",
|
| 324 |
+
"\n",
|
| 325 |
+
"\n",
|
| 326 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 327 |
+
"\n",
|
| 328 |
+
"It seems you haven't spent any amount on movie tickets in the last 6 months, or there are no transactions categorized as 'movie ticket' in the database.\n"
|
| 329 |
+
]
|
| 330 |
+
}
|
| 331 |
+
],
|
| 332 |
+
"source": [
|
| 333 |
+
"question = \"how much total amount i spent for movie ticket in last 6 months?\"\n",
|
| 334 |
+
"steps = []\n",
|
| 335 |
+
"\n",
|
| 336 |
+
"for step in agent.stream(\n",
|
| 337 |
+
" {\"messages\": [{\"role\": \"user\", \"content\": question}]},\n",
|
| 338 |
+
" {\"configurable\": {\"thread_id\": \"1\"}},\n",
|
| 339 |
+
" stream_mode=\"values\",\n",
|
| 340 |
+
" context=RuntimeContext(db=db)\n",
|
| 341 |
+
"):\n",
|
| 342 |
+
" step[\"messages\"][-1].pretty_print()\n",
|
| 343 |
+
" steps.append(step)"
|
| 344 |
+
]
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"cell_type": "code",
|
| 348 |
+
"execution_count": 21,
|
| 349 |
+
"metadata": {},
|
| 350 |
+
"outputs": [
|
| 351 |
+
{
|
| 352 |
+
"name": "stdout",
|
| 353 |
+
"output_type": "stream",
|
| 354 |
+
"text": [
|
| 355 |
+
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
| 356 |
+
"\n",
|
| 357 |
+
"what transcation did you take into account for this calculation?\n",
|
| 358 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 359 |
+
"\n",
|
| 360 |
+
"I need more information to answer your question. Could you please tell me which calculation you are referring to? I don't have memory of past interactions or calculations.\n"
|
| 361 |
+
]
|
| 362 |
+
}
|
| 363 |
+
],
|
| 364 |
+
"source": [
|
| 365 |
+
"question = \"what transcation did you take into account for this calculation?\"\n",
|
| 366 |
+
"steps = []\n",
|
| 367 |
+
"\n",
|
| 368 |
+
"for step in agent.stream(\n",
|
| 369 |
+
" {\"messages\": [{\"role\": \"user\", \"content\": question}]},\n",
|
| 370 |
+
" {\"configurable\": {\"thread_id\": \"1\"}},\n",
|
| 371 |
+
" stream_mode=\"values\",\n",
|
| 372 |
+
" context=RuntimeContext(db=db)\n",
|
| 373 |
+
"):\n",
|
| 374 |
+
" step[\"messages\"][-1].pretty_print()\n",
|
| 375 |
+
" steps.append(step)"
|
| 376 |
+
]
|
| 377 |
+
},
|
| 378 |
+
{
|
| 379 |
+
"cell_type": "code",
|
| 380 |
+
"execution_count": null,
|
| 381 |
+
"metadata": {},
|
| 382 |
+
"outputs": [],
|
| 383 |
+
"source": []
|
| 384 |
+
}
|
| 385 |
+
],
|
| 386 |
+
"metadata": {
|
| 387 |
+
"kernelspec": {
|
| 388 |
+
"display_name": "money_rag",
|
| 389 |
+
"language": "python",
|
| 390 |
+
"name": "python3"
|
| 391 |
+
},
|
| 392 |
+
"language_info": {
|
| 393 |
+
"codemirror_mode": {
|
| 394 |
+
"name": "ipython",
|
| 395 |
+
"version": 3
|
| 396 |
+
},
|
| 397 |
+
"file_extension": ".py",
|
| 398 |
+
"mimetype": "text/x-python",
|
| 399 |
+
"name": "python",
|
| 400 |
+
"nbconvert_exporter": "python",
|
| 401 |
+
"pygments_lexer": "ipython3",
|
| 402 |
+
"version": "3.12.7"
|
| 403 |
+
}
|
| 404 |
+
},
|
| 405 |
+
"nbformat": 4,
|
| 406 |
+
"nbformat_minor": 2
|
| 407 |
+
}
|
notebooks/1_test_pdf_reader.ipynb
ADDED
|
@@ -0,0 +1,956 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"%reload_ext autoreload\n",
|
| 10 |
+
"%autoreload 2"
|
| 11 |
+
]
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "code",
|
| 15 |
+
"execution_count": 2,
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"outputs": [
|
| 18 |
+
{
|
| 19 |
+
"name": "stderr",
|
| 20 |
+
"output_type": "stream",
|
| 21 |
+
"text": [
|
| 22 |
+
"/Users/sawale/Documents/learning/money_rag/.venv/lib/python3.12/site-packages/google/cloud/aiplatform/models.py:52: FutureWarning: Support for google-cloud-storage < 3.0.0 will be removed in a future version of google-cloud-aiplatform. Please upgrade to google-cloud-storage >= 3.0.0.\n",
|
| 23 |
+
" from google.cloud.aiplatform.utils import gcs_utils\n"
|
| 24 |
+
]
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"data": {
|
| 28 |
+
"text/plain": [
|
| 29 |
+
"True"
|
| 30 |
+
]
|
| 31 |
+
},
|
| 32 |
+
"execution_count": 2,
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"output_type": "execute_result"
|
| 35 |
+
}
|
| 36 |
+
],
|
| 37 |
+
"source": [
|
| 38 |
+
"import pandas as pd\n",
|
| 39 |
+
"import sqlite3\n",
|
| 40 |
+
"from langchain.chat_models import init_chat_model\n",
|
| 41 |
+
"from langchain_core.prompts import ChatPromptTemplate\n",
|
| 42 |
+
"from langchain_core.output_parsers import JsonOutputParser\n",
|
| 43 |
+
"import uuid\n",
|
| 44 |
+
"\n",
|
| 45 |
+
"from langchain_qdrant import QdrantVectorStore\n",
|
| 46 |
+
"from qdrant_client import QdrantClient\n",
|
| 47 |
+
"from qdrant_client.http.models import Distance, VectorParams\n",
|
| 48 |
+
"from langchain_google_vertexai import VertexAIEmbeddings\n",
|
| 49 |
+
"\n",
|
| 50 |
+
"from typing import TypedDict\n",
|
| 51 |
+
"from dataclasses import dataclass\n",
|
| 52 |
+
"from langgraph.runtime import get_runtime\n",
|
| 53 |
+
"from langgraph.checkpoint.memory import InMemorySaver\n",
|
| 54 |
+
"from langchain_community.utilities import SQLDatabase\n",
|
| 55 |
+
"from langchain_core.tools import tool\n",
|
| 56 |
+
"from langchain.agents import create_agent\n",
|
| 57 |
+
"\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"\n",
|
| 60 |
+
"from dotenv import load_dotenv\n",
|
| 61 |
+
"load_dotenv()"
|
| 62 |
+
]
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"cell_type": "code",
|
| 66 |
+
"execution_count": 3,
|
| 67 |
+
"metadata": {},
|
| 68 |
+
"outputs": [],
|
| 69 |
+
"source": [
|
| 70 |
+
"import os \n",
|
| 71 |
+
"import json\n",
|
| 72 |
+
"import asyncio\n",
|
| 73 |
+
"from langchain_community.tools import DuckDuckGoSearchRun\n",
|
| 74 |
+
"\n",
|
| 75 |
+
"# Initialize search\n",
|
| 76 |
+
"search_tool = DuckDuckGoSearchRun()\n",
|
| 77 |
+
"MERCHANT_CACHE_FILE = \"merchant_cache.json\"\n",
|
| 78 |
+
"\n",
|
| 79 |
+
"# Helper to load/save cache\n",
|
| 80 |
+
"def _load_cache():\n",
|
| 81 |
+
" if os.path.exists(MERCHANT_CACHE_FILE):\n",
|
| 82 |
+
" try:\n",
|
| 83 |
+
" with open(MERCHANT_CACHE_FILE, 'r') as f:\n",
|
| 84 |
+
" return json.load(f)\n",
|
| 85 |
+
" except:\n",
|
| 86 |
+
" return {}\n",
|
| 87 |
+
" return {}\n",
|
| 88 |
+
"\n",
|
| 89 |
+
"def _save_cache(cache):\n",
|
| 90 |
+
" with open(MERCHANT_CACHE_FILE, 'w') as f:\n",
|
| 91 |
+
" json.dump(cache, f)\n",
|
| 92 |
+
"\n",
|
| 93 |
+
"async def get_merchant_category_async(description, cache, sem):\n",
|
| 94 |
+
" \"\"\"Async search with semaphore and shared cache\"\"\"\n",
|
| 95 |
+
" if description in cache:\n",
|
| 96 |
+
" return cache[description]\n",
|
| 97 |
+
" \n",
|
| 98 |
+
" async with sem:\n",
|
| 99 |
+
" try:\n",
|
| 100 |
+
" # Random small sleep to jitter requests slightly\n",
|
| 101 |
+
" await asyncio.sleep(0.05) \n",
|
| 102 |
+
" print(f\" 🔍 Web searching for: {description}...\")\n",
|
| 103 |
+
" # Run the search asynchronously\n",
|
| 104 |
+
" result = await search_tool.ainvoke(f\"What type of business / store is '{description}'? Whats is location\")\n",
|
| 105 |
+
" return result\n",
|
| 106 |
+
" except Exception as e:\n",
|
| 107 |
+
" print(f\" ⚠️ Search failed for {description}: {e}\")\n",
|
| 108 |
+
" return \"Unknown\"\n",
|
| 109 |
+
"\n",
|
| 110 |
+
"async def ingest_csv(file_path, llm, db_path=\"money_rag.db\"):\n",
|
| 111 |
+
" print(f\"📂 Processing {file_path}...\")\n",
|
| 112 |
+
" df = pd.read_csv(file_path)\n",
|
| 113 |
+
" headers = df.columns.tolist()\n",
|
| 114 |
+
" sample_data = df.head(10).to_json() \n",
|
| 115 |
+
"\n",
|
| 116 |
+
" # 1. LLM Mapping (Sync is fine here)\n",
|
| 117 |
+
" prompt = ChatPromptTemplate.from_template(\"\"\"\n",
|
| 118 |
+
" Act as a financial data parser. Analyze this CSV data:\n",
|
| 119 |
+
" Filename: {filename}\n",
|
| 120 |
+
" Headers: {headers}\n",
|
| 121 |
+
" Sample Data: {sample}\n",
|
| 122 |
+
"\n",
|
| 123 |
+
" TASK:\n",
|
| 124 |
+
" 1. Map the CSV columns to standard fields: date, description, amount, and category.\n",
|
| 125 |
+
" 2. Determine the 'sign_convention' for spending.\n",
|
| 126 |
+
" \n",
|
| 127 |
+
" RULES:\n",
|
| 128 |
+
" - If the filename suggests 'Discover' credit card, spending are usually POSITIVE.\n",
|
| 129 |
+
" - If the filename suggests 'Chase' credit card, spending are usually NEGATIVE.\n",
|
| 130 |
+
" \n",
|
| 131 |
+
" - Analyze the 'sign_convention' for spending (outflows):\n",
|
| 132 |
+
" - Look at the sample data for known merchants or spending patterns.\n",
|
| 133 |
+
" - If spending (like a restaurant or store) is NEGATIVE (e.g., -25.00), the convention is 'spending_is_negative'.\n",
|
| 134 |
+
" - If spending is POSITIVE (e.g., 25.00), the convention is 'spending_is_positive'.\n",
|
| 135 |
+
"\n",
|
| 136 |
+
" OUTPUT FORMAT (JSON ONLY):\n",
|
| 137 |
+
" {{\n",
|
| 138 |
+
" \"date_col\": \"column_name\",\n",
|
| 139 |
+
" \"desc_col\": \"column_name\",\n",
|
| 140 |
+
" \"amount_col\": \"column_name\",\n",
|
| 141 |
+
" \"category_col\": \"column_name or null\",\n",
|
| 142 |
+
" \"sign_convention\": \"spending_is_negative\" | \"spending_is_positive\"\n",
|
| 143 |
+
" }}\n",
|
| 144 |
+
" \"\"\")\n",
|
| 145 |
+
" \n",
|
| 146 |
+
" chain = prompt | llm | JsonOutputParser()\n",
|
| 147 |
+
" mapping = chain.invoke({\"headers\": headers, \"sample\": sample_data, \"filename\": file_path})\n",
|
| 148 |
+
"\n",
|
| 149 |
+
" # 2. Standardization\n",
|
| 150 |
+
" standard_df = pd.DataFrame()\n",
|
| 151 |
+
" standard_df['id'] = [str(uuid.uuid4()) for _ in range(len(df))]\n",
|
| 152 |
+
" standard_df['transaction_date'] = pd.to_datetime(df[mapping['date_col']])\n",
|
| 153 |
+
" standard_df['description'] = df[mapping['desc_col']]\n",
|
| 154 |
+
" \n",
|
| 155 |
+
" # Normalization Logic\n",
|
| 156 |
+
" raw_amounts = pd.to_numeric(df[mapping['amount_col']])\n",
|
| 157 |
+
" if mapping['sign_convention'] == \"spending_is_negative\":\n",
|
| 158 |
+
" standard_df['amount'] = raw_amounts * -1\n",
|
| 159 |
+
" else:\n",
|
| 160 |
+
" standard_df['amount'] = raw_amounts\n",
|
| 161 |
+
" \n",
|
| 162 |
+
" standard_df['category'] = df[mapping['category_col']] if mapping.get('category_col') else 'Uncategorized'\n",
|
| 163 |
+
" standard_df['source_file'] = file_path.split(\"/\")[-1]\n",
|
| 164 |
+
"\n",
|
| 165 |
+
" # 3. --- Async Enrichment Step ---\n",
|
| 166 |
+
" print(\" ✨ Enriching descriptions (Async)...\")\n",
|
| 167 |
+
" unique_descriptions = standard_df['description'].unique()\n",
|
| 168 |
+
" \n",
|
| 169 |
+
" # Load cache once\n",
|
| 170 |
+
" cache = _load_cache()\n",
|
| 171 |
+
" \n",
|
| 172 |
+
" # Create a semaphore to limit concurrent web searches (e.g. 5)\n",
|
| 173 |
+
" sem = asyncio.Semaphore(5)\n",
|
| 174 |
+
" \n",
|
| 175 |
+
" # Create tasks for all descriptions\n",
|
| 176 |
+
" # Note: We process ALL descriptions now effectively\n",
|
| 177 |
+
" tasks = []\n",
|
| 178 |
+
" for desc in unique_descriptions:\n",
|
| 179 |
+
" tasks.append(get_merchant_category_async(desc, cache, sem))\n",
|
| 180 |
+
" \n",
|
| 181 |
+
" # Run in parallel\n",
|
| 182 |
+
" results = await asyncio.gather(*tasks)\n",
|
| 183 |
+
" \n",
|
| 184 |
+
" # Update cache object with new results\n",
|
| 185 |
+
" for desc, res in zip(unique_descriptions, results):\n",
|
| 186 |
+
" cache[desc] = res\n",
|
| 187 |
+
" \n",
|
| 188 |
+
" # Save cache back to disk\n",
|
| 189 |
+
" _save_cache(cache)\n",
|
| 190 |
+
" \n",
|
| 191 |
+
" # Map back\n",
|
| 192 |
+
" desc_map = dict(zip(unique_descriptions, results))\n",
|
| 193 |
+
" standard_df['enriched_info'] = standard_df['description'].map(desc_map).fillna(\"\")\n",
|
| 194 |
+
"\n",
|
| 195 |
+
" # 4. Save to DB\n",
|
| 196 |
+
" conn = sqlite3.connect(db_path)\n",
|
| 197 |
+
" standard_df.to_sql(\"transactions\", conn, if_exists=\"append\", index=False)\n",
|
| 198 |
+
" conn.close()\n",
|
| 199 |
+
" \n",
|
| 200 |
+
" print(f\"✅ Ingested {len(standard_df)} rows from {file_path.split('/')[-1]}. Logic: {mapping['sign_convention']}\")"
|
| 201 |
+
]
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"cell_type": "code",
|
| 205 |
+
"execution_count": 4,
|
| 206 |
+
"metadata": {},
|
| 207 |
+
"outputs": [
|
| 208 |
+
{
|
| 209 |
+
"name": "stdout",
|
| 210 |
+
"output_type": "stream",
|
| 211 |
+
"text": [
|
| 212 |
+
"📂 Processing /Users/sawale/Documents/learning/money_rag/demo_data/Discover-AllAvailable-20260110.csv...\n",
|
| 213 |
+
" ✨ Enriching descriptions (Async)...\n",
|
| 214 |
+
" 🔍 Web searching for: BACK MARKET BROOKLYN NY...\n",
|
| 215 |
+
" 🔍 Web searching for: TEMU.COM 8884958368 DE...\n",
|
| 216 |
+
" 🔍 Web searching for: WALMART STORE 00332 HUNTSVILLE AL...\n",
|
| 217 |
+
" 🔍 Web searching for: $100 STATEMENT CREDIT W 1ST PU...\n",
|
| 218 |
+
" 🔍 Web searching for: PY *KUNG-FU TEA AL HUNTSVILLE AL...\n",
|
| 219 |
+
" 🔍 Web searching for: MADISON MONTGOMERY AL...\n",
|
| 220 |
+
" 🔍 Web searching for: INTERNET PAYMENT - THANK YOU...\n",
|
| 221 |
+
" 🔍 Web searching for: GRUBHUB - UNIVERSITY OF HUNTSVILLE AL...\n",
|
| 222 |
+
" 🔍 Web searching for: MINT MOBILE 800-683-7392 CA...\n",
|
| 223 |
+
" 🔍 Web searching for: POPEYES 2577 HUNTSVILLE AL...\n",
|
| 224 |
+
" 🔍 Web searching for: 88 BUFFET HUNTSVILLE AL...\n",
|
| 225 |
+
" 🔍 Web searching for: VIET HUONG VIETNAMESE RE HUNTSVILLE AL...\n",
|
| 226 |
+
" 🔍 Web searching for: CASHBACK BONUS REDEMPTION PYMT/STMT CRDT...\n",
|
| 227 |
+
" 🔍 Web searching for: SPO*THECURRYMODERNINDIAN HUNTSVILLE AL...\n",
|
| 228 |
+
" 🔍 Web searching for: H&M 0273HUNTSVILLE HUNTSVILLE ALUS0273001241222182740...\n",
|
| 229 |
+
" 🔍 Web searching for: INDIAN BAZAAR HUNTSVILLE AL...\n",
|
| 230 |
+
" 🔍 Web searching for: HANDELS HOMEMADE HUNTSVI HUNTSVILLE AL...\n",
|
| 231 |
+
" 🔍 Web searching for: UAH COLLEGE 256-824-6170 AL...\n",
|
| 232 |
+
" 🔍 Web searching for: UAH COLLEGE FSF 800-346-9252 MA...\n",
|
| 233 |
+
" 🔍 Web searching for: CHIPOTLE 1687 NASHVILLE TN...\n",
|
| 234 |
+
" 🔍 Web searching for: TST*PIE TOWN TACOS - F NASHVILLE TN00153526022200965677AA...\n",
|
| 235 |
+
" 🔍 Web searching for: INDIAN BAZAAR HUNTSVILLE ALGOOGLE PAY ENDING IN 8984...\n",
|
| 236 |
+
" 🔍 Web searching for: INDIA MART HUNTSVILLE ALGOOGLE PAY ENDING IN 8984...\n",
|
| 237 |
+
" 🔍 Web searching for: PAYPAL *KEVDUDE1186 KEV 888-221-1161 CA...\n",
|
| 238 |
+
" 🔍 Web searching for: LYFT *RIDE WED 10AM 8552800278 CA...\n",
|
| 239 |
+
" 🔍 Web searching for: SKECHERS USA INC 1069 HUNTSVILLE AL...\n",
|
| 240 |
+
" 🔍 Web searching for: STORE HUNTSVILLE AL...\n",
|
| 241 |
+
" 🔍 Web searching for: LYFT *RIDE WED 3PM 8552800278 CA...\n",
|
| 242 |
+
" 🔍 Web searching for: SQ *TAQUERIA LAS ADELI HUNTSVILLE AL0002305843021411201895...\n",
|
| 243 |
+
" 🔍 Web searching for: UAH HUNTSVILLE DUNKIN HUNTSVILLE AL...\n",
|
| 244 |
+
" 🔍 Web searching for: WALMART.COM 800-925-6278 AR...\n",
|
| 245 |
+
" 🔍 Web searching for: WALMART.COM 8009256278 BENTONVILLE AR...\n",
|
| 246 |
+
" 🔍 Web searching for: TOUS LES JOURS - HUNTSVI HUNTSVILLE AL...\n",
|
| 247 |
+
" 🔍 Web searching for: MARSHALLS #422 HUNTSVILLE AL...\n",
|
| 248 |
+
" 🔍 Web searching for: ROSS STORE #2436 HUNTSVILLE AL...\n",
|
| 249 |
+
" 🔍 Web searching for: SPRINTAX NR TAX 8882038900 NY...\n",
|
| 250 |
+
" 🔍 Web searching for: USPS PO 0142460804 HUNTSVILLE AL...\n",
|
| 251 |
+
" 🔍 Web searching for: CHIPOTLE 1796 HUNTSVILLE ALGOOGLE PAY ENDING IN 8984...\n",
|
| 252 |
+
" 🔍 Web searching for: TST*POURHOUSE HUNTSVILLE AL00031984024314246667AA...\n",
|
| 253 |
+
" 🔍 Web searching for: TST*WOKS UP HUNTSVILLE AL00075396024313993332AA...\n",
|
| 254 |
+
" 🔍 Web searching for: SPIRIT AIRLINES 8014012222 FL...\n",
|
| 255 |
+
" 🔍 Web searching for: CHIPOTLE 1796 HUNTSVILLE AL...\n",
|
| 256 |
+
" 🔍 Web searching for: UAH BURSARS OFFICE HUNTSVILLE AL...\n",
|
| 257 |
+
" 🔍 Web searching for: STARS AND STRIKES - HUNT HUNTSVILLE AL...\n",
|
| 258 |
+
" 🔍 Web searching for: ROSS STORES #620 HUNTSVILLE AL...\n",
|
| 259 |
+
" 🔍 Web searching for: TST*KAMADO RAMEN - MID HUNTSVILLE AL00006963025030352515AA...\n",
|
| 260 |
+
" 🔍 Web searching for: SQ *MOM'SCLAYCO HARVEST AL0002305843022068424398...\n",
|
| 261 |
+
" 🔍 Web searching for: DOLLARTREE HUNTSVILLE AL...\n",
|
| 262 |
+
" 🔍 Web searching for: SLIM & HUSKIES NASHVILLE TN...\n",
|
| 263 |
+
" 🔍 Web searching for: CHIPOTLE 1392 SANTA MONICA CA...\n",
|
| 264 |
+
" 🔍 Web searching for: DOLLAR TREE LAS VEGAS NV...\n",
|
| 265 |
+
" 🔍 Web searching for: LYFT *RIDE TUE 12AM 8552800278 CA...\n",
|
| 266 |
+
" 🔍 Web searching for: SQ *SHIKU GCM LOS ANGELES CA0001152921515467218869...\n",
|
| 267 |
+
" 🔍 Web searching for: SQ *SHIKU GCM LOS ANGELES CA0001152921515467211997...\n",
|
| 268 |
+
" 🔍 Web searching for: WALMART STORE 05686 BURBANK CA...\n",
|
| 269 |
+
" 🔍 Web searching for: CAFE BELLA NEWPORT SAN DIEGO CAGOOGLE PAY ENDING IN 8984...\n",
|
| 270 |
+
" 🔍 Web searching for: CHIPOTLE 2883 NORTH LAS VEGNVGOOGLE PAY ENDING IN 8984...\n",
|
| 271 |
+
" 🔍 Web searching for: SHELL10006319007 HESPERIA CAGOOGLE PAY ENDING IN 8984...\n",
|
| 272 |
+
" 🔍 Web searching for: PANDA EXPRESS #1964 LAS VEGAS NV...\n",
|
| 273 |
+
" 🔍 Web searching for: DENNY'S #0141 QR LAS VEGAS NVGOOGLE PAY ENDING IN 8984...\n",
|
| 274 |
+
" 🔍 Web searching for: LAS VEGAS SOUVENIRS AND LAS VEGAS NV...\n",
|
| 275 |
+
" 🔍 Web searching for: CTLP*FIRST CLASS VENDI BELLGARDENS CA...\n",
|
| 276 |
+
" 🔍 Web searching for: SHELL12874333011 FRANKLIN TN...\n",
|
| 277 |
+
" 🔍 Web searching for: AMARAVATI INDIAN CUISINE BRENTWOOD TNGOOGLE PAY ENDING IN 8984...\n",
|
| 278 |
+
" 🔍 Web searching for: CENTRAL MARKET NASHVILLE TN...\n",
|
| 279 |
+
" 🔍 Web searching for: TST*PRINCES HOT CHICKE NASHVILLE TN00104605025320544723AA...\n",
|
| 280 |
+
" 🔍 Web searching for: TST*PRINCES HOT CHICKE NASHVILLE TN00104605025321087148AA...\n",
|
| 281 |
+
" 🔍 Web searching for: WALMART STORE 05616 NASHVILLE TN...\n",
|
| 282 |
+
" 🔍 Web searching for: PY *KUNG-FU TEA AL HUNTSVILLE ALGOOGLE PAY ENDING IN 8984...\n",
|
| 283 |
+
" 🔍 Web searching for: 2LEVY R&C CHATTANOOGA TNGOOGLE PAY ENDING IN 8984...\n",
|
| 284 |
+
"✅ Ingested 124 rows from Discover-AllAvailable-20260110.csv. Logic: spending_is_positive\n",
|
| 285 |
+
"📂 Processing /Users/sawale/Documents/learning/money_rag/demo_data/Chase5282_Activity20240110_20260110_20260111.CSV...\n",
|
| 286 |
+
" ✨ Enriching descriptions (Async)...\n",
|
| 287 |
+
" 🔍 Web searching for: TOUS LES JOURS - HUNTSVIL...\n",
|
| 288 |
+
" 🔍 Web searching for: Payment Thank You-Mobile...\n",
|
| 289 |
+
" 🔍 Web searching for: INDIAN BAZAAR...\n",
|
| 290 |
+
" 🔍 Web searching for: TST*BLUE OAK BBQ-HUNTSVI...\n",
|
| 291 |
+
" 🔍 Web searching for: AMC 4112 VAL BEND 18...\n",
|
| 292 |
+
" 🔍 Web searching for: HANDELS HOMEMADE JONES V...\n",
|
| 293 |
+
" 🔍 Web searching for: PAYYOURSELFBACK CREDIT...\n",
|
| 294 |
+
" 🔍 Web searching for: TST* HYDERABAD HOUSE...\n",
|
| 295 |
+
" 🔍 Web searching for: PATEL BROTHERS NASHVILLE...\n",
|
| 296 |
+
" 🔍 Web searching for: CITY OF HUNTSVILLE...\n",
|
| 297 |
+
" 🔍 Web searching for: WM SUPERCENTER #332...\n",
|
| 298 |
+
" 🔍 Web searching for: WAL-MART #0332...\n",
|
| 299 |
+
" 🔍 Web searching for: AMAZON MKTPL*OS1RI3LN3...\n",
|
| 300 |
+
" 🔍 Web searching for: TST* HATTIE B'S HUNTSVILL...\n",
|
| 301 |
+
" 🔍 Web searching for: AMAZON MKTPL*BI23Z6JR0...\n",
|
| 302 |
+
" 🔍 Web searching for: AMAZON MKTPL*BI9IW9OS2...\n",
|
| 303 |
+
" 🔍 Web searching for: AMAZON MKTPL*BI0296OJ2...\n",
|
| 304 |
+
" 🔍 Web searching for: AMAZON MKTPL*BB71A2881...\n",
|
| 305 |
+
" 🔍 Web searching for: AMAZON MKTPL*BB3FU2UQ2...\n",
|
| 306 |
+
" 🔍 Web searching for: AMAZON MKTPL*BI03P1OX2...\n",
|
| 307 |
+
" 🔍 Web searching for: AMAZON MKTPL*BB92U9QK2...\n",
|
| 308 |
+
" 🔍 Web searching for: AMAZON MKTPL*BB9TA14Q0...\n",
|
| 309 |
+
" 🔍 Web searching for: 88 BUFFET...\n",
|
| 310 |
+
" 🔍 Web searching for: AMAZON MKTPL*BB0DC71B1...\n",
|
| 311 |
+
" 🔍 Web searching for: AMAZON MKTPL*B20NN4ID0...\n",
|
| 312 |
+
" 🔍 Web searching for: AMAZON MKTPL*B273C1WY2...\n",
|
| 313 |
+
" 🔍 Web searching for: AMAZON MKTPL*B27IN41E1...\n",
|
| 314 |
+
" 🔍 Web searching for: AMAZON MKTPL*B250Z60P1...\n",
|
| 315 |
+
" 🔍 Web searching for: BEST BUY 00005140...\n",
|
| 316 |
+
" 🔍 Web searching for: DAVES HOT CHICKEN 1282...\n",
|
| 317 |
+
" 🔍 Web searching for: SQ *VIETCUISINE LLC...\n",
|
| 318 |
+
" 🔍 Web searching for: CHICK-FIL-A #00579...\n",
|
| 319 |
+
" 🔍 Web searching for: COSTCO WHSE #0356...\n",
|
| 320 |
+
" 🔍 Web searching for: AMAZON MKTPL*NK4AM43Q2...\n",
|
| 321 |
+
" 🔍 Web searching for: HUNTSVILLE FLV...\n",
|
| 322 |
+
" 🔍 Web searching for: AMAZON MKTPL*NM1H055K0...\n",
|
| 323 |
+
" 🔍 Web searching for: MAPCO EXPRESS #3403...\n",
|
| 324 |
+
" 🔍 Web searching for: DUNKIN #346212 Q35...\n",
|
| 325 |
+
" 🔍 Web searching for: CENTRAL MARKET...\n",
|
| 326 |
+
" 🔍 Web searching for: TARA INTERNATIONAL MARKET...\n",
|
| 327 |
+
" 🔍 Web searching for: BOTAN MARKET INC...\n",
|
| 328 |
+
" 🔍 Web searching for: AMARAVATI INDIAN CUISINE...\n",
|
| 329 |
+
" 🔍 Web searching for: GRUBHUB - UNIVERSITY OF A...\n",
|
| 330 |
+
" 🔍 Web searching for: BURGER KING #4959...\n",
|
| 331 |
+
" 🔍 Web searching for: PANDA EXPRESS #3013...\n",
|
| 332 |
+
" 🔍 Web searching for: MCDONALD'S F2431...\n",
|
| 333 |
+
" 🔍 Web searching for: ENDZONE COLLECTIBLES...\n",
|
| 334 |
+
" 🔍 Web searching for: ZIMMAD EVE* ZIMMAD JOI...\n",
|
| 335 |
+
" 🔍 Web searching for: SQ *SPILL COFFEE AND CREA...\n",
|
| 336 |
+
" 🔍 Web searching for: 10267 CAVA WHITESBURG...\n",
|
| 337 |
+
" 🔍 Web searching for: SPO*DRAGONSFORGECAFE...\n",
|
| 338 |
+
" 🔍 Web searching for: UAH BURSARS OFFICE...\n",
|
| 339 |
+
" 🔍 Web searching for: MARATHON PETRO42804...\n",
|
| 340 |
+
" 🔍 Web searching for: TST*NOTHING BUT NOODLES...\n",
|
| 341 |
+
" 🔍 Web searching for: VEDA INDIAN CUISINE...\n",
|
| 342 |
+
" 🔍 Web searching for: DOLLARTREE...\n",
|
| 343 |
+
" 🔍 Web searching for: TARGET 00013466...\n",
|
| 344 |
+
" 🔍 Web searching for: POPEYES 2577...\n",
|
| 345 |
+
" 🔍 Web searching for: DEORALI GROCERY...\n",
|
| 346 |
+
" 🔍 Web searching for: HELLO ATLANTA #33...\n",
|
| 347 |
+
" 🔍 Web searching for: SKY VIEW ATLANTA...\n",
|
| 348 |
+
" 🔍 Web searching for: STARBUCKS 25111...\n",
|
| 349 |
+
" 🔍 Web searching for: BP#8998205AM/PM WADE GRE...\n",
|
| 350 |
+
" 🔍 Web searching for: Waffle House 0857...\n",
|
| 351 |
+
" 🔍 Web searching for: CINEMARK 1131 BOXCON...\n",
|
| 352 |
+
" 🔍 Web searching for: CINEMARK 1131 RSTBAR...\n",
|
| 353 |
+
" 🔍 Web searching for: HOMEGOODS # 0568...\n",
|
| 354 |
+
" 🔍 Web searching for: ASIAN MARKET...\n",
|
| 355 |
+
" 🔍 Web searching for: PANDA EXPRESS #2005...\n",
|
| 356 |
+
" 🔍 Web searching for: STARS AND STRIKES - HUNTS...\n",
|
| 357 |
+
" 🔍 Web searching for: WAL-MART #332...\n",
|
| 358 |
+
"✅ Ingested 126 rows from Chase5282_Activity20240110_20260110_20260111.CSV. Logic: spending_is_negative\n"
|
| 359 |
+
]
|
| 360 |
+
}
|
| 361 |
+
],
|
| 362 |
+
"source": [
|
| 363 |
+
"path1 = \"/Users/sawale/Documents/learning/money_rag/demo_data/Discover-AllAvailable-20260110.csv\"\n",
|
| 364 |
+
"path2 = \"/Users/sawale/Documents/learning/money_rag/demo_data/Chase5282_Activity20240110_20260110_20260111.CSV\"\n",
|
| 365 |
+
"\n",
|
| 366 |
+
"# Initialize the Gemini model via Vertex AI\n",
|
| 367 |
+
"vertex_llm = init_chat_model(\n",
|
| 368 |
+
" \"gemini-2.5-flash\", \n",
|
| 369 |
+
" model_provider=\"google_vertexai\",\n",
|
| 370 |
+
" project='gen-lang-client-0311515393',\n",
|
| 371 |
+
" location='us-central1',\n",
|
| 372 |
+
")\n",
|
| 373 |
+
"\n",
|
| 374 |
+
"# Run async functions in Jupyter\n",
|
| 375 |
+
"# We run them sequentially here to avoid file lock issues with SQLite if both try to write at once\n",
|
| 376 |
+
"# (Though SQLite handles concurrency, keeping ingestion strictly ordered is safer for the demo)\n",
|
| 377 |
+
"await ingest_csv(path1, vertex_llm)\n",
|
| 378 |
+
"await ingest_csv(path2, vertex_llm)"
|
| 379 |
+
]
|
| 380 |
+
},
|
| 381 |
+
{
|
| 382 |
+
"cell_type": "code",
|
| 383 |
+
"execution_count": 5,
|
| 384 |
+
"metadata": {},
|
| 385 |
+
"outputs": [
|
| 386 |
+
{
|
| 387 |
+
"data": {
|
| 388 |
+
"text/html": [
|
| 389 |
+
"<div>\n",
|
| 390 |
+
"<style scoped>\n",
|
| 391 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 392 |
+
" vertical-align: middle;\n",
|
| 393 |
+
" }\n",
|
| 394 |
+
"\n",
|
| 395 |
+
" .dataframe tbody tr th {\n",
|
| 396 |
+
" vertical-align: top;\n",
|
| 397 |
+
" }\n",
|
| 398 |
+
"\n",
|
| 399 |
+
" .dataframe thead th {\n",
|
| 400 |
+
" text-align: right;\n",
|
| 401 |
+
" }\n",
|
| 402 |
+
"</style>\n",
|
| 403 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 404 |
+
" <thead>\n",
|
| 405 |
+
" <tr style=\"text-align: right;\">\n",
|
| 406 |
+
" <th></th>\n",
|
| 407 |
+
" <th>id</th>\n",
|
| 408 |
+
" <th>transaction_date</th>\n",
|
| 409 |
+
" <th>description</th>\n",
|
| 410 |
+
" <th>amount</th>\n",
|
| 411 |
+
" <th>category</th>\n",
|
| 412 |
+
" <th>source_file</th>\n",
|
| 413 |
+
" <th>enriched_info</th>\n",
|
| 414 |
+
" </tr>\n",
|
| 415 |
+
" </thead>\n",
|
| 416 |
+
" <tbody>\n",
|
| 417 |
+
" <tr>\n",
|
| 418 |
+
" <th>0</th>\n",
|
| 419 |
+
" <td>cf2f48c5-34ab-4544-b1f4-542681fd5017</td>\n",
|
| 420 |
+
" <td>2024-10-17 00:00:00</td>\n",
|
| 421 |
+
" <td>BACK MARKET BROOKLYN NY</td>\n",
|
| 422 |
+
" <td>231.19</td>\n",
|
| 423 |
+
" <td>Merchandise</td>\n",
|
| 424 |
+
" <td>Discover-AllAvailable-20260110.csv</td>\n",
|
| 425 |
+
" <td>Online Retailer in New York, NY . See BBB rati...</td>\n",
|
| 426 |
+
" </tr>\n",
|
| 427 |
+
" <tr>\n",
|
| 428 |
+
" <th>1</th>\n",
|
| 429 |
+
" <td>16913dce-52bf-43f4-853d-7bb55e09aac5</td>\n",
|
| 430 |
+
" <td>2024-10-18 00:00:00</td>\n",
|
| 431 |
+
" <td>TEMU.COM 8884958368 DE</td>\n",
|
| 432 |
+
" <td>16.51</td>\n",
|
| 433 |
+
" <td>Merchandise</td>\n",
|
| 434 |
+
" <td>Discover-AllAvailable-20260110.csv</td>\n",
|
| 435 |
+
" <td>Temu ' s business model has allowed it to beco...</td>\n",
|
| 436 |
+
" </tr>\n",
|
| 437 |
+
" <tr>\n",
|
| 438 |
+
" <th>2</th>\n",
|
| 439 |
+
" <td>5917cb10-f0e6-49fa-9987-776cee28688c</td>\n",
|
| 440 |
+
" <td>2024-10-18 00:00:00</td>\n",
|
| 441 |
+
" <td>WALMART STORE 00332 HUNTSVILLE AL</td>\n",
|
| 442 |
+
" <td>146.73</td>\n",
|
| 443 |
+
" <td>Merchandise</td>\n",
|
| 444 |
+
" <td>Discover-AllAvailable-20260110.csv</td>\n",
|
| 445 |
+
" <td>Posts Walmart Huntsville - University Drive De...</td>\n",
|
| 446 |
+
" </tr>\n",
|
| 447 |
+
" <tr>\n",
|
| 448 |
+
" <th>3</th>\n",
|
| 449 |
+
" <td>bd66a49e-5064-43ca-bdb2-94947297b6d5</td>\n",
|
| 450 |
+
" <td>2024-10-18 00:00:00</td>\n",
|
| 451 |
+
" <td>$100 STATEMENT CREDIT W 1ST PU</td>\n",
|
| 452 |
+
" <td>-100.00</td>\n",
|
| 453 |
+
" <td>Awards and Rebate Credits</td>\n",
|
| 454 |
+
" <td>Discover-AllAvailable-20260110.csv</td>\n",
|
| 455 |
+
" <td>That said, I woke up yesterday and saw a $ 100...</td>\n",
|
| 456 |
+
" </tr>\n",
|
| 457 |
+
" <tr>\n",
|
| 458 |
+
" <th>4</th>\n",
|
| 459 |
+
" <td>8b9b16d4-dd81-4ebe-820f-62fdc827a0bb</td>\n",
|
| 460 |
+
" <td>2024-11-02 00:00:00</td>\n",
|
| 461 |
+
" <td>PY *KUNG-FU TEA AL HUNTSVILLE AL</td>\n",
|
| 462 |
+
" <td>8.09</td>\n",
|
| 463 |
+
" <td>Restaurants</td>\n",
|
| 464 |
+
" <td>Discover-AllAvailable-20260110.csv</td>\n",
|
| 465 |
+
" <td>Jan 22, 2021 · Best part to me--besides the ro...</td>\n",
|
| 466 |
+
" </tr>\n",
|
| 467 |
+
" <tr>\n",
|
| 468 |
+
" <th>...</th>\n",
|
| 469 |
+
" <td>...</td>\n",
|
| 470 |
+
" <td>...</td>\n",
|
| 471 |
+
" <td>...</td>\n",
|
| 472 |
+
" <td>...</td>\n",
|
| 473 |
+
" <td>...</td>\n",
|
| 474 |
+
" <td>...</td>\n",
|
| 475 |
+
" <td>...</td>\n",
|
| 476 |
+
" </tr>\n",
|
| 477 |
+
" <tr>\n",
|
| 478 |
+
" <th>245</th>\n",
|
| 479 |
+
" <td>239f983e-2ed8-4454-90ad-0e8477bf7893</td>\n",
|
| 480 |
+
" <td>2025-06-18 00:00:00</td>\n",
|
| 481 |
+
" <td>PANDA EXPRESS #2005</td>\n",
|
| 482 |
+
" <td>52.87</td>\n",
|
| 483 |
+
" <td>Food & Drink</td>\n",
|
| 484 |
+
" <td>Chase5282_Activity20240110_20260110_20260111.CSV</td>\n",
|
| 485 |
+
" <td>Aug 29, 2025 · The group's headquarters is loc...</td>\n",
|
| 486 |
+
" </tr>\n",
|
| 487 |
+
" <tr>\n",
|
| 488 |
+
" <th>246</th>\n",
|
| 489 |
+
" <td>0ecba974-5491-4ef2-ba52-bf3c37d6e854</td>\n",
|
| 490 |
+
" <td>2025-06-14 00:00:00</td>\n",
|
| 491 |
+
" <td>Payment Thank You-Mobile</td>\n",
|
| 492 |
+
" <td>-62.07</td>\n",
|
| 493 |
+
" <td>None</td>\n",
|
| 494 |
+
" <td>Chase5282_Activity20240110_20260110_20260111.CSV</td>\n",
|
| 495 |
+
" <td>Sep 25, 2025 · Thank You Mobile is not a compa...</td>\n",
|
| 496 |
+
" </tr>\n",
|
| 497 |
+
" <tr>\n",
|
| 498 |
+
" <th>247</th>\n",
|
| 499 |
+
" <td>fb966472-d77a-4723-a9e6-15e63bda4cd2</td>\n",
|
| 500 |
+
" <td>2025-06-12 00:00:00</td>\n",
|
| 501 |
+
" <td>STARS AND STRIKES - HUNTS</td>\n",
|
| 502 |
+
" <td>21.80</td>\n",
|
| 503 |
+
" <td>Entertainment</td>\n",
|
| 504 |
+
" <td>Chase5282_Activity20240110_20260110_20260111.CSV</td>\n",
|
| 505 |
+
" <td>At our Huntsville , AL location , we pride our...</td>\n",
|
| 506 |
+
" </tr>\n",
|
| 507 |
+
" <tr>\n",
|
| 508 |
+
" <th>248</th>\n",
|
| 509 |
+
" <td>b6bc2e1d-d68b-4acd-a201-46d8cf5175e5</td>\n",
|
| 510 |
+
" <td>2025-06-11 00:00:00</td>\n",
|
| 511 |
+
" <td>WAL-MART #332</td>\n",
|
| 512 |
+
" <td>4.47</td>\n",
|
| 513 |
+
" <td>Groceries</td>\n",
|
| 514 |
+
" <td>Chase5282_Activity20240110_20260110_20260111.CSV</td>\n",
|
| 515 |
+
" <td>3 days ago · Walmart Inc. is an American multi...</td>\n",
|
| 516 |
+
" </tr>\n",
|
| 517 |
+
" <tr>\n",
|
| 518 |
+
" <th>249</th>\n",
|
| 519 |
+
" <td>e896adcc-5445-4626-b14c-09ebd5c9f658</td>\n",
|
| 520 |
+
" <td>2025-06-11 00:00:00</td>\n",
|
| 521 |
+
" <td>WAL-MART #332</td>\n",
|
| 522 |
+
" <td>57.60</td>\n",
|
| 523 |
+
" <td>Groceries</td>\n",
|
| 524 |
+
" <td>Chase5282_Activity20240110_20260110_20260111.CSV</td>\n",
|
| 525 |
+
" <td>3 days ago · Walmart Inc. is an American multi...</td>\n",
|
| 526 |
+
" </tr>\n",
|
| 527 |
+
" </tbody>\n",
|
| 528 |
+
"</table>\n",
|
| 529 |
+
"<p>250 rows × 7 columns</p>\n",
|
| 530 |
+
"</div>"
|
| 531 |
+
],
|
| 532 |
+
"text/plain": [
|
| 533 |
+
" id transaction_date \\\n",
|
| 534 |
+
"0 cf2f48c5-34ab-4544-b1f4-542681fd5017 2024-10-17 00:00:00 \n",
|
| 535 |
+
"1 16913dce-52bf-43f4-853d-7bb55e09aac5 2024-10-18 00:00:00 \n",
|
| 536 |
+
"2 5917cb10-f0e6-49fa-9987-776cee28688c 2024-10-18 00:00:00 \n",
|
| 537 |
+
"3 bd66a49e-5064-43ca-bdb2-94947297b6d5 2024-10-18 00:00:00 \n",
|
| 538 |
+
"4 8b9b16d4-dd81-4ebe-820f-62fdc827a0bb 2024-11-02 00:00:00 \n",
|
| 539 |
+
".. ... ... \n",
|
| 540 |
+
"245 239f983e-2ed8-4454-90ad-0e8477bf7893 2025-06-18 00:00:00 \n",
|
| 541 |
+
"246 0ecba974-5491-4ef2-ba52-bf3c37d6e854 2025-06-14 00:00:00 \n",
|
| 542 |
+
"247 fb966472-d77a-4723-a9e6-15e63bda4cd2 2025-06-12 00:00:00 \n",
|
| 543 |
+
"248 b6bc2e1d-d68b-4acd-a201-46d8cf5175e5 2025-06-11 00:00:00 \n",
|
| 544 |
+
"249 e896adcc-5445-4626-b14c-09ebd5c9f658 2025-06-11 00:00:00 \n",
|
| 545 |
+
"\n",
|
| 546 |
+
" description amount category \\\n",
|
| 547 |
+
"0 BACK MARKET BROOKLYN NY 231.19 Merchandise \n",
|
| 548 |
+
"1 TEMU.COM 8884958368 DE 16.51 Merchandise \n",
|
| 549 |
+
"2 WALMART STORE 00332 HUNTSVILLE AL 146.73 Merchandise \n",
|
| 550 |
+
"3 $100 STATEMENT CREDIT W 1ST PU -100.00 Awards and Rebate Credits \n",
|
| 551 |
+
"4 PY *KUNG-FU TEA AL HUNTSVILLE AL 8.09 Restaurants \n",
|
| 552 |
+
".. ... ... ... \n",
|
| 553 |
+
"245 PANDA EXPRESS #2005 52.87 Food & Drink \n",
|
| 554 |
+
"246 Payment Thank You-Mobile -62.07 None \n",
|
| 555 |
+
"247 STARS AND STRIKES - HUNTS 21.80 Entertainment \n",
|
| 556 |
+
"248 WAL-MART #332 4.47 Groceries \n",
|
| 557 |
+
"249 WAL-MART #332 57.60 Groceries \n",
|
| 558 |
+
"\n",
|
| 559 |
+
" source_file \\\n",
|
| 560 |
+
"0 Discover-AllAvailable-20260110.csv \n",
|
| 561 |
+
"1 Discover-AllAvailable-20260110.csv \n",
|
| 562 |
+
"2 Discover-AllAvailable-20260110.csv \n",
|
| 563 |
+
"3 Discover-AllAvailable-20260110.csv \n",
|
| 564 |
+
"4 Discover-AllAvailable-20260110.csv \n",
|
| 565 |
+
".. ... \n",
|
| 566 |
+
"245 Chase5282_Activity20240110_20260110_20260111.CSV \n",
|
| 567 |
+
"246 Chase5282_Activity20240110_20260110_20260111.CSV \n",
|
| 568 |
+
"247 Chase5282_Activity20240110_20260110_20260111.CSV \n",
|
| 569 |
+
"248 Chase5282_Activity20240110_20260110_20260111.CSV \n",
|
| 570 |
+
"249 Chase5282_Activity20240110_20260110_20260111.CSV \n",
|
| 571 |
+
"\n",
|
| 572 |
+
" enriched_info \n",
|
| 573 |
+
"0 Online Retailer in New York, NY . See BBB rati... \n",
|
| 574 |
+
"1 Temu ' s business model has allowed it to beco... \n",
|
| 575 |
+
"2 Posts Walmart Huntsville - University Drive De... \n",
|
| 576 |
+
"3 That said, I woke up yesterday and saw a $ 100... \n",
|
| 577 |
+
"4 Jan 22, 2021 · Best part to me--besides the ro... \n",
|
| 578 |
+
".. ... \n",
|
| 579 |
+
"245 Aug 29, 2025 · The group's headquarters is loc... \n",
|
| 580 |
+
"246 Sep 25, 2025 · Thank You Mobile is not a compa... \n",
|
| 581 |
+
"247 At our Huntsville , AL location , we pride our... \n",
|
| 582 |
+
"248 3 days ago · Walmart Inc. is an American multi... \n",
|
| 583 |
+
"249 3 days ago · Walmart Inc. is an American multi... \n",
|
| 584 |
+
"\n",
|
| 585 |
+
"[250 rows x 7 columns]"
|
| 586 |
+
]
|
| 587 |
+
},
|
| 588 |
+
"execution_count": 5,
|
| 589 |
+
"metadata": {},
|
| 590 |
+
"output_type": "execute_result"
|
| 591 |
+
}
|
| 592 |
+
],
|
| 593 |
+
"source": [
|
| 594 |
+
"\n",
|
| 595 |
+
"import sqlite3\n",
|
| 596 |
+
"import pandas as pd\n",
|
| 597 |
+
"\n",
|
| 598 |
+
"# Connect to the database\n",
|
| 599 |
+
"conn = sqlite3.connect(\"money_rag.db\")\n",
|
| 600 |
+
"\n",
|
| 601 |
+
"# Query all transactions\n",
|
| 602 |
+
"df_view = pd.read_sql_query(\"SELECT * FROM transactions\", conn)\n",
|
| 603 |
+
"\n",
|
| 604 |
+
"# Close connection\n",
|
| 605 |
+
"conn.close()\n",
|
| 606 |
+
"\n",
|
| 607 |
+
"# Display the data\n",
|
| 608 |
+
"df_view"
|
| 609 |
+
]
|
| 610 |
+
},
|
| 611 |
+
{
|
| 612 |
+
"cell_type": "code",
|
| 613 |
+
"execution_count": 6,
|
| 614 |
+
"metadata": {},
|
| 615 |
+
"outputs": [],
|
| 616 |
+
"source": [
|
| 617 |
+
"# df_view[\"amount\"].sum()"
|
| 618 |
+
]
|
| 619 |
+
},
|
| 620 |
+
{
|
| 621 |
+
"cell_type": "code",
|
| 622 |
+
"execution_count": 7,
|
| 623 |
+
"metadata": {},
|
| 624 |
+
"outputs": [
|
| 625 |
+
{
|
| 626 |
+
"name": "stderr",
|
| 627 |
+
"output_type": "stream",
|
| 628 |
+
"text": [
|
| 629 |
+
"/Users/sawale/Documents/learning/money_rag/.venv/lib/python3.12/site-packages/vertexai/_model_garden/_model_garden_models.py:278: UserWarning: This feature is deprecated as of June 24, 2025 and will be removed on June 24, 2026. For details, see https://cloud.google.com/vertex-ai/generative-ai/docs/deprecations/genai-vertexai-sdk.\n",
|
| 630 |
+
" warning_logs.show_deprecation_warning()\n"
|
| 631 |
+
]
|
| 632 |
+
},
|
| 633 |
+
{
|
| 634 |
+
"name": "stdout",
|
| 635 |
+
"output_type": "stream",
|
| 636 |
+
"text": [
|
| 637 |
+
"✅ Synced 250 records to Qdrant at 'qdrant_db/'.\n"
|
| 638 |
+
]
|
| 639 |
+
}
|
| 640 |
+
],
|
| 641 |
+
"source": [
|
| 642 |
+
"db = SQLDatabase.from_uri(\"sqlite:///money_rag.db\")\n",
|
| 643 |
+
"\n",
|
| 644 |
+
"embeddings = VertexAIEmbeddings(model_name=\"text-embedding-005\")\n",
|
| 645 |
+
"\n",
|
| 646 |
+
"\n",
|
| 647 |
+
"# Initialize Qdrant with disk persistence\n",
|
| 648 |
+
"# \"path\" creates a local directory to store the vectors\n",
|
| 649 |
+
"qdrant_client = QdrantClient(path=\"qdrant_db\") \n",
|
| 650 |
+
"COLLECTION_NAME = \"transactions\"\n",
|
| 651 |
+
"\n",
|
| 652 |
+
"def sync_to_qdrant(db_path: str):\n",
|
| 653 |
+
" \"\"\"Sync the transactions table from SQLite to Qdrant vector store.\"\"\"\n",
|
| 654 |
+
" # Load data from the database\n",
|
| 655 |
+
" conn = sqlite3.connect(db_path)\n",
|
| 656 |
+
" # Ensure we select the new column\n",
|
| 657 |
+
" df = pd.read_sql_query(\"SELECT * FROM transactions\", conn)\n",
|
| 658 |
+
" conn.close()\n",
|
| 659 |
+
"\n",
|
| 660 |
+
" # Recreate collection to ensure clean state on re-ingestion\n",
|
| 661 |
+
" if qdrant_client.collection_exists(COLLECTION_NAME):\n",
|
| 662 |
+
" qdrant_client.delete_collection(COLLECTION_NAME)\n",
|
| 663 |
+
" \n",
|
| 664 |
+
" qdrant_client.create_collection(\n",
|
| 665 |
+
" collection_name=COLLECTION_NAME,\n",
|
| 666 |
+
" vectors_config=VectorParams(size=768, distance=Distance.COSINE),\n",
|
| 667 |
+
" )\n",
|
| 668 |
+
" \n",
|
| 669 |
+
" vector_store = QdrantVectorStore(\n",
|
| 670 |
+
" client=qdrant_client,\n",
|
| 671 |
+
" collection_name=COLLECTION_NAME,\n",
|
| 672 |
+
" embedding=embeddings,\n",
|
| 673 |
+
" )\n",
|
| 674 |
+
"\n",
|
| 675 |
+
" # Use description + enrichment as the main text for embedding\n",
|
| 676 |
+
" texts = []\n",
|
| 677 |
+
" for _, row in df.iterrows():\n",
|
| 678 |
+
" # Combine original description with the permanent enriched info\n",
|
| 679 |
+
" # If enriched_info is present, it looks like: \"MCDONALDS - Fast food chain...\"\n",
|
| 680 |
+
" enriched = row.get('enriched_info', '')\n",
|
| 681 |
+
" if enriched:\n",
|
| 682 |
+
" texts.append(f\"{row['description']} - {enriched}\")\n",
|
| 683 |
+
" else:\n",
|
| 684 |
+
" texts.append(str(row['description']))\n",
|
| 685 |
+
" \n",
|
| 686 |
+
" # Store other fields as metadata for correlation\n",
|
| 687 |
+
" metadatas = df[['id', 'amount', 'category', 'transaction_date']].to_dict('records')\n",
|
| 688 |
+
" # Convert timestamps to string for metadata compatibility\n",
|
| 689 |
+
" for m in metadatas:\n",
|
| 690 |
+
" m['transaction_date'] = str(m['transaction_date'])\n",
|
| 691 |
+
"\n",
|
| 692 |
+
" vector_store.add_texts(texts=texts, metadatas=metadatas)\n",
|
| 693 |
+
" print(f\"✅ Synced {len(texts)} records to Qdrant at 'qdrant_db/'.\")\n",
|
| 694 |
+
" return vector_store\n",
|
| 695 |
+
"\n",
|
| 696 |
+
"# Initialize store from DB\n",
|
| 697 |
+
"vector_store = sync_to_qdrant(\"money_rag.db\")"
|
| 698 |
+
]
|
| 699 |
+
},
|
| 700 |
+
{
|
| 701 |
+
"cell_type": "code",
|
| 702 |
+
"execution_count": 8,
|
| 703 |
+
"metadata": {},
|
| 704 |
+
"outputs": [],
|
| 705 |
+
"source": [
|
| 706 |
+
"@dataclass\n",
|
| 707 |
+
"class RuntimeContext:\n",
|
| 708 |
+
" db: SQLDatabase\n",
|
| 709 |
+
" vector_store: QdrantVectorStore\n",
|
| 710 |
+
"\n",
|
| 711 |
+
"@tool\n",
|
| 712 |
+
"def execute_sql(query: str) -> str:\n",
|
| 713 |
+
" \"\"\"Execute a SQLite command and return results.\"\"\"\n",
|
| 714 |
+
" runtime = get_runtime(RuntimeContext)\n",
|
| 715 |
+
" db = runtime.context.db\n",
|
| 716 |
+
" try:\n",
|
| 717 |
+
" return db.run(query)\n",
|
| 718 |
+
" except Exception as e:\n",
|
| 719 |
+
" return f\"Error: {e}\"\n",
|
| 720 |
+
"\n",
|
| 721 |
+
"\n",
|
| 722 |
+
"@tool\n",
|
| 723 |
+
"def semantic_search(query: str, topk: int = 5) -> str:\n",
|
| 724 |
+
" \"\"\"\n",
|
| 725 |
+
" Search for transactions semantically when exact category or description matches are unknown.\n",
|
| 726 |
+
" Use this to find specific merchants or types of spending (e.g., 'streaming services' or 'fast food').\n",
|
| 727 |
+
" Returns a list of matching transactions with their IDs and metadata.\n",
|
| 728 |
+
" \"\"\"\n",
|
| 729 |
+
" runtime = get_runtime(RuntimeContext)\n",
|
| 730 |
+
" vs = runtime.context.vector_store\n",
|
| 731 |
+
" results = vs.similarity_search(query, k=topk)\n",
|
| 732 |
+
" \n",
|
| 733 |
+
" output = []\n",
|
| 734 |
+
" for doc in results:\n",
|
| 735 |
+
" output.append(f\"Result: {doc.page_content} | Metadata: {doc.metadata}\")\n",
|
| 736 |
+
" \n",
|
| 737 |
+
" return \"\\n\".join(output) if output else \"No semantically similar transactions found.\"\n",
|
| 738 |
+
"\n",
|
| 739 |
+
"\n",
|
| 740 |
+
"SYSTEM = f\"\"\"You are a sophisticated financial analyst with access to both a SQLite database and a semantic search tool.\n",
|
| 741 |
+
"\n",
|
| 742 |
+
"Workflow:\n",
|
| 743 |
+
"1. **Identify the Need**: If the user's request uses vague terms (e.g., \"junk food\", \"travel stuff\") or you don't know the exact category/description name, start with `semantic_search`.\n",
|
| 744 |
+
"2. **Semantic Discovery**:\n",
|
| 745 |
+
" - Call `semantic_search(query, topk=...)`. \n",
|
| 746 |
+
" - **Pro Tip**: Use a higher `topk` (e.g., 10 or 20) if you suspect there are many relevant transactions to find.\n",
|
| 747 |
+
" - **Relevance Check**: Not all results in the `topk` list may be relevant. You have permission to select only the few that match the user's intent and discard the rest.\n",
|
| 748 |
+
" - **Reiteration Logic**: \n",
|
| 749 |
+
" - If the initial results seem too narrow, or if you suspect more relevant transactions exist but were cut off, reiterate by calling `semantic_search` again with a higher `topk` (e.g., 20, 50).\n",
|
| 750 |
+
" - **Stop Condition**: If you found a few matching transactions and are confident that's all of them (i.e., the rest of the results are clearly irrelevant), do NOT iterate further.\n",
|
| 751 |
+
"3. **Filter & Extract**:\n",
|
| 752 |
+
" - Review the results from semantic search. Manually filter out any that aren't relevant to the user's specific intent.\n",
|
| 753 |
+
" - Extract the unique `id` values from the metadata of relevant results.\n",
|
| 754 |
+
"4. **Handling No Results**:\n",
|
| 755 |
+
" - If `semantic_search` returns nothing useful, or if the results are clearly not what the user asked for (e.g., user asked for \"coffee\" but results are all \"gas stations\"), STOP.\n",
|
| 756 |
+
" - Return a clear message: \"I couldn't find any transactions related to [topic].\"\n",
|
| 757 |
+
" - **DO NOT GUESS**. It is better to say you found nothing than to sum up unrelated transactions.\n",
|
| 758 |
+
"5. **SQL Execution**:\n",
|
| 759 |
+
" - Use `execute_sql` to perform the final calculation or retrieval.\n",
|
| 760 |
+
" - Correlate results by using the discovered IDs in your query: `SELECT SUM(amount) FROM transactions WHERE id IN ('uuid1', 'uuid2', ...)`.\n",
|
| 761 |
+
" - You can also use discovered merchant names if they share a common pattern.\n",
|
| 762 |
+
"\n",
|
| 763 |
+
"Rules:\n",
|
| 764 |
+
"- **Thinking**: Explain your plan before calling any tools.\n",
|
| 765 |
+
"- **Read-only**: No modifications (INSERT/UPDATE/DELETE/etc.) to the database.\n",
|
| 766 |
+
"- **Spending Logic**: \n",
|
| 767 |
+
" - Spending = POSITIVE values (> 0). \n",
|
| 768 |
+
" - Payments/Refunds = NEGATIVE values (< 0). EXCLUDE negative values when calculating spending.\n",
|
| 769 |
+
"- **SQL Formatting**: Limit results to 5 rows for non-aggregation queries. Use `SUM()` for totals.\n",
|
| 770 |
+
"\"\"\"\n",
|
| 771 |
+
"\n",
|
| 772 |
+
"agent = create_agent(\n",
|
| 773 |
+
" model=vertex_llm,\n",
|
| 774 |
+
" tools=[execute_sql, semantic_search],\n",
|
| 775 |
+
" system_prompt=SYSTEM,\n",
|
| 776 |
+
" context_schema=RuntimeContext,\n",
|
| 777 |
+
" checkpointer=InMemorySaver(),\n",
|
| 778 |
+
")"
|
| 779 |
+
]
|
| 780 |
+
},
|
| 781 |
+
{
|
| 782 |
+
"cell_type": "code",
|
| 783 |
+
"execution_count": 9,
|
| 784 |
+
"metadata": {},
|
| 785 |
+
"outputs": [
|
| 786 |
+
{
|
| 787 |
+
"name": "stdout",
|
| 788 |
+
"output_type": "stream",
|
| 789 |
+
"text": [
|
| 790 |
+
"Match: TST*WOKS UP HUNTSVILLE AL00075396024313993332AA - Take a look at these nine new businesses that have just opened or announced an opening here in the Rocket City. Check Dress Up - Huntsville in Huntsville , AL, 920 Bob Wallace Ave SW #317 on Cylex and find ☎ (256) 585-2..., contact info, ⌚ opening hours. We're posted at the WeUp location for just a couple more hours —so if you're hungry, now's the time to pull up ! We've had some amazing folks swing through today, including the owner of Ms. Juju's Kitchen and one of your favorite local barbers Mike —and YES, they're repeat customers because the food speaks for itself! Detailed info and reviews on 27 top companies and startups in Huntsville in 2026. Get the latest updates on their products, jobs, funding, investors, founders and more. Check Your Statement: Look for entries starting with \"TST\" on your debit card statement. Note the date, amount, and any additional details like a business name or code. Match with Receipts: Compare the charge to recent purchases, especially at restaurants, cafes, or bars. The amount should match your receipt or include a tip. | Metadata: {'id': '737481c4-fc9c-48a5-8cd9-9f2a02844c12', 'amount': 15.21, 'category': 'Restaurants', 'transaction_date': '2025-04-14 00:00:00', '_id': 'd461cada0bc542e6803cd9d14460ad05', '_collection_name': 'transactions'}\n",
|
| 791 |
+
"Match: TST*KAMADO RAMEN - MID HUNTSVILLE AL00006963025030352515AA - Kamado Ramen in Huntsville rated 4.7 out of 5 on Restaurant Guru: 896 reviews by visitors, 205 photos & 5 videos. Explore menu, check opening hours and order delivery Kamado Ramen : Spicy Miso Ramen 1022 Mid City Drive Huntsville , AL & 3414 Governors Dr. Huntsville , AL ( Mid City location and Stovehouse Food Garden Location ) Ramen is a hearty soup with quick-cooking noodles, often loaded with vegetables, meat, and egg. These two locations vary but carry similar items. Get address, phone number, hours, reviews, photos and more for Kamado Ramen Midcity | 1022 MidCity Drive, Huntsville , AL 35806, USA on usarestaurants.info Check Your Statement: Look for entries starting with \"TST\" on your debit card statement. Note the date, amount, and any additional details like a business name or code. Match with Receipts: Compare the charge to recent purchases, especially at restaurants, cafes, or bars. The amount should match your receipt or include a tip. Kamado Ramen at MidCity 1022 Mid City Drive, Huntsville , AL 35806 About Discussion 4 went 35 interested 3 shares | Metadata: {'id': 'cb6d01a0-0bab-4617-aba9-7e587b3ea32d', 'amount': 16.29, 'category': 'Restaurants', 'transaction_date': '2025-05-16 00:00:00', '_id': '9dc318e6b9e04a2f91e1a85ffbb375b8', '_collection_name': 'transactions'}\n",
|
| 792 |
+
"Match: AMAZON MKTPL*B27IN41E1 - In these cases, Amazon charges your payment method when each part of the order is shipped. This is why you have separate charges on your statement. Access Your Transactions to match the charge amounts and dates on your statement with the associated order number. Amazon is one of the world's most valuable brands and is one of the big five American information technology companies, along with Alphabet, Apple, Meta, and Microsoft. Some items Amazon sells include Show some love to small businesses . Donate to Kiva.org today to provide loans to local entrepreneurs and the Tripadvisor Foundation will match it, up to $150,000 USD collectively. Terms apply. Amazon mktpl ZG27L7451. General operating fund. Buildings and grounds.The UPS store 5242. General operating fund. Police department. Политика обработки персональных данных Согласие на обработку персональных данных Агентский Договор Все бренды. Мобильное приложение. Скачать в App Store Скачать в Google Play. | Metadata: {'id': 'ea13dad8-5978-442b-b4e4-7b22c226d72f', 'amount': 86.38, 'category': 'Shopping', 'transaction_date': '2025-11-24 00:00:00', '_id': '88754c75d8154f9e9912cf46cc51ade3', '_collection_name': 'transactions'}\n"
|
| 793 |
+
]
|
| 794 |
+
}
|
| 795 |
+
],
|
| 796 |
+
"source": [
|
| 797 |
+
"# # Initialize the store once\n",
|
| 798 |
+
"# # vector_store = sync_to_qdrant(\"money_rag.db\")\n",
|
| 799 |
+
"\n",
|
| 800 |
+
"# Test search\n",
|
| 801 |
+
"query = \"Where did I spend money on groceries or food?\"\n",
|
| 802 |
+
"results = vector_store.similarity_search(query, k=3)\n",
|
| 803 |
+
"\n",
|
| 804 |
+
"for doc in results:\n",
|
| 805 |
+
" print(f\"Match: {doc.page_content} | Metadata: {doc.metadata}\")"
|
| 806 |
+
]
|
| 807 |
+
},
|
| 808 |
+
{
|
| 809 |
+
"cell_type": "code",
|
| 810 |
+
"execution_count": 10,
|
| 811 |
+
"metadata": {},
|
| 812 |
+
"outputs": [
|
| 813 |
+
{
|
| 814 |
+
"name": "stdout",
|
| 815 |
+
"output_type": "stream",
|
| 816 |
+
"text": [
|
| 817 |
+
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
| 818 |
+
"\n",
|
| 819 |
+
"ow much did i spend in haircuts\n",
|
| 820 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 821 |
+
"Tool Calls:\n",
|
| 822 |
+
" semantic_search (f8fc4ebf-658f-4c58-a163-6bc4308ae46a)\n",
|
| 823 |
+
" Call ID: f8fc4ebf-658f-4c58-a163-6bc4308ae46a\n",
|
| 824 |
+
" Args:\n",
|
| 825 |
+
" topk: 10.0\n",
|
| 826 |
+
" query: haircuts\n",
|
| 827 |
+
"=================================\u001b[1m Tool Message \u001b[0m=================================\n",
|
| 828 |
+
"Name: semantic_search\n",
|
| 829 |
+
"\n",
|
| 830 |
+
"Result: TST*WOKS UP HUNTSVILLE AL00075396024313993332AA - Take a look at these nine new businesses that have just opened or announced an opening here in the Rocket City. Check Dress Up - Huntsville in Huntsville , AL, 920 Bob Wallace Ave SW #317 on Cylex and find ☎ (256) 585-2..., contact info, ⌚ opening hours. We're posted at the WeUp location for just a couple more hours —so if you're hungry, now's the time to pull up ! We've had some amazing folks swing through today, including the owner of Ms. Juju's Kitchen and one of your favorite local barbers Mike —and YES, they're repeat customers because the food speaks for itself! Detailed info and reviews on 27 top companies and startups in Huntsville in 2026. Get the latest updates on their products, jobs, funding, investors, founders and more. Check Your Statement: Look for entries starting with \"TST\" on your debit card statement. Note the date, amount, and any additional details like a business name or code. Match with Receipts: Compare the charge to recent purchases, especially at restaurants, cafes, or bars. The amount should match your receipt or include a tip. | Metadata: {'id': '737481c4-fc9c-48a5-8cd9-9f2a02844c12', 'amount': 15.21, 'category': 'Restaurants', 'transaction_date': '2025-04-14 00:00:00', '_id': 'd461cada0bc542e6803cd9d14460ad05', '_collection_name': 'transactions'}\n",
|
| 831 |
+
"Result: BEST BUY 00005140 - Visit your local Best Buy at 20290 Katy Fwy in Katy, TX for electronics, computers, appliances, cell phones, video games & more new tech. In- store pickup & free shipping. Learn about the \" Best Buy 00005140 Huntsville Al\" charge and why it appears on your credit card statement. First seen on March 20, 2023, Last updated on March 20, 2023. What is it?Also Appears on Statements As. Chkcardbest buy 00005140 huntsville al. Is it time you ditched store - bought bread and started making your own, being in total control of the ingredients and your health? You might just do that when you learn about the differences between homemade bread vs. store - bought bread. Best Buy Goes Out Of Business ?? Why Best Buy Is Tanking Hard - YouTube. Best Buy has closed 18 stores , but physical locations still key to strategy. Typically during the weekdays, AT&T store hours are from 9 AM to 9 PM on Monday through Friday. The weekend begins in a similar fashion, and the AT&T store will open at 9 AM and close at 9 PM as well for most locations . | Metadata: {'id': '112c618f-6a83-4bfd-bb37-1193a2632a80', 'amount': 816.42, 'category': 'Shopping', 'transaction_date': '2025-11-22 00:00:00', '_id': '2e1ced908f6e44e3936e32cb8352e625', '_collection_name': 'transactions'}\n",
|
| 832 |
+
"Result: SLIM & HUSKIES NASHVILLE TN - Slim & Husky ’s has quickly become one of Nashville ’s fastest-growing businesses , and their mission to support the Nashville community is very ... While this isn ’ t their only location around the city, Emmy Squared: The Gulch is located in the heart of Nashville . ... of experience in WordPress development , we deliver powerful, scalable WordPress development in Nashville , TN —delivering custom development and ... Whether your business is based in Nashville , or you want to give a national feel, our comfortable and spacious meeting room will provide a creative ... ... Reviews: Reviews ordered by recency and descriptiveness of user-identified themes such as wait time, length of visit, general tips, and location ... | Metadata: {'id': '536147de-9359-4206-84b5-08e316fbd784', 'amount': 16.9, 'category': 'Restaurants', 'transaction_date': '2025-05-19 00:00:00', '_id': '6f3179b1f3364948a07bd5188e95b836', '_collection_name': 'transactions'}\n",
|
| 833 |
+
"Result: HUNTSVILLE FLV - Huntsville FLV is a local establishment in Huntsville, AL that offers a variety of services to its customers. Specializing in providing solutions for everyday needs, Huntsville FLV aims to cater to the diverse needs of the community. Now that we've got the long-awaited Trader Joe's, Dave & Busters, Top Golf, and Cheesecake Factory in town, what is the next business you'd like to see come to Hsv? Dec 16, 2024 · Take a look at these three new businesses that have opened or are about to open here in the Huntsville area. With four unique services— Huntsville International Airport, Rail and Air Cargo, Space Port, and Jetplex Industrial Park—we deliver opportunities for businesses and travelers. Whether by land, air, or space, we link people to places and products to markets. Jan 3, 2024 · Find the best Retail Stores in Huntsville , AL. Search Huntsville , AL Retail Stores to find the top rated Retail Stores . | Metadata: {'id': 'f6171537-bbe6-4c47-9b75-81b443e2d3ad', 'amount': 3.25, 'category': 'Food & Drink', 'transaction_date': '2025-10-25 00:00:00', '_id': 'f65cd50fdc7d4d7f8d5ad2ff4e307a78', '_collection_name': 'transactions'}\n",
|
| 834 |
+
"Result: HUNTSVILLE FLV - Huntsville FLV is a local establishment in Huntsville, AL that offers a variety of services to its customers. Specializing in providing solutions for everyday needs, Huntsville FLV aims to cater to the diverse needs of the community. Now that we've got the long-awaited Trader Joe's, Dave & Busters, Top Golf, and Cheesecake Factory in town, what is the next business you'd like to see come to Hsv? Dec 16, 2024 · Take a look at these three new businesses that have opened or are about to open here in the Huntsville area. With four unique services— Huntsville International Airport, Rail and Air Cargo, Space Port, and Jetplex Industrial Park—we deliver opportunities for businesses and travelers. Whether by land, air, or space, we link people to places and products to markets. Jan 3, 2024 · Find the best Retail Stores in Huntsville , AL. Search Huntsville , AL Retail Stores to find the top rated Retail Stores . | Metadata: {'id': '5aad4b5a-848f-4539-9d3a-54dfce7f3ed9', 'amount': 3.25, 'category': 'Food & Drink', 'transaction_date': '2025-10-30 00:00:00', '_id': 'ed5ee1ae725b452983b42bda99cd129b', '_collection_name': 'transactions'}\n",
|
| 835 |
+
"Result: HUNTSVILLE FLV - Huntsville FLV is a local establishment in Huntsville, AL that offers a variety of services to its customers. Specializing in providing solutions for everyday needs, Huntsville FLV aims to cater to the diverse needs of the community. Now that we've got the long-awaited Trader Joe's, Dave & Busters, Top Golf, and Cheesecake Factory in town, what is the next business you'd like to see come to Hsv? Dec 16, 2024 · Take a look at these three new businesses that have opened or are about to open here in the Huntsville area. With four unique services— Huntsville International Airport, Rail and Air Cargo, Space Port, and Jetplex Industrial Park—we deliver opportunities for businesses and travelers. Whether by land, air, or space, we link people to places and products to markets. Jan 3, 2024 · Find the best Retail Stores in Huntsville , AL. Search Huntsville , AL Retail Stores to find the top rated Retail Stores . | Metadata: {'id': 'ea82ce94-0253-4e76-bdfa-2ca89be58129', 'amount': 3.25, 'category': 'Food & Drink', 'transaction_date': '2025-10-26 00:00:00', '_id': '6f27f08b44aa462282a2267ff5b2c339', '_collection_name': 'transactions'}\n",
|
| 836 |
+
"Result: LAS VEGAS SOUVENIRS AND LAS VEGAS NV - Earn $$ as a professional gift shop owner. Take your first steps and register today. Become a professional gift shop owner from the comfort of your home. Register today. amazon.com has been visited by 1M+ users in the past month Read Customer Reviews & Find Best Sellers. Free, Easy Returns On Millions Of Items. Shop Chairs, Tables, Place Mats, Chandeliers, Pendant Light Fixtures and More. faire.com has been visited by 100K+ users in the past month Empowering Independent Entrepreneurs To Chase Their Dreams, Shop From Over 100,000 Brands. Buy Wholesale Products Easily & Risk Free on Faire With Free Returns On Your First Order. Organic · Eco-Friendly · Free & Easy Returns · Small Batch Bonanza Gift Shop - The World's Largest Gift Shop located in Las Vegas , Nevada . 40,000 square feet of Vegas souvenirs, apparel, collectibles, Native American art and more since 1980. Open daily 9AM-midnight. This is a review for souvenir shops in Las Vegas , NV : \"Morris & Co is a high-end souvenir shop of luxury items like jewelry, clothing, gifts, accessories, snacks, and many other cool items like LIV nightclub accessories. M&M’s World Las Vegasis located inside a showcase mall on Las Vegas Blvd. The impressive four-level store was the first ever M&M store ! This isn’t a traditional gift shop, but that doesn’t mean you won’t find plenty of fun gifts and Las Vegas souvenirs . Guests can create their M&M design or choose from several premade, Vegas -themed designs. This is ... See full list on vegasfoodandfun.com Coca-Cola has been a staple in American homes for decades. If you’re one of the many Coca-Cola lovers, this gift shop is for you! The store features a variety of Coca-Cola and Las Vegas -themed merchandise. Many of the products sold in the Coca-Cola storeare vintage or limited edition items that can’t be purchased anywhere else. If the gifts aren’t ... 
See full list on vegasfoodandfun.com Welcome to Las Vegas has multiple gift shops around Sin City, but the store on Las Vegas Blvd inside The Venetian at Grand Canal Shoppesis the most impressive. The whole store is dedicated to celebrating fabulous Las Vegas ! You’ll find t-shirts, shot glasses, snow globes, onesies, and anything else you can imagine in Las Vegas memorabilia form. Thi... See full list on vegasfoodandfun.com Downtown Container Parkis a hub for shopping, dining, and entertainment in Downtown Las Vegas . The park is made of repurposed shipping containers, hence the unique name. The best part about visiting the Downtown Container Park is wandering from one store to another with amazing food in between. The park focuses on local retailers, so you’ll find a ... See full list on vegasfoodandfun.com Tokyo Discount is a store on Maryland Pkwy, Las Vegas , NV , that brings Japanese culture to Sin City. The discount shop has all things Tokyo including Hello Kitty, Japanese snacks, Pokemon, and Japanese decor. The downtown location makes it easy to explore Tokyo Discount during your Las Vegas trip! Whether you buy something Vegas -themed or something... See full list on vegasfoodandfun.com Local Oasis is a shop in the art district of Las Vegas that a local artist opened in 2020. The small shop sells art, gifts, and items from local brand owners. What better souvenir for your Vegas trip than a one- of -a-kind art piece? It’s important to remember this shop doesn’t have a huge selection of cheesy Las Vegas souvenirs , but that’s why many ... See full list on vegasfoodandfun.com Karma Connection is a shop that specializes in all things psychic, crystals, and natural healing. If you’re interested in those things, you’ll find this store is overflowing with possibilities! You can buy a new crystal to keep as your Las Vegas souvenir or get a personalized psychic reading. This gift shop is usually filled with like-minded people... 
See full list on vegasfoodandfun.com The Gamblers General Store has been appealing to casino lovers since 1984. The store is located on S Main Street, and it’s one of the only souvenir shops in Las Vegas that caters to people who love gambling. You’ll find an inventory that includes playing cards, customized chips, and casino memorabilia. There’s even a book section with books that te... See full list on vegasfoodandfun.com Hudson News is the best place to get a gift when entering Las Vegas via Harry Reid Airport, formerly known as McCarran International Airport. They have a lot of the same stuff you’d find in gift shops outside the airport, but it’s way more convenient! Grab a t-shirt, mugs, key chains, and several other airport-friendly last-minute gifts. You can al... See full list on vegasfoodandfun.com Bonanza Gift Shop is much more than just another souvenir shop on the Strip. It’s the world’s largest gift shop! The inventory at Bonanza doesn’t just compete with other souvenir shops. It blows those shops out of the water! Bonanza has reasonable prices and a prime location , making it a fan favorite over the years. The world’s largest gift shop ca... See full list on vegasfoodandfun.com Jan 24, 2024 · The store was established in 1984 and has since been a staple of Las Vegas as the one and only Gamblers general store. You won’t miss any gamblers’ favorite items; ranging from hand to find items to popular selections. This guide will give you all the best places to get souvenirs, the different types of gifts to expect in the shops, and even tips on how to get them back home. Get your shopping lists ready because you’ll check off everyone—and find a few fun items for your own keepsake. Just a short walk away, M&M’s Las Vegas at Showcase Mall is a four-story candy wonderland, where you can create personalized M&M’s in minutes. Whether you're a chocolate fanatic or just looking for a fun stop, these iconic stores are the sweetest attractions on the Strip! 
Because we are here \"in the heart of it\" we are able to offer you ALL the popular Las Vegas Souvenirs and Las Vegas Gift Items you will see around Vegas in the various giftshops. We continually strive to bring you what you see and want from Las Vegas in the form of Quality Products. Earn $$ as a professional gift shop owner. Take your first steps and register today. Become a professional gift shop owner from the comfort of your home. Register today. amazon.com has been visited by 1M+ users in the past month Read Customer Reviews & Find Best Sellers. Free, Easy Returns On Millions Of Items. Shop Chairs, Tables, Place Mats, Chandeliers, Pendant Light Fixtures and More. faire.com has been visited by 100K+ users in the past month Empowering Independent Entrepreneurs To Chase Their Dreams, Shop From Over 100,000 Brands. Buy Wholesale Products Easily & Risk Free on Faire With Free Returns On Your First Order. Organic · Eco-Friendly · Free & Easy Returns · Small Batch | Metadata: {'id': 'bac13cd8-cfcc-48f3-8dfa-18c3a3b2cc59', 'amount': 15.14, 'category': 'Merchandise', 'transaction_date': '2025-05-24 00:00:00', '_id': '415f62d03ac949f2b5ab56ef8cc8d105', '_collection_name': 'transactions'}\n",
|
| 837 |
+
"Result: UAH BURSARS OFFICE - The Bursar 's Office oversees the billing and collection of fees, adding funds to student accounts, and student refunds.Student Services Building, Room 123 301 Sparkman Drive Huntsville, AL 35899. 256.824.6226 bursar @ uah .edu. UAH Bursar Welcome to the Office of the Bursar The Bursar ’s Office manages student accounts, including the billing and collection of fees, and student refunds. Located in the Student Services… cs_ office . Dismiss.Your personal data will be processed and information from your device (cookies, unique identifiers, and other device data) may be stored by, accessed by and shared with 156 TCF vendor(s) and 81 ad partner(s), or used specifically by this site. UAH 1 199,50Цена предложения: UAH 1 199,50.Политика отмены PS Store . Меры предосторожности. О рейтингах. Searching for an Advance Auto Parts close to you? Advance has locations across North America which provide quality auto parts, advice and accessories for your car. | Metadata: {'id': 'eea3224b-1289-40bd-bf02-a54a6a7a7ce7', 'amount': 50.0, 'category': 'Education', 'transaction_date': '2025-08-21 00:00:00', '_id': '775b6f5066bd4435837a5f3c1b613aea', '_collection_name': 'transactions'}\n",
|
| 838 |
+
"Result: HANDELS HOMEMADE JONES V - Jan 2, 2026 · Monty Python's Life of Brian is a 1979 British surreal biblical black comedy film starring and written by the comedy group Monty Python It was directed by ... Nov 24, 2025 · In 1861, Welsh entrepreneur Pryce Pryce-Jones formed the first mail order business, an idea which would change the nature of retail. Selling Welsh flannel ... Dec 10, 2025 · Christmas music comprises a variety of genres of music regularly performed or heard around the Christmas season. Enter your address to see if Handel ' s Homemade Ice Cream - Jones Valley delivery is available to your location in Huntsville.What are some other stores nearby Handel ' s Homemade Ice Cream - Jones Valley? store locator pin Store Finder. All Our Fun-Filled Flavors. We have a collection of over 100 flavors. Below is a full list of our flavors. Be sure to check your local store to discover the flavors they are currently serving. | Metadata: {'id': '1c5b9d0f-8ecf-4d06-baa5-e6f036102eb0', 'amount': 8.18, 'category': 'Food & Drink', 'transaction_date': '2025-12-30 00:00:00', '_id': '8e207351ec57437aae0343b6c81ea434', '_collection_name': 'transactions'}\n",
|
| 839 |
+
"Result: SQ *SPILL COFFEE AND CREA - **Free refills of hot and iced brewed coffee and tea during same store visit. Excludes Cold Brew, Nitro Cold Brew, Iced Tea Lemonade, flavored Iced Tea and Starbucks Refreshers® base. At participating stores . Find out my IP address, public IPv4 and IPv6, IP location , ISP with additional information like the country, city, user agent, your browser and OS. osu! farm pp maps and beatmap recommendations... Square register is the POS software that’s simple, powerful, and free. Grand Tour Around Island 16 Locations plus Snorkeling. 4.9. (7,365).Donate to Kiva.org today to provide loans to local entrepreneurs and the Tripadvisor Foundation will match it, up to $150,000 USD collectively. Terms apply. | Metadata: {'id': 'b7708547-c125-427e-9e9e-f330ea66af71', 'amount': 4.39, 'category': 'Food & Drink', 'transaction_date': '2025-08-30 00:00:00', '_id': '5b48543fabe44e468e8db6844d868baf', '_collection_name': 'transactions'}\n",
|
| 840 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 841 |
+
"\n",
|
| 842 |
+
"I couldn't find any transactions related to haircuts.\n"
|
| 843 |
+
]
|
| 844 |
+
}
|
| 845 |
+
],
|
| 846 |
+
"source": [
|
| 847 |
+
"question = \"ow much did i spend in haircuts\"\n",
|
| 848 |
+
"steps = []\n",
|
| 849 |
+
"\n",
|
| 850 |
+
"for step in agent.stream(\n",
|
| 851 |
+
" {\"messages\": [{\"role\": \"user\", \"content\": question}]},\n",
|
| 852 |
+
" {\"configurable\": {\"thread_id\": \"1\"}},\n",
|
| 853 |
+
" stream_mode=\"values\",\n",
|
| 854 |
+
" context=RuntimeContext(db=db, vector_store=vector_store)\n",
|
| 855 |
+
"):\n",
|
| 856 |
+
" step[\"messages\"][-1].pretty_print()\n",
|
| 857 |
+
" steps.append(step)"
|
| 858 |
+
]
|
| 859 |
+
},
|
| 860 |
+
{
|
| 861 |
+
"cell_type": "code",
|
| 862 |
+
"execution_count": null,
|
| 863 |
+
"metadata": {},
|
| 864 |
+
"outputs": [
|
| 865 |
+
{
|
| 866 |
+
"name": "stdout",
|
| 867 |
+
"output_type": "stream",
|
| 868 |
+
"text": [
|
| 869 |
+
"💬 Chat with your financial data! (Type 'exit' to stop)\n",
|
| 870 |
+
"\n",
|
| 871 |
+
"--------------------------------------------------\n",
|
| 872 |
+
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
| 873 |
+
"\n",
|
| 874 |
+
"or db won't be in github..\n",
|
| 875 |
+
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
| 876 |
+
"\n",
|
| 877 |
+
"It seems like your last message might be incomplete or a comment about something else. Could you please let me know what you'd like me to do or if you have a question about your spending? I'm ready to help analyze your transactions.\n",
|
| 878 |
+
"--------------------------------------------------\n",
|
| 879 |
+
"\n"
|
| 880 |
+
]
|
| 881 |
+
}
|
| 882 |
+
],
|
| 883 |
+
"source": [
|
| 884 |
+
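"# Assumes agent, db, vector_store and RuntimeContext are already defined in this session (TODO confirm which cell creates them).\n",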
|
| 885 |
+
"# Interactive Chat Loop\n",
|
| 886 |
+
"print(\"💬 Chat with your financial data! (Type 'exit' to stop)\")\n",
|
| 887 |
+
"\n",
|
| 888 |
+
"while True:\n",
|
| 889 |
+
" try:\n",
|
| 890 |
+
" user_input = input(\"User: \")\n",
|
| 891 |
+
" if user_input.lower() in [\"exit\", \"quit\", \"q\"]:\n",
|
| 892 |
+
" print(\"Goodbye!\")\n",
|
| 893 |
+
" break\n",
|
| 894 |
+
" \n",
|
| 895 |
+
" print(\"\\n\" + \"-\"*50)\n",
|
| 896 |
+
" \n",
|
| 897 |
+
" # Stream the agent's response\n",
|
| 898 |
+
" for step in agent.stream(\n",
|
| 899 |
+
" {\"messages\": [{\"role\": \"user\", \"content\": user_input}]},\n",
|
| 900 |
+
" {\"configurable\": {\"thread_id\": \"1\"}}, # Keeps memory of the conversation\n",
|
| 901 |
+
" stream_mode=\"values\",\n",
|
| 902 |
+
" context=RuntimeContext(db=db, vector_store=vector_store)\n",
|
| 903 |
+
" ):\n",
|
| 904 |
+
" step[\"messages\"][-1].pretty_print()\n",
|
| 905 |
+
" \n",
|
| 906 |
+
" print(\"-\" * 50 + \"\\n\")\n",
|
| 907 |
+
" \n",
|
| 908 |
+
" except KeyboardInterrupt:\n",
|
| 909 |
+
" print(\"\\nGoodbye!\")\n",
|
| 910 |
+
" break"
|
| 911 |
+
]
|
| 912 |
+
},
|
| 913 |
+
{
|
| 914 |
+
"cell_type": "code",
|
| 915 |
+
"execution_count": null,
|
| 916 |
+
"metadata": {},
|
| 917 |
+
"outputs": [],
|
| 918 |
+
"source": []
|
| 919 |
+
},
|
| 920 |
+
{
|
| 921 |
+
"cell_type": "code",
|
| 922 |
+
"execution_count": null,
|
| 923 |
+
"metadata": {},
|
| 924 |
+
"outputs": [],
|
| 925 |
+
"source": []
|
| 926 |
+
},
|
| 927 |
+
{
|
| 928 |
+
"cell_type": "code",
|
| 929 |
+
"execution_count": null,
|
| 930 |
+
"metadata": {},
|
| 931 |
+
"outputs": [],
|
| 932 |
+
"source": []
|
| 933 |
+
}
|
| 934 |
+
],
|
| 935 |
+
"metadata": {
|
| 936 |
+
"kernelspec": {
|
| 937 |
+
"display_name": "money_rag",
|
| 938 |
+
"language": "python",
|
| 939 |
+
"name": "python3"
|
| 940 |
+
},
|
| 941 |
+
"language_info": {
|
| 942 |
+
"codemirror_mode": {
|
| 943 |
+
"name": "ipython",
|
| 944 |
+
"version": 3
|
| 945 |
+
},
|
| 946 |
+
"file_extension": ".py",
|
| 947 |
+
"mimetype": "text/x-python",
|
| 948 |
+
"name": "python",
|
| 949 |
+
"nbconvert_exporter": "python",
|
| 950 |
+
"pygments_lexer": "ipython3",
|
| 951 |
+
"version": "3.12.7"
|
| 952 |
+
}
|
| 953 |
+
},
|
| 954 |
+
"nbformat": 4,
|
| 955 |
+
"nbformat_minor": 2
|
| 956 |
+
}
|
notebooks/2_test_vectordb.ipynb
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# Test MCP Server (Local Mode)\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"This notebook demonstrates how to use the `mcp_server.py` tools directly within a LangChain workflow. \n",
|
| 10 |
+
"This simulates how an agent would interact with your local or cloud MCP server."
|
| 11 |
+
]
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "code",
|
| 15 |
+
"execution_count": null,
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"outputs": [],
|
| 18 |
+
"source": [
|
| 19 |
+
"import sys\n",
|
| 20 |
+
"import os\n",
|
| 21 |
+
"import pandas as pd\n",
|
| 22 |
+
"from dotenv import load_dotenv\n",
|
| 23 |
+
"from langchain_core.tools import StructuredTool\n",
|
| 24 |
+
"from langchain_google_vertexai import ChatVertexAI\n",
|
| 25 |
+
"from langgraph.prebuilt import create_react_agent\n",
|
| 26 |
+
"\n",
|
| 27 |
+
"# 1. Setup Path to import mcp_server from parent directory\n",
|
| 28 |
+
"sys.path.append(\"..\")\n",
|
| 29 |
+
"\n",
|
| 30 |
+
"# 2. Setup Environment\n",
|
| 31 |
+
"load_dotenv()\n",
|
| 32 |
+
"\n",
|
| 33 |
+
"# POINT DATABASE TO A LOCAL FOLDER FOR THIS NOTEBOOK\n",
|
| 34 |
+
"# Use a notebook-specific data directory so this test does not touch the deployment's default data paths.\n",
|
| 35 |
+
"# mcp_server defaults to using 'DATA_DIR' env var if set\n",
|
| 36 |
+
"os.environ[\"DATA_DIR\"] = os.path.abspath(\"../temp_data_notebook_test\")\n",
|
| 37 |
+
"print(f\"Using Data Dir: {os.environ['DATA_DIR']}\")\n",
|
| 38 |
+
"\n",
|
| 39 |
+
"# 3. Import Tools directly from Server Code\n",
|
| 40 |
+
"# (In a real remote scenario, you would use MCPClient instead)\n",
|
| 41 |
+
"from mcp_server import ingest_transactions, get_database_schema, query_database, semantic_search, clear_database\n",
|
| 42 |
+
"\n",
|
| 43 |
+
"# 4. Wrap as LangChain Tools\n",
|
| 44 |
+
"tools = [\n",
|
| 45 |
+
" StructuredTool.from_function(ingest_transactions),\n",
|
| 46 |
+
" StructuredTool.from_function(clear_database),\n",
|
| 47 |
+
" StructuredTool.from_function(get_database_schema),\n",
|
| 48 |
+
" StructuredTool.from_function(query_database),\n",
|
| 49 |
+
" StructuredTool.from_function(semantic_search),\n",
|
| 50 |
+
"]\n",
|
| 51 |
+
"\n",
|
| 52 |
+
"# 5. Initialize Agent\n",
|
| 53 |
+
"llm = ChatVertexAI(model=\"gemini-1.5-flash-001\", temperature=0)\n",
|
| 54 |
+
"agent_executor = create_react_agent(llm, tools)\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"print(\"Agent and Tools loaded successfully!\")"
|
| 57 |
+
]
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"cell_type": "code",
|
| 61 |
+
"execution_count": null,
|
| 62 |
+
"metadata": {},
|
| 63 |
+
"outputs": [],
|
| 64 |
+
"source": [
|
| 65 |
+
"# --- STEP 1: LOAD & INGEST DATA ---\n",
|
| 66 |
+
"# Read one of the CSVs from demo_data and ask the agent to clear the database and then ingest its contents.\n",
|
| 67 |
+
"\n",
|
| 68 |
+
"# Read Local File\n",
|
| 69 |
+
"csv_path = \"../demo_data/Chase5282_Activity20240110_20260110_20260111.CSV\"\n",
|
| 70 |
+
"\n",
|
| 71 |
+
"if os.path.exists(csv_path):\n",
|
| 72 |
+
" with open(csv_path, 'r') as f:\n",
|
| 73 |
+
" csv_content = f.read()\n",
|
| 74 |
+
"\n",
|
| 75 |
+
" print(f\"Read {len(csv_content)} bytes from CSV.\")\n",
|
| 76 |
+
" \n",
|
| 77 |
+
" # Run the Agent to Ingest\n",
|
| 78 |
+
" print(\"\\n--- Ingesting Data via Agent ---\")\n",
|
| 79 |
+
" response = agent_executor.invoke(\n",
|
| 80 |
+
" {\"messages\": [(\"user\", f\"Please clear the database first, then ingest this transaction data:\\n\\n{csv_content}\")]}\n",
|
| 81 |
+
" )\n",
|
| 82 |
+
" print(response[\"messages\"][-1].content)\n",
|
| 83 |
+
"else:\n",
|
| 84 |
+
" print(f\"File not found: {csv_path}\")"
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"cell_type": "code",
|
| 89 |
+
"execution_count": null,
|
| 90 |
+
"metadata": {},
|
| 91 |
+
"outputs": [],
|
| 92 |
+
"source": [
|
| 93 |
+
"# --- STEP 2: TEST RETRIEVAL (RAG) ---\n",
|
| 94 |
+
"\n",
|
| 95 |
+
"queries = [\n",
|
| 96 |
+
" \"What is the database schema?\",\n",
|
| 97 |
+
" \"How much did I spend at McDonalds?\",\n",
|
| 98 |
+
" \"Show me the top 3 most expensive transactions.\",\n",
|
| 99 |
+
" \"What is my total spending on Food & Drink?\" \n",
|
| 100 |
+
"]\n",
|
| 101 |
+
"\n",
|
| 102 |
+
"for q in queries:\n",
|
| 103 |
+
" print(f\"\\nUser: {q}\")\n",
|
| 104 |
+
" result = agent_executor.invoke({\"messages\": [(\"user\", q)]})\n",
|
| 105 |
+
" print(f\"AI: {result['messages'][-1].content}\")"
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"cell_type": "code",
|
| 110 |
+
"execution_count": 3,
|
| 111 |
+
"metadata": {},
|
| 112 |
+
"outputs": [],
|
| 113 |
+
"source": [
|
| 114 |
+
"%reload_ext autoreload\n",
|
| 115 |
+
"%autoreload 2"
|
| 116 |
+
]
|
| 117 |
+
},
|
| 118 |
+
{
|
| 119 |
+
"cell_type": "code",
|
| 120 |
+
"execution_count": null,
|
| 121 |
+
"metadata": {},
|
| 122 |
+
"outputs": [],
|
| 123 |
+
"source": []
|
| 124 |
+
}
|
| 125 |
+
],
|
| 126 |
+
"metadata": {
|
| 127 |
+
"kernelspec": {
|
| 128 |
+
"display_name": "money_rag",
|
| 129 |
+
"language": "python",
|
| 130 |
+
"name": "python3"
|
| 131 |
+
},
|
| 132 |
+
"language_info": {
|
| 133 |
+
"codemirror_mode": {
|
| 134 |
+
"name": "ipython",
|
| 135 |
+
"version": 3
|
| 136 |
+
},
|
| 137 |
+
"file_extension": ".py",
|
| 138 |
+
"mimetype": "text/x-python",
|
| 139 |
+
"name": "python",
|
| 140 |
+
"nbconvert_exporter": "python",
|
| 141 |
+
"pygments_lexer": "ipython3",
|
| 142 |
+
"version": "3.12.7"
|
| 143 |
+
}
|
| 144 |
+
},
|
| 145 |
+
"nbformat": 4,
|
| 146 |
+
"nbformat_minor": 2
|
| 147 |
+
}
|
requirements.txt
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# --- Core AI Orchestration ---
|
| 2 |
+
langchain>=1.2.3
|
| 3 |
+
langchain-community>=0.4.1
|
| 4 |
+
langchain-core>=1.2.7
|
| 5 |
+
langgraph>=1.0.6
|
| 6 |
+
pydantic>=2.12.5
|
| 7 |
+
|
| 8 |
+
# --- Model Providers ---
|
| 9 |
+
# Google / Vertex AI
|
| 10 |
+
langchain-google-vertexai>=2.1.2
|
| 11 |
+
langchain-google-genai>=2.0.0
|
| 12 |
+
google-cloud-aiplatform>=1.133.0
|
| 13 |
+
google-genai>=1.57.0
|
| 14 |
+
|
| 15 |
+
# OpenAI
|
| 16 |
+
langchain-openai>=1.1.7
|
| 17 |
+
openai>=2.15.0
|
| 18 |
+
|
| 19 |
+
# --- MCP & Tooling ---
|
| 20 |
+
fastmcp>=2.14.3
|
| 21 |
+
mcp>=1.25.0
|
| 22 |
+
# langchain-mcp>=0.2.1
|
| 23 |
+
langchain-mcp-adapters>=0.2.1
|
| 24 |
+
duckduckgo-search>=8.1.1
|
| 25 |
+
|
| 26 |
+
# --- Vector DB & Data ---
|
| 27 |
+
qdrant-client>=1.16.2
|
| 28 |
+
langchain-qdrant>=1.1.0
|
| 29 |
+
sqlalchemy>=2.0.45
|
| 30 |
+
pandas>=2.3.3
|
| 31 |
+
redis>=7.1.0
|
| 32 |
+
|
| 33 |
+
# --- Infrastructure & API ---
|
| 34 |
+
uvicorn>=0.40.0
|
| 35 |
+
starlette>=0.51.0
|
| 36 |
+
python-dotenv>=1.2.1
|
| 37 |
+
httpx>=0.28.1
|
| 38 |
+
requests>=2.32.5
|
| 39 |
+
tenacity>=9.1.2
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
streamlit>=1.53.0
|
| 43 |
+
ddgs>=9.10.0
|