Spaces:
Running
Running
Pulastya B
commited on
Commit
·
226ac39
0
Parent(s):
feat: Initial commit - Data Science Agent with React frontend and FastAPI backend
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .dockerignore +74 -0
- .env.example +19 -0
- .gcloudignore +59 -0
- .gitignore +71 -0
- BIGQUERY_SCHEMAS.md +691 -0
- CHECKLIST.md +97 -0
- DEPLOYMENT.md +495 -0
- Dockerfile +78 -0
- FRONTEND_INTEGRATION.md +234 -0
- FRRONTEEEND/.env.production +3 -0
- FRRONTEEEND/.gitignore +24 -0
- FRRONTEEEND/App.tsx +59 -0
- FRRONTEEEND/README.md +20 -0
- FRRONTEEEND/components/BackgroundPaths.tsx +148 -0
- FRRONTEEEND/components/ChatInterface.tsx +571 -0
- FRRONTEEEND/components/Footer.tsx +171 -0
- FRRONTEEEND/components/HeroGeometric.tsx +213 -0
- FRRONTEEEND/components/KeyCapabilities.tsx +91 -0
- FRRONTEEEND/components/Logo.tsx +92 -0
- FRRONTEEEND/components/ProblemSolution.tsx +70 -0
- FRRONTEEEND/components/Process.tsx +70 -0
- FRRONTEEEND/components/ShadowSection.tsx +222 -0
- FRRONTEEEND/components/TechStack.tsx +36 -0
- FRRONTEEEND/index.html +59 -0
- FRRONTEEEND/index.tsx +16 -0
- FRRONTEEEND/lib/utils.ts +7 -0
- FRRONTEEEND/metadata.json +5 -0
- FRRONTEEEND/package-lock.json +0 -0
- FRRONTEEEND/package.json +26 -0
- FRRONTEEEND/tsconfig.json +29 -0
- FRRONTEEEND/vite.config.ts +29 -0
- GEMINI_UPDATE.md +93 -0
- MIGRATION_COMPLETE.md +325 -0
- QUICK_REFERENCE.txt +71 -0
- README.md +632 -0
- build-and-deploy.ps1 +39 -0
- build-and-deploy.sh +33 -0
- cache_db/.gitkeep +0 -0
- chat_ui.py +1073 -0
- cloudbuild.yaml +69 -0
- data/.gitkeep +0 -0
- deploy.sh +171 -0
- examples/titanic_example.py +166 -0
- requirements.txt +98 -0
- setup-deployment.sh +78 -0
- src/__init__.py +7 -0
- src/api/__init__.py +4 -0
- src/api/app.py +513 -0
- src/cache/__init__.py +5 -0
- src/cache/cache_manager.py +292 -0
.dockerignore
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python cache and environment
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
.venv/
|
| 8 |
+
venv/
|
| 9 |
+
ENV/
|
| 10 |
+
env/
|
| 11 |
+
|
| 12 |
+
# Development files
|
| 13 |
+
.git/
|
| 14 |
+
.gitignore
|
| 15 |
+
.env
|
| 16 |
+
.env.local
|
| 17 |
+
*.log
|
| 18 |
+
|
| 19 |
+
# Output directories (not needed in container)
|
| 20 |
+
outputs/
|
| 21 |
+
cache_db/
|
| 22 |
+
temp/
|
| 23 |
+
test_data/
|
| 24 |
+
data/
|
| 25 |
+
|
| 26 |
+
# Frontend development files (will be built in Docker)
|
| 27 |
+
FRRONTEEEND/node_modules/
|
| 28 |
+
FRRONTEEEND/.env
|
| 29 |
+
FRRONTEEEND/.env.local
|
| 30 |
+
|
| 31 |
+
# Documentation and tests
|
| 32 |
+
*.md
|
| 33 |
+
!README.md
|
| 34 |
+
tests/
|
| 35 |
+
test_*.py
|
| 36 |
+
check_*.py
|
| 37 |
+
|
| 38 |
+
# Old Gradio UI (no longer used)
|
| 39 |
+
chat_ui.py
|
| 40 |
+
|
| 41 |
+
# IDE
|
| 42 |
+
.vscode/
|
| 43 |
+
.idea/
|
| 44 |
+
*.swp
|
| 45 |
+
*.swo
|
| 46 |
+
*~
|
| 47 |
+
|
| 48 |
+
# OS files
|
| 49 |
+
.DS_Store
|
| 50 |
+
Thumbs.db
|
| 51 |
+
|
| 52 |
+
# Jupyter notebooks
|
| 53 |
+
*.ipynb
|
| 54 |
+
.ipynb_checkpoints/
|
| 55 |
+
|
| 56 |
+
# Large model files (if any)
|
| 57 |
+
*.pkl
|
| 58 |
+
*.joblib
|
| 59 |
+
*.h5
|
| 60 |
+
*.pt
|
| 61 |
+
*.pth
|
| 62 |
+
|
| 63 |
+
# Documentation
|
| 64 |
+
docs/
|
| 65 |
+
PHASE*.md
|
| 66 |
+
PROJECT*.md
|
| 67 |
+
TOKEN*.md
|
| 68 |
+
TOOL*.md
|
| 69 |
+
FEATURE*.md
|
| 70 |
+
IMPLEMENTATION*.md
|
| 71 |
+
MIGRATION*.md
|
| 72 |
+
EDA_REPORTS*.md
|
| 73 |
+
GITHUB*.md
|
| 74 |
+
BIGQUERY*.md
|
.env.example
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Google Gemini API Configuration
|
| 2 |
+
GOOGLE_API_KEY=your_google_api_key_here
|
| 3 |
+
|
| 4 |
+
# Model Configuration
|
| 5 |
+
LLM_PROVIDER=gemini
|
| 6 |
+
REASONING_EFFORT=medium
|
| 7 |
+
|
| 8 |
+
# Cache Configuration
|
| 9 |
+
CACHE_DB_PATH=./cache_db/cache.db
|
| 10 |
+
CACHE_TTL_SECONDS=86400
|
| 11 |
+
|
| 12 |
+
# Output Configuration
|
| 13 |
+
OUTPUT_DIR=./outputs
|
| 14 |
+
DATA_DIR=./data
|
| 15 |
+
|
| 16 |
+
# Performance Configuration
|
| 17 |
+
MAX_PARALLEL_TOOLS=5
|
| 18 |
+
MAX_RETRIES=3
|
| 19 |
+
TIMEOUT_SECONDS=300
|
.gcloudignore
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file specifies files that are *not* uploaded to Google Cloud
|
| 2 |
+
# using gcloud. It follows the same syntax as .gitignore
|
| 3 |
+
|
| 4 |
+
.gcloudignore
|
| 5 |
+
.git
|
| 6 |
+
.gitignore
|
| 7 |
+
|
| 8 |
+
# Python
|
| 9 |
+
__pycache__/
|
| 10 |
+
*.py[cod]
|
| 11 |
+
*$py.class
|
| 12 |
+
*.so
|
| 13 |
+
.Python
|
| 14 |
+
.venv/
|
| 15 |
+
venv/
|
| 16 |
+
ENV/
|
| 17 |
+
env/
|
| 18 |
+
|
| 19 |
+
# Local development
|
| 20 |
+
.env
|
| 21 |
+
.env.local
|
| 22 |
+
*.log
|
| 23 |
+
|
| 24 |
+
# Outputs and cache (regenerated in cloud)
|
| 25 |
+
outputs/
|
| 26 |
+
cache_db/
|
| 27 |
+
temp/
|
| 28 |
+
test_data/
|
| 29 |
+
data/
|
| 30 |
+
|
| 31 |
+
# Documentation
|
| 32 |
+
*.md
|
| 33 |
+
!README.md
|
| 34 |
+
|
| 35 |
+
# Tests
|
| 36 |
+
tests/
|
| 37 |
+
test_*.py
|
| 38 |
+
check_*.py
|
| 39 |
+
|
| 40 |
+
# IDE
|
| 41 |
+
.vscode/
|
| 42 |
+
.idea/
|
| 43 |
+
*.swp
|
| 44 |
+
*.swo
|
| 45 |
+
|
| 46 |
+
# OS
|
| 47 |
+
.DS_Store
|
| 48 |
+
Thumbs.db
|
| 49 |
+
|
| 50 |
+
# Jupyter
|
| 51 |
+
*.ipynb
|
| 52 |
+
.ipynb_checkpoints/
|
| 53 |
+
|
| 54 |
+
# Build artifacts
|
| 55 |
+
*.pkl
|
| 56 |
+
*.joblib
|
| 57 |
+
*.h5
|
| 58 |
+
*.pt
|
| 59 |
+
*.pth
|
.gitignore
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
build/
|
| 8 |
+
develop-eggs/
|
| 9 |
+
dist/
|
| 10 |
+
downloads/
|
| 11 |
+
eggs/
|
| 12 |
+
.eggs/
|
| 13 |
+
lib/
|
| 14 |
+
lib64/
|
| 15 |
+
parts/
|
| 16 |
+
sdist/
|
| 17 |
+
var/
|
| 18 |
+
wheels/
|
| 19 |
+
*.egg-info/
|
| 20 |
+
.installed.cfg
|
| 21 |
+
*.egg
|
| 22 |
+
|
| 23 |
+
# Virtual Environment
|
| 24 |
+
venv/
|
| 25 |
+
.venv/
|
| 26 |
+
env/
|
| 27 |
+
ENV/
|
| 28 |
+
|
| 29 |
+
# Environment Variables
|
| 30 |
+
.env
|
| 31 |
+
|
| 32 |
+
# IDE
|
| 33 |
+
.vscode/
|
| 34 |
+
.idea/
|
| 35 |
+
*.swp
|
| 36 |
+
*.swo
|
| 37 |
+
*~
|
| 38 |
+
|
| 39 |
+
# Cache & Outputs
|
| 40 |
+
cache_db/*.db
|
| 41 |
+
cache_db/*.db-journal
|
| 42 |
+
cache_db/
|
| 43 |
+
outputs/
|
| 44 |
+
temp/
|
| 45 |
+
*.pkl
|
| 46 |
+
*.joblib
|
| 47 |
+
|
| 48 |
+
# Data files (except examples)
|
| 49 |
+
data/*.csv
|
| 50 |
+
data/*.parquet
|
| 51 |
+
!data/.gitkeep
|
| 52 |
+
|
| 53 |
+
# Cloud Run URL
|
| 54 |
+
.cloud_run_url
|
| 55 |
+
|
| 56 |
+
# Jupyter
|
| 57 |
+
.ipynb_checkpoints/
|
| 58 |
+
*.ipynb
|
| 59 |
+
|
| 60 |
+
# OS
|
| 61 |
+
.DS_Store
|
| 62 |
+
Thumbs.db
|
| 63 |
+
|
| 64 |
+
# Testing
|
| 65 |
+
.pytest_cache/
|
| 66 |
+
.coverage
|
| 67 |
+
htmlcov/
|
| 68 |
+
.tox/
|
| 69 |
+
|
| 70 |
+
# Logs
|
| 71 |
+
*.log
|
BIGQUERY_SCHEMAS.md
ADDED
|
@@ -0,0 +1,691 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# BigQuery Output Schemas for Looker Compatibility
|
| 2 |
+
|
| 3 |
+
**Purpose**: Define stable BigQuery table schemas that BI tools (Looker, Data Studio) can query reliably.
|
| 4 |
+
|
| 5 |
+
**Design Principles**:
|
| 6 |
+
- ✅ **Stable Schema**: No breaking changes without versioning
|
| 7 |
+
- ✅ **Consistent Naming**: snake_case columns, clear dimension/metric separation
|
| 8 |
+
- ✅ **BI-Friendly Types**: Standard SQL types, no complex nested structures
|
| 9 |
+
- ✅ **Documented Grain**: Clear primary keys and update patterns
|
| 10 |
+
- ✅ **Dashboard-Ready**: Metrics aligned with common visualizations
|
| 11 |
+
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
## 📊 Table 1: `model_metrics`
|
| 15 |
+
|
| 16 |
+
**Description**: Model performance metrics tracked over time for monitoring and comparison.
|
| 17 |
+
|
| 18 |
+
**Use Cases**:
|
| 19 |
+
- Performance dashboards
|
| 20 |
+
- Model comparison reports
|
| 21 |
+
- Drift detection alerts
|
| 22 |
+
- A/B test analysis
|
| 23 |
+
|
| 24 |
+
**Update Frequency**: On every model training run
|
| 25 |
+
|
| 26 |
+
**Grain**: One row per model training execution
|
| 27 |
+
|
| 28 |
+
### Schema
|
| 29 |
+
|
| 30 |
+
| Column Name | Type | Description | Dimension/Metric | Example |
|
| 31 |
+
|------------|------|-------------|------------------|---------|
|
| 32 |
+
| `project_id` | STRING | Google Cloud project ID | Dimension | `my-ml-project` |
|
| 33 |
+
| `dataset_id` | STRING | BigQuery dataset name | Dimension | `ml_models` |
|
| 34 |
+
| `model_id` | STRING | Unique model identifier | Dimension (Primary Key) | `xgboost_churn_20251223_153045` |
|
| 35 |
+
| `model_name` | STRING | Human-readable model name | Dimension | `Customer Churn Predictor` |
|
| 36 |
+
| `model_type` | STRING | Algorithm used | Dimension | `XGBoost`, `RandomForest`, `LightGBM` |
|
| 37 |
+
| `task_type` | STRING | ML task category | Dimension | `classification`, `regression` |
|
| 38 |
+
| `training_dataset` | STRING | Source table/file reference | Dimension | `project.dataset.train_data` |
|
| 39 |
+
| `target_column` | STRING | Prediction target name | Dimension | `churn`, `price`, `survived` |
|
| 40 |
+
| `created_at` | TIMESTAMP | Model training timestamp | Dimension (Time) | `2025-12-23 15:30:45 UTC` |
|
| 41 |
+
| `created_date` | DATE | Training date (for partitioning) | Dimension (Time) | `2025-12-23` |
|
| 42 |
+
| `feature_count` | INTEGER | Number of features used | Metric | `42` |
|
| 43 |
+
| `training_rows` | INTEGER | Training set size | Metric | `10000` |
|
| 44 |
+
| `test_rows` | INTEGER | Test set size | Metric | `2500` |
|
| 45 |
+
| `training_duration_seconds` | FLOAT | Time to train model | Metric | `123.45` |
|
| 46 |
+
| `accuracy` | FLOAT | Overall accuracy (0-1) | Metric | `0.95` |
|
| 47 |
+
| `precision` | FLOAT | Precision score (0-1) | Metric | `0.92` |
|
| 48 |
+
| `recall` | FLOAT | Recall score (0-1) | Metric | `0.88` |
|
| 49 |
+
| `f1_score` | FLOAT | F1 score (0-1) | Metric | `0.90` |
|
| 50 |
+
| `roc_auc` | FLOAT | ROC AUC score (0-1) | Metric | `0.94` |
|
| 51 |
+
| `pr_auc` | FLOAT | Precision-Recall AUC (0-1) | Metric | `0.91` |
|
| 52 |
+
| `mae` | FLOAT | Mean Absolute Error (regression) | Metric | `1234.56` |
|
| 53 |
+
| `mse` | FLOAT | Mean Squared Error (regression) | Metric | `567890.12` |
|
| 54 |
+
| `rmse` | FLOAT | Root Mean Squared Error (regression) | Metric | `753.59` |
|
| 55 |
+
| `r2_score` | FLOAT | R² coefficient (regression) | Metric | `0.85` |
|
| 56 |
+
| `cross_val_mean` | FLOAT | Mean CV score | Metric | `0.93` |
|
| 57 |
+
| `cross_val_std` | FLOAT | CV score std deviation | Metric | `0.02` |
|
| 58 |
+
| `hyperparameters` | STRING (JSON) | Model hyperparameters | Metadata | `{"max_depth": 6, "n_estimators": 100}` |
|
| 59 |
+
| `version` | STRING | Model version tag | Dimension | `v1.2.3` |
|
| 60 |
+
| `environment` | STRING | Training environment | Dimension | `production`, `staging`, `development` |
|
| 61 |
+
| `user_email` | STRING | User who trained model | Dimension | `data-scientist@company.com` |
|
| 62 |
+
|
| 63 |
+
### Partitioning & Clustering
|
| 64 |
+
|
| 65 |
+
```sql
|
| 66 |
+
-- Recommended table setup
|
| 67 |
+
CREATE TABLE `project.dataset.model_metrics`
|
| 68 |
+
(
|
| 69 |
+
-- columns as above
|
| 70 |
+
)
|
| 71 |
+
PARTITION BY created_date
|
| 72 |
+
CLUSTER BY model_type, task_type, environment
|
| 73 |
+
OPTIONS(
|
| 74 |
+
description="Model performance metrics for BI dashboards",
|
| 75 |
+
require_partition_filter=true
|
| 76 |
+
);
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
### Primary Dimensions for Looker
|
| 80 |
+
|
| 81 |
+
- **Time**: `created_at`, `created_date`
|
| 82 |
+
- **Model**: `model_type`, `model_name`, `task_type`
|
| 83 |
+
- **Performance Tier**: CASE expression on `accuracy`/`f1_score`
|
| 84 |
+
- `Excellent` (>0.90)
|
| 85 |
+
- `Good` (0.80-0.90)
|
| 86 |
+
- `Fair` (0.70-0.80)
|
| 87 |
+
- `Poor` (<0.70)
|
| 88 |
+
|
| 89 |
+
### Sample Looker View
|
| 90 |
+
|
| 91 |
+
```lookml
|
| 92 |
+
view: model_metrics {
|
| 93 |
+
sql_table_name: `project.dataset.model_metrics` ;;
|
| 94 |
+
|
| 95 |
+
dimension: model_id {
|
| 96 |
+
primary_key: yes
|
| 97 |
+
type: string
|
| 98 |
+
sql: ${TABLE}.model_id ;;
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
dimension_group: created {
|
| 102 |
+
type: time
|
| 103 |
+
timeframes: [date, week, month, quarter, year]
|
| 104 |
+
sql: ${TABLE}.created_at ;;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
dimension: model_type {
|
| 108 |
+
type: string
|
| 109 |
+
sql: ${TABLE}.model_type ;;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
dimension: performance_tier {
|
| 113 |
+
type: string
|
| 114 |
+
sql: CASE
|
| 115 |
+
WHEN ${TABLE}.accuracy >= 0.90 THEN 'Excellent'
|
| 116 |
+
WHEN ${TABLE}.accuracy >= 0.80 THEN 'Good'
|
| 117 |
+
WHEN ${TABLE}.accuracy >= 0.70 THEN 'Fair'
|
| 118 |
+
ELSE 'Poor'
|
| 119 |
+
END ;;
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
measure: count {
|
| 123 |
+
type: count
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
measure: avg_accuracy {
|
| 127 |
+
type: average
|
| 128 |
+
sql: ${TABLE}.accuracy ;;
|
| 129 |
+
value_format_name: percent_2
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
measure: avg_f1_score {
|
| 133 |
+
type: average
|
| 134 |
+
sql: ${TABLE}.f1_score ;;
|
| 135 |
+
value_format_name: percent_2
|
| 136 |
+
}
|
| 137 |
+
}
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
---
|
| 141 |
+
|
| 142 |
+
## 🎯 Table 2: `feature_importance`
|
| 143 |
+
|
| 144 |
+
**Description**: Feature importance scores for model interpretability.
|
| 145 |
+
|
| 146 |
+
**Use Cases**:
|
| 147 |
+
- Feature impact analysis
|
| 148 |
+
- Feature selection dashboards
|
| 149 |
+
- Model explainability reports
|
| 150 |
+
|
| 151 |
+
**Update Frequency**: On every model training run
|
| 152 |
+
|
| 153 |
+
**Grain**: One row per feature per model
|
| 154 |
+
|
| 155 |
+
### Schema
|
| 156 |
+
|
| 157 |
+
| Column Name | Type | Description | Dimension/Metric | Example |
|
| 158 |
+
|------------|------|-------------|------------------|---------|
|
| 159 |
+
| `model_id` | STRING | Foreign key to model_metrics | Dimension (Foreign Key) | `xgboost_churn_20251223_153045` |
|
| 160 |
+
| `feature_name` | STRING | Name of the feature | Dimension (Primary Key) | `age`, `total_purchases`, `days_since_last_login` |
|
| 161 |
+
| `importance_score` | FLOAT | Importance value (0-1) | Metric | `0.35` |
|
| 162 |
+
| `importance_rank` | INTEGER | Rank by importance (1=most important) | Metric | `1`, `2`, `3` |
|
| 163 |
+
| `importance_type` | STRING | Calculation method | Dimension | `gain`, `weight`, `cover`, `shap` |
|
| 164 |
+
| `feature_type` | STRING | Data type category | Dimension | `numeric`, `categorical`, `datetime`, `text` |
|
| 165 |
+
| `is_engineered` | BOOLEAN | Created by feature engineering? | Dimension | `true`, `false` |
|
| 166 |
+
| `created_at` | TIMESTAMP | When importance was calculated | Dimension (Time) | `2025-12-23 15:30:45 UTC` |
|
| 167 |
+
| `created_date` | DATE | Calculation date | Dimension (Time) | `2025-12-23` |
|
| 168 |
+
|
| 169 |
+
### Partitioning & Clustering
|
| 170 |
+
|
| 171 |
+
```sql
|
| 172 |
+
CREATE TABLE `project.dataset.feature_importance`
|
| 173 |
+
(
|
| 174 |
+
-- columns as above
|
| 175 |
+
)
|
| 176 |
+
PARTITION BY created_date
|
| 177 |
+
CLUSTER BY model_id, importance_rank
|
| 178 |
+
OPTIONS(
|
| 179 |
+
description="Feature importance scores for model explainability",
|
| 180 |
+
require_partition_filter=false -- Allow cross-model queries
|
| 181 |
+
);
|
| 182 |
+
```
|
| 183 |
+
|
| 184 |
+
### Primary Dimensions for Looker
|
| 185 |
+
|
| 186 |
+
- **Feature**: `feature_name`, `feature_type`, `is_engineered`
|
| 187 |
+
- **Model**: `model_id` (join to model_metrics)
|
| 188 |
+
- **Importance**: `importance_rank`, `importance_type`
|
| 189 |
+
|
| 190 |
+
### Sample Looker View
|
| 191 |
+
|
| 192 |
+
```lookml
|
| 193 |
+
view: feature_importance {
|
| 194 |
+
sql_table_name: `project.dataset.feature_importance` ;;
|
| 195 |
+
|
| 196 |
+
dimension: compound_key {
|
| 197 |
+
primary_key: yes
|
| 198 |
+
hidden: yes
|
| 199 |
+
sql: CONCAT(${TABLE}.model_id, '|', ${TABLE}.feature_name) ;;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
dimension: feature_name {
|
| 203 |
+
type: string
|
| 204 |
+
sql: ${TABLE}.feature_name ;;
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
dimension: is_top_10 {
|
| 208 |
+
type: yesno
|
| 209 |
+
sql: ${TABLE}.importance_rank <= 10 ;;
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
measure: avg_importance {
|
| 213 |
+
type: average
|
| 214 |
+
sql: ${TABLE}.importance_score ;;
|
| 215 |
+
value_format_name: percent_2
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
measure: count_features {
|
| 219 |
+
type: count_distinct
|
| 220 |
+
sql: ${TABLE}.feature_name ;;
|
| 221 |
+
}
|
| 222 |
+
}
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
---
|
| 226 |
+
|
| 227 |
+
## 🔮 Table 3: `predictions`
|
| 228 |
+
|
| 229 |
+
**Description**: Model predictions with actuals for monitoring and evaluation.
|
| 230 |
+
|
| 231 |
+
**Use Cases**:
|
| 232 |
+
- Prediction monitoring
|
| 233 |
+
- Accuracy tracking over time
|
| 234 |
+
- Segment performance analysis
|
| 235 |
+
- Business impact measurement
|
| 236 |
+
|
| 237 |
+
**Update Frequency**: Real-time or batch (daily/hourly)
|
| 238 |
+
|
| 239 |
+
**Grain**: One row per prediction
|
| 240 |
+
|
| 241 |
+
### Schema
|
| 242 |
+
|
| 243 |
+
| Column Name | Type | Description | Dimension/Metric | Example |
|
| 244 |
+
|------------|------|-------------|------------------|---------|
|
| 245 |
+
| `prediction_id` | STRING | Unique prediction identifier | Dimension (Primary Key) | `pred_abc123xyz` |
|
| 246 |
+
| `model_id` | STRING | Model used for prediction | Dimension (Foreign Key) | `xgboost_churn_20251223_153045` |
|
| 247 |
+
| `entity_id` | STRING | Entity being predicted (customer_id, product_id, etc.) | Dimension | `customer_12345` |
|
| 248 |
+
| `predicted_at` | TIMESTAMP | When prediction was made | Dimension (Time) | `2025-12-23 15:30:45 UTC` |
|
| 249 |
+
| `predicted_date` | DATE | Prediction date (for partitioning) | Dimension (Time) | `2025-12-23` |
|
| 250 |
+
| `prediction_value` | FLOAT | Predicted value | Metric | `0.85` (probability), `49.99` (price) |
|
| 251 |
+
| `prediction_class` | STRING | Predicted class (classification) | Dimension | `churn`, `not_churn` |
|
| 252 |
+
| `prediction_confidence` | FLOAT | Model confidence (0-1) | Metric | `0.92` |
|
| 253 |
+
| `actual_value` | FLOAT | True value (when available) | Metric | `1.0` (churned), `52.50` (actual price) |
|
| 254 |
+
| `actual_class` | STRING | True class (when available) | Dimension | `churn`, `not_churn` |
|
| 255 |
+
| `actual_recorded_at` | TIMESTAMP | When actual became known | Dimension (Time) | `2025-12-30 10:00:00 UTC` |
|
| 256 |
+
| `is_correct` | BOOLEAN | Prediction was correct? | Dimension | `true`, `false` |
|
| 257 |
+
| `absolute_error` | FLOAT | \|predicted - actual\| | Metric | `2.51` |
|
| 258 |
+
| `squared_error` | FLOAT | (predicted - actual)² | Metric | `6.30` |
|
| 259 |
+
| `feature_values` | STRING (JSON) | Input features used | Metadata | `{"age": 35, "tenure": 24}` |
|
| 260 |
+
| `segment` | STRING | Business segment | Dimension | `enterprise`, `smb`, `consumer` |
|
| 261 |
+
| `region` | STRING | Geographic region | Dimension | `us-west`, `eu-central` |
|
| 262 |
+
| `model_version` | STRING | Model version | Dimension | `v1.2.3` |
|
| 263 |
+
| `prediction_latency_ms` | FLOAT | Inference time | Metric | `23.4` |
|
| 264 |
+
|
| 265 |
+
### Partitioning & Clustering
|
| 266 |
+
|
| 267 |
+
```sql
|
| 268 |
+
CREATE TABLE `project.dataset.predictions`
|
| 269 |
+
(
|
| 270 |
+
-- columns as above
|
| 271 |
+
)
|
| 272 |
+
PARTITION BY predicted_date
|
| 273 |
+
CLUSTER BY model_id, segment, is_correct
|
| 274 |
+
OPTIONS(
|
| 275 |
+
description="Model predictions with actuals for monitoring",
|
| 276 |
+
require_partition_filter=true,
|
| 277 |
+
partition_expiration_days=730 -- 2 years retention
|
| 278 |
+
);
|
| 279 |
+
```
|
| 280 |
+
|
| 281 |
+
### Primary Dimensions for Looker
|
| 282 |
+
|
| 283 |
+
- **Time**: `predicted_date`, days since prediction
|
| 284 |
+
- **Model**: `model_id`, `model_version`
|
| 285 |
+
- **Segment**: `segment`, `region`
|
| 286 |
+
- **Accuracy**: `is_correct`, error buckets
|
| 287 |
+
|
| 288 |
+
### Sample Looker View
|
| 289 |
+
|
| 290 |
+
```lookml
|
| 291 |
+
view: predictions {
|
| 292 |
+
sql_table_name: `project.dataset.predictions` ;;
|
| 293 |
+
|
| 294 |
+
dimension: prediction_id {
|
| 295 |
+
primary_key: yes
|
| 296 |
+
type: string
|
| 297 |
+
sql: ${TABLE}.prediction_id ;;
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
dimension_group: predicted {
|
| 301 |
+
type: time
|
| 302 |
+
timeframes: [date, week, month]
|
| 303 |
+
sql: ${TABLE}.predicted_at ;;
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
dimension: segment {
|
| 307 |
+
type: string
|
| 308 |
+
sql: ${TABLE}.segment ;;
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
dimension: error_bucket {
|
| 312 |
+
type: string
|
| 313 |
+
sql: CASE
|
| 314 |
+
WHEN ${TABLE}.absolute_error IS NULL THEN 'No Actual Yet'
|
| 315 |
+
WHEN ${TABLE}.absolute_error <= 0.1 THEN '0-10%'
|
| 316 |
+
WHEN ${TABLE}.absolute_error <= 0.2 THEN '10-20%'
|
| 317 |
+
ELSE '>20%'
|
| 318 |
+
END ;;
|
| 319 |
+
}
|
| 320 |
+
|
| 321 |
+
measure: count {
|
| 322 |
+
type: count
|
| 323 |
+
}
|
| 324 |
+
|
| 325 |
+
measure: accuracy_rate {
|
| 326 |
+
type: average
|
| 327 |
+
sql: CAST(${TABLE}.is_correct AS FLOAT64) ;;
|
| 328 |
+
value_format_name: percent_1
|
| 329 |
+
}
|
| 330 |
+
|
| 331 |
+
measure: avg_confidence {
|
| 332 |
+
type: average
|
| 333 |
+
sql: ${TABLE}.prediction_confidence ;;
|
| 334 |
+
value_format_name: percent_2
|
| 335 |
+
}
|
| 336 |
+
|
| 337 |
+
measure: mae {
|
| 338 |
+
type: average
|
| 339 |
+
sql: ${TABLE}.absolute_error ;;
|
| 340 |
+
value_format_name: decimal_2
|
| 341 |
+
}
|
| 342 |
+
}
|
| 343 |
+
```
|
| 344 |
+
|
| 345 |
+
---
|
| 346 |
+
|
| 347 |
+
## 📋 Table 4: `data_profile_summary`
|
| 348 |
+
|
| 349 |
+
**Description**: Dataset profiling statistics for data quality monitoring.
|
| 350 |
+
|
| 351 |
+
**Use Cases**:
|
| 352 |
+
- Data quality dashboards
|
| 353 |
+
- Schema drift detection
|
| 354 |
+
- Data validation reports
|
| 355 |
+
- Column-level monitoring
|
| 356 |
+
|
| 357 |
+
**Update Frequency**: Daily or on-demand
|
| 358 |
+
|
| 359 |
+
**Grain**: One row per column per dataset per run
|
| 360 |
+
|
| 361 |
+
### Schema
|
| 362 |
+
|
| 363 |
+
| Column Name | Type | Description | Dimension/Metric | Example |
|
| 364 |
+
|------------|------|-------------|------------------|---------|
|
| 365 |
+
| `profile_id` | STRING | Unique profile run identifier | Dimension (Primary Key) | `profile_abc123xyz` |
|
| 366 |
+
| `dataset_name` | STRING | Source table/file name | Dimension | `project.dataset.customers` |
|
| 367 |
+
| `column_name` | STRING | Column being profiled | Dimension | `age`, `email`, `signup_date` |
|
| 368 |
+
| `profiled_at` | TIMESTAMP | When profiling ran | Dimension (Time) | `2025-12-23 15:30:45 UTC` |
|
| 369 |
+
| `profiled_date` | DATE | Profiling date | Dimension (Time) | `2025-12-23` |
|
| 370 |
+
| `data_type` | STRING | Column data type | Dimension | `INTEGER`, `STRING`, `FLOAT`, `TIMESTAMP` |
|
| 371 |
+
| `inferred_type` | STRING | Smart type inference | Dimension | `numeric`, `categorical`, `datetime`, `text`, `email` |
|
| 372 |
+
| `row_count` | INTEGER | Total rows in dataset | Metric | `10000` |
|
| 373 |
+
| `non_null_count` | INTEGER | Non-null values | Metric | `9850` |
|
| 374 |
+
| `null_count` | INTEGER | Null values | Metric | `150` |
|
| 375 |
+
| `null_percentage` | FLOAT | % null (0-100) | Metric | `1.5` |
|
| 376 |
+
| `unique_count` | INTEGER | Distinct values | Metric | `450` |
|
| 377 |
+
| `uniqueness_percentage` | FLOAT | % unique (0-100) | Metric | `4.5` |
|
| 378 |
+
| `min_value` | STRING | Minimum value (as string) | Metadata | `18`, `2020-01-01` |
|
| 379 |
+
| `max_value` | STRING | Maximum value (as string) | Metadata | `95`, `2025-12-23` |
|
| 380 |
+
| `mean_value` | FLOAT | Mean (numeric only) | Metric | `42.5` |
|
| 381 |
+
| `median_value` | FLOAT | Median (numeric only) | Metric | `38.0` |
|
| 382 |
+
| `std_dev` | FLOAT | Standard deviation (numeric only) | Metric | `15.2` |
|
| 383 |
+
| `skewness` | FLOAT | Distribution skewness | Metric | `0.85` |
|
| 384 |
+
| `kurtosis` | FLOAT | Distribution kurtosis | Metric | `2.1` |
|
| 385 |
+
| `top_value` | STRING | Most common value | Metadata | `male`, `active` |
|
| 386 |
+
| `top_value_frequency` | INTEGER | Count of most common value | Metric | `6500` |
|
| 387 |
+
| `top_value_percentage` | FLOAT | % of most common value | Metric | `65.0` |
|
| 388 |
+
| `has_outliers` | BOOLEAN | Outliers detected? | Dimension | `true`, `false` |
|
| 389 |
+
| `outlier_count` | INTEGER | Number of outliers | Metric | `23` |
|
| 390 |
+
| `outlier_percentage` | FLOAT | % outliers | Metric | `0.23` |
|
| 391 |
+
| `quality_score` | FLOAT | Overall quality score (0-100) | Metric | `92.5` |
|
| 392 |
+
| `quality_issues` | STRING (JSON) | Detected issues | Metadata | `["high_nulls", "duplicate_values"]` |
|
| 393 |
+
| `validation_status` | STRING | Quality check result | Dimension | `pass`, `warn`, `fail` |
|
| 394 |
+
|
| 395 |
+
### Partitioning & Clustering
|
| 396 |
+
|
| 397 |
+
```sql
|
| 398 |
+
CREATE TABLE `project.dataset.data_profile_summary`
|
| 399 |
+
(
|
| 400 |
+
-- columns as above
|
| 401 |
+
)
|
| 402 |
+
PARTITION BY profiled_date
|
| 403 |
+
CLUSTER BY dataset_name, validation_status
|
| 404 |
+
OPTIONS(
|
| 405 |
+
description="Dataset profiling for data quality monitoring",
|
| 406 |
+
require_partition_filter=true,
|
| 407 |
+
partition_expiration_days=90 -- 3 months retention
|
| 408 |
+
);
|
| 409 |
+
```
|
| 410 |
+
|
| 411 |
+
### Primary Dimensions for Looker
|
| 412 |
+
|
| 413 |
+
- **Dataset**: `dataset_name`
|
| 414 |
+
- **Column**: `column_name`, `data_type`, `inferred_type`
|
| 415 |
+
- **Quality**: `validation_status`, `quality_score` buckets
|
| 416 |
+
- **Time**: `profiled_date`
|
| 417 |
+
|
| 418 |
+
### Sample Looker View
|
| 419 |
+
|
| 420 |
+
```lookml
|
| 421 |
+
view: data_profile_summary {
|
| 422 |
+
sql_table_name: `project.dataset.data_profile_summary` ;;
|
| 423 |
+
|
| 424 |
+
dimension: compound_key {
|
| 425 |
+
primary_key: yes
|
| 426 |
+
hidden: yes
|
| 427 |
+
sql: CONCAT(${TABLE}.profile_id, '|', ${TABLE}.column_name) ;;
|
| 428 |
+
}
|
| 429 |
+
|
| 430 |
+
dimension: column_name {
|
| 431 |
+
type: string
|
| 432 |
+
sql: ${TABLE}.column_name ;;
|
| 433 |
+
}
|
| 434 |
+
|
| 435 |
+
dimension: quality_tier {
|
| 436 |
+
type: string
|
| 437 |
+
sql: CASE
|
| 438 |
+
WHEN ${TABLE}.quality_score >= 90 THEN 'Excellent'
|
| 439 |
+
WHEN ${TABLE}.quality_score >= 75 THEN 'Good'
|
| 440 |
+
WHEN ${TABLE}.quality_score >= 60 THEN 'Fair'
|
| 441 |
+
ELSE 'Poor'
|
| 442 |
+
END ;;
|
| 443 |
+
}
|
| 444 |
+
|
| 445 |
+
dimension: has_quality_issues {
|
| 446 |
+
type: yesno
|
| 447 |
+
sql: ${TABLE}.validation_status IN ('warn', 'fail') ;;
|
| 448 |
+
}
|
| 449 |
+
|
| 450 |
+
measure: count_columns {
|
| 451 |
+
type: count_distinct
|
| 452 |
+
sql: ${TABLE}.column_name ;;
|
| 453 |
+
}
|
| 454 |
+
|
| 455 |
+
measure: avg_quality_score {
|
| 456 |
+
type: average
|
| 457 |
+
sql: ${TABLE}.quality_score ;;
|
| 458 |
+
value_format_name: decimal_1
|
| 459 |
+
}
|
| 460 |
+
|
| 461 |
+
measure: avg_null_percentage {
|
| 462 |
+
type: average
|
| 463 |
+
sql: ${TABLE}.null_percentage ;;
|
| 464 |
+
value_format_name: percent_1
|
| 465 |
+
}
|
| 466 |
+
|
| 467 |
+
measure: columns_with_issues {
|
| 468 |
+
type: count_distinct
|
| 469 |
+
sql: ${TABLE}.column_name ;;
|
| 470 |
+
filters: [has_quality_issues: "yes"]
|
| 471 |
+
}
|
| 472 |
+
}
|
| 473 |
+
```
|
| 474 |
+
|
| 475 |
+
---
|
| 476 |
+
|
| 477 |
+
## 🔄 Schema Evolution Guidelines
|
| 478 |
+
|
| 479 |
+
### ✅ **SAFE Changes** (Non-Breaking)
|
| 480 |
+
|
| 481 |
+
1. **Add new columns** (always nullable or with defaults)
|
| 482 |
+
```sql
|
| 483 |
+
ALTER TABLE `project.dataset.model_metrics`
|
| 484 |
+
ADD COLUMN IF NOT EXISTS new_metric FLOAT64;
|
| 485 |
+
```
|
| 486 |
+
|
| 487 |
+
2. **Add new tables** (doesn't affect existing dashboards)
|
| 488 |
+
|
| 489 |
+
3. **Lengthen STRING columns** (VARCHAR(50) → VARCHAR(100))
|
| 490 |
+
|
| 491 |
+
4. **Add indexes/clustering** (performance only)
|
| 492 |
+
|
| 493 |
+
5. **Add column descriptions**
|
| 494 |
+
```sql
|
| 495 |
+
ALTER TABLE `project.dataset.model_metrics`
|
| 496 |
+
ALTER COLUMN accuracy SET OPTIONS (description='Model accuracy (0-1)');
|
| 497 |
+
```
|
| 498 |
+
|
| 499 |
+
### ❌ **BREAKING Changes** (Require Dashboard Updates)
|
| 500 |
+
|
| 501 |
+
1. **Rename columns** → Use views for backward compatibility:
|
| 502 |
+
```sql
|
| 503 |
+
CREATE OR REPLACE VIEW `project.dataset.model_metrics_v2` AS
|
| 504 |
+
SELECT
|
| 505 |
+
model_id,
|
| 506 |
+
accuracy AS acc, -- renamed column
|
| 507 |
+
...
|
| 508 |
+
FROM `project.dataset.model_metrics`;
|
| 509 |
+
```
|
| 510 |
+
|
| 511 |
+
2. **Change data types** → Create new column, migrate, deprecate old:
|
| 512 |
+
```sql
|
| 513 |
+
-- Step 1: Add new column
|
| 514 |
+
ALTER TABLE model_metrics ADD COLUMN created_at_new TIMESTAMP;
|
| 515 |
+
|
| 516 |
+
-- Step 2: Backfill
|
| 517 |
+
UPDATE model_metrics SET created_at_new = CAST(created_at AS TIMESTAMP) WHERE true;
|
| 518 |
+
|
| 519 |
+
-- Step 3: Update dashboards to use new column
|
| 520 |
+
|
| 521 |
+
-- Step 4: Drop old column after validation period
|
| 522 |
+
ALTER TABLE model_metrics DROP COLUMN created_at;
|
| 523 |
+
```
|
| 524 |
+
|
| 525 |
+
3. **Remove columns** → Deprecate first, remove after 90 days
|
| 526 |
+
|
| 527 |
+
4. **Change partitioning** → Requires table recreation
|
| 528 |
+
|
| 529 |
+
### 🔄 **Versioning Strategy**
|
| 530 |
+
|
| 531 |
+
For major schema changes, create versioned tables:
|
| 532 |
+
|
| 533 |
+
```
|
| 534 |
+
project.dataset.model_metrics_v1 (deprecated, keep 90 days)
|
| 535 |
+
project.dataset.model_metrics_v2 (current)
|
| 536 |
+
project.dataset.model_metrics (view pointing to latest version)
|
| 537 |
+
```
|
| 538 |
+
|
| 539 |
+
---
|
| 540 |
+
|
| 541 |
+
## 📊 Dashboard-Ready Metrics Catalog
|
| 542 |
+
|
| 543 |
+
### Model Performance Metrics
|
| 544 |
+
|
| 545 |
+
| Metric Name | Calculation | Use Case |
|
| 546 |
+
|------------|-------------|----------|
|
| 547 |
+
| **Model Count** | `COUNT(DISTINCT model_id)` | Total models trained |
|
| 548 |
+
| **Avg Accuracy** | `AVG(accuracy)` | Overall model quality |
|
| 549 |
+
| **Accuracy Trend** | `AVG(accuracy) OVER (ORDER BY created_date)` | Performance over time |
|
| 550 |
+
| **Best Model** | `model_id WHERE accuracy = MAX(accuracy)` | Top performer |
|
| 551 |
+
| **Models by Type** | `COUNT(*) GROUP BY model_type` | Algorithm distribution |
|
| 552 |
+
| **Training Time** | `AVG(training_duration_seconds)` | Resource usage |
|
| 553 |
+
| **Recent Models** | `WHERE created_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY)` | Latest activity |
|
| 554 |
+
|
| 555 |
+
### Feature Importance Metrics
|
| 556 |
+
|
| 557 |
+
| Metric Name | Calculation | Use Case |
|
| 558 |
+
|------------|-------------|----------|
|
| 559 |
+
| **Top Features** | `WHERE importance_rank <= 10` | Most impactful features |
|
| 560 |
+
| **Avg Importance** | `AVG(importance_score)` | Feature impact distribution |
|
| 561 |
+
| **Engineered Features** | `COUNT(*) WHERE is_engineered = true` | Feature engineering effectiveness |
|
| 562 |
+
| **Feature Stability** | `STDDEV(importance_score) GROUP BY feature_name` | Consistent predictors |
|
| 563 |
+
|
| 564 |
+
### Prediction Metrics
|
| 565 |
+
|
| 566 |
+
| Metric Name | Calculation | Use Case |
|
| 567 |
+
|------------|-------------|----------|
|
| 568 |
+
| **Accuracy Rate** | `AVG(CAST(is_correct AS FLOAT64))` | Real-world performance |
|
| 569 |
+
| **MAE** | `AVG(absolute_error)` | Average error magnitude |
|
| 570 |
+
| **RMSE** | `SQRT(AVG(squared_error))` | Error with outlier penalty |
|
| 571 |
+
| **Predictions/Day** | `COUNT(*) GROUP BY predicted_date` | Volume tracking |
|
| 572 |
+
| **Confidence Distribution** | `APPROX_QUANTILES(prediction_confidence, 10)` | Model calibration |
|
| 573 |
+
| **Segment Performance** | `AVG(is_correct) GROUP BY segment` | Fairness check |
|
| 574 |
+
|
| 575 |
+
### Data Quality Metrics
|
| 576 |
+
|
| 577 |
+
| Metric Name | Calculation | Use Case |
|
| 578 |
+
|------------|-------------|----------|
|
| 579 |
+
| **Data Quality Score** | `AVG(quality_score)` | Overall health |
|
| 580 |
+
| **Null Rate** | `AVG(null_percentage)` | Completeness |
|
| 581 |
+
| **Columns with Issues** | `COUNT(DISTINCT column_name) WHERE validation_status != 'pass'` | Problem areas |
|
| 582 |
+
| **Quality Trend** | `AVG(quality_score) OVER (ORDER BY profiled_date)` | Improving/degrading? |
|
| 583 |
+
|
| 584 |
+
---
|
| 585 |
+
|
| 586 |
+
## 🎯 Sample Looker Explores
|
| 587 |
+
|
| 588 |
+
### Explore 1: Model Performance Analysis
|
| 589 |
+
|
| 590 |
+
```lookml
|
| 591 |
+
explore: model_metrics {
|
| 592 |
+
label: "Model Performance"
|
| 593 |
+
description: "Track model accuracy, training time, and comparison"
|
| 594 |
+
|
| 595 |
+
join: feature_importance {
|
| 596 |
+
type: left_outer
|
| 597 |
+
sql_on: ${model_metrics.model_id} = ${feature_importance.model_id} ;;
|
| 598 |
+
relationship: one_to_many
|
| 599 |
+
}
|
| 600 |
+
}
|
| 601 |
+
```
|
| 602 |
+
|
| 603 |
+
### Explore 2: Prediction Monitoring
|
| 604 |
+
|
| 605 |
+
```lookml
|
| 606 |
+
explore: predictions {
|
| 607 |
+
label: "Prediction Monitoring"
|
| 608 |
+
description: "Real-time prediction accuracy and drift"
|
| 609 |
+
|
| 610 |
+
join: model_metrics {
|
| 611 |
+
type: left_outer
|
| 612 |
+
sql_on: ${predictions.model_id} = ${model_metrics.model_id} ;;
|
| 613 |
+
relationship: many_to_one
|
| 614 |
+
}
|
| 615 |
+
}
|
| 616 |
+
```
|
| 617 |
+
|
| 618 |
+
### Explore 3: Data Quality Dashboard
|
| 619 |
+
|
| 620 |
+
```lookml
|
| 621 |
+
explore: data_profile_summary {
|
| 622 |
+
label: "Data Quality"
|
| 623 |
+
description: "Monitor data health and schema drift"
|
| 624 |
+
}
|
| 625 |
+
```
|
| 626 |
+
|
| 627 |
+
---
|
| 628 |
+
|
| 629 |
+
## 📝 Implementation Checklist
|
| 630 |
+
|
| 631 |
+
### Phase 1: Setup (Week 1)
|
| 632 |
+
- [ ] Create all 4 BigQuery tables with partitioning
|
| 633 |
+
- [ ] Set up service account permissions
|
| 634 |
+
- [ ] Configure table expiration policies
|
| 635 |
+
- [ ] Document table owners and update SLAs
|
| 636 |
+
|
| 637 |
+
### Phase 2: Integration (Week 2)
|
| 638 |
+
- [ ] Update tools to write to these schemas
|
| 639 |
+
- [ ] Add schema validation in CI/CD
|
| 640 |
+
- [ ] Create data dictionary in Looker
|
| 641 |
+
- [ ] Set up table monitoring alerts
|
| 642 |
+
|
| 643 |
+
### Phase 3: BI Layer (Week 3)
|
| 644 |
+
- [ ] Create Looker views for all 4 tables
|
| 645 |
+
- [ ] Build explores with joins
|
| 646 |
+
- [ ] Create initial dashboards
|
| 647 |
+
- [ ] Set up scheduled data refreshes
|
| 648 |
+
|
| 649 |
+
### Phase 4: Validation (Week 4)
|
| 650 |
+
- [ ] Backfill historical data
|
| 651 |
+
- [ ] Verify dashboard accuracy
|
| 652 |
+
- [ ] Train stakeholders on dashboards
|
| 653 |
+
- [ ] Document runbooks for common issues
|
| 654 |
+
|
| 655 |
+
---
|
| 656 |
+
|
| 657 |
+
## 🔗 Related Tools
|
| 658 |
+
|
| 659 |
+
**BigQuery Write Tools** (src/bigquery/):
|
| 660 |
+
- `bigquery_write_results()` - Generic write function
|
| 661 |
+
- Helper: `bigquery_write_model_metrics()` - Specialized writer
|
| 662 |
+
- Helper: `bigquery_write_feature_importance()` - Specialized writer
|
| 663 |
+
- Helper: `bigquery_write_predictions()` - Specialized writer
|
| 664 |
+
- Helper: `bigquery_write_data_profile()` - Specialized writer
|
| 665 |
+
|
| 666 |
+
**Example Usage**:
|
| 667 |
+
```python
|
| 668 |
+
from src.bigquery import bigquery_write_results
|
| 669 |
+
|
| 670 |
+
# Write model metrics
|
| 671 |
+
bigquery_write_results(
|
| 672 |
+
data=metrics_df,
|
| 673 |
+
table_id="project.dataset.model_metrics",
|
| 674 |
+
write_disposition="WRITE_APPEND"
|
| 675 |
+
)
|
| 676 |
+
```
|
| 677 |
+
|
| 678 |
+
---
|
| 679 |
+
|
| 680 |
+
## 📚 Additional Resources
|
| 681 |
+
|
| 682 |
+
- [BigQuery Best Practices](https://cloud.google.com/bigquery/docs/best-practices)
|
| 683 |
+
- [Looker LookML Reference](https://cloud.google.com/looker/docs/reference/lookml-quick-reference)
|
| 684 |
+
- [Schema Design for BI](https://cloud.google.com/architecture/bigquery-data-warehouse)
|
| 685 |
+
|
| 686 |
+
---
|
| 687 |
+
|
| 688 |
+
**Last Updated**: December 23, 2025
|
| 689 |
+
**Schema Version**: 1.0.0
|
| 690 |
+
**Maintained By**: Data Science Team
|
| 691 |
+
**Review Cadence**: Quarterly
|
CHECKLIST.md
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ✅ Pre-Launch Checklist
|
| 2 |
+
|
| 3 |
+
## Before Running the Application
|
| 4 |
+
|
| 5 |
+
### 1. Environment Variables ⚠️ **REQUIRED**
|
| 6 |
+
|
| 7 |
+
You MUST set your API key before starting:
|
| 8 |
+
|
| 9 |
+
```powershell
|
| 10 |
+
# Windows PowerShell
|
| 11 |
+
$env:GOOGLE_API_KEY="your-google-api-key-here"
|
| 12 |
+
|
| 13 |
+
# Verify it's set
|
| 14 |
+
echo $env:GOOGLE_API_KEY
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
### 2. Build Status ✅
|
| 18 |
+
|
| 19 |
+
- [x] Frontend dependencies installed
|
| 20 |
+
- [x] Frontend built (FRRONTEEEND/dist exists)
|
| 21 |
+
- [x] Backend code updated with new endpoints
|
| 22 |
+
- [x] Configuration files in place
|
| 23 |
+
|
| 24 |
+
### 3. Quick Start Commands
|
| 25 |
+
|
| 26 |
+
**Option A - Use the start script:**
|
| 27 |
+
```powershell
|
| 28 |
+
.\start.ps1
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
**Option B - Manual start:**
|
| 32 |
+
```powershell
|
| 33 |
+
# Make sure you're in the project root
|
| 34 |
+
Set-Location "c:\Users\Pulastya\Videos\DS AGENTTTT"
|
| 35 |
+
|
| 36 |
+
# Set API key (if not already set)
|
| 37 |
+
$env:GOOGLE_API_KEY="your-key-here"
|
| 38 |
+
|
| 39 |
+
# Start the server
|
| 40 |
+
python src\api\app.py
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
### 4. Access the Application
|
| 44 |
+
|
| 45 |
+
Once the server starts, open your browser to:
|
| 46 |
+
**http://localhost:8080**
|
| 47 |
+
|
| 48 |
+
You should see:
|
| 49 |
+
1. **Landing Page** - Professional homepage with agent features
|
| 50 |
+
2. **Launch Console** button - Click to open the chat interface
|
| 51 |
+
3. **Chat Interface** - Modern conversational UI
|
| 52 |
+
|
| 53 |
+
### 5. Test the Chat
|
| 54 |
+
|
| 55 |
+
Try these sample prompts:
|
| 56 |
+
- "What can you do?"
|
| 57 |
+
- "Explain your data science capabilities"
|
| 58 |
+
- "How do I upload a dataset?"
|
| 59 |
+
- "What ML models do you support?"
|
| 60 |
+
|
| 61 |
+
### 6. Expected Console Output
|
| 62 |
+
|
| 63 |
+
When you start the server, you should see:
|
| 64 |
+
```
|
| 65 |
+
INFO: Started server process [####]
|
| 66 |
+
INFO: Waiting for application startup.
|
| 67 |
+
✅ Agent initialized with provider: groq
|
| 68 |
+
✅ Frontend assets mounted from C:\Users\Pulastya\Videos\DS AGENTTTT\FRRONTEEEND\dist
|
| 69 |
+
INFO: Application startup complete.
|
| 70 |
+
INFO: Uvicorn running on http://0.0.0.0:8080
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
### 7. Troubleshooting Quick Reference
|
| 74 |
+
|
| 75 |
+
| Issue | Solution |
|
| 76 |
+
|-------|----------|
|
| 77 |
+
| "Agent not initialized" | Set GOOGLE_API_KEY environment variable |
|
| 78 |
+
| "Frontend not found" | Run `cd FRRONTEEEND && npm run build` |
|
| 79 |
+
| Port 8080 in use | Kill the process or change PORT env var |
|
| 80 |
+
| Import errors | Run `pip install -r requirements.txt` |
|
| 81 |
+
|
| 82 |
+
## Next Steps After Launch
|
| 83 |
+
|
| 84 |
+
1. **Test the chat** with the agent
|
| 85 |
+
2. **Upload a dataset** (feature coming soon in chat)
|
| 86 |
+
3. **Try the API endpoints** at http://localhost:8080/docs
|
| 87 |
+
4. **Customize the frontend** in FRRONTEEEND/components/
|
| 88 |
+
|
| 89 |
+
## Documentation
|
| 90 |
+
|
| 91 |
+
- 📖 [MIGRATION_COMPLETE.md](MIGRATION_COMPLETE.md) - What was changed
|
| 92 |
+
- 📖 [FRONTEND_INTEGRATION.md](FRONTEND_INTEGRATION.md) - Technical details
|
| 93 |
+
- 📖 [README.md](README.md) - Main project docs
|
| 94 |
+
|
| 95 |
+
---
|
| 96 |
+
|
| 97 |
+
**Ready to launch?** Run `.\start.ps1` and visit http://localhost:8080 🚀
|
DEPLOYMENT.md
ADDED
|
@@ -0,0 +1,495 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🚀 Google Cloud Run Deployment Guide
|
| 2 |
+
|
| 3 |
+
Complete guide to deploy the Data Science Agent to Google Cloud Run as a serverless API.
|
| 4 |
+
|
| 5 |
+
## 📋 Prerequisites
|
| 6 |
+
|
| 7 |
+
1. **Google Cloud Platform Account**
|
| 8 |
+
- Active GCP account with billing enabled
|
| 9 |
+
- Project created (or use existing project)
|
| 10 |
+
|
| 11 |
+
2. **Install Google Cloud SDK**
|
| 12 |
+
```bash
|
| 13 |
+
# macOS (Homebrew)
|
| 14 |
+
brew install --cask google-cloud-sdk
|
| 15 |
+
|
| 16 |
+
# Or download from: https://cloud.google.com/sdk/install
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
3. **Authenticate with GCP**
|
| 20 |
+
```bash
|
| 21 |
+
gcloud auth login
|
| 22 |
+
gcloud auth application-default login
|
| 23 |
+
```
|
| 24 |
+
|
| 25 |
+
4. **Set Your Project**
|
| 26 |
+
```bash
|
| 27 |
+
gcloud config set project YOUR_PROJECT_ID
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
---
|
| 31 |
+
|
| 32 |
+
## 🎯 Deployment Options
|
| 33 |
+
|
| 34 |
+
### Option 1: Automated Deployment (Recommended)
|
| 35 |
+
|
| 36 |
+
Use the provided deployment script for one-command deployment:
|
| 37 |
+
|
| 38 |
+
```bash
|
| 39 |
+
# Set required environment variables
|
| 40 |
+
export GCP_PROJECT_ID="your-project-id"
|
| 41 |
+
export GROQ_API_KEY="your-groq-api-key"
|
| 42 |
+
export GOOGLE_API_KEY="your-google-api-key" # Optional for Gemini
|
| 43 |
+
|
| 44 |
+
# Run deployment script
|
| 45 |
+
./deploy.sh
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
**What it does:**
|
| 49 |
+
- ✅ Enables required GCP APIs (Cloud Build, Cloud Run, Secret Manager)
|
| 50 |
+
- ✅ Creates secrets for API keys
|
| 51 |
+
- ✅ Builds Docker container
|
| 52 |
+
- ✅ Deploys to Cloud Run
|
| 53 |
+
- ✅ Returns service URL
|
| 54 |
+
|
| 55 |
+
**Configuration options:**
|
| 56 |
+
```bash
|
| 57 |
+
# Optional: Customize deployment
|
| 58 |
+
export CLOUD_RUN_REGION="us-central1" # Change region
|
| 59 |
+
export MEMORY="4Gi" # Increase memory
|
| 60 |
+
export CPU="2" # Set CPU count
|
| 61 |
+
export MAX_INSTANCES="10" # Scale limit
|
| 62 |
+
export TIMEOUT="900" # Request timeout (15 min)
|
| 63 |
+
|
| 64 |
+
./deploy.sh
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
---
|
| 68 |
+
|
| 69 |
+
### Option 2: Manual Deployment
|
| 70 |
+
|
| 71 |
+
Step-by-step manual deployment for full control:
|
| 72 |
+
|
| 73 |
+
#### Step 1: Enable APIs
|
| 74 |
+
```bash
|
| 75 |
+
gcloud services enable \
|
| 76 |
+
cloudbuild.googleapis.com \
|
| 77 |
+
run.googleapis.com \
|
| 78 |
+
containerregistry.googleapis.com \
|
| 79 |
+
secretmanager.googleapis.com
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
#### Step 2: Create Secrets
|
| 83 |
+
```bash
|
| 84 |
+
# Create GROQ API key secret
|
| 85 |
+
echo -n "your-groq-api-key" | gcloud secrets create GROQ_API_KEY --data-file=-
|
| 86 |
+
|
| 87 |
+
# Create Google API key secret (optional)
|
| 88 |
+
echo -n "your-google-api-key" | gcloud secrets create GOOGLE_API_KEY --data-file=-
|
| 89 |
+
|
| 90 |
+
# Grant Cloud Run access to secrets
|
| 91 |
+
PROJECT_NUMBER=$(gcloud projects describe $(gcloud config get-value project) --format="value(projectNumber)")
|
| 92 |
+
gcloud secrets add-iam-policy-binding GROQ_API_KEY \
|
| 93 |
+
--member="serviceAccount:${PROJECT_NUMBER}-compute@developer.gserviceaccount.com" \
|
| 94 |
+
--role="roles/secretmanager.secretAccessor"
gcloud secrets add-iam-policy-binding GOOGLE_API_KEY \
  --member="serviceAccount:${PROJECT_NUMBER}-compute@developer.gserviceaccount.com" \
  --role="roles/secretmanager.secretAccessor"
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
#### Step 3: Build Container
|
| 98 |
+
```bash
|
| 99 |
+
gcloud builds submit --tag gcr.io/$(gcloud config get-value project)/data-science-agent
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
#### Step 4: Deploy to Cloud Run
|
| 103 |
+
```bash
|
| 104 |
+
gcloud run deploy data-science-agent \
|
| 105 |
+
--image gcr.io/$(gcloud config get-value project)/data-science-agent \
|
| 106 |
+
--platform managed \
|
| 107 |
+
--region us-central1 \
|
| 108 |
+
--allow-unauthenticated \
|
| 109 |
+
--memory 4Gi \
|
| 110 |
+
--cpu 2 \
|
| 111 |
+
--timeout 900 \
|
| 112 |
+
--max-instances 10 \
|
| 113 |
+
--set-env-vars LLM_PROVIDER=groq,REASONING_EFFORT=medium \
|
| 114 |
+
--set-secrets GROQ_API_KEY=GROQ_API_KEY:latest,GOOGLE_API_KEY=GOOGLE_API_KEY:latest
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
---
|
| 118 |
+
|
| 119 |
+
### Option 3: CI/CD with Cloud Build Triggers
|
| 120 |
+
|
| 121 |
+
Automated deployment on git push:
|
| 122 |
+
|
| 123 |
+
#### Step 1: Connect Repository
|
| 124 |
+
```bash
|
| 125 |
+
# Connect GitHub/GitLab/Bitbucket repository
|
| 126 |
+
gcloud beta builds connections create github connection-name \
|
| 127 |
+
--region=us-central1
|
| 128 |
+
```
|
| 129 |
+
|
| 130 |
+
#### Step 2: Create Build Trigger
|
| 131 |
+
```bash
|
| 132 |
+
gcloud builds triggers create github \
|
| 133 |
+
--name="deploy-data-science-agent" \
|
| 134 |
+
--repo-name="Data-Science-Agent" \
|
| 135 |
+
--repo-owner="Surfing-Ninja" \
|
| 136 |
+
--branch-pattern="^main$" \
|
| 137 |
+
--build-config="cloudbuild.yaml"
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
Now every push to `main` branch automatically deploys! 🎉
|
| 141 |
+
|
| 142 |
+
---
|
| 143 |
+
|
| 144 |
+
## 🧪 Testing the Deployment
|
| 145 |
+
|
| 146 |
+
### 1. Health Check
|
| 147 |
+
```bash
|
| 148 |
+
SERVICE_URL=$(gcloud run services describe data-science-agent \
|
| 149 |
+
--region us-central1 \
|
| 150 |
+
--format 'value(status.url)')
|
| 151 |
+
|
| 152 |
+
curl $SERVICE_URL/health
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
**Expected response:**
|
| 156 |
+
```json
|
| 157 |
+
{
|
| 158 |
+
"status": "healthy",
|
| 159 |
+
"agent_ready": true,
|
| 160 |
+
"provider": "groq",
|
| 161 |
+
"tools_count": 82
|
| 162 |
+
}
|
| 163 |
+
```
|
| 164 |
+
|
| 165 |
+
### 2. List Available Tools
|
| 166 |
+
```bash
|
| 167 |
+
curl $SERVICE_URL/tools | jq
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
### 3. Profile a Dataset
|
| 171 |
+
```bash
|
| 172 |
+
curl -X POST $SERVICE_URL/profile \
|
| 173 |
+
-F "file=@test_data/sample.csv"
|
| 174 |
+
```
|
| 175 |
+
|
| 176 |
+
### 4. Run Full Analysis
|
| 177 |
+
```bash
|
| 178 |
+
curl -X POST $SERVICE_URL/run \
|
| 179 |
+
-F "file=@test_data/sample.csv" \
|
| 180 |
+
-F "task_description=Analyze this dataset, detect outliers, and train a prediction model" \
|
| 181 |
+
-F "target_col=target" \
|
| 182 |
+
| jq
|
| 183 |
+
```
|
| 184 |
+
|
| 185 |
+
---
|
| 186 |
+
|
| 187 |
+
## 📊 Monitoring & Logs
|
| 188 |
+
|
| 189 |
+
### View Real-time Logs
|
| 190 |
+
```bash
|
| 191 |
+
gcloud beta run services logs tail data-science-agent --region us-central1
|
| 192 |
+
```
|
| 193 |
+
|
| 194 |
+
### View Recent Logs
|
| 195 |
+
```bash
|
| 196 |
+
gcloud beta run services logs read data-science-agent \
|
| 197 |
+
--region us-central1 \
|
| 198 |
+
--limit 50
|
| 199 |
+
```
|
| 200 |
+
|
| 201 |
+
### Cloud Console Monitoring
|
| 202 |
+
- Go to: https://console.cloud.google.com/run
|
| 203 |
+
- Click on `data-science-agent`
|
| 204 |
+
- View: Metrics, Logs, Revisions
|
| 205 |
+
|
| 206 |
+
---
|
| 207 |
+
|
| 208 |
+
## 💰 Cost Estimation
|
| 209 |
+
|
| 210 |
+
### Cloud Run Pricing (as of Dec 2024)
|
| 211 |
+
**Free Tier** (per month):
|
| 212 |
+
- 2 million requests
|
| 213 |
+
- 360,000 GB-seconds of memory
|
| 214 |
+
- 180,000 vCPU-seconds
|
| 215 |
+
|
| 216 |
+
**Paid Tier** (us-central1):
|
| 217 |
+
- CPU: $0.00002400 per vCPU-second
|
| 218 |
+
- Memory: $0.00000250 per GB-second
|
| 219 |
+
- Requests: $0.40 per million requests
|
| 220 |
+
|
| 221 |
+
**Example Cost for 4Gi Memory, 2 vCPU:**
|
| 222 |
+
- 1 request taking 60 seconds
|
| 223 |
+
- CPU: 2 vCPU × 60s × $0.000024 = $0.00288
|
| 224 |
+
- Memory: 4GB × 60s × $0.0000025 = $0.0006
|
| 225 |
+
- Request: $0.0000004
|
| 226 |
+
- **Total: ~$0.0035 per request**
|
| 227 |
+
|
| 228 |
+
**Monthly estimate for 1000 requests/month:**
|
| 229 |
+
- ~$3.50/month (well within free tier for testing!)
|
| 230 |
+
|
| 231 |
+
---
|
| 232 |
+
|
| 233 |
+
## 🔒 Security Best Practices
|
| 234 |
+
|
| 235 |
+
### 1. Enable Authentication (Production)
|
| 236 |
+
```bash
|
| 237 |
+
# Deploy with authentication required
|
| 238 |
+
gcloud run deploy data-science-agent \
|
| 239 |
+
--no-allow-unauthenticated \
|
| 240 |
+
--region us-central1 \
|
| 241 |
+
--image gcr.io/PROJECT_ID/data-science-agent
|
| 242 |
+
|
| 243 |
+
# Create service account for clients
|
| 244 |
+
gcloud iam service-accounts create api-client
|
| 245 |
+
|
| 246 |
+
# Grant invoker role
|
| 247 |
+
gcloud run services add-iam-policy-binding data-science-agent \
|
| 248 |
+
--member="serviceAccount:api-client@PROJECT_ID.iam.gserviceaccount.com" \
|
| 249 |
+
--role="roles/run.invoker" \
|
| 250 |
+
--region us-central1
|
| 251 |
+
```
|
| 252 |
+
|
| 253 |
+
### 2. Use VPC Connector (For BigQuery/GCS)
|
| 254 |
+
```bash
|
| 255 |
+
# Create VPC connector
|
| 256 |
+
gcloud compute networks vpc-access connectors create ds-agent-connector \
|
| 257 |
+
--network default \
|
| 258 |
+
--region us-central1 \
|
| 259 |
+
--range 10.8.0.0/28
|
| 260 |
+
|
| 261 |
+
# Deploy with VPC
|
| 262 |
+
gcloud run deploy data-science-agent \
|
| 263 |
+
--vpc-connector ds-agent-connector \
|
| 264 |
+
--region us-central1
|
| 265 |
+
```
|
| 266 |
+
|
| 267 |
+
### 3. Restrict API Keys
|
| 268 |
+
- Set **Application restrictions** in Google Cloud Console
|
| 269 |
+
- Whitelist only Cloud Run service URL
|
| 270 |
+
- Set **API restrictions** to only required APIs
|
| 271 |
+
|
| 272 |
+
---
|
| 273 |
+
|
| 274 |
+
## 🔧 Configuration Options
|
| 275 |
+
|
| 276 |
+
### Environment Variables
|
| 277 |
+
```bash
|
| 278 |
+
# Set during deployment
|
| 279 |
+
--set-env-vars KEY1=value1,KEY2=value2
|
| 280 |
+
|
| 281 |
+
# Available variables:
|
| 282 |
+
LLM_PROVIDER=groq # or "gemini"
|
| 283 |
+
REASONING_EFFORT=medium # low, medium, high
|
| 284 |
+
CACHE_TTL_SECONDS=86400 # Cache lifetime
|
| 285 |
+
ARTIFACT_BACKEND=local # or "gcs" for cloud storage
|
| 286 |
+
GCS_BUCKET_NAME=your-bucket # If using GCS backend
|
| 287 |
+
OUTPUT_DIR=/tmp/outputs # Output directory
|
| 288 |
+
MAX_PARALLEL_TOOLS=5 # Concurrent tool execution
|
| 289 |
+
MAX_RETRIES=3 # Tool retry attempts
|
| 290 |
+
TIMEOUT_SECONDS=300 # Tool timeout
|
| 291 |
+
```
|
| 292 |
+
|
| 293 |
+
### Resource Limits
|
| 294 |
+
```bash
|
| 295 |
+
--memory 4Gi # 128Mi to 32Gi
|
| 296 |
+
--cpu 2 # 1 to 8 vCPU
|
| 297 |
+
--timeout 900 # Max 3600s (1 hour)
|
| 298 |
+
--max-instances 10 # Scale limit
|
| 299 |
+
--min-instances 0 # Always-warm instances
|
| 300 |
+
--concurrency 10 # Requests per instance
|
| 301 |
+
```
|
| 302 |
+
|
| 303 |
+
---
|
| 304 |
+
|
| 305 |
+
## 🐛 Troubleshooting
|
| 306 |
+
|
| 307 |
+
### Build Fails
|
| 308 |
+
```bash
|
| 309 |
+
# Check build logs
|
| 310 |
+
gcloud builds list --limit=5
|
| 311 |
+
gcloud builds log BUILD_ID
|
| 312 |
+
|
| 313 |
+
# Common fixes:
|
| 314 |
+
# - Ensure Dockerfile is in root directory
|
| 315 |
+
# - Check requirements.txt has all dependencies
|
| 316 |
+
# - Increase build timeout: --timeout=1200s
|
| 317 |
+
```
|
| 318 |
+
|
| 319 |
+
### Deployment Fails
|
| 320 |
+
```bash
|
| 321 |
+
# Check service status
|
| 322 |
+
gcloud run services describe data-science-agent --region us-central1
|
| 323 |
+
|
| 324 |
+
# Common fixes:
|
| 325 |
+
# - Ensure APIs are enabled
|
| 326 |
+
# - Check secrets exist and are accessible
|
| 327 |
+
# - Verify service account permissions
|
| 328 |
+
```
|
| 329 |
+
|
| 330 |
+
### Runtime Errors
|
| 331 |
+
```bash
|
| 332 |
+
# View logs
|
| 333 |
+
gcloud beta run services logs tail data-science-agent --region us-central1
|
| 334 |
+
|
| 335 |
+
# Common issues:
|
| 336 |
+
# - API keys not set: Check secrets
|
| 337 |
+
# - Import errors: Ensure all dependencies in requirements.txt
|
| 338 |
+
# - Memory issues: Increase --memory limit
|
| 339 |
+
# - Timeout: Increase --timeout value
|
| 340 |
+
```
|
| 341 |
+
|
| 342 |
+
### Container Crashes
|
| 343 |
+
```bash
|
| 344 |
+
# Test locally first
|
| 345 |
+
docker build -t ds-agent .
|
| 346 |
+
docker run -p 8080:8080 \
|
| 347 |
+
-e GROQ_API_KEY="your-key" \
|
| 348 |
+
ds-agent
|
| 349 |
+
|
| 350 |
+
curl http://localhost:8080/health
|
| 351 |
+
```
|
| 352 |
+
|
| 353 |
+
---
|
| 354 |
+
|
| 355 |
+
## 🚀 Advanced Features
|
| 356 |
+
|
| 357 |
+
### Custom Domain
|
| 358 |
+
```bash
|
| 359 |
+
# Map custom domain
|
| 360 |
+
gcloud run domain-mappings create \
|
| 361 |
+
--service data-science-agent \
|
| 362 |
+
--domain api.yourdomain.com \
|
| 363 |
+
--region us-central1
|
| 364 |
+
```
|
| 365 |
+
|
| 366 |
+
### Load Balancing
|
| 367 |
+
```bash
|
| 368 |
+
# Create multiple regional deployments
|
| 369 |
+
for region in us-central1 us-east1 europe-west1; do
|
| 370 |
+
gcloud run deploy data-science-agent \
|
| 371 |
+
--image gcr.io/PROJECT_ID/data-science-agent \
|
| 372 |
+
--region $region
|
| 373 |
+
done
|
| 374 |
+
|
| 375 |
+
# Set up global load balancer
|
| 376 |
+
# Follow: https://cloud.google.com/load-balancing/docs/https/setup-global-ext-https-serverless
|
| 377 |
+
```
|
| 378 |
+
|
| 379 |
+
### Multi-Region Deployment
|
| 380 |
+
```bash
|
| 381 |
+
# Deploy to multiple regions for high availability
|
| 382 |
+
CLOUD_RUN_REGION=us-central1 ./deploy.sh
|
| 383 |
+
CLOUD_RUN_REGION=europe-west1 ./deploy.sh
|
| 384 |
+
CLOUD_RUN_REGION=asia-east1 ./deploy.sh
|
| 385 |
+
```
|
| 386 |
+
|
| 387 |
+
---
|
| 388 |
+
|
| 389 |
+
## 📝 API Documentation
|
| 390 |
+
|
| 391 |
+
Once deployed, access Swagger docs at:
|
| 392 |
+
```
|
| 393 |
+
https://YOUR_SERVICE_URL/docs
|
| 394 |
+
```
|
| 395 |
+
|
| 396 |
+
### Available Endpoints
|
| 397 |
+
|
| 398 |
+
#### `GET /` - Health Check
|
| 399 |
+
Returns service status and tool count.
|
| 400 |
+
|
| 401 |
+
#### `GET /health` - Detailed Health
|
| 402 |
+
Returns agent readiness and provider info.
|
| 403 |
+
|
| 404 |
+
#### `GET /tools` - List Tools
|
| 405 |
+
Returns all 82 available tools organized by category.
|
| 406 |
+
|
| 407 |
+
#### `POST /run` - Run Full Analysis
|
| 408 |
+
Upload dataset and execute complete data science workflow.
|
| 409 |
+
|
| 410 |
+
**Parameters:**
|
| 411 |
+
- `file`: CSV/Parquet file (multipart/form-data)
|
| 412 |
+
- `task_description`: Natural language task description
|
| 413 |
+
- `target_col`: Target column for ML (optional)
|
| 414 |
+
- `use_cache`: Enable caching (default: true)
|
| 415 |
+
- `max_iterations`: Max workflow steps (default: 20)
|
| 416 |
+
|
| 417 |
+
#### `POST /profile` - Quick Profile
|
| 418 |
+
Quick dataset profiling without full workflow.
|
| 419 |
+
|
| 420 |
+
**Parameters:**
|
| 421 |
+
- `file`: CSV/Parquet file (multipart/form-data)
|
| 422 |
+
|
| 423 |
+
---
|
| 424 |
+
|
| 425 |
+
## 🔄 Updates & Rollbacks
|
| 426 |
+
|
| 427 |
+
### Update Deployment
|
| 428 |
+
```bash
|
| 429 |
+
# Rebuild and redeploy
|
| 430 |
+
./deploy.sh
|
| 431 |
+
```
|
| 432 |
+
|
| 433 |
+
### Rollback to Previous Revision
|
| 434 |
+
```bash
|
| 435 |
+
# List revisions
|
| 436 |
+
gcloud run revisions list --service data-science-agent --region us-central1
|
| 437 |
+
|
| 438 |
+
# Rollback
|
| 439 |
+
gcloud run services update-traffic data-science-agent \
|
| 440 |
+
--to-revisions REVISION_NAME=100 \
|
| 441 |
+
--region us-central1
|
| 442 |
+
```
|
| 443 |
+
|
| 444 |
+
### Blue/Green Deployment
|
| 445 |
+
```bash
|
| 446 |
+
# Deploy new version with tag
|
| 447 |
+
gcloud run deploy data-science-agent \
|
| 448 |
+
--tag blue \
|
| 449 |
+
--no-traffic \
|
| 450 |
+
--region us-central1
|
| 451 |
+
|
| 452 |
+
# Test: https://blue---data-science-agent-HASH.run.app
|
| 453 |
+
|
| 454 |
+
# Switch traffic
|
| 455 |
+
gcloud run services update-traffic data-science-agent \
|
| 456 |
+
--to-tags blue=100 \
|
| 457 |
+
--region us-central1
|
| 458 |
+
```
|
| 459 |
+
|
| 460 |
+
---
|
| 461 |
+
|
| 462 |
+
## 📚 Additional Resources
|
| 463 |
+
|
| 464 |
+
- **Cloud Run Docs**: https://cloud.google.com/run/docs
|
| 465 |
+
- **Pricing Calculator**: https://cloud.google.com/products/calculator
|
| 466 |
+
- **Best Practices**: https://cloud.google.com/run/docs/tips
|
| 467 |
+
- **Quotas & Limits**: https://cloud.google.com/run/quotas
|
| 468 |
+
|
| 469 |
+
---
|
| 470 |
+
|
| 471 |
+
## ✅ Deployment Checklist
|
| 472 |
+
|
| 473 |
+
- [ ] GCP project created and billing enabled
|
| 474 |
+
- [ ] Google Cloud SDK installed and authenticated
|
| 475 |
+
- [ ] API keys obtained (GROQ_API_KEY, GOOGLE_API_KEY)
|
| 476 |
+
- [ ] Secrets created in Secret Manager
|
| 477 |
+
- [ ] Docker container builds successfully locally
|
| 478 |
+
- [ ] Cloud Run APIs enabled
|
| 479 |
+
- [ ] Service deployed to Cloud Run
|
| 480 |
+
- [ ] Health check endpoint returns 200
|
| 481 |
+
- [ ] Test dataset profiled successfully
|
| 482 |
+
- [ ] Full analysis workflow tested
|
| 483 |
+
- [ ] Monitoring/logging configured
|
| 484 |
+
- [ ] Cost alerts set up (optional)
|
| 485 |
+
- [ ] Custom domain mapped (optional)
|
| 486 |
+
- [ ] CI/CD pipeline configured (optional)
|
| 487 |
+
|
| 488 |
+
---
|
| 489 |
+
|
| 490 |
+
**Need help?** Check the troubleshooting section or view logs with:
|
| 491 |
+
```bash
|
| 492 |
+
gcloud beta run services logs tail data-science-agent --region us-central1
|
| 493 |
+
```
|
| 494 |
+
|
| 495 |
+
Happy deploying! 🎉
|
Dockerfile
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Multi-stage build for Google Cloud Run
|
| 2 |
+
# Stage 1: Build Frontend
|
| 3 |
+
FROM node:20-alpine as frontend-builder
|
| 4 |
+
|
| 5 |
+
WORKDIR /frontend
|
| 6 |
+
|
| 7 |
+
# Copy frontend files
|
| 8 |
+
COPY FRRONTEEEND/package*.json ./
|
| 9 |
+
RUN npm install
|
| 10 |
+
|
| 11 |
+
COPY FRRONTEEEND/ ./
|
| 12 |
+
RUN npm run build
|
| 13 |
+
|
| 14 |
+
# Stage 2: Build Python environment
|
| 15 |
+
FROM python:3.13-slim as builder
|
| 16 |
+
|
| 17 |
+
# Install system dependencies
|
| 18 |
+
RUN apt-get update && apt-get install -y \
|
| 19 |
+
gcc \
|
| 20 |
+
g++ \
|
| 21 |
+
make \
|
| 22 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 23 |
+
|
| 24 |
+
# Create virtual environment
|
| 25 |
+
RUN python -m venv /opt/venv
|
| 26 |
+
ENV PATH="/opt/venv/bin:$PATH"
|
| 27 |
+
|
| 28 |
+
# Copy requirements and install Python packages
|
| 29 |
+
COPY requirements.txt .
|
| 30 |
+
RUN pip install --no-cache-dir --upgrade pip && \
|
| 31 |
+
pip install --no-cache-dir -r requirements.txt
|
| 32 |
+
|
| 33 |
+
# Stage 3: Runtime environment
|
| 34 |
+
FROM python:3.13-slim
|
| 35 |
+
|
| 36 |
+
# Install runtime dependencies only
|
| 37 |
+
RUN apt-get update && apt-get install -y \
|
| 38 |
+
libgomp1 \
|
| 39 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 40 |
+
|
| 41 |
+
# Copy virtual environment from builder
|
| 42 |
+
COPY --from=builder /opt/venv /opt/venv
|
| 43 |
+
ENV PATH="/opt/venv/bin:$PATH"
|
| 44 |
+
|
| 45 |
+
# Set working directory
|
| 46 |
+
WORKDIR /app
|
| 47 |
+
|
| 48 |
+
# Copy application code
|
| 49 |
+
COPY src/ /app/src/
|
| 50 |
+
COPY examples/ /app/examples/
|
| 51 |
+
|
| 52 |
+
# Copy built frontend from frontend-builder
|
| 53 |
+
COPY --from=frontend-builder /frontend/dist /app/FRRONTEEEND/dist
|
| 54 |
+
|
| 55 |
+
# Create necessary directories for Cloud Run ephemeral storage
|
| 56 |
+
RUN mkdir -p /tmp/data_science_agent \
|
| 57 |
+
/tmp/outputs/models \
|
| 58 |
+
/tmp/outputs/plots \
|
| 59 |
+
/tmp/outputs/reports \
|
| 60 |
+
/tmp/outputs/data \
|
| 61 |
+
/tmp/cache_db
|
| 62 |
+
|
| 63 |
+
# Set environment variables
|
| 64 |
+
ENV PYTHONUNBUFFERED=1
|
| 65 |
+
ENV PORT=8080
|
| 66 |
+
ENV OUTPUT_DIR=/tmp/outputs
|
| 67 |
+
ENV CACHE_DB_PATH=/tmp/cache_db/cache.db
|
| 68 |
+
ENV ARTIFACT_BACKEND=local
|
| 69 |
+
|
| 70 |
+
# Cloud Run expects the service to listen on the PORT env variable
|
| 71 |
+
EXPOSE 8080
|
| 72 |
+
|
| 73 |
+
# Health check (optional, Cloud Run handles this)
|
| 74 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
| 75 |
+
CMD python -c "import requests; requests.get('http://localhost:8080/health')" || exit 1
|
| 76 |
+
|
| 77 |
+
# Run the FastAPI application
|
| 78 |
+
CMD ["python", "src/api/app.py"]
|
FRONTEND_INTEGRATION.md
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Data Science Agent - Frontend Integration Guide
|
| 2 |
+
|
| 3 |
+
## 🎉 New React Frontend
|
| 4 |
+
|
| 5 |
+
The application now features a modern, professional React frontend that replaces the old Gradio interface.
|
| 6 |
+
|
| 7 |
+
### Features
|
| 8 |
+
|
| 9 |
+
- **Beautiful Landing Page**: Showcases the agent's capabilities with modern design
|
| 10 |
+
- **Professional Chat Interface**: NextChat-style conversational UI
|
| 11 |
+
- **Direct Backend Integration**: Communicates with your FastAPI backend
|
| 12 |
+
- **Responsive Design**: Works on all devices
|
| 13 |
+
- **Dark Theme**: Modern, eye-friendly interface
|
| 14 |
+
|
| 15 |
+
## 🚀 Quick Start
|
| 16 |
+
|
| 17 |
+
### Prerequisites
|
| 18 |
+
|
| 19 |
+
- Python 3.13+
|
| 20 |
+
- Node.js 20+
|
| 21 |
+
- npm (comes with Node.js)
|
| 22 |
+
|
| 23 |
+
### Running the Application
|
| 24 |
+
|
| 25 |
+
#### Option 1: Using the Build Script (Recommended)
|
| 26 |
+
|
| 27 |
+
**Windows:**
|
| 28 |
+
```powershell
|
| 29 |
+
.\build-and-deploy.ps1
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
**Linux/Mac:**
|
| 33 |
+
```bash
|
| 34 |
+
chmod +x build-and-deploy.sh
|
| 35 |
+
./build-and-deploy.sh
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
Then start the server:
|
| 39 |
+
```bash
|
| 40 |
+
python src/api/app.py
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
#### Option 2: Manual Steps
|
| 44 |
+
|
| 45 |
+
1. **Build the Frontend:**
|
| 46 |
+
```bash
|
| 47 |
+
cd FRRONTEEEND
|
| 48 |
+
npm.cmd install
|
| 49 |
+
npm.cmd run build
|
| 50 |
+
cd ..
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
2. **Install Python Dependencies:**
|
| 54 |
+
```bash
|
| 55 |
+
pip install -r requirements.txt
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
3. **Start the Backend Server:**
|
| 59 |
+
```bash
|
| 60 |
+
python src/api/app.py
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
4. **Access the Application:**
|
| 64 |
+
Open your browser and navigate to: http://localhost:8080
|
| 65 |
+
|
| 66 |
+
## 🏗️ Architecture
|
| 67 |
+
|
| 68 |
+
### Backend (FastAPI)
|
| 69 |
+
- **Location**: `src/api/app.py`
|
| 70 |
+
- **Port**: 8080
|
| 71 |
+
- **Endpoints**:
|
| 72 |
+
- `GET /` - Health check & landing page
|
| 73 |
+
- `POST /chat` - Chat interface endpoint
|
| 74 |
+
- `POST /run` - Full data science workflow
|
| 75 |
+
- `POST /profile` - Dataset profiling
|
| 76 |
+
- `GET /tools` - List available tools
|
| 77 |
+
|
| 78 |
+
### Frontend (React + Vite)
|
| 79 |
+
- **Location**: `FRRONTEEEND/`
|
| 80 |
+
- **Build Output**: `FRRONTEEEND/dist/`
|
| 81 |
+
- **Dev Port**: 3000 (development mode)
|
| 82 |
+
- **Production**: Served by FastAPI at port 8080
|
| 83 |
+
|
| 84 |
+
## 🔧 Development Mode
|
| 85 |
+
|
| 86 |
+
If you want to develop the frontend with hot-reloading:
|
| 87 |
+
|
| 88 |
+
1. **Terminal 1 - Backend:**
|
| 89 |
+
```bash
|
| 90 |
+
python src/api/app.py
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
2. **Terminal 2 - Frontend:**
|
| 94 |
+
```bash
|
| 95 |
+
cd FRRONTEEEND
|
| 96 |
+
npm.cmd run dev
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
Access:
|
| 100 |
+
- Frontend (dev): http://localhost:3000
|
| 101 |
+
- Backend API: http://localhost:8080
|
| 102 |
+
|
| 103 |
+
## 🌐 API Integration
|
| 104 |
+
|
| 105 |
+
The frontend now communicates with your FastAPI backend instead of calling external APIs directly.
|
| 106 |
+
|
| 107 |
+
### Environment Variables
|
| 108 |
+
|
| 109 |
+
Create `FRRONTEEEND/.env` for local development:
|
| 110 |
+
```env
|
| 111 |
+
VITE_API_URL=http://localhost:8080
|
| 112 |
+
```
|
| 113 |
+
|
| 114 |
+
For production, update `FRRONTEEEND/.env.production`:
|
| 115 |
+
```env
|
| 116 |
+
VITE_API_URL=https://your-cloud-run-url.run.app
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
## 📦 Deployment
|
| 120 |
+
|
| 121 |
+
### Docker Build
|
| 122 |
+
|
| 123 |
+
The Dockerfile now includes a multi-stage build that:
|
| 124 |
+
1. Builds the React frontend
|
| 125 |
+
2. Builds the Python environment
|
| 126 |
+
3. Combines both in the final image
|
| 127 |
+
|
| 128 |
+
```bash
|
| 129 |
+
docker build -t data-science-agent .
|
| 130 |
+
docker run -p 8080:8080 data-science-agent
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
### Google Cloud Run
|
| 134 |
+
|
| 135 |
+
```bash
|
| 136 |
+
gcloud builds submit --tag gcr.io/YOUR-PROJECT-ID/data-science-agent
|
| 137 |
+
gcloud run deploy data-science-agent \
|
| 138 |
+
--image gcr.io/YOUR-PROJECT-ID/data-science-agent \
|
| 139 |
+
--platform managed \
|
| 140 |
+
--region us-central1 \
|
| 141 |
+
--allow-unauthenticated \
|
| 142 |
+
--set-env-vars GROQ_API_KEY=your-api-key
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
## 🔄 What Changed
|
| 146 |
+
|
| 147 |
+
### Removed
|
| 148 |
+
- ❌ Gradio interface (`chat_ui.py` - kept for reference)
|
| 149 |
+
- ❌ Direct Google GenAI calls from frontend
|
| 150 |
+
- ❌ Gradio dependency
|
| 151 |
+
|
| 152 |
+
### Added
|
| 153 |
+
- ✅ React + TypeScript frontend with Vite
|
| 154 |
+
- ✅ Professional landing page
|
| 155 |
+
- ✅ Modern chat interface
|
| 156 |
+
- ✅ `/chat` API endpoint
|
| 157 |
+
- ✅ CORS support in FastAPI
|
| 158 |
+
- ✅ Static file serving for React app
|
| 159 |
+
- ✅ Multi-stage Docker build
|
| 160 |
+
|
| 161 |
+
## 🛠️ Tech Stack
|
| 162 |
+
|
| 163 |
+
### Frontend
|
| 164 |
+
- React 19
|
| 165 |
+
- TypeScript 5.8
|
| 166 |
+
- Vite 6
|
| 167 |
+
- Tailwind CSS
|
| 168 |
+
- Framer Motion (animations)
|
| 169 |
+
- Lucide React (icons)
|
| 170 |
+
|
| 171 |
+
### Backend (unchanged)
|
| 172 |
+
- FastAPI
|
| 173 |
+
- Python 3.13
|
| 174 |
+
- Groq API
|
| 175 |
+
- Polars, DuckDB
|
| 176 |
+
- Scikit-learn, XGBoost, LightGBM
|
| 177 |
+
|
| 178 |
+
## 📁 Project Structure
|
| 179 |
+
|
| 180 |
+
```
|
| 181 |
+
.
|
| 182 |
+
├── FRRONTEEEND/ # React frontend
|
| 183 |
+
│ ├── components/ # React components
|
| 184 |
+
│ ├── dist/ # Built frontend (after npm run build)
|
| 185 |
+
│ ├── package.json
|
| 186 |
+
│ ├── vite.config.ts
|
| 187 |
+
│ └── .env # Frontend environment variables
|
| 188 |
+
├── src/
|
| 189 |
+
│ ├── api/
|
| 190 |
+
│ │ └── app.py # FastAPI backend (updated)
|
| 191 |
+
│ ├── tools/ # Data science tools
|
| 192 |
+
│ └── orchestrator.py # Main agent logic
|
| 193 |
+
├── requirements.txt # Python dependencies (updated)
|
| 194 |
+
├── Dockerfile # Multi-stage build (updated)
|
| 195 |
+
├── build-and-deploy.ps1 # Windows build script
|
| 196 |
+
└── build-and-deploy.sh # Linux/Mac build script
|
| 197 |
+
```
|
| 198 |
+
|
| 199 |
+
## 🐛 Troubleshooting
|
| 200 |
+
|
| 201 |
+
### Frontend doesn't load
|
| 202 |
+
- Make sure you've run `npm run build` in the FRRONTEEEND directory
|
| 203 |
+
- Check that `FRRONTEEEND/dist/` exists and contains files
|
| 204 |
+
|
| 205 |
+
### API errors in chat
|
| 206 |
+
- Ensure the backend is running on port 8080
|
| 207 |
+
- Check that `GROQ_API_KEY` is set in your environment
|
| 208 |
+
- Verify the API URL in `.env` file
|
| 209 |
+
|
| 210 |
+
### CORS errors
|
| 211 |
+
- The backend now has CORS enabled for development
|
| 212 |
+
- For production, update the `allow_origins` in `src/api/app.py`
|
| 213 |
+
|
| 214 |
+
## 📝 Notes
|
| 215 |
+
|
| 216 |
+
- The old `chat_ui.py` has been kept for reference but is no longer used
|
| 217 |
+
- All chat functionality now goes through the `/chat` endpoint
|
| 218 |
+
- The frontend is automatically served by FastAPI in production mode
|
| 219 |
+
- Session history is maintained in the frontend (browser)
|
| 220 |
+
|
| 221 |
+
## 🎯 Next Steps
|
| 222 |
+
|
| 223 |
+
1. **Customize the frontend**: Edit files in `FRRONTEEEND/components/`
|
| 224 |
+
2. **Add file upload**: Extend `ChatInterface.tsx` to handle file uploads
|
| 225 |
+
3. **Add visualization**: Display charts from the backend in the chat
|
| 226 |
+
4. **Authentication**: Add user authentication if needed
|
| 227 |
+
|
| 228 |
+
## 📞 Support
|
| 229 |
+
|
| 230 |
+
For issues or questions:
|
| 231 |
+
1. Check the console logs (browser & terminal)
|
| 232 |
+
2. Verify environment variables
|
| 233 |
+
3. Ensure all dependencies are installed
|
| 234 |
+
4. Review the API documentation at http://localhost:8080/docs
|
FRRONTEEEND/.env.production
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Production API Configuration
|
| 2 |
+
# Update this to your production API URL
|
| 3 |
+
VITE_API_URL=https://your-cloud-run-url.run.app
|
FRRONTEEEND/.gitignore
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Logs
|
| 2 |
+
logs
|
| 3 |
+
*.log
|
| 4 |
+
npm-debug.log*
|
| 5 |
+
yarn-debug.log*
|
| 6 |
+
yarn-error.log*
|
| 7 |
+
pnpm-debug.log*
|
| 8 |
+
lerna-debug.log*
|
| 9 |
+
|
| 10 |
+
node_modules
|
| 11 |
+
dist
|
| 12 |
+
dist-ssr
|
| 13 |
+
*.local
|
| 14 |
+
|
| 15 |
+
# Editor directories and files
|
| 16 |
+
.vscode/*
|
| 17 |
+
!.vscode/extensions.json
|
| 18 |
+
.idea
|
| 19 |
+
.DS_Store
|
| 20 |
+
*.suo
|
| 21 |
+
*.ntvs*
|
| 22 |
+
*.njsproj
|
| 23 |
+
*.sln
|
| 24 |
+
*.sw?
|
FRRONTEEEND/App.tsx
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import React, { useState } from 'react';
|
| 3 |
+
import { HeroGeometric } from './components/HeroGeometric';
|
| 4 |
+
import ProblemSolution from './components/ProblemSolution';
|
| 5 |
+
import KeyCapabilities from './components/KeyCapabilities';
|
| 6 |
+
import Process from './components/Process';
|
| 7 |
+
import TechStack from './components/TechStack';
|
| 8 |
+
import Footer from './components/Footer';
|
| 9 |
+
import { BackgroundPaths } from './components/BackgroundPaths';
|
| 10 |
+
import { Logo } from './components/Logo';
|
| 11 |
+
import { ChatInterface } from './components/ChatInterface';
|
| 12 |
+
|
| 13 |
+
const App: React.FC = () => {
|
| 14 |
+
const [view, setView] = useState<'landing' | 'chat'>('landing');
|
| 15 |
+
|
| 16 |
+
if (view === 'chat') {
|
| 17 |
+
return <ChatInterface onBack={() => setView('landing')} />;
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
return (
|
| 21 |
+
<div className="min-h-screen bg-[#030303] text-white selection:bg-indigo-500/30">
|
| 22 |
+
{/* Navigation (Overlay) */}
|
| 23 |
+
<nav className="fixed top-0 left-0 right-0 z-50 flex justify-between items-center px-6 py-4 backdrop-blur-md bg-[#030303]/20 border-b border-white/5">
|
| 24 |
+
<div className="flex items-center gap-3 cursor-pointer" onClick={() => setView('landing')}>
|
| 25 |
+
<Logo className="w-10 h-10" />
|
| 26 |
+
<span className="font-bold tracking-tight text-lg hidden sm:block uppercase text-white">
|
| 27 |
+
DATA SCIENCE AGENT
|
| 28 |
+
</span>
|
| 29 |
+
</div>
|
| 30 |
+
|
| 31 |
+
<button
|
| 32 |
+
onClick={() => setView('chat')}
|
| 33 |
+
className="px-5 py-2 bg-white/5 hover:bg-white/10 border border-white/10 rounded-lg text-sm font-medium transition-all"
|
| 34 |
+
>
|
| 35 |
+
Launch Console
|
| 36 |
+
</button>
|
| 37 |
+
</nav>
|
| 38 |
+
|
| 39 |
+
<main>
|
| 40 |
+
<HeroGeometric onChatClick={() => setView('chat')} />
|
| 41 |
+
<TechStack />
|
| 42 |
+
<ProblemSolution />
|
| 43 |
+
<KeyCapabilities />
|
| 44 |
+
|
| 45 |
+
{/* Transitional background paths section */}
|
| 46 |
+
<BackgroundPaths
|
| 47 |
+
title="Intelligence Without Limits"
|
| 48 |
+
subtitle="The agent continuously learns from your specific domain, optimizing its own tools and reasoning strategies to solve your hardest data challenges."
|
| 49 |
+
/>
|
| 50 |
+
|
| 51 |
+
<Process />
|
| 52 |
+
</main>
|
| 53 |
+
|
| 54 |
+
<Footer />
|
| 55 |
+
</div>
|
| 56 |
+
);
|
| 57 |
+
};
|
| 58 |
+
|
| 59 |
+
export default App;
|
FRRONTEEEND/README.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div align="center">
|
| 2 |
+
<img width="1200" height="475" alt="GHBanner" src="https://github.com/user-attachments/assets/0aa67016-6eaf-458a-adb2-6e31a0763ed6" />
|
| 3 |
+
</div>
|
| 4 |
+
|
| 5 |
+
# Run and deploy your AI Studio app
|
| 6 |
+
|
| 7 |
+
This contains everything you need to run your app locally.
|
| 8 |
+
|
| 9 |
+
View your app in AI Studio: https://ai.studio/apps/drive/1gChoktTuh429q26FzxS4BPo0q0LnlRE9
|
| 10 |
+
|
| 11 |
+
## Run Locally
|
| 12 |
+
|
| 13 |
+
**Prerequisites:** Node.js
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
1. Install dependencies:
|
| 17 |
+
`npm install`
|
| 18 |
+
2. Set the `GEMINI_API_KEY` in [.env.local](.env.local) to your Gemini API key
|
| 19 |
+
3. Run the app:
|
| 20 |
+
`npm run dev`
|
FRRONTEEEND/components/BackgroundPaths.tsx
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import React from "react";
|
| 3 |
+
import { motion } from "framer-motion";
|
| 4 |
+
import { ArrowRight } from "lucide-react";
|
| 5 |
+
import { cn } from "../lib/utils";
|
| 6 |
+
|
| 7 |
+
function FloatingPaths({ position }: { position: number }) {
|
| 8 |
+
const paths = Array.from({ length: 36 }, (_, i) => ({
|
| 9 |
+
id: i,
|
| 10 |
+
d: `M-${380 - i * 5 * position} -${189 + i * 6}C-${
|
| 11 |
+
380 - i * 5 * position
|
| 12 |
+
} -${189 + i * 6} -${312 - i * 5 * position} ${216 - i * 6} ${
|
| 13 |
+
152 - i * 5 * position
|
| 14 |
+
} ${343 - i * 6}C${616 - i * 5 * position} ${470 - i * 6} ${
|
| 15 |
+
684 - i * 5 * position
|
| 16 |
+
} ${875 - i * 6} ${684 - i * 5 * position} ${875 - i * 6}`,
|
| 17 |
+
color: `rgba(99,102,241,${0.05 + i * 0.01})`, // Using indigo-500 tint
|
| 18 |
+
width: 0.5 + i * 0.03,
|
| 19 |
+
}));
|
| 20 |
+
|
| 21 |
+
return (
|
| 22 |
+
<div className="absolute inset-0 pointer-events-none">
|
| 23 |
+
<svg
|
| 24 |
+
className="w-full h-full text-indigo-500/20"
|
| 25 |
+
viewBox="0 0 696 316"
|
| 26 |
+
fill="none"
|
| 27 |
+
>
|
| 28 |
+
<title>Background Paths</title>
|
| 29 |
+
{paths.map((path) => (
|
| 30 |
+
<motion.path
|
| 31 |
+
key={path.id}
|
| 32 |
+
d={path.d}
|
| 33 |
+
stroke="currentColor"
|
| 34 |
+
strokeWidth={path.width}
|
| 35 |
+
strokeOpacity={0.1 + path.id * 0.02}
|
| 36 |
+
initial={{ pathLength: 0.3, opacity: 0.4 }}
|
| 37 |
+
animate={{
|
| 38 |
+
pathLength: 1,
|
| 39 |
+
opacity: [0.2, 0.5, 0.2],
|
| 40 |
+
pathOffset: [0, 1, 0],
|
| 41 |
+
}}
|
| 42 |
+
transition={{
|
| 43 |
+
duration: 15 + Math.random() * 10,
|
| 44 |
+
repeat: Number.POSITIVE_INFINITY,
|
| 45 |
+
ease: "linear",
|
| 46 |
+
}}
|
| 47 |
+
/>
|
| 48 |
+
))}
|
| 49 |
+
</svg>
|
| 50 |
+
</div>
|
| 51 |
+
);
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
export function BackgroundPaths({
|
| 55 |
+
title = "The Future is Autonomous",
|
| 56 |
+
subtitle = "Scale your data engineering and predictive modeling beyond human limits.",
|
| 57 |
+
}: {
|
| 58 |
+
title?: string;
|
| 59 |
+
subtitle?: string;
|
| 60 |
+
}) {
|
| 61 |
+
const words = title.split(" ");
|
| 62 |
+
|
| 63 |
+
return (
|
| 64 |
+
<section className="relative min-h-[80vh] w-full flex items-center justify-center overflow-hidden bg-[#030303]">
|
| 65 |
+
<div className="absolute inset-0">
|
| 66 |
+
<FloatingPaths position={1} />
|
| 67 |
+
<FloatingPaths position={-1} />
|
| 68 |
+
</div>
|
| 69 |
+
|
| 70 |
+
<div className="relative z-10 container mx-auto px-4 md:px-6 text-center">
|
| 71 |
+
<motion.div
|
| 72 |
+
initial={{ opacity: 0 }}
|
| 73 |
+
animate={{ opacity: 1 }}
|
| 74 |
+
transition={{ duration: 2 }}
|
| 75 |
+
className="max-w-4xl mx-auto"
|
| 76 |
+
>
|
| 77 |
+
<h2 className="text-5xl sm:text-6xl md:text-8xl font-extrabold mb-8 tracking-tighter">
|
| 78 |
+
{words.map((word, wordIndex) => (
|
| 79 |
+
<span
|
| 80 |
+
key={wordIndex}
|
| 81 |
+
className="inline-block mr-4 last:mr-0"
|
| 82 |
+
>
|
| 83 |
+
{word.split("").map((letter, letterIndex) => (
|
| 84 |
+
<motion.span
|
| 85 |
+
key={`${wordIndex}-${letterIndex}`}
|
| 86 |
+
initial={{ y: 50, opacity: 0 }}
|
| 87 |
+
whileInView={{ y: 0, opacity: 1 }}
|
| 88 |
+
viewport={{ once: true }}
|
| 89 |
+
transition={{
|
| 90 |
+
delay:
|
| 91 |
+
wordIndex * 0.1 +
|
| 92 |
+
letterIndex * 0.02,
|
| 93 |
+
type: "spring",
|
| 94 |
+
stiffness: 150,
|
| 95 |
+
damping: 25,
|
| 96 |
+
}}
|
| 97 |
+
className="inline-block text-transparent bg-clip-text
|
| 98 |
+
bg-gradient-to-r from-white via-white/90 to-white/70"
|
| 99 |
+
>
|
| 100 |
+
{letter}
|
| 101 |
+
</motion.span>
|
| 102 |
+
))}
|
| 103 |
+
</span>
|
| 104 |
+
))}
|
| 105 |
+
</h2>
|
| 106 |
+
|
| 107 |
+
<motion.p
|
| 108 |
+
initial={{ opacity: 0, y: 20 }}
|
| 109 |
+
whileInView={{ opacity: 1, y: 0 }}
|
| 110 |
+
viewport={{ once: true }}
|
| 111 |
+
transition={{ delay: 0.5 }}
|
| 112 |
+
className="text-white/40 text-xl font-medium mb-12 max-w-2xl mx-auto tracking-tight"
|
| 113 |
+
>
|
| 114 |
+
{subtitle}
|
| 115 |
+
</motion.p>
|
| 116 |
+
|
| 117 |
+
<motion.div
|
| 118 |
+
initial={{ opacity: 0, scale: 0.9 }}
|
| 119 |
+
whileInView={{ opacity: 1, scale: 1 }}
|
| 120 |
+
viewport={{ once: true }}
|
| 121 |
+
transition={{ delay: 0.8 }}
|
| 122 |
+
className="inline-block group relative bg-gradient-to-b from-white/10 to-indigo-500/10
|
| 123 |
+
p-px rounded-2xl backdrop-blur-lg
|
| 124 |
+
overflow-hidden shadow-lg hover:shadow-indigo-500/20 transition-all duration-300"
|
| 125 |
+
>
|
| 126 |
+
<button
|
| 127 |
+
className="rounded-[1.15rem] px-10 py-5 text-lg font-bold backdrop-blur-md
|
| 128 |
+
bg-white/95 hover:bg-white text-black transition-all duration-300
|
| 129 |
+
group-hover:-translate-y-0.5 border border-white/10
|
| 130 |
+
flex items-center gap-3"
|
| 131 |
+
>
|
| 132 |
+
<span className="opacity-90 group-hover:opacity-100 transition-opacity">
|
| 133 |
+
Deploy Your First Agent
|
| 134 |
+
</span>
|
| 135 |
+
<ArrowRight
|
| 136 |
+
className="w-5 h-5 opacity-70 group-hover:opacity-100 group-hover:translate-x-1.5
|
| 137 |
+
transition-all duration-300"
|
| 138 |
+
/>
|
| 139 |
+
</button>
|
| 140 |
+
</motion.div>
|
| 141 |
+
</motion.div>
|
| 142 |
+
</div>
|
| 143 |
+
|
| 144 |
+
{/* Subtle glow effect at the bottom */}
|
| 145 |
+
<div className="absolute bottom-0 left-1/2 -translate-x-1/2 w-full h-px bg-gradient-to-r from-transparent via-indigo-500/50 to-transparent shadow-[0_0_50px_2px_rgba(99,102,241,0.2)]" />
|
| 146 |
+
</section>
|
| 147 |
+
);
|
| 148 |
+
}
|
FRRONTEEEND/components/ChatInterface.tsx
ADDED
|
@@ -0,0 +1,571 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import React, { useState, useRef, useEffect } from 'react';
|
| 3 |
+
import { motion, AnimatePresence } from 'framer-motion';
|
| 4 |
+
import { Send, Plus, Search, Settings, MoreHorizontal, User, Bot, ArrowLeft, Paperclip, Sparkles, Trash2, X, Upload } from 'lucide-react';
|
| 5 |
+
import { cn } from '../lib/utils';
|
| 6 |
+
import { Logo } from './Logo';
|
| 7 |
+
import ReactMarkdown from 'react-markdown';
|
| 8 |
+
|
| 9 |
+
interface Message {
|
| 10 |
+
id: string;
|
| 11 |
+
role: 'user' | 'assistant';
|
| 12 |
+
content: string;
|
| 13 |
+
timestamp: Date;
|
| 14 |
+
file?: {
|
| 15 |
+
name: string;
|
| 16 |
+
size: number;
|
| 17 |
+
};
|
| 18 |
+
reports?: Array<{
|
| 19 |
+
name: string;
|
| 20 |
+
path: string;
|
| 21 |
+
}>;
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
interface ChatSession {
|
| 25 |
+
id: string;
|
| 26 |
+
title: string;
|
| 27 |
+
messages: Message[];
|
| 28 |
+
updatedAt: Date;
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
export const ChatInterface: React.FC<{ onBack: () => void }> = ({ onBack }) => {
|
| 32 |
+
const [sessions, setSessions] = useState<ChatSession[]>([
|
| 33 |
+
{
|
| 34 |
+
id: '1',
|
| 35 |
+
title: 'ML Model Analysis',
|
| 36 |
+
messages: [],
|
| 37 |
+
updatedAt: new Date(),
|
| 38 |
+
}
|
| 39 |
+
]);
|
| 40 |
+
const [activeSessionId, setActiveSessionId] = useState('1');
|
| 41 |
+
const [input, setInput] = useState('');
|
| 42 |
+
const [isTyping, setIsTyping] = useState(false);
|
| 43 |
+
const [uploadedFile, setUploadedFile] = useState<File | null>(null);
|
| 44 |
+
const [reportModalUrl, setReportModalUrl] = useState<string | null>(null);
|
| 45 |
+
const fileInputRef = useRef<HTMLInputElement>(null);
|
| 46 |
+
const scrollRef = useRef<HTMLDivElement>(null);
|
| 47 |
+
|
| 48 |
+
const activeSession = sessions.find(s => s.id === activeSessionId) || sessions[0];
|
| 49 |
+
|
| 50 |
+
useEffect(() => {
|
| 51 |
+
if (scrollRef.current) {
|
| 52 |
+
scrollRef.current.scrollTop = scrollRef.current.scrollHeight;
|
| 53 |
+
}
|
| 54 |
+
}, [activeSession.messages, isTyping]);
|
| 55 |
+
|
| 56 |
+
const handleSend = async () => {
|
| 57 |
+
if ((!input.trim() && !uploadedFile) || isTyping) return;
|
| 58 |
+
|
| 59 |
+
const userMessage: Message = {
|
| 60 |
+
id: Date.now().toString(),
|
| 61 |
+
role: 'user',
|
| 62 |
+
content: input || (uploadedFile ? `Uploaded: ${uploadedFile.name}` : ''),
|
| 63 |
+
timestamp: new Date(),
|
| 64 |
+
file: uploadedFile ? { name: uploadedFile.name, size: uploadedFile.size } : undefined,
|
| 65 |
+
};
|
| 66 |
+
|
| 67 |
+
const newMessages = [...activeSession.messages, userMessage];
|
| 68 |
+
updateSession(activeSessionId, newMessages);
|
| 69 |
+
setInput('');
|
| 70 |
+
setIsTyping(true);
|
| 71 |
+
|
| 72 |
+
try {
|
| 73 |
+
// Use the current origin if running on same server, otherwise use env variable
|
| 74 |
+
const API_URL = window.location.origin;
|
| 75 |
+
console.log('API URL:', API_URL);
|
| 76 |
+
|
| 77 |
+
let response;
|
| 78 |
+
|
| 79 |
+
if (uploadedFile) {
|
| 80 |
+
const formData = new FormData();
|
| 81 |
+
formData.append('file', uploadedFile);
|
| 82 |
+
formData.append('task_description', input || 'Analyze this dataset and provide insights');
|
| 83 |
+
formData.append('use_cache', 'true');
|
| 84 |
+
formData.append('max_iterations', '20');
|
| 85 |
+
|
| 86 |
+
response = await fetch(`${API_URL}/run`, {
|
| 87 |
+
method: 'POST',
|
| 88 |
+
body: formData
|
| 89 |
+
});
|
| 90 |
+
|
| 91 |
+
setUploadedFile(null);
|
| 92 |
+
} else {
|
| 93 |
+
response = await fetch(`${API_URL}/chat`, {
|
| 94 |
+
method: 'POST',
|
| 95 |
+
headers: {
|
| 96 |
+
'Content-Type': 'application/json',
|
| 97 |
+
},
|
| 98 |
+
body: JSON.stringify({
|
| 99 |
+
messages: newMessages.map(m => ({
|
| 100 |
+
role: m.role,
|
| 101 |
+
content: m.content
|
| 102 |
+
})),
|
| 103 |
+
stream: false
|
| 104 |
+
})
|
| 105 |
+
});
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
if (!response.ok) {
|
| 109 |
+
throw new Error(`API error: ${response.status}`);
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
const data = await response.json();
|
| 113 |
+
|
| 114 |
+
let assistantContent = '';
|
| 115 |
+
let reports: Array<{name: string, path: string}> = [];
|
| 116 |
+
|
| 117 |
+
if (uploadedFile && data.result) {
|
| 118 |
+
const result = data.result;
|
| 119 |
+
assistantContent = `✅ Analysis Complete!\n\n`;
|
| 120 |
+
|
| 121 |
+
// Extract report paths from workflow history
|
| 122 |
+
if (result.workflow_history) {
|
| 123 |
+
const reportTools = ['generate_ydata_profiling_report', 'generate_sweetviz_report', 'generate_combined_eda_report'];
|
| 124 |
+
result.workflow_history.forEach((step: any) => {
|
| 125 |
+
if (reportTools.includes(step.tool)) {
|
| 126 |
+
// Check multiple possible locations for the report path
|
| 127 |
+
const reportPath = step.result?.output_path || step.result?.report_path || step.arguments?.output_path;
|
| 128 |
+
|
| 129 |
+
if (reportPath && (step.result?.success !== false)) {
|
| 130 |
+
reports.push({
|
| 131 |
+
name: step.tool.replace('generate_', '').replace(/_/g, ' ').replace('report', '').trim(),
|
| 132 |
+
path: reportPath
|
| 133 |
+
});
|
| 134 |
+
}
|
| 135 |
+
}
|
| 136 |
+
});
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
// Also check for report paths mentioned in the summary text
|
| 140 |
+
if (result.summary && !reports.length) {
|
| 141 |
+
const reportPathMatch = result.summary.match(/\.(\/outputs\/reports\/[^\s]+\.html)/);
|
| 142 |
+
if (reportPathMatch) {
|
| 143 |
+
reports.push({
|
| 144 |
+
name: 'ydata profiling',
|
| 145 |
+
path: reportPathMatch[1]
|
| 146 |
+
});
|
| 147 |
+
}
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
if (result.summary) {
|
| 151 |
+
assistantContent += `**Summary:**\n${result.summary}\n\n`;
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
if (result.workflow_history && result.workflow_history.length > 0) {
|
| 155 |
+
assistantContent += `**Tools Used:** ${result.workflow_history.length} steps\n\n`;
|
| 156 |
+
assistantContent += `**Final Result:**\n${result.final_result || 'Analysis completed successfully'}`;
|
| 157 |
+
}
|
| 158 |
+
} else if (data.success && data.message) {
|
| 159 |
+
assistantContent = data.message;
|
| 160 |
+
} else {
|
| 161 |
+
throw new Error('Invalid response from API');
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
updateSession(activeSessionId, [...newMessages, {
|
| 165 |
+
id: (Date.now() + 1).toString(),
|
| 166 |
+
role: 'assistant',
|
| 167 |
+
content: assistantContent,
|
| 168 |
+
timestamp: new Date(),
|
| 169 |
+
reports: reports.length > 0 ? reports : undefined
|
| 170 |
+
}]);
|
| 171 |
+
} catch (error: any) {
|
| 172 |
+
console.error("Chat Error:", error);
|
| 173 |
+
|
| 174 |
+
let errorMessage = "I'm sorry, I encountered an error processing your request.";
|
| 175 |
+
|
| 176 |
+
if (error.message) {
|
| 177 |
+
errorMessage += `\n\n**Error:** ${error.message}`;
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
// Try to parse response error
|
| 181 |
+
try {
|
| 182 |
+
const errorText = await error.text?.();
|
| 183 |
+
if (errorText) {
|
| 184 |
+
const errorData = JSON.parse(errorText);
|
| 185 |
+
if (errorData.detail) {
|
| 186 |
+
errorMessage = `**Error:** ${typeof errorData.detail === 'string' ? errorData.detail : JSON.stringify(errorData.detail)}`;
|
| 187 |
+
}
|
| 188 |
+
}
|
| 189 |
+
} catch (e) {
|
| 190 |
+
// Ignore parsing errors
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
updateSession(activeSessionId, [...newMessages, {
|
| 194 |
+
id: 'err-' + Date.now(),
|
| 195 |
+
role: 'assistant',
|
| 196 |
+
content: errorMessage,
|
| 197 |
+
timestamp: new Date()
|
| 198 |
+
}]);
|
| 199 |
+
} finally {
|
| 200 |
+
setIsTyping(false);
|
| 201 |
+
}
|
| 202 |
+
};
|
| 203 |
+
|
| 204 |
+
const updateSession = (id: string, messages: Message[]) => {
|
| 205 |
+
setSessions(prev => prev.map(s => {
|
| 206 |
+
if (s.id === id) {
|
| 207 |
+
return { ...s, messages, updatedAt: new Date() };
|
| 208 |
+
}
|
| 209 |
+
return s;
|
| 210 |
+
}));
|
| 211 |
+
};
|
| 212 |
+
|
| 213 |
+
const createNewChat = () => {
|
| 214 |
+
const newId = Date.now().toString();
|
| 215 |
+
const newSession: ChatSession = {
|
| 216 |
+
id: newId,
|
| 217 |
+
title: 'New Chat',
|
| 218 |
+
messages: [],
|
| 219 |
+
updatedAt: new Date()
|
| 220 |
+
};
|
| 221 |
+
setSessions([newSession, ...sessions]);
|
| 222 |
+
setActiveSessionId(newId);
|
| 223 |
+
};
|
| 224 |
+
|
| 225 |
+
const deleteSession = (e: React.MouseEvent, id: string) => {
|
| 226 |
+
e.stopPropagation();
|
| 227 |
+
if (sessions.length === 1) return;
|
| 228 |
+
setSessions(prev => prev.filter(s => s.id !== id));
|
| 229 |
+
if (activeSessionId === id) {
|
| 230 |
+
setActiveSessionId(sessions.find(s => s.id !== id)?.id || '');
|
| 231 |
+
}
|
| 232 |
+
};
|
| 233 |
+
|
| 234 |
+
const handleFileSelect = (e: React.ChangeEvent<HTMLInputElement>) => {
|
| 235 |
+
const file = e.target.files?.[0];
|
| 236 |
+
if (file) {
|
| 237 |
+
const validTypes = ['.csv', '.parquet'];
|
| 238 |
+
const fileExt = file.name.substring(file.name.lastIndexOf('.')).toLowerCase();
|
| 239 |
+
|
| 240 |
+
if (validTypes.includes(fileExt)) {
|
| 241 |
+
setUploadedFile(file);
|
| 242 |
+
} else {
|
| 243 |
+
alert('Please upload a CSV or Parquet file');
|
| 244 |
+
}
|
| 245 |
+
}
|
| 246 |
+
};
|
| 247 |
+
|
| 248 |
+
const removeFile = () => {
|
| 249 |
+
setUploadedFile(null);
|
| 250 |
+
if (fileInputRef.current) {
|
| 251 |
+
fileInputRef.current.value = '';
|
| 252 |
+
}
|
| 253 |
+
};
|
| 254 |
+
|
| 255 |
+
return (
|
| 256 |
+
<div className="flex h-screen w-full bg-[#050505] overflow-hidden text-white/90">
|
| 257 |
+
{/* Sidebar */}
|
| 258 |
+
<aside className="w-[280px] hidden md:flex flex-col border-r border-white/5 bg-[#0a0a0a]/50 backdrop-blur-xl">
|
| 259 |
+
<div className="p-4 flex flex-col h-full">
|
| 260 |
+
<div className="flex items-center gap-3 mb-8 px-2">
|
| 261 |
+
<Logo className="w-8 h-8" />
|
| 262 |
+
<span className="font-bold tracking-tight text-sm uppercase">Console</span>
|
| 263 |
+
</div>
|
| 264 |
+
|
| 265 |
+
<button
|
| 266 |
+
onClick={createNewChat}
|
| 267 |
+
className="w-full flex items-center gap-3 px-4 py-3 rounded-xl bg-white/5 hover:bg-white/10 border border-white/10 transition-all text-sm font-medium mb-6 group"
|
| 268 |
+
>
|
| 269 |
+
<Plus className="w-4 h-4 group-hover:scale-110 transition-transform" />
|
| 270 |
+
New Conversation
|
| 271 |
+
</button>
|
| 272 |
+
|
| 273 |
+
<div className="flex-1 overflow-y-auto space-y-2 custom-scrollbar">
|
| 274 |
+
<p className="px-3 text-[10px] uppercase tracking-widest text-white/30 font-bold mb-2">History</p>
|
| 275 |
+
{sessions.map(session => (
|
| 276 |
+
<div
|
| 277 |
+
key={session.id}
|
| 278 |
+
onClick={() => setActiveSessionId(session.id)}
|
| 279 |
+
className={cn(
|
| 280 |
+
"group flex items-center justify-between px-4 py-3 rounded-xl cursor-pointer transition-all text-sm",
|
| 281 |
+
activeSessionId === session.id
|
| 282 |
+
? "bg-white/10 text-white border border-white/10 shadow-lg"
|
| 283 |
+
: "text-white/40 hover:text-white/70 hover:bg-white/5"
|
| 284 |
+
)}
|
| 285 |
+
>
|
| 286 |
+
<span className="truncate flex-1 pr-2">{session.title}</span>
|
| 287 |
+
<Trash2
|
| 288 |
+
onClick={(e) => deleteSession(e, session.id)}
|
| 289 |
+
className="w-4 h-4 opacity-0 group-hover:opacity-100 hover:text-rose-400 transition-all"
|
| 290 |
+
/>
|
| 291 |
+
</div>
|
| 292 |
+
))}
|
| 293 |
+
</div>
|
| 294 |
+
|
| 295 |
+
<div className="mt-auto pt-4 border-t border-white/5 flex items-center justify-between px-2">
|
| 296 |
+
<button onClick={onBack} className="p-2 hover:bg-white/5 rounded-lg transition-colors text-white/40 hover:text-white">
|
| 297 |
+
<ArrowLeft className="w-5 h-5" />
|
| 298 |
+
</button>
|
| 299 |
+
<div className="flex gap-2">
|
| 300 |
+
<button className="p-2 hover:bg-white/5 rounded-lg transition-colors text-white/40 hover:text-white">
|
| 301 |
+
<Settings className="w-5 h-5" />
|
| 302 |
+
</button>
|
| 303 |
+
<button className="p-2 hover:bg-white/5 rounded-lg transition-colors text-white/40 hover:text-white">
|
| 304 |
+
<User className="w-5 h-5" />
|
| 305 |
+
</button>
|
| 306 |
+
</div>
|
| 307 |
+
</div>
|
| 308 |
+
</div>
|
| 309 |
+
</aside>
|
| 310 |
+
|
| 311 |
+
{/* Main Chat Area */}
|
| 312 |
+
<main className="flex-1 flex flex-col relative bg-gradient-to-b from-[#080808] to-[#050505]">
|
| 313 |
+
{/* Top Header */}
|
| 314 |
+
<header className="h-16 flex items-center justify-between px-6 border-b border-white/5 backdrop-blur-md bg-black/20 sticky top-0 z-10">
|
| 315 |
+
<div className="flex items-center gap-4">
|
| 316 |
+
<button onClick={onBack} className="md:hidden p-2 hover:bg-white/5 rounded-lg">
|
| 317 |
+
<ArrowLeft className="w-5 h-5" />
|
| 318 |
+
</button>
|
| 319 |
+
<div>
|
| 320 |
+
<h2 className="text-sm font-bold text-white tracking-tight">{activeSession.title}</h2>
|
| 321 |
+
<p className="text-[10px] text-white/30 font-medium">{activeSession.messages.length} messages in session</p>
|
| 322 |
+
</div>
|
| 323 |
+
</div>
|
| 324 |
+
<div className="flex items-center gap-3">
|
| 325 |
+
<button className="p-2 text-white/40 hover:text-white transition-colors">
|
| 326 |
+
<Search className="w-5 h-5" />
|
| 327 |
+
</button>
|
| 328 |
+
<button className="p-2 text-white/40 hover:text-white transition-colors">
|
| 329 |
+
<MoreHorizontal className="w-5 h-5" />
|
| 330 |
+
</button>
|
| 331 |
+
</div>
|
| 332 |
+
</header>
|
| 333 |
+
|
| 334 |
+
{/* Message List */}
|
| 335 |
+
<div
|
| 336 |
+
ref={scrollRef}
|
| 337 |
+
className="flex-1 overflow-y-auto p-4 md:p-8 space-y-8 scroll-smooth"
|
| 338 |
+
>
|
| 339 |
+
{activeSession.messages.length === 0 ? (
|
| 340 |
+
<div className="h-full flex flex-col items-center justify-center text-center px-4">
|
| 341 |
+
<motion.div
|
| 342 |
+
initial={{ opacity: 0, scale: 0.9 }}
|
| 343 |
+
animate={{ opacity: 1, scale: 1 }}
|
| 344 |
+
className="w-16 h-16 bg-gradient-to-br from-indigo-500/20 to-rose-500/20 rounded-2xl flex items-center justify-center mb-6 border border-white/10"
|
| 345 |
+
>
|
| 346 |
+
<Sparkles className="w-8 h-8 text-indigo-400" />
|
| 347 |
+
</motion.div>
|
| 348 |
+
<h1 className="text-2xl font-extrabold text-white mb-3">Welcome, Data Scientist</h1>
|
| 349 |
+
<p className="text-white/40 max-w-sm leading-relaxed text-sm">
|
| 350 |
+
I'm your autonomous agent ready to profile data, train models, or build dashboards.
|
| 351 |
+
Try uploading a dataset or describing your ML objective.
|
| 352 |
+
</p>
|
| 353 |
+
<div className="grid grid-cols-1 sm:grid-cols-2 gap-3 mt-8 w-full max-w-lg">
|
| 354 |
+
{[
|
| 355 |
+
"Profile my sales.csv",
|
| 356 |
+
"Train a XGBoost classifier",
|
| 357 |
+
"Generate a correlation heatmap",
|
| 358 |
+
"Explain feature importance"
|
| 359 |
+
].map(prompt => (
|
| 360 |
+
<button
|
| 361 |
+
key={prompt}
|
| 362 |
+
onClick={() => setInput(prompt)}
|
| 363 |
+
className="text-left px-4 py-3 rounded-xl bg-white/[0.03] border border-white/5 hover:bg-white/5 transition-all text-xs text-white/60 hover:text-white"
|
| 364 |
+
>
|
| 365 |
+
"{prompt}"
|
| 366 |
+
</button>
|
| 367 |
+
))}
|
| 368 |
+
</div>
|
| 369 |
+
</div>
|
| 370 |
+
) : (
|
| 371 |
+
activeSession.messages.map((msg) => (
|
| 372 |
+
<motion.div
|
| 373 |
+
key={msg.id}
|
| 374 |
+
initial={{ opacity: 0, y: 10 }}
|
| 375 |
+
animate={{ opacity: 1, y: 0 }}
|
| 376 |
+
className={cn(
|
| 377 |
+
"flex w-full gap-4",
|
| 378 |
+
msg.role === 'user' ? "flex-row-reverse" : "flex-row"
|
| 379 |
+
)}
|
| 380 |
+
>
|
| 381 |
+
<div className={cn(
|
| 382 |
+
"w-8 h-8 rounded-lg flex items-center justify-center shrink-0 border border-white/10",
|
| 383 |
+
msg.role === 'user' ? "bg-indigo-500/20" : "bg-white/5"
|
| 384 |
+
)}>
|
| 385 |
+
{msg.role === 'user' ? <User className="w-4 h-4" /> : <Bot className="w-4 h-4 text-indigo-400" />}
|
| 386 |
+
</div>
|
| 387 |
+
<div className={cn(
|
| 388 |
+
"max-w-[80%] md:max-w-[70%] p-4 rounded-2xl text-sm leading-relaxed",
|
| 389 |
+
msg.role === 'user'
|
| 390 |
+
? "bg-indigo-600/20 text-indigo-50 border border-indigo-500/20"
|
| 391 |
+
: "bg-white/[0.03] text-white/80 border border-white/5"
|
| 392 |
+
)}>
|
| 393 |
+
{msg.file && (
|
| 394 |
+
<div className="mb-2 flex items-center gap-2 text-xs bg-white/5 rounded-lg px-3 py-2 border border-white/10">
|
| 395 |
+
<Paperclip className="w-3 h-3" />
|
| 396 |
+
<span className="font-medium">{msg.file.name}</span>
|
| 397 |
+
<span className="text-white/40">({(msg.file.size / 1024).toFixed(1)} KB)</span>
|
| 398 |
+
</div>
|
| 399 |
+
)}
|
| 400 |
+
{msg.role === 'assistant' ? (
|
| 401 |
+
<ReactMarkdown
|
| 402 |
+
className="prose prose-invert prose-sm max-w-none prose-p:leading-relaxed prose-pre:bg-black/40 prose-pre:border prose-pre:border-white/10 prose-headings:text-white prose-strong:text-white prose-li:text-white/80"
|
| 403 |
+
components={{
|
| 404 |
+
p: ({node, ...props}) => <p className="mb-3 last:mb-0" {...props} />,
|
| 405 |
+
ul: ({node, ...props}) => <ul className="mb-3 space-y-1" {...props} />,
|
| 406 |
+
ol: ({node, ...props}) => <ol className="mb-3 space-y-1" {...props} />,
|
| 407 |
+
li: ({node, ...props}) => <li className="ml-4" {...props} />,
|
| 408 |
+
strong: ({node, ...props}) => <strong className="font-semibold text-white" {...props} />,
|
| 409 |
+
code: ({node, inline, ...props}: any) =>
|
| 410 |
+
inline ?
|
| 411 |
+
<code className="px-1.5 py-0.5 rounded bg-white/10 text-indigo-300 text-xs font-mono" {...props} /> :
|
| 412 |
+
<code className="block p-3 rounded-lg bg-black/40 border border-white/10 text-xs font-mono overflow-x-auto" {...props} />
|
| 413 |
+
}}
|
| 414 |
+
>
|
| 415 |
+
{msg.content || ''}
|
| 416 |
+
</ReactMarkdown>
|
| 417 |
+
) : (
|
| 418 |
+
msg.content || (msg.role === 'assistant' && isTyping && "...")
|
| 419 |
+
)}
|
| 420 |
+
{msg.reports && msg.reports.length > 0 && (
|
| 421 |
+
<div className="mt-4 flex flex-wrap gap-2">
|
| 422 |
+
{msg.reports.map((report, idx) => (
|
| 423 |
+
<button
|
| 424 |
+
key={idx}
|
| 425 |
+
onClick={() => setReportModalUrl(`${window.location.origin}${report.path}`)}
|
| 426 |
+
className="flex items-center gap-2 px-4 py-2 rounded-lg bg-indigo-500/20 hover:bg-indigo-500/30 border border-indigo-500/30 text-indigo-200 text-xs font-medium transition-all group"
|
| 427 |
+
>
|
| 428 |
+
<Sparkles className="w-3.5 h-3.5 group-hover:scale-110 transition-transform" />
|
| 429 |
+
View {report.name} Report
|
| 430 |
+
</button>
|
| 431 |
+
))}
|
| 432 |
+
</div>
|
| 433 |
+
)}
|
| 434 |
+
<div className="mt-2 text-[10px] opacity-20 font-mono">
|
| 435 |
+
{msg.timestamp.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' })}
|
| 436 |
+
</div>
|
| 437 |
+
</div>
|
| 438 |
+
</motion.div>
|
| 439 |
+
))
|
| 440 |
+
)}
|
| 441 |
+
{isTyping && activeSession.messages[activeSession.messages.length - 1]?.role === 'user' && (
|
| 442 |
+
<div className="flex gap-4">
|
| 443 |
+
<div className="w-8 h-8 rounded-lg flex items-center justify-center shrink-0 bg-white/5 border border-white/10">
|
| 444 |
+
<Bot className="w-4 h-4 text-indigo-400" />
|
| 445 |
+
</div>
|
| 446 |
+
<div className="bg-white/[0.03] p-4 rounded-2xl border border-white/5">
|
| 447 |
+
<div className="flex gap-1">
|
| 448 |
+
<span className="w-1.5 h-1.5 bg-white/20 rounded-full animate-bounce [animation-delay:-0.3s]"></span>
|
| 449 |
+
<span className="w-1.5 h-1.5 bg-white/20 rounded-full animate-bounce [animation-delay:-0.15s]"></span>
|
| 450 |
+
<span className="w-1.5 h-1.5 bg-white/20 rounded-full animate-bounce"></span>
|
| 451 |
+
</div>
|
| 452 |
+
</div>
|
| 453 |
+
</div>
|
| 454 |
+
)}
|
| 455 |
+
</div>
|
| 456 |
+
|
| 457 |
+
{/* Input Bar */}
|
| 458 |
+
<div className="p-4 md:p-8 pt-0">
|
| 459 |
+
<div className="max-w-4xl mx-auto relative">
|
| 460 |
+
<div className="absolute -top-10 left-4 flex gap-2">
|
| 461 |
+
<input
|
| 462 |
+
ref={fileInputRef}
|
| 463 |
+
type="file"
|
| 464 |
+
accept=".csv,.parquet"
|
| 465 |
+
onChange={handleFileSelect}
|
| 466 |
+
className="hidden"
|
| 467 |
+
id="file-upload"
|
| 468 |
+
/>
|
| 469 |
+
<label
|
| 470 |
+
htmlFor="file-upload"
|
| 471 |
+
className="flex items-center gap-1.5 px-3 py-1 rounded-full bg-white/[0.03] border border-white/5 text-[10px] text-white/40 hover:text-white hover:bg-white/5 transition-all cursor-pointer"
|
| 472 |
+
>
|
| 473 |
+
<Upload className="w-3 h-3" /> Upload Dataset
|
| 474 |
+
</label>
|
| 475 |
+
{uploadedFile && (
|
| 476 |
+
<div className="flex items-center gap-2 px-3 py-1 rounded-full bg-indigo-500/20 border border-indigo-500/30 text-[10px] text-indigo-200">
|
| 477 |
+
<Paperclip className="w-3 h-3" />
|
| 478 |
+
<span className="max-w-[150px] truncate">{uploadedFile.name}</span>
|
| 479 |
+
<button onClick={removeFile} className="hover:text-white transition-colors">
|
| 480 |
+
<X className="w-3 h-3" />
|
| 481 |
+
</button>
|
| 482 |
+
</div>
|
| 483 |
+
)}
|
| 484 |
+
</div>
|
| 485 |
+
<div className="relative group">
|
| 486 |
+
<textarea
|
| 487 |
+
value={input}
|
| 488 |
+
onChange={(e) => setInput(e.target.value)}
|
| 489 |
+
onKeyDown={(e) => {
|
| 490 |
+
if (e.key === 'Enter' && !e.shiftKey) {
|
| 491 |
+
e.preventDefault();
|
| 492 |
+
handleSend();
|
| 493 |
+
}
|
| 494 |
+
}}
|
| 495 |
+
placeholder={uploadedFile ? "Describe what you want to do with this dataset..." : "Ask your agent anything or upload a dataset..."}
|
| 496 |
+
className="w-full bg-[#0d0d0d] border border-white/10 rounded-2xl p-4 pr-16 text-sm min-h-[56px] max-h-48 resize-none focus:outline-none focus:border-indigo-500/50 focus:ring-1 focus:ring-indigo-500/20 transition-all text-white/90 placeholder:text-white/20 shadow-2xl"
|
| 497 |
+
/>
|
| 498 |
+
<button
|
| 499 |
+
onClick={handleSend}
|
| 500 |
+
disabled={(!input.trim() && !uploadedFile) || isTyping}
|
| 501 |
+
className={cn(
|
| 502 |
+
"absolute right-3 bottom-3 p-2.5 rounded-xl transition-all",
|
| 503 |
+
(input.trim() || uploadedFile) && !isTyping
|
| 504 |
+
? "bg-white text-black hover:scale-105 active:scale-95"
|
| 505 |
+
: "bg-white/5 text-white/20 cursor-not-allowed"
|
| 506 |
+
)}
|
| 507 |
+
>
|
| 508 |
+
<Send className="w-4 h-4" />
|
| 509 |
+
</button>
|
| 510 |
+
</div>
|
| 511 |
+
<p className="text-center mt-3 text-[10px] text-white/20 font-medium">
|
| 512 |
+
Enterprise Data Agent v3.1 | Secured with end-to-end encryption
|
| 513 |
+
</p>
|
| 514 |
+
</div>
|
| 515 |
+
</div>
|
| 516 |
+
</main>
|
| 517 |
+
|
| 518 |
+
{/* Report Modal */}
|
| 519 |
+
<AnimatePresence>
|
| 520 |
+
{reportModalUrl && (
|
| 521 |
+
<motion.div
|
| 522 |
+
initial={{ opacity: 0 }}
|
| 523 |
+
animate={{ opacity: 1 }}
|
| 524 |
+
exit={{ opacity: 0 }}
|
| 525 |
+
className="fixed inset-0 bg-black/80 backdrop-blur-sm z-50 flex items-center justify-center p-4"
|
| 526 |
+
onClick={() => setReportModalUrl(null)}
|
| 527 |
+
>
|
| 528 |
+
<motion.div
|
| 529 |
+
initial={{ scale: 0.95, opacity: 0 }}
|
| 530 |
+
animate={{ scale: 1, opacity: 1 }}
|
| 531 |
+
exit={{ scale: 0.95, opacity: 0 }}
|
| 532 |
+
className="bg-[#0a0a0a] border border-white/10 rounded-2xl w-full max-w-7xl h-[90vh] flex flex-col overflow-hidden shadow-2xl"
|
| 533 |
+
onClick={(e) => e.stopPropagation()}
|
| 534 |
+
>
|
| 535 |
+
<div className="flex items-center justify-between p-4 border-b border-white/5">
|
| 536 |
+
<h3 className="text-lg font-semibold text-white">Data Profiling Report</h3>
|
| 537 |
+
<button
|
| 538 |
+
onClick={() => setReportModalUrl(null)}
|
| 539 |
+
className="p-2 rounded-lg hover:bg-white/5 transition-colors"
|
| 540 |
+
>
|
| 541 |
+
<X className="w-5 h-5" />
|
| 542 |
+
</button>
|
| 543 |
+
</div>
|
| 544 |
+
<iframe
|
| 545 |
+
src={reportModalUrl}
|
| 546 |
+
className="flex-1 w-full bg-white"
|
| 547 |
+
title="Report Viewer"
|
| 548 |
+
/>
|
| 549 |
+
</motion.div>
|
| 550 |
+
</motion.div>
|
| 551 |
+
)}
|
| 552 |
+
</AnimatePresence>
|
| 553 |
+
|
| 554 |
+
<style>{`
|
| 555 |
+
.custom-scrollbar::-webkit-scrollbar {
|
| 556 |
+
width: 4px;
|
| 557 |
+
}
|
| 558 |
+
.custom-scrollbar::-webkit-scrollbar-track {
|
| 559 |
+
background: transparent;
|
| 560 |
+
}
|
| 561 |
+
.custom-scrollbar::-webkit-scrollbar-thumb {
|
| 562 |
+
background: rgba(255, 255, 255, 0.05);
|
| 563 |
+
border-radius: 10px;
|
| 564 |
+
}
|
| 565 |
+
.custom-scrollbar::-webkit-scrollbar-thumb:hover {
|
| 566 |
+
background: rgba(255, 255, 255, 0.1);
|
| 567 |
+
}
|
| 568 |
+
`}</style>
|
| 569 |
+
</div>
|
| 570 |
+
);
|
| 571 |
+
};
|
FRRONTEEEND/components/Footer.tsx
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import React, { useRef, useId, useEffect } from 'react';
|
| 3 |
+
import { motion, animate, useMotionValue, AnimationPlaybackControls } from 'framer-motion';
|
| 4 |
+
import { ArrowRight } from 'lucide-react';
|
| 5 |
+
import { Logo } from './Logo';
|
| 6 |
+
|
| 7 |
+
function mapRange(
|
| 8 |
+
value: number,
|
| 9 |
+
fromLow: number,
|
| 10 |
+
fromHigh: number,
|
| 11 |
+
toLow: number,
|
| 12 |
+
toHigh: number
|
| 13 |
+
): number {
|
| 14 |
+
if (fromLow === fromHigh) {
|
| 15 |
+
return toLow;
|
| 16 |
+
}
|
| 17 |
+
const percentage = (value - fromLow) / (fromHigh - fromLow);
|
| 18 |
+
return toLow + percentage * (toHigh - toLow);
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
// Site footer: a full-width CTA section decorated with an animated SVG
// "atmospheric shadow" (turbulence + displacement filter whose hue is
// rotated continuously by framer-motion), followed by the standard
// logo / copyright / social-links row.
const Footer = () => {
  // useId can contain ':' characters, which are illegal in url(#...) filter
  // references — strip them to build a safe per-instance filter id.
  const id = useId().replace(/:/g, "");
  const instanceId = `footer-shadow-${id}`;
  const feColorMatrixRef = useRef<SVGFEColorMatrixElement>(null);
  const hueRotateMotionValue = useMotionValue(0);
  const hueRotateAnimation = useRef<AnimationPlaybackControls | null>(null);

  // Configuration from ShadowSection
  const animationScale = 50;
  const animationSpeed = 15;
  // Map the 1–100 "scale"/"speed" knobs onto the ranges the SVG filter and
  // the animation loop actually consume.
  const displacementScale = mapRange(animationScale, 1, 100, 20, 100);
  const animationDuration = mapRange(animationSpeed, 1, 100, 1000, 50);

  useEffect(() => {
    if (feColorMatrixRef.current) {
      // Drive the feColorMatrix hueRotate value 0→360 in an endless loop;
      // the DOM attribute is written directly on every frame (bypassing
      // React) for performance.
      hueRotateAnimation.current = animate(hueRotateMotionValue, 360, {
        duration: animationDuration / 25,
        repeat: Infinity,
        repeatType: "loop",
        ease: "linear",
        onUpdate: (value: number) => {
          if (feColorMatrixRef.current) {
            feColorMatrixRef.current.setAttribute("values", String(value));
          }
        }
      });
      // Stop the infinite animation when the component unmounts.
      return () => hueRotateAnimation.current?.stop();
    }
  }, [animationDuration, hueRotateMotionValue]);

  return (
    <footer className="bg-[#030303] overflow-hidden">
      {/* High-Impact CTA with Atmospheric Shadow UI */}
      <section className="relative w-full py-32 md:py-48 flex items-center justify-center border-t border-white/5">
        {/* Animated shadow layer: the whole container is run through the
            per-instance SVG filter defined just below. */}
        <div
          className="absolute inset-0 pointer-events-none overflow-hidden"
          style={{
            filter: `url(#${instanceId}) blur(12px)`,
            opacity: 0.8
          }}
        >
          {/* Zero-sized SVG exists only to host the filter definition. */}
          <svg style={{ position: "absolute", width: 0, height: 0 }}>
            <defs>
              <filter id={instanceId}>
                <feTurbulence
                  result="undulation"
                  numOctaves="2"
                  baseFrequency={`${mapRange(animationScale, 0, 100, 0.001, 0.0005)},${mapRange(animationScale, 0, 100, 0.004, 0.002)}`}
                  seed="0"
                  type="turbulence"
                />
                {/* Hue is animated at runtime via setAttribute in the effect above. */}
                <feColorMatrix
                  ref={feColorMatrixRef}
                  in="undulation"
                  type="hueRotate"
                  values="180"
                />
                <feColorMatrix
                  in="dist"
                  result="circulation"
                  type="matrix"
                  values="4 0 0 0 1 4 0 0 0 1 4 0 0 0 1 1 0 0 0 0"
                />
                <feDisplacementMap
                  in="SourceGraphic"
                  in2="circulation"
                  scale={displacementScale}
                  result="dist"
                />
                <feDisplacementMap
                  in="dist"
                  in2="undulation"
                  scale={displacementScale}
                  result="output"
                />
              </filter>
            </defs>
          </svg>
          {/* Indigo tint masked by a remote texture image; oversized by 20%
              so displaced edges never reveal the container boundary. */}
          <div
            style={{
              backgroundColor: 'rgba(99, 102, 241, 0.4)',
              maskImage: `url('https://framerusercontent.com/images/ceBGguIpUU8luwByxuQz79t7To.png')`,
              maskSize: "cover",
              maskRepeat: "no-repeat",
              maskPosition: "center",
              width: "120%",
              height: "120%",
              position: 'absolute',
              top: '-10%',
              left: '-10%'
            }}
          />
        </div>

        {/* Noise overlay */}
        <div
          className="absolute inset-0 pointer-events-none opacity-[0.03]"
          style={{
            backgroundImage: `url("https://framerusercontent.com/images/g0QcWrxr87K0ufOxIUFBakwYA8.png")`,
            backgroundSize: '100px',
            backgroundRepeat: "repeat",
          }}
        />

        {/* CTA copy + button, revealed once on scroll into view. */}
        <div className="relative z-20 max-w-7xl mx-auto px-6 text-center">
          <motion.div
            initial={{ opacity: 0, y: 30 }}
            whileInView={{ opacity: 1, y: 0 }}
            viewport={{ once: true }}
            transition={{ duration: 0.8 }}
          >
            <h2 className="text-4xl md:text-7xl font-extrabold text-white mb-8 tracking-tighter">
              Ready to automate your workflow?
            </h2>
            <p className="text-white/50 text-xl md:text-2xl mb-12 max-w-2xl mx-auto font-medium leading-relaxed">
              Build smarter ML workflows with AI autonomy. Join the next generation of data scientists.
            </p>
            {/* NOTE(review): button has no onClick handler — presumably wired up elsewhere or still TODO. */}
            <button className="group relative px-10 py-5 bg-white text-black font-extrabold rounded-2xl transition-all hover:scale-105 active:scale-95 shadow-[0_0_50px_-12px_rgba(255,255,255,0.5)] flex items-center gap-3 mx-auto">
              Get Started Now
              <ArrowRight className="w-5 h-5 group-hover:translate-x-1 transition-transform" />
            </button>
          </motion.div>
        </div>

        {/* Gradient fades to blend with rest of footer */}
        <div className="absolute inset-x-0 bottom-0 h-40 bg-gradient-to-t from-[#030303] to-transparent z-10" />
        <div className="absolute inset-x-0 top-0 h-40 bg-gradient-to-b from-[#030303] to-transparent z-10" />
      </section>

      {/* Main Footer Links */}
      <div className="max-w-7xl mx-auto px-6 pb-20">
        <div className="pt-8 border-t border-white/5 flex flex-col md:flex-row justify-between items-center gap-6">
          <div className="flex items-center gap-4">
            <Logo className="w-8 h-8" />
            <span className="text-white font-extrabold tracking-tight uppercase">DATA SCIENCE AGENT</span>
          </div>
          <div className="text-white/30 text-[10px] sm:text-xs font-semibold uppercase tracking-wider">
            © 2025 Data Science Agent. Built for the autonomous future.
          </div>
          <div className="flex gap-8 text-white/40 text-sm font-bold italic">
            <a href="#" className="hover:text-white transition-colors">Twitter</a>
            <a href="#" className="hover:text-white transition-colors">GitHub</a>
            <a href="#" className="hover:text-white transition-colors">Docs</a>
          </div>
        </div>
      </div>
    </footer>
  );
};

export default Footer;
|
FRRONTEEEND/components/HeroGeometric.tsx
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import React from 'react';
|
| 3 |
+
import { motion, Variants } from "framer-motion";
|
| 4 |
+
import { Circle, MessageSquare } from "lucide-react";
|
| 5 |
+
import { cn } from "../lib/utils";
|
| 6 |
+
|
| 7 |
+
// Decorative floating "pill" used as a hero background ornament: it fades
// in while dropping from -150px with a 15° rotation settle, then bobs
// vertically (±15px, 12s loop) forever. Purely visual, non-interactive.
function ElegantShape({
  className,
  delay = 0,
  width = 400,
  height = 100,
  rotate = 0,
  gradient = "from-white/[0.08]",
}: {
  className?: string; // positioning classes supplied by the caller
  delay?: number;     // entrance delay in seconds
  width?: number;     // pill width in px
  height?: number;    // pill height in px
  rotate?: number;    // final resting rotation in degrees
  gradient?: string;  // tailwind "from-*" gradient class for the tint
}) {
  return (
    // Outer layer: one-shot entrance animation (fade + drop + rotation settle).
    <motion.div
      initial={{
        opacity: 0,
        y: -150,
        rotate: rotate - 15,
      }}
      animate={{
        opacity: 1,
        y: 0,
        rotate: rotate,
      }}
      transition={{
        duration: 2.4,
        delay,
        ease: [0.23, 0.86, 0.39, 0.96],
        opacity: { duration: 1.2 },
      }}
      className={cn("absolute", className)}
    >
      {/* Inner layer: endless gentle vertical bob, independent of the entrance. */}
      <motion.div
        animate={{
          y: [0, 15, 0],
        }}
        transition={{
          duration: 12,
          repeat: Number.POSITIVE_INFINITY,
          ease: "easeInOut",
        }}
        style={{
          width,
          height,
        }}
        className="relative"
      >
        {/* The visible pill: gradient fill, blurred border, radial highlight. */}
        <div
          className={cn(
            "absolute inset-0 rounded-full",
            "bg-gradient-to-r to-transparent",
            gradient,
            "backdrop-blur-[2px] border-2 border-white/[0.15]",
            "shadow-[0_8px_32px_0_rgba(255,255,255,0.1)]",
            "after:absolute after:inset-0 after:rounded-full",
            "after:bg-[radial-gradient(circle_at_50%_50%,rgba(255,255,255,0.2),transparent_70%)]"
          )}
        />
      </motion.div>
    </motion.div>
  );
}
|
| 72 |
+
|
| 73 |
+
// Full-viewport landing hero: animated badge, two-line headline, tagline, and a
// single CTA button. Decorative ElegantShape elements float behind the content.
export function HeroGeometric({
  badge = "Autonomous AI for Data Science",
  title1 = "DATA SCIENCE AGENT",
  title2 = "Autonomous AI for End-to-End ML",
  onChatClick,
}: {
  badge?: string;
  title1?: string;
  title2?: string;
  onChatClick?: () => void;  // invoked by the "Chat Now" button
}) {
  // Staggered fade-up: each element passes its index via `custom`, so element i
  // starts 0.5s + i * 0.2s after mount.
  const fadeUpVariants: Variants = {
    hidden: { opacity: 0, y: 30 },
    visible: (i: number) => ({
      opacity: 1,
      y: 0,
      transition: {
        duration: 1,
        delay: 0.5 + i * 0.2,
        ease: [0.25, 0.4, 0.25, 1] as [number, number, number, number],
      },
    }),
  };

  return (
    <div className="relative min-h-screen w-full flex items-center justify-center overflow-hidden bg-[#030303]">
      {/* Soft color wash across the whole hero */}
      <div className="absolute inset-0 bg-gradient-to-br from-indigo-500/[0.05] via-transparent to-rose-500/[0.05] blur-3xl" />

      {/* Decorative floating shapes; positions/rotations are purely visual */}
      <div className="absolute inset-0 overflow-hidden">
        <ElegantShape
          delay={0.3}
          width={600}
          height={140}
          rotate={12}
          gradient="from-indigo-500/[0.15]"
          className="left-[-10%] md:left-[-5%] top-[15%] md:top-[20%]"
        />
        <ElegantShape
          delay={0.5}
          width={500}
          height={120}
          rotate={-15}
          gradient="from-rose-500/[0.15]"
          className="right-[-5%] md:right-[0%] top-[70%] md:top-[75%]"
        />
        <ElegantShape
          delay={0.4}
          width={300}
          height={80}
          rotate={-8}
          gradient="from-violet-500/[0.15]"
          className="left-[5%] md:left-[10%] bottom-[5%] md:bottom-[10%]"
        />
        <ElegantShape
          delay={0.6}
          width={200}
          height={60}
          rotate={20}
          gradient="from-amber-500/[0.15]"
          className="right-[15%] md:right-[20%] top-[10%] md:top-[15%]"
        />
        <ElegantShape
          delay={0.7}
          width={150}
          height={40}
          rotate={-25}
          gradient="from-cyan-500/[0.15]"
          className="left-[20%] md:left-[25%] top-[5%] md:top-[10%]"
        />
      </div>

      <div className="relative z-10 container mx-auto px-4 md:px-6">
        <div className="max-w-4xl mx-auto text-center">
          {/* Badge pill (animation index 0) */}
          <motion.div
            custom={0}
            variants={fadeUpVariants}
            initial="hidden"
            animate="visible"
            className="inline-flex items-center gap-2 px-4 py-1.5 rounded-full bg-white/[0.03] border border-white/[0.08] mb-6 md:mb-10"
          >
            <Circle className="h-2 w-2 fill-indigo-500/80" />
            <span className="text-xs font-semibold text-white/60 tracking-[0.1em] uppercase">
              {badge}
            </span>
          </motion.div>

          {/* Headline (animation index 1) */}
          <motion.div
            custom={1}
            variants={fadeUpVariants}
            initial="hidden"
            animate="visible"
          >
            <h1 className="text-3xl sm:text-4xl md:text-6xl font-extrabold mb-6 md:mb-8 tracking-tight leading-[1.1]">
              <span className="bg-clip-text text-transparent bg-gradient-to-b from-white to-white/80">
                {title1}
              </span>
              <br />
              <span
                className={cn(
                  "bg-clip-text text-transparent bg-gradient-to-r from-indigo-300 via-white/90 to-rose-300"
                )}
              >
                {title2}
              </span>
            </h1>
          </motion.div>

          {/* Tagline (animation index 2) */}
          <motion.div
            custom={2}
            variants={fadeUpVariants}
            initial="hidden"
            animate="visible"
          >
            <p className="text-sm sm:text-base md:text-lg text-white/40 mb-10 leading-relaxed font-normal tracking-tight max-w-xl mx-auto px-4">
              Upload your data. Describe your goal.
              Let AI handle profiling, modeling, visualization, and strategic insights autonomously.
            </p>
          </motion.div>

          {/* CTA (animation index 3) */}
          <motion.div
            custom={3}
            variants={fadeUpVariants}
            initial="hidden"
            animate="visible"
            className="flex flex-col sm:flex-row items-center justify-center gap-4 px-4"
          >
            <button
              onClick={onChatClick}
              className="w-full sm:w-auto px-8 py-3.5 bg-white text-black font-bold rounded-xl hover:bg-white/90 transition-all flex items-center justify-center gap-2 group text-sm shadow-xl"
            >
              Chat Now
              <MessageSquare className="w-4 h-4 fill-black group-hover:translate-x-0.5 transition-transform" />
            </button>
          </motion.div>
        </div>
      </div>

      {/* Top/bottom vignette so the hero fades into the page background */}
      <div className="absolute inset-0 bg-gradient-to-t from-[#030303] via-transparent to-[#030303]/80 pointer-events-none" />
    </div>
  );
}
|
FRRONTEEEND/components/KeyCapabilities.tsx
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import React from 'react';
|
| 3 |
+
import { motion } from 'framer-motion';
|
| 4 |
+
import { Database, Wrench, Cpu, Brain, LineChart, Server } from 'lucide-react';
|
| 5 |
+
import { cn } from '../lib/utils';
|
| 6 |
+
|
| 7 |
+
// Static content for the capability cards: title, blurb, lucide icon, icon
// gradient, and per-card hover classes (tint, border, glow).
const capabilities = [
  {
    title: "Autonomous ML Pipelines",
    description: "End-to-end automation from profiling to deployment without manual coding.",
    icon: Database,
    color: "from-blue-500/20 to-cyan-500/20",
    hover: "hover:bg-blue-500/10 hover:border-blue-500/30 hover:shadow-[0_0_30px_-10px_rgba(59,130,246,0.2)]"
  },
  {
    title: "82+ Specialized Tools",
    description: "An extensive arsenal for cleaning, statistical testing, and predictive modeling.",
    icon: Wrench,
    color: "from-purple-500/20 to-pink-500/20",
    hover: "hover:bg-pink-500/10 hover:border-pink-500/30 hover:shadow-[0_0_30px_-10px_rgba(236,72,153,0.2)]"
  },
  {
    title: "Dual LLM Intelligence",
    description: "Orchestrated by Groq (for speed) and Gemini (for deep reasoning).",
    icon: Brain,
    color: "from-orange-500/20 to-amber-500/20",
    hover: "hover:bg-amber-500/10 hover:border-amber-500/30 hover:shadow-[0_0_30px_-10px_rgba(245,158,11,0.2)]"
  },
  {
    title: "Session Memory",
    description: "Maintains context across complex workflows, allowing for iterative refinement.",
    icon: Cpu,
    color: "from-emerald-500/20 to-teal-500/20",
    hover: "hover:bg-emerald-500/10 hover:border-emerald-500/30 hover:shadow-[0_0_30px_-10px_rgba(16,185,129,0.2)]"
  },
  {
    title: "Visual Insights",
    description: "Automatic generation of publication-quality charts and explainability reports.",
    icon: LineChart,
    color: "from-indigo-500/20 to-blue-500/20",
    hover: "hover:bg-indigo-500/10 hover:border-indigo-500/30 hover:shadow-[0_0_30px_-10px_rgba(99,102,241,0.2)]"
  },
  {
    title: "Cloud Run Ready",
    description: "Deploy your optimized models directly to production-grade cloud environments.",
    icon: Server,
    color: "from-rose-500/20 to-red-500/20",
    hover: "hover:bg-rose-500/10 hover:border-rose-500/30 hover:shadow-[0_0_30px_-10px_rgba(244,63,94,0.2)]"
  }
];

// "Powerful Orchestration" section: renders the capability cards in a
// responsive 1/2/3-column grid, each fading up as it scrolls into view.
const KeyCapabilities = () => {
  return (
    <section id="features" className="py-24 bg-[#030303]">
      <div className="max-w-7xl mx-auto px-6">
        <div className="text-center mb-16">
          <h2 className="text-4xl md:text-5xl font-extrabold text-white mb-4 tracking-tight">Powerful Orchestration</h2>
          <p className="text-white/40 text-xl font-medium">Not just a chatbot, but a true system of intelligence.</p>
        </div>

        <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-8">
          {capabilities.map((cap, i) => (
            <motion.div
              key={i}
              initial={{ opacity: 0, y: 20 }}
              whileInView={{ opacity: 1, y: 0 }}
              viewport={{ once: true }}
              transition={{ delay: i * 0.1 }}
              whileHover={{ scale: 1.02, y: -5 }}
              className={cn(
                "group p-8 rounded-2xl bg-white/[0.02] border border-white/[0.08] transition-all duration-300 cursor-default",
                cap.hover
              )}
            >
              {/* Icon badge with the card's gradient tint */}
              <div className={cn(
                "w-12 h-12 rounded-lg bg-gradient-to-br flex items-center justify-center mb-6 group-hover:scale-110 transition-transform duration-300",
                cap.color
              )}>
                <cap.icon className="w-6 h-6 text-white" />
              </div>
              <h3 className="text-xl font-bold text-white mb-3 tracking-tight">{cap.title}</h3>
              <p className="text-white/50 leading-relaxed font-medium">{cap.description}</p>
            </motion.div>
          ))}
        </div>
      </div>
    </section>
  );
};

export default KeyCapabilities;
|
FRRONTEEEND/components/Logo.tsx
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import React from 'react';
|
| 3 |
+
import { cn } from '../lib/utils';
|
| 4 |
+
|
| 5 |
+
interface LogoProps {
  className?: string;
  showText?: boolean;  // when true, renders the product wordmark under the mark
}

// SVG brand mark: a glowing core surrounded by neural-path spokes, dashed
// rings, and an orbital of small squares, all drawn with one cyan→indigo
// gradient. Scales with its container (viewBox 0 0 120 120).
export const Logo: React.FC<LogoProps> = ({ className, showText = false }) => {
  return (
    <div className={cn("flex flex-col items-center", className)}>
      <svg
        viewBox="0 0 120 120"
        className="w-full h-full"
        fill="none"
        xmlns="http://www.w3.org/2000/svg"
      >
        <defs>
          <linearGradient id="logoGradient" x1="0%" y1="0%" x2="100%" y2="100%">
            <stop offset="0%" stopColor="#22d3ee" />
            <stop offset="100%" stopColor="#6366f1" />
          </linearGradient>
          {/* Soft blur composited under the source for the glow effect */}
          <filter id="glow" x="-20%" y="-20%" width="140%" height="140%">
            <feGaussianBlur stdDeviation="2" result="blur" />
            <feComposite in="SourceGraphic" in2="blur" operator="over" />
          </filter>
        </defs>

        {/* Central Core */}
        <circle cx="60" cy="60" r="6" fill="url(#logoGradient)" filter="url(#glow)" />

        {/* Inner Circuit Ring */}
        <circle cx="60" cy="60" r="18" stroke="url(#logoGradient)" strokeWidth="1" strokeDasharray="2 4" opacity="0.4" />

        {/* Complex Neural Paths (Stylized): one branch repeated every 45° */}
        <g opacity="0.8">
          {[0, 45, 90, 135, 180, 225, 270, 315].map((angle) => (
            <g key={angle} transform={`rotate(${angle} 60 60)`}>
              <path
                d="M60 35 L60 30 M60 30 L55 25 M60 30 L65 25"
                stroke="url(#logoGradient)"
                strokeWidth="1.5"
                strokeLinecap="round"
              />
              <circle cx="55" cy="25" r="1.5" fill="url(#logoGradient)" />
              <circle cx="65" cy="25" r="1.5" fill="url(#logoGradient)" />
            </g>
          ))}
        </g>

        {/* Middle Dashed Ring */}
        <circle cx="60" cy="60" r="32" stroke="url(#logoGradient)" strokeWidth="1.5" strokeDasharray="10 6" opacity="0.6" />

        {/* Outer Orbital with Squares: one square repeated every 30° */}
        <circle cx="60" cy="60" r="45" stroke="url(#logoGradient)" strokeWidth="0.5" opacity="0.3" />
        {[0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330].map((angle) => (
          <rect
            key={angle}
            x="58"
            y="12"
            width="4"
            height="4"
            fill="url(#logoGradient)"
            transform={`rotate(${angle} 60 60)`}
            rx="1"
          />
        ))}

        {/* Connection Spokes at the four cardinal directions */}
        {[0, 90, 180, 270].map((angle) => (
          <line
            key={angle}
            x1="60"
            y1="16"
            x2="60"
            y2="30"
            stroke="url(#logoGradient)"
            strokeWidth="1"
            opacity="0.5"
            transform={`rotate(${angle} 60 60)`}
          />
        ))}
      </svg>
      {showText && (
        <span className="mt-2 text-white font-extrabold tracking-widest text-[10px] sm:text-xs uppercase">
          DATA SCIENCE AGENT
        </span>
      )}
    </div>
  );
};
|
FRRONTEEEND/components/ProblemSolution.tsx
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import React from 'react';
|
| 3 |
+
import { motion } from 'framer-motion';
|
| 4 |
+
import { AlertCircle, Zap, ShieldCheck, Clock } from 'lucide-react';
|
| 5 |
+
|
| 6 |
+
// Two-column marketing section: the "problem" panel (manual data-science pain
// points) on the left and the "solution" panel (the agent's answer) on the
// right. Each panel slides in horizontally when scrolled into view.
const ProblemSolution = () => {
  return (
    <section className="py-24 relative bg-[#030303] overflow-hidden">
      <div className="max-w-7xl mx-auto px-6">
        <div className="grid grid-cols-1 lg:grid-cols-2 gap-16 items-center">
          {/* Problem panel: slides in from the left */}
          <motion.div
            initial={{ opacity: 0, x: -30 }}
            whileInView={{ opacity: 1, x: 0 }}
            viewport={{ once: true }}
            transition={{ duration: 0.8 }}
          >
            <h2 className="text-3xl md:text-5xl font-extrabold text-white mb-6 tracking-tight">
              The Data Science <span className="text-rose-400">Bottleneck</span>
            </h2>
            <p className="text-white/60 text-lg mb-8 leading-relaxed font-medium">
              Modern data science is 80% manual labor. Cleaning messy datasets, engineering features, and tuning models takes weeks of repetitive effort. Mistakes are costly, and scaling insights is slow.
            </p>
            <ul className="space-y-4">
              {[
                { icon: AlertCircle, text: "Error-prone manual data preprocessing", color: "text-rose-400" },
                { icon: Clock, text: "Days spent on hyperparameter tuning", color: "text-rose-400" },
                { icon: AlertCircle, text: "Disconnected silos of code and insights", color: "text-rose-400" },
              ].map((item, i) => (
                <li key={i} className="flex items-center gap-3 text-white/80 font-semibold">
                  <item.icon className={`w-5 h-5 ${item.color}`} />
                  <span>{item.text}</span>
                </li>
              ))}
            </ul>
          </motion.div>

          {/* Solution panel: slides in from the right, with a blurred accent */}
          <motion.div
            initial={{ opacity: 0, x: 30 }}
            whileInView={{ opacity: 1, x: 0 }}
            viewport={{ once: true }}
            transition={{ duration: 0.8 }}
            className="relative p-8 md:p-12 rounded-3xl bg-gradient-to-br from-indigo-500/10 via-white/5 to-rose-500/10 border border-white/10"
          >
            <div className="absolute -top-6 -right-6 w-32 h-32 bg-indigo-500/20 blur-3xl" />
            <h2 className="text-3xl md:text-5xl font-extrabold text-white mb-6 tracking-tight">
              The <span className="text-indigo-400">Autonomous</span> Solution
            </h2>
            <p className="text-white/60 text-lg mb-8 leading-relaxed font-medium">
              DATA SCIENCE AGENT automates the entire lifecycle. From raw CSV to production-ready models and interactive dashboards, our agent uses 82+ specialized tools to deliver precision at scale.
            </p>
            <ul className="space-y-4">
              {[
                { icon: Zap, text: "Instant feature engineering and selection", color: "text-indigo-400" },
                { icon: ShieldCheck, text: "Automated error recovery and re-training", color: "text-indigo-400" },
                { icon: Zap, text: "Explainable AI (XAI) reports by default", color: "text-indigo-400" },
              ].map((item, i) => (
                <li key={i} className="flex items-center gap-3 text-white/80 font-semibold">
                  <item.icon className={`w-5 h-5 ${item.color}`} />
                  <span>{item.text}</span>
                </li>
              ))}
            </ul>
          </motion.div>
        </div>
      </div>
    </section>
  );
};

export default ProblemSolution;
|
FRRONTEEEND/components/Process.tsx
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import React from 'react';
|
| 3 |
+
import { motion } from 'framer-motion';
|
| 4 |
+
|
| 5 |
+
// Static content for the four-step "How it Works" walkthrough.
const steps = [
  {
    number: "01",
    title: "Ingest Data",
    description: "Upload your raw CSV, JSON, or Parquet files directly to the secure environment."
  },
  {
    number: "02",
    title: "Define Objective",
    description: "Describe what you want to achieve in natural language. 'Predict churn' or 'Find outliers'."
  },
  {
    number: "03",
    title: "Agent Execution",
    description: "The agent orchestrates tools to clean, transform, and model your data autonomously."
  },
  {
    number: "04",
    title: "Receive Assets",
    description: "Get fully trained models, performance metrics, and interactive explainable reports."
  }
];

// "How it Works" section: renders the steps in a responsive grid with a large
// watermark step number behind each card and connector lines between cards on
// large screens.
const Process = () => {
  return (
    <section id="process" className="py-24 bg-[#030303] border-y border-white/5">
      <div className="max-w-7xl mx-auto px-6">
        <div className="text-center mb-20">
          <h2 className="text-4xl md:text-5xl font-extrabold text-white mb-4 tracking-tight">How it Works</h2>
          <p className="text-white/40 text-xl font-medium">From raw data to actionable intelligence in 4 steps.</p>
        </div>

        <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-12">
          {steps.map((step, i) => (
            <motion.div
              key={i}
              initial={{ opacity: 0, scale: 0.95 }}
              whileInView={{ opacity: 1, scale: 1 }}
              viewport={{ once: true }}
              transition={{ delay: i * 0.1 }}
              className="relative"
            >
              {/* Oversized translucent step number behind the card */}
              <span className="text-7xl font-extrabold text-white/5 absolute -top-10 -left-4 select-none italic">
                {step.number}
              </span>
              <div className="relative z-10">
                <h3 className="text-xl font-bold text-white mb-4 flex items-center gap-2 tracking-tight">
                  <span className="w-1.5 h-1.5 rounded-full bg-indigo-500" />
                  {step.title}
                </h3>
                <p className="text-white/40 leading-relaxed font-medium">
                  {step.description}
                </p>
              </div>
              {/* Connector line to the next step (desktop only, not after last) */}
              {i < steps.length - 1 && (
                <div className="hidden lg:block absolute top-1/2 -right-6 w-12 h-[1px] bg-gradient-to-r from-white/10 to-transparent" />
              )}
            </motion.div>
          ))}
        </div>
      </div>
    </section>
  );
};

export default Process;
|
FRRONTEEEND/components/ShadowSection.tsx
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
'use client';
|
| 3 |
+
|
| 4 |
+
import React, { useRef, useId, useEffect, CSSProperties } from 'react';
|
| 5 |
+
import { animate, useMotionValue, AnimationPlaybackControls, motion } from 'framer-motion';
|
| 6 |
+
import { cn } from '../lib/utils';
|
| 7 |
+
|
| 8 |
+
// Type definitions

// Image source for a caller-supplied shadow mask.
interface ResponsiveImage {
  src: string;
  alt?: string;
  srcSet?: string;
}

// Controls the animated displacement of the shadow overlay.
interface AnimationConfig {
  preview?: boolean;
  scale: number;  // 0 disables the animation; otherwise mapped to displacement strength
  speed: number;  // mapped inversely to animation duration (higher = faster)
}

// Film-grain noise layered over the shadow.
interface NoiseConfig {
  opacity: number;  // 0 disables the noise layer
  scale: number;    // multiplied by 200 to get the tile background-size
}

interface ShadowOverlayProps {
  // NOTE(review): type/presetIndex/customImage are not consumed by
  // ShadowSection below — presumably reserved for preset/custom-mask support.
  type?: 'preset' | 'custom';
  presetIndex?: number;
  customImage?: ResponsiveImage;
  sizing?: 'fill' | 'stretch';  // 'stretch' forces the mask to 100% 100%; otherwise cover
  color?: string;
  animation?: AnimationConfig;
  noise?: NoiseConfig;
  style?: CSSProperties;
  className?: string;
  title?: string;
  description?: string;
}
|
| 39 |
+
|
| 40 |
+
function mapRange(
|
| 41 |
+
value: number,
|
| 42 |
+
fromLow: number,
|
| 43 |
+
fromHigh: number,
|
| 44 |
+
toLow: number,
|
| 45 |
+
toHigh: number
|
| 46 |
+
): number {
|
| 47 |
+
if (fromLow === fromHigh) {
|
| 48 |
+
return toLow;
|
| 49 |
+
}
|
| 50 |
+
const percentage = (value - fromLow) / (fromHigh - fromLow);
|
| 51 |
+
return toLow + percentage * (toHigh - toLow);
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
const useInstanceId = (): string => {
|
| 55 |
+
const id = useId();
|
| 56 |
+
const cleanId = id.replace(/:/g, "");
|
| 57 |
+
const instanceId = `shadowoverlay-${cleanId}`;
|
| 58 |
+
return instanceId;
|
| 59 |
+
};
|
| 60 |
+
|
| 61 |
+
// Full-width decorative section: an animated, color-masked "shadow" backdrop
// (SVG turbulence + displacement filter whose hue is rotated continuously via
// framer-motion), centered title/description text, an optional noise layer,
// and top/bottom vignettes that blend into the page background.
export function ShadowSection({
  sizing = 'fill',
  color = 'rgba(99, 102, 241, 0.6)',
  animation = { scale: 50, speed: 15 },
  noise = { opacity: 0.1, scale: 0.5 },
  style,
  className,
  title = "Cognitive Core",
  description = "The unseen intelligence powering your most critical decisions."
}: ShadowOverlayProps) {
  const id = useInstanceId();
  // scale <= 0 disables both the SVG filter and the hue-rotate animation.
  const animationEnabled = animation && animation.scale > 0;
  const feColorMatrixRef = useRef<SVGFEColorMatrixElement>(null);
  const hueRotateMotionValue = useMotionValue(180);
  const hueRotateAnimation = useRef<AnimationPlaybackControls | null>(null);

  // scale 1..100 -> displacement 20..100; speed 1..100 -> duration 1000..50
  // (higher speed = shorter duration).
  const displacementScale = animation ? mapRange(animation.scale, 1, 100, 20, 100) : 0;
  const animationDuration = animation ? mapRange(animation.speed, 1, 100, 1000, 50) : 1;

  useEffect(() => {
    if (feColorMatrixRef.current && animationEnabled) {
      // Restart cleanly if the effect re-runs (e.g. speed/scale props change).
      if (hueRotateAnimation.current) {
        hueRotateAnimation.current.stop();
      }
      hueRotateMotionValue.set(0);
      // Drive the feColorMatrix hue-rotate value 0 -> 360 forever; the DOM
      // attribute is written imperatively since SVG filter primitives are not
      // managed by framer-motion.
      hueRotateAnimation.current = animate(hueRotateMotionValue, 360, {
        duration: animationDuration / 25,
        repeat: Infinity,
        repeatType: "loop",
        repeatDelay: 0,
        ease: "linear",
        delay: 0,
        onUpdate: (value: number) => {
          if (feColorMatrixRef.current) {
            feColorMatrixRef.current.setAttribute("values", String(value));
          }
        }
      });

      // Cleanup only registered when the animation actually started.
      return () => {
        if (hueRotateAnimation.current) {
          hueRotateAnimation.current.stop();
        }
      };
    }
  }, [animationEnabled, animationDuration, hueRotateMotionValue]);

  return (
    <section
      className={cn("relative w-full h-[70vh] min-h-[500px] overflow-hidden bg-[#030303]", className)}
      style={style}
    >
      {/* Filtered layer is oversized by the displacement amount so displaced
          edges never reveal the background. */}
      <div
        style={{
          position: "absolute",
          inset: -displacementScale,
          filter: animationEnabled ? `url(#${id}) blur(8px)` : "none"
        }}
      >
        {animationEnabled && (
          <svg style={{ position: "absolute", width: 0, height: 0 }}>
            <defs>
              <filter id={id}>
                <feTurbulence
                  result="undulation"
                  numOctaves="2"
                  baseFrequency={`${mapRange(animation.scale, 0, 100, 0.001, 0.0005)},${mapRange(animation.scale, 0, 100, 0.004, 0.002)}`}
                  seed="0"
                  type="turbulence"
                />
                {/* Hue of the turbulence noise is animated via the effect above. */}
                <feColorMatrix
                  ref={feColorMatrixRef}
                  in="undulation"
                  type="hueRotate"
                  values="180"
                />
                {/* NOTE(review): in="dist" here references a result produced by a
                    later primitive — SVG filter references normally point to
                    earlier results; confirm this renders as intended across
                    browsers. */}
                <feColorMatrix
                  in="dist"
                  result="circulation"
                  type="matrix"
                  values="4 0 0 0 1 4 0 0 0 1 4 0 0 0 1 1 0 0 0 0"
                />
                <feDisplacementMap
                  in="SourceGraphic"
                  in2="circulation"
                  scale={displacementScale}
                  result="dist"
                />
                <feDisplacementMap
                  in="dist"
                  in2="undulation"
                  scale={displacementScale}
                  result="output"
                />
              </filter>
            </defs>
          </svg>
        )}
        {/* Solid color revealed through the PNG alpha mask. */}
        <div
          style={{
            backgroundColor: color,
            maskImage: `url('https://framerusercontent.com/images/ceBGguIpUU8luwByxuQz79t7To.png')`,
            maskSize: sizing === "stretch" ? "100% 100%" : "cover",
            maskRepeat: "no-repeat",
            maskPosition: "center",
            width: "100%",
            height: "100%"
          }}
        />
      </div>

      {/* Centered copy above the shadow layer */}
      <div
        style={{
          position: "absolute",
          top: "50%",
          left: "50%",
          transform: "translate(-50%, -50%)",
          textAlign: "center",
          zIndex: 20,
          width: '100%',
          padding: '0 2rem'
        }}
      >
        <motion.h2
          initial={{ opacity: 0, y: 20 }}
          whileInView={{ opacity: 1, y: 0 }}
          viewport={{ once: true }}
          className="md:text-7xl text-5xl lg:text-8xl font-heading font-bold text-center text-white relative z-20 tracking-tighter mb-4"
        >
          {title}
        </motion.h2>
        <motion.p
          initial={{ opacity: 0, y: 20 }}
          whileInView={{ opacity: 1, y: 0 }}
          viewport={{ once: true }}
          transition={{ delay: 0.2 }}
          className="text-white/60 text-lg md:text-xl font-sans max-w-xl mx-auto"
        >
          {description}
        </motion.p>
      </div>

      {/* Optional tiled film-grain layer (rendered at half the given opacity) */}
      {noise && noise.opacity > 0 && (
        <div
          style={{
            position: "absolute",
            inset: 0,
            backgroundImage: `url("https://framerusercontent.com/images/g0QcWrxr87K0ufOxIUFBakwYA8.png")`,
            backgroundSize: noise.scale * 200,
            backgroundRepeat: "repeat",
            opacity: noise.opacity / 2,
            zIndex: 15
          }}
        />
      )}

      {/* Bottom Vignette */}
      <div className="absolute inset-x-0 bottom-0 h-40 bg-gradient-to-t from-[#030303] to-transparent z-30" />
      <div className="absolute inset-x-0 top-0 h-40 bg-gradient-to-b from-[#030303] to-transparent z-30" />
    </section>
  );
}
|
FRRONTEEEND/components/TechStack.tsx
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import React from 'react';
|
| 3 |
+
import { motion } from 'framer-motion';
|
| 4 |
+
|
| 5 |
+
// Technology names rendered as badge pills in the stack strip.
const techs = [
  "Python", "Polars", "Pandas", "Scikit-Learn", "XGBoost", "LightGBM", "Groq", "Gemini", "FastAPI", "Cloud Run", "Docker", "PyTorch"
];

// "Built with the modern AI Stack" strip: a centered row of technology
// badges, each fading in with a small stagger as it scrolls into view.
const TechStack = () => {
  return (
    <section className="py-24 bg-[#030303]">
      <div className="max-w-7xl mx-auto px-6">
        <div className="text-center mb-12">
          <h3 className="text-xs font-bold uppercase tracking-[0.3em] text-white/30 italic">Built with the modern AI Stack</h3>
        </div>

        <div className="flex flex-wrap justify-center gap-4 md:gap-6 opacity-60">
          {techs.map((tech, i) => (
            <motion.div
              key={tech}
              initial={{ opacity: 0 }}
              whileInView={{ opacity: 1 }}
              viewport={{ once: true }}
              transition={{ delay: i * 0.05 }}
              className="px-5 py-2 rounded-lg border border-white/5 bg-white/[0.02] text-white/80 font-bold text-xs md:text-sm whitespace-nowrap tracking-wide uppercase"
            >
              {tech}
            </motion.div>
          ))}
        </div>
      </div>
    </section>
  );
};

export default TechStack;
|
FRRONTEEEND/index.html
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
<!DOCTYPE html>
|
| 3 |
+
<html lang="en">
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8" />
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 7 |
+
<title>Data Science Agent </title>
|
| 8 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
| 9 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 10 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 11 |
+
<link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:ital,wght@0,200;0,300;0,400;0,500;0,600;0,700;0,800;1,200;1,300;1,400;1,500;1,600;1,700;1,800&display=swap" rel="stylesheet">
|
| 12 |
+
<script>
|
| 13 |
+
tailwind.config = {
|
| 14 |
+
theme: {
|
| 15 |
+
extend: {
|
| 16 |
+
fontFamily: {
|
| 17 |
+
sans: ['Plus Jakarta Sans', 'sans-serif'],
|
| 18 |
+
heading: ['Plus Jakarta Sans', 'sans-serif'],
|
| 19 |
+
mono: ['Plus Jakarta Sans', 'sans-serif'],
|
| 20 |
+
},
|
| 21 |
+
},
|
| 22 |
+
},
|
| 23 |
+
}
|
| 24 |
+
</script>
|
| 25 |
+
<style>
|
| 26 |
+
body {
|
| 27 |
+
margin: 0;
|
| 28 |
+
background-color: #030303;
|
| 29 |
+
overflow-x: hidden;
|
| 30 |
+
font-family: 'Plus Jakarta Sans', sans-serif;
|
| 31 |
+
-webkit-font-smoothing: antialiased;
|
| 32 |
+
-moz-osx-font-smoothing: grayscale;
|
| 33 |
+
}
|
| 34 |
+
::selection {
|
| 35 |
+
background-color: rgba(99, 102, 241, 0.3);
|
| 36 |
+
color: white;
|
| 37 |
+
}
|
| 38 |
+
</style>
|
| 39 |
+
<script type="importmap">
|
| 40 |
+
{
|
| 41 |
+
"imports": {
|
| 42 |
+
"react": "https://esm.sh/react@^19.2.3",
|
| 43 |
+
"react-dom/": "https://esm.sh/react-dom@^19.2.3/",
|
| 44 |
+
"react/": "https://esm.sh/react@^19.2.3/",
|
| 45 |
+
"clsx": "https://esm.sh/clsx@^2.1.1",
|
| 46 |
+
"tailwind-merge": "https://esm.sh/tailwind-merge@^3.4.0",
|
| 47 |
+
"framer-motion": "https://esm.sh/framer-motion@^12.23.26",
|
| 48 |
+
"lucide-react": "https://esm.sh/lucide-react@^0.562.0",
|
| 49 |
+
"@google/genai": "https://esm.sh/@google/genai@^1.34.0"
|
| 50 |
+
}
|
| 51 |
+
}
|
| 52 |
+
</script>
|
| 53 |
+
<link rel="stylesheet" href="/index.css">
|
| 54 |
+
</head>
|
| 55 |
+
<body>
|
| 56 |
+
<div id="root"></div>
|
| 57 |
+
<script type="module" src="/index.tsx"></script>
|
| 58 |
+
</body>
|
| 59 |
+
</html>
|
FRRONTEEEND/index.tsx
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import React from 'react';
|
| 3 |
+
import ReactDOM from 'react-dom/client';
|
| 4 |
+
import App from './App';
|
| 5 |
+
|
| 6 |
+
const rootElement = document.getElementById('root');
|
| 7 |
+
if (!rootElement) {
|
| 8 |
+
throw new Error("Could not find root element to mount to");
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
const root = ReactDOM.createRoot(rootElement);
|
| 12 |
+
root.render(
|
| 13 |
+
<React.StrictMode>
|
| 14 |
+
<App />
|
| 15 |
+
</React.StrictMode>
|
| 16 |
+
);
|
FRRONTEEEND/lib/utils.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import { clsx, type ClassValue } from 'clsx';
|
| 3 |
+
import { twMerge } from 'tailwind-merge';
|
| 4 |
+
|
| 5 |
+
export function cn(...inputs: ClassValue[]) {
|
| 6 |
+
return twMerge(clsx(inputs));
|
| 7 |
+
}
|
FRRONTEEEND/metadata.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "Data Science Agent",
|
| 3 |
+
"description": "A production-grade autonomous AI agent for end-to-end data science workflows, featuring 82+ specialized tools and dual LLM support.",
|
| 4 |
+
"requestFramePermissions": []
|
| 5 |
+
}
|
FRRONTEEEND/package-lock.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
FRRONTEEEND/package.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "data-science-agent",
|
| 3 |
+
"private": true,
|
| 4 |
+
"version": "0.0.0",
|
| 5 |
+
"type": "module",
|
| 6 |
+
"scripts": {
|
| 7 |
+
"dev": "vite",
|
| 8 |
+
"build": "vite build",
|
| 9 |
+
"preview": "vite preview"
|
| 10 |
+
},
|
| 11 |
+
"dependencies": {
|
| 12 |
+
"react": "^19.2.3",
|
| 13 |
+
"react-dom": "^19.2.3",
|
| 14 |
+
"clsx": "^2.1.1",
|
| 15 |
+
"tailwind-merge": "^3.4.0",
|
| 16 |
+
"framer-motion": "^12.23.26",
|
| 17 |
+
"lucide-react": "^0.562.0",
|
| 18 |
+
"react-markdown": "^9.0.1"
|
| 19 |
+
},
|
| 20 |
+
"devDependencies": {
|
| 21 |
+
"@types/node": "^22.14.0",
|
| 22 |
+
"@vitejs/plugin-react": "^5.0.0",
|
| 23 |
+
"typescript": "~5.8.2",
|
| 24 |
+
"vite": "^6.2.0"
|
| 25 |
+
}
|
| 26 |
+
}
|
FRRONTEEEND/tsconfig.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"compilerOptions": {
|
| 3 |
+
"target": "ES2022",
|
| 4 |
+
"experimentalDecorators": true,
|
| 5 |
+
"useDefineForClassFields": false,
|
| 6 |
+
"module": "ESNext",
|
| 7 |
+
"lib": [
|
| 8 |
+
"ES2022",
|
| 9 |
+
"DOM",
|
| 10 |
+
"DOM.Iterable"
|
| 11 |
+
],
|
| 12 |
+
"skipLibCheck": true,
|
| 13 |
+
"types": [
|
| 14 |
+
"node"
|
| 15 |
+
],
|
| 16 |
+
"moduleResolution": "bundler",
|
| 17 |
+
"isolatedModules": true,
|
| 18 |
+
"moduleDetection": "force",
|
| 19 |
+
"allowJs": true,
|
| 20 |
+
"jsx": "react-jsx",
|
| 21 |
+
"paths": {
|
| 22 |
+
"@/*": [
|
| 23 |
+
"./*"
|
| 24 |
+
]
|
| 25 |
+
},
|
| 26 |
+
"allowImportingTsExtensions": true,
|
| 27 |
+
"noEmit": true
|
| 28 |
+
}
|
| 29 |
+
}
|
FRRONTEEEND/vite.config.ts
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import path from 'path';
|
| 2 |
+
import { defineConfig, loadEnv } from 'vite';
|
| 3 |
+
import react from '@vitejs/plugin-react';
|
| 4 |
+
|
| 5 |
+
export default defineConfig(({ mode }) => {
|
| 6 |
+
const env = loadEnv(mode, '.', '');
|
| 7 |
+
return {
|
| 8 |
+
server: {
|
| 9 |
+
port: 3000,
|
| 10 |
+
host: '0.0.0.0',
|
| 11 |
+
proxy: {
|
| 12 |
+
'/api': {
|
| 13 |
+
target: env.VITE_API_URL || 'http://localhost:8080',
|
| 14 |
+
changeOrigin: true,
|
| 15 |
+
rewrite: (path) => path.replace(/^\/api/, '')
|
| 16 |
+
}
|
| 17 |
+
}
|
| 18 |
+
},
|
| 19 |
+
plugins: [react()],
|
| 20 |
+
define: {
|
| 21 |
+
'import.meta.env.VITE_API_URL': JSON.stringify(env.VITE_API_URL || 'http://localhost:8080')
|
| 22 |
+
},
|
| 23 |
+
resolve: {
|
| 24 |
+
alias: {
|
| 25 |
+
'@': path.resolve(__dirname, '.'),
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
};
|
| 29 |
+
});
|
GEMINI_UPDATE.md
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🔄 Updated to Use Google Gemini!
|
| 2 |
+
|
| 3 |
+
## What Changed
|
| 4 |
+
|
| 5 |
+
The application now uses **Google Gemini (gemini-2.0-flash-exp)** instead of Groq for the chat interface.
|
| 6 |
+
|
| 7 |
+
## Required Setup
|
| 8 |
+
|
| 9 |
+
### 1. Set Your Google API Key
|
| 10 |
+
|
| 11 |
+
```powershell
|
| 12 |
+
# Windows PowerShell
|
| 13 |
+
$env:GOOGLE_API_KEY="your-google-api-key-here"
|
| 14 |
+
|
| 15 |
+
# Verify it's set
|
| 16 |
+
echo $env:GOOGLE_API_KEY
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
### 2. Get Your API Key
|
| 20 |
+
|
| 21 |
+
If you don't have a Google API key:
|
| 22 |
+
1. Go to [Google AI Studio](https://aistudio.google.com/app/apikey)
|
| 23 |
+
2. Create a new API key
|
| 24 |
+
3. Copy and set it as shown above
|
| 25 |
+
|
| 26 |
+
## Quick Start
|
| 27 |
+
|
| 28 |
+
```powershell
|
| 29 |
+
# Set your API key
|
| 30 |
+
$env:GOOGLE_API_KEY="your-key-here"
|
| 31 |
+
|
| 32 |
+
# Run the application
|
| 33 |
+
.\start.ps1
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
Then open: **http://localhost:8080**
|
| 37 |
+
|
| 38 |
+
## What's Using Gemini
|
| 39 |
+
|
| 40 |
+
- ✅ **Chat Interface** (`/chat` endpoint) - Uses Gemini 2.0 Flash
|
| 41 |
+
- ℹ️ **Full Workflow** (`/run` endpoint) - Uses the main agent (configurable via LLM_PROVIDER)
|
| 42 |
+
|
| 43 |
+
## Technical Details
|
| 44 |
+
|
| 45 |
+
The `/chat` endpoint now:
|
| 46 |
+
- Uses `google.generativeai` SDK
|
| 47 |
+
- Model: `gemini-2.0-flash-exp`
|
| 48 |
+
- Maintains conversation history
|
| 49 |
+
- Professional data science system instruction
|
| 50 |
+
|
| 51 |
+
## Expected Console Output
|
| 52 |
+
|
| 53 |
+
When you start the server:
|
| 54 |
+
```
|
| 55 |
+
INFO: Started server process [####]
|
| 56 |
+
INFO: Waiting for application startup.
|
| 57 |
+
✅ Agent initialized with provider: gemini
|
| 58 |
+
✅ Frontend assets mounted from C:\Users\Pulastya\Videos\DS AGENTTTT\FRRONTEEEND\dist
|
| 59 |
+
INFO: Application startup complete.
|
| 60 |
+
INFO: Uvicorn running on http://0.0.0.0:8080
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
## Files Updated
|
| 64 |
+
|
| 65 |
+
- ✅ [src/api/app.py](src/api/app.py) - `/chat` endpoint now uses Gemini
|
| 66 |
+
- ✅ [.env.example](.env.example) - Updated to GOOGLE_API_KEY
|
| 67 |
+
- ✅ [start.ps1](start.ps1) - Updated environment variable reference
|
| 68 |
+
- ✅ [start.sh](start.sh) - Updated environment variable reference
|
| 69 |
+
- ✅ [CHECKLIST.md](CHECKLIST.md) - Updated instructions
|
| 70 |
+
- ✅ [FRRONTEEEND/.env](FRRONTEEEND/.env) - Added note about Gemini
|
| 71 |
+
|
| 72 |
+
## Troubleshooting
|
| 73 |
+
|
| 74 |
+
### Error: "API key not configured"
|
| 75 |
+
**Solution**: Make sure you've set the environment variable:
|
| 76 |
+
```powershell
|
| 77 |
+
$env:GOOGLE_API_KEY="your-actual-api-key"
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
### Error: "Module google.generativeai not found"
|
| 81 |
+
**Solution**: The dependency is already in requirements.txt. Verify it's installed:
|
| 82 |
+
```bash
|
| 83 |
+
pip install google-generativeai
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
### Rate Limits
|
| 87 |
+
Gemini 2.0 Flash has generous rate limits:
|
| 88 |
+
- Free tier: 15 RPM (requests per minute)
|
| 89 |
+
- 1 million TPM (tokens per minute)
|
| 90 |
+
|
| 91 |
+
---
|
| 92 |
+
|
| 93 |
+
**Ready?** Set your `GOOGLE_API_KEY` and run `.\start.ps1` 🚀
|
MIGRATION_COMPLETE.md
ADDED
|
@@ -0,0 +1,325 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🎉 Frontend Migration Complete!
|
| 2 |
+
|
| 3 |
+
## Summary
|
| 4 |
+
|
| 5 |
+
Successfully replaced the old Gradio interface with a modern React-based frontend featuring:
|
| 6 |
+
- **Professional Landing Page**: Showcases the agent's capabilities
|
| 7 |
+
- **Modern Chat Interface**: NextChat-style conversational UI
|
| 8 |
+
- **Direct Backend Integration**: Communicates with FastAPI backend
|
| 9 |
+
- **Beautiful Design**: Dark theme with animations and responsive layout
|
| 10 |
+
|
| 11 |
+
## What Was Changed
|
| 12 |
+
|
| 13 |
+
### ✅ Backend Updates ([src/api/app.py](src/api/app.py))
|
| 14 |
+
1. **Added CORS middleware** for frontend communication
|
| 15 |
+
2. **Created `/chat` endpoint** for conversational interface
|
| 16 |
+
3. **Static file serving** for built React app
|
| 17 |
+
4. **Catch-all route** to serve `index.html` for client-side routing
|
| 18 |
+
|
| 19 |
+
### ✅ Frontend Updates
|
| 20 |
+
1. **Removed Google GenAI dependency** from [package.json](FRRONTEEEND/package.json)
|
| 21 |
+
2. **Updated ChatInterface.tsx** to call backend `/chat` endpoint instead of external API
|
| 22 |
+
3. **Added environment configuration**:
|
| 23 |
+
- `.env` for local development
|
| 24 |
+
- `.env.production` for production builds
|
| 25 |
+
4. **Updated vite.config.ts** with proxy configuration
|
| 26 |
+
|
| 27 |
+
### ✅ Configuration Files
|
| 28 |
+
1. **requirements.txt**: Commented out Gradio (no longer needed)
|
| 29 |
+
2. **Dockerfile**: Added multi-stage build for React frontend
|
| 30 |
+
3. **.dockerignore**: Excluded node_modules and frontend dev files
|
| 31 |
+
4. **New Scripts**:
|
| 32 |
+
- `start.ps1` / `start.sh` - Quick start scripts
|
| 33 |
+
- `build-and-deploy.ps1` / `build-and-deploy.sh` - Build scripts
|
| 34 |
+
|
| 35 |
+
### ✅ Documentation
|
| 36 |
+
- **FRONTEND_INTEGRATION.md**: Complete integration guide
|
| 37 |
+
- **README.md**: Updated with frontend announcement
|
| 38 |
+
|
| 39 |
+
## 🚀 How to Run
|
| 40 |
+
|
| 41 |
+
### Quick Start (Recommended)
|
| 42 |
+
|
| 43 |
+
**Windows:**
|
| 44 |
+
```powershell
|
| 45 |
+
.\start.ps1
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
**Linux/Mac:**
|
| 49 |
+
```bash
|
| 50 |
+
chmod +x start.sh
|
| 51 |
+
./start.sh
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
### Manual Steps
|
| 55 |
+
|
| 56 |
+
1. **Build Frontend** (already done ✅):
|
| 57 |
+
```bash
|
| 58 |
+
cd FRRONTEEEND
|
| 59 |
+
npm.cmd install
|
| 60 |
+
npm.cmd run build
|
| 61 |
+
cd ..
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
2. **Set Environment Variables**:
|
| 65 |
+
```powershell
|
| 66 |
+
# Required
|
| 67 |
+
$env:GROQ_API_KEY="your-groq-api-key-here"
|
| 68 |
+
|
| 69 |
+
# Optional
|
| 70 |
+
$env:GOOGLE_API_KEY="your-google-api-key"
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
3. **Start Backend**:
|
| 74 |
+
```bash
|
| 75 |
+
python src\api\app.py
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
4. **Access Application**:
|
| 79 |
+
Open browser to: **http://localhost:8080**
|
| 80 |
+
|
| 81 |
+
## 🏗️ Architecture
|
| 82 |
+
|
| 83 |
+
```
|
| 84 |
+
┌─────────────────────────────────────────────────────────┐
|
| 85 |
+
│ Browser │
|
| 86 |
+
│ │
|
| 87 |
+
│ ┌──────────────────────────────────────────────────┐ │
|
| 88 |
+
│ │ React Frontend (Port 8080) │ │
|
| 89 |
+
│ │ - Landing Page (HeroGeometric, etc.) │ │
|
| 90 |
+
│ │ - Chat Interface (ChatInterface.tsx) │ │
|
| 91 |
+
│ └──────────────────────────────────────────────────┘ │
|
| 92 |
+
│ │ │
|
| 93 |
+
│ │ HTTP POST /chat │
|
| 94 |
+
└─────────────────────────┼────────────────────────────────┘
|
| 95 |
+
│
|
| 96 |
+
▼
|
| 97 |
+
┌─────────────────────────────────────────────────────────┐
|
| 98 |
+
│ FastAPI Backend (Port 8080) │
|
| 99 |
+
│ │
|
| 100 |
+
│ ┌──────────────────────────────────────────────────┐ │
|
| 101 |
+
│ │ API Endpoints │ │
|
| 102 |
+
│ │ - POST /chat → Chat with agent │ │
|
| 103 |
+
│ │ - POST /run → Full workflow │ │
|
| 104 |
+
│ │ - POST /profile → Dataset profiling │ │
|
| 105 |
+
│ │ - GET /tools → List tools │ │
|
| 106 |
+
│ │ - GET /* → Serve React app │ │
|
| 107 |
+
│ └──────────────────────────────────────────────────┘ │
|
| 108 |
+
│ │ │
|
| 109 |
+
│ ▼ │
|
| 110 |
+
│ ┌──────────────────────────────────────────────────┐ │
|
| 111 |
+
│ │ DataScienceCopilot (orchestrator.py) │ │
|
| 112 |
+
│ │ - 82+ Tools │ │
|
| 113 |
+
│ │ - Groq LLM │ │
|
| 114 |
+
│ │ - Session Memory │ │
|
| 115 |
+
│ └──────────────────────────────────────────────────┘ │
|
| 116 |
+
└─────────────────────────────────────────────────────────┘
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
## 🎯 Key Endpoints
|
| 120 |
+
|
| 121 |
+
### `/chat` - Conversational Interface
|
| 122 |
+
```typescript
|
| 123 |
+
POST /chat
|
| 124 |
+
Content-Type: application/json
|
| 125 |
+
|
| 126 |
+
{
|
| 127 |
+
"messages": [
|
| 128 |
+
{"role": "user", "content": "Profile my dataset"},
|
| 129 |
+
{"role": "assistant", "content": "..."}
|
| 130 |
+
],
|
| 131 |
+
"stream": false
|
| 132 |
+
}
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
**Response:**
|
| 136 |
+
```json
|
| 137 |
+
{
|
| 138 |
+
"success": true,
|
| 139 |
+
"message": "I can help you profile your dataset...",
|
| 140 |
+
"model": "llama-3.3-70b-versatile",
|
| 141 |
+
"provider": "groq"
|
| 142 |
+
}
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
### `/run` - Complete Workflow
|
| 146 |
+
```bash
|
| 147 |
+
POST /run
|
| 148 |
+
Content-Type: multipart/form-data
|
| 149 |
+
|
| 150 |
+
file: <dataset.csv>
|
| 151 |
+
task_description: "Predict house prices"
|
| 152 |
+
target_col: "price"
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
### `/profile` - Quick Profiling
|
| 156 |
+
```bash
|
| 157 |
+
POST /profile
|
| 158 |
+
Content-Type: multipart/form-data
|
| 159 |
+
|
| 160 |
+
file: <dataset.csv>
|
| 161 |
+
```
|
| 162 |
+
|
| 163 |
+
## 📝 Environment Variables
|
| 164 |
+
|
| 165 |
+
### Backend (.env or system)
|
| 166 |
+
```env
|
| 167 |
+
# Required
|
| 168 |
+
GROQ_API_KEY=your-groq-api-key
|
| 169 |
+
|
| 170 |
+
# Optional
|
| 171 |
+
GOOGLE_API_KEY=your-google-api-key
|
| 172 |
+
GCP_PROJECT_ID=your-project-id
|
| 173 |
+
LLM_PROVIDER=groq # or "gemini"
|
| 174 |
+
```
|
| 175 |
+
|
| 176 |
+
### Frontend (FRRONTEEEND/.env)
|
| 177 |
+
```env
|
| 178 |
+
# Development
|
| 179 |
+
VITE_API_URL=http://localhost:8080
|
| 180 |
+
|
| 181 |
+
# Production (FRRONTEEEND/.env.production)
|
| 182 |
+
VITE_API_URL=https://your-cloud-run-url.run.app
|
| 183 |
+
```
|
| 184 |
+
|
| 185 |
+
## 🐳 Docker Deployment
|
| 186 |
+
|
| 187 |
+
The Dockerfile now includes a multi-stage build:
|
| 188 |
+
|
| 189 |
+
```bash
|
| 190 |
+
# Build image
|
| 191 |
+
docker build -t data-science-agent .
|
| 192 |
+
|
| 193 |
+
# Run container
|
| 194 |
+
docker run -p 8080:8080 \
|
| 195 |
+
-e GROQ_API_KEY=your-key \
|
| 196 |
+
data-science-agent
|
| 197 |
+
```
|
| 198 |
+
|
| 199 |
+
## ☁️ Google Cloud Run Deployment
|
| 200 |
+
|
| 201 |
+
```bash
|
| 202 |
+
# Build and push
|
| 203 |
+
gcloud builds submit --tag gcr.io/YOUR-PROJECT-ID/data-science-agent
|
| 204 |
+
|
| 205 |
+
# Deploy
|
| 206 |
+
gcloud run deploy data-science-agent \
|
| 207 |
+
--image gcr.io/YOUR-PROJECT-ID/data-science-agent \
|
| 208 |
+
--platform managed \
|
| 209 |
+
--region us-central1 \
|
| 210 |
+
--allow-unauthenticated \
|
| 211 |
+
--set-env-vars GROQ_API_KEY=your-api-key
|
| 212 |
+
```
|
| 213 |
+
|
| 214 |
+
## 🔍 Testing
|
| 215 |
+
|
| 216 |
+
### Test Backend API
|
| 217 |
+
```bash
|
| 218 |
+
# Health check
|
| 219 |
+
curl http://localhost:8080/health
|
| 220 |
+
|
| 221 |
+
# List tools
|
| 222 |
+
curl http://localhost:8080/tools
|
| 223 |
+
|
| 224 |
+
# Chat
|
| 225 |
+
curl -X POST http://localhost:8080/chat \
|
| 226 |
+
-H "Content-Type: application/json" \
|
| 227 |
+
-d '{
|
| 228 |
+
"messages": [
|
| 229 |
+
{"role": "user", "content": "Hello, what can you do?"}
|
| 230 |
+
]
|
| 231 |
+
}'
|
| 232 |
+
```
|
| 233 |
+
|
| 234 |
+
### Test Frontend
|
| 235 |
+
1. Open browser: http://localhost:8080
|
| 236 |
+
2. Click "Launch Console"
|
| 237 |
+
3. Type a message and send
|
| 238 |
+
|
| 239 |
+
## 🎨 Frontend Development
|
| 240 |
+
|
| 241 |
+
For frontend development with hot-reloading:
|
| 242 |
+
|
| 243 |
+
**Terminal 1 - Backend:**
|
| 244 |
+
```bash
|
| 245 |
+
python src\api\app.py
|
| 246 |
+
```
|
| 247 |
+
|
| 248 |
+
**Terminal 2 - Frontend:**
|
| 249 |
+
```bash
|
| 250 |
+
cd FRRONTEEEND
|
| 251 |
+
npm.cmd run dev
|
| 252 |
+
```
|
| 253 |
+
|
| 254 |
+
Access:
|
| 255 |
+
- Frontend Dev: http://localhost:3000
|
| 256 |
+
- Backend API: http://localhost:8080
|
| 257 |
+
|
| 258 |
+
## 📦 Build Status
|
| 259 |
+
|
| 260 |
+
✅ **Frontend Built**: FRRONTEEEND/dist/ contains:
|
| 261 |
+
- index.html
|
| 262 |
+
- assets/index-[hash].js (384 KB)
|
| 263 |
+
|
| 264 |
+
✅ **Backend Ready**: src/api/app.py configured to:
|
| 265 |
+
- Serve static files from FRRONTEEEND/dist/assets
|
| 266 |
+
- Route all non-API requests to index.html
|
| 267 |
+
- Handle /chat endpoint
|
| 268 |
+
|
| 269 |
+
## 🔄 Migration Notes
|
| 270 |
+
|
| 271 |
+
### What's Deprecated
|
| 272 |
+
- ❌ `chat_ui.py` - Old Gradio interface (kept for reference)
|
| 273 |
+
- ❌ Direct Google GenAI calls from frontend
|
| 274 |
+
|
| 275 |
+
### What's New
|
| 276 |
+
- ✅ React 19 + TypeScript
|
| 277 |
+
- ✅ Vite 6 build system
|
| 278 |
+
- ✅ Tailwind CSS styling
|
| 279 |
+
- ✅ Framer Motion animations
|
| 280 |
+
- ✅ Backend-first architecture
|
| 281 |
+
|
| 282 |
+
## 🐛 Troubleshooting
|
| 283 |
+
|
| 284 |
+
### Issue: Frontend shows 404
|
| 285 |
+
**Solution**: Make sure you've built the frontend:
|
| 286 |
+
```bash
|
| 287 |
+
cd FRRONTEEEND
|
| 288 |
+
npm.cmd run build
|
| 289 |
+
```
|
| 290 |
+
|
| 291 |
+
### Issue: API errors in chat
|
| 292 |
+
**Solution**:
|
| 293 |
+
1. Check backend is running: `python src\api\app.py`
|
| 294 |
+
2. Verify GROQ_API_KEY is set
|
| 295 |
+
3. Check console for errors
|
| 296 |
+
|
| 297 |
+
### Issue: CORS errors
|
| 298 |
+
**Solution**: The backend has CORS enabled. If issues persist, check the `allow_origins` in app.py
|
| 299 |
+
|
| 300 |
+
### Issue: Module import errors
|
| 301 |
+
**Solution**: Make sure all Python dependencies are installed:
|
| 302 |
+
```bash
|
| 303 |
+
pip install -r requirements.txt
|
| 304 |
+
```
|
| 305 |
+
|
| 306 |
+
## 📚 Additional Resources
|
| 307 |
+
|
| 308 |
+
- **[FRONTEND_INTEGRATION.md](FRONTEND_INTEGRATION.md)** - Detailed integration guide
|
| 309 |
+
- **[README.md](README.md)** - Main project documentation
|
| 310 |
+
- **[DEPLOYMENT.md](DEPLOYMENT.md)** - Cloud deployment guide
|
| 311 |
+
|
| 312 |
+
## ✨ Next Steps
|
| 313 |
+
|
| 314 |
+
1. **File Upload**: Add file upload capability to ChatInterface
|
| 315 |
+
2. **Visualizations**: Display charts and plots in chat
|
| 316 |
+
3. **Session Persistence**: Store chat history in backend
|
| 317 |
+
4. **Authentication**: Add user authentication
|
| 318 |
+
5. **Streaming**: Implement streaming responses
|
| 319 |
+
6. **Dark/Light Mode**: Add theme toggle
|
| 320 |
+
|
| 321 |
+
---
|
| 322 |
+
|
| 323 |
+
**Status**: ✅ Ready to use!
|
| 324 |
+
|
| 325 |
+
**Last Updated**: December 27, 2025
|
QUICK_REFERENCE.txt
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
╔═══════════════════════════════════════════════════════════════╗
|
| 2 |
+
║ 🚀 DATA SCIENCE AGENT - QUICK REFERENCE ║
|
| 3 |
+
║ Now powered by Google Gemini! 🤖 ║
|
| 4 |
+
╚═══════════════════════════════════════════════════════════════╝
|
| 5 |
+
|
| 6 |
+
┌───────────────────────────────────────────────────────────────┐
|
| 7 |
+
│ 1. SET API KEY (REQUIRED!) │
|
| 8 |
+
└───────────────────────────────────────────────────────────────┘
|
| 9 |
+
|
| 10 |
+
PowerShell:
|
| 11 |
+
$env:GOOGLE_API_KEY="your-google-api-key-here"
|
| 12 |
+
|
| 13 |
+
Get your key: https://aistudio.google.com/app/apikey
|
| 14 |
+
|
| 15 |
+
┌───────────────────────────────────────────────────────────────┐
|
| 16 |
+
│ 2. START THE APPLICATION │
|
| 17 |
+
└───────────────────────────────────────────────────────────────┘
|
| 18 |
+
|
| 19 |
+
.\start.ps1
|
| 20 |
+
|
| 21 |
+
┌───────────────────────────────────────────────────────────────┐
|
| 22 |
+
│ 3. ACCESS THE APP │
|
| 23 |
+
└───────────────────────────────────────────────────────────────┘
|
| 24 |
+
|
| 25 |
+
Open browser: http://localhost:8080
|
| 26 |
+
|
| 27 |
+
┌───────────────────────────────────────────────────────────────┐
|
| 28 |
+
│ WHAT'S INCLUDED │
|
| 29 |
+
└───────────────────────────────────────────────────────────────┘
|
| 30 |
+
|
| 31 |
+
✅ Modern React frontend with landing page
|
| 32 |
+
✅ Professional chat interface
|
| 33 |
+
✅ Google Gemini 2.0 Flash integration
|
| 34 |
+
✅ 82+ data science tools
|
| 35 |
+
✅ Complete ML pipeline automation
|
| 36 |
+
|
| 37 |
+
┌───────────────────────────────────────────────────────────────┐
|
| 38 |
+
│ KEY FILES │
|
| 39 |
+
└───────────────────────────────────────────────────────────────┘
|
| 40 |
+
|
| 41 |
+
📖 GEMINI_UPDATE.md - Gemini migration details
|
| 42 |
+
📖 CHECKLIST.md - Pre-launch checklist
|
| 43 |
+
📖 MIGRATION_COMPLETE.md - Full change log
|
| 44 |
+
📖 FRONTEND_INTEGRATION.md - Technical docs
|
| 45 |
+
|
| 46 |
+
┌───────────────────────────────────────────────────────────────┐
|
| 47 |
+
│ TROUBLESHOOTING │
|
| 48 |
+
└───────────────────────────────────────────────────────────────┘
|
| 49 |
+
|
| 50 |
+
Issue: "API key not configured"
|
| 51 |
+
→ Set: $env:GOOGLE_API_KEY="your-key"
|
| 52 |
+
|
| 53 |
+
Issue: "Frontend not found"
|
| 54 |
+
→ Run: cd FRRONTEEEND && npm run build
|
| 55 |
+
|
| 56 |
+
Issue: "Module not found"
|
| 57 |
+
→ Run: pip install -r requirements.txt
|
| 58 |
+
|
| 59 |
+
┌───────────────────────────────────────────────────────────────┐
|
| 60 |
+
│ API ENDPOINTS │
|
| 61 |
+
└───────────────────────────────────────────────────────────────┘
|
| 62 |
+
|
| 63 |
+
POST /chat - Chat with Gemini agent
|
| 64 |
+
POST /run - Full ML workflow
|
| 65 |
+
POST /profile - Quick dataset profiling
|
| 66 |
+
GET /tools - List available tools
|
| 67 |
+
GET /docs - API documentation
|
| 68 |
+
|
| 69 |
+
╔═══════════════════════════════════════════════════════════════╗
|
| 70 |
+
║ Ready to start? Run: .\start.ps1 ║
|
| 71 |
+
���═══════════════════════════════════════════════════════════════╝
|
README.md
ADDED
|
@@ -0,0 +1,632 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Data Science Agent 🤖
|
| 2 |
+
|
| 3 |
+
A production-grade **autonomous AI agent** for end-to-end data science workflows. Upload datasets, describe your goal in natural language, and let the AI handle profiling, cleaning, feature engineering, model training, and visualization.
|
| 4 |
+
|
| 5 |
+
**Key Differentiator**: Not just a chatbot - a true AI agent with 82+ specialized tools, intelligent orchestration, dual LLM support, session memory, code interpreter, and Cloud Run API.
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
> ## 🎉 **NEW: Modern React Frontend!**
|
| 10 |
+
>
|
| 11 |
+
> The application now features a **professional React-based web interface** with a beautiful landing page and chat UI, replacing the old Gradio interface.
|
| 12 |
+
>
|
| 13 |
+
> **Quick Start:**
|
| 14 |
+
> ```powershell
|
| 15 |
+
> .\start.ps1 # Windows
|
| 16 |
+
> ```
|
| 17 |
+
> or
|
| 18 |
+
> ```bash
|
| 19 |
+
> ./start.sh # Linux/Mac
|
| 20 |
+
> ```
|
| 21 |
+
>
|
| 22 |
+
> 📖 **[See Full Frontend Integration Guide →](FRONTEND_INTEGRATION.md)**
|
| 23 |
+
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
## 🎯 Project Vision
|
| 27 |
+
|
| 28 |
+
Build an **autonomous data science system** that achieves **50-70th percentile performance** on Kaggle competitions through intelligent automation, proving AI agents can handle real-world ML workflows end-to-end.
|
| 29 |
+
|
| 30 |
+
---
|
| 31 |
+
|
| 32 |
+
## ✨ Core Features
|
| 33 |
+
|
| 34 |
+
### **🤖 Intelligent Agent System**
|
| 35 |
+
- **82+ Specialized Tools** across 14 categories (profiling, cleaning, feature engineering, ML, visualization, BigQuery, and more)
|
| 36 |
+
- **Dual LLM Support**: Groq (llama-3.3-70b) + Google Gemini (2.0-flash-exp)
|
| 37 |
+
- **Smart Orchestration**: LLM-powered function calling with intelligent tool chaining
|
| 38 |
+
- **Session Memory**: Contextual awareness across conversations ("cross-validate it", "try with Ridge")
|
| 39 |
+
- **Code Interpreter**: Write and execute custom Python code for tasks beyond predefined tools
|
| 40 |
+
- **Error Recovery**: Automatic retry with corrected parameters
|
| 41 |
+
- **Reasoning Modules**: Dedicated LLM reasoning layer with 19 specialized functions
|
| 42 |
+
- **Cloud Integration**: BigQuery data access + GCS artifact storage
|
| 43 |
+
|
| 44 |
+
### 🎨 **Multiple Interfaces**
|
| 45 |
+
- **Gradio Web UI** (`chat_ui.py`): Upload files, chat interface, visual plots
|
| 46 |
+
- **CLI Interface** (`src/cli.py`): Command-line workflow automation
|
| 47 |
+
- **REST API** (`src/api/app.py`): Cloud Run-ready FastAPI wrapper
|
| 48 |
+
- **Python SDK**: Direct programmatic access
|
| 49 |
+
|
| 50 |
+
### 📊 **Complete ML Pipeline**
|
| 51 |
+
1. **Data Profiling** → Statistics, types, quality issues
|
| 52 |
+
2. **Data Cleaning** → Smart imputation, outlier handling, type conversion
|
| 53 |
+
3. **Feature Engineering** → Time features, encoding, interactions, ratios
|
| 54 |
+
4. **Model Training** → XGBoost, LightGBM, CatBoost, ensemble methods
|
| 55 |
+
5. **Hyperparameter Tuning** → Optuna-based optimization
|
| 56 |
+
6. **Visualization** → Matplotlib, Plotly, interactive dashboards
|
| 57 |
+
7. **EDA Reports** → Sweetviz, ydata-profiling HTML reports
|
| 58 |
+
8. **Explainability** → SHAP values, feature importance
|
| 59 |
+
|
| 60 |
+
### ⚡ **Performance & Scale**
|
| 61 |
+
- **Token Optimization**: 34% reduction in LLM context (compressed tool schemas)
|
| 62 |
+
- **SQLite Caching**: Memoization of expensive operations with TTL
|
| 63 |
+
- **Polars & DuckDB**: 10-100x faster than pandas for large datasets
|
| 64 |
+
- **Rate Limiting**: Intelligent API call management (Groq: 12K TPM, Gemini: 10 RPM)
|
| 65 |
+
- **Cloud Ready**: FastAPI service for Google Cloud Run deployment
|
| 66 |
+
|
| 67 |
+
---
|
| 68 |
+
|
| 69 |
+
## 🏗️ Architecture
|
| 70 |
+
|
| 71 |
+
### **System Design**
|
| 72 |
+
|
| 73 |
+
```
|
| 74 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 75 |
+
│ User Interfaces │
|
| 76 |
+
│ Gradio UI │ CLI │ REST API │ Python SDK │
|
| 77 |
+
└─────────────────────────┬───────────────────────────────────┘
|
| 78 |
+
▼
|
| 79 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 80 |
+
│ DataScienceCopilot Orchestrator │
|
| 81 |
+
│ • LLM Function Calling (Groq/Gemini) │
|
| 82 |
+
│ • Session Memory Management │
|
| 83 |
+
│ • Tool Execution & Chaining │
|
| 84 |
+
│ • Error Recovery & Retry Logic │
|
| 85 |
+
└─────────────────────────┬───────────────────────────────────┘
|
| 86 |
+
▼
|
| 87 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 88 |
+
│                    82+ Specialized Tools                     │
|
| 89 |
+
│ Data Profiling │ Cleaning │ Feature Engineering │
|
| 90 |
+
│ Model Training │ Visualization │ EDA Reports │
|
| 91 |
+
│ NLP/Text │ Computer Vision │ Time Series │ MLOps │
|
| 92 |
+
└─────────────────────────┬───────────────────────────────────┘
|
| 93 |
+
▼
|
| 94 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 95 |
+
│ Execution & Storage Backends │
|
| 96 |
+
│ Local: Polars, sklearn, XGBoost │
|
| 97 |
+
│ Cloud: BigQuery, Vertex AI, Cloud Storage (planned) │
|
| 98 |
+
│ Cache: SQLite with TTL │
|
| 99 |
+
└─────────────────────────────────────────────────────────────┘
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
### **Tech Stack**
|
| 103 |
+
|
| 104 |
+
| Layer | Technologies |
|
| 105 |
+
|-------|-------------|
|
| 106 |
+
| **LLM** | Groq (llama-3.3-70b), Google Gemini (2.0-flash-exp) |
|
| 107 |
+
| **Data Processing** | Polars, DuckDB, Pandas, PyArrow, BigQuery |
|
| 108 |
+
| **ML/AI** | scikit-learn, XGBoost, LightGBM, CatBoost, Optuna |
|
| 109 |
+
| **Visualization** | Matplotlib, Seaborn, Plotly |
|
| 110 |
+
| **EDA Reports** | Sweetviz, ydata-profiling |
|
| 111 |
+
| **Explainability** | SHAP, LIME |
|
| 112 |
+
| **APIs** | FastAPI, Uvicorn |
|
| 113 |
+
| **UI** | Gradio, Typer + Rich (CLI) |
|
| 114 |
+
| **Storage** | SQLite (cache), CSV, Parquet, Google Cloud Storage |
|
| 115 |
+
| **Cloud** | Google Cloud Run, BigQuery, GCS, Vertex AI (planned) |
|
| 116 |
+
|
| 117 |
+
---
|
| 118 |
+
|
| 119 |
+
## 🚀 Quick Start
|
| 120 |
+
|
| 121 |
+
### **Prerequisites**
|
| 122 |
+
- Python 3.9+
|
| 123 |
+
- API Keys: [Groq](https://console.groq.com) or [Google AI Studio](https://makersuite.google.com/app/apikey)
|
| 124 |
+
|
| 125 |
+
### **Installation**
|
| 126 |
+
|
| 127 |
+
```bash
|
| 128 |
+
# Clone repository
|
| 129 |
+
git clone https://github.com/Surfing-Ninja/Data-Science-Agent.git
|
| 130 |
+
cd Data-Science-Agent
|
| 131 |
+
|
| 132 |
+
# Create virtual environment
|
| 133 |
+
python -m venv .venv
|
| 134 |
+
source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
| 135 |
+
|
| 136 |
+
# Install dependencies
|
| 137 |
+
pip install -r requirements.txt
|
| 138 |
+
|
| 139 |
+
# Set up environment variables
|
| 140 |
+
cp .env.example .env
|
| 141 |
+
# Edit .env and add your API keys:
|
| 142 |
+
# GROQ_API_KEY=your_groq_key
|
| 143 |
+
# GOOGLE_API_KEY=your_google_key (optional)
|
| 144 |
+
# LLM_PROVIDER=groq # or "gemini"
|
| 145 |
+
```
|
| 146 |
+
|
| 147 |
+
### **Usage Examples**
|
| 148 |
+
|
| 149 |
+
#### **1. Gradio Web UI** (Recommended for beginners)
|
| 150 |
+
```bash
|
| 151 |
+
python chat_ui.py
|
| 152 |
+
# Opens at http://localhost:7860
|
| 153 |
+
# Upload CSV → Ask: "Analyze this data and predict house prices"
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
#### **2. CLI Interface**
|
| 157 |
+
```bash
|
| 158 |
+
# Complete workflow
|
| 159 |
+
python src/cli.py analyze data.csv --target price --task "Predict house prices"
|
| 160 |
+
|
| 161 |
+
# Quick profiling
|
| 162 |
+
python src/cli.py profile data.csv
|
| 163 |
+
|
| 164 |
+
# Train models only
|
| 165 |
+
python src/cli.py train cleaned.csv Survived --task-type classification
|
| 166 |
+
```
|
| 167 |
+
|
| 168 |
+
#### **3. Python SDK**
|
| 169 |
+
```python
|
| 170 |
+
from src.orchestrator import DataScienceCopilot
|
| 171 |
+
|
| 172 |
+
# Initialize agent
|
| 173 |
+
agent = DataScienceCopilot(
|
| 174 |
+
provider="groq", # or "gemini"
|
| 175 |
+
reasoning_effort="medium"
|
| 176 |
+
)
|
| 177 |
+
|
| 178 |
+
# Run workflow
|
| 179 |
+
result = agent.analyze(
|
| 180 |
+
file_path="titanic.csv",
|
| 181 |
+
task_description="Build a model to predict passenger survival",
|
| 182 |
+
target_col="Survived"
|
| 183 |
+
)
|
| 184 |
+
|
| 185 |
+
print(f"Status: {result['status']}")
|
| 186 |
+
print(f"Best Model: {result['best_model']}")
|
| 187 |
+
print(f"Accuracy: {result['best_score']}")
|
| 188 |
+
```
|
| 189 |
+
|
| 190 |
+
#### **4. REST API** (Cloud Run Ready)
|
| 191 |
+
```bash
|
| 192 |
+
# Start local server
|
| 193 |
+
cd src/api
|
| 194 |
+
python app.py
|
| 195 |
+
# Server runs at http://localhost:8080
|
| 196 |
+
|
| 197 |
+
# Make API call
|
| 198 |
+
curl -X POST http://localhost:8080/run \
|
| 199 |
+
-F "file=@data.csv" \
|
| 200 |
+
-F "task_description=Analyze and predict churn" \
|
| 201 |
+
-F "target_col=churn"
|
| 202 |
+
```
|
| 203 |
+
|
| 204 |
+
---
|
| 205 |
+
|
| 206 |
+
## 📁 Project Structure
|
| 207 |
+
|
| 208 |
+
```
|
| 209 |
+
Data-Science-Agent/
|
| 210 |
+
├── src/
|
| 211 |
+
│ ├── orchestrator.py # Main agent brain (1,136 lines)
|
| 212 |
+
│ ├── cli.py # CLI interface (346 lines)
|
| 213 |
+
│ ├── api/
|
| 214 |
+
│ │ └── app.py # FastAPI Cloud Run wrapper (331 lines)
|
| 215 |
+
│ ├── bigquery/ # BigQuery integration 🆕
|
| 216 |
+
│ │ ├── __init__.py # BigQuery tools (4 functions)
|
| 217 |
+
│ │ └── client.py # BigQuery client wrapper
|
| 218 |
+
│ ├── storage/ # Artifact storage 🆕
|
| 219 |
+
│ │ ├── artifact_store.py # Local + GCS backends (613 lines)
|
| 220 |
+
│ │ └── helpers.py # Storage helper functions (125 lines)
|
| 221 |
+
│ ├── reasoning/ # LLM reasoning layer 🆕
|
| 222 |
+
│ │ ├── __init__.py # Core reasoning engine (350 lines)
|
| 223 |
+
│ │ ├── data_understanding.py # Data insights (6 functions)
|
| 224 |
+
│ │ ├── model_explanation.py # Model interpretation (6 functions)
|
| 225 |
+
│ │ └── business_summary.py # Business translations (7 functions)
|
| 226 |
+
│ ├── cache/
|
| 227 |
+
│ │ └── cache_manager.py # SQLite caching with TTL
|
| 228 |
+
│ ├── tools/ # 82+ specialized tools
|
| 229 |
+
│ │ ├── data_profiling.py # Dataset analysis
|
| 230 |
+
│ │ ├── data_cleaning.py # Cleaning & preprocessing
|
| 231 |
+
│ │ ├── feature_engineering.py # Feature creation
|
| 232 |
+
│ │ ├── model_training.py # ML training
|
| 233 |
+
│ │ ├── visualization_engine.py # Matplotlib/Seaborn plots
|
| 234 |
+
│ │ ├── plotly_visualizations.py # Interactive charts
|
| 235 |
+
│ │ ├── eda_reports.py # Sweetviz, ydata-profiling
|
| 236 |
+
│ │ ├── advanced_*.py # Advanced features
|
| 237 |
+
│ │ └── tools_registry.py # All 82 tool definitions (1,600+ lines)
|
| 238 |
+
│ └── utils/ # Helper utilities
|
| 239 |
+
│ ├── polars_helpers.py # Data manipulation
|
| 240 |
+
│ └── validation.py # Input validation
|
| 241 |
+
├── chat_ui.py # Gradio web interface (912 lines)
|
| 242 |
+
├── examples/
|
| 243 |
+
│ └── titanic_example.py # Complete workflow demo
|
| 244 |
+
├── outputs/
|
| 245 |
+
│ ├── data/ # Processed datasets
|
| 246 |
+
│ ├── models/ # Trained models (.pkl)
|
| 247 |
+
│ ├── plots/ # Visualizations (.png, .html)
|
| 248 |
+
│ └── reports/ # EDA reports (.html)
|
| 249 |
+
├── cache_db/ # SQLite cache storage
|
| 250 |
+
├── requirements.txt # Python dependencies
|
| 251 |
+
├── .env.example # Environment template
|
| 252 |
+
└── README.md # This file
|
| 253 |
+
```
|
| 254 |
+
|
| 255 |
+
---
|
| 256 |
+
|
| 257 |
+
## 🛠️ Tool Categories (82 Tools Total)
|
| 258 |
+
|
| 259 |
+
### **📊 Data Profiling & Analysis (7 tools)**
|
| 260 |
+
- `profile_dataset`, `detect_data_quality_issues`, `analyze_correlations`, `get_smart_summary`, `compare_datasets`, `calculate_statistics`, `detect_skewness`
|
| 261 |
+
|
| 262 |
+
### **☁️ BigQuery Integration (4 tools)** 🆕
|
| 263 |
+
- `bigquery_profile_table`, `bigquery_load_table`, `bigquery_execute_query`, `bigquery_write_results`
|
| 264 |
+
|
| 265 |
+
### **🧹 Data Cleaning (8 tools)**
|
| 266 |
+
- `clean_missing_values`, `handle_outliers`, `remove_duplicates`, `filter_rows`, `rename_columns`, `drop_columns`, `sort_data`, `fix_data_types`
|
| 267 |
+
|
| 268 |
+
### **🔧 Feature Engineering (13 tools)**
|
| 269 |
+
- `encode_categorical`, `force_numeric_conversion`, `smart_type_inference`, `create_time_features`, `create_interaction_features`, `create_aggregation_features`, `create_ratio_features`, `create_statistical_features`, `create_log_features`, `create_binned_features`, `engineer_text_features`, `auto_feature_engineering`, `auto_feature_selection`
|
| 270 |
+
|
| 271 |
+
### **🤖 Model Training & Tuning (6 tools)**
|
| 272 |
+
- `train_baseline_models`, `hyperparameter_tuning`, `train_ensemble_models`, `perform_cross_validation`, `generate_model_report`, `auto_ml_pipeline`
|
| 273 |
+
|
| 274 |
+
### **📈 Visualization (11 tools)**
|
| 275 |
+
- `generate_all_plots`, `generate_data_quality_plots`, `generate_eda_plots`, `generate_model_performance_plots`, `generate_feature_importance_plot`, `generate_interactive_scatter`, `generate_interactive_histogram`, `generate_interactive_correlation_heatmap`, `generate_interactive_box_plots`, `generate_interactive_time_series`, `generate_plotly_dashboard`
|
| 276 |
+
|
| 277 |
+
### **📊 EDA Reports (3 tools)**
|
| 278 |
+
- `generate_sweetviz_report`, `generate_ydata_profiling_report`, `generate_combined_eda_report`
|
| 279 |
+
|
| 280 |
+
### **🔬 Advanced Analysis (11 tools)**
|
| 281 |
+
- `perform_eda_analysis`, `detect_model_issues`, `detect_anomalies`, `detect_and_handle_multicollinearity`, `perform_statistical_tests`, `analyze_root_cause`, `detect_trends_and_seasonality`, `detect_anomalies_advanced`, `perform_hypothesis_testing`, `analyze_distribution`, `perform_segment_analysis`
|
| 282 |
+
|
| 283 |
+
### **📝 Data Wrangling (3 tools)**
|
| 284 |
+
- `merge_datasets`, `concat_datasets`, `reshape_dataset`
|
| 285 |
+
|
| 286 |
+
### **🚀 MLOps & Production (5 tools)**
|
| 287 |
+
- `monitor_model_drift`, `explain_predictions`, `generate_model_card`, `perform_ab_test_analysis`, `detect_feature_leakage`
|
| 288 |
+
|
| 289 |
+
### **⏰ Time Series (3 tools)**
|
| 290 |
+
- `forecast_time_series`, `detect_seasonality_trends`, `create_time_series_features`
|
| 291 |
+
|
| 292 |
+
### **💼 Business Intelligence (4 tools)**
|
| 293 |
+
- `perform_cohort_analysis`, `perform_rfm_analysis`, `detect_causal_relationships`, `generate_business_insights`
|
| 294 |
+
|
| 295 |
+
### **📚 NLP/Text (4 tools)**
|
| 296 |
+
- `perform_topic_modeling`, `perform_named_entity_recognition`, `analyze_sentiment_advanced`, `perform_text_similarity`
|
| 297 |
+
|
| 298 |
+
### **🖼️ Computer Vision (3 tools)**
|
| 299 |
+
- `extract_image_features`, `perform_image_clustering`, `analyze_tabular_image_hybrid`
|
| 300 |
+
|
| 301 |
+
---
|
| 302 |
+
|
| 303 |
+
## 🎯 Advanced Features
|
| 304 |
+
|
| 305 |
+
### **1. Session Memory**
|
| 306 |
+
The agent remembers context across conversations:
|
| 307 |
+
|
| 308 |
+
```python
|
| 309 |
+
# Conversation 1
|
| 310 |
+
"Train a model on earthquake.csv to predict magnitude"
|
| 311 |
+
→ Agent trains XGBoost, achieves 0.92 R²
|
| 312 |
+
|
| 313 |
+
# Conversation 2 (Same session)
|
| 314 |
+
"Cross-validate it"
|
| 315 |
+
→ Agent knows: model=XGBoost, dataset=earthquake.csv, target=magnitude
|
| 316 |
+
→ Runs 5-fold CV automatically
|
| 317 |
+
```
|
| 318 |
+
|
| 319 |
+
### **2. Code Interpreter**
|
| 320 |
+
Execute custom Python code for tasks beyond predefined tools:
|
| 321 |
+
|
| 322 |
+
```python
|
| 323 |
+
User: "Make a Plotly scatter with custom dropdown filters"
|
| 324 |
+
|
| 325 |
+
Agent: execute_python_code(code='''
|
| 326 |
+
import plotly.graph_objects as go
|
| 327 |
+
df = pd.read_csv('./temp/data.csv')
|
| 328 |
+
# Custom visualization code...
|
| 329 |
+
fig.write_html('./outputs/code/custom_plot.html')
|
| 330 |
+
''')
|
| 331 |
+
```
|
| 332 |
+
|
| 333 |
+
### **3. Token Optimization**
|
| 334 |
+
System stays under LLM token limits even with 82 tools:
|
| 335 |
+
|
| 336 |
+
| Component | Before | After | Savings |
|
| 337 |
+
|-----------|--------|-------|---------|
|
| 338 |
+
| Tool Schemas | 8,193 tokens | 5,463 tokens | 34% |
|
| 339 |
+
| Tool Results | 5,000+ tokens | 50-200 tokens | 90%+ |
|
| 340 |
+
|
| 341 |
+
### **4. Error Recovery**
|
| 342 |
+
Agent learns from errors and auto-corrects:
|
| 343 |
+
|
| 344 |
+
```python
|
| 345 |
+
# Attempt 1
|
| 346 |
+
train_baseline_models(target_col="magnitude")
|
| 347 |
+
→ Error: Column 'magnitude' not found. Hint: Did you mean 'mag'?
|
| 348 |
+
|
| 349 |
+
# Attempt 2 (Automatic)
|
| 350 |
+
train_baseline_models(target_col="mag")
|
| 351 |
+
→ Success! Trained 4 models, best: XGBoost (0.92 R²)
|
| 352 |
+
```
|
| 353 |
+
|
| 354 |
+
---
|
| 355 |
+
|
| 356 |
+
## ☁️ Cloud Features
|
| 357 |
+
|
| 358 |
+
### **1. BigQuery Integration** 🆕
|
| 359 |
+
Direct access to BigQuery tables without local downloads:
|
| 360 |
+
|
| 361 |
+
```python
|
| 362 |
+
# Profile a BigQuery table
|
| 363 |
+
agent.chat("Profile the table project.dataset.sales")
|
| 364 |
+
|
| 365 |
+
# Query and analyze
|
| 366 |
+
agent.chat("Query top 10 customers by revenue from BigQuery")
|
| 367 |
+
|
| 368 |
+
# Write results back
|
| 369 |
+
agent.chat("Write the cleaned data to BigQuery table project.dataset.sales_clean")
|
| 370 |
+
```
|
| 371 |
+
|
| 372 |
+
**Available Tools:**
|
| 373 |
+
- `bigquery_profile_table`: Get statistics for any BigQuery table
|
| 374 |
+
- `bigquery_load_table`: Load BigQuery data into local Polars DataFrame
|
| 375 |
+
- `bigquery_execute_query`: Run SQL queries directly on BigQuery
|
| 376 |
+
- `bigquery_write_results`: Write processed data back to BigQuery
|
| 377 |
+
|
| 378 |
+
**Setup:**
|
| 379 |
+
```bash
|
| 380 |
+
# Install BigQuery dependencies
|
| 381 |
+
pip install google-cloud-bigquery db-dtypes
|
| 382 |
+
|
| 383 |
+
# Set environment variable
|
| 384 |
+
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account-key.json"
|
| 385 |
+
```
|
| 386 |
+
|
| 387 |
+
**Looker-Compatible Schemas:**
|
| 388 |
+
|
| 389 |
+
The project defines stable BigQuery table schemas for BI tools (see [`BIGQUERY_SCHEMAS.md`](BIGQUERY_SCHEMAS.md)):
|
| 390 |
+
- 📊 `model_metrics` - Model performance tracking over time
|
| 391 |
+
- 🎯 `feature_importance` - Feature impact analysis
|
| 392 |
+
- 🔮 `predictions` - Prediction monitoring with actuals
|
| 393 |
+
- 📋 `data_profile_summary` - Data quality metrics
|
| 394 |
+
|
| 395 |
+
**Design Principles:**
|
| 396 |
+
- Stable schemas (no breaking changes without versioning)
|
| 397 |
+
- Consistent snake_case naming
|
| 398 |
+
- Clear dimension/metric separation
|
| 399 |
+
- Dashboard-ready with sample Looker views
|
| 400 |
+
|
| 401 |
+
### **2. Artifact Storage** 🆕
|
| 402 |
+
Unified storage abstraction - switch between local and GCS with zero code changes:
|
| 403 |
+
|
| 404 |
+
```python
|
| 405 |
+
# Local storage (default)
|
| 406 |
+
agent.save_model(model, "my_model.pkl")
|
| 407 |
+
# → Saves to outputs/models/my_model.pkl
|
| 408 |
+
|
| 409 |
+
# GCS storage (automatic when GCS credentials present)
|
| 410 |
+
agent.save_model(model, "my_model.pkl")
|
| 411 |
+
# → Saves to gs://your-bucket/models/my_model_v1.pkl with versioning
|
| 412 |
+
```
|
| 413 |
+
|
| 414 |
+
**Features:**
|
| 415 |
+
- **Automatic Backend Selection**: Uses GCS if credentials available, falls back to local
|
| 416 |
+
- **Versioning**: Automatic version suffixes for GCS artifacts
|
| 417 |
+
- **Metadata**: Stores creation time, size, checksums
|
| 418 |
+
- **Unified API**: Same code works for local and cloud storage
|
| 419 |
+
|
| 420 |
+
**Setup:**
|
| 421 |
+
```bash
|
| 422 |
+
# Install GCS dependencies
|
| 423 |
+
pip install google-cloud-storage
|
| 424 |
+
|
| 425 |
+
# Set bucket (optional, defaults to local)
|
| 426 |
+
export GCS_BUCKET="your-gcs-bucket-name"
|
| 427 |
+
```
|
| 428 |
+
|
| 429 |
+
### **3. Reasoning Modules** 🆕
|
| 430 |
+
Dedicated LLM reasoning layer with clear boundaries (no raw data access, no training decisions):
|
| 431 |
+
|
| 432 |
+
```python
|
| 433 |
+
from reasoning.data_understanding import explain_dataset
|
| 434 |
+
from reasoning.model_explanation import explain_model_performance
|
| 435 |
+
from reasoning.business_summary import create_executive_summary
|
| 436 |
+
|
| 437 |
+
# Data insights
|
| 438 |
+
insights = explain_dataset(summary={
|
| 439 |
+
"rows": 10000,
|
| 440 |
+
"columns": 20,
|
| 441 |
+
"missing_values": {"age": {"count": 150, "percentage": 1.5}}
|
| 442 |
+
})
|
| 443 |
+
|
| 444 |
+
# Model explanations
|
| 445 |
+
explanation = explain_model_performance(metrics={
|
| 446 |
+
"accuracy": 0.95,
|
| 447 |
+
"precision": 0.92,
|
| 448 |
+
"recall": 0.88
|
| 449 |
+
}, task_type="classification")
|
| 450 |
+
|
| 451 |
+
# Business summaries
|
| 452 |
+
summary = create_executive_summary(
|
| 453 |
+
project_results={"model_accuracy": 0.95},
|
| 454 |
+
project_name="churn_prediction",
|
| 455 |
+
business_objective="Reduce customer churn"
|
| 456 |
+
)
|
| 457 |
+
```
|
| 458 |
+
|
| 459 |
+
**19 Reasoning Functions:**
|
| 460 |
+
- **Data Understanding**: explain_dataset, suggest_transformations, identify_feature_engineering_opportunities, explain_missing_values, compare_datasets, plus one additional helper (6 functions)
|
| 461 |
+
- **Model Explanation**: explain_model_performance, interpret_feature_importance, diagnose_model_failure, explain_prediction, compare_models, explain_overfitting (6 functions)
|
| 462 |
+
- **Business Summary**: create_executive_summary, estimate_business_impact, create_stakeholder_report, translate_technical_to_business, prioritize_next_steps, explain_to_customer, assess_deployment_readiness (7 functions)
|
| 463 |
+
|
| 464 |
+
**Design Principles:**
|
| 465 |
+
- ✅ **NO Raw Data Access**: Only summaries/statistics allowed
|
| 466 |
+
- ✅ **NO Training Decisions**: Only explanations, never execution
|
| 467 |
+
- ✅ **Structured Output**: JSON schemas for cacheability
|
| 468 |
+
- ✅ **Dual Backend**: Works with both Gemini and Groq
|
| 469 |
+
|
| 470 |
+
---
|
| 471 |
+
|
| 472 |
+
## 🔧 Configuration
|
| 473 |
+
|
| 474 |
+
### **Environment Variables** (`.env`)
|
| 475 |
+
|
| 476 |
+
```bash
|
| 477 |
+
# LLM Provider
|
| 478 |
+
LLM_PROVIDER=groq # "groq" or "gemini"
|
| 479 |
+
GROQ_API_KEY=your_groq_key
|
| 480 |
+
GOOGLE_API_KEY=your_google_key # Optional
|
| 481 |
+
|
| 482 |
+
# Model Selection
|
| 483 |
+
GROQ_MODEL=llama-3.3-70b-versatile
|
| 484 |
+
GEMINI_MODEL=gemini-2.0-flash-exp
|
| 485 |
+
REASONING_EFFORT=medium # low, medium, high
|
| 486 |
+
|
| 487 |
+
# Cache Settings
|
| 488 |
+
CACHE_DB_PATH=./cache_db/cache.db
|
| 489 |
+
CACHE_TTL_SECONDS=86400 # 24 hours
|
| 490 |
+
|
| 491 |
+
# Cloud Features (Optional)
|
| 492 |
+
GCS_BUCKET=your-gcs-bucket-name # For artifact storage
|
| 493 |
+
GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-key.json # For BigQuery + GCS
|
| 494 |
+
|
| 495 |
+
# Cloud Run (for API deployment)
|
| 496 |
+
PORT=8080
|
| 497 |
+
```
|
| 498 |
+
|
| 499 |
+
### **Provider Comparison**
|
| 500 |
+
|
| 501 |
+
| Feature | Groq | Gemini |
|
| 502 |
+
|---------|------|--------|
|
| 503 |
+
| **Model** | llama-3.3-70b-versatile | gemini-2.0-flash-exp |
|
| 504 |
+
| **Speed** | ⚡ Extremely fast (LPU) | 🚀 Very fast |
|
| 505 |
+
| **Free Tier** | 100K tokens/day | 1,500 requests/day |
|
| 506 |
+
| **Rate Limit** | 12K tokens/min | 10 requests/min |
|
| 507 |
+
| **Best For** | High-volume, low-latency | Free tier, high quota |
|
| 508 |
+
|
| 509 |
+
---
|
| 510 |
+
|
| 511 |
+
## 🚀 Cloud Deployment (Google Cloud Run)
|
| 512 |
+
|
| 513 |
+
### **Deploy REST API**
|
| 514 |
+
|
| 515 |
+
```bash
|
| 516 |
+
# 1. Build Docker image (Dockerfile provided)
|
| 517 |
+
docker build -t data-science-agent .
|
| 518 |
+
|
| 519 |
+
# 2. Push to Google Container Registry
|
| 520 |
+
gcloud builds submit --tag gcr.io/PROJECT_ID/data-science-agent
|
| 521 |
+
|
| 522 |
+
# 3. Deploy to Cloud Run
|
| 523 |
+
gcloud run deploy data-science-agent \
|
| 524 |
+
--image gcr.io/PROJECT_ID/data-science-agent \
|
| 525 |
+
--platform managed \
|
| 526 |
+
--region us-central1 \
|
| 527 |
+
--allow-unauthenticated \
|
| 528 |
+
--memory 4Gi \
|
| 529 |
+
--timeout 3600 \
|
| 530 |
+
--set-env-vars GROQ_API_KEY=your_key,LLM_PROVIDER=groq
|
| 531 |
+
|
| 532 |
+
# 4. Test deployment
|
| 533 |
+
curl -X POST https://your-service-url/run \
|
| 534 |
+
-F "file=@data.csv" \
|
| 535 |
+
-F "task_description=Predict churn"
|
| 536 |
+
```
|
| 537 |
+
|
| 538 |
+
### **API Endpoints**
|
| 539 |
+
|
| 540 |
+
- `GET /` - Health check
|
| 541 |
+
- `GET /health` - Readiness probe
|
| 542 |
+
- `POST /run` - Full analysis workflow
|
| 543 |
+
- `POST /profile` - Quick dataset profiling
|
| 544 |
+
- `GET /tools` - List all available tools
|
| 545 |
+
|
| 546 |
+
---
|
| 547 |
+
|
| 548 |
+
## 🗺️ Roadmap
|
| 549 |
+
|
| 550 |
+
### **Phase 1: Core Agent** ✅ COMPLETE
|
| 551 |
+
- [x] 75 specialized tools
|
| 552 |
+
- [x] Dual LLM support (Groq + Gemini)
|
| 553 |
+
- [x] CLI + Gradio UI
|
| 554 |
+
- [x] SQLite caching
|
| 555 |
+
- [x] Token optimization
|
| 556 |
+
|
| 557 |
+
### **Phase 2: Intelligence** ✅ COMPLETE
|
| 558 |
+
- [x] Session memory
|
| 559 |
+
- [x] Code interpreter
|
| 560 |
+
- [x] Error recovery
|
| 561 |
+
- [x] EDA reports (Sweetviz, ydata-profiling)
|
| 562 |
+
- [x] Interactive Plotly visualizations
|
| 563 |
+
|
| 564 |
+
### **Phase 3: Cloud Native** ✅ COMPLETE
|
| 565 |
+
- [x] FastAPI Cloud Run wrapper with 4 REST endpoints
|
| 566 |
+
- [x] BigQuery integration (4 tools: profile, load, query, write)
|
| 567 |
+
- [x] Artifact Storage abstraction (Local ↔ GCS switching)
|
| 568 |
+
- [x] Reasoning modules for LLM explanations (19 functions)
|
| 569 |
+
- [x] Looker-compatible BigQuery schemas (4 stable tables)
|
| 570 |
+
- [ ] Vertex AI model training (planned)
|
| 571 |
+
- [ ] Cloud Logging & Monitoring (planned)
|
| 572 |
+
|
| 573 |
+
### **Phase 4: Enterprise** 📋 PLANNED
|
| 574 |
+
- [ ] Multi-user authentication
|
| 575 |
+
- [ ] Team workspaces
|
| 576 |
+
- [ ] Model registry
|
| 577 |
+
- [ ] Automated retraining pipelines
|
| 578 |
+
|
| 579 |
+
### **Phase 5: Kaggle Integration** 🎯 FUTURE
|
| 580 |
+
- [ ] Direct Kaggle API integration
|
| 581 |
+
- [ ] Automated competition workflow
|
| 582 |
+
- [ ] Ensemble strategies
|
| 583 |
+
- [ ] Submission automation
|
| 584 |
+
|
| 585 |
+
---
|
| 586 |
+
|
| 587 |
+
## 🤝 Contributing
|
| 588 |
+
|
| 589 |
+
Contributions welcome! Areas for improvement:
|
| 590 |
+
|
| 591 |
+
1. **New Tools**: Time series forecasting, NLP preprocessing, image augmentation
|
| 592 |
+
2. **Cloud Backends**: AWS, Azure support
|
| 593 |
+
3. **Performance**: Optimize tool execution, reduce latency
|
| 594 |
+
4. **UI/UX**: Better visualization, workflow builder
|
| 595 |
+
5. **Documentation**: Tutorials, video guides, blog posts
|
| 596 |
+
|
| 597 |
+
---
|
| 598 |
+
|
| 599 |
+
## 📜 License
|
| 600 |
+
|
| 601 |
+
MIT License - See LICENSE file for details
|
| 602 |
+
|
| 603 |
+
---
|
| 604 |
+
|
| 605 |
+
## 📧 Support & Community
|
| 606 |
+
|
| 607 |
+
- **Issues**: [GitHub Issues](https://github.com/Surfing-Ninja/Data-Science-Agent/issues)
|
| 608 |
+
- **Discussions**: [GitHub Discussions](https://github.com/Surfing-Ninja/Data-Science-Agent/discussions)
|
| 609 |
+
|
| 610 |
+
---
|
| 611 |
+
|
| 612 |
+
## 📊 Project Stats
|
| 613 |
+
|
| 614 |
+
- **Lines of Code**: ~18,000+
|
| 615 |
+
- **Tools**: 82 specialized functions (75 core + 4 BigQuery + 3 storage helpers)
|
| 616 |
+
- **Reasoning Functions**: 19 LLM-powered explanation modules
|
| 617 |
+
- **Supported Models**: 10+ (LR, Ridge, Lasso, RF, XGBoost, LightGBM, CatBoost, etc.)
|
| 618 |
+
- **Visualization Types**: 20+ (static + interactive)
|
| 619 |
+
- **Data Formats**: CSV, Parquet, JSON, BigQuery tables
|
| 620 |
+
- **Cloud Platforms**: Google Cloud (Run, BigQuery, GCS) - AWS/Azure planned
|
| 621 |
+
|
| 622 |
+
---
|
| 623 |
+
|
| 624 |
+
<div align="center">
|
| 625 |
+
|
| 626 |
+
**Built with ❤️ for the Data Science Community**
|
| 627 |
+
|
| 628 |
+
*"Making data science accessible through AI automation"*
|
| 629 |
+
|
| 630 |
+
⭐ Star this repo if you find it useful! ⭐
|
| 631 |
+
|
| 632 |
+
</div>
|
build-and-deploy.ps1
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Build and Deploy Script for Data Science Agent (Windows)
|
| 2 |
+
|
| 3 |
+
Write-Host "🚀 Building and Deploying Data Science Agent..." -ForegroundColor Cyan
|
| 4 |
+
|
| 5 |
+
# Step 1: Build React Frontend
|
| 6 |
+
Write-Host ""
|
| 7 |
+
Write-Host "📦 Building React frontend..." -ForegroundColor Yellow
|
| 8 |
+
Set-Location FRRONTEEEND
|
| 9 |
+
npm.cmd install
|
| 10 |
+
if ($LASTEXITCODE -ne 0) {
|
| 11 |
+
Write-Host "❌ Frontend npm install failed!" -ForegroundColor Red
|
| 12 |
+
exit 1
|
| 13 |
+
}
|
| 14 |
+
npm.cmd run build
|
| 15 |
+
if ($LASTEXITCODE -ne 0) {
|
| 16 |
+
Write-Host "❌ Frontend build failed!" -ForegroundColor Red
|
| 17 |
+
exit 1
|
| 18 |
+
}
|
| 19 |
+
Set-Location ..
|
| 20 |
+
|
| 21 |
+
Write-Host ""
|
| 22 |
+
Write-Host "✅ Frontend built successfully!" -ForegroundColor Green
|
| 23 |
+
Write-Host " Built files are in: FRRONTEEEND\dist" -ForegroundColor Gray
|
| 24 |
+
|
| 25 |
+
# Step 2: Install Python dependencies
|
| 26 |
+
Write-Host ""
|
| 27 |
+
Write-Host "📦 Installing Python dependencies..." -ForegroundColor Yellow
|
| 28 |
+
pip install -r requirements.txt
|
| 29 |
+
if ($LASTEXITCODE -ne 0) {
|
| 30 |
+
Write-Host "⚠️ Some Python dependencies may have failed to install" -ForegroundColor Yellow
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
Write-Host ""
|
| 34 |
+
Write-Host "✅ Build complete!" -ForegroundColor Green
|
| 35 |
+
Write-Host ""
|
| 36 |
+
Write-Host "To run the application:" -ForegroundColor Cyan
|
| 37 |
+
Write-Host " python src\api\app.py" -ForegroundColor White
|
| 38 |
+
Write-Host ""
|
| 39 |
+
Write-Host "Access the app at: http://localhost:8080" -ForegroundColor Green
|
build-and-deploy.sh
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Build and Deploy Script for Data Science Agent
|
| 3 |
+
|
| 4 |
+
set -e # Exit on error
|
| 5 |
+
|
| 6 |
+
echo "🚀 Building and Deploying Data Science Agent..."
|
| 7 |
+
|
| 8 |
+
# Step 1: Build React Frontend
|
| 9 |
+
echo ""
|
| 10 |
+
echo "📦 Building React frontend..."
|
| 11 |
+
cd FRRONTEEEND
|
| 12 |
+
npm install
|
| 13 |
+
npm run build
|
| 14 |
+
cd ..
|
| 15 |
+
|
| 16 |
+
# Step 2: Copy built frontend to deployment location (if needed)
|
| 17 |
+
echo ""
|
| 18 |
+
echo "✅ Frontend built successfully!"
|
| 19 |
+
echo " Built files are in: FRRONTEEEND/dist"
|
| 20 |
+
|
| 21 |
+
# Step 3: Install Python dependencies
|
| 22 |
+
echo ""
|
| 23 |
+
echo "📦 Installing Python dependencies..."
|
| 24 |
+
pip install -r requirements.txt
|
| 25 |
+
|
| 26 |
+
echo ""
|
| 27 |
+
echo "✅ Build complete!"
|
| 28 |
+
echo ""
|
| 29 |
+
echo "To run the application:"
|
| 30 |
+
echo " 1. Backend: python -m uvicorn src.api.app:app --host 0.0.0.0 --port 8080"
|
| 31 |
+
echo " 2. Or use: python src/api/app.py"
|
| 32 |
+
echo ""
|
| 33 |
+
echo "Access the app at: http://localhost:8080"
|
cache_db/.gitkeep
ADDED
|
File without changes
|
chat_ui.py
ADDED
|
@@ -0,0 +1,1073 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AI Agent Data Scientist - Interactive Chat UI
|
| 3 |
+
==============================================
|
| 4 |
+
|
| 5 |
+
A simple web interface to interact with your AI Agent.
|
| 6 |
+
Upload datasets, ask questions, and get AI-powered insights!
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import gradio as gr
|
| 10 |
+
import sys
|
| 11 |
+
import os
|
| 12 |
+
import shutil
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
import traceback
|
| 15 |
+
|
| 16 |
+
# Add src to path
|
| 17 |
+
sys.path.append('src')
|
| 18 |
+
|
| 19 |
+
from tools.data_profiling import profile_dataset, detect_data_quality_issues
|
| 20 |
+
from tools.model_training import train_baseline_models
|
| 21 |
+
|
# Try to import AI agent (optional)
# The UI degrades gracefully: if the orchestrator cannot be imported or
# constructed, AI_ENABLED stays False and the chat falls back to the manual
# commands / quick actions implemented below.
try:
    from orchestrator import DataScienceCopilot
    agent = DataScienceCopilot()
    AI_ENABLED = True
    print("✅ AI Agent loaded successfully!")
    print(f"📊 Model: {agent.model}")
    print(f"🔧 Tools available: {len(agent.tool_functions)}")
except Exception as e:
    print(f"ℹ️ Running in manual mode (AI agent not available)")
    print(f" Error: {str(e)}")
    print("💡 You can still use all the quick actions and tools!")
    AI_ENABLED = False
    agent = None

# Store uploaded file path
# Module-level session state shared by the Gradio callbacks below.
current_file = None  # path of the most recently uploaded dataset (copied into ./temp)
current_profile = None  # cached profile_dataset() result for current_file
last_agent_response = None  # Store last agent response for visualization extraction
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# Helper functions for Gradio 6.x message format
|
| 44 |
+
def add_message(history, role, content):
    """Append one chat turn to *history* (Gradio 6.x message format).

    A ``None`` history is treated as a fresh, empty conversation. When a
    list is supplied it is mutated in place; the list is returned either way.
    """
    messages = [] if history is None else history
    messages.append({"role": role, "content": content})
    return messages
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def add_user_message(history, content):
    """Record *content* as a user turn and return the updated history."""
    return add_message(history, role="user", content=content)
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def add_assistant_message(history, content):
    """Record *content* as an assistant turn and return the updated history."""
    return add_message(history, role="assistant", content=content)
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def update_last_assistant_message(history, content):
    """Overwrite the text of the most recent assistant turn, if there is one.

    The history is left untouched when it is empty/None or when its last
    entry is not an assistant message. Returns the (possibly mutated) history.
    """
    if not history:
        return history
    last_turn = history[-1]
    if last_turn.get("role") == "assistant":
        last_turn["content"] = content
    return history
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def get_last_user_content(history):
    """Return the text of the most recent user turn, or '' when none exists."""
    for entry in reversed(history or []):
        if entry.get("role") == "user":
            return entry.get("content", "")
    return ""
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def analyze_dataset(file, user_message, history):
    """Process uploaded dataset(s) and user message. Supports single or multiple file uploads.

    Generator used as a Gradio event handler. Every ``yield`` must produce a
    4-tuple ``(history, textbox_value, plot_paths, html_report_paths)`` so the
    output count always matches the four bound components.

    Args:
        file: A single uploaded file (path or object with ``.name``), a list of
            them, or ``None`` when only a chat message was sent.
        user_message: The chat message text (may be empty).
        history: Chat history in Gradio 6.x ``{"role", "content"}`` format.
    """
    global current_file, current_profile, last_agent_response

    import hashlib  # hoisted out of the per-file loop (was re-imported per file)

    # Initialize with empty plot list (will collect PNG file paths)
    plots_paths = []
    html_reports = []  # Initialize HTML reports list

    # Initialize history if None
    if history is None:
        history = []

    # Debug: Log the call
    print(f"[DEBUG] analyze_dataset called - file: {file is not None}, message: '{user_message}', current_file: {current_file}")

    try:
        # Handle file uploads (single or multiple)
        if file is not None:
            # file can be a single filepath or a list of filepaths
            files_to_process = file if isinstance(file, list) else [file]

            # Filter out None values
            files_to_process = [f for f in files_to_process if f is not None]

            if len(files_to_process) > 0:
                print(f"[DEBUG] Processing {len(files_to_process)} file(s) upload")

                # Copy all files to simpler paths
                os.makedirs("./temp", exist_ok=True)
                processed_files = []
                seen_files = {}  # Track files by content hash to detect duplicates
                duplicate_count = 0

                for uploaded_file in files_to_process:
                    simple_filename = Path(uploaded_file.name if hasattr(uploaded_file, 'name') else uploaded_file).name
                    file_source = uploaded_file.name if hasattr(uploaded_file, 'name') else uploaded_file

                    # Calculate file hash to detect duplicates (even with different names)
                    hasher = hashlib.md5()
                    with open(file_source, 'rb') as f:
                        # Read file in chunks to handle large files efficiently
                        for chunk in iter(lambda: f.read(8192), b""):
                            hasher.update(chunk)
                    file_hash = hasher.hexdigest()

                    # Check if this exact file was already uploaded
                    if file_hash in seen_files:
                        print(f"[DEBUG] Duplicate file detected: {simple_filename} (same as {seen_files[file_hash]})")
                        duplicate_count += 1
                        continue  # Skip duplicate

                    # Not a duplicate - process it
                    simple_path = f"./temp/{simple_filename}"

                    # Handle filename collision (different files with same name)
                    if os.path.exists(simple_path):
                        # Check if existing file is the same (by comparing with already processed files)
                        existing_in_processed = simple_path in processed_files
                        if not existing_in_processed:
                            # Different file with same name - add suffix
                            base_name = Path(simple_filename).stem
                            extension = Path(simple_filename).suffix
                            counter = 1
                            while os.path.exists(f"./temp/{base_name}_{counter}{extension}"):
                                counter += 1
                            simple_filename = f"{base_name}_{counter}{extension}"
                            simple_path = f"./temp/{simple_filename}"
                            print(f"[DEBUG] Filename collision - renamed to: {simple_filename}")

                    shutil.copy2(file_source, simple_path)
                    processed_files.append(simple_path)
                    seen_files[file_hash] = simple_filename
                    print(f"[DEBUG] Copied file to: {simple_path}")

                # Set current_file to the first file (for single-file operations)
                # For multi-file operations, the agent will use all files from ./temp/
                current_file = processed_files[0] if processed_files else None

                # Only show file upload response if there's no user message
                if not (user_message and user_message.strip()):
                    if len(processed_files) == 0:
                        # All files were duplicates
                        response = f"⚠️ **No New Files Uploaded**\n\n"
                        response += f"All {len(files_to_process)} file(s) were duplicates of already uploaded files.\n\n"
                        response += "Your previously uploaded dataset is still active."
                    elif len(processed_files) == 1:
                        # Single file upload - show detailed profile
                        response = f"📊 **Dataset Uploaded Successfully!**\n\n"
                        if duplicate_count > 0:
                            response += f"ℹ️ *({duplicate_count} duplicate file(s) were skipped)*\n\n"
                        response += f"**File:** {Path(current_file).name}\n\n"

                        # Get basic profile
                        profile = profile_dataset(current_file)
                        current_profile = profile

                        response += f"**Dataset Overview:**\n"
                        response += f"- Rows: {profile['shape']['rows']:,}\n"
                        response += f"- Columns: {profile['shape']['columns']}\n"

                        # Handle memory_usage (can be float or dict)
                        memory = profile.get('memory_usage', 0)
                        if isinstance(memory, dict):
                            memory = memory.get('total_mb', 0)
                        response += f"- Memory: {memory:.2f} MB\n\n"

                        response += f"**Column Types:**\n"
                        response += f"- Numeric: {len(profile['column_types']['numeric'])} columns\n"
                        response += f"- Categorical: {len(profile['column_types']['categorical'])} columns\n"
                        response += f"- Datetime: {len(profile['column_types']['datetime'])} columns\n\n"

                        # Check data quality
                        quality = detect_data_quality_issues(current_file)
                        if quality['critical']:
                            response += f"🔴 **Critical Issues:** {len(quality['critical'])}\n"
                            for issue in quality['critical'][:3]:
                                response += f" - {issue['message']}\n"
                        if quality['warning']:
                            response += f"🟡 **Warnings:** {len(quality['warning'])}\n"
                            for issue in quality['warning'][:3]:
                                response += f" - {issue['message']}\n"
                    else:
                        # Multiple files uploaded
                        response = f"📊 **{len(processed_files)} Datasets Uploaded Successfully!**\n\n"
                        if duplicate_count > 0:
                            response += f"ℹ️ *({duplicate_count} duplicate file(s) were skipped)*\n\n"
                        response += f"**Files:**\n"
                        for i, fp in enumerate(processed_files, 1):
                            response += f"{i}. {Path(fp).name}\n"
                        response += f"\n**💡 You can now use multi-dataset operations!**\n\n"

                    response += f"\n\n💬 **What would you like to do with {'this dataset' if len(processed_files) == 1 else 'these datasets'}?**\n\n"
                    response += "You can ask me to:\n"
                    if len(processed_files) > 1:
                        response += "- **Merge these datasets** (e.g., 'merge customers and orders on customer_id')\n"
                        response += "- **Combine/concatenate** them (e.g., 'combine all monthly sales files')\n"
                    response += "- Train a classification or regression model\n"
                    response += "- Analyze specific columns\n"
                    response += "- Detect outliers\n"
                    response += "- Engineer features\n"
                    response += "- Generate predictions\n"
                    response += "- And much more!\n"

                    # Add assistant message to history
                    history = add_assistant_message(history, response)
                    yield history, "", [], []
                    return
                # If user uploaded file AND sent a message, don't return - continue to process the message
                elif user_message and user_message.strip():
                    # Continue processing the message below
                    pass

        # If user sends a message about the current file
        print(f"[DEBUG] Checking message conditions: user_message={bool(user_message and user_message.strip())}, current_file={bool(current_file)}")
        if user_message and user_message.strip() and current_file:
            print(f"[DEBUG] User message detected. AI_ENABLED={AI_ENABLED}, agent={agent is not None}")
            if AI_ENABLED and agent:
                print(f"[DEBUG] Entering AI Agent block...")
                try:
                    # Show immediate processing message
                    print(f"🤖 AI Agent analyzing: {user_message}")
                    history = add_user_message(history, user_message)
                    history = add_assistant_message(history, "🤖 **AI Agent is thinking...**\n\n⏳ Analyzing your request and planning the workflow...")
                    yield history, "", [], []

                    # Use the AI agent to process the request
                    print(f"📂 File path: {current_file}")
                    print(f"📝 Task: {user_message}")
                    print(f"🚀 Calling agent.analyze()...")

                    agent_response = agent.analyze(
                        file_path=current_file,
                        task_description=user_message,
                        use_cache=False,  # Disable cache to avoid dict hashing issues
                        stream=False
                    )

                    print(f"✅ Agent response received: {agent_response.get('status', 'unknown')}")

                    # Store agent response for visualization extraction
                    last_agent_response = agent_response

                    # Format the response
                    if agent_response.get('status') == 'success':
                        response = f"🤖 **AI Agent Analysis Complete!**\n\n"
                        response += f"{agent_response.get('summary', '')}\n\n"

                        if 'workflow_history' in agent_response and agent_response['workflow_history']:
                            response += f"**Execution Summary:**\n"
                            response += f"- Tools Executed: {len(agent_response['workflow_history'])}\n"
                            response += f"- Iterations: {agent_response.get('iterations', 0)}\n"
                            response += f"- Time: {agent_response.get('execution_time', 0):.1f}s\n\n"

                            # Find and display MODEL TRAINING RESULTS with ALL METRICS
                            model_results = None
                            for step in agent_response['workflow_history']:
                                if step.get('tool') == 'train_baseline_models':
                                    result = step.get('result', {})
                                    if isinstance(result, dict) and 'result' in result:
                                        model_results = result['result']
                                    elif isinstance(result, dict):
                                        model_results = result
                                    break

                            if model_results and 'models' in model_results:
                                response += f"## 🎯 Model Training Results\n\n"
                                task_type = model_results.get('task_type', 'unknown')
                                response += f"**Task Type:** {task_type.title()}\n"
                                response += f"**Features:** {model_results.get('n_features', 0)}\n"
                                response += f"**Training Samples:** {model_results.get('train_size', 0):,}\n"
                                response += f"**Test Samples:** {model_results.get('test_size', 0):,}\n\n"

                                # Show ALL models tested
                                response += "### 📊 All Models Tested:\n\n"
                                models_data = model_results.get('models', {})

                                for model_name, model_info in models_data.items():
                                    if 'test_metrics' in model_info:
                                        metrics = model_info['test_metrics']
                                        response += f"**{model_name}:**\n"

                                        if task_type == 'classification':
                                            response += f"- Accuracy: {metrics.get('accuracy', 0):.4f}\n"
                                            response += f"- Precision: {metrics.get('precision', 0):.4f}\n"
                                            response += f"- Recall: {metrics.get('recall', 0):.4f}\n"
                                            response += f"- F1 Score: {metrics.get('f1', 0):.4f}\n"
                                        else:
                                            response += f"- R² Score: {metrics.get('r2', 0):.4f}\n"
                                            response += f"- RMSE: {metrics.get('rmse', 0):.2f}\n"
                                            response += f"- MAE: {metrics.get('mae', 0):.2f}\n"
                                            response += f"- MAPE: {metrics.get('mape', 0):.2f}%\n"
                                        response += "\n"

                                # Highlight BEST MODEL
                                best_model = model_results.get('best_model', {})
                                if best_model and best_model.get('name'):
                                    response += f"### 🏆 Best Model: **{best_model['name']}**\n"
                                    response += f"Score: {best_model.get('score', 0):.4f}\n\n"

                            # Show workflow execution summary
                            response += "### 🔧 Workflow Steps:\n"
                            for i, step in enumerate(agent_response['workflow_history'], 1):
                                tool_name = step['tool']
                                success = step['result'].get('success', False)
                                icon = "✅" if success else "❌"
                                response += f"{i}. {icon} {tool_name}\n"
                            response += "\n"

                            # Check for plots AND reports in workflow results
                            html_reports = []  # Separate list for HTML reports

                            for step in agent_response['workflow_history']:
                                result = step.get('result', {})

                                # Deep search for plots and reports in nested results
                                def find_plots_and_reports(obj, plots_list, reports_list):
                                    if isinstance(obj, dict):
                                        # Check direct plot/report keys
                                        for key in ['plot_path', 'plot_file', 'output_path', 'html_path', 'report_path',
                                                    'plots', 'plot_paths', 'performance_plots', 'feature_importance_plot']:
                                            if key in obj and obj[key]:
                                                if isinstance(obj[key], list):
                                                    for path in obj[key]:
                                                        if isinstance(path, str) and os.path.exists(path):
                                                            if path.endswith('.html'):
                                                                # Check if it's a report (in reports folder) or interactive plot
                                                                if '/reports/' in path or 'report' in Path(path).stem.lower():
                                                                    reports_list.append(path)
                                                                else:
                                                                    reports_list.append(path)  # Interactive plots also go to reports
                                                            elif path.endswith(('.png', '.jpg', '.jpeg')):
                                                                plots_list.append(path)
                                                elif isinstance(obj[key], str) and os.path.exists(obj[key]):
                                                    if obj[key].endswith('.html'):
                                                        if '/reports/' in obj[key] or 'report' in Path(obj[key]).stem.lower():
                                                            reports_list.append(obj[key])
                                                        else:
                                                            reports_list.append(obj[key])
                                                    elif obj[key].endswith(('.png', '.jpg', '.jpeg')):
                                                        plots_list.append(obj[key])
                                        # Recursively search nested dicts
                                        for value in obj.values():
                                            find_plots_and_reports(value, plots_list, reports_list)

                                find_plots_and_reports(result, plots_paths, html_reports)

                            # Remove duplicates while preserving order
                            plots_paths = list(dict.fromkeys(plots_paths))
                            html_reports = list(dict.fromkeys(html_reports))

                            # Display visualization and report information in response
                            if plots_paths or html_reports:
                                response += f"## 📊 Generated Outputs\n\n"

                                if plots_paths:
                                    response += f"### 📈 Visualizations ({len(plots_paths)} plots)\n"
                                    response += "✅ Plots are displayed in the **Visualization Gallery** below!\n\n"

                                    # List plot files
                                    for i, plot_path in enumerate(plots_paths[:10], 1):
                                        try:
                                            plot_name = Path(plot_path).stem.replace('_', ' ').title()
                                            rel_path = os.path.relpath(plot_path, '.')
                                            response += f"{i}. 📊 **{plot_name}**\n"
                                            response += f" 📁 `{rel_path}`\n\n"
                                        except Exception as e:
                                            response += f"{i}. ❌ Error: {str(e)}\n"

                                if html_reports:
                                    response += f"### 📋 Reports & Interactive Plots ({len(html_reports)} files)\n"
                                    response += "✅ Reports are displayed in the **Reports Viewer** below!\n\n"

                                    # List report files
                                    for i, report_path in enumerate(html_reports[:10], 1):
                                        try:
                                            report_name = Path(report_path).stem.replace('_', ' ').title()
                                            rel_path = os.path.relpath(report_path, '.')
                                            file_size = os.path.getsize(report_path) / 1024  # KB
                                            response += f"{i}. 📄 **{report_name}**\n"
                                            response += f" 📁 `{rel_path}` ({file_size:.1f} KB)\n\n"
                                        except Exception as e:
                                            response += f"{i}. ❌ Error: {str(e)}\n"
                            else:
                                response += "ℹ️ No visualizations or reports were generated in this workflow.\n"
                    else:
                        response = f"⚠️ **AI Agent Status:** {agent_response.get('status', 'unknown')}\n\n"
                        response += f"{agent_response.get('message', agent_response.get('error', 'Unknown error'))}\n"

                    # Update the last assistant message with the response
                    history = update_last_assistant_message(history, response)

                    # Return plot paths for gallery and html_reports for HTML viewer
                    # Store html_reports in a format the HTML component can use
                    yield history, "", plots_paths if plots_paths else [], html_reports if html_reports else []
                    return
                except Exception as e:
                    # sys is imported at module level; the previous local
                    # `import sys` shadowed it needlessly. Only the exception
                    # type is used, so the other sys.exc_info() values are
                    # discarded.
                    exc_type, _, _ = sys.exc_info()
                    response = f"⚠️ **AI Agent Error:**\n\n"
                    response += f"**Error Type:** {exc_type.__name__}\n\n"
                    response += f"**Error Message:** {str(e)}\n\n"
                    response += f"**Full Traceback:**\n```python\n{traceback.format_exc()}\n```\n\n"
                    response += "💡 **Fallback Options:**\n"
                    response += "- Use the **Quick Train** feature on the right\n"
                    response += "- Try manual commands: `profile`, `quality`, `columns`\n"
                    # Update the last assistant message with error
                    history = update_last_assistant_message(history, response)
                    # FIX: yield the full 4-tuple. This path previously yielded
                    # only 3 values, which mismatches the 4 bound Gradio output
                    # components (every other yield in this generator is a
                    # 4-tuple) and would break the error display itself.
                    yield history, "", plots_paths if plots_paths else [], html_reports if html_reports else []
                    return
            else:
                # Manual mode - Handle commands directly
                user_msg_lower = user_message.lower().strip()

                # Handle simple commands manually
                if 'profile' in user_msg_lower:
                    response = "📊 **Dataset Profile:**\n\n"
                    if current_profile:
                        response += f"**Shape:** {current_profile['shape']['rows']:,} rows × {current_profile['shape']['columns']} columns\n\n"
                        response += f"**Column Types:**\n"
                        response += f"- Numeric: {len(current_profile['column_types']['numeric'])} columns\n"
                        response += f"- Categorical: {len(current_profile['column_types']['categorical'])} columns\n"
                        response += f"- Datetime: {len(current_profile['column_types']['datetime'])} columns\n\n"
                        response += f"**Overall Stats:**\n"
                        response += f"- Total cells: {current_profile['overall_stats']['total_cells']:,}\n"
                        response += f"- Null values: {current_profile['overall_stats']['total_nulls']} ({current_profile['overall_stats']['null_percentage']:.1f}%)\n"
                        response += f"- Duplicates: {current_profile['overall_stats']['duplicate_rows']}\n"
                    else:
                        response += "Profile information is available at the top of the chat!"

                elif 'quality' in user_msg_lower or 'issues' in user_msg_lower:
                    quality = detect_data_quality_issues(current_file)
                    response = "🔍 **Data Quality Report:**\n\n"

                    if quality['critical']:
                        response += f"🔴 **Critical Issues:** {len(quality['critical'])}\n"
                        for issue in quality['critical']:
                            response += f" • {issue['message']}\n"
                        response += "\n"

                    if quality['warning']:
                        response += f"🟡 **Warnings:** {len(quality['warning'])}\n"
                        for issue in quality['warning'][:5]:  # Show first 5
                            response += f" • {issue['message']}\n"
                        if len(quality['warning']) > 5:
                            response += f" • ... and {len(quality['warning']) - 5} more\n"
                        response += "\n"

                    if quality['info']:
                        response += f"🔵 **Info:** {len(quality['info'])} observations\n"

                    if not quality['critical'] and not quality['warning'] and not quality['info']:
                        response += "✅ No issues detected! Your data looks good.\n"

                elif 'columns' in user_msg_lower or 'column' in user_msg_lower:
                    if current_profile:
                        response = "📋 **Dataset Columns:**\n\n"
                        for col, info in current_profile['columns'].items():
                            nulls = info.get('null_count', 0)
                            null_pct = (nulls / current_profile['shape']['rows'] * 100) if current_profile['shape']['rows'] > 0 else 0
                            response += f"• **{col}** ({info['type']})\n"
                            response += f" - Nulls: {nulls} ({null_pct:.1f}%)\n"
                            if 'unique' in info:
                                response += f" - Unique: {info['unique']}\n"
                    else:
                        response = "📋 **Columns:** Please upload a file first to see column information."

                elif 'help' in user_msg_lower:
                    response = "💡 **Available Commands:**\n\n"
                    response += "**Manual Commands:**\n"
                    response += "• `profile` - Show detailed dataset statistics\n"
                    response += "• `quality` - Check data quality issues\n"
                    response += "• `columns` - List all columns with details\n"
                    response += "• `help` - Show this help message\n\n"
                    response += "**Quick Actions:**\n"
                    response += "• Use the **Quick Train** panel on the right to train models\n"
                    response += "• Check **Dataset Info** in the sidebar for quick stats\n"

                else:
                    # Default response for unrecognized commands
                    response = f"💬 **You said:** {user_message}\n\n"
                    response += "⚠️ AI agent is not available. I can respond to these commands:\n\n"
                    response += "• `profile` - Show detailed statistics\n"
                    response += "• `quality` - Check data quality\n"
                    response += "• `columns` - List all columns\n"
                    response += "• `help` - Show available commands\n\n"
                    response += "**Or use Quick Train** on the right to train models directly!\n"

                # Add user message and assistant response
                history = add_user_message(history, user_message)
                history = add_assistant_message(history, response)
                yield history, "", [], []
                return

        # If no file is uploaded yet
        if user_message and user_message.strip() and not current_file:
            response = "⚠️ **Please upload a dataset first!**\n\n"
            response += "Click the 'Upload Dataset' button above and select a CSV or Parquet file."
            # Add user message and assistant response
            history = add_user_message(history, user_message)
            history = add_assistant_message(history, response)
            yield history, "", [], []
            return

    except Exception as e:
        error_msg = f"❌ **Error:** {str(e)}\n\n"
        error_msg += "**Traceback:**\n```\n" + traceback.format_exc() + "\n```"
        if user_message:
            # Check if we already added the user message
            last_user = get_last_user_content(history)
            if last_user != user_message:
                history = add_user_message(history, user_message)
                history = add_assistant_message(history, error_msg)
            else:
                history = add_assistant_message(history, error_msg)
        else:
            history = add_assistant_message(history, error_msg)
        yield history, "", [], []
        return

    # Default return if nothing matched
    yield history, "", [], []
| 538 |
+
|
| 539 |
+
|
| 540 |
+
def quick_profile(file):
    """Render a compact dataset summary (name, shape, first columns) for the sidebar.

    Args:
        file: Gradio file object with a ``.name`` path attribute, or None when
            nothing has been uploaded yet.

    Returns:
        A markdown string for the "Dataset Info" panel; error text on failure.
    """
    if file is None:
        return "No file uploaded yet."

    try:
        profile = profile_dataset(file.name)
        columns = profile['columns']

        parts = [f"**{Path(file.name).name}**\n\n"]
        parts.append(f"📊 {profile['shape']['rows']:,} rows × {profile['shape']['columns']} cols\n\n")
        parts.append("**Columns:**\n")
        # Show at most the first 10 columns so the sidebar stays short.
        for col_name, col_meta in list(columns.items())[:10]:
            parts.append(f"- {col_name} ({col_meta['type']})\n")
        if len(columns) > 10:
            parts.append(f"- ... and {len(columns) - 10} more\n")

        return "".join(parts)
    except Exception as e:
        return f"Error: {str(e)}"
def train_model_ui(file, target_col, model_type, test_size, progress=gr.Progress()):
    """Train baseline models from the Quick Train panel, streaming markdown updates.

    Args:
        file: Uploaded dataset (Gradio file object with a ``.name`` path) or None.
        target_col: Name of the target column (surrounding quotes are stripped).
        model_type: "Classification" or "Regression" radio value.
        test_size: Fraction of rows held out for the test split.
        progress: Gradio progress tracker (injected by Gradio).

    Yields:
        Markdown strings shown in the training output panel.
    """
    # BUG FIX: this is a generator function, so `return "<msg>"` only sets
    # StopIteration.value and Gradio never displays the message. Yield the
    # warning first, then return to stop the generator.
    if file is None:
        yield "⚠️ Please upload a dataset first!"
        return

    if not target_col:
        yield "⚠️ Please specify a target column!"
        return

    # Clean up the target column name - remove surrounding quotes if present
    target_col = target_col.strip().strip("'").strip('"')

    try:
        # Show progress
        progress(0, desc="🔄 Loading dataset...")
        yield "⏳ **Training in progress...**\n\n📊 Loading dataset..."

        import time
        time.sleep(0.5)  # Brief pause for UI feedback

        progress(0.2, desc="🔄 Preparing data...")
        yield "⏳ **Training in progress...**\n\n📊 Dataset loaded\n🔄 Preparing data..."

        time.sleep(0.3)
        # Determine problem type from the radio selection
        problem_type = "classification" if model_type == "Classification" else "regression"

        progress(0.4, desc="🤖 Training models...")
        yield "⏳ **Training in progress...**\n\n📊 Dataset loaded\n✅ Data prepared\n🤖 Training multiple models..."

        # Train baseline models (project helper; returns a result dict)
        result = train_baseline_models(
            file.name,
            target_col=target_col,
            task_type=problem_type,
            test_size=test_size
        )

        progress(0.9, desc="📊 Evaluating results...")

        # Check if training was successful
        if result.get('status') == 'error':
            yield f"❌ **Training Failed**\n\n{result.get('message', 'Unknown error')}"
            return

        if 'best_model' not in result:
            yield f"❌ **Training Failed**\n\nNo models were successfully trained. Result: {result}"
            return

        # Get the best model
        best_model_name = result['best_model']['name']
        if not best_model_name:
            yield f"❌ **Training Failed**\n\nNo model could be selected as best model."
            return

        best_model_info = result['models'][best_model_name]
        best_metrics = best_model_info.get('test_metrics', {})

        output = f"✅ **Model Training Complete!**\n\n"
        output += f"## 🏆 Best Model: **{best_model_name}**\n\n"

        output += f"**Dataset Info:**\n"
        output += f"- Features: {result.get('n_features', 0)}\n"
        output += f"- Training samples: {result.get('train_size', 0):,}\n"
        output += f"- Test samples: {result.get('test_size', 0):,}\n\n"

        if problem_type == "classification":
            output += f"**Test Metrics:**\n"
            output += f"- ✅ Accuracy: {best_metrics.get('accuracy', 0):.4f}\n"
            output += f"- 🎯 Precision: {best_metrics.get('precision', 0):.4f}\n"
            output += f"- 📊 Recall: {best_metrics.get('recall', 0):.4f}\n"
            output += f"- 🔥 F1 Score: {best_metrics.get('f1', 0):.4f}\n\n"
        else:
            output += f"**Test Metrics:**\n"
            output += f"- 📈 R² Score: {best_metrics.get('r2', 0):.4f}\n"
            output += f"- 📉 RMSE: {best_metrics.get('rmse', 0):.2f}\n"
            output += f"- 📊 MAE: {best_metrics.get('mae', 0):.2f}\n"
            output += f"- 💯 MAPE: {best_metrics.get('mape', 0):.2f}%\n\n"

        output += f"## 📊 All Models Comparison:\n\n"
        for model_name, model_info in result['models'].items():
            if 'test_metrics' in model_info:
                test_metrics = model_info['test_metrics']
                indicator = "🏆 " if model_name == best_model_name else " "
                if problem_type == "classification":
                    f1 = test_metrics.get('f1', 0)
                    acc = test_metrics.get('accuracy', 0)
                    output += f"{indicator}**{model_name}:**\n"
                    output += f" - F1: {f1:.4f} | Accuracy: {acc:.4f}\n"
                else:
                    r2 = test_metrics.get('r2', 0)
                    rmse = test_metrics.get('rmse', 0)
                    output += f"{indicator}**{model_name}:**\n"
                    output += f" - R²: {r2:.4f} | RMSE: {rmse:.2f}\n"
            elif 'status' in model_info and model_info['status'] == 'error':
                output += f" ❌ **{model_name}:** {model_info.get('message', 'Error')}\n"

        # Display generated plots if available
        plots_to_show = []

        # Check for performance plots (may be a single path or a list)
        if 'performance_plots' in result and result['performance_plots']:
            if isinstance(result['performance_plots'], list):
                plots_to_show.extend(result['performance_plots'])
            else:
                plots_to_show.append(result['performance_plots'])

        # Check for feature importance plot
        if 'feature_importance_plot' in result and result['feature_importance_plot']:
            plots_to_show.append(result['feature_importance_plot'])

        # Embed plots inline as <iframe srcdoc="...">
        if plots_to_show:
            output += f"\n\n📊 **Visualizations:**\n\n"
            for plot_path in plots_to_show:
                if isinstance(plot_path, str) and plot_path.endswith('.html') and os.path.exists(plot_path):
                    try:
                        with open(plot_path, 'r', encoding='utf-8') as f:
                            plot_html = f.read()
                        # Add plot title based on filename
                        plot_name = Path(plot_path).stem.replace('_', ' ').title()
                        output += f"**{plot_name}:**\n"
                        # BUG FIX: content embedded in a double-quoted srcdoc
                        # attribute must be HTML-entity escaped ('&' first,
                        # then '"'), otherwise the attribute terminates early.
                        safe_html = plot_html.replace('&', '&amp;').replace('"', '&quot;')
                        output += f'<iframe srcdoc="{safe_html}" width="100%" height="500" frameborder="0"></iframe>\n\n'
                    except Exception:
                        # Fallback to showing the file path
                        output += f"📁 {Path(plot_path).name}: `{plot_path}`\n"

        progress(1.0, desc="✅ Complete!")
        yield output

    except Exception as e:
        yield f"❌ **Error:** {str(e)}\n\n```\n{traceback.format_exc()}\n```"
def clear_conversation():
    """Reset session state and blank every widget wired to the Clear button.

    Returns:
        A 5-tuple matching the Clear button's outputs:
        (chat history, file upload, text box, gallery, reports HTML).
    """
    global current_file, current_profile
    # Drop the cached upload and its profile so the next upload re-triggers analysis.
    current_file = None
    current_profile = None
    return [], None, "", [], ""
def format_html_reports(html_paths):
    """Render generated HTML reports/plots as a styled gallery for a gr.HTML component.

    Args:
        html_paths: List of paths to .html files produced by the agent; may be
            None or empty, in which case a placeholder message is returned.

    Returns:
        One HTML string containing a card per report, each embedding the
        report's content via an ``<iframe srcdoc="...">``.
    """
    if not html_paths or len(html_paths) == 0:
        return "<div style='text-align:center; padding:40px; color:#666;'>No reports generated yet. Try: 'Generate a quality report' or 'Create interactive visualizations'</div>"

    html_output = """
    <style>
    .report-container {
        padding: 20px;
        background: #f8f9fa;
    }
    .report-card {
        margin-bottom: 30px;
        border: 2px solid #dee2e6;
        border-radius: 12px;
        overflow: hidden;
        background: white;
        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
    }
    .report-header {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 15px 20px;
        font-weight: bold;
        font-size: 18px;
        display: flex;
        justify-content: space-between;
        align-items: center;
    }
    .report-meta {
        font-size: 12px;
        opacity: 0.9;
    }
    .report-iframe {
        width: 100%;
        min-height: 600px;
        border: none;
        background: white;
    }
    .report-footer {
        background: #f8f9fa;
        padding: 10px 20px;
        font-size: 12px;
        color: #666;
        border-top: 1px solid #dee2e6;
    }
    </style>
    <div class="report-container">
    """

    html_output += f"<h2 style='color: #667eea; margin-bottom: 20px;'>📋 {len(html_paths)} Report(s) Generated</h2>"

    for i, html_path in enumerate(html_paths, 1):
        try:
            # Get file metadata
            file_size = os.path.getsize(html_path) / 1024  # KB
            report_title = Path(html_path).stem.replace('_', ' ').title()

            # Read the HTML content
            with open(html_path, 'r', encoding='utf-8') as f:
                html_content = f.read()

            # BUG FIX: content placed in a double-quoted srcdoc attribute must
            # be HTML-entity escaped ('&' first, then '"'); the previous
            # backslash/no-op replacements left unescaped quotes that
            # terminated the attribute early and broke the embedded report.
            escaped_content = html_content.replace('&', '&amp;').replace('"', '&quot;')

            html_output += f"""
            <div class="report-card">
                <div class="report-header">
                    <span>📊 {i}. {report_title}</span>
                    <span class="report-meta">{file_size:.1f} KB</span>
                </div>
                <iframe class="report-iframe" srcdoc="{escaped_content}"></iframe>
                <div class="report-footer">
                    📁 {html_path}
                </div>
            </div>
            """
        except Exception as e:
            # Keep rendering the remaining reports; show an error card for this one.
            html_output += f"""
            <div class="report-card">
                <div class="report-header" style="background: linear-gradient(135deg, #f44336 0%, #e91e63 100%);">
                    <span>❌ Error loading: {Path(html_path).name}</span>
                </div>
                <div style="padding: 20px;">
                    <p><strong>Error:</strong> {str(e)}</p>
                    <p><strong>Path:</strong> {html_path}</p>
                </div>
            </div>
            """

    html_output += "</div>"

    return html_output
def extract_and_display_plots(agent_response):
    """Extract HTML plot paths from an agent response and build a gallery for gr.HTML.

    Args:
        agent_response: Dict returned by the agent run; expected to contain
            ``status`` and ``workflow_history`` (list of steps with a
            ``result`` dict that may nest plot-path keys).

    Returns:
        ``gr.update(value=<html>)`` with an embedded-iframe gallery, or a
        placeholder message when there is nothing to show.
    """
    if not agent_response or agent_response.get('status') != 'success':
        return gr.update(value="<p style='text-align:center; color:#666;'>No visualizations generated yet. Upload a dataset and run analysis!</p>")

    workflow_history = agent_response.get('workflow_history', [])
    if not workflow_history:
        return gr.update(value="<p style='text-align:center; color:#666;'>No visualizations in this workflow.</p>")

    # Find all plots referenced anywhere in the (possibly nested) step results.
    plots_paths = []

    def find_plots(obj, plots_list):
        if isinstance(obj, dict):
            # Check direct plot keys
            for key in ['plot_path', 'plot_file', 'html_path', 'output_path',
                        'plots', 'plot_paths', 'performance_plots', 'feature_importance_plot']:
                if key in obj and obj[key]:
                    if isinstance(obj[key], list):
                        for plot_path in obj[key]:
                            if isinstance(plot_path, str) and plot_path.endswith('.html') and os.path.exists(plot_path):
                                plots_list.append(plot_path)
                    elif isinstance(obj[key], str) and obj[key].endswith('.html') and os.path.exists(obj[key]):
                        plots_list.append(obj[key])
            # Recursively search nested dicts
            for value in obj.values():
                find_plots(value, plots_list)

    for step in workflow_history:
        result = step.get('result', {})
        find_plots(result, plots_paths)

    # Remove duplicates while preserving order
    plots_paths = list(dict.fromkeys(plots_paths))

    if not plots_paths:
        return gr.update(value="<p style='text-align:center; color:#666;'>No plots were generated in this analysis.</p>")

    # Build HTML gallery
    plots_html = f"""
    <div style='padding: 20px;'>
        <h2 style='color: #1f77b4; margin-bottom: 20px;'>📊 Visualization Gallery ({len(plots_paths)} plots)</h2>
    """

    for i, plot_path in enumerate(plots_paths, 1):
        try:
            with open(plot_path, 'r', encoding='utf-8') as f:
                plot_content = f.read()

            plot_name = Path(plot_path).stem.replace('_', ' ').title()

            # BUG FIX: content in a single-quoted srcdoc attribute must be
            # HTML-entity escaped ('&' first, then "'" and '"'); the previous
            # no-op replacements let quotes terminate the attribute early.
            escaped_plot = (plot_content.replace('&', '&amp;')
                                        .replace("'", '&#39;')
                                        .replace('"', '&quot;'))

            plots_html += f"""
            <div style='margin-bottom: 30px; border: 1px solid #ddd; border-radius: 8px; overflow: hidden;'>
                <div style='background: linear-gradient(90deg, #1f77b4, #2ca02c); color: white; padding: 10px 15px; font-weight: bold;'>
                    {i}. {plot_name}
                </div>
                <div style='padding: 10px; background: white;'>
                    <iframe srcdoc='{escaped_plot}'
                            width='100%' height='500' frameborder='0'
                            style='border: none; border-radius: 5px;'></iframe>
                </div>
                <div style='background: #f8f9fa; padding: 8px 15px; font-size: 12px; color: #666;'>
                    📁 {plot_path}
                </div>
            </div>
            """
        except Exception as e:
            # Keep the gallery rendering even if one plot file is unreadable.
            plots_html += f"""
            <div style='margin-bottom: 20px; padding: 15px; border: 1px solid #f44336; border-radius: 5px; background: #ffebee;'>
                <strong>❌ Failed to load: {Path(plot_path).name}</strong><br>
                <small>{str(e)}</small>
            </div>
            """

    plots_html += "</div>"

    return gr.update(value=plots_html)
# Custom CSS for better visual feedback.
# `.status-box` styles the status banner at the top of the chat column
# (applied via elem_classes=["status-box"] on the status Markdown widget).
custom_css = """
.status-box {
    padding: 10px;
    border-radius: 5px;
    background: linear-gradient(90deg, #e8f5e9 0%, #c8e6c9 100%);
    margin-bottom: 10px;
    text-align: center;
    font-weight: bold;
}
"""
# Create the Gradio interface.
# Layout: a 2:1 row (chat on the left, dataset info + quick-train on the
# right), followed by two full-width rows (plot gallery, HTML reports), then
# the event wiring that connects buttons/inputs to the handlers above.
with gr.Blocks(title="AI Agent Data Scientist", theme=gr.themes.Soft(), css=custom_css) as demo:
    gr.Markdown("""
    # 🤖 AI Agent Data Scientist

    Upload your dataset and chat with the AI agent to perform data science tasks!

    **Features:**
    - 📊 Automatic dataset profiling
    - 🤖 Natural language queries
    - 🎯 Model training (classification & regression)
    - 🔍 Data quality analysis
    - 📈 Feature engineering
    - 🎨 **NEW:** Automatic visualization generation!
    - And 59 tools total!
    """)

    # Store agent response for visualization extraction.
    # NOTE(review): this state does not appear to be wired to any event below —
    # presumably reserved for future use; confirm before removing.
    agent_response_state = gr.State(None)

    with gr.Row():
        # Left column - Main chat interface
        with gr.Column(scale=2):
            # Status indicator (styled by .status-box in custom_css)
            status_box = gr.Markdown("🟢 **Ready** - Upload a dataset to begin", elem_classes=["status-box"])

            chatbot = gr.Chatbot(
                label="Chat with AI Agent",
                height=450,
                show_label=True,
                avatar_images=(None, "🤖"),
                sanitize_html=False  # Allow HTML content including iframes
            )

            with gr.Row():
                file_upload = gr.File(
                    label="📁 Upload Dataset(s) (CSV/Parquet) - Single or Multiple Files",
                    file_types=[".csv", ".parquet"],
                    file_count="multiple",  # Allow multiple file uploads
                    type="filepath"
                )

            with gr.Row():
                user_input = gr.Textbox(
                    label="Your Message",
                    placeholder="Ask anything: 'train a model', 'analyze my data', 'generate visualizations'",
                    lines=2,
                    scale=4
                )
                submit_btn = gr.Button("📤 Send", variant="primary", scale=1)

            with gr.Row():
                clear_btn = gr.Button("🗑️ Clear", variant="secondary")

        # Right column - Quick actions and info
        with gr.Column(scale=1):
            gr.Markdown("## 📊 Dataset Info")
            dataset_info = gr.Markdown("Upload a dataset to see information here.")

            gr.Markdown("## 🎯 Quick Train")
            with gr.Group():
                target_column = gr.Textbox(
                    label="Target Column",
                    placeholder="e.g., 'price', 'class', 'label'"
                )
                model_type_choice = gr.Radio(
                    ["Classification", "Regression"],
                    label="Model Type",
                    value="Classification"
                )
                test_size_slider = gr.Slider(
                    0.1, 0.5, 0.3,
                    label="Test Size",
                    step=0.05
                )
                train_btn = gr.Button("🚀 Train Model", variant="primary")

            training_output = gr.Markdown("Training results will appear here.")

            gr.Markdown("""
            ## 💡 Example Queries

            - "Train a classification model to predict [target]"
            - "Show me statistics for [column]"
            - "Detect outliers in the dataset"
            - "What are the most important features?"
            - "Generate a quality report"
            - "Create polynomial features"
            - "Balance the dataset using SMOTE"
            """)

    # Visualization Gallery Section (Full Width)
    with gr.Row():
        with gr.Column():
            gr.Markdown("## 🎨 Visualization Gallery")
            visualization_gallery = gr.Gallery(
                label="Generated Plots (PNG/JPG)",
                show_label=True,
                elem_id="gallery",
                columns=2,
                height=400
            )

    # Reports Viewer Section (Full Width)
    with gr.Row():
        with gr.Column():
            gr.Markdown("## 📋 Reports & Interactive Visualizations")
            gr.Markdown("*HTML reports and interactive Plotly charts will be displayed here*")
            reports_viewer = gr.HTML(
                value="<div style='text-align:center; padding:40px; color:#666;'>No reports generated yet. Try: 'Generate a quality report' or 'Create interactive visualizations'</div>",
                elem_id="reports_viewer"
            )

    # Create state to hold HTML report paths (filled by analyze_dataset,
    # consumed by format_html_reports in the chained .then() handlers below).
    html_reports_state = gr.State([])

    # Event handlers with streaming support.
    # Pattern: run analyze_dataset first, then render any HTML reports it
    # produced into the reports viewer.
    submit_result = submit_btn.click(
        fn=analyze_dataset,
        inputs=[file_upload, user_input, chatbot],
        outputs=[chatbot, user_input, visualization_gallery, html_reports_state],
        show_progress="full"  # Show progress bar
    )
    submit_result.then(
        fn=format_html_reports,
        inputs=[html_reports_state],
        outputs=[reports_viewer]
    )

    # Pressing Enter in the textbox behaves like clicking Send.
    user_input_result = user_input.submit(
        fn=analyze_dataset,
        inputs=[file_upload, user_input, chatbot],
        outputs=[chatbot, user_input, visualization_gallery, html_reports_state],
        show_progress="full"
    )
    user_input_result.then(
        fn=format_html_reports,
        inputs=[html_reports_state],
        outputs=[reports_viewer]
    )

    # Uploading a file triggers an automatic analysis pass (empty message),
    # then refreshes the sidebar profile and the reports viewer.
    file_result = file_upload.change(
        fn=analyze_dataset,
        inputs=[file_upload, gr.Textbox(value="", visible=False), chatbot],
        outputs=[chatbot, user_input, visualization_gallery, html_reports_state],
        show_progress="full"
    )
    file_result.then(
        fn=quick_profile,
        inputs=[file_upload],
        outputs=[dataset_info]
    )
    file_result.then(
        fn=format_html_reports,
        inputs=[html_reports_state],
        outputs=[reports_viewer]
    )

    train_btn.click(
        fn=train_model_ui,
        inputs=[file_upload, target_column, model_type_choice, test_size_slider],
        outputs=[training_output],
        show_progress="full"  # Show progress bar
    )

    clear_btn.click(
        clear_conversation,
        outputs=[chatbot, file_upload, user_input, visualization_gallery, reports_viewer]
    )
if __name__ == "__main__":
    print("=" * 70)
    print("🚀 Starting AI Agent Data Scientist Chat UI...")
    print("=" * 70)
    print("\n🌐 The UI will open in your browser automatically.")
    print("💡 If it doesn't, copy the URL shown below.\n")

    # Launch the Blocks app defined above.
    demo.launch(
        share=False,  # Set to True to create a public link
        server_name="0.0.0.0",  # Listen on all interfaces (required for container hosting)
        server_port=7865,  # Changed port to avoid conflict
        show_error=True,
        inbrowser=True  # Auto-open browser
    )
cloudbuild.yaml
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Google Cloud Build configuration for automated deployments
# Triggered on git push to main branch

steps:
  # Step 1: Build the container image (tagged with both the commit SHA and 'latest')
  - name: 'gcr.io/cloud-builders/docker'
    args:
      - 'build'
      - '-t'
      - 'gcr.io/$PROJECT_ID/data-science-agent:$COMMIT_SHA'
      - '-t'
      - 'gcr.io/$PROJECT_ID/data-science-agent:latest'
      - '.'
    timeout: 600s

  # Step 2: Push the container image to Container Registry
  - name: 'gcr.io/cloud-builders/docker'
    args:
      - 'push'
      - 'gcr.io/$PROJECT_ID/data-science-agent:$COMMIT_SHA'

  - name: 'gcr.io/cloud-builders/docker'
    args:
      - 'push'
      - 'gcr.io/$PROJECT_ID/data-science-agent:latest'

  # Step 3: Deploy to Cloud Run (the SHA-tagged image, so deploys are reproducible)
  - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk'
    entrypoint: gcloud
    args:
      - 'run'
      - 'deploy'
      - 'data-science-agent'
      - '--image'
      - 'gcr.io/$PROJECT_ID/data-science-agent:$COMMIT_SHA'
      - '--region'
      - 'us-central1'
      - '--platform'
      - 'managed'
      - '--allow-unauthenticated'
      - '--memory'
      - '4Gi'
      - '--cpu'
      - '2'
      - '--timeout'
      - '900'
      - '--max-instances'
      - '10'
      - '--min-instances'
      - '0'
      - '--concurrency'
      - '10'
      - '--set-env-vars'
      - 'LLM_PROVIDER=groq,REASONING_EFFORT=medium,CACHE_TTL_SECONDS=86400'
      # Secrets must already exist in Secret Manager (see deploy.sh)
      - '--set-secrets'
      - 'GROQ_API_KEY=GROQ_API_KEY:latest,GOOGLE_API_KEY=GOOGLE_API_KEY:latest,GOOGLE_APPLICATION_CREDENTIALS=GOOGLE_APPLICATION_CREDENTIALS:latest'

# Build timeout
timeout: 1200s

# Images to push to Container Registry
images:
  - 'gcr.io/$PROJECT_ID/data-science-agent:$COMMIT_SHA'
  - 'gcr.io/$PROJECT_ID/data-science-agent:latest'

# Build options
options:
  machineType: 'N1_HIGHCPU_8'
  logging: CLOUD_LOGGING_ONLY
data/.gitkeep
ADDED
|
File without changes
|
deploy.sh
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash
# Manual deployment script for Google Cloud Run
# Use this for one-off deployments or CI/CD pipeline integration

set -e  # Exit on error

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

echo -e "${GREEN}🚀 Data Science Agent - Cloud Run Deployment${NC}"
echo "=================================================="

# Check if gcloud is installed
if ! command -v gcloud &> /dev/null; then
    echo -e "${RED}❌ Error: gcloud CLI not found. Install it from: https://cloud.google.com/sdk/install${NC}"
    exit 1
fi

# Get GCP Project ID (env var takes precedence over the gcloud default)
if [ -z "$GCP_PROJECT_ID" ]; then
    echo -e "${YELLOW}⚠️  GCP_PROJECT_ID not set. Using gcloud default project...${NC}"
    GCP_PROJECT_ID=$(gcloud config get-value project 2>/dev/null)

    if [ -z "$GCP_PROJECT_ID" ]; then
        echo -e "${RED}❌ Error: No GCP project configured. Run: gcloud config set project YOUR_PROJECT_ID${NC}"
        exit 1
    fi
fi

echo -e "${GREEN}📋 Project ID: ${GCP_PROJECT_ID}${NC}"

# Configuration (all overridable via environment variables)
SERVICE_NAME="data-science-agent"
REGION="${CLOUD_RUN_REGION:-us-central1}"
IMAGE_NAME="gcr.io/${GCP_PROJECT_ID}/${SERVICE_NAME}"
MEMORY="${MEMORY:-4Gi}"
CPU="${CPU:-2}"
MAX_INSTANCES="${MAX_INSTANCES:-10}"
TIMEOUT="${TIMEOUT:-900}"

echo "Region: ${REGION}"
echo "Image: ${IMAGE_NAME}:latest"
echo "Memory: ${MEMORY}"
echo "CPU: ${CPU}"
echo ""

# Step 1: Enable required APIs
echo -e "${YELLOW}🔧 Step 1/5: Enabling required Google Cloud APIs...${NC}"
gcloud services enable \
    cloudbuild.googleapis.com \
    run.googleapis.com \
    containerregistry.googleapis.com \
    secretmanager.googleapis.com \
    --project=${GCP_PROJECT_ID} \
    --quiet

echo -e "${GREEN}✅ APIs enabled${NC}"
echo ""

# Step 2: Create secrets (if not exist)
echo -e "${YELLOW}🔐 Step 2/5: Checking secrets...${NC}"

# create_secret_if_not_exists NAME VALUE
# Creates a Secret Manager secret from VALUE unless it already exists;
# prints manual instructions when VALUE is empty.
create_secret_if_not_exists() {
    local secret_name=$1
    local secret_value=$2

    if gcloud secrets describe ${secret_name} --project=${GCP_PROJECT_ID} &>/dev/null; then
        echo "  ℹ️  Secret ${secret_name} already exists"
    else
        if [ -n "${secret_value}" ]; then
            echo "  ➕ Creating secret: ${secret_name}"
            echo -n "${secret_value}" | gcloud secrets create ${secret_name} \
                --data-file=- \
                --project=${GCP_PROJECT_ID} \
                --quiet
        else
            echo -e "  ${YELLOW}⚠️  ${secret_name} not provided. You'll need to create it manually:${NC}"
            echo "     gcloud secrets create ${secret_name} --data-file=- --project=${GCP_PROJECT_ID}"
        fi
    fi
}

create_secret_if_not_exists "GROQ_API_KEY" "${GROQ_API_KEY}"
create_secret_if_not_exists "GOOGLE_API_KEY" "${GOOGLE_API_KEY}"

echo -e "${GREEN}✅ Secrets checked${NC}"
echo ""

# Step 3: Build container image
echo -e "${YELLOW}🏗️  Step 3/5: Building container image...${NC}"
gcloud builds submit \
    --tag ${IMAGE_NAME}:latest \
    --project=${GCP_PROJECT_ID} \
    --timeout=600s \
    .

echo -e "${GREEN}✅ Container built: ${IMAGE_NAME}:latest${NC}"
echo ""

# Step 4: Deploy to Cloud Run
echo -e "${YELLOW}🚀 Step 4/5: Deploying to Cloud Run...${NC}"

# Build the gcloud command as a string so secret flags can be appended
# conditionally below (executed with eval).
DEPLOY_CMD="gcloud run deploy ${SERVICE_NAME} \
    --image ${IMAGE_NAME}:latest \
    --platform managed \
    --region ${REGION} \
    --allow-unauthenticated \
    --memory ${MEMORY} \
    --cpu ${CPU} \
    --timeout ${TIMEOUT} \
    --max-instances ${MAX_INSTANCES} \
    --min-instances 0 \
    --concurrency 10 \
    --set-env-vars LLM_PROVIDER=groq,REASONING_EFFORT=medium,CACHE_TTL_SECONDS=86400,ARTIFACT_BACKEND=local \
    --project ${GCP_PROJECT_ID}"

# Add secrets if they exist
if gcloud secrets describe GROQ_API_KEY --project=${GCP_PROJECT_ID} &>/dev/null; then
    DEPLOY_CMD="${DEPLOY_CMD} --set-secrets GROQ_API_KEY=GROQ_API_KEY:latest"
fi

if gcloud secrets describe GOOGLE_API_KEY --project=${GCP_PROJECT_ID} &>/dev/null; then
    DEPLOY_CMD="${DEPLOY_CMD} --set-secrets GOOGLE_API_KEY=GOOGLE_API_KEY:latest"
fi

# Execute deployment
eval ${DEPLOY_CMD}

echo -e "${GREEN}✅ Deployment complete${NC}"
echo ""

# Step 5: Get service URL
echo -e "${YELLOW}🌐 Step 5/5: Retrieving service URL...${NC}"
SERVICE_URL=$(gcloud run services describe ${SERVICE_NAME} \
    --region ${REGION} \
    --project ${GCP_PROJECT_ID} \
    --format 'value(status.url)')

echo ""
echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}✅ DEPLOYMENT SUCCESSFUL!${NC}"
echo -e "${GREEN}========================================${NC}"
echo ""
echo -e "🌐 Service URL: ${GREEN}${SERVICE_URL}${NC}"
echo ""
echo "📝 Test endpoints:"
echo "   Health check:"
echo "   curl ${SERVICE_URL}/health"
echo ""
echo "   List tools:"
echo "   curl ${SERVICE_URL}/tools"
echo ""
echo "   Run analysis:"
echo "   curl -X POST ${SERVICE_URL}/run \\"
echo "        -F 'file=@data.csv' \\"
echo "        -F 'task_description=Analyze this dataset and predict the target column'"
echo ""
echo -e "${YELLOW}📊 View logs:${NC}"
echo "   gcloud run logs read ${SERVICE_NAME} --region ${REGION} --project ${GCP_PROJECT_ID} --limit 50"
echo ""
echo -e "${YELLOW}🔧 Manage service:${NC}"
echo "   gcloud run services describe ${SERVICE_NAME} --region ${REGION} --project ${GCP_PROJECT_ID}"
echo ""

# Save service URL to file for use by other scripts
echo "${SERVICE_URL}" > .cloud_run_url
echo -e "${GREEN}💾 Service URL saved to .cloud_run_url${NC}"
examples/titanic_example.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Titanic Example - Demonstrating the complete Data Science Copilot workflow
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
# Add src to path
|
| 10 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
| 11 |
+
|
| 12 |
+
from orchestrator import DataScienceCopilot
|
| 13 |
+
from rich.console import Console
|
| 14 |
+
from rich.panel import Panel
|
| 15 |
+
|
| 16 |
+
console = Console()
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def main():
    """
    Complete example using the Titanic dataset.

    This demonstrates the full workflow:
    1. Dataset profiling
    2. Quality issue detection
    3. Data cleaning
    4. Feature engineering
    5. Model training
    6. Report generation
    """
    console.print(Panel.fit(
        "🚢 Titanic Survival Prediction - Complete Workflow Example",
        style="bold blue"
    ))

    # Setup
    titanic_path = "./data/titanic.csv"

    # Check if dataset exists
    if not Path(titanic_path).exists():
        console.print("\n[yellow]⚠ Titanic dataset not found at ./data/titanic.csv[/yellow]")
        console.print("[yellow]Please download it from: https://www.kaggle.com/c/titanic/data[/yellow]")
        console.print("[yellow]Or place your own CSV file in the data directory[/yellow]\n")

        # Fall back to a placeholder path so the rest of the demo still runs
        console.print("[blue]Using sample dataset path for demonstration...[/blue]\n")
        titanic_path = "your_dataset.csv"  # User should replace this

    # Initialize copilot
    console.print("\n[bold]Step 1: Initialize Data Science Copilot[/bold]")
    try:
        copilot = DataScienceCopilot(reasoning_effort="medium")
        console.print("[green]✓ Copilot initialized successfully[/green]")
    except Exception as e:
        console.print(f"[red]✗ Error: {e}[/red]")
        console.print("[yellow]Make sure to set GROQ_API_KEY in .env file[/yellow]")
        return

    # Define the task
    task_description = """
    Analyze the Titanic dataset and build a model to predict passenger survival.

    Key objectives:
    1. Understand the data structure and identify quality issues
    2. Handle missing values appropriately
    3. Engineer relevant features from available data (e.g., family size, titles from names)
    4. Train and compare multiple baseline models
    5. Identify the most important features for prediction
    6. Provide recommendations for improvement

    Target: Achieve competitive performance (aim for 50-70th percentile on Kaggle leaderboard)
    """

    target_column = "Survived"

    console.print("\n[bold]Step 2: Run Complete Analysis Workflow[/bold]")
    console.print(f"Dataset: {titanic_path}")
    console.print(f"Target: {target_column}")
    console.print("Task: Predict passenger survival\n")  # plain string: no placeholders

    # Run analysis
    try:
        result = copilot.analyze(
            file_path=titanic_path,
            task_description=task_description,
            target_col=target_column,
            use_cache=True,
            max_iterations=15  # Allow more iterations for complex workflow
        )

        # Display results
        if result["status"] == "success":
            console.print("\n[green]✓ Analysis Complete![/green]\n")

            # Display summary
            console.print(Panel(
                result["summary"],
                title="📋 Final Analysis Summary",
                border_style="green"
            ))

            # Display workflow steps
            console.print("\n[bold]🔧 Workflow Steps Executed:[/bold]")
            for i, step in enumerate(result["workflow_history"], 1):
                tool = step["tool"]
                success = step["result"].get("success", False)
                icon = "✓" if success else "✗"
                color = "green" if success else "red"
                console.print(f"{i}. [{color}]{icon}[/{color}] {tool}")

            # Display statistics
            console.print("\n[bold]📊 Execution Statistics:[/bold]")
            console.print(f"  Total Iterations: {result['iterations']}")
            console.print(f"  API Calls Made: {result['api_calls']}")
            console.print(f"  Execution Time: {result['execution_time']}s")

            # Check for trained models
            console.print("\n[bold]🤖 Model Training Results:[/bold]")
            for step in result["workflow_history"]:
                if step["tool"] == "train_baseline_models" and step["result"].get("success"):
                    models_result = step["result"]["result"]
                    best_model = models_result.get("best_model", {})
                    console.print(f"  Best Model: {best_model.get('name')}")
                    # BUG FIX: the old code formatted the score with :.4f
                    # unconditionally, which raises TypeError when the score
                    # is missing (None). Format only when it is numeric.
                    score = best_model.get("score")
                    score_text = f"{score:.4f}" if isinstance(score, (int, float)) else "n/a"
                    console.print(f"  Score: {score_text}")
                    console.print(f"  Model Path: {best_model.get('model_path')}")

            # Save results
            output_file = "./outputs/reports/titanic_analysis.json"
            Path(output_file).parent.mkdir(parents=True, exist_ok=True)

            import json
            with open(output_file, "w") as f:
                # default=str keeps the dump from crashing on values that are
                # not natively JSON-serializable (paths, timestamps, ...).
                json.dump(result, f, indent=2, default=str)

            console.print(f"\n[cyan]💾 Full results saved to: {output_file}[/cyan]")

            # Next steps
            console.print("\n[bold]🎯 Next Steps:[/bold]")
            console.print("  1. Review the generated models in ./outputs/models/")
            console.print("  2. Check data quality reports in ./outputs/reports/")
            console.print("  3. Examine cleaned datasets in ./outputs/data/")
            console.print("  4. Use the best model for predictions on new data")

        elif result["status"] == "error":
            console.print(f"\n[red]✗ Analysis failed: {result['error']}[/red]")
            console.print(f"Error type: {result['error_type']}")

        else:
            console.print(f"\n[yellow]⚠ Analysis incomplete: {result.get('message')}[/yellow]")

    except Exception as e:
        console.print(f"\n[red]✗ Unexpected error: {e}[/red]")
        import traceback
        console.print(traceback.format_exc())

    # Cache statistics
    console.print("\n[bold]📦 Cache Statistics:[/bold]")
    cache_stats = copilot.get_cache_stats()
    console.print(f"  Valid Entries: {cache_stats['valid_entries']}")
    console.print(f"  Cache Size: {cache_stats['size_mb']} MB")


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core Dependencies
|
| 2 |
+
groq==0.11.0
|
| 3 |
+
python-dotenv==1.0.0
|
| 4 |
+
|
| 5 |
+
# Data Processing
|
| 6 |
+
polars>=0.20.3
|
| 7 |
+
duckdb>=0.10.0
|
| 8 |
+
pyarrow>=14.0.1
|
| 9 |
+
pandas>=2.2.0 # Updated for Python 3.13 compatibility
|
| 10 |
+
|
| 11 |
+
# Machine Learning
|
| 12 |
+
scikit-learn>=1.4.0
|
| 13 |
+
xgboost>=2.0.3
|
| 14 |
+
lightgbm>=4.6.0
|
| 15 |
+
catboost>=1.2.8
|
| 16 |
+
optuna>=3.5.0
|
| 17 |
+
|
| 18 |
+
# Explainability
|
| 19 |
+
shap>=0.44.1
|
| 20 |
+
|
| 21 |
+
# Advanced ML Tools
|
| 22 |
+
imbalanced-learn>=0.12.0
|
| 23 |
+
|
| 24 |
+
# Statistical Analysis
|
| 25 |
+
scipy>=1.11.4
|
| 26 |
+
statsmodels>=0.14.1
|
| 27 |
+
|
| 28 |
+
# Visualization
|
| 29 |
+
matplotlib>=3.8.2
|
| 30 |
+
seaborn>=0.13.1
|
| 31 |
+
plotly>=5.18.0 # Interactive visualizations
|
| 32 |
+
|
| 33 |
+
# EDA Report Generation
|
| 34 |
+
sweetviz>=2.3.1 # Beautiful fast EDA reports
|
| 35 |
+
ydata-profiling>=4.17.0 # Updated for Python 3.13 compatibility
|
| 36 |
+
|
| 37 |
+
# User Interface
|
| 38 |
+
# gradio>=5.49.1 # Replaced with React frontend
|
| 39 |
+
|
| 40 |
+
# REST API (Cloud Run)
|
| 41 |
+
fastapi>=0.109.0
|
| 42 |
+
uvicorn>=0.25.0
|
| 43 |
+
python-multipart>=0.0.6 # For file uploads
|
| 44 |
+
|
| 45 |
+
# Text Processing
|
| 46 |
+
textblob>=0.17.1
|
| 47 |
+
|
| 48 |
+
# Time Series Forecasting
|
| 49 |
+
prophet>=1.1.5
|
| 50 |
+
holidays>=0.38
|
| 51 |
+
|
| 52 |
+
# MLOps & Explainability
|
| 53 |
+
lime==0.2.0.1
|
| 54 |
+
fairlearn==0.10.0
|
| 55 |
+
|
| 56 |
+
# NLP (Optional - Uncomment for advanced NLP tools)
|
| 57 |
+
# These are optional but recommended for full NLP capabilities
|
| 58 |
+
# spacy==3.7.2 # For named entity recognition (perform_named_entity_recognition)
|
| 59 |
+
# transformers==4.35.2 # For transformer-based sentiment & topic modeling
|
| 60 |
+
# sentence-transformers==2.2.2 # For semantic text similarity
|
| 61 |
+
# bertopic==0.16.0 # For advanced topic modeling
|
| 62 |
+
|
| 63 |
+
# Computer Vision (Optional - Uncomment for CV tools)
|
| 64 |
+
# These are optional but recommended for full CV capabilities
|
| 65 |
+
# torch==2.1.0 # For CNN-based image feature extraction
|
| 66 |
+
# torchvision==0.16.0 # For pre-trained models (ResNet, EfficientNet, VGG)
|
| 67 |
+
Pillow==10.1.0 # For basic image processing
|
| 68 |
+
#opencv-python==4.8.1 # For advanced image processing & color features
|
| 69 |
+
|
| 70 |
+
# Business Intelligence (Optional - Uncomment for advanced BI tools)
|
| 71 |
+
# These are optional but add specialized capabilities
|
| 72 |
+
# lifetimes==0.11.3 # For customer lifetime value modeling
|
| 73 |
+
# econml==0.15.0 # For advanced causal inference
|
| 74 |
+
|
| 75 |
+
# CLI & UI
|
| 76 |
+
typer==0.9.0
|
| 77 |
+
rich==13.7.0
|
| 78 |
+
tqdm==4.66.1
|
| 79 |
+
|
| 80 |
+
# Utilities
|
| 81 |
+
pydantic==2.5.3
|
| 82 |
+
joblib==1.3.2
|
| 83 |
+
|
| 84 |
+
# Google Cloud Integration
|
| 85 |
+
google-cloud-bigquery==3.14.1
|
| 86 |
+
google-cloud-storage==2.14.0 # For GCS artifact storage
|
| 87 |
+
google-auth==2.25.2
|
| 88 |
+
google-generativeai==0.3.2 # For Gemini LLM support
|
| 89 |
+
|
| 90 |
+
# Testing
|
| 91 |
+
pytest==7.4.3
|
| 92 |
+
pytest-mock==3.12.0
|
| 93 |
+
pytest-cov==4.1.0
|
| 94 |
+
|
| 95 |
+
# Development
|
| 96 |
+
black==23.12.1
|
| 97 |
+
flake8==7.0.0
|
| 98 |
+
mypy==1.8.0
|
setup-deployment.sh
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Quick setup script for macOS deployment prerequisites
#
# Installs Homebrew, Docker Desktop and the Google Cloud SDK if missing,
# then prints the authentication/deployment steps the user must run next.

set -e

# ANSI color codes used in user-facing output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

echo -e "${BLUE}🔧 Data Science Agent - Deployment Setup${NC}"
echo "=========================================="
echo ""

# Check if Homebrew is installed
if ! command -v brew &> /dev/null; then
    echo -e "${RED}❌ Homebrew not found${NC}"
    echo "Installing Homebrew..."
    /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
else
    echo -e "${GREEN}✅ Homebrew installed${NC}"
fi

# Install Docker Desktop
if ! command -v docker &> /dev/null; then
    echo -e "${YELLOW}📦 Installing Docker Desktop...${NC}"
    brew install --cask docker
    echo -e "${GREEN}✅ Docker Desktop installed${NC}"
    echo -e "${YELLOW}⚠️  Please start Docker Desktop application, then run this script again${NC}"
    exit 0
else
    echo -e "${GREEN}✅ Docker installed${NC}"
fi

# Check if Docker daemon is running
if ! docker info &> /dev/null; then
    echo -e "${YELLOW}⚠️  Docker is installed but not running${NC}"
    echo "Please start Docker Desktop application, then run this script again"
    exit 0
fi

# Install Google Cloud SDK
if ! command -v gcloud &> /dev/null; then
    echo -e "${YELLOW}☁️  Installing Google Cloud SDK...${NC}"
    brew install --cask google-cloud-sdk
    echo -e "${GREEN}✅ Google Cloud SDK installed${NC}"

    echo ""
    echo -e "${YELLOW}📝 Next steps:${NC}"
    echo "1. Restart your terminal to load gcloud"
    echo "2. Run: gcloud auth login"
    echo "3. Run: gcloud auth application-default login"
    echo "4. Run: gcloud config set project YOUR_PROJECT_ID"
    echo "5. Run: ./deploy.sh"
else
    echo -e "${GREEN}✅ Google Cloud SDK installed${NC}"
fi

echo ""
echo -e "${BLUE}========================================${NC}"
echo -e "${GREEN}✅ Setup complete!${NC}"
echo ""
echo "Next steps:"
echo "1. Authenticate with Google Cloud:"
# BUG FIX: these echoes embed ANSI color variables, so they need -e;
# without it the raw escape sequences are printed literally.
echo -e "   ${YELLOW}gcloud auth login${NC}"
echo -e "   ${YELLOW}gcloud auth application-default login${NC}"
echo ""
echo "2. Set your GCP project:"
echo -e "   ${YELLOW}gcloud config set project YOUR_PROJECT_ID${NC}"
echo ""
echo "3. Set your API keys:"
echo -e "   ${YELLOW}export GROQ_API_KEY='your-groq-key'${NC}"
echo -e "   ${YELLOW}export GOOGLE_API_KEY='your-google-key'${NC}"
echo ""
echo "4. Deploy to Cloud Run:"
echo -e "   ${YELLOW}./deploy.sh${NC}"
echo ""
|
src/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Data Science Copilot - AI-powered data science automation."""
|
| 2 |
+
|
| 3 |
+
__version__ = "0.1.0"
|
| 4 |
+
|
| 5 |
+
from .orchestrator import DataScienceCopilot
|
| 6 |
+
|
| 7 |
+
__all__ = ["DataScienceCopilot"]
|
src/api/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Cloud Run API Module
|
| 3 |
+
FastAPI wrapper for DataScienceCopilot
|
| 4 |
+
"""
|
src/api/app.py
ADDED
|
@@ -0,0 +1,513 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
FastAPI Application for Google Cloud Run
|
| 3 |
+
Thin HTTP wrapper around DataScienceCopilot - No logic changes, just API exposure.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
import tempfile
|
| 9 |
+
import shutil
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import Optional, Dict, Any, List
|
| 12 |
+
import logging
|
| 13 |
+
from dotenv import load_dotenv
|
| 14 |
+
|
| 15 |
+
# Load environment variables from .env file
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
from fastapi import FastAPI, File, UploadFile, Form, HTTPException, Request
|
| 19 |
+
from fastapi.responses import JSONResponse, FileResponse
|
| 20 |
+
from fastapi.staticfiles import StaticFiles
|
| 21 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 22 |
+
from pydantic import BaseModel
|
| 23 |
+
|
| 24 |
+
# Add src to path for imports
|
| 25 |
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
| 26 |
+
|
| 27 |
+
from orchestrator import DataScienceCopilot
|
| 28 |
+
|
| 29 |
+
# Configure logging
|
| 30 |
+
logging.basicConfig(level=logging.INFO)
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
|
| 33 |
+
# Initialize FastAPI
|
| 34 |
+
app = FastAPI(
|
| 35 |
+
title="Data Science Agent API",
|
| 36 |
+
description="Cloud Run wrapper for autonomous data science workflows",
|
| 37 |
+
version="1.0.0"
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
# Enable CORS for frontend
|
| 41 |
+
app.add_middleware(
|
| 42 |
+
CORSMiddleware,
|
| 43 |
+
allow_origins=["*"], # Configure this properly in production
|
| 44 |
+
allow_credentials=True,
|
| 45 |
+
allow_methods=["*"],
|
| 46 |
+
allow_headers=["*"],
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
# Initialize agent once (singleton pattern for stateless service)
|
| 50 |
+
# Agent itself is stateless - no conversation memory between requests
|
| 51 |
+
agent: Optional[DataScienceCopilot] = None
|
| 52 |
+
|
| 53 |
+
# Mount static files for React frontend
|
| 54 |
+
frontend_path = Path(__file__).parent.parent.parent / "FRRONTEEEND" / "dist"
|
| 55 |
+
if frontend_path.exists():
|
| 56 |
+
app.mount("/assets", StaticFiles(directory=str(frontend_path / "assets")), name="assets")
|
| 57 |
+
logger.info(f"✅ Frontend assets mounted from {frontend_path}")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@app.on_event("startup")
async def startup_event():
    """Build the shared DataScienceCopilot singleton when the service boots.

    The agent itself is stateless; one instance serves all requests.
    A failure here is re-raised so Cloud Run marks the instance unhealthy.
    """
    global agent
    logger.info("Initializing DataScienceCopilot...")
    try:
        provider_name = os.getenv("LLM_PROVIDER", "groq")
        agent = DataScienceCopilot(
            reasoning_effort="medium",
            provider=provider_name,
        )
    except Exception as e:
        logger.error(f"❌ Failed to initialize agent: {e}")
        raise
    logger.info(f"✅ Agent initialized with provider: {agent.provider}")
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
@app.get("/api/health")
async def root():
    """Lightweight health/info endpoint consumed by the frontend."""
    if agent is not None:
        provider = agent.provider
        tool_count = len(agent.tool_functions)
    else:
        provider = "not initialized"
        tool_count = 0
    return {
        "service": "Data Science Agent API",
        "status": "healthy",
        "provider": provider,
        "tools_available": tool_count,
    }
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
@app.get("/health")
async def health_check():
    """Cloud Run health probe.

    Responds 200 once the agent singleton is initialized; 503 otherwise so
    the platform keeps the instance out of rotation until startup finishes.
    """
    if agent is None:
        raise HTTPException(status_code=503, detail="Agent not initialized")

    payload = {
        "status": "healthy",
        "agent_ready": True,
        "provider": agent.provider,
        "tools_count": len(agent.tool_functions),
    }
    return payload
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class AnalysisRequest(BaseModel):
    """JSON request body accepted by the analysis endpoint."""

    task_description: str               # Natural-language description of the task
    target_col: Optional[str] = None    # Target column for supervised tasks
    use_cache: bool = True              # Reuse cached intermediate results
    max_iterations: int = 20            # Upper bound on workflow steps
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
@app.post("/run")
async def run_analysis(
    file: UploadFile = File(..., description="Dataset file (CSV or Parquet)"),
    task_description: str = Form(..., description="Natural language task description"),
    target_col: Optional[str] = Form(None, description="Target column name for prediction"),
    use_cache: bool = Form(True, description="Enable caching for expensive operations"),
    max_iterations: int = Form(20, description="Maximum workflow iterations")
) -> JSONResponse:
    """
    Run complete data science workflow on uploaded dataset.

    This is a thin wrapper - all logic lives in DataScienceCopilot.analyze().

    Args:
        file: CSV or Parquet file upload
        task_description: Natural language description of the task
        target_col: Optional target column for ML tasks
        use_cache: Whether to use cached results
        max_iterations: Maximum number of workflow steps

    Returns:
        JSON response with analysis results, workflow history, and execution stats

    Raises:
        HTTPException: 503 if the agent is not initialized, 400 for a missing
            filename or unsupported format, 500 if the workflow fails.

    Example:
        ```bash
        curl -X POST http://localhost:8080/run \
            -F "file=@data.csv" \
            -F "task_description=Analyze this dataset and predict house prices" \
            -F "target_col=price"
        ```
    """
    if agent is None:
        raise HTTPException(status_code=503, detail="Agent not initialized")

    # BUG FIX: file.filename may be None on malformed multipart uploads,
    # which previously crashed with AttributeError instead of a clean 400.
    if not file.filename:
        raise HTTPException(status_code=400, detail="Missing filename in upload.")

    # Validate file format
    filename = file.filename.lower()
    if not (filename.endswith('.csv') or filename.endswith('.parquet')):
        raise HTTPException(
            status_code=400,
            detail="Invalid file format. Only CSV and Parquet files are supported."
        )

    # Use /tmp for Cloud Run (ephemeral storage)
    temp_dir = Path("/tmp") / "data_science_agent"
    temp_dir.mkdir(parents=True, exist_ok=True)

    temp_file_path = None

    try:
        # Save uploaded file to temporary location.
        # SECURITY: Path(...).name strips any directory components so a
        # crafted filename (e.g. "../../etc/passwd") cannot escape temp_dir.
        temp_file_path = temp_dir / Path(file.filename).name
        logger.info(f"Saving uploaded file to: {temp_file_path}")

        with open(temp_file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        logger.info(f"File saved successfully: {file.filename} ({os.path.getsize(temp_file_path)} bytes)")

        # Call existing agent logic - NO CHANGES to orchestrator
        logger.info(f"Starting analysis with task: {task_description}")
        result = agent.analyze(
            file_path=str(temp_file_path),
            task_description=task_description,
            target_col=target_col,
            use_cache=use_cache,
            max_iterations=max_iterations
        )

        logger.info(f"Analysis completed: {result.get('status')}")

        # Filter out non-JSON-serializable objects (like matplotlib/plotly Figures)
        def make_json_serializable(obj):
            """Recursively convert objects to JSON-serializable format."""
            if isinstance(obj, dict):
                return {k: make_json_serializable(v) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [make_json_serializable(item) for item in obj]
            elif hasattr(obj, '__class__') and obj.__class__.__name__ in ['Figure', 'Axes', 'Artist']:
                # Skip matplotlib/plotly Figure objects
                return f"<{obj.__class__.__name__} object - see artifacts>"
            elif isinstance(obj, (str, int, float, bool, type(None))):
                return obj
            else:
                # Try to convert to string for other types
                try:
                    return str(obj)
                # BUG FIX: was a bare `except:`, which also swallows
                # SystemExit/KeyboardInterrupt; narrow it to Exception.
                except Exception:
                    return f"<{type(obj).__name__}>"

        serializable_result = make_json_serializable(result)

        # Return result as-is from orchestrator
        return JSONResponse(
            content={
                "success": result.get("status") == "success",
                "result": serializable_result,
                "metadata": {
                    "filename": file.filename,
                    "task": task_description,
                    "target": target_col,
                    "provider": agent.provider
                }
            },
            status_code=200
        )

    except HTTPException:
        # Let deliberate HTTP errors propagate unchanged (don't wrap as 500).
        raise
    except Exception as e:
        logger.error(f"Analysis failed: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail={
                "error": str(e),
                "error_type": type(e).__name__,
                "message": "Analysis workflow failed. Check logs for details."
            }
        )

    finally:
        # Cleanup temporary file
        if temp_file_path and temp_file_path.exists():
            try:
                temp_file_path.unlink()
                logger.info(f"Cleaned up temporary file: {temp_file_path}")
            except Exception as e:
                logger.warning(f"Failed to cleanup temp file: {e}")
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
@app.post("/profile")
async def profile_dataset(
    file: UploadFile = File(..., description="Dataset file (CSV or Parquet)")
) -> JSONResponse:
    """
    Quick dataset profiling without full workflow.

    Returns basic statistics, data types, and quality issues.
    Useful for initial data exploration without running full analysis.

    Raises:
        HTTPException: 503 if the agent is not initialized, 400 for a missing
            filename or unsupported format, 500 if profiling fails.

    Example:
        ```bash
        curl -X POST http://localhost:8080/profile \
            -F "file=@data.csv"
        ```
    """
    if agent is None:
        raise HTTPException(status_code=503, detail="Agent not initialized")

    # BUG FIX: file.filename may be None on malformed multipart uploads;
    # fail with a clean 400 instead of an AttributeError.
    if not file.filename:
        raise HTTPException(status_code=400, detail="Missing filename in upload.")

    filename = file.filename.lower()
    if not (filename.endswith('.csv') or filename.endswith('.parquet')):
        raise HTTPException(
            status_code=400,
            detail="Invalid file format. Only CSV and Parquet files are supported."
        )

    temp_dir = Path("/tmp") / "data_science_agent"
    temp_dir.mkdir(parents=True, exist_ok=True)
    temp_file_path = None

    try:
        # Save file temporarily.
        # SECURITY: Path(...).name strips directory components so a crafted
        # filename cannot escape temp_dir (path traversal).
        temp_file_path = temp_dir / Path(file.filename).name
        with open(temp_file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Import profiling tool directly
        from tools.data_profiling import profile_dataset as profile_tool
        from tools.data_profiling import detect_data_quality_issues

        # Run profiling tools
        logger.info(f"Profiling dataset: {file.filename}")
        profile_result = profile_tool(str(temp_file_path))
        quality_result = detect_data_quality_issues(str(temp_file_path))

        return JSONResponse(
            content={
                "success": True,
                "filename": file.filename,
                "profile": profile_result,
                "quality_issues": quality_result
            },
            status_code=200
        )

    except HTTPException:
        # Let deliberate HTTP errors propagate unchanged.
        raise
    except Exception as e:
        logger.error(f"Profiling failed: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail={
                "error": str(e),
                "error_type": type(e).__name__
            }
        )

    finally:
        if temp_file_path and temp_file_path.exists():
            try:
                temp_file_path.unlink()
            except Exception as e:
                logger.warning(f"Failed to cleanup temp file: {e}")
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
@app.get("/tools")
async def list_tools():
    """
    List all available tools in the agent.

    Returns tool names organized by category.
    Useful for understanding agent capabilities.
    """
    if agent is None:
        raise HTTPException(status_code=503, detail="Agent not initialized")

    from tools.tools_registry import get_tools_by_category

    tool_names = list(agent.tool_functions.keys())
    return {
        "total_tools": len(tool_names),
        "tools_by_category": get_tools_by_category(),
        "all_tools": tool_names,
    }
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
class ChatMessage(BaseModel):
    """Chat message model.

    One conversational turn in a /chat request. The /chat endpoint treats
    role == "user" as a prompt and maps any other role to Gemini's "model"
    role when building chat history.
    """
    role: str  # 'user' or 'assistant'
    content: str  # raw message text for this turn
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
class ChatRequest(BaseModel):
    """Chat request model.

    Payload for POST /chat: the full ordered message history (oldest
    first) plus a streaming flag.
    """
    messages: List[ChatMessage]  # conversation history; last user message is the active prompt
    stream: bool = False  # NOTE(review): not honored by the current /chat handler — responses are always non-streaming
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
@app.post("/chat")
async def chat(request: ChatRequest) -> JSONResponse:
    """
    Chat endpoint for conversational interface.

    Processes chat messages and returns agent responses.
    Uses the Google Gemini API with the full message history as context.

    Args:
        request: Chat request with message history

    Returns:
        JSON response with agent's reply

    Raises:
        HTTPException: 503 if the agent is not initialized, 400 if the
            history contains no user message, 500 on configuration or
            Gemini API errors.
    """
    if agent is None:
        raise HTTPException(status_code=503, detail="Agent not initialized")

    try:
        # Extract the latest user message
        user_messages = [msg for msg in request.messages if msg.role == "user"]
        if not user_messages:
            raise HTTPException(status_code=400, detail="No user message found")

        latest_message = user_messages[-1].content

        # Check for API key
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise HTTPException(
                status_code=500,
                detail="GOOGLE_API_KEY not configured. Please set the environment variable."
            )

        # Use Google Gemini API (imported lazily so the app can start even
        # when this endpoint is unused)
        import google.generativeai as genai

        logger.info(f"Configuring Gemini with API key (length: {len(api_key)})")
        genai.configure(api_key=api_key)

        # Resolve the model name once so the response metadata below always
        # matches the model actually used (previously the response hardcoded
        # "gemini-2.0-flash-exp" regardless of GEMINI_MODEL).
        model_name = os.getenv("GEMINI_MODEL", "gemini-2.5-flash-lite")

        # Initialize Gemini model
        model = genai.GenerativeModel(
            model_name=model_name,
            system_instruction="You are a Senior Data Science Autonomous Agent. You help users with end-to-end machine learning, data profiling, visualization, and strategic insights. Use a professional, technical yet accessible tone. Provide code snippets in Python if requested. You have access to tools for data analysis, ML training, visualization, and more."
        )

        # Convert messages to Gemini format (exclude system message, just conversation)
        chat_history = []
        for msg in request.messages[:-1]:  # Exclude the latest message
            chat_history.append({
                # Gemini only understands "user" and "model" roles.
                "role": "user" if msg.role == "user" else "model",
                "parts": [msg.content]
            })

        # Start chat with history
        chat = model.start_chat(history=chat_history)

        # Send the latest message
        response = chat.send_message(latest_message)

        assistant_message = response.text

        return JSONResponse(
            content={
                "success": True,
                "message": assistant_message,
                "model": model_name,
                "provider": "gemini"
            },
            status_code=200
        )

    except HTTPException:
        # Re-raise intentional HTTP errors (400 / 500 above) unchanged;
        # previously they were swallowed by the generic handler below and
        # re-wrapped as opaque 500s.
        raise
    except Exception as e:
        logger.error(f"Chat failed: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail={
                "error": str(e),
                "error_type": type(e).__name__
            }
        )
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
# Error handlers
|
| 427 |
+
@app.exception_handler(HTTPException)
async def http_exception_handler(request, exc):
    """Render HTTPExceptions in the API's uniform error envelope."""
    payload = {
        "success": False,
        "error": exc.detail,
        "status_code": exc.status_code,
    }
    return JSONResponse(status_code=exc.status_code, content=payload)
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
@app.exception_handler(Exception)
async def general_exception_handler(request, exc):
    """Catch-all handler: log the failure, answer with a generic 500 envelope."""
    logger.error(f"Unhandled exception: {str(exc)}", exc_info=True)
    payload = {
        "success": False,
        "error": "Internal server error",
        "detail": str(exc),
        "error_type": type(exc).__name__,
    }
    return JSONResponse(status_code=500, content=payload)
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
@app.get("/outputs/{file_path:path}")
async def serve_output_files(file_path: str):
    """
    Serve generated output files (reports, plots, models, etc.).

    Args:
        file_path: Path relative to the ./outputs directory.

    Raises:
        HTTPException: 403 on directory-traversal attempts, 404 when the
            file does not exist, 400 when the path is not a regular file.
    """
    output_path = Path("./outputs") / file_path

    # Security: reject directory traversal BEFORE probing the filesystem.
    # (Previously exists()/is_file() ran first, leaking information about
    # paths outside the outputs directory.)
    try:
        output_path.resolve().relative_to(Path("./outputs").resolve())
    except ValueError:
        raise HTTPException(status_code=403, detail="Access denied")

    if not output_path.exists():
        raise HTTPException(status_code=404, detail=f"File not found: {file_path}")

    if not output_path.is_file():
        raise HTTPException(status_code=400, detail="Path is not a file")

    return FileResponse(output_path)
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
@app.get("/{full_path:path}")
async def serve_frontend(full_path: str):
    """
    Serve React frontend for all non-API routes.
    This should be the last route defined.

    Unknown paths fall back to index.html for client-side routing.
    """
    frontend_path = Path(__file__).parent.parent.parent / "FRRONTEEEND" / "dist"

    # Try to serve the requested file, but only if it resolves INSIDE the
    # dist directory (the original served frontend_path / full_path with no
    # traversal check, so "../.." could escape the frontend tree).
    file_path = frontend_path / full_path
    try:
        resolved = file_path.resolve()
        resolved.relative_to(frontend_path.resolve())
        if resolved.is_file():
            return FileResponse(resolved)
    except ValueError:
        # Traversal attempt — fall through to the SPA index / 404 below.
        pass

    # Default to index.html for client-side routing
    index_path = frontend_path / "index.html"
    if index_path.exists():
        return FileResponse(index_path)

    # Frontend not built
    raise HTTPException(
        status_code=404,
        detail="Frontend not found. Please build the frontend first: cd FRRONTEEEND && npm run build"
    )
|
| 500 |
+
|
| 501 |
+
|
| 502 |
+
# Cloud Run injects the listen port via the PORT environment variable.
if __name__ == "__main__":
    import uvicorn

    listen_port = int(os.getenv("PORT", 8080))
    uvicorn.run("app:app", host="0.0.0.0", port=listen_port, log_level="info")
|
src/cache/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Cache module initialization."""
|
| 2 |
+
|
| 3 |
+
from .cache_manager import CacheManager
|
| 4 |
+
|
| 5 |
+
__all__ = ["CacheManager"]
|
src/cache/cache_manager.py
ADDED
|
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Cache Manager for Data Science Copilot
|
| 3 |
+
Uses SQLite for persistent caching of API responses and computation results.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import hashlib
|
| 7 |
+
import json
|
| 8 |
+
import sqlite3
|
| 9 |
+
import time
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import Any, Optional
|
| 12 |
+
import pickle
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class CacheManager:
    """
    Manages caching of LLM responses and expensive computations.

    Uses SQLite for persistence and supports TTL-based invalidation.
    Cache keys are generated from file hashes and operation parameters.
    """

    def __init__(self, db_path: str = "./cache_db/cache.db", ttl_seconds: int = 86400):
        """
        Initialize cache manager.

        Args:
            db_path: Path to SQLite database file
            ttl_seconds: Time-to-live for cache entries (default 24 hours)
        """
        self.db_path = Path(db_path)
        self.ttl_seconds = ttl_seconds

        # Ensure cache directory exists before SQLite tries to open the file
        self.db_path.parent.mkdir(parents=True, exist_ok=True)

        # Initialize database
        self._init_db()

    def _create_schema(self, conn: sqlite3.Connection) -> None:
        """Create the cache table and its expiry index on an open connection."""
        cursor = conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS cache (
                key TEXT PRIMARY KEY,
                value BLOB NOT NULL,
                created_at INTEGER NOT NULL,
                expires_at INTEGER NOT NULL,
                metadata TEXT
            )
        """)
        # Index on expires_at makes clear_expired() a range scan
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_expires_at
            ON cache(expires_at)
        """)
        conn.commit()

    def _init_db(self) -> None:
        """Create cache table if it doesn't exist; recreate the DB file if corrupted."""
        try:
            conn = sqlite3.connect(self.db_path)
            try:
                self._create_schema(conn)
            finally:
                # Always release the connection, even if schema creation fails
                conn.close()
            print(f"✅ Cache database initialized at {self.db_path}")
        except Exception as e:
            print(f"⚠️ Error initializing cache database: {e}")
            print("   Attempting to recreate database...")
            try:
                # Remove the (presumably corrupted) database and start fresh
                if self.db_path.exists():
                    self.db_path.unlink()

                conn = sqlite3.connect(self.db_path)
                try:
                    self._create_schema(conn)
                finally:
                    conn.close()
                print("✅ Cache database recreated successfully")
            except Exception as e2:
                print(f"❌ Failed to recreate cache database: {e2}")
                print("   Cache functionality will be disabled")

    def _generate_key(self, *args, **kwargs) -> str:
        """
        Generate a unique cache key from arguments.

        Args:
            *args: Positional arguments to hash
            **kwargs: Keyword arguments to hash

        Returns:
            MD5 hash of the arguments (non-cryptographic fingerprint only)
        """
        # sort_keys makes the key deterministic regardless of kwargs order
        key_data = json.dumps({"args": args, "kwargs": kwargs}, sort_keys=True)
        return hashlib.md5(key_data.encode()).hexdigest()

    def get(self, key: str) -> Optional[Any]:
        """
        Retrieve value from cache.

        Args:
            key: Cache key

        Returns:
            Cached value if exists and not expired, None otherwise
            (including on any database or deserialization error)
        """
        try:
            conn = sqlite3.connect(self.db_path)
            try:
                cursor = conn.cursor()
                current_time = int(time.time())
                cursor.execute("""
                    SELECT value, expires_at
                    FROM cache
                    WHERE key = ? AND expires_at > ?
                """, (key, current_time))
                result = cursor.fetchone()
            finally:
                # Previously the connection leaked if execute() raised
                conn.close()

            if result:
                value_blob, _expires_at = result
                # Deserialize using pickle for complex Python objects.
                # NOTE: pickle is acceptable here because the cache file is
                # local and written only by this process — never load a
                # cache database from an untrusted source.
                return pickle.loads(value_blob)
            return None
        except sqlite3.OperationalError as e:
            print(f"⚠️ Cache read error: {e}")
            print("   Reinitializing cache database...")
            self._init_db()
            return None
        except Exception as e:
            # Covers pickle deserialization failures as well: treat as a miss
            print(f"⚠️ Unexpected cache error: {e}")
            return None

    def set(self, key: str, value: Any, ttl_override: Optional[int] = None,
            metadata: Optional[dict] = None) -> None:
        """
        Store value in cache.

        Args:
            key: Cache key
            value: Value to cache (must be pickleable)
            ttl_override: Optional override for TTL (seconds)
            metadata: Optional metadata to store with cache entry
        """
        try:
            # Serialize before touching the database so a pickling error
            # never leaves a half-open connection behind
            value_blob = pickle.dumps(value)
            metadata_json = json.dumps(metadata) if metadata else None

            current_time = int(time.time())
            ttl = ttl_override if ttl_override is not None else self.ttl_seconds
            expires_at = current_time + ttl

            conn = sqlite3.connect(self.db_path)
            try:
                conn.execute("""
                    INSERT OR REPLACE INTO cache (key, value, created_at, expires_at, metadata)
                    VALUES (?, ?, ?, ?, ?)
                """, (key, value_blob, current_time, expires_at, metadata_json))
                conn.commit()
            finally:
                conn.close()
        except sqlite3.OperationalError as e:
            print(f"⚠️ Cache write error: {e}")
            print("   Reinitializing cache database...")
            self._init_db()
        except Exception as e:
            print(f"⚠️ Unexpected cache error during write: {e}")

    def invalidate(self, key: str) -> bool:
        """
        Remove specific entry from cache.

        Args:
            key: Cache key to invalidate

        Returns:
            True if entry was removed, False if not found
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.execute("DELETE FROM cache WHERE key = ?", (key,))
            deleted = cursor.rowcount > 0
            conn.commit()
        finally:
            conn.close()
        return deleted

    def clear_expired(self) -> int:
        """
        Remove all expired entries from cache.

        Returns:
            Number of entries removed
        """
        conn = sqlite3.connect(self.db_path)
        try:
            current_time = int(time.time())
            cursor = conn.execute("DELETE FROM cache WHERE expires_at <= ?", (current_time,))
            deleted = cursor.rowcount
            conn.commit()
        finally:
            conn.close()
        return deleted

    def clear_all(self) -> None:
        """Remove all entries from cache."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute("DELETE FROM cache")
            conn.commit()
        finally:
            conn.close()

    def get_stats(self) -> dict:
        """
        Get cache statistics.

        Returns:
            Dictionary with cache stats (total entries, valid/expired counts, size)
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            current_time = int(time.time())

            # Total entries
            cursor.execute("SELECT COUNT(*) FROM cache")
            total = cursor.fetchone()[0]

            # Valid (non-expired) entries
            cursor.execute("SELECT COUNT(*) FROM cache WHERE expires_at > ?", (current_time,))
            valid = cursor.fetchone()[0]

            # Physical database size = page_count * page_size
            cursor.execute("SELECT page_count * page_size FROM pragma_page_count(), pragma_page_size()")
            size_bytes = cursor.fetchone()[0]
        finally:
            conn.close()

        return {
            "total_entries": total,
            "valid_entries": valid,
            "expired_entries": total - valid,
            "size_mb": round(size_bytes / (1024 * 1024), 2)
        }

    def generate_file_hash(self, file_path: str) -> str:
        """
        Generate hash of file contents for cache key.

        Args:
            file_path: Path to file

        Returns:
            MD5 hash of file contents (fingerprint only, not for security)
        """
        hasher = hashlib.md5()

        with open(file_path, 'rb') as f:
            # Read file in chunks to handle large files without loading fully
            for chunk in iter(lambda: f.read(4096), b""):
                hasher.update(chunk)

        return hasher.hexdigest()
|