Pulastya B committed · Commit 227cb22 · 1 Parent(s): 9f49815
fix: Fix module import paths for Render deployment
- Change all absolute imports to relative imports within src package
- Fix orchestrator.py: cache.cache_manager -> .cache.cache_manager
- Fix tools/__init__.py and auto_pipeline.py imports
- Fix all utils imports in tools/ directory (35 files)
- Fix api/app.py to use src.orchestrator import
- Resolves ModuleNotFoundError on Render deployment
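
The change itself is mechanical; for example, the orchestrator's imports go from top-level to package-relative (excerpt from the diff below):

```python
# Before: fails on Render because `cache` is not a top-level package
# from cache.cache_manager import CacheManager

# After: resolved relative to the `src` package
from .cache.cache_manager import CacheManager
```

Files changed: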
- FRRONTEEEND/GDG on Campus - DevSprint Slides Template.pdf +0 -0
- VERCEL_DEPLOYMENT.md +267 -0
- src/api/app.py +2 -4
- src/orchestrator.py +5 -5
- src/tools/__init__.py +1 -1
- src/tools/advanced_analysis.py +2 -2
- src/tools/advanced_feature_engineering.py +2 -2
- src/tools/advanced_insights.py +2 -2
- src/tools/advanced_preprocessing.py +2 -2
- src/tools/advanced_training.py +2 -2
- src/tools/auto_pipeline.py +6 -6
- src/tools/cloud_data_sources.py +1 -1
- src/tools/data_cleaning.py +2 -2
- src/tools/data_profiling.py +2 -2
- src/tools/data_type_conversion.py +2 -2
- src/tools/data_wrangling.py +2 -2
- src/tools/enhanced_feature_engineering.py +2 -2
- src/tools/feature_engineering.py +2 -2
- src/tools/model_training.py +2 -2
- src/tools/plotly_visualizations.py +2 -2
- src/tools/production_mlops.py +2 -2
- src/tools/time_series.py +2 -2
- src/tools/visualization_engine.py +2 -2
- vercel.json +56 -0
FRRONTEEEND/GDG on Campus - DevSprint Slides Template.pdf
ADDED
The diff for this file is too large to render. See raw diff.
VERCEL_DEPLOYMENT.md
ADDED
@@ -0,0 +1,267 @@
# Vercel Deployment Guide

## ⚠️ Important Limitations

Vercel has significant limitations for this application:

### Execution Time Limits
- **Free/Hobby:** 10 seconds per request
- **Pro:** 60 seconds per request
- **Enterprise:** 300 seconds per request

### Memory Limits
- Maximum 3008 MB (Pro/Enterprise)
- May not be sufficient for large ML models

### File System
- Read-only except for `/tmp` (512 MB limit)
- Files in `/tmp` are ephemeral and cleared between invocations
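
Anything the agent generates therefore has to live under `/tmp` for the lifetime of a single invocation. A minimal sketch, assuming the backend honors the `OUTPUT_DIR` variable configured later in this guide:

```python
import os
from pathlib import Path

# OUTPUT_DIR is set to /tmp/outputs in the Vercel environment variables below.
output_dir = Path(os.getenv("OUTPUT_DIR", "/tmp/outputs"))
output_dir.mkdir(parents=True, exist_ok=True)  # /tmp starts empty on every cold start

report_path = output_dir / "report.html"
report_path.write_text("<h1>Demo report</h1>")  # gone after the invocation ends
```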
### Recommendation
⚠️ **For ML/Data Science workloads, Render or Railway is recommended** over Vercel due to:
- Long-running analysis tasks (often >60s)
- Large model file sizes
- Memory requirements for ML operations
- Need for persistent storage

## If You Still Want to Try Vercel

### Prerequisites

1. A [Vercel account](https://vercel.com/) (free tier available)
2. Vercel CLI installed: `npm install -g vercel`
3. Your code pushed to GitHub

### Quick Deploy

#### Option 1: Via Vercel Dashboard (Easiest)

1. **Go to Vercel Dashboard**: https://vercel.com/dashboard

2. **Import Project:**
   - Click "Add New..." → "Project"
   - Select your GitHub repository: `Pulastya-B/DevSprint-Data-Science-Agent`

3. **Configure Build Settings:**
   - **Framework Preset:** Other
   - **Build Command:** `cd FRRONTEEEND && npm install && npm run build`
   - **Output Directory:** `FRRONTEEEND/dist`
   - **Install Command:** `pip install -r requirements.txt`

4. **Add Environment Variables:**
   ```
   GOOGLE_API_KEY=<your-api-key>
   LLM_PROVIDER=gemini
   GEMINI_MODEL=gemini-2.5-flash
   REASONING_EFFORT=medium
   CACHE_DB_PATH=/tmp/cache_db/cache.db
   OUTPUT_DIR=/tmp/outputs
   DATA_DIR=/tmp/data
   ```

5. **Deploy:**
   - Click "Deploy"
   - Wait for build to complete (~3-5 minutes)

#### Option 2: Via Vercel CLI

1. **Install Vercel CLI:**
   ```bash
   npm install -g vercel
   ```

2. **Login to Vercel:**
   ```bash
   vercel login
   ```

3. **Deploy:**
   ```bash
   cd "C:\Users\Pulastya\Videos\DS AGENTTTT"
   vercel
   ```

4. **Follow prompts:**
   - Link to existing project or create new one
   - Accept default settings
   - Add environment variables when prompted

5. **Production Deploy:**
   ```bash
   vercel --prod
   ```

### Environment Variables (Required)

Add these in Vercel Dashboard → Settings → Environment Variables:

```
GOOGLE_API_KEY=<your-gemini-api-key>
LLM_PROVIDER=gemini
GEMINI_MODEL=gemini-2.5-flash
REASONING_EFFORT=medium
CACHE_DB_PATH=/tmp/cache_db/cache.db
CACHE_TTL_SECONDS=86400
OUTPUT_DIR=/tmp/outputs
DATA_DIR=/tmp/data
MAX_PARALLEL_TOOLS=5
MAX_RETRIES=3
TIMEOUT_SECONDS=60
```
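
A minimal sketch of how the backend can read these at startup; the orchestrator already uses `python-dotenv`, so a local `.env` file behaves the same way (the exact variable handling shown here is an assumption, not the app's actual code):

```python
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env locally; a no-op on Vercel, where vars come from the dashboard

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")                       # required
GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
CACHE_DB_PATH = os.getenv("CACHE_DB_PATH", "/tmp/cache_db/cache.db")
TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", "60"))

if not GOOGLE_API_KEY:
    raise RuntimeError("GOOGLE_API_KEY is not set")
```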
### Configuration Files

- **vercel.json** - Vercel deployment configuration
  - Routes API requests to FastAPI backend
  - Serves React frontend statically

### Known Issues and Workarounds

#### 1. Timeout Errors

**Issue:** Analysis tasks exceed 60-second limit

**Workarounds:**
- Use smaller datasets for testing
- Upgrade to Vercel Pro ($20/month) for 60s timeout
- Consider splitting long operations into multiple API calls (see the sketch below)
- Use background jobs (not supported on Vercel free tier)
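
A hedged sketch of the multi-call approach; the endpoint paths are hypothetical and would need to match whatever routes the FastAPI app actually exposes:

```python
import requests

BASE = "https://your-app.vercel.app/api"

# Hypothetical endpoints: each request does one bounded step and returns
# well inside the per-request limit.
with open("small.csv", "rb") as f:
    session = requests.post(f"{BASE}/upload", files={"file": f}).json()

for step in ("profile", "clean", "train"):
    requests.post(f"{BASE}/{step}", json={"session_id": session["session_id"]}, timeout=55)
```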
#### 2. Memory Errors

**Issue:** ML models exceed memory limits

**Workarounds:**
- Use lighter models (e.g., LogisticRegression instead of XGBoost)
- Process smaller data chunks (see the sketch below)
- Upgrade to Vercel Pro for more memory
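
For the chunking workaround, a minimal pandas sketch (the file path and column name are placeholders):

```python
import pandas as pd

total, count = 0.0, 0
# Stream the CSV in 50,000-row chunks instead of loading it all at once.
for chunk in pd.read_csv("/tmp/data/input.csv", chunksize=50_000):
    total += chunk["amount"].sum()   # "amount" is a placeholder column
    count += len(chunk)

print("mean:", total / count)
```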
#### 3. Cold Starts

**Issue:** First request after idle is slow (~5-10s)

**Workarounds:**
- Use Vercel Pro for faster cold starts
- Implement warming functions (Pro/Enterprise only)

#### 4. File Storage

**Issue:** Generated reports/models are lost between requests

**Workarounds:**
- Store outputs in external storage (S3, Cloudinary); see the sketch below
- Use Vercel Blob Storage (paid feature)
- Accept ephemeral storage for demo purposes
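
A minimal sketch of the external-storage workaround with `boto3`; the bucket name is a placeholder and AWS credentials would have to be added as extra environment variables:

```python
import boto3

s3 = boto3.client("s3")  # picks up AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY from env

# Copy the ephemeral /tmp artifact to durable storage before the function returns.
s3.upload_file(
    Filename="/tmp/outputs/report.html",
    Bucket="my-ds-agent-outputs",   # placeholder bucket
    Key="reports/report.html",
)
```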
### Testing Your Deployment

1. **Check deployment status:**
   ```bash
   vercel ls
   ```

2. **View logs:**
   ```bash
   vercel logs <deployment-url>
   ```

3. **Test health endpoint:**
   ```bash
   curl https://your-app.vercel.app/api/health
   ```

4. **Test with small dataset:**
   - Upload a small CSV (< 1MB, < 1000 rows)
   - Request simple analysis (avoid complex ML operations); see the sketch below
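
A quick way to generate such a file and send it, assuming a hypothetical `/api/upload` route:

```python
import pandas as pd
import requests

# ~500 rows stays well under the 1 MB / 1,000-row guideline above.
pd.DataFrame({"x": range(500), "y": [i % 3 for i in range(500)]}).to_csv("tiny.csv", index=False)

with open("tiny.csv", "rb") as f:
    r = requests.post("https://your-app.vercel.app/api/upload", files={"file": f})  # hypothetical route
print(r.status_code)
```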
### Vercel vs Other Platforms

| Feature | Vercel | Render | Railway |
|---------|--------|--------|---------|
| **Best For** | Static sites, Next.js | Full-stack apps, ML | Full-stack apps |
| **Timeout (Free)** | 10s | 15min | 5min |
| **Timeout (Paid)** | 60s | ∞ | ∞ |
| **Memory (Max)** | 3008MB | 512MB-16GB | 512MB-32GB |
| **Cold Starts** | Fast | Medium | Fast |
| **Persistent Storage** | No (paid addon) | Yes | Yes |
| **Docker Support** | No | Yes | Yes |
| **Price (Hobby)** | $20/mo | $7/mo | $5/mo |

### Recommended Platform

For this Data Science Agent, we recommend:

1. **Render** (Best balance) - See [RENDER_DEPLOYMENT.md](RENDER_DEPLOYMENT.md)
   - ✅ No timeout limits
   - ✅ Docker support
   - ✅ Affordable ($7/mo starter)
   - ✅ Good for ML workloads

2. **Railway** (Alternative)
   - ✅ Good free tier
   - ✅ Persistent storage
   - ✅ Docker support
   - ⚠️ $5/mo minimum

3. **Vercel** (Not recommended for this app)
   - ❌ 60s timeout limit
   - ❌ No Docker support
   - ❌ Expensive for ML ($20/mo minimum)
   - ✅ Great for frontend-heavy apps

## Troubleshooting

### Deployment Fails

**Issue:** Build timeout during pip install

**Solution:**
- Reduce dependencies in requirements.txt
- Use lighter ML libraries
- Consider pre-building dependencies

**Issue:** "Function Payload Too Large"

**Solution:**
- Reduce package sizes
- Use `vercel.json` to exclude unnecessary files
- Consider serverless architecture redesign

### Runtime Errors

**Issue:** "Task timed out after 10.00 seconds"

**Solution:**
- Upgrade to Vercel Pro
- Optimize code for faster execution
- Use smaller datasets
- Consider using Render instead

**Issue:** "Out of memory"

**Solution:**
- Upgrade to higher memory tier
- Optimize memory usage
- Process data in chunks

## Conclusion

While Vercel deployment is possible, it's **not recommended** for this ML/Data Science application due to:

- ❌ Strict timeout limits (10s free, 60s pro)
- ❌ Memory constraints for ML models
- ❌ No persistent storage
- ❌ High cost for necessary features

**Better Alternative:** Use [Render](RENDER_DEPLOYMENT.md) for this application.

If you must use Vercel:
- Upgrade to Pro plan ($20/month minimum)
- Use only for simple datasets
- Expect frequent timeouts
- Consider it a demo/prototype only

---

**Need help with Render deployment instead?**
See [RENDER_DEPLOYMENT.md](RENDER_DEPLOYMENT.md) for a better solution.
src/api/app.py
CHANGED
@@ -21,10 +21,8 @@ from fastapi.staticfiles import StaticFiles
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 
-#
-
-
-from orchestrator import DataScienceCopilot
+# Import from parent package
+from src.orchestrator import DataScienceCopilot
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
src/orchestrator.py
CHANGED
@@ -15,11 +15,11 @@ from groq import Groq
 import google.generativeai as genai
 from dotenv import load_dotenv
 
-from cache.cache_manager import CacheManager
-from tools.tools_registry import TOOLS, get_all_tool_names, get_tools_by_category
-from session_memory import SessionMemory
-from session_store import SessionStore
-from tools import (
+from .cache.cache_manager import CacheManager
+from .tools.tools_registry import TOOLS, get_all_tool_names, get_tools_by_category
+from .session_memory import SessionMemory
+from .session_store import SessionStore
+from .tools import (
     # Basic Tools (13) - UPDATED: Added get_smart_summary + 3 wrangling tools
     profile_dataset,
     detect_data_quality_issues,
src/tools/__init__.py
CHANGED
@@ -162,7 +162,7 @@ from .cloud_data_sources import (
 
 from .tools_registry import TOOLS, get_tool_by_name, get_all_tool_names
 
-from
+from .enhanced_feature_engineering import (
     create_ratio_features,
     create_statistical_features,
     create_log_features,
src/tools/advanced_analysis.py
CHANGED
@@ -28,10 +28,10 @@ import plotly.express as px
 from plotly.subplots import make_subplots
 import pandas as pd
 
-from utils.polars_helpers import (
+from ..utils.polars_helpers import (
     load_dataframe, get_numeric_columns, get_categorical_columns
 )
-from utils.validation import (
+from ..utils.validation import (
     validate_file_exists, validate_file_format, validate_dataframe,
     validate_column_exists
 )
src/tools/advanced_feature_engineering.py
CHANGED
@@ -25,11 +25,11 @@ from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
 from textblob import TextBlob
 import re
 
-from utils.polars_helpers import (
+from ..utils.polars_helpers import (
     load_dataframe, save_dataframe, get_numeric_columns,
     get_categorical_columns, get_datetime_columns
 )
-from utils.validation import (
+from ..utils.validation import (
     validate_file_exists, validate_file_format, validate_dataframe,
     validate_column_exists
 )
src/tools/advanced_insights.py
CHANGED
@@ -20,8 +20,8 @@ import json
 # Add parent directory to path
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from utils.polars_helpers import load_dataframe, get_numeric_columns
-from utils.validation import validate_file_exists, validate_file_format
+from ..utils.polars_helpers import load_dataframe, get_numeric_columns
+from ..utils.validation import validate_file_exists, validate_file_format
 
 
 def analyze_root_cause(file_path: str,
src/tools/advanced_preprocessing.py
CHANGED
@@ -24,11 +24,11 @@ from imblearn.under_sampling import RandomUnderSampler, TomekLinks, EditedNeares
 from imblearn.combine import SMOTETomek, SMOTEENN
 from collections import Counter
 
-from utils.polars_helpers import (
+from ..utils.polars_helpers import (
     load_dataframe, save_dataframe, get_numeric_columns,
     get_categorical_columns, split_features_target
 )
-from utils.validation import (
+from ..utils.validation import (
     validate_file_exists, validate_file_format, validate_dataframe,
     validate_column_exists
 )
src/tools/advanced_training.py
CHANGED
@@ -45,8 +45,8 @@ from sklearn.metrics import (
     mean_squared_error, mean_absolute_error, r2_score
 )
 
-from utils.polars_helpers import load_dataframe, get_numeric_columns, split_features_target
-from utils.validation import (
+from ..utils.polars_helpers import load_dataframe, get_numeric_columns, split_features_target
+from ..utils.validation import (
     validate_file_exists, validate_file_format, validate_dataframe,
     validate_column_exists, validate_target_column
 )
src/tools/auto_pipeline.py
CHANGED
@@ -16,12 +16,12 @@ from sklearn.preprocessing import StandardScaler
 # Add parent directory to path
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from utils.polars_helpers import load_dataframe, get_numeric_columns
-from utils.validation import validate_file_exists
-from
-from
-from
-from
+from ..utils.polars_helpers import load_dataframe, get_numeric_columns
+from ..utils.validation import validate_file_exists
+from .data_cleaning import clean_missing_values, handle_outliers
+from .data_type_conversion import force_numeric_conversion, smart_type_inference
+from .feature_engineering import encode_categorical, create_time_features
+from .advanced_feature_engineering import create_interaction_features
 
 
 def auto_ml_pipeline(file_path: str,
src/tools/cloud_data_sources.py
CHANGED
@@ -14,7 +14,7 @@ import os
 # Add parent directory to path
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from utils.validation import validate_dataframe
+from ..utils.validation import validate_dataframe
 
 try:
     from google.cloud import bigquery
src/tools/data_cleaning.py
CHANGED
@@ -13,7 +13,7 @@ import os
 # Add parent directory to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from utils.polars_helpers import (
+from ..utils.polars_helpers import (
     load_dataframe,
     save_dataframe,
     get_numeric_columns,
@@ -21,7 +21,7 @@ from utils.polars_helpers import (
     get_datetime_columns,
     detect_id_columns,
 )
-from utils.validation import (
+from ..utils.validation import (
     validate_file_exists,
     validate_file_format,
     validate_dataframe,
src/tools/data_profiling.py
CHANGED
@@ -13,7 +13,7 @@ import os
 # Add parent directory to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from utils.polars_helpers import (
+from ..utils.polars_helpers import (
     load_dataframe,
     get_numeric_columns,
     get_categorical_columns,
@@ -22,7 +22,7 @@ from utils.polars_helpers import (
     calculate_memory_usage,
     detect_id_columns,
 )
-from utils.validation import (
+from ..utils.validation import (
     validate_file_exists,
     validate_file_format,
     validate_dataframe,
src/tools/data_type_conversion.py
CHANGED
@@ -11,13 +11,13 @@ import os
 # Add parent directory to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from utils.polars_helpers import (
+from ..utils.polars_helpers import (
     load_dataframe,
     save_dataframe,
     get_numeric_columns,
     get_categorical_columns
 )
-from utils.validation import (
+from ..utils.validation import (
     validate_file_exists,
     validate_file_format,
     validate_dataframe
src/tools/data_wrangling.py
CHANGED
@@ -13,11 +13,11 @@ import os
 # Add parent directory to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from utils.polars_helpers import (
+from ..utils.polars_helpers import (
     load_dataframe,
     save_dataframe,
 )
-from utils.validation import (
+from ..utils.validation import (
     validate_file_exists,
     validate_file_format,
     validate_dataframe,
src/tools/enhanced_feature_engineering.py
CHANGED
@@ -11,8 +11,8 @@ import os
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from utils.polars_helpers import load_dataframe, save_dataframe, get_numeric_columns
-from utils.validation import validate_file_exists, validate_dataframe
+from ..utils.polars_helpers import load_dataframe, save_dataframe, get_numeric_columns
+from ..utils.validation import validate_file_exists, validate_dataframe
 
 
 def create_ratio_features(file_path: str,
src/tools/feature_engineering.py
CHANGED
@@ -13,13 +13,13 @@ import os
 # Add parent directory to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from utils.polars_helpers import (
+from ..utils.polars_helpers import (
     load_dataframe,
     save_dataframe,
     get_numeric_columns,
     get_categorical_columns,
 )
-from utils.validation import (
+from ..utils.validation import (
     validate_file_exists,
     validate_file_format,
     validate_dataframe,
src/tools/model_training.py
CHANGED
@@ -47,12 +47,12 @@ except ImportError as e:
     VISUALIZATION_AVAILABLE = False
     print(f"⚠️ Visualization engine not available: {e}")
 
-from utils.polars_helpers import (
+from ..utils.polars_helpers import (
     load_dataframe,
     get_numeric_columns,
     split_features_target,
 )
-from utils.validation import (
+from ..utils.validation import (
     validate_file_exists,
     validate_file_format,
     validate_dataframe,
src/tools/plotly_visualizations.py
CHANGED
@@ -16,12 +16,12 @@ import os
 # Add parent directory to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from utils.polars_helpers import (
+from ..utils.polars_helpers import (
     load_dataframe,
     get_numeric_columns,
     get_categorical_columns,
 )
-from utils.validation import (
+from ..utils.validation import (
     validate_file_exists,
     validate_file_format,
     validate_dataframe,
src/tools/production_mlops.py
CHANGED
@@ -25,8 +25,8 @@ import shap
 from lime import lime_tabular
 from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
 
-from utils.polars_helpers import load_dataframe, get_numeric_columns, split_features_target
-from utils.validation import validate_file_exists, validate_file_format, validate_dataframe, validate_column_exists
+from ..utils.polars_helpers import load_dataframe, get_numeric_columns, split_features_target
+from ..utils.validation import validate_file_exists, validate_file_format, validate_dataframe, validate_column_exists
 
 
 def monitor_model_drift(
src/tools/time_series.py
CHANGED
@@ -25,8 +25,8 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 # from prophet import Prophet
 import pandas as pd
 
-from utils.polars_helpers import load_dataframe, save_dataframe
-from utils.validation import validate_file_exists, validate_file_format, validate_dataframe, validate_column_exists
+from ..utils.polars_helpers import load_dataframe, save_dataframe
+from ..utils.validation import validate_file_exists, validate_file_format, validate_dataframe, validate_column_exists
 
 
 def forecast_time_series(
src/tools/visualization_engine.py
CHANGED
@@ -22,8 +22,8 @@ from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_c
 # Add parent directory to path
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from utils.polars_helpers import load_dataframe
-from utils.validation import validate_file_exists
+from ..utils.polars_helpers import load_dataframe
+from ..utils.validation import validate_file_exists
 
 # Import matplotlib visualization functions
 try:
vercel.json
ADDED
@@ -0,0 +1,56 @@
{
  "version": 2,
  "builds": [
    {
      "src": "src/api/app.py",
      "use": "@vercel/python",
      "config": {
        "maxLambdaSize": "50mb"
      }
    },
    {
      "src": "FRRONTEEEND/package.json",
      "use": "@vercel/static-build",
      "config": {
        "distDir": "dist"
      }
    }
  ],
  "routes": [
    {
      "src": "/api/(.*)",
      "dest": "src/api/app.py"
    },
    {
      "src": "/outputs/(.*)",
      "dest": "src/api/app.py"
    },
    {
      "src": "/(.*)",
      "dest": "FRRONTEEEND/dist/$1"
    }
  ],
  "env": {
    "LLM_PROVIDER": "gemini",
    "GEMINI_MODEL": "gemini-2.5-flash",
    "REASONING_EFFORT": "medium",
    "CACHE_DB_PATH": "/tmp/cache_db/cache.db",
    "CACHE_TTL_SECONDS": "86400",
    "OUTPUT_DIR": "/tmp/outputs",
    "DATA_DIR": "/tmp/data",
    "MAX_PARALLEL_TOOLS": "5",
    "MAX_RETRIES": "3",
    "TIMEOUT_SECONDS": "60"
  },
  "build": {
    "env": {
      "NODE_VERSION": "20"
    }
  },
  "functions": {
    "src/api/app.py": {
      "memory": 3008,
      "maxDuration": 60
    }
  }
}