Spaces:
Running
Running
Pulastya B committed on
Commit ·
2cf9e11
1
Parent(s): c9ba5a9
Fixed all output path issues
Browse files
- README.md +25 -0
- src/api/app.py +7 -1
README.md
CHANGED
|
@@ -261,6 +261,31 @@ CMD ["uvicorn", "src.api.app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
| 261 |
| 50K rows | ~15s | ~2 min | ~5 min |
|
| 262 |
| 175K rows | ~45s | ~5 min | ~10 min |
|
| 263 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
## 🤝 Contributing
|
| 265 |
|
| 266 |
Contributions welcome! Please:
|
|
|
|
| 261 |
| 50K rows | ~15s | ~2 min | ~5 min |
|
| 262 |
| 175K rows | ~45s | ~5 min | ~10 min |
|
| 263 |
|
| 264 |
+
## 🔮 Future Enhancements
|
| 265 |
+
|
| 266 |
+
We're actively working on exciting new features to make the Data Science Agent even more powerful:
|
| 267 |
+
|
| 268 |
+
### 🗄️ BigQuery Integration
|
| 269 |
+
- **Direct BigQuery Connection**: Query and analyze massive datasets directly from Google BigQuery
|
| 270 |
+
- **Smart Sampling**: Intelligent sampling strategies for billion-row tables
|
| 271 |
+
- **Cost Optimization**: Query cost estimation before execution
|
| 272 |
+
- **Schema Discovery**: Auto-detect tables, columns, and relationships
|
| 273 |
+
|
| 274 |
+
### 🔗 LangChain / LlamaIndex Compatibility
|
| 275 |
+
- **Framework Agnostic**: Use as a tool within LangChain agents or LlamaIndex pipelines
|
| 276 |
+
- **Custom Tool Registration**: Expose 50+ data science tools as LangChain tools
|
| 277 |
+
- **RAG Integration**: Combine with document retrieval for context-aware analysis
|
| 278 |
+
- **Memory Backends**: Support for LangChain memory stores and conversation history
|
| 279 |
+
|
| 280 |
+
### 💻 First-Class CLI Experience & Beautiful TUI
|
| 281 |
+
- **Rich Terminal UI**: Interactive dashboards with progress bars, tables, and charts
|
| 282 |
+
- **Keyboard Navigation**: Full workflow control without leaving the terminal
|
| 283 |
+
- **Pipeline Scripting**: Define reproducible workflows in YAML/TOML
|
| 284 |
+
- **Offline Mode**: Run locally without requiring a browser
|
| 285 |
+
- **SSH-Friendly**: Perfect for remote server analysis
|
| 286 |
+
|
| 287 |
+
---
|
| 288 |
+
|
| 289 |
## 🤝 Contributing
|
| 290 |
|
| 291 |
Contributions welcome! Please:
|
src/api/app.py
CHANGED
|
@@ -1328,17 +1328,23 @@ async def serve_output_files(file_path: str):
|
|
| 1328 |
search_paths = [
|
| 1329 |
Path("./outputs") / file_path, # Local development
|
| 1330 |
Path("/tmp/data_science_agent/outputs") / file_path, # Production with subdirs
|
| 1331 |
-
Path("/tmp/data_science_agent") / file_path, # Production flat
|
| 1332 |
Path("/tmp/data_science_agent/outputs") / Path(file_path).name, # Production filename only
|
|
|
|
|
|
|
| 1333 |
]
|
| 1334 |
|
| 1335 |
output_path = None
|
| 1336 |
for path in search_paths:
|
|
|
|
| 1337 |
if path.exists() and path.is_file():
|
| 1338 |
output_path = path
|
|
|
|
| 1339 |
break
|
| 1340 |
|
| 1341 |
if output_path is None:
|
|
|
|
|
|
|
| 1342 |
raise HTTPException(status_code=404, detail=f"File not found: {file_path}")
|
| 1343 |
|
| 1344 |
# Security: prevent directory traversal
|
|
|
|
| 1328 |
search_paths = [
|
| 1329 |
Path("./outputs") / file_path, # Local development
|
| 1330 |
Path("/tmp/data_science_agent/outputs") / file_path, # Production with subdirs
|
| 1331 |
+
Path("/tmp/data_science_agent") / file_path, # Production flat OR relative paths like plots/xxx.html
|
| 1332 |
Path("/tmp/data_science_agent/outputs") / Path(file_path).name, # Production filename only
|
| 1333 |
+
Path("/tmp/data_science_agent") / Path(file_path).name, # Production root filename only
|
| 1334 |
+
Path("./outputs") / Path(file_path).name, # Local development filename only
|
| 1335 |
]
|
| 1336 |
|
| 1337 |
output_path = None
|
| 1338 |
for path in search_paths:
|
| 1339 |
+
logger.debug(f"Checking path: {path}")
|
| 1340 |
if path.exists() and path.is_file():
|
| 1341 |
output_path = path
|
| 1342 |
+
logger.info(f"Found file at: {path}")
|
| 1343 |
break
|
| 1344 |
|
| 1345 |
if output_path is None:
|
| 1346 |
+
logger.error(f"File not found in any location: {file_path}")
|
| 1347 |
+
logger.error(f"Searched paths: {[str(p) for p in search_paths]}")
|
| 1348 |
raise HTTPException(status_code=404, detail=f"File not found: {file_path}")
|
| 1349 |
|
| 1350 |
# Security: prevent directory traversal
|