Spaces:
Runtime error
Runtime error
initial commit
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +24 -201
- .repo_structure_plan.md +41 -0
- MIGRATION_GUIDE.md +81 -0
- QUICK_START.md +95 -0
- README.md +144 -2
- STRUCTURE.md +148 -0
- bkp/modal-rag.py.backup +284 -0
- clean_sample.csv +0 -0
- docs/HOW_TO_RUN.md +0 -3
- docs/guides/HOW_TO_RUN.md +215 -0
- docs/{QUICK_START_RAG.md → guides/QUICK_START_RAG.md} +0 -0
- docs/{RAG_SETUP_COMPLETE.md → guides/RAG_SETUP_COMPLETE.md} +0 -0
- docs/{SETUP_SUCCESS.md → guides/SETUP_SUCCESS.md} +0 -0
- docs/{SUMMARY.md → guides/SUMMARY.md} +0 -0
- docs/{TROUBLESHOOTING.md → guides/TROUBLESHOOTING.md} +0 -0
- docs/{WEB_INTERFACE.md → guides/WEB_INTERFACE.md} +0 -0
- docs/{WEB_TROUBLESHOOTING.md → guides/WEB_TROUBLESHOOTING.md} +0 -0
- docs/{estat_api_guide.md → guides/estat_api_guide.md} +0 -0
- docs/{ft_process.md → guides/ft_process.md} +0 -0
- docs/{modal-rag-optimization.md → guides/modal-rag-optimization.md} +0 -0
- docs/{modal-rag-sequence.md → guides/modal-rag-sequence.md} +0 -0
- docs/{next_steps_rag_recommendation.md → guides/next_steps_rag_recommendation.md} +0 -0
- docs/{source_data.md → guides/source_data.md} +0 -0
- docs/{PRODUCT_DECISION_GUIDE.md → product-design/PRODUCT_DECISION_GUIDE.md} +0 -0
- docs/{setup_product_design_rag.md → product-design/setup_product_design_rag.md} +0 -0
- docs/{tokyo_auto_insurance_product_design.docx → product-design/tokyo_auto_insurance_product_design.docx} +0 -0
- docs/{tokyo_auto_insurance_product_design.md → product-design/tokyo_auto_insurance_product_design.md} +0 -0
- docs/{tokyo_auto_insurance_product_design_filled.md → product-design/tokyo_auto_insurance_product_design_filled.md} +0 -0
- scripts/__init__.py +4 -0
- {docs → scripts/data}/cleanup_data.py +0 -0
- {docs → scripts/data}/clear_census_volume.py +0 -0
- {docs → scripts/data}/convert_census_to_csv.py +0 -0
- {docs → scripts/data}/convert_economy_labor_to_csv.py +0 -0
- {docs → scripts/data}/convert_to_word.py +0 -0
- {docs → scripts/data}/create_custom_qa.py +0 -0
- {docs → scripts/data}/delete_census_csvs.py +0 -0
- {docs → scripts/data}/download_census_api.py +0 -0
- {docs → scripts/data}/download_census_csv_modal.py +0 -0
- {docs → scripts/data}/download_census_data.py +0 -0
- {docs → scripts/data}/download_census_modal.py +0 -0
- {docs → scripts/data}/download_economy_labor_modal.py +0 -0
- {docs → scripts/data}/fix_csv_filenames.py +0 -0
- {docs → scripts/data}/prepare_economy_data.py +0 -0
- {docs → scripts/data}/prepare_finetune_data.py +0 -0
- {docs → scripts/data}/remove_duplicate_csvs.py +0 -0
- run_with_venv.sh → scripts/setup/run_with_venv.sh +3 -2
- start_web.sh → scripts/setup/start_web.sh +3 -2
- {docs → scripts/tools}/api_endpoint.py +0 -0
- {docs → scripts/tools}/api_endpoint_cpu.py +0 -0
- {docs → scripts/tools}/ask_model.py +0 -0
.gitignore
CHANGED
|
@@ -1,212 +1,35 @@
|
|
| 1 |
-
#
|
| 2 |
__pycache__/
|
| 3 |
-
*.py[
|
| 4 |
*$py.class
|
| 5 |
-
|
| 6 |
-
# C extensions
|
| 7 |
*.so
|
| 8 |
-
|
| 9 |
-
# Distribution / packaging
|
| 10 |
.Python
|
| 11 |
-
build/
|
| 12 |
-
develop-eggs/
|
| 13 |
-
dist/
|
| 14 |
-
downloads/
|
| 15 |
-
eggs/
|
| 16 |
-
.eggs/
|
| 17 |
-
lib/
|
| 18 |
-
lib64/
|
| 19 |
-
parts/
|
| 20 |
-
sdist/
|
| 21 |
-
var/
|
| 22 |
-
wheels/
|
| 23 |
-
share/python-wheels/
|
| 24 |
-
*.egg-info/
|
| 25 |
-
.installed.cfg
|
| 26 |
-
*.egg
|
| 27 |
-
MANIFEST
|
| 28 |
-
|
| 29 |
-
# PyInstaller
|
| 30 |
-
# Usually these files are written by a python script from a template
|
| 31 |
-
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
-
*.manifest
|
| 33 |
-
*.spec
|
| 34 |
-
|
| 35 |
-
# Installer logs
|
| 36 |
-
pip-log.txt
|
| 37 |
-
pip-delete-this-directory.txt
|
| 38 |
-
|
| 39 |
-
# Unit test / coverage reports
|
| 40 |
-
htmlcov/
|
| 41 |
-
.tox/
|
| 42 |
-
.nox/
|
| 43 |
-
.coverage
|
| 44 |
-
.coverage.*
|
| 45 |
-
.cache
|
| 46 |
-
nosetests.xml
|
| 47 |
-
coverage.xml
|
| 48 |
-
*.cover
|
| 49 |
-
*.py.cover
|
| 50 |
-
.hypothesis/
|
| 51 |
-
.pytest_cache/
|
| 52 |
-
cover/
|
| 53 |
-
|
| 54 |
-
# Translations
|
| 55 |
-
*.mo
|
| 56 |
-
*.pot
|
| 57 |
-
|
| 58 |
-
# Django stuff:
|
| 59 |
-
*.log
|
| 60 |
-
local_settings.py
|
| 61 |
-
db.sqlite3
|
| 62 |
-
db.sqlite3-journal
|
| 63 |
-
|
| 64 |
-
# Flask stuff:
|
| 65 |
-
instance/
|
| 66 |
-
.webassets-cache
|
| 67 |
-
|
| 68 |
-
# Scrapy stuff:
|
| 69 |
-
.scrapy
|
| 70 |
-
|
| 71 |
-
# Sphinx documentation
|
| 72 |
-
docs/_build/
|
| 73 |
-
|
| 74 |
-
# PyBuilder
|
| 75 |
-
.pybuilder/
|
| 76 |
-
target/
|
| 77 |
-
|
| 78 |
-
# Jupyter Notebook
|
| 79 |
-
.ipynb_checkpoints
|
| 80 |
-
|
| 81 |
-
# IPython
|
| 82 |
-
profile_default/
|
| 83 |
-
ipython_config.py
|
| 84 |
-
|
| 85 |
-
# pyenv
|
| 86 |
-
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
-
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
-
# .python-version
|
| 89 |
-
|
| 90 |
-
# pipenv
|
| 91 |
-
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
-
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
-
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
-
# install all needed dependencies.
|
| 95 |
-
#Pipfile.lock
|
| 96 |
-
|
| 97 |
-
# UV
|
| 98 |
-
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 99 |
-
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
-
# commonly ignored for libraries.
|
| 101 |
-
#uv.lock
|
| 102 |
-
|
| 103 |
-
# poetry
|
| 104 |
-
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 105 |
-
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 106 |
-
# commonly ignored for libraries.
|
| 107 |
-
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 108 |
-
#poetry.lock
|
| 109 |
-
#poetry.toml
|
| 110 |
-
|
| 111 |
-
# pdm
|
| 112 |
-
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 113 |
-
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
| 114 |
-
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
| 115 |
-
#pdm.lock
|
| 116 |
-
#pdm.toml
|
| 117 |
-
.pdm-python
|
| 118 |
-
.pdm-build/
|
| 119 |
-
|
| 120 |
-
# pixi
|
| 121 |
-
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
| 122 |
-
#pixi.lock
|
| 123 |
-
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
| 124 |
-
# in the .venv directory. It is recommended not to include this directory in version control.
|
| 125 |
-
.pixi
|
| 126 |
-
|
| 127 |
-
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 128 |
-
__pypackages__/
|
| 129 |
-
|
| 130 |
-
# Celery stuff
|
| 131 |
-
celerybeat-schedule
|
| 132 |
-
celerybeat.pid
|
| 133 |
-
|
| 134 |
-
# SageMath parsed files
|
| 135 |
-
*.sage.py
|
| 136 |
-
|
| 137 |
-
# Environments
|
| 138 |
-
.env
|
| 139 |
-
.envrc
|
| 140 |
-
.venv
|
| 141 |
-
env/
|
| 142 |
venv/
|
|
|
|
| 143 |
ENV/
|
| 144 |
-
|
| 145 |
-
venv.bak/
|
| 146 |
-
|
| 147 |
-
# Spyder project settings
|
| 148 |
-
.spyderproject
|
| 149 |
-
.spyproject
|
| 150 |
-
|
| 151 |
-
# Rope project settings
|
| 152 |
-
.ropeproject
|
| 153 |
-
|
| 154 |
-
# mkdocs documentation
|
| 155 |
-
/site
|
| 156 |
-
|
| 157 |
-
# mypy
|
| 158 |
-
.mypy_cache/
|
| 159 |
-
.dmypy.json
|
| 160 |
-
dmypy.json
|
| 161 |
-
|
| 162 |
-
# Pyre type checker
|
| 163 |
-
.pyre/
|
| 164 |
-
|
| 165 |
-
# pytype static type analyzer
|
| 166 |
-
.pytype/
|
| 167 |
-
|
| 168 |
-
# Cython debug symbols
|
| 169 |
-
cython_debug/
|
| 170 |
-
|
| 171 |
-
# PyCharm
|
| 172 |
-
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 173 |
-
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 174 |
-
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 175 |
-
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 176 |
-
#.idea/
|
| 177 |
-
|
| 178 |
-
# Abstra
|
| 179 |
-
# Abstra is an AI-powered process automation framework.
|
| 180 |
-
# Ignore directories containing user credentials, local state, and settings.
|
| 181 |
-
# Learn more at https://abstra.io/docs
|
| 182 |
-
.abstra/
|
| 183 |
-
|
| 184 |
-
# Visual Studio Code
|
| 185 |
-
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
| 186 |
-
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
| 187 |
-
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
| 188 |
-
# you could uncomment the following to ignore the entire vscode folder
|
| 189 |
-
# .vscode/
|
| 190 |
-
|
| 191 |
-
# Ruff stuff:
|
| 192 |
-
.ruff_cache/
|
| 193 |
-
|
| 194 |
-
# PyPI configuration file
|
| 195 |
-
.pypirc
|
| 196 |
|
| 197 |
-
#
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
|
| 204 |
-
#
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
__marimo__/
|
| 208 |
|
| 209 |
# Project specific
|
| 210 |
-
|
| 211 |
-
*.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
.modal/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
*$py.class
|
|
|
|
|
|
|
| 5 |
*.so
|
|
|
|
|
|
|
| 6 |
.Python
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
venv/
|
| 8 |
+
env/
|
| 9 |
ENV/
|
| 10 |
+
.venv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# IDE
|
| 13 |
+
.vscode/
|
| 14 |
+
.idea/
|
| 15 |
+
*.swp
|
| 16 |
+
*.swo
|
| 17 |
+
*~
|
| 18 |
|
| 19 |
+
# OS
|
| 20 |
+
.DS_Store
|
| 21 |
+
Thumbs.db
|
|
|
|
| 22 |
|
| 23 |
# Project specific
|
| 24 |
+
*.log
|
| 25 |
+
*.csv
|
| 26 |
+
.pytest_cache/
|
| 27 |
+
.coverage
|
| 28 |
+
htmlcov/
|
| 29 |
+
|
| 30 |
+
# Modal
|
| 31 |
.modal/
|
| 32 |
+
|
| 33 |
+
# Environment variables
|
| 34 |
+
.env
|
| 35 |
+
.env.local
|
.repo_structure_plan.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Repository Restructure Plan
|
| 2 |
+
|
| 3 |
+
## Current Issues:
|
| 4 |
+
- Root directory has too many files (web_app.py, query_product_design.py, test files)
|
| 5 |
+
- docs/ has both scripts and documentation mixed
|
| 6 |
+
- scripts/ directory is empty
|
| 7 |
+
- Product design docs mixed with other docs
|
| 8 |
+
- Web app files (templates, static) in root
|
| 9 |
+
|
| 10 |
+
## Proposed Structure:
|
| 11 |
+
|
| 12 |
+
```
|
| 13 |
+
/
|
| 14 |
+
├── README.md
|
| 15 |
+
├── requirements.txt
|
| 16 |
+
├── .gitignore
|
| 17 |
+
├── src/  # Core application code
|
| 18 |
+
│   ├── rag/  # RAG system
|
| 19 |
+
│   │   ├── modal-rag.py
|
| 20 |
+
│   │   └── modal-rag-product-design.py
|
| 21 |
+
│   └── web/  # Web application
|
| 22 |
+
│       ├── web_app.py
|
| 23 |
+
│       ├── query_product_design.py
|
| 24 |
+
│       ├── templates/
|
| 25 |
+
│       └── static/
|
| 26 |
+
├── scripts/  # Utility scripts
|
| 27 |
+
│   ├── data/  # Data processing
|
| 28 |
+
│   ├── setup/  # Setup scripts
|
| 29 |
+
│   └── tools/  # General utilities
|
| 30 |
+
├── docs/  # Documentation only
|
| 31 |
+
│   ├── guides/  # How-to guides
|
| 32 |
+
│   ├── api/  # API docs
|
| 33 |
+
│   └── product-design/  # Product design docs
|
| 34 |
+
├── tests/  # Test files
|
| 35 |
+
├── config/  # Config files (if any)
|
| 36 |
+
├── diagrams/  # Keep as is
|
| 37 |
+
├── finetune/  # Keep as is
|
| 38 |
+
├── bkp/  # Keep as is
|
| 39 |
+
└── venv/  # Keep as is
|
| 40 |
+
```
|
| 41 |
+
|
MIGRATION_GUIDE.md
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Repository Restructure Migration Guide
|
| 2 |
+
|
| 3 |
+
## What Changed
|
| 4 |
+
|
| 5 |
+
The repository has been reorganized for better structure and maintainability.
|
| 6 |
+
|
| 7 |
+
## File Moves
|
| 8 |
+
|
| 9 |
+
### RAG System
|
| 10 |
+
- `src/modal-rag.py` → `src/rag/modal-rag.py`
|
| 11 |
+
- `src/modal-rag-product-design.py` → `src/rag/modal-rag-product-design.py`
|
| 12 |
+
|
| 13 |
+
### Web Application
|
| 14 |
+
- `web_app.py` → `src/web/web_app.py`
|
| 15 |
+
- `query_product_design.py` → `src/web/query_product_design.py`
|
| 16 |
+
- `templates/` → `src/web/templates/`
|
| 17 |
+
- `static/` → `src/web/static/`
|
| 18 |
+
|
| 19 |
+
### Scripts
|
| 20 |
+
- Data processing scripts → `scripts/data/`
|
| 21 |
+
- Setup scripts → `scripts/setup/`
|
| 22 |
+
- Utility scripts → `scripts/tools/`
|
| 23 |
+
|
| 24 |
+
### Documentation
|
| 25 |
+
- All `.md` files → `docs/guides/`
|
| 26 |
+
- Product design docs → `docs/product-design/`
|
| 27 |
+
|
| 28 |
+
### Tests
|
| 29 |
+
- `test_*.py` → `tests/`
|
| 30 |
+
|
| 31 |
+
## Updated Commands
|
| 32 |
+
|
| 33 |
+
### Old Commands (No longer work)
|
| 34 |
+
```bash
|
| 35 |
+
python web_app.py
|
| 36 |
+
modal run src/modal-rag-product-design.py::query_product_design
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
### New Commands
|
| 40 |
+
```bash
|
| 41 |
+
# Web app
|
| 42 |
+
python src/web/web_app.py
|
| 43 |
+
# Or use helper script
|
| 44 |
+
./scripts/setup/start_web.sh
|
| 45 |
+
|
| 46 |
+
# Modal RAG
|
| 47 |
+
modal run src/rag/modal-rag-product-design.py::query_product_design --question "your question"
|
| 48 |
+
|
| 49 |
+
# Indexing
|
| 50 |
+
modal run src/rag/modal-rag-product-design.py::index_product_design
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
## Import Path Updates
|
| 54 |
+
|
| 55 |
+
If you have custom scripts that import from these modules, update the imports:
|
| 56 |
+
|
| 57 |
+
```python
|
| 58 |
+
# Old
|
| 59 |
+
from query_product_design import query_rag
|
| 60 |
+
|
| 61 |
+
# New
|
| 62 |
+
import sys
|
| 63 |
+
sys.path.insert(0, 'src/web')
|
| 64 |
+
from query_product_design import query_rag
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
## Next Steps
|
| 68 |
+
|
| 69 |
+
1. Update any custom scripts with new import paths
|
| 70 |
+
2. Update CI/CD pipelines if applicable
|
| 71 |
+
3. Update documentation references
|
| 72 |
+
4. Test all functionality
|
| 73 |
+
|
| 74 |
+
## Rollback
|
| 75 |
+
|
| 76 |
+
If you need to rollback, all files are still in git history. You can:
|
| 77 |
+
```bash
|
| 78 |
+
git log --oneline --all -- "old/path/to/file"
|
| 79 |
+
git checkout <commit-hash> -- "old/path/to/file"
|
| 80 |
+
```
|
| 81 |
+
|
QUICK_START.md
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Quick Start Guide
|
| 2 |
+
|
| 3 |
+
## Prerequisites
|
| 4 |
+
|
| 5 |
+
- Python 3.13+
|
| 6 |
+
- Modal account and CLI installed
|
| 7 |
+
- Virtual environment (recommended)
|
| 8 |
+
|
| 9 |
+
## Setup
|
| 10 |
+
|
| 11 |
+
1. **Activate virtual environment:**
|
| 12 |
+
```bash
|
| 13 |
+
source venv/bin/activate
|
| 14 |
+
```
|
| 15 |
+
|
| 16 |
+
2. **Install dependencies:**
|
| 17 |
+
```bash
|
| 18 |
+
pip install flask flask-cors
|
| 19 |
+
# Or install all requirements if you have requirements.txt
|
| 20 |
+
pip install -r requirements.txt
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
3. **Index product design documents (first time only):**
|
| 24 |
+
```bash
|
| 25 |
+
modal run src/rag/modal-rag-product-design.py::index_product_design
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
## Running the Web Application
|
| 29 |
+
|
| 30 |
+
### Option 1: Using the helper script (Recommended)
|
| 31 |
+
```bash
|
| 32 |
+
./scripts/setup/start_web.sh
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
### Option 2: Direct Python command
|
| 36 |
+
```bash
|
| 37 |
+
# Make sure venv is activated
|
| 38 |
+
source venv/bin/activate
|
| 39 |
+
python src/web/web_app.py
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
### Option 3: From project root
|
| 43 |
+
```bash
|
| 44 |
+
python3 src/web/web_app.py
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
## Access the Web Interface
|
| 48 |
+
|
| 49 |
+
Once the server starts, open your browser and go to:
|
| 50 |
+
- **http://127.0.0.1:5000** (or the port shown in the terminal)
|
| 51 |
+
|
| 52 |
+
⚠️ **Important:** Use `127.0.0.1` instead of `localhost` to avoid potential 403 errors on macOS.
|
| 53 |
+
|
| 54 |
+
## Querying the RAG System
|
| 55 |
+
|
| 56 |
+
### Via Web Interface
|
| 57 |
+
1. Start the web app (see above)
|
| 58 |
+
2. Open the URL in your browser
|
| 59 |
+
3. Enter your question and click "Ask Question"
|
| 60 |
+
|
| 61 |
+
### Via CLI
|
| 62 |
+
```bash
|
| 63 |
+
python src/web/query_product_design.py --question "your question here"
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
### Via Modal Directly
|
| 67 |
+
```bash
|
| 68 |
+
modal run src/rag/modal-rag-product-design.py::query_product_design --question "your question here"
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
## Troubleshooting
|
| 72 |
+
|
| 73 |
+
### Flask Not Installed
|
| 74 |
+
```bash
|
| 75 |
+
# Activate venv
|
| 76 |
+
source venv/bin/activate
|
| 77 |
+
|
| 78 |
+
# Install Flask
|
| 79 |
+
pip install flask flask-cors
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
### Port Already in Use
|
| 83 |
+
The web app will automatically find an available port (5000-5009).
|
| 84 |
+
|
| 85 |
+
### Modal Command Not Found
|
| 86 |
+
```bash
|
| 87 |
+
# Install Modal CLI
|
| 88 |
+
pip install modal
|
| 89 |
+
|
| 90 |
+
# Or use python -m modal
|
| 91 |
+
python -m modal --version
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
For more help, see `docs/guides/TROUBLESHOOTING.md`.
|
| 95 |
+
|
README.md
CHANGED
|
@@ -1,2 +1,144 @@
|
|
| 1 |
-
# MCP-
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MCP Hack - Insurance Product Design RAG System
|
| 2 |
+
|
| 3 |
+
A comprehensive RAG (Retrieval Augmented Generation) system for querying and analyzing auto insurance product design documents, specifically designed for the Tokyo market.
|
| 4 |
+
|
| 5 |
+
## 📁 Repository Structure
|
| 6 |
+
|
| 7 |
+
```
|
| 8 |
+
/
|
| 9 |
+
├── src/  # Core application code
|
| 10 |
+
│   ├── rag/  # RAG system implementation
|
| 11 |
+
│   │   ├── modal-rag.py  # Main RAG system
|
| 12 |
+
│   │   └── modal-rag-product-design.py  # Product design RAG
|
| 13 |
+
│   └── web/  # Web application
|
| 14 |
+
│       ├── web_app.py  # Flask web server
|
| 15 |
+
│       ├── query_product_design.py  # RAG query interface
|
| 16 |
+
│       ├── templates/  # HTML templates
|
| 17 |
+
│       └── static/  # CSS, JS, assets
|
| 18 |
+
│
|
| 19 |
+
├── scripts/  # Utility scripts
|
| 20 |
+
│   ├── data/  # Data processing scripts
|
| 21 |
+
│   ├── setup/  # Setup and installation scripts
|
| 22 |
+
│   └── tools/  # General utility scripts
|
| 23 |
+
│
|
| 24 |
+
├── docs/  # Documentation
|
| 25 |
+
│   ├── guides/  # How-to guides and tutorials
|
| 26 |
+
│   ├── api/  # API documentation
|
| 27 |
+
│   └── product-design/  # Product design documents
|
| 28 |
+
│
|
| 29 |
+
├── tests/  # Test files
|
| 30 |
+
├── diagrams/  # System architecture diagrams
|
| 31 |
+
├── finetune/  # Model fine-tuning documentation
|
| 32 |
+
├── bkp/  # Backup files
|
| 33 |
+
└── venv/  # Python virtual environment
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
## 🚀 Quick Start
|
| 37 |
+
|
| 38 |
+
### Prerequisites
|
| 39 |
+
|
| 40 |
+
- Python 3.13+
|
| 41 |
+
- Modal account and CLI installed
|
| 42 |
+
- Virtual environment activated
|
| 43 |
+
|
| 44 |
+
### Installation
|
| 45 |
+
|
| 46 |
+
1. **Clone and setup:**
|
| 47 |
+
```bash
|
| 48 |
+
git clone <repo-url>
|
| 49 |
+
cd mcp-hack
|
| 50 |
+
python3 -m venv venv
|
| 51 |
+
source venv/bin/activate
|
| 52 |
+
pip install -r requirements.txt
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
2. **Index product design documents:**
|
| 56 |
+
```bash
|
| 57 |
+
modal run src/rag/modal-rag-product-design.py::index_product_design
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
3. **Start web interface:**
|
| 61 |
+
```bash
|
| 62 |
+
python src/web/web_app.py
|
| 63 |
+
# Or use the helper script:
|
| 64 |
+
./scripts/setup/start_web.sh
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
4. **Access the web interface:**
|
| 68 |
+
- Open `http://127.0.0.1:5000` in your browser
|
| 69 |
+
- Ask questions about the product design document
|
| 70 |
+
|
| 71 |
+
## 📚 Documentation
|
| 72 |
+
|
| 73 |
+
- **Quick Start Guide:** `docs/guides/QUICK_START_RAG.md`
|
| 74 |
+
- **Web Interface:** `docs/guides/WEB_INTERFACE.md`
|
| 75 |
+
- **Troubleshooting:** `docs/guides/TROUBLESHOOTING.md`
|
| 76 |
+
- **Product Design Docs:** `docs/product-design/`
|
| 77 |
+
|
| 78 |
+
## 🔧 Key Components
|
| 79 |
+
|
| 80 |
+
### RAG System (`src/rag/`)
|
| 81 |
+
- **modal-rag.py**: Main RAG system for insurance products
|
| 82 |
+
- **modal-rag-product-design.py**: Specialized RAG for product design documents
|
| 83 |
+
|
| 84 |
+
### Web Application (`src/web/`)
|
| 85 |
+
- **web_app.py**: Flask web server with REST API
|
| 86 |
+
- **query_product_design.py**: RAG query interface
|
| 87 |
+
- **templates/**: HTML templates for the web UI
|
| 88 |
+
- **static/**: CSS and JavaScript files
|
| 89 |
+
|
| 90 |
+
### Scripts (`scripts/`)
|
| 91 |
+
- **data/**: Data processing and conversion scripts
|
| 92 |
+
- **setup/**: Installation and setup scripts
|
| 93 |
+
- **tools/**: Utility scripts for various tasks
|
| 94 |
+
|
| 95 |
+
## 🎯 Usage Examples
|
| 96 |
+
|
| 97 |
+
### Query via CLI
|
| 98 |
+
```bash
|
| 99 |
+
python src/web/query_product_design.py --question "What are the premium ranges?"
|
| 100 |
+
```
|
| 101 |
+
|
| 102 |
+
### Query via Web Interface
|
| 103 |
+
1. Start the web app: `python src/web/web_app.py`
|
| 104 |
+
2. Open `http://127.0.0.1:5000`
|
| 105 |
+
3. Enter your question and submit
|
| 106 |
+
|
| 107 |
+
### Query via Modal Directly
|
| 108 |
+
```bash
|
| 109 |
+
modal run src/rag/modal-rag-product-design.py::query_product_design --question "How to make product decisions?"
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
## 📊 Features
|
| 113 |
+
|
| 114 |
+
- ✅ RAG-based document querying
|
| 115 |
+
- ✅ Web interface for easy interaction
|
| 116 |
+
- ✅ Support for markdown and Word documents
|
| 117 |
+
- ✅ Vector database with ChromaDB
|
| 118 |
+
- ✅ Fast inference with vLLM
|
| 119 |
+
- ✅ Comprehensive documentation
|
| 120 |
+
|
| 121 |
+
## 🛠️ Development
|
| 122 |
+
|
| 123 |
+
### Running Tests
|
| 124 |
+
```bash
|
| 125 |
+
python -m pytest tests/
|
| 126 |
+
```
|
| 127 |
+
|
| 128 |
+
### Adding New Documents
|
| 129 |
+
1. Add documents to Modal volume
|
| 130 |
+
2. Run indexing: `modal run src/rag/modal-rag-product-design.py::index_product_design`
|
| 131 |
+
|
| 132 |
+
### Project Structure Guidelines
|
| 133 |
+
- **src/**: Core application code only
|
| 134 |
+
- **scripts/**: Utility scripts organized by purpose
|
| 135 |
+
- **docs/**: Documentation organized by type
|
| 136 |
+
- **tests/**: All test files
|
| 137 |
+
|
| 138 |
+
## 📝 License
|
| 139 |
+
|
| 140 |
+
[Add your license here]
|
| 141 |
+
|
| 142 |
+
## 🤝 Contributing
|
| 143 |
+
|
| 144 |
+
[Add contribution guidelines here]
|
STRUCTURE.md
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Repository Structure
|
| 2 |
+
|
| 3 |
+
This document describes the organization of the repository.
|
| 4 |
+
|
| 5 |
+
## Directory Layout
|
| 6 |
+
|
| 7 |
+
```
|
| 8 |
+
mcp-hack/
|
| 9 |
+
├── src/  # Core application source code
|
| 10 |
+
│   ├── rag/  # RAG (Retrieval Augmented Generation) system
|
| 11 |
+
│   │   ├── modal-rag.py  # Main RAG system for insurance products
|
| 12 |
+
│   │   └── modal-rag-product-design.py  # Product design document RAG
|
| 13 |
+
│   └── web/  # Web application
|
| 14 |
+
│       ├── web_app.py  # Flask web server
|
| 15 |
+
│       ├── query_product_design.py  # RAG query CLI interface
|
| 16 |
+
│       ├── templates/  # HTML templates
|
| 17 |
+
│       │   └── index.html
|
| 18 |
+
│       └── static/  # Static assets
|
| 19 |
+
│           ├── css/
|
| 20 |
+
│           │   └── style.css
|
| 21 |
+
│           └── js/
|
| 22 |
+
│               └── app.js
|
| 23 |
+
│
|
| 24 |
+
├── scripts/  # Utility scripts organized by purpose
|
| 25 |
+
│   ├── data/  # Data processing scripts
|
| 26 |
+
│   │   ├── download_*.py  # Data download scripts
|
| 27 |
+
│   │   ├── convert_*.py  # Data conversion scripts
|
| 28 |
+
│   │   ├── prepare_*.py  # Data preparation scripts
|
| 29 |
+
│   │   └── cleanup_*.py  # Data cleanup scripts
|
| 30 |
+
│   ├── setup/  # Setup and installation scripts
|
| 31 |
+
│   │   ├── start_web.sh  # Start web application
|
| 32 |
+
│   │   └── run_with_venv.sh  # Run scripts with venv
|
| 33 |
+
│   └── tools/  # General utility scripts
|
| 34 |
+
│       ├── api_endpoint*.py  # API endpoint scripts
|
| 35 |
+
│       ├── finetune_*.py  # Fine-tuning scripts
|
| 36 |
+
│       └── debug_*.py  # Debugging utilities
|
| 37 |
+
│
|
| 38 |
+
├── docs/  # Documentation
|
| 39 |
+
│   ├── guides/  # How-to guides and tutorials
|
| 40 |
+
│   │   ├── QUICK_START_RAG.md
|
| 41 |
+
│   │   ├── WEB_INTERFACE.md
|
| 42 |
+
│   │   ├── TROUBLESHOOTING.md
|
| 43 |
+
│   │   └── ...  # Other guides
|
| 44 |
+
│   ├── api/  # API documentation (if any)
|
| 45 |
+
│   └── product-design/  # Product design documents
|
| 46 |
+
│       ├── tokyo_auto_insurance_product_design.md
|
| 47 |
+
│       ├── tokyo_auto_insurance_product_design_filled.md
|
| 48 |
+
│       ├── tokyo_auto_insurance_product_design.docx
|
| 49 |
+
│       ├── PRODUCT_DECISION_GUIDE.md
|
| 50 |
+
│       └── setup_product_design_rag.md
|
| 51 |
+
│
|
| 52 |
+
├── tests/  # Test files
|
| 53 |
+
│   ├── test_server.py
|
| 54 |
+
│   └── test_web.py
|
| 55 |
+
│
|
| 56 |
+
├── diagrams/  # System architecture diagrams
|
| 57 |
+
│   ├── *.mmd  # Mermaid diagram sources
|
| 58 |
+
│   └── *.svg  # Rendered diagrams
|
| 59 |
+
│
|
| 60 |
+
├── finetune/  # Model fine-tuning documentation
|
| 61 |
+
│   ├── README.md
|
| 62 |
+
│   └── *.md  # Fine-tuning guides
|
| 63 |
+
│
|
| 64 |
+
├── bkp/  # Backup files (old versions)
|
| 65 |
+
│
|
| 66 |
+
├── config/  # Configuration files (if any)
|
| 67 |
+
│
|
| 68 |
+
├── venv/  # Python virtual environment (gitignored)
|
| 69 |
+
│
|
| 70 |
+
├── README.md  # Main project README
|
| 71 |
+
├── MIGRATION_GUIDE.md  # Guide for migrating from old structure
|
| 72 |
+
├── STRUCTURE.md  # This file
|
| 73 |
+
└── .gitignore  # Git ignore rules
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
## Key Directories
|
| 77 |
+
|
| 78 |
+
### `src/`
|
| 79 |
+
Contains all core application code. Organized into:
|
| 80 |
+
- **`rag/`**: RAG system implementations using Modal
|
| 81 |
+
- **`web/`**: Web application (Flask) with templates and static assets
|
| 82 |
+
|
| 83 |
+
### `scripts/`
|
| 84 |
+
Utility scripts organized by purpose:
|
| 85 |
+
- **`data/`**: Data processing, downloading, conversion
|
| 86 |
+
- **`setup/`**: Installation and setup scripts
|
| 87 |
+
- **`tools/`**: General utilities, API endpoints, debugging tools
|
| 88 |
+
|
| 89 |
+
### `docs/`
|
| 90 |
+
Documentation organized by type:
|
| 91 |
+
- **`guides/`**: How-to guides and tutorials
|
| 92 |
+
- **`api/`**: API documentation
|
| 93 |
+
- **`product-design/`**: Product design documents
|
| 94 |
+
|
| 95 |
+
### `tests/`
|
| 96 |
+
All test files for the application.
|
| 97 |
+
|
| 98 |
+
## File Naming Conventions
|
| 99 |
+
|
| 100 |
+
- Python scripts: `snake_case.py`
|
| 101 |
+
- Documentation: `UPPER_CASE.md` or `kebab-case.md`
|
| 102 |
+
- Shell scripts: `snake_case.sh` (e.g. `start_web.sh`, `run_with_venv.sh`)
|
| 103 |
+
- Config files: `.config` or `config.json`
|
| 104 |
+
|
| 105 |
+
## Import Paths
|
| 106 |
+
|
| 107 |
+
When importing from this repository:
|
| 108 |
+
|
| 109 |
+
```python
|
| 110 |
+
# From root directory
|
| 111 |
+
import sys
|
| 112 |
+
sys.path.insert(0, 'src/web')
|
| 113 |
+
from query_product_design import query_rag
|
| 114 |
+
|
| 115 |
+
# Or add src to path
|
| 116 |
+
sys.path.insert(0, 'src')
|
| 117 |
+
from rag.modal_rag_product_design import ...
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
## Running Applications
|
| 121 |
+
|
| 122 |
+
### Web Application
|
| 123 |
+
```bash
|
| 124 |
+
# From project root
|
| 125 |
+
python src/web/web_app.py
|
| 126 |
+
|
| 127 |
+
# Or use helper script
|
| 128 |
+
./scripts/setup/start_web.sh
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
### RAG Queries
|
| 132 |
+
```bash
|
| 133 |
+
# CLI
|
| 134 |
+
python src/web/query_product_design.py --question "your question"
|
| 135 |
+
|
| 136 |
+
# Modal direct
|
| 137 |
+
modal run src/rag/modal-rag-product-design.py::query_product_design --question "your question"
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
## Adding New Files
|
| 141 |
+
|
| 142 |
+
When adding new files, follow the structure:
|
| 143 |
+
- **Application code** → `src/`
|
| 144 |
+
- **Utility scripts** → `scripts/{data,setup,tools}/`
|
| 145 |
+
- **Documentation** → `docs/{guides,api,product-design}/`
|
| 146 |
+
- **Tests** → `tests/`
|
| 147 |
+
- **Config** → `config/`
|
| 148 |
+
|
bkp/modal-rag.py.backup
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import modal
|
| 2 |
+
|
| 3 |
+
app = modal.App("insurance-rag")
|
| 4 |
+
|
| 5 |
+
# Reference your specific volume
|
| 6 |
+
vol = modal.Volume.from_name("mcp-hack-ins-products", create_if_missing=True)
|
| 7 |
+
|
| 8 |
+
# Model configuration
|
| 9 |
+
LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct"
|
| 10 |
+
EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5"
|
| 11 |
+
|
| 12 |
+
# Build image with ALL required dependencies
|
| 13 |
+
image = (
|
| 14 |
+
modal.Image.debian_slim(python_version="3.11")
|
| 15 |
+
.pip_install(
|
| 16 |
+
"vllm==0.6.3.post1", # Fast inference engine
|
| 17 |
+
"langchain==0.3.7",
|
| 18 |
+
"langchain-community==0.3.7",
|
| 19 |
+
"langchain-text-splitters==0.3.2",
|
| 20 |
+
"sentence-transformers==3.3.0",
|
| 21 |
+
"chromadb==0.5.20",
|
| 22 |
+
"pypdf==5.1.0",
|
| 23 |
+
"cryptography==43.0.3",
|
| 24 |
+
"transformers==4.46.2",
|
| 25 |
+
"torch==2.5.1",
|
| 26 |
+
"huggingface_hub==0.26.2",
|
| 27 |
+
)
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
@app.function(image=image, volumes={"/insurance-data": vol})
|
| 31 |
+
def list_files():
|
| 32 |
+
"""List all files in the volume"""
|
| 33 |
+
import os
|
| 34 |
+
files = []
|
| 35 |
+
for root, dirs, filenames in os.walk("/insurance-data"):
|
| 36 |
+
for filename in filenames:
|
| 37 |
+
full_path = os.path.join(root, filename)
|
| 38 |
+
files.append(full_path)
|
| 39 |
+
return files
|
| 40 |
+
|
| 41 |
+
@app.function(
|
| 42 |
+
image=image,
|
| 43 |
+
volumes={"/insurance-data": vol},
|
| 44 |
+
timeout=900
|
| 45 |
+
)
|
| 46 |
+
|
| 47 |
+
def create_vector_db():
|
| 48 |
+
"""Create vector database from insurance PDFs"""
|
| 49 |
+
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
|
| 50 |
+
from langchain_community.vectorstores import Chroma
|
| 51 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 52 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 53 |
+
|
| 54 |
+
print("π Loading documents from /insurance-data...")
|
| 55 |
+
|
| 56 |
+
loader = DirectoryLoader(
|
| 57 |
+
"/insurance-data",
|
| 58 |
+
glob="**/*.pdf",
|
| 59 |
+
loader_cls=PyPDFLoader,
|
| 60 |
+
silent_errors=True
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
try:
|
| 64 |
+
documents = loader.load()
|
| 65 |
+
except Exception as e:
|
| 66 |
+
print(f"β οΈ Warning during loading: {e}")
|
| 67 |
+
documents = []
|
| 68 |
+
|
| 69 |
+
print(f"π Loaded {len(documents)} document pages")
|
| 70 |
+
|
| 71 |
+
if len(documents) == 0:
|
| 72 |
+
return {
|
| 73 |
+
"status": "error",
|
| 74 |
+
"message": "No PDF files could be loaded",
|
| 75 |
+
"total_documents": 0,
|
| 76 |
+
"total_chunks": 0
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
print("βοΈ Splitting documents into chunks...")
|
| 80 |
+
text_splitter = RecursiveCharacterTextSplitter(
|
| 81 |
+
chunk_size=1000,
|
| 82 |
+
chunk_overlap=200
|
| 83 |
+
)
|
| 84 |
+
chunks = text_splitter.split_documents(documents)
|
| 85 |
+
print(f"π¦ Created {len(chunks)} chunks")
|
| 86 |
+
|
| 87 |
+
print("π§ Creating embeddings...")
|
| 88 |
+
embeddings = HuggingFaceEmbeddings(
|
| 89 |
+
model_name=EMBEDDING_MODEL,
|
| 90 |
+
model_kwargs={'device': 'cuda'},
|
| 91 |
+
encode_kwargs={'normalize_embeddings': True}
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
print("πΎ Building vector database...")
|
| 95 |
+
|
| 96 |
+
# Connect to remote Chroma service
|
| 97 |
+
chroma_service = modal.Cls.from_name("chroma-server-v2", "ChromaDB")()
|
| 98 |
+
|
| 99 |
+
# Prepare data for upsert
|
| 100 |
+
ids = [f"id_{i}" for i in range(len(chunks))]
|
| 101 |
+
documents = [chunk.page_content for chunk in chunks]
|
| 102 |
+
metadatas = [chunk.metadata for chunk in chunks]
|
| 103 |
+
|
| 104 |
+
# Generate embeddings locally
|
| 105 |
+
print(" Generating embeddings locally...")
|
| 106 |
+
embeddings_list = embeddings.embed_documents(documents)
|
| 107 |
+
|
| 108 |
+
# Upsert to remote Chroma
|
| 109 |
+
print(" Upserting to remote Chroma DB...")
|
| 110 |
+
batch_size = 100
|
| 111 |
+
for i in range(0, len(ids), batch_size):
|
| 112 |
+
batch_ids = ids[i:i+batch_size]
|
| 113 |
+
batch_docs = documents[i:i+batch_size]
|
| 114 |
+
batch_metas = metadatas[i:i+batch_size]
|
| 115 |
+
batch_embs = embeddings_list[i:i+batch_size]
|
| 116 |
+
|
| 117 |
+
chroma_service.upsert.remote(
|
| 118 |
+
collection_name="insurance_products",
|
| 119 |
+
ids=batch_ids,
|
| 120 |
+
documents=batch_docs,
|
| 121 |
+
embeddings=batch_embs,
|
| 122 |
+
metadatas=batch_metas
|
| 123 |
+
)
|
| 124 |
+
print(f" Upserted batch {i//batch_size + 1}/{(len(ids)-1)//batch_size + 1}")
|
| 125 |
+
|
| 126 |
+
print("β
Vector database created and persisted remotely!")
|
| 127 |
+
|
| 128 |
+
return {
|
| 129 |
+
"status": "success",
|
| 130 |
+
"total_documents": len(documents),
|
| 131 |
+
"total_chunks": len(chunks)
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
@app.cls(
|
| 135 |
+
image=image,
|
| 136 |
+
volumes={"/insurance-data": vol},
|
| 137 |
+
gpu="A10G",
|
| 138 |
+
timeout=600,
|
| 139 |
+
max_containers=1, # Keep one container alive
|
| 140 |
+
min_containers=1 # Keep one container warm
|
| 141 |
+
)
|
| 142 |
+
class RAGModel:
|
| 143 |
+
@modal.enter()
|
| 144 |
+
def enter(self):
|
| 145 |
+
from langchain_community.vectorstores import Chroma
|
| 146 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 147 |
+
from langchain_community.llms import HuggingFacePipeline
|
| 148 |
+
from langchain.chains import RetrievalQA
|
| 149 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 150 |
+
import torch
|
| 151 |
+
from typing import Any, List
|
| 152 |
+
from langchain_core.retrievers import BaseRetriever
|
| 153 |
+
from langchain_core.documents import Document
|
| 154 |
+
|
| 155 |
+
print("π Loading embeddings...")
|
| 156 |
+
self.embeddings = HuggingFaceEmbeddings(
|
| 157 |
+
model_name=EMBEDDING_MODEL,
|
| 158 |
+
model_kwargs={'device': 'cuda'},
|
| 159 |
+
encode_kwargs={'normalize_embeddings': True}
|
| 160 |
+
)
|
| 161 |
+
|
| 162 |
+
print("π Connecting to remote Chroma DB...")
|
| 163 |
+
self.chroma_service = modal.Cls.from_name("chroma-server-v2", "ChromaDB")()
|
| 164 |
+
|
| 165 |
+
class RemoteChromaRetriever(BaseRetriever):
|
| 166 |
+
chroma_service: Any
|
| 167 |
+
embeddings: Any
|
| 168 |
+
k: int = 3
|
| 169 |
+
|
| 170 |
+
def _get_relevant_documents(self, query: str) -> List[Document]:
|
| 171 |
+
query_embedding = self.embeddings.embed_query(query)
|
| 172 |
+
results = self.chroma_service.query.remote(
|
| 173 |
+
collection_name="insurance_products",
|
| 174 |
+
query_embeddings=[query_embedding],
|
| 175 |
+
n_results=self.k
|
| 176 |
+
)
|
| 177 |
+
|
| 178 |
+
documents = []
|
| 179 |
+
if results['documents']:
|
| 180 |
+
for i in range(len(results['documents'][0])):
|
| 181 |
+
doc = Document(
|
| 182 |
+
page_content=results['documents'][0][i],
|
| 183 |
+
metadata=results['metadatas'][0][i] if results['metadatas'] else {}
|
| 184 |
+
)
|
| 185 |
+
documents.append(doc)
|
| 186 |
+
return documents
|
| 187 |
+
|
| 188 |
+
async def _aget_relevant_documents(self, query: str) -> List[Document]:
|
| 189 |
+
return self._get_relevant_documents(query)
|
| 190 |
+
|
| 191 |
+
self.RemoteChromaRetriever = RemoteChromaRetriever
|
| 192 |
+
|
| 193 |
+
print("π€ Loading LLM model with vLLM...")
|
| 194 |
+
from vllm import LLM, SamplingParams
|
| 195 |
+
|
| 196 |
+
# Initialize vLLM engine (much faster than HuggingFace pipeline)
|
| 197 |
+
self.llm_engine = LLM(
|
| 198 |
+
model=LLM_MODEL,
|
| 199 |
+
tensor_parallel_size=1,
|
| 200 |
+
gpu_memory_utilization=0.85,
|
| 201 |
+
max_model_len=4096, # Phi-3 supports 4k context
|
| 202 |
+
trust_remote_code=True # Required for Phi-3
|
| 203 |
+
)
|
| 204 |
+
|
| 205 |
+
# Configure sampling parameters for generation
|
| 206 |
+
self.sampling_params = SamplingParams(
|
| 207 |
+
temperature=0.7,
|
| 208 |
+
max_tokens=256, # Reduced for faster responses
|
| 209 |
+
top_p=0.9,
|
| 210 |
+
stop=["\n\n", "Question:", "Context:"] # Stop tokens
|
| 211 |
+
)
|
| 212 |
+
|
| 213 |
+
print("β
vLLM model loaded and ready!")
|
| 214 |
+
|
| 215 |
+
@modal.method()
|
| 216 |
+
def query(self, question: str, top_k: int = 2):
|
| 217 |
+
import time
|
| 218 |
+
start_time = time.time()
|
| 219 |
+
|
| 220 |
+
print(f"β Query: {question}")
|
| 221 |
+
|
| 222 |
+
# Retrieve relevant documents
|
| 223 |
+
retrieval_start = time.time()
|
| 224 |
+
retriever = self.RemoteChromaRetriever(
|
| 225 |
+
chroma_service=self.chroma_service,
|
| 226 |
+
embeddings=self.embeddings,
|
| 227 |
+
k=top_k
|
| 228 |
+
)
|
| 229 |
+
docs = retriever.get_relevant_documents(question)
|
| 230 |
+
retrieval_time = time.time() - retrieval_start
|
| 231 |
+
|
| 232 |
+
# Build context from retrieved documents
|
| 233 |
+
context = "\n\n".join([doc.page_content for doc in docs])
|
| 234 |
+
|
| 235 |
+
# Create prompt for Phi-3 (using its chat template)
|
| 236 |
+
prompt = f"""<|system|>
|
| 237 |
+
You are a helpful AI assistant that answers questions about insurance products based on the provided context. Be concise and accurate.<|end|>
|
| 238 |
+
|
| 239 |
+
def web_query(self, question: str):
|
| 240 |
+
return self.query.local(question)
|
| 241 |
+
|
| 242 |
+
@app.local_entrypoint()
|
| 243 |
+
def list():
|
| 244 |
+
"""List files in volume"""
|
| 245 |
+
print("π Listing files in mcp-hack-ins-products volume...")
|
| 246 |
+
files = list_files.remote()
|
| 247 |
+
print(f"\nβ
Found {len(files)} files:")
|
| 248 |
+
for f in files:
|
| 249 |
+
print(f" π {f}")
|
| 250 |
+
|
| 251 |
+
@app.local_entrypoint()
|
| 252 |
+
def index():
|
| 253 |
+
"""Create vector database"""
|
| 254 |
+
print("π Starting vector database creation...")
|
| 255 |
+
result = create_vector_db.remote()
|
| 256 |
+
print(f"\n{'='*60}")
|
| 257 |
+
print(f"Status: {result['status']}")
|
| 258 |
+
if result['status'] == 'success':
|
| 259 |
+
print(f"Documents processed: {result['total_documents']}")
|
| 260 |
+
print(f"Text chunks created: {result['total_chunks']}")
|
| 261 |
+
print("β
Vector database is ready for queries!")
|
| 262 |
+
else:
|
| 263 |
+
print(f"β Error: {result['message']}")
|
| 264 |
+
print(f"{'='*60}")
|
| 265 |
+
|
| 266 |
+
@app.local_entrypoint()
|
| 267 |
+
def query(question: str = "What insurance products are available?"):
|
| 268 |
+
"""Query the RAG system"""
|
| 269 |
+
print(f"π€ Question: {question}\n")
|
| 270 |
+
|
| 271 |
+
# Lookup the deployed RAGModel from the insurance-rag app
|
| 272 |
+
# This connects to the persistent container instead of creating a new one
|
| 273 |
+
model = RAGModel()
|
| 274 |
+
result = model.query.remote(question)
|
| 275 |
+
|
| 276 |
+
print(f"{'='*60}")
|
| 277 |
+
print(f"π‘ Answer:\n{result['answer']}")
|
| 278 |
+
print(f"\n{'='*60}")
|
| 279 |
+
print(f"π Sources ({len(result['sources'])}):")
|
| 280 |
+
for i, source in enumerate(result['sources'], 1):
|
| 281 |
+
print(f"\n [{i}] {source['metadata'].get('source', 'Unknown')}")
|
| 282 |
+
print(f" Page: {source['metadata'].get('page', 'N/A')}")
|
| 283 |
+
print(f" Preview: {source['content'][:150]}...")
|
| 284 |
+
print(f"{'='*60}")
|
clean_sample.csv
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
docs/HOW_TO_RUN.md
CHANGED
|
@@ -145,9 +145,6 @@ modal app logs mcp-hack::finetune-phi3-modal
|
|
| 145 |
|
| 146 |
### If You Need to Regenerate Data
|
| 147 |
```bash
|
| 148 |
-
# Clear existing dataset
|
| 149 |
-
./venv/bin/modal run docs/clear_dataset.py
|
| 150 |
-
|
| 151 |
# Regenerate with new logic
|
| 152 |
./venv/bin/modal run --detach docs/prepare_finetune_data.py
|
| 153 |
```
|
|
|
|
| 145 |
|
| 146 |
### If You Need to Regenerate Data
|
| 147 |
```bash
|
|
|
|
|
|
|
|
|
|
| 148 |
# Regenerate with new logic
|
| 149 |
./venv/bin/modal run --detach docs/prepare_finetune_data.py
|
| 150 |
```
|
docs/guides/HOW_TO_RUN.md
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# How to Run the Fine-Tuning Pipeline
|
| 2 |
+
|
| 3 |
+
This guide walks you through the complete pipeline from data generation to model deployment.
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
## π Dataset Generation Results
|
| 8 |
+
|
| 9 |
+
### Final Statistics
|
| 10 |
+
- **Training Samples**: 201,651
|
| 11 |
+
- **Validation Samples**: 22,407
|
| 12 |
+
- **Total Dataset**: 224,058 high-quality QA pairs
|
| 13 |
+
- **Improvement**: 150x more data than previous approach
|
| 14 |
+
|
| 15 |
+
### Batch Performance
|
| 16 |
+
| Batch | Files | Data Points | Status |
|
| 17 |
+
|-------|-------|-------------|--------|
|
| 18 |
+
| 1 | 1,000 | 100,611 | ✅ Excellent |
|
| 19 |
+
| 2 | 1,000 | 39,960 | ✅ Good |
|
| 20 |
+
| 3 | 1,000 | 0 | ⚠️ Complex files |
|
| 21 |
+
| 4 | 1,000 | 600 | ⚠️ Runner issue |
|
| 22 |
+
| 5 | 1,000 | 54,627 | ✅ Excellent |
|
| 23 |
+
| 6 | 1,000 | 5,400 | ✅ Good |
|
| 24 |
+
| 7 | 888 | 22,860 | ✅ Good |
|
| 25 |
+
|
| 26 |
+
---
|
| 27 |
+
|
| 28 |
+
## π Step-by-Step Instructions
|
| 29 |
+
|
| 30 |
+
### Step 1: Fine-Tune the Model
|
| 31 |
+
|
| 32 |
+
Run the fine-tuning job on Modal with H200 GPU:
|
| 33 |
+
|
| 34 |
+
```bash
|
| 35 |
+
cd /Users/veeru/agents/mcp-hack
|
| 36 |
+
|
| 37 |
+
# Start fine-tuning in detached mode
|
| 38 |
+
./venv/bin/modal run --detach docs/finetune_modal.py
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
**What happens:**
|
| 42 |
+
- Loads 201,651 training samples from `finetune-dataset` volume
|
| 43 |
+
- Trains Phi-3-mini-4k-instruct with LoRA on H200 GPU
|
| 44 |
+
- Runs for ~90-120 minutes
|
| 45 |
+
- Saves model to `model-checkpoints` volume
|
| 46 |
+
|
| 47 |
+
**Monitor progress:**
|
| 48 |
+
```bash
|
| 49 |
+
# View live logs
|
| 50 |
+
modal app logs mcp-hack::finetune-phi3-modal
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
---
|
| 54 |
+
|
| 55 |
+
### Step 2: Evaluate the Model
|
| 56 |
+
|
| 57 |
+
After training completes, test the model:
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
./venv/bin/modal run docs/eval_finetuned.py
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
This will run sample questions and show the model's answers.
|
| 64 |
+
|
| 65 |
+
---
|
| 66 |
+
|
| 67 |
+
### Step 3: Deploy API Endpoint
|
| 68 |
+
|
| 69 |
+
Deploy the inference API:
|
| 70 |
+
|
| 71 |
+
**Option A: GPU Endpoint (A10G)**
|
| 72 |
+
```bash
|
| 73 |
+
./venv/bin/modal deploy scripts/tools/api_endpoint.py
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
**Option B: CPU Endpoint**
|
| 77 |
+
```bash
|
| 78 |
+
./venv/bin/modal deploy scripts/tools/api_endpoint_cpu.py
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
**Get the endpoint URL:**
|
| 82 |
+
```bash
|
| 83 |
+
modal app list
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
---
|
| 87 |
+
|
| 88 |
+
### Step 4: Test the API
|
| 89 |
+
|
| 90 |
+
```bash
|
| 91 |
+
# Example API call
|
| 92 |
+
curl -X POST https://YOUR-MODAL-URL/ask \
|
| 93 |
+
-H "Content-Type: application/json" \
|
| 94 |
+
-d '{
|
| 95 |
+
"question": "What is the population of Tokyo?",
|
| 96 |
+
"context": "Japan Census data"
|
| 97 |
+
}'
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
---
|
| 101 |
+
|
| 102 |
+
## π Key Files
|
| 103 |
+
|
| 104 |
+
### Data Processing
|
| 105 |
+
- `scripts/data/prepare_finetune_data.py` - Generates dataset from CSV files
|
| 106 |
+
- `docs/clean_sample.py` - Local testing script for data cleaning
|
| 107 |
+
|
| 108 |
+
### Model Training
|
| 109 |
+
- `docs/finetune_modal.py` - Fine-tuning script (H200 GPU)
|
| 110 |
+
- `docs/eval_finetuned.py` - Evaluation script
|
| 111 |
+
|
| 112 |
+
### API Deployment
|
| 113 |
+
- `scripts/tools/api_endpoint.py` - GPU inference endpoint (A10G)
|
| 114 |
+
- `scripts/tools/api_endpoint_cpu.py` - CPU inference endpoint
|
| 115 |
+
|
| 116 |
+
### Documentation
|
| 117 |
+
- `diagrams/finetuning.svg` - Visual pipeline diagram
|
| 118 |
+
- `finetune/04-evaluation.md` - Evaluation results
|
| 119 |
+
|
| 120 |
+
---
|
| 121 |
+
|
| 122 |
+
## 🔧 Modal Volumes
|
| 123 |
+
|
| 124 |
+
The pipeline uses these Modal volumes:
|
| 125 |
+
|
| 126 |
+
| Volume | Purpose | Size |
|
| 127 |
+
|--------|---------|------|
|
| 128 |
+
| `census-data` | Raw census CSV files | 6,838 files |
|
| 129 |
+
| `economy-labor-data` | Raw economy CSV files | 50 files |
|
| 130 |
+
| `finetune-dataset` | Generated JSONL training data | 224K samples |
|
| 131 |
+
| `model-checkpoints` | Fine-tuned model weights | ~7GB |
|
| 132 |
+
|
| 133 |
+
---
|
| 134 |
+
|
| 135 |
+
## 💡 Tips
|
| 136 |
+
|
| 137 |
+
### If Training Fails
|
| 138 |
+
```bash
|
| 139 |
+
# Check logs for errors
|
| 140 |
+
modal app logs mcp-hack::finetune-phi3-modal
|
| 141 |
+
|
| 142 |
+
# Restart training
|
| 143 |
+
./venv/bin/modal run --detach docs/finetune_modal.py
|
| 144 |
+
```
|
| 145 |
+
|
| 146 |
+
### If You Need to Regenerate Data
|
| 147 |
+
```bash
|
| 148 |
+
# Clear existing dataset
|
| 149 |
+
./venv/bin/modal run docs/clear_dataset.py
|
| 150 |
+
|
| 151 |
+
# Regenerate with new logic
|
| 152 |
+
./venv/bin/modal run --detach scripts/data/prepare_finetune_data.py
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
### View Volume Contents
|
| 156 |
+
```bash
|
| 157 |
+
# List files in a volume
|
| 158 |
+
modal volume ls finetune-dataset
|
| 159 |
+
|
| 160 |
+
# Download a file
|
| 161 |
+
modal volume get finetune-dataset train.jsonl finetune/train.jsonl
|
| 162 |
+
```
|
| 163 |
+
|
| 164 |
+
---
|
| 165 |
+
|
| 166 |
+
## π Expected Timeline
|
| 167 |
+
|
| 168 |
+
| Step | Duration | Notes |
|
| 169 |
+
|------|----------|-------|
|
| 170 |
+
| Data Generation | ✅ Complete | 224K samples ready |
|
| 171 |
+
| Fine-Tuning | ~90-120 min | H200 GPU |
|
| 172 |
+
| Evaluation | ~5 min | Quick tests |
|
| 173 |
+
| API Deployment | ~2 min | Instant after deploy |
|
| 174 |
+
|
| 175 |
+
---
|
| 176 |
+
|
| 177 |
+
## 🎯 Next Steps
|
| 178 |
+
|
| 179 |
+
1. **Run fine-tuning** (see Step 1 above)
|
| 180 |
+
2. **Wait for completion** (~2 hours)
|
| 181 |
+
3. **Evaluate results** (see Step 2)
|
| 182 |
+
4. **Deploy API** (see Step 3)
|
| 183 |
+
5. **Test with real queries** (see Step 4)
|
| 184 |
+
|
| 185 |
+
---
|
| 186 |
+
|
| 187 |
+
## π Troubleshooting
|
| 188 |
+
|
| 189 |
+
**Issue**: "Volume not found"
|
| 190 |
+
```bash
|
| 191 |
+
# List all volumes
|
| 192 |
+
modal volume list
|
| 193 |
+
```
|
| 194 |
+
|
| 195 |
+
**Issue**: "Out of memory during training"
|
| 196 |
+
- Reduce `per_device_train_batch_size` in `finetune_modal.py`
|
| 197 |
+
- Current: 2 (already optimized for H200)
|
| 198 |
+
|
| 199 |
+
**Issue**: "Model not loading in API"
|
| 200 |
+
- Ensure fine-tuning completed successfully
|
| 201 |
+
- Check `model-checkpoints` volume has files
|
| 202 |
+
|
| 203 |
+
---
|
| 204 |
+
|
| 205 |
+
## ✅ Success Criteria
|
| 206 |
+
|
| 207 |
+
After completing all steps, you should have:
|
| 208 |
+
- ✅ Fine-tuned Phi-3-mini model
|
| 209 |
+
- ✅ Deployed API endpoint
|
| 210 |
+
- ✅ Model answering questions about Japanese census/economy data
|
| 211 |
+
- ✅ Improved accuracy over base model
|
| 212 |
+
|
| 213 |
+
---
|
| 214 |
+
|
| 215 |
+
**Ready to start?** Run the fine-tuning command from Step 1!
|
docs/{QUICK_START_RAG.md → guides/QUICK_START_RAG.md}
RENAMED
|
File without changes
|
docs/{RAG_SETUP_COMPLETE.md → guides/RAG_SETUP_COMPLETE.md}
RENAMED
|
File without changes
|
docs/{SETUP_SUCCESS.md → guides/SETUP_SUCCESS.md}
RENAMED
|
File without changes
|
docs/{SUMMARY.md → guides/SUMMARY.md}
RENAMED
|
File without changes
|
docs/{TROUBLESHOOTING.md → guides/TROUBLESHOOTING.md}
RENAMED
|
File without changes
|
docs/{WEB_INTERFACE.md → guides/WEB_INTERFACE.md}
RENAMED
|
File without changes
|
docs/{WEB_TROUBLESHOOTING.md → guides/WEB_TROUBLESHOOTING.md}
RENAMED
|
File without changes
|
docs/{estat_api_guide.md → guides/estat_api_guide.md}
RENAMED
|
File without changes
|
docs/{ft_process.md → guides/ft_process.md}
RENAMED
|
File without changes
|
docs/{modal-rag-optimization.md → guides/modal-rag-optimization.md}
RENAMED
|
File without changes
|
docs/{modal-rag-sequence.md → guides/modal-rag-sequence.md}
RENAMED
|
File without changes
|
docs/{next_steps_rag_recommendation.md → guides/next_steps_rag_recommendation.md}
RENAMED
|
File without changes
|
docs/{source_data.md → guides/source_data.md}
RENAMED
|
File without changes
|
docs/{PRODUCT_DECISION_GUIDE.md → product-design/PRODUCT_DECISION_GUIDE.md}
RENAMED
|
File without changes
|
docs/{setup_product_design_rag.md → product-design/setup_product_design_rag.md}
RENAMED
|
File without changes
|
docs/{tokyo_auto_insurance_product_design.docx → product-design/tokyo_auto_insurance_product_design.docx}
RENAMED
|
File without changes
|
docs/{tokyo_auto_insurance_product_design.md → product-design/tokyo_auto_insurance_product_design.md}
RENAMED
|
File without changes
|
docs/{tokyo_auto_insurance_product_design_filled.md → product-design/tokyo_auto_insurance_product_design_filled.md}
RENAMED
|
File without changes
|
scripts/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utility scripts for data processing, setup, and tools
|
| 3 |
+
"""
|
| 4 |
+
|
{docs → scripts/data}/cleanup_data.py
RENAMED
|
File without changes
|
{docs → scripts/data}/clear_census_volume.py
RENAMED
|
File without changes
|
{docs → scripts/data}/convert_census_to_csv.py
RENAMED
|
File without changes
|
{docs → scripts/data}/convert_economy_labor_to_csv.py
RENAMED
|
File without changes
|
{docs → scripts/data}/convert_to_word.py
RENAMED
|
File without changes
|
{docs → scripts/data}/create_custom_qa.py
RENAMED
|
File without changes
|
{docs → scripts/data}/delete_census_csvs.py
RENAMED
|
File without changes
|
{docs → scripts/data}/download_census_api.py
RENAMED
|
File without changes
|
{docs → scripts/data}/download_census_csv_modal.py
RENAMED
|
File without changes
|
{docs → scripts/data}/download_census_data.py
RENAMED
|
File without changes
|
{docs → scripts/data}/download_census_modal.py
RENAMED
|
File without changes
|
{docs → scripts/data}/download_economy_labor_modal.py
RENAMED
|
File without changes
|
{docs → scripts/data}/fix_csv_filenames.py
RENAMED
|
File without changes
|
{docs → scripts/data}/prepare_economy_data.py
RENAMED
|
File without changes
|
{docs → scripts/data}/prepare_finetune_data.py
RENAMED
|
File without changes
|
{docs → scripts/data}/remove_duplicate_csvs.py
RENAMED
|
File without changes
|
run_with_venv.sh → scripts/setup/run_with_venv.sh
RENAMED
|
@@ -2,7 +2,8 @@
|
|
| 2 |
# Helper script to run query_product_design with venv activated
|
| 3 |
|
| 4 |
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
| 5 |
-
cd "$SCRIPT_DIR"
|
|
|
|
| 6 |
|
| 7 |
# Activate venv if it exists
|
| 8 |
if [ -d "venv" ]; then
|
|
@@ -10,4 +11,4 @@ if [ -d "venv" ]; then
|
|
| 10 |
fi
|
| 11 |
|
| 12 |
# Run the script with all arguments
|
| 13 |
-
python3 query_product_design.py "$@"
|
|
|
|
| 2 |
# Helper script to run query_product_design with venv activated
|
| 3 |
|
| 4 |
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
| 5 |
+
PROJECT_ROOT="$( cd "$SCRIPT_DIR/../.." && pwd )"
|
| 6 |
+
cd "$PROJECT_ROOT"
|
| 7 |
|
| 8 |
# Activate venv if it exists
|
| 9 |
if [ -d "venv" ]; then
|
|
|
|
| 11 |
fi
|
| 12 |
|
| 13 |
# Run the script with all arguments
|
| 14 |
+
python3 src/web/query_product_design.py "$@"
|
start_web.sh → scripts/setup/start_web.sh
RENAMED
|
@@ -2,7 +2,8 @@
|
|
| 2 |
# Helper script to start the web interface
|
| 3 |
|
| 4 |
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
| 5 |
-
cd "$SCRIPT_DIR"
|
|
|
|
| 6 |
|
| 7 |
# Activate venv if it exists
|
| 8 |
if [ -d "venv" ]; then
|
|
@@ -10,5 +11,5 @@ if [ -d "venv" ]; then
|
|
| 10 |
fi
|
| 11 |
|
| 12 |
# Start the web app
|
| 13 |
-
python3 web_app.py
|
| 14 |
|
|
|
|
| 2 |
# Helper script to start the web interface
|
| 3 |
|
| 4 |
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
| 5 |
+
PROJECT_ROOT="$( cd "$SCRIPT_DIR/../.." && pwd )"
|
| 6 |
+
cd "$PROJECT_ROOT"
|
| 7 |
|
| 8 |
# Activate venv if it exists
|
| 9 |
if [ -d "venv" ]; then
|
|
|
|
| 11 |
fi
|
| 12 |
|
| 13 |
# Start the web app
|
| 14 |
+
python3 src/web/web_app.py
|
| 15 |
|
{docs → scripts/tools}/api_endpoint.py
RENAMED
|
File without changes
|
{docs → scripts/tools}/api_endpoint_cpu.py
RENAMED
|
File without changes
|
{docs → scripts/tools}/ask_model.py
RENAMED
|
File without changes
|