Spaces:

MCP-1st-Birthday
/

sdlc-agent

Runtime error

App Files Files Community

Veeru-c committed 17 days ago

Commit

23f437b

1 Parent(s): 46f2cb3

initial commit

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitignore +24 -201
.repo_structure_plan.md +41 -0
MIGRATION_GUIDE.md +81 -0
QUICK_START.md +95 -0
README.md +144 -2
STRUCTURE.md +148 -0
bkp/modal-rag.py.backup +284 -0
clean_sample.csv +0 -0
docs/HOW_TO_RUN.md +0 -3
docs/guides/HOW_TO_RUN.md +215 -0
docs/{QUICK_START_RAG.md → guides/QUICK_START_RAG.md} +0 -0
docs/{RAG_SETUP_COMPLETE.md → guides/RAG_SETUP_COMPLETE.md} +0 -0
docs/{SETUP_SUCCESS.md → guides/SETUP_SUCCESS.md} +0 -0
docs/{SUMMARY.md → guides/SUMMARY.md} +0 -0
docs/{TROUBLESHOOTING.md → guides/TROUBLESHOOTING.md} +0 -0
docs/{WEB_INTERFACE.md → guides/WEB_INTERFACE.md} +0 -0
docs/{WEB_TROUBLESHOOTING.md → guides/WEB_TROUBLESHOOTING.md} +0 -0
docs/{estat_api_guide.md → guides/estat_api_guide.md} +0 -0
docs/{ft_process.md → guides/ft_process.md} +0 -0
docs/{modal-rag-optimization.md → guides/modal-rag-optimization.md} +0 -0
docs/{modal-rag-sequence.md → guides/modal-rag-sequence.md} +0 -0
docs/{next_steps_rag_recommendation.md → guides/next_steps_rag_recommendation.md} +0 -0
docs/{source_data.md → guides/source_data.md} +0 -0
docs/{PRODUCT_DECISION_GUIDE.md → product-design/PRODUCT_DECISION_GUIDE.md} +0 -0
docs/{setup_product_design_rag.md → product-design/setup_product_design_rag.md} +0 -0
docs/{tokyo_auto_insurance_product_design.docx → product-design/tokyo_auto_insurance_product_design.docx} +0 -0
docs/{tokyo_auto_insurance_product_design.md → product-design/tokyo_auto_insurance_product_design.md} +0 -0
docs/{tokyo_auto_insurance_product_design_filled.md → product-design/tokyo_auto_insurance_product_design_filled.md} +0 -0
scripts/__init__.py +4 -0
{docs → scripts/data}/cleanup_data.py +0 -0
{docs → scripts/data}/clear_census_volume.py +0 -0
{docs → scripts/data}/convert_census_to_csv.py +0 -0
{docs → scripts/data}/convert_economy_labor_to_csv.py +0 -0
{docs → scripts/data}/convert_to_word.py +0 -0
{docs → scripts/data}/create_custom_qa.py +0 -0
{docs → scripts/data}/delete_census_csvs.py +0 -0
{docs → scripts/data}/download_census_api.py +0 -0
{docs → scripts/data}/download_census_csv_modal.py +0 -0
{docs → scripts/data}/download_census_data.py +0 -0
{docs → scripts/data}/download_census_modal.py +0 -0
{docs → scripts/data}/download_economy_labor_modal.py +0 -0
{docs → scripts/data}/fix_csv_filenames.py +0 -0
{docs → scripts/data}/prepare_economy_data.py +0 -0
{docs → scripts/data}/prepare_finetune_data.py +0 -0
{docs → scripts/data}/remove_duplicate_csvs.py +0 -0
run_with_venv.sh → scripts/setup/run_with_venv.sh +3 -2
start_web.sh → scripts/setup/start_web.sh +3 -2
{docs → scripts/tools}/api_endpoint.py +0 -0
{docs → scripts/tools}/api_endpoint_cpu.py +0 -0
{docs → scripts/tools}/ask_model.py +0 -0

.gitignore CHANGED Viewed

@@ -1,212 +1,35 @@
-# Byte-compiled / optimized / DLL files
 __pycache__/
-*.py[codz]
 *$py.class
-# C extensions
 *.so
-# Distribution / packaging
 .Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py.cover
-.hypothesis/
-.pytest_cache/
-cover/
-# Translations
-*.mo
-*.pot
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-# Flask stuff:
-instance/
-.webassets-cache
-# Scrapy stuff:
-.scrapy
-# Sphinx documentation
-docs/_build/
-# PyBuilder
-.pybuilder/
-target/
-# Jupyter Notebook
-.ipynb_checkpoints
-# IPython
-profile_default/
-ipython_config.py
-# pyenv
-#   For a library or package, you might want to ignore these files since the code is
-#   intended to run in multiple environments; otherwise, check them in:
-# .python-version
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-# UV
-#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#uv.lock
-# poetry
-#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
-#poetry.toml
-# pdm
-#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
-#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
-#pdm.lock
-#pdm.toml
-.pdm-python
-.pdm-build/
-# pixi
-#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
-#pixi.lock
-#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
-#   in the .venv directory. It is recommended not to include this directory in version control.
-.pixi
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
-__pypackages__/
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-# SageMath parsed files
-*.sage.py
-# Environments
-.env
-.envrc
-.venv
-env/
 venv/
 ENV/
-env.bak/
-venv.bak/
-# Spyder project settings
-.spyderproject
-.spyproject
-# Rope project settings
-.ropeproject
-# mkdocs documentation
-/site
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-# Pyre type checker
-.pyre/
-# pytype static type analyzer
-.pytype/
-# Cython debug symbols
-cython_debug/
-# PyCharm
-#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-#  and can be added to the global gitignore or merged into this file.  For a more nuclear
-#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
-# Abstra
-# Abstra is an AI-powered process automation framework.
-# Ignore directories containing user credentials, local state, and settings.
-# Learn more at https://abstra.io/docs
-.abstra/
-# Visual Studio Code
-#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
-#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
-#  and can be added to the global gitignore or merged into this file. However, if you prefer,
-#  you could uncomment the following to ignore the entire vscode folder
-# .vscode/
-# Ruff stuff:
-.ruff_cache/
-# PyPI configuration file
-.pypirc
-# Cursor
-#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
-#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
-#  refer to https://docs.cursor.com/context/ignore-files
-.cursorignore
-.cursorindexingignore
-# Marimo
-marimo/_static/
-marimo/_lsp/
-__marimo__/
 # Project specific
-insurance-data/
-*.backup
 .modal/

+# Python
 __pycache__/
+*.py[cod]
 *$py.class
 *.so
 .Python
 venv/
+env/
 ENV/
+.venv
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# OS
+.DS_Store
+Thumbs.db
 # Project specific
+*.log
+*.csv
+.pytest_cache/
+.coverage
+htmlcov/
+# Modal
 .modal/
+# Environment variables
+.env
+.env.local

.repo_structure_plan.md ADDED Viewed

	@@ -0,0 +1,41 @@

+# Repository Restructure Plan
+## Current Issues:
+- Root directory has too many files (web_app.py, query_product_design.py, test files)
+- docs/ has both scripts and documentation mixed
+- scripts/ directory is empty
+- Product design docs mixed with other docs
+- Web app files (templates, static) in root
+## Proposed Structure:
+```
+/
+├── README.md
+├── requirements.txt
+├── .gitignore
+├── src/                          # Core application code
+│   ├── rag/                      # RAG system
+│   │   ├── modal-rag.py
+│   │   └── modal-rag-product-design.py
+│   └── web/                      # Web application
+│       ├── web_app.py
+│       ├── query_product_design.py
+│       ├── templates/
+│       └── static/
+├── scripts/                       # Utility scripts
+│   ├── data/                     # Data processing
+│   ├── setup/                    # Setup scripts
+│   └── tools/                    # General utilities
+├── docs/                         # Documentation only
+│   ├── guides/                   # How-to guides
+│   ├── api/                      # API docs
+│   └── product-design/           # Product design docs
+├── tests/                         # Test files
+├── config/                        # Config files (if any)
+├── diagrams/                      # Keep as is
+├── finetune/                      # Keep as is
+├── bkp/                           # Keep as is
+└── venv/                          # Keep as is
+```

MIGRATION_GUIDE.md ADDED Viewed

	@@ -0,0 +1,81 @@

+# Repository Restructure Migration Guide
+## What Changed
+The repository has been reorganized for better structure and maintainability.
+## File Moves
+### RAG System
+- `src/modal-rag.py` → `src/rag/modal-rag.py`
+- `src/modal-rag-product-design.py` → `src/rag/modal-rag-product-design.py`
+### Web Application
+- `web_app.py` → `src/web/web_app.py`
+- `query_product_design.py` → `src/web/query_product_design.py`
+- `templates/` → `src/web/templates/`
+- `static/` → `src/web/static/`
+### Scripts
+- Data processing scripts → `scripts/data/`
+- Setup scripts → `scripts/setup/`
+- Utility scripts → `scripts/tools/`
+### Documentation
+- Guide `.md` files previously in `docs/` → `docs/guides/`
+- Product design docs → `docs/product-design/`
+### Tests
+- `test_*.py` → `tests/`
+## Updated Commands
+### Old Commands (No longer work)
+```bash
+python web_app.py
+modal run src/modal-rag-product-design.py::query_product_design
+```
+### New Commands
+```bash
+# Web app
+python src/web/web_app.py
+# Or use helper script
+./scripts/setup/start_web.sh
+# Modal RAG
+modal run src/rag/modal-rag-product-design.py::query_product_design --question "your question"
+# Indexing
+modal run src/rag/modal-rag-product-design.py::index_product_design
+```
+## Import Path Updates
+If you have custom scripts that import from these modules, update the imports. The relative path below assumes the script is run from the repository root:
+```python
+# Old
+from query_product_design import query_rag
+# New
+import sys
+sys.path.insert(0, 'src/web')
+from query_product_design import query_rag
+```
+## Next Steps
+1. Update any custom scripts with new import paths
+2. Update CI/CD pipelines if applicable
+3. Update documentation references
+4. Test all functionality
+## Rollback
+If you need to roll back, all files are still in the git history. You can:
+```bash
+git log --oneline --all -- "old/path/to/file"
+git checkout <commit-hash> -- "old/path/to/file"
+```

QUICK_START.md ADDED Viewed

	@@ -0,0 +1,95 @@

+# Quick Start Guide
+## Prerequisites
+- Python 3.13+
+- Modal account and CLI installed
+- Virtual environment (recommended)
+## Setup
+1. **Activate virtual environment:**
+   ```bash
+   source venv/bin/activate
+   ```
+2. **Install dependencies:**
+   ```bash
+   pip install flask flask-cors
+   # Or install all requirements if you have requirements.txt
+   pip install -r requirements.txt
+   ```
+3. **Index product design documents (first time only):**
+   ```bash
+   modal run src/rag/modal-rag-product-design.py::index_product_design
+   ```
+## Running the Web Application
+### Option 1: Using the helper script (Recommended)
+```bash
+./scripts/setup/start_web.sh
+```
+### Option 2: Direct Python command
+```bash
+# Make sure venv is activated
+source venv/bin/activate
+python src/web/web_app.py
+```
+### Option 3: From project root
+```bash
+python3 src/web/web_app.py
+```
+## Access the Web Interface
+Once the server starts, open your browser and go to:
+- **http://127.0.0.1:5000** (or the port shown in the terminal)
+⚠️ **Important:** Use `127.0.0.1` instead of `localhost` to avoid potential 403 errors on macOS.
+## Querying the RAG System
+### Via Web Interface
+1. Start the web app (see above)
+2. Open the URL in your browser
+3. Enter your question and click "Ask Question"
+### Via CLI
+```bash
+python src/web/query_product_design.py --question "your question here"
+```
+### Via Modal Directly
+```bash
+modal run src/rag/modal-rag-product-design.py::query_product_design --question "your question here"
+```
+## Troubleshooting
+### Flask Not Installed
+```bash
+# Activate venv
+source venv/bin/activate
+# Install Flask
+pip install flask flask-cors
+```
+### Port Already in Use
+The web app will automatically find an available port (5000-5009).
+### Modal Command Not Found
+```bash
+# Install Modal CLI
+pip install modal
+# Or use python -m modal
+python -m modal --version
+```
+For more help, see `docs/guides/TROUBLESHOOTING.md`.

README.md CHANGED Viewed

@@ -1,2 +1,144 @@
-# MCP-hack-2025
-Mcp gradio hackathon project

+# MCP Hack - Insurance Product Design RAG System
+A comprehensive RAG (Retrieval Augmented Generation) system for querying and analyzing auto insurance product design documents, specifically designed for the Tokyo market.
+## 📁 Repository Structure
+```
+/
+├── src/                          # Core application code
+│   ├── rag/                      # RAG system implementation
+│   │   ├── modal-rag.py                    # Main RAG system
+│   │   └── modal-rag-product-design.py     # Product design RAG
+│   └── web/                      # Web application
+│       ├── web_app.py                       # Flask web server
+│       ├── query_product_design.py         # RAG query interface
+│       ├── templates/                      # HTML templates
+│       └── static/                         # CSS, JS, assets
+│
+├── scripts/                       # Utility scripts
+│   ├── data/                     # Data processing scripts
+│   ├── setup/                    # Setup and installation scripts
+│   └── tools/                    # General utility scripts
+│
+├── docs/                         # Documentation
+│   ├── guides/                   # How-to guides and tutorials
+│   ├── api/                      # API documentation
+│   └── product-design/           # Product design documents
+│
+├── tests/                         # Test files
+├── diagrams/                     # System architecture diagrams
+├── finetune/                      # Model fine-tuning documentation
+├── bkp/                           # Backup files
+└── venv/                          # Python virtual environment
+```
+## 🚀 Quick Start
+### Prerequisites
+- Python 3.13+
+- Modal account and CLI installed
+- Virtual environment activated
+### Installation
+1. **Clone and setup:**
+   ```bash
+   git clone <repo-url>
+   cd mcp-hack
+   python3 -m venv venv
+   source venv/bin/activate
+   pip install -r requirements.txt
+   ```
+2. **Index product design documents:**
+   ```bash
+   modal run src/rag/modal-rag-product-design.py::index_product_design
+   ```
+3. **Start web interface:**
+   ```bash
+   python src/web/web_app.py
+   # Or use the helper script:
+   ./scripts/setup/start_web.sh
+   ```
+4. **Access the web interface:**
+   - Open `http://127.0.0.1:5000` in your browser
+   - Ask questions about the product design document
+## 📖 Documentation
+- **Quick Start Guide:** `docs/guides/QUICK_START_RAG.md`
+- **Web Interface:** `docs/guides/WEB_INTERFACE.md`
+- **Troubleshooting:** `docs/guides/TROUBLESHOOTING.md`
+- **Product Design Docs:** `docs/product-design/`
+## 🔧 Key Components
+### RAG System (`src/rag/`)
+- **modal-rag.py**: Main RAG system for insurance products
+- **modal-rag-product-design.py**: Specialized RAG for product design documents
+### Web Application (`src/web/`)
+- **web_app.py**: Flask web server with REST API
+- **query_product_design.py**: RAG query interface
+- **templates/**: HTML templates for the web UI
+- **static/**: CSS and JavaScript files
+### Scripts (`scripts/`)
+- **data/**: Data processing and conversion scripts
+- **setup/**: Installation and setup scripts
+- **tools/**: Utility scripts for various tasks
+## 🎯 Usage Examples
+### Query via CLI
+```bash
+python src/web/query_product_design.py --question "What are the premium ranges?"
+```
+### Query via Web Interface
+1. Start the web app: `python src/web/web_app.py`
+2. Open `http://127.0.0.1:5000`
+3. Enter your question and submit
+### Query via Modal Directly
+```bash
+modal run src/rag/modal-rag-product-design.py::query_product_design --question "How to make product decisions?"
+```
+## 📊 Features
+- ✅ RAG-based document querying
+- ✅ Web interface for easy interaction
+- ✅ Support for markdown and Word documents
+- ✅ Vector database with ChromaDB
+- ✅ Fast inference with vLLM
+- ✅ Comprehensive documentation
+## 🛠️ Development
+### Running Tests
+```bash
+python -m pytest tests/
+```
+### Adding New Documents
+1. Add documents to Modal volume
+2. Run indexing: `modal run src/rag/modal-rag-product-design.py::index_product_design`
+### Project Structure Guidelines
+- **src/**: Core application code only
+- **scripts/**: Utility scripts organized by purpose
+- **docs/**: Documentation organized by type
+- **tests/**: All test files
+## 📝 License
+[Add your license here]
+## 🤝 Contributing
+[Add contribution guidelines here]

STRUCTURE.md ADDED Viewed

	@@ -0,0 +1,148 @@

+# Repository Structure
+This document describes the organization of the repository.
+## Directory Layout
+```
+mcp-hack/
+├── src/                          # Core application source code
+│   ├── rag/                      # RAG (Retrieval Augmented Generation) system
+│   │   ├── modal-rag.py                    # Main RAG system for insurance products
+│   │   └── modal-rag-product-design.py     # Product design document RAG
+│   └── web/                      # Web application
+│       ├── web_app.py                       # Flask web server
+│       ├── query_product_design.py          # RAG query CLI interface
+│       ├── templates/                       # HTML templates
+│       │   └── index.html
+│       └── static/                          # Static assets
+│           ├── css/
+│           │   └── style.css
+│           └── js/
+│               └── app.js
+│
+├── scripts/                       # Utility scripts organized by purpose
+│   ├── data/                     # Data processing scripts
+│   │   ├── download_*.py         # Data download scripts
+│   │   ├── convert_*.py          # Data conversion scripts
+│   │   ├── prepare_*.py          # Data preparation scripts
+│   │   └── cleanup_*.py          # Data cleanup scripts
+│   ├── setup/                    # Setup and installation scripts
+│   │   ├── start_web.sh          # Start web application
+│   │   └── run_with_venv.sh      # Run scripts with venv
+│   └── tools/                     # General utility scripts
+│       ├── api_endpoint*.py       # API endpoint scripts
+│       ├── finetune_*.py          # Fine-tuning scripts
+│       └── debug_*.py             # Debugging utilities
+│
+├── docs/                         # Documentation
+│   ├── guides/                   # How-to guides and tutorials
+│   │   ├── QUICK_START_RAG.md
+│   │   ├── WEB_INTERFACE.md
+│   │   ├── TROUBLESHOOTING.md
+│   │   └── ...                   # Other guides
+│   ├── api/                      # API documentation (if any)
+│   └── product-design/           # Product design documents
+│       ├── tokyo_auto_insurance_product_design.md
+│       ├── tokyo_auto_insurance_product_design_filled.md
+│       ├── tokyo_auto_insurance_product_design.docx
+│       ├── PRODUCT_DECISION_GUIDE.md
+│       └── setup_product_design_rag.md
+│
+├── tests/                         # Test files
+│   ├── test_server.py
+│   └── test_web.py
+│
+├── diagrams/                      # System architecture diagrams
+│   ├── *.mmd                      # Mermaid diagram sources
+│   └── *.svg                      # Rendered diagrams
+│
+├── finetune/                      # Model fine-tuning documentation
+│   ├── README.md
+│   └── *.md                       # Fine-tuning guides
+│
+├── bkp/                           # Backup files (old versions)
+│
+├── config/                        # Configuration files (if any)
+│
+├── venv/                          # Python virtual environment (gitignored)
+│
+├── README.md                      # Main project README
+├── MIGRATION_GUIDE.md            # Guide for migrating from old structure
+├── STRUCTURE.md                   # This file
+└── .gitignore                    # Git ignore rules
+```
+## Key Directories
+### `src/`
+Contains all core application code. Organized into:
+- **`rag/`**: RAG system implementations using Modal
+- **`web/`**: Web application (Flask) with templates and static assets
+### `scripts/`
+Utility scripts organized by purpose:
+- **`data/`**: Data processing, downloading, conversion
+- **`setup/`**: Installation and setup scripts
+- **`tools/`**: General utilities, API endpoints, debugging tools
+### `docs/`
+Documentation organized by type:
+- **`guides/`**: How-to guides and tutorials
+- **`api/`**: API documentation
+- **`product-design/`**: Product design documents
+### `tests/`
+All test files for the application.
+## File Naming Conventions
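+- Python scripts: `snake_case.py` (the Modal entry points in `src/rag/` use `kebab-case.py`, e.g. `modal-rag.py`)
+- Documentation: `UPPER_CASE.md` or `kebab-case.md`
+- Shell scripts: `snake_case.sh` (e.g. `start_web.sh`, `run_with_venv.sh`)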
+- Config files: `.config` or `config.json`
+## Import Paths
+When importing from this repository:
+```python
+# From root directory
+import sys
+sys.path.insert(0, 'src/web')
+from query_product_design import query_rag
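+# Or add src to path
+sys.path.insert(0, 'src')
+# The RAG files have hyphenated names (modal-rag-product-design.py), which a
+# plain `import` statement cannot reference; load them with importlib instead.
+import importlib
+rag_product_design = importlib.import_module('rag.modal-rag-product-design')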
+```
+## Running Applications
+### Web Application
+```bash
+# From project root
+python src/web/web_app.py
+# Or use helper script
+./scripts/setup/start_web.sh
+```
+### RAG Queries
+```bash
+# CLI
+python src/web/query_product_design.py --question "your question"
+# Modal direct
+modal run src/rag/modal-rag-product-design.py::query_product_design --question "your question"
+```
+## Adding New Files
+When adding new files, follow the structure:
+- **Application code** → `src/`
+- **Utility scripts** → `scripts/{data,setup,tools}/`
+- **Documentation** → `docs/{guides,api,product-design}/`
+- **Tests** → `tests/`
+- **Config** → `config/`

bkp/modal-rag.py.backup ADDED Viewed

	@@ -0,0 +1,284 @@

+import modal
+app = modal.App("insurance-rag")
+# Reference your specific volume
+vol = modal.Volume.from_name("mcp-hack-ins-products", create_if_missing=True)
+# Model configuration
+LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct"
+EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5"
+# Build image with ALL required dependencies
+image = (
+    modal.Image.debian_slim(python_version="3.11")
+    .pip_install(
+        "vllm==0.6.3.post1",  # Fast inference engine
+        "langchain==0.3.7",
+        "langchain-community==0.3.7",
+        "langchain-text-splitters==0.3.2",
+        "sentence-transformers==3.3.0",
+        "chromadb==0.5.20",
+        "pypdf==5.1.0",
+        "cryptography==43.0.3",
+        "transformers==4.46.2",
+        "torch==2.5.1",
+        "huggingface_hub==0.26.2",
+    )
+)
+@app.function(image=image, volumes={"/insurance-data": vol})
+def list_files():
+    """List all files in the volume"""
+    import os
+    files = []
+    for root, dirs, filenames in os.walk("/insurance-data"):
+        for filename in filenames:
+            full_path = os.path.join(root, filename)
+            files.append(full_path)
+    return files
+@app.function(
+    image=image,
+    volumes={"/insurance-data": vol},
+    timeout=900
+)
+def create_vector_db():
+    """Create vector database from insurance PDFs"""
+    from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
+    from langchain_community.vectorstores import Chroma
+    from langchain_community.embeddings import HuggingFaceEmbeddings
+    from langchain_text_splitters import RecursiveCharacterTextSplitter
+    print("🔍 Loading documents from /insurance-data...")
+    loader = DirectoryLoader(
+        "/insurance-data",
+        glob="**/*.pdf",
+        loader_cls=PyPDFLoader,
+        silent_errors=True
+    )
+    try:
+        documents = loader.load()
+    except Exception as e:
+        print(f"⚠️ Warning during loading: {e}")
+        documents = []
+    print(f"📄 Loaded {len(documents)} document pages")
+    if len(documents) == 0:
+        return {
+            "status": "error",
+            "message": "No PDF files could be loaded",
+            "total_documents": 0,
+            "total_chunks": 0
+        }
+    print("✂️ Splitting documents into chunks...")
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000,
+        chunk_overlap=200
+    )
+    chunks = text_splitter.split_documents(documents)
+    print(f"📦 Created {len(chunks)} chunks")
+    print("🧠 Creating embeddings...")
+    embeddings = HuggingFaceEmbeddings(
+        model_name=EMBEDDING_MODEL,
+        model_kwargs={'device': 'cuda'},
+        encode_kwargs={'normalize_embeddings': True}
+    )
+    print("💾 Building vector database...")
+    # Connect to remote Chroma service
+    chroma_service = modal.Cls.from_name("chroma-server-v2", "ChromaDB")()
+    # Prepare data for upsert
+    ids = [f"id_{i}" for i in range(len(chunks))]
+    documents = [chunk.page_content for chunk in chunks]
+    metadatas = [chunk.metadata for chunk in chunks]
+    # Generate embeddings locally
+    print("   Generating embeddings locally...")
+    embeddings_list = embeddings.embed_documents(documents)
+    # Upsert to remote Chroma
+    print("   Upserting to remote Chroma DB...")
+    batch_size = 100
+    for i in range(0, len(ids), batch_size):
+        batch_ids = ids[i:i+batch_size]
+        batch_docs = documents[i:i+batch_size]
+        batch_metas = metadatas[i:i+batch_size]
+        batch_embs = embeddings_list[i:i+batch_size]
+        chroma_service.upsert.remote(
+            collection_name="insurance_products",
+            ids=batch_ids,
+            documents=batch_docs,
+            embeddings=batch_embs,
+            metadatas=batch_metas
+        )
+        print(f"   Upserted batch {i//batch_size + 1}/{(len(ids)-1)//batch_size + 1}")
+    print("✅ Vector database created and persisted remotely!")
+    return {
+        "status": "success",
+        "total_documents": len(documents),
+        "total_chunks": len(chunks)
+    }
+@app.cls(
+    image=image,
+    volumes={"/insurance-data": vol},
+    gpu="A10G",
+    timeout=600,
+    max_containers=1,  # Keep one container alive
+    min_containers=1   # Keep one container warm
+)
+class RAGModel:
+    @modal.enter()
+    def enter(self):
+        from langchain_community.vectorstores import Chroma
+        from langchain_community.embeddings import HuggingFaceEmbeddings
+        from langchain_community.llms import HuggingFacePipeline
+        from langchain.chains import RetrievalQA
+        from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+        import torch
+        from typing import Any, List
+        from langchain_core.retrievers import BaseRetriever
+        from langchain_core.documents import Document
+        print("🔄 Loading embeddings...")
+        self.embeddings = HuggingFaceEmbeddings(
+            model_name=EMBEDDING_MODEL,
+            model_kwargs={'device': 'cuda'},
+            encode_kwargs={'normalize_embeddings': True}
+        )
+        print("📚 Connecting to remote Chroma DB...")
+        self.chroma_service = modal.Cls.from_name("chroma-server-v2", "ChromaDB")()
+        class RemoteChromaRetriever(BaseRetriever):
+            chroma_service: Any
+            embeddings: Any
+            k: int = 3
+            def _get_relevant_documents(self, query: str) -> List[Document]:
+                query_embedding = self.embeddings.embed_query(query)
+                results = self.chroma_service.query.remote(
+                    collection_name="insurance_products",
+                    query_embeddings=[query_embedding],
+                    n_results=self.k
+                )
+                documents = []
+                if results['documents']:
+                    for i in range(len(results['documents'][0])):
+                        doc = Document(
+                            page_content=results['documents'][0][i],
+                            metadata=results['metadatas'][0][i] if results['metadatas'] else {}
+                        )
+                        documents.append(doc)
+                return documents
+            async def _aget_relevant_documents(self, query: str) -> List[Document]:
+                return self._get_relevant_documents(query)
+        self.RemoteChromaRetriever = RemoteChromaRetriever
+        print("🤖 Loading LLM model with vLLM...")
+        from vllm import LLM, SamplingParams
+        # Initialize vLLM engine (much faster than HuggingFace pipeline)
+        self.llm_engine = LLM(
+            model=LLM_MODEL,
+            tensor_parallel_size=1,
+            gpu_memory_utilization=0.85,
+            max_model_len=4096,  # Phi-3 supports 4k context
+            trust_remote_code=True  # Required for Phi-3
+        )
+        # Configure sampling parameters for generation
+        self.sampling_params = SamplingParams(
+            temperature=0.7,
+            max_tokens=256,  # Reduced for faster responses
+            top_p=0.9,
+            stop=["\n\n", "Question:", "Context:"]  # Stop tokens
+        )
+        print("✅ vLLM model loaded and ready!")
+    @modal.method()
+    def query(self, question: str, top_k: int = 2):
+        import time
+        start_time = time.time()
+        print(f"❓ Query: {question}")
+        # Retrieve relevant documents
+        retrieval_start = time.time()
+        retriever = self.RemoteChromaRetriever(
+            chroma_service=self.chroma_service,
+            embeddings=self.embeddings,
+            k=top_k
+        )
+        docs = retriever.get_relevant_documents(question)
+        retrieval_time = time.time() - retrieval_start
+        # Build context from retrieved documents
+        context = "\n\n".join([doc.page_content for doc in docs])
+        # Create prompt for Phi-3 (using its chat template)
+        prompt = f"""<|system|>
+You are a helpful AI assistant that answers questions about insurance products based on the provided context. Be concise and accurate.<|end|>
+    def web_query(self, question: str):
+        return self.query.local(question)
+@app.local_entrypoint()
+def list():
+    """List files in volume"""
+    print("📁 Listing files in mcp-hack-ins-products volume...")
+    files = list_files.remote()
+    print(f"\n✅ Found {len(files)} files:")
+    for f in files:
+        print(f"  📄 {f}")
+@app.local_entrypoint()
+def index():
+    """Create vector database"""
+    print("🚀 Starting vector database creation...")
+    result = create_vector_db.remote()
+    print(f"\n{'='*60}")
+    print(f"Status: {result['status']}")
+    if result['status'] == 'success':
+        print(f"Documents processed: {result['total_documents']}")
+        print(f"Text chunks created: {result['total_chunks']}")
+        print("✅ Vector database is ready for queries!")
+    else:
+        print(f"❌ Error: {result['message']}")
+    print(f"{'='*60}")
+@app.local_entrypoint()
+def query(question: str = "What insurance products are available?"):
+    """Query the RAG system"""
+    print(f"🤔 Question: {question}\n")
+    # Lookup the deployed RAGModel from the insurance-rag app
+    # This connects to the persistent container instead of creating a new one
+    model = RAGModel()
+    result = model.query.remote(question)
+    print(f"{'='*60}")
+    print(f"💡 Answer:\n{result['answer']}")
+    print(f"\n{'='*60}")
+    print(f"📖 Sources ({len(result['sources'])}):")
+    for i, source in enumerate(result['sources'], 1):
+        print(f"\n  [{i}] {source['metadata'].get('source', 'Unknown')}")
+        print(f"      Page: {source['metadata'].get('page', 'N/A')}")
+        print(f"      Preview: {source['content'][:150]}...")
+    print(f"{'='*60}")

clean_sample.csv DELETED Viewed

The diff for this file is too large to render. See raw diff

docs/HOW_TO_RUN.md CHANGED Viewed

@@ -145,9 +145,6 @@ modal app logs mcp-hack::finetune-phi3-modal
 ### If You Need to Regenerate Data
 ```bash
-# Clear existing dataset
-./venv/bin/modal run docs/clear_dataset.py
 # Regenerate with new logic
 ./venv/bin/modal run --detach docs/prepare_finetune_data.py
 ```

 ### If You Need to Regenerate Data
 ```bash
 # Regenerate with new logic
 ./venv/bin/modal run --detach docs/prepare_finetune_data.py
 ```

docs/guides/HOW_TO_RUN.md ADDED Viewed

	@@ -0,0 +1,215 @@

+# How to Run the Fine-Tuning Pipeline
+This guide walks you through the complete pipeline from data generation to model deployment.
+---
+## 📊 Dataset Generation Results
+### Final Statistics
+- **Training Samples**: 201,651
+- **Validation Samples**: 22,407
+- **Total Dataset**: 224,058 high-quality QA pairs
+- **Improvement**: 150x more data than previous approach
+### Batch Performance
+| Batch | Files | Data Points | Status |
+|-------|-------|-------------|--------|
+| 1 | 1,000 | 100,611 | ✅ Excellent |
+| 2 | 1,000 | 39,960 | ✅ Good |
+| 3 | 1,000 | 0 | ⚠️ Complex files |
+| 4 | 1,000 | 600 | ⚠️ Runner issue |
+| 5 | 1,000 | 54,627 | ✅ Excellent |
+| 6 | 1,000 | 5,400 | ✅ Good |
+| 7 | 888 | 22,860 | ✅ Good |
+---
+## 🚀 Step-by-Step Instructions
+### Step 1: Fine-Tune the Model
+Run the fine-tuning job on Modal with H200 GPU:
+```bash
+cd /path/to/mcp-hack  # replace with the path to your local clone of the project
+# Start fine-tuning in detached mode
+./venv/bin/modal run --detach docs/finetune_modal.py
+```
+**What happens:**
+- Loads 201,651 training samples from `finetune-dataset` volume
+- Trains Phi-3-mini-4k-instruct with LoRA on H200 GPU
+- Runs for ~90-120 minutes
+- Saves model to `model-checkpoints` volume
+**Monitor progress:**
+```bash
+# View live logs
+modal app logs mcp-hack::finetune-phi3-modal
+```
+---
+### Step 2: Evaluate the Model
+After training completes, test the model:
+```bash
+./venv/bin/modal run docs/eval_finetuned.py
+```
+This will run sample questions and show the model's answers.
+---
+### Step 3: Deploy API Endpoint
+Deploy the inference API:
+**Option A: GPU Endpoint (A10G)**
+```bash
+./venv/bin/modal deploy scripts/tools/api_endpoint.py
+```
+**Option B: CPU Endpoint**
+```bash
+./venv/bin/modal deploy scripts/tools/api_endpoint_cpu.py
+```
+**Get the endpoint URL:**
+```bash
+modal app list
+```
+---
+### Step 4: Test the API
+```bash
+# Example API call
+curl -X POST https://YOUR-MODAL-URL/ask \
+  -H "Content-Type: application/json" \
+  -d '{
+    "question": "What is the population of Tokyo?",
+    "context": "Japan Census data"
+  }'
+```
+---
+## 📁 Key Files
+### Data Processing
+- `scripts/data/prepare_finetune_data.py` - Generates dataset from CSV files
+- `docs/clean_sample.py` - Local testing script for data cleaning
+### Model Training
+- `docs/finetune_modal.py` - Fine-tuning script (H200 GPU)
+- `docs/eval_finetuned.py` - Evaluation script
+### API Deployment
+- `scripts/tools/api_endpoint.py` - GPU inference endpoint (A10G)
+- `scripts/tools/api_endpoint_cpu.py` - CPU inference endpoint
+### Documentation
+- `diagrams/finetuning.svg` - Visual pipeline diagram
+- `finetune/04-evaluation.md` - Evaluation results
+---
+## 🔧 Modal Volumes
+The pipeline uses these Modal volumes:
+| Volume | Purpose | Size |
+|--------|---------|------|
+| `census-data` | Raw census CSV files | 6,838 files |
+| `economy-labor-data` | Raw economy CSV files | 50 files |
+| `finetune-dataset` | Generated JSONL training data | 224K samples |
+| `model-checkpoints` | Fine-tuned model weights | ~7GB |
+---
+## 💡 Tips
+### If Training Fails
+```bash
+# Check logs for errors
+modal app logs mcp-hack::finetune-phi3-modal
+# Restart training
+./venv/bin/modal run --detach docs/finetune_modal.py
+```
+### If You Need to Regenerate Data
+```bash
+# Clear existing dataset
+./venv/bin/modal run docs/clear_dataset.py
+# Regenerate with new logic
+./venv/bin/modal run --detach scripts/data/prepare_finetune_data.py
+```
+### View Volume Contents
+```bash
+# List files in a volume
+modal volume ls finetune-dataset
+# Download a file
+modal volume get finetune-dataset train.jsonl finetune/train.jsonl
+```
+---
+## 📈 Expected Timeline
+| Step | Duration | Notes |
+|------|----------|-------|
+| Data Generation | ✅ Complete | 224K samples ready |
+| Fine-Tuning | ~90-120 min | H200 GPU |
+| Evaluation | ~5 min | Quick tests |
+| API Deployment | ~2 min | Endpoint is live as soon as the deploy finishes |
+---
+## 🎯 Next Steps
+1. **Run fine-tuning** (see Step 1 above)
+2. **Wait for completion** (~2 hours)
+3. **Evaluate results** (see Step 2)
+4. **Deploy API** (see Step 3)
+5. **Test with real queries** (see Step 4)
+---
+## 📞 Troubleshooting
+**Issue**: "Volume not found"
+```bash
+# List all volumes
+modal volume list
+```
+**Issue**: "Out of memory during training"
+- Reduce `per_device_train_batch_size` in `finetune_modal.py`
+- Current: 2 (already optimized for H200)
+**Issue**: "Model not loading in API"
+- Ensure fine-tuning completed successfully
+- Check `model-checkpoints` volume has files
+---
+## ✅ Success Criteria
+After completing all steps, you should have:
+- ✅ Fine-tuned Phi-3-mini model
+- ✅ Deployed API endpoint
+- ✅ Model answering questions about Japanese census/economy data
+- ✅ Improved accuracy over base model
+---
+**Ready to start?** Run the fine-tuning command from Step 1!

docs/{QUICK_START_RAG.md → guides/QUICK_START_RAG.md} RENAMED Viewed

File without changes

docs/{RAG_SETUP_COMPLETE.md → guides/RAG_SETUP_COMPLETE.md} RENAMED Viewed

File without changes

docs/{SETUP_SUCCESS.md → guides/SETUP_SUCCESS.md} RENAMED Viewed

File without changes

docs/{SUMMARY.md → guides/SUMMARY.md} RENAMED Viewed

File without changes

docs/{TROUBLESHOOTING.md → guides/TROUBLESHOOTING.md} RENAMED Viewed

File without changes

docs/{WEB_INTERFACE.md → guides/WEB_INTERFACE.md} RENAMED Viewed

File without changes

docs/{WEB_TROUBLESHOOTING.md → guides/WEB_TROUBLESHOOTING.md} RENAMED Viewed

File without changes

docs/{estat_api_guide.md → guides/estat_api_guide.md} RENAMED Viewed

File without changes

docs/{ft_process.md → guides/ft_process.md} RENAMED Viewed

File without changes

docs/{modal-rag-optimization.md → guides/modal-rag-optimization.md} RENAMED Viewed

File without changes

docs/{modal-rag-sequence.md → guides/modal-rag-sequence.md} RENAMED Viewed

File without changes

docs/{next_steps_rag_recommendation.md → guides/next_steps_rag_recommendation.md} RENAMED Viewed

File without changes

docs/{source_data.md → guides/source_data.md} RENAMED Viewed

File without changes

docs/{PRODUCT_DECISION_GUIDE.md → product-design/PRODUCT_DECISION_GUIDE.md} RENAMED Viewed

File without changes

docs/{setup_product_design_rag.md → product-design/setup_product_design_rag.md} RENAMED Viewed

File without changes

docs/{tokyo_auto_insurance_product_design.docx → product-design/tokyo_auto_insurance_product_design.docx} RENAMED Viewed

File without changes

docs/{tokyo_auto_insurance_product_design.md → product-design/tokyo_auto_insurance_product_design.md} RENAMED Viewed

File without changes

docs/{tokyo_auto_insurance_product_design_filled.md → product-design/tokyo_auto_insurance_product_design_filled.md} RENAMED Viewed

File without changes

scripts/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+"""
+Utility scripts for data processing, setup, and tools
+"""

{docs → scripts/data}/cleanup_data.py RENAMED Viewed

File without changes

{docs → scripts/data}/clear_census_volume.py RENAMED Viewed

File without changes

{docs → scripts/data}/convert_census_to_csv.py RENAMED Viewed

File without changes

{docs → scripts/data}/convert_economy_labor_to_csv.py RENAMED Viewed

File without changes

{docs → scripts/data}/convert_to_word.py RENAMED Viewed

File without changes

{docs → scripts/data}/create_custom_qa.py RENAMED Viewed

File without changes

{docs → scripts/data}/delete_census_csvs.py RENAMED Viewed

File without changes

{docs → scripts/data}/download_census_api.py RENAMED Viewed

File without changes

{docs → scripts/data}/download_census_csv_modal.py RENAMED Viewed

File without changes

{docs → scripts/data}/download_census_data.py RENAMED Viewed

File without changes

{docs → scripts/data}/download_census_modal.py RENAMED Viewed

File without changes

{docs → scripts/data}/download_economy_labor_modal.py RENAMED Viewed

File without changes

{docs → scripts/data}/fix_csv_filenames.py RENAMED Viewed

File without changes

{docs → scripts/data}/prepare_economy_data.py RENAMED Viewed

File without changes

{docs → scripts/data}/prepare_finetune_data.py RENAMED Viewed

File without changes

{docs → scripts/data}/remove_duplicate_csvs.py RENAMED Viewed

File without changes

run_with_venv.sh → scripts/setup/run_with_venv.sh RENAMED Viewed

@@ -2,7 +2,8 @@
 # Helper script to run query_product_design with venv activated
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-cd "$SCRIPT_DIR"
 # Activate venv if it exists
 if [ -d "venv" ]; then
@@ -10,4 +11,4 @@ if [ -d "venv" ]; then
 fi
 # Run the script with all arguments
-python3 query_product_design.py "$@"

 # Helper script to run query_product_design with venv activated
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+PROJECT_ROOT="$( cd "$SCRIPT_DIR/../.." && pwd )"
+cd "$PROJECT_ROOT"
 # Activate venv if it exists
 if [ -d "venv" ]; then
 fi
 # Run the script with all arguments
+python3 src/web/query_product_design.py "$@"

start_web.sh → scripts/setup/start_web.sh RENAMED Viewed

@@ -2,7 +2,8 @@
 # Helper script to start the web interface
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-cd "$SCRIPT_DIR"
 # Activate venv if it exists
 if [ -d "venv" ]; then
@@ -10,5 +11,5 @@ if [ -d "venv" ]; then
 fi
 # Start the web app
-python3 web_app.py

 # Helper script to start the web interface
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+PROJECT_ROOT="$( cd "$SCRIPT_DIR/../.." && pwd )"
+cd "$PROJECT_ROOT"
 # Activate venv if it exists
 if [ -d "venv" ]; then
 fi
 # Start the web app
+python3 src/web/web_app.py

{docs → scripts/tools}/api_endpoint.py RENAMED Viewed

File without changes

{docs → scripts/tools}/api_endpoint_cpu.py RENAMED Viewed

File without changes

{docs → scripts/tools}/ask_model.py RENAMED Viewed

File without changes