diff --git a/.env.example b/.env.example new file mode 100644 index 0000000000000000000000000000000000000000..9a63f1952b0ed71928fa657a4482203e05885662 --- /dev/null +++ b/.env.example @@ -0,0 +1,54 @@ +# ========================================== +# MINDI 1.5 Vision-Coder — Environment Variables +# ========================================== +# Copy this file to .env and fill in your keys +# NEVER commit .env to git! + +# ── Project Identity ── +PROJECT_NAME=MINDI-1.5-Vision-Coder +STARTUP_NAME=MINDIGENOUS.AI +HF_USERNAME=Mindigenous + +# ── HuggingFace ── +HUGGINGFACE_TOKEN=hf_your_token_here +HUGGINGFACE_REPO=Mindigenous/MINDI-1.5-Vision-Coder + +# ── Tavily (Web Search) ── +TAVILY_API_KEY=tvly-your_key_here +MAX_SEARCH_RESULTS=5 +SEARCH_TIMEOUT=30 + +# ── Weights & Biases (Training Monitor) ── +WANDB_API_KEY=your_wandb_key_here +WANDB_PROJECT=mindi-1.5-vision-coder +WANDB_ENTITY=mindigenous + +# ── E2B (Cloud Sandbox) ── +E2B_API_KEY=e2b_your_key_here +SANDBOX_TYPE=e2b + +# ── Model Settings ── +MODEL_NAME=deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct +BASE_MODEL_PATH=./checkpoints/base +FINETUNED_MODEL_PATH=./checkpoints/finetuned + +# ── API Settings ── +API_HOST=0.0.0.0 +API_PORT=8000 +API_WORKERS=4 + +# ── Training Settings ── +DEVICE=cuda +MIXED_PRECISION=bf16 +MAX_SEQ_LENGTH=8192 +TRAINING_OUTPUT_DIR=./checkpoints + +# ── Directories ── +LOG_DIR=./logs +DATA_DIR=./data +CHECKPOINT_DIR=./checkpoints + +# ── Cloud GPU (AMD MI300X on DigitalOcean) ── +CLOUD_GPU_HOST=your_cloud_gpu_ip +CLOUD_GPU_USER=root +CLOUD_GPU_SSH_KEY=~/.ssh/id_rsa diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..0c8d383451f10f087e4b14546492cf7aa349785c --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +data/tokenizer/*/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..f4c22a34fcbf87149163cdfd9dee4a621a0230c1 Binary files 
/dev/null and b/.gitignore differ diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f72ba71b8da0f301596d8b8725cbc2f29736c7a6 --- /dev/null +++ b/README.md @@ -0,0 +1,41 @@ +# MINDI 1.5 Vision-Coder + +**Built by [MINDIGENOUS.AI](https://mindigenous.ai)** + +**Builder:** Faaz ([@Mindigenous](https://huggingface.co/Mindigenous) on HuggingFace) + +**Started:** April 14, 2026 + +**Target Launch:** May 5, 2026 + +--- + +## What is MINDI 1.5? + +MINDI 1.5 Vision-Coder is a multimodal agentic AI coding model that: + +- Generates production-ready Next.js 14 + Tailwind CSS + TypeScript code +- Sees its own output via vision capabilities (CLIP ViT-L/14) +- Critiques its own UI/UX design and iterates +- Searches the internet for latest packages and documentation +- Tests code in an isolated sandbox environment +- Fixes its own errors automatically +- Suggests improvements to the user + +## Architecture + +- **Base Model:** Open-source coding model (3B-7B parameters, Apache 2.0 / MIT) +- **Fine-tuning:** LoRA on AMD MI300X 192GB VRAM +- **Vision Encoder:** CLIP ViT-L/14 +- **Agents:** Search + Sandbox + UI Critic + Code Generation +- **Training Data:** 500,000+ curated examples +- **Backend:** FastAPI +- **Output Format:** Next.js 14 + Tailwind CSS + TypeScript + +## HuggingFace + +Final model will be published at: `Mindigenous/MINDI-1.5-Vision-Coder` + +## License + +Apache 2.0 diff --git a/api/__init__.py b/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..79ca52320811d8816612ae7c1e085a0c99f921e8 --- /dev/null +++ b/api/__init__.py @@ -0,0 +1 @@ +"""MINDI 1.5 — FastAPI backend package.""" diff --git a/api/main.py b/api/main.py new file mode 100644 index 0000000000000000000000000000000000000000..f6c5b36b893c670847c3cb9f6d36ec5f88746636 --- /dev/null +++ b/api/main.py @@ -0,0 +1,48 @@ +""" +MINDI 1.5 Vision-Coder — FastAPI Application + +Main entry point for the MINDI API server. 
+Serves code generation, vision critique, and agent orchestration endpoints. +""" + +from __future__ import annotations + +from pathlib import Path + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + +from api.routes.generate import router as generate_router +from api.routes.health import router as health_router + +app = FastAPI( + title="MINDI 1.5 Vision-Coder API", + description="Multimodal agentic AI code generator by MINDIGENOUS.AI", + version="1.5.0", +) + +# CORS — allow the frontend to call the API +app.add_middleware( + CORSMiddleware, + allow_origins=["http://localhost:3000"], # Next.js dev server + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Register route modules +app.include_router(health_router, prefix="/api", tags=["Health"]) +app.include_router(generate_router, prefix="/api", tags=["Generation"]) + + +@app.on_event("startup") +async def startup_event() -> None: + """Load models and initialize agents on server start.""" + # Models and agents will be initialized here in later phases + print("[MINDI API] Server starting up...") + + +@app.on_event("shutdown") +async def shutdown_event() -> None: + """Cleanup on server shutdown.""" + print("[MINDI API] Server shutting down...") diff --git a/api/middleware/__init__.py b/api/middleware/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..01415eaa49f31e6e8788db75d0dfa0679208a29f --- /dev/null +++ b/api/middleware/__init__.py @@ -0,0 +1 @@ +"""MINDI 1.5 — API middleware (auth, rate limiting, CORS).""" diff --git a/api/middleware/auth.py b/api/middleware/auth.py new file mode 100644 index 0000000000000000000000000000000000000000..1dc815a7b2f61055ec998113e40e0be8362384b1 --- /dev/null +++ b/api/middleware/auth.py @@ -0,0 +1,41 @@ +""" +MINDI 1.5 Vision-Coder — Auth Middleware + +API key validation for production deployment.
+""" + +from __future__ import annotations + +import os +import secrets +from typing import Optional + +from fastapi import HTTPException, Security +from fastapi.security import APIKeyHeader + +API_KEY_HEADER = APIKeyHeader(name="X-API-Key", auto_error=False) + + +async def verify_api_key( + api_key: Optional[str] = Security(API_KEY_HEADER), +) -> str: + """Validate the API key from request headers. + + Returns the presented key, or "dev-mode" when MINDI_API_KEY is unset or empty. + Raises HTTPException (403) when the key is missing or does not match. + """ + expected_key = os.environ.get("MINDI_API_KEY", "") + + # In development, skip auth if no key is configured + if not expected_key: + return "dev-mode" + + # Constant-time comparison: a plain != can return at the first differing + # character, so response timing could reveal how much of the key matched. + # Both sides are encoded because compare_digest rejects non-ASCII str. + if not api_key or not secrets.compare_digest( + api_key.encode("utf-8"), expected_key.encode("utf-8") + ): + raise HTTPException(status_code=403, detail="Invalid API key") + + return api_key diff --git a/api/routes/__init__.py b/api/routes/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9c946df0fed74c8388c01dd06b2d18a1812a1131 --- /dev/null +++ b/api/routes/__init__.py @@ -0,0 +1 @@ +"""MINDI 1.5 — API route handlers.""" diff --git a/api/routes/generate.py b/api/routes/generate.py new file mode 100644 index 0000000000000000000000000000000000000000..606bdc4fa3731fa9ae67b3f13c3855ce1e0c8a8d --- /dev/null +++ b/api/routes/generate.py @@ -0,0 +1,46 @@ +""" +MINDI 1.5 Vision-Coder — Code Generation Route + +Accepts user prompts and returns generated Next.js + Tailwind + TypeScript code +via the agent orchestration pipeline.
+""" + +from __future__ import annotations + +from typing import Optional + +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel, Field + +router = APIRouter() + + +class GenerateRequest(BaseModel): + """Request body for code generation.""" + prompt: str = Field(..., min_length=1, max_length=10000, description="User's code generation prompt") + temperature: float = Field(0.7, ge=0.0, le=2.0) + max_tokens: int = Field(4096, ge=1, le=8192) + use_search: bool = Field(True, description="Enable web search for context") + use_sandbox: bool = Field(True, description="Enable sandbox testing") + use_vision: bool = Field(True, description="Enable vision-based UI critique") + + +class GenerateResponse(BaseModel): + """Response body for code generation.""" + code: str + language: str = "typescript" + file_path: str = "page.tsx" + critique: Optional[str] = None + search_sources: list[str] = [] + iterations: int = 1 + success: bool = True + + +@router.post("/generate", response_model=GenerateResponse) +async def generate_code(request: GenerateRequest) -> GenerateResponse: + """Generate code from a user prompt using the MINDI agent pipeline.""" + # Will be wired to AgentOrchestrator in later phases + raise HTTPException( + status_code=503, + detail="Model not loaded yet. Complete training pipeline first.", + ) diff --git a/api/routes/health.py b/api/routes/health.py new file mode 100644 index 0000000000000000000000000000000000000000..d9e58f1a50b445b1ef943e9bbf86272a76571cf5 --- /dev/null +++ b/api/routes/health.py @@ -0,0 +1,21 @@ +""" +MINDI 1.5 Vision-Coder — Health Check Route + +Simple health/readiness endpoint for monitoring. 
+""" + +from __future__ import annotations + +from fastapi import APIRouter + +router = APIRouter() + + +@router.get("/health") +async def health_check() -> dict[str, str]: + """Return server health status.""" + return { + "status": "healthy", + "model": "MINDI-1.5-Vision-Coder", + "version": "1.5.0", + } diff --git a/configs/data_config.yaml b/configs/data_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..efe99d5aa5c798b3415fb21ae12d023751b8625d --- /dev/null +++ b/configs/data_config.yaml @@ -0,0 +1,62 @@ +# ========================================== +# MINDI 1.5 Vision-Coder — Data Configuration +# ========================================== + +dataset: + name: "mindi-1.5-training-data" + target_size: 500000 + format: "jsonl" + + # Data sources for fine-tuning + sources: + - name: "code_generation" + description: "Prompt → Next.js + Tailwind + TypeScript code pairs" + path: "./data/raw/code_generation/" + weight: 0.40 + + - name: "ui_critique" + description: "Screenshot + code → critique + improved code pairs" + path: "./data/raw/ui_critique/" + weight: 0.20 + + - name: "error_correction" + description: "Broken code → fixed code pairs with explanations" + path: "./data/raw/error_correction/" + weight: 0.15 + + - name: "documentation_qa" + description: "Documentation context → code answer pairs" + path: "./data/raw/documentation_qa/" + weight: 0.10 + + - name: "multi_turn" + description: "Multi-turn conversation with iterative refinement" + path: "./data/raw/multi_turn/" + weight: 0.15 + + # Processing + processing: + tokenizer: "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct" + max_length: 8192 + min_length: 64 + dedup_strategy: "minhash" + quality_filter: true + output_dir: "./data/processed/" + + # Train / validation split + splits: + train: 0.95 + validation: 0.05 + + # Knowledge base for RAG + knowledge_base: + path: "./data/knowledge_base/" + sources: + - "nextjs-14-docs" + - "tailwindcss-docs" + - "typescript-docs" + - 
"react-docs" + - "shadcn-ui-docs" + embedding_model: "BAAI/bge-small-en-v1.5" + chunk_size: 512 + chunk_overlap: 64 diff --git a/configs/model_config.yaml b/configs/model_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10e525f14751dd05ef32ab754f725473029e6412 --- /dev/null +++ b/configs/model_config.yaml @@ -0,0 +1,53 @@ +# ========================================== +# MINDI 1.5 Vision-Coder — Model Configuration +# ========================================== + +model: + name: "MINDI-1.5-Vision-Coder" + version: "1.5.0" + + # Base coding model (Apache 2.0 licensed) + base: + name: "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct" + parameters: "16B" + license: "Apache-2.0" + context_length: 8192 + dtype: "bfloat16" + + # Vision encoder for UI screenshot understanding + vision: + name: "openai/clip-vit-large-patch14" + image_size: 224 + patch_size: 14 + hidden_size: 1024 + projection_dim: 768 + freeze_backbone: true + trainable_projection: true + + # LoRA fine-tuning configuration + lora: + rank: 64 + alpha: 128 + dropout: 0.05 + target_modules: + - "q_proj" + - "k_proj" + - "v_proj" + - "o_proj" + - "gate_proj" + - "up_proj" + - "down_proj" + bias: "none" + task_type: "CAUSAL_LM" + + # Output format + output: + framework: "nextjs-14" + styling: "tailwindcss" + language: "typescript" + template_format: "markdown-codeblock" + +huggingface: + repo_id: "Mindigenous/MINDI-1.5-Vision-Coder" + private: false + license: "apache-2.0" diff --git a/configs/search_config.yaml b/configs/search_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd12c2d465634531cf68131311ee4b267910655e --- /dev/null +++ b/configs/search_config.yaml @@ -0,0 +1,45 @@ +# ========================================== +# MINDI 1.5 Vision-Coder — Search Agent Configuration +# ========================================== + +search: + # Primary search provider + provider: "tavily" + api_key_env: "TAVILY_API_KEY" + + # Search behavior + max_results: 5 + 
search_depth: "advanced" + include_domains: + - "nextjs.org" + - "tailwindcss.com" + - "typescriptlang.org" + - "react.dev" + - "ui.shadcn.com" + - "developer.mozilla.org" + - "npmjs.com" + - "github.com" + exclude_domains: + - "w3schools.com" + - "geeksforgeeks.org" + + # Rate limiting + rate_limit: + requests_per_minute: 30 + retry_attempts: 3 + retry_delay_seconds: 2 + + # Caching + cache: + enabled: true + ttl_hours: 24 + max_entries: 10000 + storage_path: "./data/knowledge_base/search_cache.db" + + # Documentation scraping + docs_scraper: + enabled: true + output_dir: "./docs/" + max_pages_per_site: 100 + respect_robots_txt: true + request_delay_seconds: 1 diff --git a/configs/training_config.yaml b/configs/training_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee7ebf90c1615dd59be4096f6ead8da8d9448762 --- /dev/null +++ b/configs/training_config.yaml @@ -0,0 +1,57 @@ +# ========================================== +# MINDI 1.5 Vision-Coder — Training Configuration +# ========================================== + +training: + # Hardware targets + local_device: "cuda" # RTX 4060 8GB — for dev/testing only + cloud_device: "cuda" # MI300X 192GB — for actual training + precision: "bf16" + + # Hyperparameters + epochs: 3 + batch_size: 4 + gradient_accumulation_steps: 8 + effective_batch_size: 32 # batch_size * grad_accum + learning_rate: 2.0e-4 + weight_decay: 0.01 + warmup_ratio: 0.03 + lr_scheduler: "cosine" + max_grad_norm: 1.0 + + # Sequence settings + max_seq_length: 8192 + packing: true # Pack short examples together + + # Checkpointing + save_strategy: "steps" + save_steps: 500 + save_total_limit: 5 + checkpoint_dir: "./checkpoints" + resume_from_checkpoint: null + + # Logging + logging_steps: 10 + log_dir: "./logs/training" + report_to: "wandb" + + # Evaluation + eval_strategy: "steps" + eval_steps: 250 + eval_samples: 1000 + + # Memory optimization (for RTX 4060 local testing) + local_overrides: + batch_size: 1 + 
gradient_accumulation_steps: 16 + max_seq_length: 2048 + gradient_checkpointing: true + optim: "adamw_8bit" + +wandb: + project: "mindi-1.5-vision-coder" + entity: "mindigenous" + tags: + - "mindi-1.5" + - "lora" + - "vision-coder" diff --git a/data/knowledge_base/.gitkeep b/data/knowledge_base/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/processed/.gitkeep b/data/processed/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/tokenizer/.gitkeep b/data/tokenizer/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/docs/.gitkeep b/docs/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/frontend/.gitkeep b/frontend/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..e010a003cca24dfc6f83ab48c811be7a38fcbcdb Binary files /dev/null and b/requirements.txt differ diff --git a/scripts/health_check.py b/scripts/health_check.py new file mode 100644 index 0000000000000000000000000000000000000000..70ca540c513d121abd2bb16a1d83de5949181671 --- /dev/null +++ b/scripts/health_check.py @@ -0,0 +1,123 @@ +""" +MINDI 1.5 Vision-Coder — System Health Check Script + +Verifies that all dependencies, configs, and environment variables +are correctly set up before starting development or training. 
+""" + +from __future__ import annotations + +import os +import sys +from pathlib import Path + + +def check_python() -> bool: + """Verify Python version.""" + v = sys.version_info + ok = v.major == 3 and v.minor >= 10 + status = "OK" if ok else "FAIL" + print(f" [{status}] Python {v.major}.{v.minor}.{v.micro}") + return ok + + +def check_env_vars() -> bool: + """Check that required environment variables are set.""" + required = ["HUGGINGFACE_TOKEN", "TAVILY_API_KEY", "WANDB_API_KEY", "E2B_API_KEY"] + all_ok = True + for var in required: + value = os.environ.get(var, "") + if value: + print(f" [OK] {var} = {value[:8]}...") + else: + print(f" [MISSING] {var}") + all_ok = False + return all_ok + + +def check_directories() -> bool: + """Verify project directory structure exists.""" + project_root = Path(__file__).resolve().parent.parent + required_dirs = [ + "configs", "data/raw", "data/processed", "data/tokenizer", + "data/knowledge_base", "src/model", "src/agents", "src/search", + "src/sandbox", "src/training", "src/inference", "src/evaluation", + "api/routes", "api/middleware", "scripts", "tests", + "checkpoints", "logs", "docs", + ] + all_ok = True + for d in required_dirs: + path = project_root / d + if path.exists(): + print(f" [OK] {d}/") + else: + print(f" [MISSING] {d}/") + all_ok = False + return all_ok + + +def check_configs() -> bool: + """Verify config files exist.""" + project_root = Path(__file__).resolve().parent.parent + configs = [ + "configs/model_config.yaml", + "configs/training_config.yaml", + "configs/data_config.yaml", + "configs/search_config.yaml", + ] + all_ok = True + for c in configs: + path = project_root / c + if path.exists(): + print(f" [OK] {c}") + else: + print(f" [MISSING] {c}") + all_ok = False + return all_ok + + +def check_gpu() -> bool: + """Check CUDA GPU availability.""" + try: + import torch + if torch.cuda.is_available(): + name = torch.cuda.get_device_name(0) + vram = torch.cuda.get_device_properties(0).total_memory /
(1024**3) + print(f" [OK] GPU: {name} ({vram:.1f} GB VRAM)") + return True + else: + print(" [WARN] No CUDA GPU detected (CPU mode)") + return False + except ImportError: + print(" [WARN] PyTorch not installed yet") + return False + + +def main() -> None: + """Run all health checks.""" + print("=" * 55) + print(" MINDI 1.5 Vision-Coder — System Health Check") + print("=" * 55) + + print("\n[1] Python Version:") + check_python() + + print("\n[2] GPU:") + check_gpu() + + print("\n[3] Environment Variables:") + check_env_vars() + + print("\n[4] Directory Structure:") + check_directories() + + print("\n[5] Config Files:") + check_configs() + + print("\n" + "=" * 55) + print(" Health check complete.") + print("=" * 55) + + +if __name__ == "__main__": + main() diff --git a/scripts/train.py b/scripts/train.py new file mode 100644 index 0000000000000000000000000000000000000000..a15af2a783c010eb436aed3c5194e2f49a6f266d --- /dev/null +++ b/scripts/train.py @@ -0,0 +1,40 @@ +""" +MINDI 1.5 Vision-Coder — Training Launch Script + +Entry point for starting LoRA fine-tuning. +Loads config, initializes model + dataset, and runs training.
+""" + +from __future__ import annotations + +import argparse +from pathlib import Path + + +def main() -> None: + """Parse args and launch training.""" + parser = argparse.ArgumentParser(description="MINDI 1.5 — Launch LoRA Training") + parser.add_argument( + "--config", type=str, default="./configs/training_config.yaml", + help="Path to training config YAML", + ) + parser.add_argument( + "--local", action="store_true", default=True, + help="Use local GPU overrides (RTX 4060 mode)", + ) + parser.add_argument( + "--cloud", action="store_true", + help="Use cloud GPU settings (MI300X mode)", + ) + args = parser.parse_args() + + local_mode = not args.cloud + config_path = Path(args.config) + + print(f"[MINDI Training] Config: {config_path}") + print(f"[MINDI Training] Mode: {'local (RTX 4060)' if local_mode else 'cloud (MI300X)'}") + print("[MINDI Training] Pipeline will be wired after Phase 3 setup.") + + +if __name__ == "__main__": + main() diff --git a/scripts/validate_setup.py b/scripts/validate_setup.py new file mode 100644 index 0000000000000000000000000000000000000000..1ea5ef4eac589296f3c282357e4209fccd718eb0 --- /dev/null +++ b/scripts/validate_setup.py @@ -0,0 +1,228 @@ +""" +MINDI 1.5 Vision-Coder — Setup Validation Script + +Comprehensive readiness check: environment, configs, directories, +API keys, GPU, and package imports. +""" + +from __future__ import annotations + +import importlib +import os +import sys +from pathlib import Path + +# Ensure project root is on sys.path +PROJECT_ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(PROJECT_ROOT)) + + +def header(title: str) -> None: + print(f"\n{'='*50}") + print(f" {title}") + print(f"{'='*50}") + + +def check(label: str, passed: bool, detail: str = "") -> bool: + icon = "✅" if passed else "❌" + msg = f" {icon} {label}" + if detail: + msg += f" — {detail}" + print(msg) + return passed + + +def validate_directories() -> int: + header("1. 
Directory Structure") + required_dirs = [ + "configs", "src", "src/model", "src/agents", "src/search", + "src/sandbox", "src/training", "src/inference", "src/evaluation", + "src/tokenizer", "src/utils", "api", "api/routes", "api/middleware", + "scripts", "data", "data/raw", "data/processed", "data/knowledge_base", + "checkpoints", "logs", "tests", "docs", "frontend", + ] + failures = 0 + for d in required_dirs: + path = PROJECT_ROOT / d + if not check(d, path.is_dir()): + failures += 1 + return failures + + +def validate_files() -> int: + header("2. Key Files") + required_files = [ + ".env", ".env.example", ".gitignore", "README.md", + "requirements.txt", "setup.py", + "configs/model_config.yaml", "configs/training_config.yaml", + "configs/data_config.yaml", "configs/search_config.yaml", + "src/__init__.py", "src/utils/__init__.py", + "src/utils/env_loader.py", "src/utils/config_loader.py", + "src/model/vision_encoder.py", "src/model/code_model.py", + "src/agents/orchestrator.py", "src/agents/ui_critic.py", + "src/agents/error_fixer.py", "src/search/search_agent.py", + "src/sandbox/sandbox_runner.py", "src/training/trainer.py", + "src/training/dataset.py", "src/inference/pipeline.py", + "src/evaluation/evaluator.py", + "api/main.py", "api/routes/generate.py", "api/middleware/auth.py", + "scripts/health_check.py", "scripts/train.py", + ] + failures = 0 + for f in required_files: + path = PROJECT_ROOT / f + if not check(f, path.is_file()): + failures += 1 + return failures + + +def validate_env() -> int: + header("3. Environment Variables") + from src.utils.env_loader import EnvLoader + + env = EnvLoader() + env.load() + result = env.validate() + + failures = 0 + required = ["HUGGINGFACE_TOKEN", "TAVILY_API_KEY", "WANDB_API_KEY", "E2B_API_KEY"] + for key in required: + value = os.environ.get(key, "") + if value: + masked = value[:8] + "..." 
+ value[-4:] + check(key, True, masked) + else: + check(key, False, "NOT SET") + failures += 1 + + for w in result.warnings: + print(f" ⚠️ {w}") + + return failures + + +def validate_configs() -> int: + header("4. YAML Configurations") + from src.utils.config_loader import ConfigLoader + + loader = ConfigLoader() + failures = 0 + + try: + m = loader.model + check("model_config.yaml", True, f"{m.name} v{m.version}") + except Exception as e: + check("model_config.yaml", False, str(e)) + failures += 1 + + try: + t = loader.training + check("training_config.yaml", True, f"{t.epochs} epochs, lr={t.learning_rate}") + except Exception as e: + check("training_config.yaml", False, str(e)) + failures += 1 + + try: + d = loader.data + check("data_config.yaml", True, f"{d.target_size:,} target samples") + except Exception as e: + check("data_config.yaml", False, str(e)) + failures += 1 + + try: + s = loader.search + check("search_config.yaml", True, f"provider={s.provider}") + except Exception as e: + check("search_config.yaml", False, str(e)) + failures += 1 + + return failures + + +def validate_packages() -> int: + header("5. Critical Package Imports") + packages = [ + ("torch", "PyTorch"), + ("transformers", "HuggingFace Transformers"), + ("peft", "PEFT (LoRA)"), + ("datasets", "HuggingFace Datasets"), + ("wandb", "Weights & Biases"), + ("fastapi", "FastAPI"), + ("httpx", "HTTPX"), + ("PIL", "Pillow"), + ("yaml", "PyYAML"), + ("dotenv", "python-dotenv"), + ("pydantic", "Pydantic"), + ("playwright", "Playwright"), + ] + failures = 0 + for module, label in packages: + try: + importlib.import_module(module) + check(label, True) + except ImportError: + check(label, False, "not installed") + failures += 1 + return failures + + +def validate_gpu() -> int: + header("6. 
GPU / CUDA") + failures = 0 + try: + import torch + cuda_available = torch.cuda.is_available() + check("CUDA available", cuda_available) + if cuda_available: + gpu_name = torch.cuda.get_device_name(0) + vram = torch.cuda.get_device_properties(0).total_memory / (1024**3) + check("GPU", True, f"{gpu_name} ({vram:.1f} GB)") + check("PyTorch CUDA version", True, torch.version.cuda or "N/A") + else: + failures += 1 + except Exception as e: + check("GPU check", False, str(e)) + failures += 1 + return failures + + +def validate_gitignore() -> int: + header("7. Security Check") + gitignore = PROJECT_ROOT / ".gitignore" + failures = 0 + if gitignore.is_file(): + content = gitignore.read_text(encoding="utf-8") + check(".env in .gitignore", ".env" in content) + check("venv/ in .gitignore", "venv" in content) + if ".env" not in content: + failures += 1 + else: + check(".gitignore exists", False) + failures += 1 + return failures + + +def main() -> None: + print("\n╔══════════════════════════════════════════════════╗") + print("║ MINDI 1.5 Vision-Coder — Full Setup Validation ║") + print("╚══════════════════════════════════════════════════╝") + + total_failures = 0 + total_failures += validate_directories() + total_failures += validate_files() + total_failures += validate_env() + total_failures += validate_configs() + total_failures += validate_packages() + total_failures += validate_gpu() + total_failures += validate_gitignore() + + header("RESULT") + if total_failures == 0: + print(" ✅ ALL CHECKS PASSED — MINDI 1.5 is ready!") + else: + print(f" ❌ {total_failures} check(s) failed — review above") + + sys.exit(0 if total_failures == 0 else 1) + + +if __name__ == "__main__": + main() diff --git a/scripts/verify_install.py b/scripts/verify_install.py new file mode 100644 index 0000000000000000000000000000000000000000..b1d3a10a8c1354f1693781fb7968ed3c52eb1fe3 --- /dev/null +++ b/scripts/verify_install.py @@ -0,0 +1,115 @@ +""" +MINDI 1.5 Vision-Coder — Installation Verification 
Script + +Checks that every required package is importable and reports +versions + GPU status. Run after Phase 3 setup. +""" + +from __future__ import annotations + +import sys +from importlib.metadata import version as pkg_version + + +def check(package_name: str, import_name: str | None = None) -> bool: + """Try to import a package and report status.""" + mod = import_name or package_name + try: + __import__(mod) + v = pkg_version(package_name) + print(f" \u2705 {package_name} {v}") + return True + except Exception as e: + print(f" \u274c {package_name} — FAILED — {e}") + return False + + +def check_cuda() -> bool: + """Verify PyTorch CUDA availability.""" + try: + import torch + v = torch.__version__ + if torch.cuda.is_available(): + gpu = torch.cuda.get_device_name(0) + vram = round(torch.cuda.get_device_properties(0).total_memory / 1e9, 2) + print(f" \u2705 torch {v} — CUDA available — {gpu} ({vram} GB)") + return True + else: + print(f" \u26a0\ufe0f torch {v} — NO CUDA (CPU only)") + return False + except Exception as e: + print(f" \u274c torch — FAILED — {e}") + return False + + +def main() -> None: + print("=" * 60) + print(" MINDI 1.5 Vision-Coder — Package Verification") + print("=" * 60) + print(f"\n Python: {sys.version}") + print(f" Executable: {sys.executable}\n") + + results: list[bool] = [] + + print("[PyTorch + CUDA]") + results.append(check_cuda()) + check("torchvision") + check("torchaudio") + + print("\n[Group A — Core Transformers]") + for pkg in ["transformers", "datasets", "tokenizers", "accelerate", "peft", "huggingface-hub"]: + imp = pkg.replace("-", "_") + results.append(check(pkg, imp)) + + print("\n[Group B — Vision]") + results.append(check("pillow", "PIL")) + results.append(check("opencv-python", "cv2")) + results.append(check("open-clip-torch", "open_clip")) + + print("\n[Group C — Search]") + for pkg, imp in [("tavily-python", "tavily"), ("duckduckgo-search", "duckduckgo_search"), + ("beautifulsoup4", "bs4"), ("playwright", 
"playwright"), + ("requests", "requests"), ("httpx", "httpx"), ("lxml", "lxml")]: + results.append(check(pkg, imp)) + + print("\n[Group D — Sandbox]") + results.append(check("e2b")) + results.append(check("docker")) + + print("\n[Group E — Web Framework]") + for pkg, imp in [("fastapi", "fastapi"), ("uvicorn", "uvicorn"), ("websockets", "websockets"), + ("python-multipart", "multipart"), ("python-jose", "jose"), ("passlib", "passlib")]: + results.append(check(pkg, imp)) + + print("\n[Group F — Training Utilities]") + for pkg, imp in [("wandb", "wandb"), ("bitsandbytes", "bitsandbytes"), ("scipy", "scipy"), + ("scikit-learn", "sklearn"), ("einops", "einops")]: + results.append(check(pkg, imp)) + + print("\n[Group G — Vector Store / RAG]") + results.append(check("faiss-cpu", "faiss")) + results.append(check("sentence-transformers", "sentence_transformers")) + + print("\n[Group H — Utilities]") + for pkg, imp in [("rich", "rich"), ("tqdm", "tqdm"), ("python-dotenv", "dotenv"), + ("pyyaml", "yaml"), ("numpy", "numpy"), ("pandas", "pandas"), + ("matplotlib", "matplotlib")]: + results.append(check(pkg, imp)) + + print("\n[Group I — Code Quality]") + for pkg in ["black", "isort", "mypy"]: + results.append(check(pkg)) + + # Summary + passed = sum(results) + total = len(results) + print("\n" + "=" * 60) + if passed == total: + print(f" \u2705 ALL {total} PACKAGES VERIFIED — READY TO BUILD!") + else: + print(f" \u26a0\ufe0f {passed}/{total} passed — {total - passed} need fixing") + print("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..6fc3bd544eb90fa7cbb4e27c5a6565b86cda7295 --- /dev/null +++ b/setup.py @@ -0,0 +1,28 @@ +""" +MINDI 1.5 Vision-Coder — Package Setup + +Allows installing the project as a Python package: + pip install -e . 
+""" + +from setuptools import setup, find_packages + +setup( + name="mindi-vision-coder", + version="1.5.0", + author="Faaz", + author_email="faaz@mindigenous.ai", + description="Multimodal agentic AI code generator by MINDIGENOUS.AI", + long_description=open("README.md", encoding="utf-8").read(), + long_description_content_type="text/markdown", + url="https://huggingface.co/Mindigenous/MINDI-1.5-Vision-Coder", + packages=find_packages(), + python_requires=">=3.10", + classifiers=[ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + ], +) diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2c6cd6b7f30ddfe6d91977c63f1fadfd4fa896ba --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,9 @@ +""" +MINDI 1.5 Vision-Coder — Source Package + +Multimodal agentic AI coding model by MINDIGENOUS.AI +Generates Next.js 14 + Tailwind CSS + TypeScript code. +""" + +__version__ = "1.5.0" +__author__ = "Faaz @ MINDIGENOUS.AI" diff --git a/src/agents/__init__.py b/src/agents/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9b3c135185ca536287fe433df5c7b09b64541f66 --- /dev/null +++ b/src/agents/__init__.py @@ -0,0 +1 @@ +"""MINDI 1.5 — Agent system: orchestrator, UI critic, code generator.""" diff --git a/src/agents/error_fixer.py b/src/agents/error_fixer.py new file mode 100644 index 0000000000000000000000000000000000000000..aa642bf33cf80b02c4e2746e6ec97b371c53afb3 --- /dev/null +++ b/src/agents/error_fixer.py @@ -0,0 +1,56 @@ +""" +MINDI 1.5 Vision-Coder — Error Fixer Agent + +Automatically diagnoses and fixes errors from sandbox execution, +lint failures, and type errors in generated code. 
@dataclass
class ErrorDiagnosis:
    """One classified error extracted from tool output.

    ``error_type`` is one of "runtime", "compile", "lint" or "type" and
    ``message`` carries the raw error text. The remaining fields are optional
    location / repair hints that default to ``None``.
    """

    error_type: str
    message: str
    file_path: Optional[str] = None
    line_number: Optional[int] = None
    suggested_fix: Optional[str] = None


@dataclass
class FixResult:
    """Outcome of a single fix attempt: code before/after plus bookkeeping."""

    original_code: str
    fixed_code: str
    errors_found: list[ErrorDiagnosis] = field(default_factory=list)
    errors_fixed: int = 0
    success: bool = False


class ErrorFixer:
    """Agent that turns error output into diagnoses and repaired code."""

    def __init__(self, log_dir: Optional[Path] = None) -> None:
        """Remember the log directory and make sure it exists on disk."""
        target_dir = log_dir if log_dir else Path("./logs/error_fixer")
        target_dir.mkdir(parents=True, exist_ok=True)
        self.log_dir = target_dir

    async def diagnose(self, code: str, error_output: str) -> list[ErrorDiagnosis]:
        """Classify the errors in ``error_output``.

        Placeholder: LLM-based parsing is not wired in yet, so no diagnoses
        are produced.
        """
        no_findings: list[ErrorDiagnosis] = []
        return no_findings

    async def fix(self, code: str, errors: list[ErrorDiagnosis]) -> FixResult:
        """Try to repair ``code`` for the given diagnoses.

        Placeholder: the fine-tuned model is not connected yet, so the code
        comes back unchanged and the attempt is reported as unsuccessful.
        """
        untouched = code
        return FixResult(untouched, untouched, errors, 0, False)
class AgentRole(str, Enum):
    """Roles for MINDI's agent system."""

    CODE_GENERATOR = "code_generator"
    UI_CRITIC = "ui_critic"
    SEARCH_AGENT = "search_agent"
    SANDBOX_RUNNER = "sandbox_runner"
    ERROR_FIXER = "error_fixer"


@dataclass
class AgentMessage:
    """A message passed between agents in the orchestration pipeline."""

    role: AgentRole
    content: str
    metadata: dict[str, Any] = field(default_factory=dict)
    artifacts: list[Path] = field(default_factory=list)


@dataclass
class GenerationResult:
    """Final output from the agent pipeline.

    ``errors`` carries whatever error strings the last sandbox run reported
    (empty when the sandbox reports none).
    """

    code: str
    language: str
    file_path: str
    critique: Optional[str] = None
    search_context: Optional[str] = None
    sandbox_output: Optional[str] = None
    iterations: int = 1
    success: bool = True
    errors: list[str] = field(default_factory=list)


class AgentOrchestrator:
    """
    Orchestrates the MINDI agent pipeline:

    1. User prompt arrives
    2. Search Agent gathers relevant docs/packages
    3. Code Generator produces Next.js + Tailwind + TS code
    4. Sandbox Runner tests the code in isolation
    5. Vision Critic evaluates the sandbox result
    6. Error Fixer resolves any issues
    7. Loop until quality threshold or max iterations
    """

    def __init__(
        self,
        max_iterations: int = 3,
        quality_threshold: float = 0.85,
        log_dir: Optional[Path] = None,
    ) -> None:
        """Store loop limits and create the log directory if it is missing."""
        self.max_iterations = max_iterations
        self.quality_threshold = quality_threshold
        self.log_dir = log_dir or Path("./logs/agents")
        self.log_dir.mkdir(parents=True, exist_ok=True)
        self.history: list[AgentMessage] = []

    async def run_pipeline(
        self,
        user_prompt: str,
        context: Optional[dict[str, Any]] = None,
    ) -> GenerationResult:
        """
        Execute the full agent pipeline for a user request.

        Args:
            user_prompt: Natural-language request to generate code for.
            context: Optional extra context. Accepted for API compatibility;
                no pipeline stage consumes it yet.

        Returns:
            A ``GenerationResult`` describing the last evaluated code. The
            first generate/test/critique pass counts as iteration 1; each
            fix round adds one, up to ``max_iterations``.
        """
        self.history.clear()

        # Steps 1-2: gather documentation, then generate the first draft.
        search_result = await self._run_search(user_prompt)
        final_code = await self._generate_code(user_prompt, search_result)

        # Steps 3-4: test the draft and critique the sandbox result.
        sandbox_result = await self._run_sandbox(final_code)
        critique_result = await self._run_critique(final_code, sandbox_result)

        # Step 5: fix and re-evaluate until acceptable or out of budget.
        iterations = 1
        while iterations < self.max_iterations and not self._is_acceptable(
            sandbox_result, critique_result
        ):
            final_code = await self._fix_errors(
                final_code, sandbox_result, critique_result
            )
            sandbox_result = await self._run_sandbox(final_code)
            critique_result = await self._run_critique(final_code, sandbox_result)
            iterations += 1

        return GenerationResult(
            code=final_code,
            language="typescript",
            file_path="page.tsx",
            critique=critique_result.get("feedback"),
            search_context=search_result.get("context"),
            sandbox_output=sandbox_result.get("output"),
            iterations=iterations,
            success=sandbox_result.get("success", False),
            # Surface sandbox-reported errors instead of always returning [].
            errors=[str(err) for err in (sandbox_result.get("errors") or [])],
        )

    def _is_acceptable(
        self, sandbox: dict[str, Any], critique: dict[str, Any]
    ) -> bool:
        """Return True when the sandbox succeeded and the score meets the threshold."""
        if not sandbox.get("success"):
            return False
        score = critique.get("score")
        if score is None:
            # A missing or null score counts as 0 rather than raising
            # TypeError in the comparison below.
            score = 0.0
        return score >= self.quality_threshold

    async def _run_search(self, prompt: str) -> dict[str, Any]:
        """Search for relevant docs and packages. Implemented in src/search/."""
        # Placeholder — will be wired to SearchAgent
        return {"context": "", "sources": []}

    async def _generate_code(self, prompt: str, search_ctx: dict[str, Any]) -> str:
        """Generate code using the fine-tuned model. Implemented in src/inference/."""
        # Placeholder — will be wired to inference pipeline
        return ""

    async def _run_sandbox(self, code: str) -> dict[str, Any]:
        """Run code in sandbox. Implemented in src/sandbox/."""
        # Placeholder — will be wired to SandboxRunner
        return {"success": False, "output": "", "screenshot": None}

    async def _run_critique(self, code: str, sandbox: dict[str, Any]) -> dict[str, Any]:
        """Critique UI via vision. Implemented in src/agents/ui_critic.py."""
        # Placeholder — will be wired to VisionCritic
        return {"score": 0.0, "feedback": ""}

    async def _fix_errors(
        self, code: str, sandbox: dict[str, Any], critique: dict[str, Any]
    ) -> str:
        """Fix errors in code. Implemented in src/agents/error_fixer.py."""
        # Placeholder — will be wired to ErrorFixer
        return code
@dataclass
class CritiqueResult:
    """Structured critique of a UI screenshot."""

    score: float  # 0.0 to 1.0 overall quality
    layout_score: float  # Layout and spacing quality
    typography_score: float  # Text hierarchy and readability
    color_score: float  # Color contrast and consistency
    responsiveness_score: float  # Mobile-readiness estimation
    feedback: str  # Natural language critique
    suggestions: list[str]  # Actionable improvement items


class UICritic:
    """Vision-powered UI/UX critic for evaluating generated web pages."""

    def __init__(
        self,
        vision_encoder: Optional[object] = None,
        device: Optional[str] = None,
    ) -> None:
        """Store the encoder and pick CUDA when available unless ``device`` is given."""
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.vision_encoder = vision_encoder  # VisionEncoder instance

    @staticmethod
    def _zero_critique(feedback: str, suggestion: str) -> CritiqueResult:
        """Build an all-zero critique carrying one feedback line and one suggestion."""
        return CritiqueResult(
            score=0.0,
            layout_score=0.0,
            typography_score=0.0,
            color_score=0.0,
            responsiveness_score=0.0,
            feedback=feedback,
            suggestions=[suggestion],
        )

    async def critique_screenshot(
        self,
        screenshot_path: Optional[Path],
        generated_code: str,
    ) -> CritiqueResult:
        """
        Analyze a screenshot of the generated UI and produce a critique.

        Args:
            screenshot_path: Location of the screenshot. ``None`` (sandbox
                produced no screenshot) and paths that are not regular files
                are reported as "not found" instead of raising.
            generated_code: Source that produced the UI (unused until the
                vision pipeline is connected).

        Returns:
            A ``CritiqueResult``; currently always zero-scored because the
            vision critique pipeline is a placeholder.
        """
        # Coerce so plain strings work too; is_file() rejects directories,
        # which exist but cannot be a screenshot.
        if screenshot_path is None or not Path(screenshot_path).is_file():
            return self._zero_critique(
                "Screenshot not found — cannot critique.",
                "Ensure sandbox produces a screenshot.",
            )

        # Full implementation will use the VisionEncoder + LLM to generate
        # the critique; until then signal "needs implementation".
        return self._zero_critique(
            "Vision critique pipeline not yet connected.",
            "Wire VisionEncoder to critique pipeline.",
        )
@dataclass
class EvalMetrics:
    """Aggregated evaluation metrics."""

    pass_at_1: float = 0.0  # Code correctness (passes tests)
    pass_at_5: float = 0.0  # Code correctness with 5 samples
    ui_quality_score: float = 0.0  # Average vision critic score
    syntax_error_rate: float = 0.0  # Fraction with syntax errors
    type_error_rate: float = 0.0  # Fraction with TypeScript errors
    avg_iterations: float = 0.0  # Average fix iterations needed
    total_examples: int = 0
    details: list[dict[str, Any]] = field(default_factory=list)


class Evaluator:
    """Evaluates MINDI 1.5 model across multiple quality dimensions."""

    def __init__(
        self,
        eval_data_dir: Optional[Path] = None,
        results_dir: Optional[Path] = None,
    ) -> None:
        """Store data/result locations and create the results directory."""
        self.eval_data_dir = eval_data_dir or Path("./data/processed/eval")
        self.results_dir = results_dir or Path("./logs/evaluation")
        self.results_dir.mkdir(parents=True, exist_ok=True)

    async def run_evaluation(
        self,
        pipeline: Any,
        num_samples: int = 100,
    ) -> EvalMetrics:
        """Run full evaluation suite against the inference pipeline.

        Placeholder: no evaluation is performed yet; the returned metrics
        only echo ``num_samples`` as ``total_examples``.
        """
        return EvalMetrics(total_examples=num_samples)

    def save_results(self, metrics: EvalMetrics, run_name: str = "eval") -> Path:
        """Write ``metrics`` as JSON to ``<results_dir>/<run_name>_results.json``.

        Every field of ``EvalMetrics`` is written, including the per-example
        ``details`` list that was previously dropped.

        Returns:
            Path of the written file.

        Raises:
            TypeError: If ``details`` holds values JSON cannot encode. The
                payload is serialised before the file is opened, so a failure
                never leaves a truncated results file behind.
        """
        import json
        from dataclasses import asdict

        output_path = self.results_dir / f"{run_name}_results.json"
        payload = json.dumps(asdict(metrics), indent=2)
        output_path.write_text(payload, encoding="utf-8")
        return output_path
class InferencePipeline:
    """Inference pipeline for MINDI 1.5 code generation."""

    def __init__(
        self,
        model: Optional[object] = None,
        tokenizer: Optional[AutoTokenizer] = None,
        device: Optional[str] = None,
        max_new_tokens: int = 4096,
    ) -> None:
        """Store the model/tokenizer pair and generation limits.

        ``device`` defaults to CUDA when available, otherwise CPU. It is only
        used for input placement when the model does not expose its own
        ``device`` attribute.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.max_new_tokens = max_new_tokens

    def generate(
        self,
        prompt: str,
        temperature: float = 0.7,
        top_p: float = 0.95,
        top_k: int = 50,
    ) -> str:
        """Generate code from a user prompt.

        Args:
            prompt: Text fed to the model.
            temperature: Sampling temperature. ``0`` selects greedy decoding
                instead of sampling.
            top_p: Nucleus sampling mass (sampling mode only).
            top_k: Top-k cutoff (sampling mode only).

        Returns:
            The decoded continuation only (prompt tokens stripped), with
            special tokens kept.

        Raises:
            RuntimeError: If the model or tokenizer is missing.
            ValueError: If ``temperature`` is negative.
        """
        if self.model is None or self.tokenizer is None:
            raise RuntimeError("Model and tokenizer must be loaded before inference.")
        if temperature < 0:
            raise ValueError(f"temperature must be >= 0, got {temperature}")

        # Prefer the model's own device: with device_map="auto" the weights
        # may not live on self.device.
        model_device = getattr(self.model, "device", None)
        target_device = self.device if model_device is None else model_device
        inputs = self.tokenizer(prompt, return_tensors="pt").to(target_device)

        # Use the real pad token when the tokenizer has one; fall back to EOS.
        pad_token_id = getattr(self.tokenizer, "pad_token_id", None)
        if pad_token_id is None:
            pad_token_id = self.tokenizer.eos_token_id

        generation_kwargs = {
            "max_new_tokens": self.max_new_tokens,
            "pad_token_id": pad_token_id,
        }
        if temperature > 0:
            generation_kwargs.update(
                do_sample=True,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
            )
        else:
            # Sampling with temperature 0 is invalid; decode greedily and
            # leave out the sampling-only knobs.
            generation_kwargs["do_sample"] = False

        with torch.inference_mode():
            outputs = self.model.generate(**inputs, **generation_kwargs)

        prompt_length = inputs["input_ids"].shape[1]
        generated = outputs[0][prompt_length:]
        return self.tokenizer.decode(generated, skip_special_tokens=False)

    @classmethod
    def from_checkpoint(
        cls,
        checkpoint_dir: Path,
        base_model_name: str = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
    ) -> "InferencePipeline":
        """Load an inference pipeline from a saved checkpoint.

        Loads the base model, attaches the adapter stored in
        ``checkpoint_dir``, and pairs it with the base model's tokenizer.
        """
        from src.model.code_model import MindiCodeModel

        model_wrapper = MindiCodeModel(model_name=base_model_name)
        model_wrapper.load_base_model()
        model_wrapper.load_adapter(checkpoint_dir)

        # NOTE(review): the tokenizer comes from the base model, not the
        # checkpoint — confirm this matches the vocabulary used in training.
        tokenizer = AutoTokenizer.from_pretrained(
            base_model_name, trust_remote_code=True
        )

        return cls(
            model=model_wrapper.peft_model,
            tokenizer=tokenizer,
        )
class MindiCodeModel:
    """Base coding model with LoRA for MINDI 1.5 fine-tuning."""

    def __init__(
        self,
        model_name: str = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
        device: Optional[str] = None,
        cache_dir: Optional[Path] = None,
        load_in_4bit: bool = False,
    ) -> None:
        """Record configuration and create the cache directory.

        No weights are loaded here; call ``load_base_model()`` for that.
        """
        self.model_name = model_name
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.cache_dir = cache_dir or Path("./checkpoints/base")
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.load_in_4bit = load_in_4bit
        self.model: Optional[AutoModelForCausalLM] = None
        self.peft_model: Optional[PeftModel] = None

    def _device_map(self):
        """Translate ``self.device`` into a ``device_map`` argument.

        ``"cuda"`` keeps automatic placement, an indexed device such as
        ``"cuda:1"`` pins the whole model to that GPU, and anything else
        returns ``None``. Previously only the exact string ``"cuda"`` was
        recognised, so ``"cuda:0"`` got no device map at all.
        """
        device = str(self.device)
        if device == "cuda":
            return "auto"
        if device.startswith("cuda:"):
            return {"": device}
        return None

    def load_base_model(self) -> AutoModelForCausalLM:
        """Load the base model with optional 4-bit quantization."""
        quantization_config = None
        if self.load_in_4bit:
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_use_double_quant=True,
            )

        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            cache_dir=str(self.cache_dir),
            torch_dtype=torch.bfloat16,
            device_map=self._device_map(),
            quantization_config=quantization_config,
            trust_remote_code=True,
        )
        return self.model

    def apply_lora(
        self,
        rank: int = 64,
        alpha: int = 128,
        dropout: float = 0.05,
        target_modules: Optional[list[str]] = None,
    ) -> PeftModel:
        """Apply LoRA adapters to the base model for efficient fine-tuning.

        Args:
            rank: LoRA rank ``r``.
            alpha: LoRA scaling factor.
            dropout: Dropout applied inside the adapters.
            target_modules: Module names to adapt; defaults to the attention
                and MLP projection layers listed below.

        Raises:
            RuntimeError: If the base model has not been loaded.
        """
        if self.model is None:
            raise RuntimeError("Base model not loaded. Call load_base_model() first.")

        if target_modules is None:
            target_modules = [
                "q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj",
            ]

        lora_config = LoraConfig(
            r=rank,
            lora_alpha=alpha,
            lora_dropout=dropout,
            target_modules=target_modules,
            bias="none",
            task_type=TaskType.CAUSAL_LM,
        )

        self.peft_model = get_peft_model(self.model, lora_config)
        trainable, total = self._count_parameters()
        # Guard the percentage so a parameter-less model cannot divide by zero.
        share = 100 * trainable / total if total else 0.0
        print(f"[MindiCodeModel] LoRA applied — trainable: {trainable:,} / {total:,} "
              f"({share:.2f}%)")
        return self.peft_model

    def _count_parameters(self) -> tuple[int, int]:
        """Return ``(trainable, total)`` parameter counts, or ``(0, 0)`` with no model."""
        model = self.peft_model or self.model
        if model is None:
            return 0, 0
        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        total = sum(p.numel() for p in model.parameters())
        return trainable, total

    def save_adapter(self, output_dir: Optional[Path] = None) -> Path:
        """Save the LoRA adapter weights and return the directory used.

        Raises:
            RuntimeError: If no adapter has been applied or loaded.
        """
        if self.peft_model is None:
            raise RuntimeError("No LoRA adapter to save. Call apply_lora() first.")
        save_path = output_dir or Path("./checkpoints/finetuned")
        save_path.mkdir(parents=True, exist_ok=True)
        self.peft_model.save_pretrained(str(save_path))
        return save_path

    def load_adapter(self, adapter_dir: Path) -> PeftModel:
        """Load a saved LoRA adapter onto the base model (loading the base first if needed)."""
        if self.model is None:
            self.load_base_model()
        self.peft_model = PeftModel.from_pretrained(
            self.model, str(adapter_dir)
        )
        return self.peft_model

    def resize_embeddings(self, new_vocab_size: int) -> None:
        """Resize model embeddings to accommodate new special tokens.

        Raises:
            RuntimeError: If neither a base nor a PEFT model is loaded.
        """
        model = self.peft_model or self.model
        if model is None:
            raise RuntimeError("No model loaded.")
        model.resize_token_embeddings(new_vocab_size)
class VisionEncoder(nn.Module):
    """CLIP-based vision encoder for UI screenshot understanding."""

    def __init__(
        self,
        model_name: str = "openai/clip-vit-large-patch14",
        projection_dim: int = 768,
        device: Optional[str] = None,
        cache_dir: Optional[Path] = None,
    ) -> None:
        """Load CLIP, freeze it, and attach a trainable projection head.

        Args:
            model_name: CLIP checkpoint to load.
            projection_dim: Output width of the projection MLP.
            device: Target device; defaults to CUDA when available.
            cache_dir: Download/cache directory (created if missing).
        """
        super().__init__()
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.cache_dir = cache_dir or Path("./checkpoints/vision")
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Load CLIP model and processor
        self.clip: CLIPModel = CLIPModel.from_pretrained(
            model_name, cache_dir=str(self.cache_dir)
        )
        self.processor: CLIPProcessor = CLIPProcessor.from_pretrained(
            model_name, cache_dir=str(self.cache_dir)
        )

        # Freeze CLIP backbone — we only train the projection layer
        for param in self.clip.parameters():
            param.requires_grad = False

        # Trainable projection: CLIP hidden → LLM embedding space
        clip_hidden_size: int = self.clip.config.vision_config.hidden_size
        self.projection = nn.Sequential(
            nn.Linear(clip_hidden_size, projection_dim),
            nn.GELU(),
            nn.Linear(projection_dim, projection_dim),
        )

        self.to(self.device)

    def encode_image(self, image: Image.Image) -> torch.Tensor:
        """Encode a PIL image into a projected embedding tensor."""
        inputs = self.processor(images=image, return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # The backbone is frozen, so skip autograd bookkeeping for it.
        with torch.no_grad():
            vision_outputs = self.clip.vision_model(**inputs)
            # Use the first token of the last hidden state ([CLS]).
            cls_embedding = vision_outputs.last_hidden_state[:, 0, :]

        # Projection runs outside no_grad so it stays trainable.
        return self.projection(cls_embedding)

    def encode_screenshot(self, screenshot_path: Path) -> torch.Tensor:
        """Load a screenshot from disk and encode it.

        Raises:
            FileNotFoundError: If ``screenshot_path`` does not exist.
        """
        if not screenshot_path.exists():
            raise FileNotFoundError(f"Screenshot not found: {screenshot_path}")

        # Image.open keeps the file open; convert() returns an independent
        # in-memory copy, so the handle can be closed right after it.
        with Image.open(screenshot_path) as raw_image:
            image = raw_image.convert("RGB")
        return self.encode_image(image)

    def save_projection(self, save_dir: Optional[Path] = None) -> Path:
        """Save only the trainable projection weights.

        Writes ``projection.pt`` into ``save_dir`` (default:
        ``<cache_dir>/projection``) and returns that directory.
        """
        save_path = save_dir or self.cache_dir / "projection"
        save_path.mkdir(parents=True, exist_ok=True)
        torch.save(self.projection.state_dict(), save_path / "projection.pt")
        return save_path

    def load_projection(self, load_dir: Path) -> None:
        """Load projection weights from ``<load_dir>/projection.pt``.

        Raises:
            FileNotFoundError: If the weights file is missing.
        """
        weights_path = load_dir / "projection.pt"
        if not weights_path.exists():
            raise FileNotFoundError(f"Projection weights not found: {weights_path}")
        state_dict = torch.load(weights_path, map_location=self.device, weights_only=True)
        self.projection.load_state_dict(state_dict)
@dataclass
class SandboxResult:
    """Outcome of one sandbox execution (status, streams, timing, errors)."""

    success: bool
    stdout: str = ""
    stderr: str = ""
    exit_code: int = -1
    screenshot_path: Optional[Path] = None
    execution_time_ms: float = 0.0
    errors: list[str] = field(default_factory=list)


class SandboxRunner:
    """
    Runs generated code in isolation.

    Two backends are recognised:
    - "e2b": cloud sandbox, needs an E2B API key
    - "docker": local container
    """

    def __init__(
        self,
        backend: str = "e2b",
        e2b_api_key: Optional[str] = None,
        screenshot_dir: Optional[Path] = None,
    ) -> None:
        """Store backend settings and create the screenshot directory.

        The API key falls back to the ``E2B_API_KEY`` environment variable
        (empty string when unset).
        """
        shots = screenshot_dir if screenshot_dir else Path("./logs/screenshots")
        shots.mkdir(parents=True, exist_ok=True)

        self.backend = backend
        self.e2b_api_key = e2b_api_key if e2b_api_key else os.environ.get("E2B_API_KEY", "")
        self.screenshot_dir = shots

    async def run_code(
        self,
        code: str,
        filename: str = "page.tsx",
        capture_screenshot: bool = True,
    ) -> SandboxResult:
        """
        Run ``code`` on the configured backend.

        An unrecognised backend yields a failed ``SandboxResult`` rather than
        an exception.
        """
        handlers = {
            "e2b": self._run_e2b,
            "docker": self._run_docker,
        }
        handler = handlers.get(self.backend)
        if handler is None:
            return SandboxResult(
                success=False,
                stderr=f"Unknown sandbox backend: {self.backend}",
                errors=[f"Unknown backend: {self.backend}"],
            )
        return await handler(code, filename, capture_screenshot)

    async def _run_e2b(
        self, code: str, filename: str, capture_screenshot: bool
    ) -> SandboxResult:
        """E2B backend; fails fast when no API key is configured."""
        if self.e2b_api_key:
            # Will be implemented with e2b-code-interpreter SDK
            return SandboxResult(success=False, stderr="E2B integration pending")

        return SandboxResult(
            success=False,
            stderr="E2B_API_KEY not set",
            errors=["E2B_API_KEY not configured"],
        )

    async def _run_docker(
        self, code: str, filename: str, capture_screenshot: bool
    ) -> SandboxResult:
        """Docker backend; placeholder until the Docker SDK is wired in."""
        pending = SandboxResult(success=False, stderr="Docker integration pending")
        return pending
@dataclass
class SearchResult:
    """A single search result from Tavily."""

    title: str
    url: str
    content: str
    score: float


@dataclass
class SearchResponse:
    """Aggregated search response."""

    query: str
    results: list[SearchResult] = field(default_factory=list)
    context: str = ""  # Concatenated relevant content for the model


class SearchAgent:
    """Web search agent powered by Tavily for documentation lookup."""

    def __init__(
        self,
        config_path: Optional[Path] = None,
        api_key: Optional[str] = None,
    ) -> None:
        """Load the search config and resolve the API key.

        The key falls back to the ``TAVILY_API_KEY`` environment variable; a
        warning is printed when neither source provides one.
        """
        self.config_path = config_path or Path("./configs/search_config.yaml")
        self.config = self._load_config()

        self.api_key = api_key or os.environ.get("TAVILY_API_KEY", "")
        if not self.api_key:
            print("[SearchAgent] WARNING: TAVILY_API_KEY not set")

        self.base_url = "https://api.tavily.com"

    def _load_config(self) -> dict:
        """Return the ``search`` section of the YAML config.

        Yields ``{}`` when the file is missing, empty, not a mapping, or has
        no usable ``search`` mapping. ``yaml.safe_load`` returns ``None`` for
        an empty document, which previously crashed on ``.get``.
        """
        if not self.config_path.exists():
            return {}
        with open(self.config_path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
        if not isinstance(loaded, dict):
            return {}
        section = loaded.get("search")
        return section if isinstance(section, dict) else {}

    async def search(self, query: str, max_results: int = 5) -> SearchResponse:
        """Execute a web search via Tavily API.

        Returns a response with an explanatory ``context`` and no results
        when no API key is configured. HTTP error statuses propagate from
        ``raise_for_status()``.
        """
        if not self.api_key:
            return SearchResponse(query=query, context="Search unavailable — no API key.")

        payload = {
            "api_key": self.api_key,
            "query": query,
            "search_depth": self.config.get("search_depth", "advanced"),
            "max_results": max_results,
            # `or []` keeps a null entry in the YAML from being sent as null.
            "include_domains": self.config.get("include_domains") or [],
            "exclude_domains": self.config.get("exclude_domains") or [],
        }

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(f"{self.base_url}/search", json=payload)
            response.raise_for_status()
            data = response.json()

        results = [
            SearchResult(
                title=r.get("title", ""),
                url=r.get("url", ""),
                content=r.get("content", ""),
                score=r.get("score", 0.0),
            )
            # Tolerate a null "results" field as well as a missing one.
            for r in (data.get("results") or [])
        ]

        # Build concatenated context for the model
        context = "\n\n".join(f"### {r.title}\n{r.content}" for r in results)

        return SearchResponse(query=query, results=results, context=context)

    async def search_docs(self, topic: str) -> SearchResponse:
        """Search specifically for framework documentation."""
        query = f"{topic} documentation latest Next.js 14 Tailwind TypeScript"
        return await self.search(query)

    async def search_package(self, package_name: str) -> SearchResponse:
        """Search for an npm package's usage and API."""
        query = f"npm {package_name} usage example TypeScript"
        return await self.search(query)
+""" + +from __future__ import annotations + +from pathlib import Path +from typing import Optional + +from transformers import AutoTokenizer, PreTrainedTokenizerFast + + +# Special tokens for MINDI's structured output format +SPECIAL_TOKENS: dict[str, str] = { + "code_start": "<|code_start|>", + "code_end": "<|code_end|>", + "file_start": "<|file_start|>", + "file_end": "<|file_end|>", + "critique_start": "<|critique_start|>", + "critique_end": "<|critique_end|>", + "search_start": "<|search_start|>", + "search_end": "<|search_end|>", + "fix_start": "<|fix_start|>", + "fix_end": "<|fix_end|>", +} + + +class MindiTokenizer: + """Tokenizer wrapper with MINDI-specific special tokens.""" + + def __init__(self, model_name: str, cache_dir: Optional[Path] = None) -> None: + self.model_name = model_name + self.cache_dir = cache_dir or Path("./data/tokenizer") + self.cache_dir.mkdir(parents=True, exist_ok=True) + + self.tokenizer: PreTrainedTokenizerFast = AutoTokenizer.from_pretrained( + model_name, + cache_dir=str(self.cache_dir), + trust_remote_code=True, + ) + self._add_special_tokens() + + def _add_special_tokens(self) -> None: + """Register MINDI special tokens with the tokenizer.""" + new_tokens = list(SPECIAL_TOKENS.values()) + num_added = self.tokenizer.add_special_tokens( + {"additional_special_tokens": new_tokens} + ) + if num_added > 0: + print(f"[MindiTokenizer] Added {num_added} special tokens") + + @property + def vocab_size(self) -> int: + """Return the full vocabulary size including special tokens.""" + return len(self.tokenizer) + + def encode(self, text: str, max_length: int = 8192) -> list[int]: + """Encode text to token IDs with truncation.""" + return self.tokenizer.encode( + text, max_length=max_length, truncation=True + ) + + def decode(self, token_ids: list[int]) -> str: + """Decode token IDs back to text.""" + return self.tokenizer.decode(token_ids, skip_special_tokens=False) + + def save(self, output_dir: Optional[Path] = None) -> Path: + """Save 
class MindiDataset(Dataset):
    """Dataset for MINDI 1.5 fine-tuning data (JSONL format)."""

    # Label value excluded from the loss (PyTorch cross-entropy's default
    # ``ignore_index``).
    _IGNORE_INDEX = -100

    def __init__(
        self,
        data_dir: Path,
        tokenizer: Any,
        max_length: int = 8192,
        split: str = "train",
    ) -> None:
        """Read ``<data_dir>/<split>.jsonl`` into memory.

        Args:
            data_dir: Directory holding the JSONL split files.
            tokenizer: Callable tokenizer returning ``input_ids`` and
                ``attention_mask`` tensors.
            max_length: Pad/truncate length passed to the tokenizer.
            split: Split name, used as the JSONL file stem.
        """
        self.data_dir = Path(data_dir)
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.split = split
        self.examples: list[dict[str, Any]] = []
        self._load_data()

    def _load_data(self) -> None:
        """Load the split's JSONL file; a missing file leaves the dataset empty."""
        data_path = self.data_dir / f"{self.split}.jsonl"
        if not data_path.exists():
            print(f"[MindiDataset] No data file at {data_path} — dataset is empty")
            return

        with open(data_path, "r", encoding="utf-8") as f:
            # Blank lines are skipped; every other line must be a JSON object.
            self.examples.extend(
                json.loads(stripped) for line in f if (stripped := line.strip())
            )

        print(f"[MindiDataset] Loaded {len(self.examples)} examples ({self.split})")

    def __len__(self) -> int:
        """Return the number of loaded examples."""
        return len(self.examples)

    def __getitem__(self, idx: int) -> dict[str, Any]:
        """Tokenize and return a single training example.

        Returns a dict with ``input_ids``, ``attention_mask`` and ``labels``.
        ``labels`` is an independent copy of ``input_ids`` in which every
        position with ``attention_mask == 0`` (padding) is set to ``-100``,
        so padding does not contribute to the loss.
        """
        example = self.examples[idx]

        # Expected format: {"prompt": "...", "completion": "..."}
        prompt = example.get("prompt", "")
        completion = example.get("completion", "")
        full_text = f"{prompt}\n{completion}"

        encoded = self.tokenizer(
            full_text,
            max_length=self.max_length,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
        )

        input_ids = encoded["input_ids"].squeeze(0)
        attention_mask = encoded["attention_mask"].squeeze(0)

        # clone() so masking the labels never writes into input_ids (squeeze
        # returns a view of the same storage).
        labels = input_ids.clone()
        labels[attention_mask == 0] = self._IGNORE_INDEX

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels,
        }


def load_data_config(config_path: Optional[Path] = None) -> dict:
    """Load the ``dataset`` section of the data configuration YAML.

    Returns ``{}`` when the file is empty or has no ``dataset`` section
    (``yaml.safe_load`` returns ``None`` for an empty document).

    Raises:
        FileNotFoundError: If the config file does not exist.
    """
    path = config_path or Path("./configs/data_config.yaml")
    if not path.exists():
        raise FileNotFoundError(f"Data config not found: {path}")

    with open(path, "r", encoding="utf-8") as f:
        loaded = yaml.safe_load(f)
    return (loaded or {}).get("dataset") or {}
class TrainingPipeline:
    """Manages the LoRA fine-tuning pipeline for MINDI 1.5.

    Loads the ``training`` section of a YAML config and turns it into
    Hugging Face ``TrainingArguments``.

    Args:
        config_path: YAML file to read; defaults to
            ``./configs/training_config.yaml``.
        local_mode: When true, the ``local_overrides`` mapping inside the
            ``training`` section (if present) is merged over the base values.

    Raises:
        FileNotFoundError: If the config file does not exist.
    """

    def __init__(
        self,
        config_path: Optional[Path] = None,
        local_mode: bool = True,
    ) -> None:
        # Path(...) lets callers pass a plain string as well as a Path.
        self.config_path = (
            Path(config_path) if config_path else Path("./configs/training_config.yaml")
        )
        self.local_mode = local_mode
        self.config = self._load_config()

    def _load_config(self) -> dict:
        """Load training configuration from YAML.

        Returns:
            The ``training`` mapping, with ``local_overrides`` merged in and
            removed when ``local_mode`` is set. An empty file or a missing or
            null ``training`` section yields an empty dict.
        """
        if not self.config_path.exists():
            raise FileNotFoundError(f"Training config not found: {self.config_path}")

        with open(self.config_path, "r", encoding="utf-8") as f:
            # safe_load returns None for an empty document.
            full_config = yaml.safe_load(f) or {}

        config = full_config.get("training") or {}

        # Apply local overrides if running on RTX 4060
        if self.local_mode and "local_overrides" in config:
            # A bare `local_overrides:` key parses as None; treat it as empty.
            overrides = config.pop("local_overrides") or {}
            config.update(overrides)
            print("[TrainingPipeline] Applied local GPU overrides (RTX 4060 mode)")

        return config

    def build_training_args(self, output_dir: Optional[Path] = None) -> TrainingArguments:
        """Build HuggingFace TrainingArguments from config.

        Args:
            output_dir: Checkpoint directory; defaults to
                ``./checkpoints/finetuned``. Created if missing.

        Returns:
            ``TrainingArguments`` populated from ``self.config``, falling back
            to the per-key defaults below.
        """
        output = Path(output_dir) if output_dir else Path("./checkpoints/finetuned")
        output.mkdir(parents=True, exist_ok=True)

        cfg = self.config
        # Previously only bf16 was derived, so `precision: fp16` was silently ignored.
        precision = cfg.get("precision", "bf16")

        return TrainingArguments(
            output_dir=str(output),
            num_train_epochs=cfg.get("epochs", 3),
            per_device_train_batch_size=cfg.get("batch_size", 1),
            gradient_accumulation_steps=cfg.get("gradient_accumulation_steps", 16),
            learning_rate=cfg.get("learning_rate", 2e-4),
            weight_decay=cfg.get("weight_decay", 0.01),
            warmup_ratio=cfg.get("warmup_ratio", 0.03),
            lr_scheduler_type=cfg.get("lr_scheduler", "cosine"),
            max_grad_norm=cfg.get("max_grad_norm", 1.0),
            bf16=precision == "bf16",
            fp16=precision == "fp16",
            logging_steps=cfg.get("logging_steps", 10),
            save_strategy=cfg.get("save_strategy", "steps"),
            save_steps=cfg.get("save_steps", 500),
            save_total_limit=cfg.get("save_total_limit", 5),
            eval_strategy=cfg.get("eval_strategy", "steps"),
            eval_steps=cfg.get("eval_steps", 250),
            report_to=cfg.get("report_to", "wandb"),
            gradient_checkpointing=cfg.get("gradient_checkpointing", True),
            optim=cfg.get("optim", "adamw_torch"),
            dataloader_num_workers=cfg.get("dataloader_num_workers", 2),
            # Keep dataset columns as-is; the dataset yields ready-made tensors.
            remove_unused_columns=False,
        )
# ── Model Config Dataclasses ──

@dataclass
class BaseModelConfig:
    """Defaults for the ``base`` field of ``ModelConfig``."""

    name: str = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
    parameters: str = "16B"
    license: str = "Apache-2.0"
    context_length: int = 8192
    dtype: str = "bfloat16"


@dataclass
class VisionConfig:
    """Defaults for the ``vision`` field of ``ModelConfig``."""

    name: str = "openai/clip-vit-large-patch14"
    image_size: int = 224
    patch_size: int = 14
    hidden_size: int = 1024
    projection_dim: int = 768
    freeze_backbone: bool = True
    trainable_projection: bool = True


@dataclass
class LoraConfig:
    """Defaults for the ``lora`` field of ``ModelConfig``."""

    rank: int = 64
    alpha: int = 128
    dropout: float = 0.05
    # default_factory gives each instance its own list.
    target_modules: list[str] = field(default_factory=lambda: [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ])
    bias: str = "none"
    task_type: str = "CAUSAL_LM"


@dataclass
class OutputConfig:
    """Defaults for the ``output`` field of ``ModelConfig``."""

    framework: str = "nextjs-14"
    styling: str = "tailwindcss"
    language: str = "typescript"
    template_format: str = "markdown-codeblock"


@dataclass
class HuggingFaceConfig:
    """Defaults for the ``huggingface`` field of ``ModelConfig``.

    ``ConfigLoader.model`` fills this from the root-level ``huggingface`` key
    of ``model_config.yaml``, not from inside the ``model`` section.
    """

    repo_id: str = "Mindigenous/MINDI-1.5-Vision-Coder"
    private: bool = False
    license: str = "apache-2.0"


@dataclass
class ModelConfig:
    """Typed form of the ``model`` section of ``model_config.yaml``."""

    name: str = "MINDI-1.5-Vision-Coder"
    version: str = "1.5.0"
    base: BaseModelConfig = field(default_factory=BaseModelConfig)
    vision: VisionConfig = field(default_factory=VisionConfig)
    lora: LoraConfig = field(default_factory=LoraConfig)
    output: OutputConfig = field(default_factory=OutputConfig)
    huggingface: HuggingFaceConfig = field(default_factory=HuggingFaceConfig)


# ── Training Config Dataclasses ──

@dataclass
class LocalOverrides:
    """Defaults for the ``local_overrides`` field of ``TrainingConfig``."""

    batch_size: int = 1
    gradient_accumulation_steps: int = 16
    max_seq_length: int = 2048
    gradient_checkpointing: bool = True
    optim: str = "adamw_8bit"


@dataclass
class WandbConfig:
    """Defaults for the ``wandb`` field of ``TrainingConfig``.

    ``ConfigLoader.training`` fills this from the root-level ``wandb`` key of
    ``training_config.yaml``, not from inside the ``training`` section.
    """

    project: str = "mindi-1.5-vision-coder"
    entity: str = "mindigenous"
    tags: list[str] = field(default_factory=lambda: ["mindi-1.5", "lora", "vision-coder"])


@dataclass
class TrainingConfig:
    """Typed form of the ``training`` section of ``training_config.yaml``."""

    local_device: str = "cuda"
    cloud_device: str = "cuda"
    precision: str = "bf16"
    epochs: int = 3
    batch_size: int = 4
    gradient_accumulation_steps: int = 8
    # NOTE(review): stored, not computed. The default matches
    # batch_size * gradient_accumulation_steps (4 * 8); keep in sync by hand.
    effective_batch_size: int = 32
    learning_rate: float = 2.0e-4
    weight_decay: float = 0.01
    warmup_ratio: float = 0.03
    lr_scheduler: str = "cosine"
    max_grad_norm: float = 1.0
    max_seq_length: int = 8192
    packing: bool = True
    save_strategy: str = "steps"
    save_steps: int = 500
    save_total_limit: int = 5
    checkpoint_dir: str = "./checkpoints"
    resume_from_checkpoint: Optional[str] = None
    logging_steps: int = 10
    log_dir: str = "./logs/training"
    report_to: str = "wandb"
    eval_strategy: str = "steps"
    eval_steps: int = 250
    eval_samples: int = 1000
    local_overrides: LocalOverrides = field(default_factory=LocalOverrides)
    wandb: WandbConfig = field(default_factory=WandbConfig)


# ── Data Config Dataclasses ──

@dataclass
class DataSource:
    """One entry of ``DataConfig.sources``; every field defaults to empty/zero."""

    name: str = ""
    description: str = ""
    path: str = ""
    weight: float = 0.0


@dataclass
class DataProcessing:
    """Defaults for the ``processing`` field of ``DataConfig``."""

    tokenizer: str = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
    max_length: int = 8192
    min_length: int = 64
    dedup_strategy: str = "minhash"
    quality_filter: bool = True
    output_dir: str = "./data/processed/"


@dataclass
class DataSplits:
    """Defaults for the ``splits`` field of ``DataConfig``.

    The default fractions sum to 1.0; nothing here enforces that for
    user-supplied values.
    """

    train: float = 0.95
    validation: float = 0.05


@dataclass
class KnowledgeBase:
    """Defaults for the ``knowledge_base`` field of ``DataConfig``."""

    path: str = "./data/knowledge_base/"
    sources: list[str] = field(default_factory=lambda: [
        "nextjs-14-docs", "tailwindcss-docs", "typescript-docs",
        "react-docs", "shadcn-ui-docs",
    ])
    embedding_model: str = "BAAI/bge-small-en-v1.5"
    chunk_size: int = 512
    chunk_overlap: int = 64
+ sources: list[DataSource] = field(default_factory=list) + processing: DataProcessing = field(default_factory=DataProcessing) + splits: DataSplits = field(default_factory=DataSplits) + knowledge_base: KnowledgeBase = field(default_factory=KnowledgeBase) + + +# ── Search Config Dataclasses ── + +@dataclass +class RateLimit: + requests_per_minute: int = 30 + retry_attempts: int = 3 + retry_delay_seconds: int = 2 + + +@dataclass +class SearchCache: + enabled: bool = True + ttl_hours: int = 24 + max_entries: int = 10000 + storage_path: str = "./data/knowledge_base/search_cache.db" + + +@dataclass +class DocsScraper: + enabled: bool = True + output_dir: str = "./docs/" + max_pages_per_site: int = 100 + respect_robots_txt: bool = True + request_delay_seconds: int = 1 + + +@dataclass +class SearchConfig: + provider: str = "tavily" + api_key_env: str = "TAVILY_API_KEY" + max_results: int = 5 + search_depth: str = "advanced" + include_domains: list[str] = field(default_factory=list) + exclude_domains: list[str] = field(default_factory=list) + rate_limit: RateLimit = field(default_factory=RateLimit) + cache: SearchCache = field(default_factory=SearchCache) + docs_scraper: DocsScraper = field(default_factory=DocsScraper) + + +# ── Config Loader ── + +def _dict_to_dataclass(cls: type, data: dict[str, Any]) -> Any: + """Recursively convert a dict to a dataclass, handling nested dataclasses and lists.""" + if not isinstance(data, dict): + return data + + field_types = {f.name: f.type for f in cls.__dataclass_fields__.values()} + kwargs: dict[str, Any] = {} + + for key, value in data.items(): + if key not in field_types: + continue + + field_type = field_types[key] + + # Handle list of dataclasses (e.g., list[DataSource]) + if isinstance(value, list) and hasattr(field_type, "__origin__"): + # For list[DataSource] etc. 
+ inner = getattr(field_type, "__args__", [None])[0] + if inner and hasattr(inner, "__dataclass_fields__"): + kwargs[key] = [_dict_to_dataclass(inner, item) for item in value] + else: + kwargs[key] = value + elif isinstance(value, dict): + # Try to match nested dataclass + field_cls = cls.__dataclass_fields__[key].default_factory if hasattr(cls.__dataclass_fields__[key], "default_factory") else None + # Get actual type from annotations + import typing + actual_type = typing.get_type_hints(cls).get(key) + if actual_type and hasattr(actual_type, "__dataclass_fields__"): + kwargs[key] = _dict_to_dataclass(actual_type, value) + else: + kwargs[key] = value + else: + kwargs[key] = value + + return cls(**kwargs) + + +class ConfigLoader: + """ + Loads and provides typed access to all YAML configuration files. + + Usage: + loader = ConfigLoader() + model_cfg = loader.model + training_cfg = loader.training + data_cfg = loader.data + search_cfg = loader.search + """ + + def __init__(self, config_dir: Optional[Path] = None) -> None: + self.config_dir = config_dir or Path(__file__).resolve().parents[2] / "configs" + self._model: Optional[ModelConfig] = None + self._training: Optional[TrainingConfig] = None + self._data: Optional[DataConfig] = None + self._search: Optional[SearchConfig] = None + + def _load_yaml(self, filename: str) -> dict[str, Any]: + """Load a YAML file from the config directory.""" + path = self.config_dir / filename + if not path.exists(): + raise FileNotFoundError(f"Config file not found: {path}") + with open(path, "r", encoding="utf-8") as f: + return yaml.safe_load(f) + + @property + def model(self) -> ModelConfig: + """Load and return typed model configuration.""" + if self._model is None: + raw = self._load_yaml("model_config.yaml") + model_data = raw.get("model", {}) + self._model = _dict_to_dataclass(ModelConfig, model_data) + # HuggingFace config is at root level + hf_data = raw.get("huggingface", {}) + if hf_data: + self._model.huggingface = 
_dict_to_dataclass(HuggingFaceConfig, hf_data) + return self._model + + @property + def training(self) -> TrainingConfig: + """Load and return typed training configuration.""" + if self._training is None: + raw = self._load_yaml("training_config.yaml") + training_data = raw.get("training", {}) + self._training = _dict_to_dataclass(TrainingConfig, training_data) + # WandB config is at root level + wandb_data = raw.get("wandb", {}) + if wandb_data: + self._training.wandb = _dict_to_dataclass(WandbConfig, wandb_data) + return self._training + + @property + def data(self) -> DataConfig: + """Load and return typed data configuration.""" + if self._data is None: + raw = self._load_yaml("data_config.yaml") + dataset_data = raw.get("dataset", {}) + self._data = _dict_to_dataclass(DataConfig, dataset_data) + return self._data + + @property + def search(self) -> SearchConfig: + """Load and return typed search configuration.""" + if self._search is None: + raw = self._load_yaml("search_config.yaml") + search_data = raw.get("search", {}) + self._search = _dict_to_dataclass(SearchConfig, search_data) + return self._search + + def reload(self) -> None: + """Force reload all configurations from disk.""" + self._model = None + self._training = None + self._data = None + self._search = None + + def print_summary(self) -> None: + """Print a summary of all loaded configurations.""" + print("\n╔══════════════════════════════════════════╗") + print("║ MINDI 1.5 — Configuration Summary ║") + print("╠══════════════════════════════════════════╣") + + m = self.model + print(f" Model: {m.name} v{m.version}") + print(f" Base: {m.base.name} ({m.base.parameters})") + print(f" Vision: {m.vision.name}") + print(f" LoRA: r={m.lora.rank}, alpha={m.lora.alpha}") + print(f" Output: {m.output.framework} + {m.output.styling} + {m.output.language}") + + print("╠──────────────────────────────────────────╣") + + t = self.training + print(f" Epochs: {t.epochs}") + print(f" Batch: {t.batch_size} 
class EnvLoader:
    """
    Loads and validates environment variables from .env files.

    Usage:
        env = EnvLoader()
        env.load()
        env.validate()
        key = env.get("TAVILY_API_KEY")

    Accessors load the .env file on first use if ``load()`` was not called.
    """

    REQUIRED_KEYS = [
        "HUGGINGFACE_TOKEN",
        "TAVILY_API_KEY",
        "WANDB_API_KEY",
        "E2B_API_KEY",
    ]

    OPTIONAL_KEYS = [
        "HUGGINGFACE_REPO",
        "WANDB_PROJECT",
        "WANDB_ENTITY",
        "MODEL_NAME",
        "BASE_MODEL_PATH",
        "FINETUNED_MODEL_PATH",
        "API_HOST",
        "API_PORT",
        "API_WORKERS",
        "DEVICE",
        "MIXED_PRECISION",
        "MAX_SEQ_LENGTH",
        "TRAINING_OUTPUT_DIR",
        "LOG_DIR",
        "DATA_DIR",
        "CHECKPOINT_DIR",
        "SANDBOX_TYPE",
        "MAX_SEARCH_RESULTS",
        "SEARCH_TIMEOUT",
        "CLOUD_GPU_HOST",
        "CLOUD_GPU_USER",
        "CLOUD_GPU_SSH_KEY",
        "PROJECT_NAME",
        "STARTUP_NAME",
        "HF_USERNAME",
    ]

    # Expected leading characters of well-formed keys (mismatch is a warning only).
    KEY_PREFIXES = {
        "HUGGINGFACE_TOKEN": "hf_",
        "TAVILY_API_KEY": "tvly-",
        "E2B_API_KEY": "e2b_",
    }

    def __init__(self, env_path: Optional[Path] = None) -> None:
        """Remember which .env file to read; nothing is loaded yet.

        Args:
            env_path: Path to the .env file (a string is accepted too).
                Defaults to ``.env`` two directories above this module.
        """
        self.env_path = (
            Path(env_path) if env_path else Path(__file__).resolve().parents[2] / ".env"
        )
        self._loaded = False

    def load(self, override: bool = False) -> None:
        """Load environment variables from .env file.

        Args:
            override: Passed to ``load_dotenv``; when true, values from the
                file replace variables already present in the environment.

        Raises:
            FileNotFoundError: If the .env file does not exist.
        """
        if not self.env_path.exists():
            raise FileNotFoundError(
                f".env file not found at {self.env_path}\n"
                f"Copy .env.example to .env and fill in your API keys."
            )
        load_dotenv(self.env_path, override=override)
        self._loaded = True

    @staticmethod
    def _mask_secret(value: str, head: int = 8, tail: int = 4) -> str:
        """Return a display-safe form of a secret.

        Only values long enough that the revealed head and tail are at most
        half of the string get the ``head...tail`` form; anything shorter is
        hidden completely, since slicing would print most or all of it.
        """
        if len(value) < 2 * (head + tail):
            return "****"
        return value[:head] + "..." + value[-tail:]

    @staticmethod
    def _looks_like_placeholder(value: str) -> bool:
        """Tell whether a value still has the ``...your_..._here`` template shape."""
        lowered = value.lower()
        return "your_" in lowered and lowered.endswith("_here")

    def validate(self) -> EnvValidationResult:
        """Validate that all required environment variables are set and well-formed.

        Returns:
            A result whose ``missing`` lists required keys that are unset or
            blank, and whose ``warnings`` lists keys with an unexpected prefix
            or a value that still looks like a template placeholder.
            ``valid`` depends only on ``missing``.
        """
        if not self._loaded:
            self.load()

        missing: list[str] = []
        warnings: list[str] = []

        for key in self.REQUIRED_KEYS:
            value = os.environ.get(key, "").strip()
            if not value:
                missing.append(key)
                continue

            # Check prefix format
            expected_prefix = self.KEY_PREFIXES.get(key)
            if expected_prefix and not value.startswith(expected_prefix):
                warnings.append(
                    f"{key} doesn't start with expected prefix '{expected_prefix}'"
                )

            # An unedited template value passes the prefix check, so flag it separately.
            if self._looks_like_placeholder(value):
                warnings.append(f"{key} still looks like a placeholder value")

        return EnvValidationResult(
            valid=len(missing) == 0,
            missing=missing,
            warnings=warnings,
        )

    def get(self, key: str, default: Optional[str] = None) -> str:
        """Get an environment variable with optional default.

        Returns ``""`` when the variable is unset and no default is given.
        """
        if not self._loaded:
            self.load()
        return os.environ.get(key, default or "")

    def get_int(self, key: str, default: int = 0) -> int:
        """Get an environment variable as an integer.

        Returns ``default`` when the variable is unset or empty.

        Raises:
            ValueError: If the value is set but is not a valid integer; the
                message names the offending variable.
        """
        value = self.get(key)
        if not value:
            return default
        try:
            return int(value)
        except ValueError as err:
            raise ValueError(
                f"Environment variable {key} must be an integer, got {value!r}"
            ) from err

    def get_path(self, key: str, default: str = ".") -> Path:
        """Get an environment variable as a Path."""
        return Path(self.get(key, default))

    # ── Convenience properties ──

    @property
    def huggingface_token(self) -> str:
        """Value of ``HUGGINGFACE_TOKEN`` (empty string if unset)."""
        return self.get("HUGGINGFACE_TOKEN")

    @property
    def tavily_api_key(self) -> str:
        """Value of ``TAVILY_API_KEY`` (empty string if unset)."""
        return self.get("TAVILY_API_KEY")

    @property
    def wandb_api_key(self) -> str:
        """Value of ``WANDB_API_KEY`` (empty string if unset)."""
        return self.get("WANDB_API_KEY")

    @property
    def e2b_api_key(self) -> str:
        """Value of ``E2B_API_KEY`` (empty string if unset)."""
        return self.get("E2B_API_KEY")

    @property
    def model_name(self) -> str:
        """Value of ``MODEL_NAME``, defaulting to the DeepSeek Coder V2 Lite id."""
        return self.get("MODEL_NAME", "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct")

    @property
    def device(self) -> str:
        """Value of ``DEVICE``, defaulting to ``"cuda"``."""
        return self.get("DEVICE", "cuda")

    @property
    def mixed_precision(self) -> str:
        """Value of ``MIXED_PRECISION``, defaulting to ``"bf16"``."""
        return self.get("MIXED_PRECISION", "bf16")

    @property
    def sandbox_type(self) -> str:
        """Value of ``SANDBOX_TYPE``, defaulting to ``"e2b"``."""
        return self.get("SANDBOX_TYPE", "e2b")

    def print_status(self) -> None:
        """Print a summary of environment variable status.

        Required keys are shown masked. Values are stripped before the
        set/unset decision so this view agrees with ``validate()``.
        """
        result = self.validate()

        print("\n╔══════════════════════════════════════════╗")
        print("║ MINDI 1.5 — Environment Status ║")
        print("╠══════════════════════════════════════════╣")

        for key in self.REQUIRED_KEYS:
            value = os.environ.get(key, "").strip()
            if value:
                masked = self._mask_secret(value)
                print(f" ✅ {key:<25} = {masked}")
            else:
                print(f" ❌ {key:<25} = NOT SET")

        print("╠──────────────────────────────────────────╣")

        for key in self.OPTIONAL_KEYS:
            value = os.environ.get(key, "")
            if value:
                display = value if len(value) <= 40 else value[:37] + "..."
                print(f" ✅ {key:<25} = {display}")
            else:
                print(f" ⚪ {key:<25} = (not set)")

        print("╠══════════════════════════════════════════╣")

        if result.valid:
            print(" ✅ All required keys are set!")
        else:
            print(f" ❌ Missing {len(result.missing)} required key(s): {', '.join(result.missing)}")

        for w in result.warnings:
            print(f" ⚠️ {w}")

        print("╚══════════════════════════════════════════╝\n")
def _repo_root() -> Path:
    """Return the directory one level above the one containing this test file."""
    return Path(__file__).resolve().parents[1]


def test_project_structure_exists() -> None:
    """Check that each expected top-level entry is present under the repo root."""
    base = _repo_root()
    expected_dirs = (
        "configs", "src", "api", "scripts", "tests",
        "data", "checkpoints", "logs", "docs",
    )
    for d in expected_dirs:
        assert (base / d).exists(), f"Missing directory: {d}"


def test_config_files_exist() -> None:
    """Check that the four YAML config files are present."""
    base = _repo_root()
    expected_configs = (
        "configs/model_config.yaml",
        "configs/training_config.yaml",
        "configs/data_config.yaml",
        "configs/search_config.yaml",
    )
    for c in expected_configs:
        assert (base / c).exists(), f"Missing config: {c}"


def test_src_packages_importable() -> None:
    """Check that every listed package has an ``__init__.py``.

    Only file presence is checked; nothing is imported.
    """
    base = _repo_root()
    expected_packages = (
        "src", "src/model", "src/agents", "src/search",
        "src/sandbox", "src/training", "src/inference", "src/evaluation",
    )
    for pkg in expected_packages:
        marker = base / pkg / "__init__.py"
        assert marker.exists(), f"Missing __init__.py in {pkg}"