""" ORCH Studio - Autonomous Next.js Code Generation Generate complete, production-ready Next.js applications from natural language prompts. Now powered by the latest QLoRA fine-tuned ORCH-7B model (43h training on A100). https://huggingface.co/orch-ai Version: 1.0.1 - Fixed GPU duration limits for ZeroGPU """ import os import re import json import math import tempfile import zipfile from pathlib import Path from datetime import datetime from dataclasses import dataclass from typing import Optional, Dict, Any, Tuple, List import torch import torch.nn as nn import torch.nn.functional as F import gradio as gr import spaces from huggingface_hub import hf_hub_download from tokenizers import Tokenizer # ============================================================================= # Model Configuration # ============================================================================= @dataclass class OrchConfig: """Configuration for Orch transformer model.""" model_name: str = "orch" model_size: str = "3b" vocab_size: int = 32000 hidden_size: int = 2560 intermediate_size: int = 10240 num_hidden_layers: int = 32 num_attention_heads: int = 32 num_key_value_heads: int = 8 max_position_embeddings: int = 16384 rms_norm_eps: float = 1e-5 rope_theta: float = 10000.0 hidden_dropout: float = 0.0 attention_dropout: float = 0.0 use_flash_attention: bool = False tie_word_embeddings: bool = False rope_scaling_type: Optional[str] = None rope_scaling_factor: float = 1.0 original_max_position_embeddings: Optional[int] = None initializer_range: float = 0.02 @property def head_dim(self) -> int: return self.hidden_size // self.num_attention_heads @property def num_kv_groups(self) -> int: return self.num_attention_heads // self.num_key_value_heads @classmethod def from_dict(cls, config_dict: Dict[str, Any]) -> "OrchConfig": return cls(**{k: v for k, v in config_dict.items() if k in cls.__dataclass_fields__}) @classmethod def load(cls, path: str) -> "OrchConfig": with open(path, "r") as f: 
config_dict = json.load(f) return cls.from_dict(config_dict) # ============================================================================= # Model Layers # ============================================================================= class RMSNorm(nn.Module): def __init__(self, hidden_size: int, eps: float = 1e-6): super().__init__() self.weight = nn.Parameter(torch.ones(hidden_size)) self.eps = eps def forward(self, x: torch.Tensor) -> torch.Tensor: rms = torch.sqrt(torch.mean(x ** 2, dim=-1, keepdim=True) + self.eps) return (x / rms) * self.weight class SwiGLU(nn.Module): def __init__(self, hidden_size: int, intermediate_size: int, bias: bool = False, dropout: float = 0.0): super().__init__() self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=bias) self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=bias) self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=bias) self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity() def forward(self, x: torch.Tensor) -> torch.Tensor: gate = F.silu(self.gate_proj(x)) up = self.up_proj(x) hidden = gate * up hidden = self.dropout(hidden) return self.down_proj(hidden) class RotaryEmbedding(nn.Module): def __init__( self, dim: int, max_position_embeddings: int = 2048, base: float = 10000.0, scaling_type: Optional[str] = None, scaling_factor: float = 1.0, original_max_position_embeddings: Optional[int] = None, ): super().__init__() self.dim = dim self.max_position_embeddings = max_position_embeddings self.base = base self.scaling_type = scaling_type self.scaling_factor = scaling_factor self.original_max_position_embeddings = original_max_position_embeddings or max_position_embeddings inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float() / self.dim)) self.register_buffer("inv_freq", inv_freq, persistent=False) self._set_cos_sin_cache(max_position_embeddings) def _set_cos_sin_cache(self, seq_len: int): self.max_seq_len_cached = seq_len t = torch.arange(seq_len, 
device=self.inv_freq.device, dtype=self.inv_freq.dtype) if self.scaling_type == "linear": t = t / self.scaling_factor freqs = torch.outer(t, self.inv_freq) emb = torch.cat((freqs, freqs), dim=-1) self.register_buffer("cos_cached", emb.cos(), persistent=False) self.register_buffer("sin_cached", emb.sin(), persistent=False) def forward(self, x: torch.Tensor, position_ids: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: seq_len = position_ids.max().item() + 1 if seq_len > self.max_seq_len_cached: self._set_cos_sin_cache(seq_len) cos = self.cos_cached[position_ids].to(x.dtype) sin = self.sin_cached[position_ids].to(x.dtype) return cos, sin def rotate_half(x: torch.Tensor) -> torch.Tensor: x1 = x[..., : x.shape[-1] // 2] x2 = x[..., x.shape[-1] // 2 :] return torch.cat((-x2, x1), dim=-1) def apply_rotary_pos_emb(q, k, cos, sin): cos = cos.unsqueeze(1) sin = sin.unsqueeze(1) q_embed = (q * cos) + (rotate_half(q) * sin) k_embed = (k * cos) + (rotate_half(k) * sin) return q_embed, k_embed def create_causal_mask(seq_len: int, device: torch.device, dtype: torch.dtype = torch.float32) -> torch.Tensor: mask = torch.triu(torch.full((seq_len, seq_len), float("-inf"), device=device, dtype=dtype), diagonal=1) return mask.unsqueeze(0).unsqueeze(0) class OrchAttention(nn.Module): def __init__(self, config: OrchConfig, layer_idx: int): super().__init__() self.config = config self.layer_idx = layer_idx self.hidden_size = config.hidden_size self.num_heads = config.num_attention_heads self.num_kv_heads = config.num_key_value_heads self.head_dim = config.head_dim self.num_kv_groups = config.num_kv_groups self.attention_dropout = config.attention_dropout self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False) self.k_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False) self.v_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False) self.o_proj = nn.Linear(self.num_heads * self.head_dim, 
self.hidden_size, bias=False) self.rotary_emb = RotaryEmbedding( self.head_dim, max_position_embeddings=config.max_position_embeddings, base=config.rope_theta, scaling_type=config.rope_scaling_type, scaling_factor=config.rope_scaling_factor, original_max_position_embeddings=config.original_max_position_embeddings, ) def forward(self, hidden_states, attention_mask=None, position_ids=None): batch_size, seq_len, _ = hidden_states.shape query_states = self.q_proj(hidden_states).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2) key_states = self.k_proj(hidden_states).view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2) value_states = self.v_proj(hidden_states).view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2) cos, sin = self.rotary_emb(value_states, position_ids) query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin) if self.num_kv_groups > 1: key_states = key_states.repeat_interleave(self.num_kv_groups, dim=1) value_states = value_states.repeat_interleave(self.num_kv_groups, dim=1) scale = 1.0 / math.sqrt(self.head_dim) attn_weights = torch.matmul(query_states, key_states.transpose(-2, -1)) * scale if attention_mask is not None: attn_weights = attn_weights + attention_mask attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) attn_weights = F.dropout(attn_weights, p=self.attention_dropout, training=self.training) attn_output = torch.matmul(attn_weights, value_states) attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, self.num_heads * self.head_dim) return self.o_proj(attn_output) class OrchBlock(nn.Module): def __init__(self, config: OrchConfig, layer_idx: int): super().__init__() self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) self.attention = OrchAttention(config, layer_idx) self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) 
self.feed_forward = SwiGLU(config.hidden_size, config.intermediate_size, dropout=config.hidden_dropout) self.dropout = nn.Dropout(config.hidden_dropout) def forward(self, hidden_states, attention_mask=None, position_ids=None): residual = hidden_states hidden_states = self.input_layernorm(hidden_states) hidden_states = self.attention(hidden_states, attention_mask, position_ids) hidden_states = self.dropout(hidden_states) hidden_states = residual + hidden_states residual = hidden_states hidden_states = self.post_attention_layernorm(hidden_states) hidden_states = self.feed_forward(hidden_states) hidden_states = self.dropout(hidden_states) hidden_states = residual + hidden_states return hidden_states class OrchModel(nn.Module): def __init__(self, config: OrchConfig): super().__init__() self.config = config self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size) self.embed_dropout = nn.Dropout(config.hidden_dropout) self.layers = nn.ModuleList([OrchBlock(config, i) for i in range(config.num_hidden_layers)]) self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) def forward(self, input_ids, attention_mask=None, position_ids=None): batch_size, seq_len = input_ids.shape device = input_ids.device if position_ids is None: position_ids = torch.arange(seq_len, device=device).unsqueeze(0).expand(batch_size, -1) hidden_states = self.embed_tokens(input_ids) hidden_states = self.embed_dropout(hidden_states) causal_mask = create_causal_mask(seq_len, device, hidden_states.dtype) for layer in self.layers: hidden_states = layer(hidden_states, causal_mask, position_ids) return self.norm(hidden_states) class OrchForCausalLM(nn.Module): def __init__(self, config: OrchConfig): super().__init__() self.config = config self.model = OrchModel(config) if config.tie_word_embeddings: self.lm_head = None else: self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) def forward(self, input_ids, attention_mask=None, position_ids=None, labels=None): 
hidden_states = self.model(input_ids, attention_mask, position_ids) if self.lm_head is not None: logits = self.lm_head(hidden_states) else: logits = F.linear(hidden_states, self.model.embed_tokens.weight) output = {"logits": logits} if labels is not None: shift_logits = logits[..., :-1, :].contiguous() shift_labels = labels[..., 1:].contiguous() loss = F.cross_entropy(shift_logits.view(-1, self.config.vocab_size), shift_labels.view(-1), ignore_index=-100) output["loss"] = loss return output @torch.no_grad() def generate(self, input_ids, max_new_tokens=512, temperature=0.7, top_k=50, top_p=0.9, do_sample=True, eos_token_id=None): self.eval() for _ in range(max_new_tokens): if input_ids.shape[1] > self.config.max_position_embeddings: input_ids = input_ids[:, -self.config.max_position_embeddings:] outputs = self.forward(input_ids) next_token_logits = outputs["logits"][:, -1, :] if temperature != 1.0: next_token_logits = next_token_logits / temperature if top_k is not None: indices_to_remove = next_token_logits < torch.topk(next_token_logits, top_k)[0][..., -1, None] next_token_logits[indices_to_remove] = float("-inf") if top_p is not None: sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True) cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) sorted_indices_to_remove = cumulative_probs > top_p sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() sorted_indices_to_remove[..., 0] = False indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove) next_token_logits[indices_to_remove] = float("-inf") if do_sample: probs = F.softmax(next_token_logits, dim=-1) next_tokens = torch.multinomial(probs, num_samples=1) else: next_tokens = torch.argmax(next_token_logits, dim=-1, keepdim=True) input_ids = torch.cat([input_ids, next_tokens], dim=1) if eos_token_id is not None and (next_tokens == eos_token_id).all(): break return input_ids @classmethod def 
from_pretrained(cls, repo_id: str, device: str = "cpu"): config_path = hf_hub_download(repo_id=repo_id, filename="config.json") model_path = hf_hub_download(repo_id=repo_id, filename="model.pt") config = OrchConfig.load(config_path) model = cls(config) state_dict = torch.load(model_path, map_location=device, weights_only=True) model.load_state_dict(state_dict) return model.to(device) # ============================================================================= # Project Templates # ============================================================================= PROJECT_TEMPLATES = { "blank": { "name": "Blank Project", "description": "Start from scratch with a minimal Next.js setup", "prefix": "" }, "saas": { "name": "SaaS Application", "description": "Full SaaS with auth, billing, dashboard", "prefix": "Create a modern SaaS application with user authentication, subscription billing integration, admin dashboard, and " }, "ecommerce": { "name": "E-Commerce Store", "description": "Online store with products, cart, checkout", "prefix": "Build an e-commerce store with product catalog, shopping cart, checkout flow, order management, and " }, "dashboard": { "name": "Admin Dashboard", "description": "Data visualization and management dashboard", "prefix": "Create an admin dashboard with data tables, charts, user management, settings, and " }, "portfolio": { "name": "Portfolio Website", "description": "Personal/professional portfolio site", "prefix": "Build a modern portfolio website with project showcase, about section, skills display, contact form, and " }, "blog": { "name": "Blog Platform", "description": "Content management and blog system", "prefix": "Create a blog platform with markdown support, categories, tags, comments, search, and " }, "api": { "name": "API Backend", "description": "RESTful API with authentication", "prefix": "Build a RESTful API backend with JWT authentication, rate limiting, validation, error handling, and " } } # 
# =============================================================================
# Project Parsing and Packaging
# =============================================================================

def parse_project_output(output: str) -> Dict[str, str]:
    """Parse model output into a {relative_path: file_content} dictionary.

    Tries markdown code fences annotated with a file path first, then falls
    back to the model's special-token file format.
    """
    files = {}
    # Pattern 1: Markdown code blocks with file paths, e.g. ```tsx app/page.tsx
    md_pattern = re.compile(
        r'```(?:\w+\s+)?([^\n`]+\.[a-zA-Z]+)\n([\s\S]*?)```',
        re.MULTILINE
    )
    for match in md_pattern.finditer(output):
        file_path = match.group(1).strip()
        content = match.group(2).strip()
        # FIX: original condition was `a and b and '/' in p or '.' in p`, which
        # due to precedence accepted entries with empty content whenever the
        # path merely contained a dot. Parenthesize the path check.
        if file_path and content and ('/' in file_path or '.' in file_path):
            # FIX: lstrip('./') strips *characters*, mangling dotfiles
            # (".env" -> "env"). Remove only leading "./" prefixes and slashes.
            while file_path.startswith('./'):
                file_path = file_path[2:]
            file_path = file_path.lstrip('/')
            if not file_path.startswith('node_modules') and len(content) > 10:
                files[file_path] = content
    # Pattern 2: Special token format (fallback)
    if not files:
        special_pattern = re.compile(
            r'<\|file\|>([^<]+)<\|end_path\|>\s*([\s\S]*?)<\|end_file\|>',
            re.MULTILINE
        )
        for match in special_pattern.finditer(output):
            file_path = match.group(1).strip()
            content = match.group(2).strip()
            if file_path and content:
                files[file_path] = content
    return files


def create_zip_archive(files: Dict[str, str], project_name: str, include_extras: bool = True) -> str:
    """Create a ZIP archive from a files dictionary; returns the archive path.

    Always adds a README; when include_extras is True also adds .env.example
    and .gitignore if the generated project did not include them.
    """
    temp_dir = tempfile.mkdtemp()
    zip_path = os.path.join(temp_dir, f"{project_name}.zip")
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for file_path, content in files.items():
            full_path = f"{project_name}/{file_path}"
            zipf.writestr(full_path, content)
        # Add README
        readme = generate_readme(project_name, len(files))
        zipf.writestr(f"{project_name}/README.md", readme)
        if include_extras:
            # Add .env.example if not present
            if ".env" not in files and ".env.example" not in files:
                env_example = generate_env_example()
                zipf.writestr(f"{project_name}/.env.example", env_example)
            # Add .gitignore if not present
            if ".gitignore" not in files:
                gitignore = generate_gitignore()
                zipf.writestr(f"{project_name}/.gitignore", gitignore)
    return zip_path


def generate_readme(project_name: str, file_count: int) -> str:
    """Generate README.md content for the packaged project."""
    # FIX: timestamp is labeled "UTC" in the README but was produced with
    # local-time datetime.now(); use utcnow() so label and value agree.
    generated_at = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC")
    return f'''# {project_name}

Generated by [ORCH Studio](https://huggingface.co/spaces/raihan-js/orch-studio) - Powered by ORCH-7B

## Quick Start

```bash
# Install dependencies
npm install

# Set up environment variables
cp .env.example .env
# Edit .env with your configuration

# Set up database (if using Prisma)
npx prisma generate
npx prisma db push

# Start development server
npm run dev
```

Open [http://localhost:3000](http://localhost:3000) to view your application.

## Tech Stack

- **Framework**: Next.js 14 (App Router)
- **Language**: TypeScript
- **Styling**: Tailwind CSS
- **Components**: Radix UI / shadcn/ui compatible
- **Database**: Prisma ORM (SQLite/PostgreSQL)
- **Authentication**: NextAuth.js patterns

## Project Structure

```
{project_name}/
├── app/          # Next.js App Router pages
├── components/   # React components
├── lib/          # Utility functions
├── prisma/       # Database schema
└── public/       # Static assets
```

## Scripts

- `npm run dev` - Start development server
- `npm run build` - Build for production
- `npm run start` - Start production server
- `npm run lint` - Run ESLint

## Generated Info

- **Files**: {file_count}
- **Generated**: {generated_at}
- **Model**: ORCH-7B (QLoRA fine-tuned)

## Links

- [ORCH Studio](https://huggingface.co/spaces/raihan-js/orch-studio)
- [ORCH-7B Model](https://huggingface.co/orch-ai/ORCH-7B)
- [ORCH AI Organization](https://huggingface.co/orch-ai)

---

*Built with ORCH - Orchestrated Recursive Code Hierarchy*
'''


def generate_env_example() -> str:
    """Generate a default .env.example file."""
    return '''# Database
DATABASE_URL="file:./dev.db"
# DATABASE_URL="postgresql://user:password@localhost:5432/mydb"

# Authentication (NextAuth.js)
NEXTAUTH_URL="http://localhost:3000"
NEXTAUTH_SECRET="your-secret-key-here"

# OAuth Providers (optional)
# GITHUB_ID=""
# GITHUB_SECRET=""
# GOOGLE_ID=""
# GOOGLE_SECRET=""

# Stripe (optional)
# STRIPE_SECRET_KEY=""
# STRIPE_WEBHOOK_SECRET=""
# NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=""

# Email (optional)
# SMTP_HOST=""
# SMTP_PORT=""
# SMTP_USER=""
# SMTP_PASSWORD=""
'''


def generate_gitignore() -> str:
    """Generate a default .gitignore file."""
    return '''# Dependencies
node_modules/
.pnpm-store/

# Build
.next/
out/
build/
dist/

# Environment
.env
.env.local
.env.*.local

# Database
*.db
*.sqlite

# IDE
.vscode/
.idea/

# OS
.DS_Store
Thumbs.db

# Logs
*.log
npm-debug.log*

# Testing
coverage/
.nyc_output/

# Prisma
prisma/migrations/

# Misc
*.tsbuildinfo
next-env.d.ts
'''


# =============================================================================
# Model Loading
# =============================================================================

MODEL_7B_ID = os.environ.get("MODEL_ID", "orch-ai/ORCH-7B")
MODEL_3B_ID = "raihan-js/orch-nextjs-3b"
USE_7B = os.environ.get("USE_7B", "true").lower() == "true"

print(f"[ORCH] Model selection: {'ORCH-7B' if USE_7B else 'ORCH-3B'}")
print(f"[ORCH] Model ID: {MODEL_7B_ID if USE_7B else MODEL_3B_ID}")

# Module-level model state, populated by the loaders below.
model = None
tokenizer = None
MODEL_TYPE = None          # "7b" (transformers) or "3b" (native OrchForCausalLM)
MODEL_LOADED = False
ERROR_MSG = ""
MODEL_LOAD_TIME = None     # seconds, set after a successful load


def load_7b_model():
    """Load the ORCH-7B checkpoint via transformers (GPU if available, else CPU)."""
    global model, tokenizer, MODEL_TYPE, MODEL_LOAD_TIME
    from transformers import AutoModelForCausalLM, AutoTokenizer
    import time
    hf_token = os.environ.get("HF_TOKEN")
    print(f"[ORCH] Loading ORCH-7B from {MODEL_7B_ID}...")
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(MODEL_7B_ID, trust_remote_code=True, token=hf_token)
    # Check if GPU is available and load accordingly
    if torch.cuda.is_available():
        print("[ORCH] GPU detected, loading with device_map='auto'...")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_7B_ID,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            token=hf_token
        )
    else:
        print("[ORCH] No GPU, loading on CPU (this will be slow)...")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_7B_ID,
            torch_dtype=torch.float32,  # Use float32 for CPU
            trust_remote_code=True,
            token=hf_token,
            low_cpu_mem_usage=True
        )
    model.eval()
    MODEL_TYPE = "7b"
    MODEL_LOAD_TIME = time.time() - start_time
    print(f"[ORCH] ORCH-7B loaded successfully in {MODEL_LOAD_TIME:.1f}s!")


def load_3b_model():
    """Load the native ORCH-3B checkpoint and its tokenizers Tokenizer."""
    global model, tokenizer, MODEL_TYPE, MODEL_LOAD_TIME
    import time
    print(f"[ORCH] Loading ORCH-3B from {MODEL_3B_ID}...")
    start_time = time.time()
    model = OrchForCausalLM.from_pretrained(MODEL_3B_ID, device="cpu")
    model.eval()
    tokenizer_path = hf_hub_download(repo_id=MODEL_3B_ID, filename="tokenizer.json")
    tokenizer = Tokenizer.from_file(tokenizer_path)
    MODEL_TYPE = "3b"
    MODEL_LOAD_TIME = time.time() - start_time
    print(f"[ORCH] ORCH-3B loaded successfully in {MODEL_LOAD_TIME:.1f}s!")


# Load at import time: prefer 7B, fall back to 3B, and record any failure
# so the UI can report it instead of crashing the Space.
try:
    if USE_7B:
        try:
            load_7b_model()
            MODEL_LOADED = True
        except Exception as e7b:
            print(f"[ORCH] ORCH-7B not available: {e7b}")
            print("[ORCH] Falling back to ORCH-3B...")
            load_3b_model()
            MODEL_LOADED = True
    else:
        load_3b_model()
        MODEL_LOADED = True
except Exception as e:
    MODEL_LOADED = False
    ERROR_MSG = str(e)
    print(f"[ORCH] Error loading model: {e}")


# =============================================================================
# Generation Functions
# =============================================================================

# Alpaca-style instruction template used for the 7B model.
INSTRUCTION_TEMPLATE = """### Instruction:
{instruction}

### Response:
"""


@spaces.GPU(duration=60)
def generate_project_gpu(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[str, str, Optional[str]]:
    """Generate project with GPU acceleration."""
    return _generate_project_core(prompt, template, max_tokens, temperature, top_p, include_extras, True, progress)


def generate_project_cpu(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 2048,  # Lower for CPU
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[str, str, Optional[str]]:
    """Generate project on CPU (slower but always available)."""
    return _generate_project_core(prompt, template, max_tokens, temperature, top_p, include_extras, False, progress)


@spaces.GPU(duration=60)
def generate_project(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[str, str, Optional[str]]:
    """Generate a complete Next.js project using GPU (ZeroGPU required).

    NOTE(review): identical to generate_project_gpu; kept for backward
    compatibility with existing UI wiring.
    """
    return _generate_project_core(prompt, template, max_tokens, temperature, top_p, include_extras, True, progress)


def _generate_project_core(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    use_gpu: bool = True,
    progress=None
) -> Tuple[str, str, Optional[str]]:
    """Core implementation for project generation.

    Returns (status_message, raw_generation_preview, zip_path_or_None).
    """
    if not MODEL_LOADED:
        return f"Error: Model failed to load.\n\nDetails: {ERROR_MSG}", "", None
    if not prompt.strip() or len(prompt.strip()) < 10:
        return "Please enter a detailed project description (at least 10 characters).", "", None
    try:
        if progress is not None:
            progress(0.1, desc="Preparing generation...")
    except Exception:
        # FIX: was a bare `except:` (also caught KeyboardInterrupt/SystemExit).
        pass  # Progress might not be available in some contexts
    # FIX: honor use_gpu — the CPU entry point previously still picked CUDA
    # whenever a GPU happened to be visible.
    device = torch.device("cuda" if (use_gpu and torch.cuda.is_available()) else "cpu")
    print(f"[ORCH] Using device: {device}")
    # Apply template prefix
    template_info = PROJECT_TEMPLATES.get(template, PROJECT_TEMPLATES["blank"])
    full_prompt = template_info["prefix"] + prompt.strip()

    def update_progress(val, desc=""):
        try:
            if progress is not None:
                progress(val, desc=desc)
        except Exception:
            pass  # Progress might not be available

    if MODEL_TYPE == "7b":
        update_progress(0.2, f"Generating with ORCH-7B on {device}...")
        instruction = f"Create a complete Next.js full-stack application: {full_prompt}"
        formatted_prompt = INSTRUCTION_TEMPLATE.format(instruction=instruction)
        inputs = tokenizer(formatted_prompt, return_tensors="pt", truncation=True, max_length=2048).to(device)
        update_progress(0.3, f"Generating project on {'GPU' if use_gpu else 'CPU'} (this may take a while)...")
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
            )
        # Decode only the newly generated portion; keep special tokens since
        # the parser may rely on the <|file|> markers.
        input_length = inputs['input_ids'].shape[1]
        generated_ids = outputs[0][input_length:]
        generated = tokenizer.decode(generated_ids, skip_special_tokens=False)
    else:
        update_progress(0.2, f"Loading ORCH-3B to {'GPU' if use_gpu else 'CPU'}...")
        model.to(device)
        if use_gpu and torch.cuda.is_available():
            model.to(torch.bfloat16)
        formatted_prompt = f"// {full_prompt}\n"
        encoded = tokenizer.encode(formatted_prompt)
        input_ids = torch.tensor([encoded.ids], device=device)
        update_progress(0.3, "Generating code...")
        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=50,
                do_sample=True,
            )
        generated = tokenizer.decode(output_ids[0].tolist())

    update_progress(0.8, "Processing output...")
    files = parse_project_output(generated)
    if files:
        update_progress(0.9, "Creating project archive...")
        project_name = "orch-project-" + datetime.now().strftime("%Y%m%d-%H%M%S")
        zip_path = create_zip_archive(files, project_name, include_extras)
        file_list = "\n".join([f"  {f}" for f in sorted(files.keys())[:20]])
        if len(files) > 20:
            file_list += f"\n  ... and {len(files) - 20} more files"
        status = f"Successfully generated {len(files)} files!\n\nTemplate: {template_info['name']}\nDevice: {'GPU' if use_gpu else 'CPU'}\n\nFiles:\n{file_list}"
        update_progress(1.0, "Done!")
        return status, generated[:10000], zip_path
    else:
        update_progress(1.0, "Done!")
        return "Code generated (no structured project detected - try a more detailed prompt)", generated[:10000], None


def _generate_code_core(
    prompt: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50,
    use_gpu: bool = True
):
    """Core implementation for single-snippet code generation."""
    if not MODEL_LOADED:
        return f"Error: Model failed to load.\n\nDetails: {ERROR_MSG}"
    if not prompt.strip():
        return "Please enter a prompt describing what you want to generate."
    # Get the device the model is currently on (don't try to move it)
    if hasattr(model, 'device'):
        device = model.device
    else:
        # For models with device_map="auto", get device from first parameter
        device = next(model.parameters()).device
    print(f"[ORCH] Code generation using device: {device}")
    if MODEL_TYPE == "7b":
        instruction = prompt.strip()
        formatted_prompt = INSTRUCTION_TEMPLATE.format(instruction=instruction)
        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                repetition_penalty=1.1,
            )
        input_length = inputs['input_ids'].shape[1]
        generated = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
        return generated
    else:
        encoded = tokenizer.encode(f"// {prompt.strip()}\n")
        input_ids = torch.tensor([encoded.ids], device=device)
        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                do_sample=True
            )
        return tokenizer.decode(output_ids[0].tolist())


@spaces.GPU
def generate_code_gpu(
    prompt: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50
):
    """Generate code with GPU."""
    return _generate_code_core(prompt, max_tokens, temperature, top_p, top_k, True)


def generate_code_cpu(
    prompt: str,
    max_tokens: int = 512,  # Lower for CPU
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50
):
    """Generate code on CPU."""
    return _generate_code_core(prompt, max_tokens, temperature, top_p, top_k, False)


@spaces.GPU
def generate_code(
    prompt: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50
):
    """Generate Next.js code snippet using GPU (ZeroGPU required)."""
    return _generate_code_core(prompt, max_tokens, temperature, top_p, top_k, True)


def get_model_info() -> str:
    """Return a human-readable summary of the loaded model and GPU state."""
    if not MODEL_LOADED:
        return f"Model Status: Failed to load\nError: {ERROR_MSG}"
    gpu_info = "Available" if torch.cuda.is_available() else "Not available"
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        gpu_info = f"{gpu_name} ({gpu_memory:.1f} GB)"
    return f"""Model: ORCH-{MODEL_TYPE.upper()}
Repository: {MODEL_7B_ID if MODEL_TYPE == "7b" else MODEL_3B_ID}
Load Time: {MODEL_LOAD_TIME:.1f}s
GPU: {gpu_info}
Status: Ready"""


# =============================================================================
# Professional UI with Brand Colors
# =============================================================================

# Brand Colors (Khaki/Earthy)
BRAND_PRIMARY = "#D4A574"
BRAND_PRIMARY_MID = "#C4956A"
BRAND_PRIMARY_DARK = "#A67C52"
BRAND_DEEP = "#5D4E37"
BRAND_BG_DARK = "#1a1512"
BRAND_BG_LIGHT = "#2d2420"
BRAND_TEXT = "#E8DED5"
BRAND_TEXT_MUTED = "#9C8B7A"
Include dark mode."], ["Build a task management app with kanban boards, drag-and-drop, user authentication, and team collaboration features."], ["Create an e-commerce store for digital products with Stripe payments, download management, and customer dashboard."], ["Build a personal portfolio website for a developer with project showcase, blog, skills section, and contact form."], ["Create a real-time chat application with channels, direct messaging, file sharing, and user presence indicators."], ["Build a restaurant booking system with table management, menu display, online reservations, and admin panel."], ] CODE_EXAMPLES = [ ["Create a Next.js dashboard page with user statistics cards showing total users, active sessions, and revenue"], ["Build a login form component with email and password fields, validation, and error handling"], ["Generate an API route for user authentication that handles login with JWT tokens"], ["Create a responsive navbar component with logo, navigation links, and mobile hamburger menu"], ["Build a product card component with image, title, price, rating, and add to cart button"], ["Create a data table component with sorting, filtering, and pagination"], ] MODEL_INFO = f"ORCH-{MODEL_TYPE.upper() if MODEL_TYPE else '7B'}" # Professional CSS CUSTOM_CSS = f""" /* Global Styles */ .gradio-container {{ max-width: 1400px !important; margin: 0 auto !important; background: linear-gradient(180deg, {BRAND_BG_DARK} 0%, #0f0d0b 100%) !important; min-height: 100vh; }} .dark {{ --background-fill-primary: {BRAND_BG_DARK} !important; --background-fill-secondary: {BRAND_BG_LIGHT} !important; --border-color-primary: {BRAND_DEEP} !important; }} /* Header */ .header-container {{ background: linear-gradient(135deg, {BRAND_BG_LIGHT} 0%, {BRAND_BG_DARK} 100%); border: 1px solid {BRAND_DEEP}; border-radius: 16px; padding: 2rem; margin-bottom: 1.5rem; text-align: center; }} .logo-container {{ display: flex; align-items: center; justify-content: center; gap: 1rem; 
margin-bottom: 0.75rem; }} .logo-icon {{ width: 64px; height: 64px; border-radius: 16px; }} .brand-title {{ font-size: 2.5rem; font-weight: 700; background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; margin: 0; }} .brand-tagline {{ color: {BRAND_TEXT_MUTED}; font-size: 1rem; margin: 0.25rem 0; letter-spacing: 0.5px; }} .brand-subtitle {{ color: {BRAND_TEXT}; font-size: 1.1rem; margin: 0.5rem 0; }} .model-badge {{ display: inline-flex; align-items: center; gap: 0.5rem; background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%); color: {BRAND_BG_DARK}; padding: 0.5rem 1rem; border-radius: 20px; font-weight: 600; font-size: 0.9rem; margin-top: 0.75rem; }} .model-badge-dot {{ width: 8px; height: 8px; background: {BRAND_BG_DARK}; border-radius: 50%; animation: pulse 2s infinite; }} @keyframes pulse {{ 0%, 100% {{ opacity: 1; }} 50% {{ opacity: 0.5; }} }} /* Tabs */ .tabs {{ border: none !important; background: transparent !important; }} .tab-nav {{ background: {BRAND_BG_LIGHT} !important; border: 1px solid {BRAND_DEEP} !important; border-radius: 12px !important; padding: 0.5rem !important; margin-bottom: 1rem !important; }} .tab-nav button {{ background: transparent !important; border: none !important; color: {BRAND_TEXT_MUTED} !important; padding: 0.75rem 1.5rem !important; border-radius: 8px !important; font-weight: 500 !important; transition: all 0.2s ease !important; }} .tab-nav button.selected {{ background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%) !important; color: {BRAND_BG_DARK} !important; }} .tab-nav button:hover:not(.selected) {{ background: rgba(212, 165, 116, 0.1) !important; color: {BRAND_PRIMARY} !important; }} /* Input Fields */ .input-container textarea, .input-container input {{ background: {BRAND_BG_LIGHT} !important; border: 1px solid {BRAND_DEEP} !important; border-radius: 
12px !important; color: {BRAND_TEXT} !important; padding: 1rem !important; font-size: 1rem !important; transition: border-color 0.2s ease !important; }} .input-container textarea:focus, .input-container input:focus {{ border-color: {BRAND_PRIMARY} !important; box-shadow: 0 0 0 3px rgba(212, 165, 116, 0.1) !important; }} /* Buttons */ .primary-btn {{ background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%) !important; color: {BRAND_BG_DARK} !important; border: none !important; border-radius: 12px !important; padding: 0.875rem 2rem !important; font-weight: 600 !important; font-size: 1rem !important; cursor: pointer !important; transition: all 0.2s ease !important; box-shadow: 0 4px 12px rgba(212, 165, 116, 0.3) !important; }} .primary-btn:hover {{ transform: translateY(-2px) !important; box-shadow: 0 6px 20px rgba(212, 165, 116, 0.4) !important; }} /* Template Cards */ .template-card {{ background: {BRAND_BG_LIGHT}; border: 1px solid {BRAND_DEEP}; border-radius: 12px; padding: 1rem; cursor: pointer; transition: all 0.2s ease; }} .template-card:hover {{ border-color: {BRAND_PRIMARY}; transform: translateY(-2px); }} .template-card.selected {{ border-color: {BRAND_PRIMARY}; background: rgba(212, 165, 116, 0.1); }} /* Info Box */ .info-box {{ background: {BRAND_BG_LIGHT}; border: 1px solid {BRAND_DEEP}; border-radius: 12px; padding: 1rem; color: {BRAND_TEXT}; font-family: monospace; font-size: 0.9rem; }} /* Footer */ .footer {{ text-align: center; padding: 2rem; color: {BRAND_TEXT_MUTED}; font-size: 0.9rem; }} .footer a {{ color: {BRAND_PRIMARY}; text-decoration: none; }} .footer a:hover {{ text-decoration: underline; }} """ # ============================================================================= # Gradio Interface # ============================================================================= with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Base(), title="ORCH Studio") as demo: # Header gr.HTML(f"""
ORCH

ORCH Studio

Orchestrated Recursive Code Hierarchy

Generate complete, production-ready Next.js applications from natural language

{MODEL_INFO} - QLoRA Fine-tuned (43h A100 Training)
""") with gr.Tabs(): # Tab 1: Full Project Generation with gr.TabItem("Full Project", id="project"): with gr.Row(): with gr.Column(scale=2): project_prompt = gr.Textbox( label="Project Description", placeholder="Describe your Next.js application in detail. Include features, pages, and functionality you want...", lines=6, elem_classes=["input-container"] ) with gr.Row(): template_dropdown = gr.Dropdown( label="Project Template", choices=[(v["name"], k) for k, v in PROJECT_TEMPLATES.items()], value="blank", info="Select a template to get started faster" ) include_extras = gr.Checkbox( label="Include extras (.gitignore, .env.example)", value=True ) with gr.Row(): max_tokens = gr.Slider( minimum=1024, maximum=8192, value=4096, step=256, label="Max Tokens", info="More tokens = larger project" ) temperature = gr.Slider( minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature", info="Higher = more creative" ) top_p = gr.Slider( minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top P", info="Nucleus sampling" ) generate_btn = gr.Button( "Generate Project", variant="primary", size="lg", elem_classes=["primary-btn"] ) gr.Examples( examples=PROJECT_EXAMPLES, inputs=project_prompt, label="Example Prompts" ) with gr.Column(scale=1): status_output = gr.Textbox( label="Generation Status", lines=12, interactive=False, elem_classes=["status-box"] ) download_file = gr.File( label="Download Project", elem_classes=["file-download"] ) with gr.Accordion("Generated Code Preview", open=False): code_preview = gr.Code( label="Raw Output", language="markdown", lines=20, elem_classes=["code-output"] ) # Tab 2: Code Snippet Generation with gr.TabItem("Code Snippet", id="snippet"): with gr.Row(): with gr.Column(scale=2): code_prompt = gr.Textbox( label="Code Request", placeholder="Describe the component, function, or feature you want to generate...", lines=4, elem_classes=["input-container"] ) with gr.Row(): code_max_tokens = gr.Slider( minimum=256, maximum=2048, value=1024, 
step=128, label="Max Tokens" ) code_temperature = gr.Slider( minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature" ) code_top_p = gr.Slider( minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top P" ) code_generate_btn = gr.Button( "Generate Code", variant="primary", size="lg", elem_classes=["primary-btn"] ) gr.Examples( examples=CODE_EXAMPLES, inputs=code_prompt, label="Example Prompts" ) with gr.Column(scale=2): code_output = gr.Code( label="Generated Code", language="typescript", lines=25, elem_classes=["code-output"] ) # Tab 3: Model Info with gr.TabItem("Model Info", id="info"): with gr.Row(): with gr.Column(): gr.Markdown(f""" ## ORCH-7B Model **Latest QLoRA Fine-tuned Model** (January 2025) | Specification | Value | |--------------|-------| | Base Model | DeepSeek Coder 6.7B Instruct | | Fine-tuning | QLoRA (4-bit quantization + LoRA) | | Training Time | 43 hours on A100 GPU | | Training Steps | 5,238 steps | | Focus | Next.js 14+ full-stack applications | | Output | Complete downloadable projects | ### Training Data - Curated Next.js 14+ projects from GitHub - Synthetic instruction-response pairs - Focus on TypeScript, Tailwind CSS, Prisma ### Capabilities - Full-stack application generation - Component and API route creation - Database schema design - Authentication patterns - Responsive UI with Tailwind CSS ### Links - [ORCH-7B on HuggingFace](https://huggingface.co/orch-ai/ORCH-7B) - [ORCH AI Organization](https://huggingface.co/orch-ai) - [raihan-js/orch-7b](https://huggingface.co/raihan-js/orch-7b) """) with gr.Column(): model_info_display = gr.Textbox( label="Current Session", value=get_model_info(), lines=8, interactive=False, elem_classes=["info-box"] ) refresh_btn = gr.Button("Refresh Status") refresh_btn.click(fn=get_model_info, outputs=model_info_display) # Footer gr.HTML(f""" """) # Event handlers generate_btn.click( fn=generate_project, inputs=[project_prompt, template_dropdown, max_tokens, temperature, top_p, include_extras], 
outputs=[status_output, code_preview, download_file] ) code_generate_btn.click( fn=generate_code, inputs=[code_prompt, code_max_tokens, code_temperature, code_top_p], outputs=code_output ) if __name__ == "__main__": demo.launch()