# orch-studio / app.py
# Source: Hugging Face Space "raihan-js/orch-studio"
# v1.0.1: Fixed GPU duration to 60s for ZeroGPU compatibility (commit ae61038, verified)
"""
ORCH Studio - Autonomous Next.js Code Generation
Generate complete, production-ready Next.js applications from natural language prompts.
Now powered by the latest QLoRA fine-tuned ORCH-7B model (43h training on A100).
https://huggingface.co/orch-ai
Version: 1.0.1 - Fixed GPU duration limits for ZeroGPU
"""
# Standard library
import os
import re
import json
import math
import tempfile
import zipfile
from dataclasses import dataclass
from datetime import datetime
from datetime import timezone
from pathlib import Path
from typing import Optional, Dict, Any, Tuple, List

# Third-party
import torch
import torch.nn as nn
import torch.nn.functional as F
import gradio as gr
import spaces
from huggingface_hub import hf_hub_download
from tokenizers import Tokenizer
# =============================================================================
# Model Configuration
# =============================================================================
@dataclass
class OrchConfig:
    """Hyperparameters for the Orch transformer architecture."""
    model_name: str = "orch"
    model_size: str = "3b"
    vocab_size: int = 32000
    hidden_size: int = 2560
    intermediate_size: int = 10240
    num_hidden_layers: int = 32
    num_attention_heads: int = 32
    num_key_value_heads: int = 8
    max_position_embeddings: int = 16384
    rms_norm_eps: float = 1e-5
    rope_theta: float = 10000.0
    hidden_dropout: float = 0.0
    attention_dropout: float = 0.0
    use_flash_attention: bool = False
    tie_word_embeddings: bool = False
    rope_scaling_type: Optional[str] = None
    rope_scaling_factor: float = 1.0
    original_max_position_embeddings: Optional[int] = None
    initializer_range: float = 0.02
    @property
    def head_dim(self) -> int:
        """Dimensionality of each attention head."""
        return self.hidden_size // self.num_attention_heads
    @property
    def num_kv_groups(self) -> int:
        """How many query heads share one key/value head (GQA ratio)."""
        return self.num_attention_heads // self.num_key_value_heads
    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> "OrchConfig":
        """Build a config, silently dropping keys the dataclass doesn't declare."""
        known = cls.__dataclass_fields__
        return cls(**{key: value for key, value in config_dict.items() if key in known})
    @classmethod
    def load(cls, path: str) -> "OrchConfig":
        """Read a JSON config file and build an OrchConfig from it."""
        with open(path, "r") as handle:
            return cls.from_dict(json.load(handle))
# =============================================================================
# Model Layers
# =============================================================================
class RMSNorm(nn.Module):
    """Root-mean-square layer normalization (no mean-centering, no bias)."""
    def __init__(self, hidden_size: int, eps: float = 1e-6):
        super().__init__()
        # Learnable per-channel gain, initialized to ones.
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.eps = eps
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Normalize by the RMS over the last dimension, then rescale.
        scale = torch.sqrt(x.pow(2).mean(dim=-1, keepdim=True) + self.eps)
        return self.weight * (x / scale)
class SwiGLU(nn.Module):
    """Gated feed-forward block: down_proj(silu(gate_proj(x)) * up_proj(x))."""
    def __init__(self, hidden_size: int, intermediate_size: int, bias: bool = False, dropout: float = 0.0):
        super().__init__()
        self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=bias)
        self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=bias)
        self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=bias)
        # Identity when dropout is disabled, so forward() stays branch-free.
        self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        gated = F.silu(self.gate_proj(x)) * self.up_proj(x)
        return self.down_proj(self.dropout(gated))
class RotaryEmbedding(nn.Module):
    """Rotary position embedding (RoPE) with a lazily grown cos/sin cache.

    Supports optional "linear" position-interpolation scaling: positions are
    divided by ``scaling_factor`` before the frequency outer product.
    """
    def __init__(
        self,
        dim: int,
        max_position_embeddings: int = 2048,
        base: float = 10000.0,
        scaling_type: Optional[str] = None,
        scaling_factor: float = 1.0,
        original_max_position_embeddings: Optional[int] = None,
    ):
        super().__init__()
        self.dim = dim
        self.max_position_embeddings = max_position_embeddings
        self.base = base
        self.scaling_type = scaling_type
        self.scaling_factor = scaling_factor
        self.original_max_position_embeddings = original_max_position_embeddings or max_position_embeddings
        # Per-pair inverse frequencies: base^(-2i/dim) for i in [0, dim/2).
        inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float() / self.dim))
        self.register_buffer("inv_freq", inv_freq, persistent=False)
        self._set_cos_sin_cache(max_position_embeddings)
    def _set_cos_sin_cache(self, seq_len: int):
        """(Re)build the cos/sin tables covering positions [0, seq_len)."""
        self.max_seq_len_cached = seq_len
        t = torch.arange(seq_len, device=self.inv_freq.device, dtype=self.inv_freq.dtype)
        if self.scaling_type == "linear":
            # Position interpolation: squeeze positions into the trained range.
            t = t / self.scaling_factor
        freqs = torch.outer(t, self.inv_freq)
        # Duplicate so the table spans the full head dim (both rotated halves).
        emb = torch.cat((freqs, freqs), dim=-1)
        self.register_buffer("cos_cached", emb.cos(), persistent=False)
        self.register_buffer("sin_cached", emb.sin(), persistent=False)
    def forward(self, x: torch.Tensor, position_ids: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return (cos, sin) gathered at ``position_ids``, cast to ``x``'s dtype."""
        seq_len = position_ids.max().item() + 1
        if seq_len > self.max_seq_len_cached:
            # Grow the cache on demand for sequences longer than seen so far.
            self._set_cos_sin_cache(seq_len)
        cos = self.cos_cached[position_ids].to(x.dtype)
        sin = self.sin_cached[position_ids].to(x.dtype)
        return cos, sin
def rotate_half(x: torch.Tensor) -> torch.Tensor:
    """Swap the two halves of the last dim, negating the (new) first half."""
    half = x.shape[-1] // 2
    first, second = x[..., :half], x[..., half:]
    return torch.cat((-second, first), dim=-1)
def apply_rotary_pos_emb(q, k, cos, sin):
    """Rotate query and key tensors by the cached cos/sin position tables."""
    # Insert a head axis so the tables broadcast over (batch, heads, seq, dim).
    cos, sin = cos.unsqueeze(1), sin.unsqueeze(1)
    rotated_q = q * cos + rotate_half(q) * sin
    rotated_k = k * cos + rotate_half(k) * sin
    return rotated_q, rotated_k
def create_causal_mask(seq_len: int, device: torch.device, dtype: torch.dtype = torch.float32) -> torch.Tensor:
    """Additive causal mask: 0 on/below the diagonal, -inf strictly above."""
    filled = torch.full((seq_len, seq_len), float("-inf"), device=device, dtype=dtype)
    mask = torch.triu(filled, diagonal=1)
    # Shape (1, 1, seq, seq) so it broadcasts over batch and head axes.
    return mask[None, None, :, :]
class OrchAttention(nn.Module):
    """Multi-head self-attention with grouped-query KV heads and RoPE."""
    def __init__(self, config: OrchConfig, layer_idx: int):
        super().__init__()
        self.config = config
        self.layer_idx = layer_idx
        self.hidden_size = config.hidden_size
        self.num_heads = config.num_attention_heads
        self.num_kv_heads = config.num_key_value_heads
        self.head_dim = config.head_dim
        self.num_kv_groups = config.num_kv_groups
        self.attention_dropout = config.attention_dropout
        # K/V projections are smaller than Q when num_kv_heads < num_heads (GQA).
        self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)
        self.k_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False)
        self.v_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False)
        self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)
        self.rotary_emb = RotaryEmbedding(
            self.head_dim,
            max_position_embeddings=config.max_position_embeddings,
            base=config.rope_theta,
            scaling_type=config.rope_scaling_type,
            scaling_factor=config.rope_scaling_factor,
            original_max_position_embeddings=config.original_max_position_embeddings,
        )
    def forward(self, hidden_states, attention_mask=None, position_ids=None):
        """Standard scaled-dot-product attention; no KV cache is kept.

        ``attention_mask`` is expected to be an additive mask (e.g. the causal
        mask from create_causal_mask) broadcastable to the score tensor.
        """
        batch_size, seq_len, _ = hidden_states.shape
        # Project and reshape to (batch, heads, seq, head_dim).
        query_states = self.q_proj(hidden_states).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        key_states = self.k_proj(hidden_states).view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2)
        cos, sin = self.rotary_emb(value_states, position_ids)
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
        # Expand the shared KV heads so every query head has a matching K/V.
        if self.num_kv_groups > 1:
            key_states = key_states.repeat_interleave(self.num_kv_groups, dim=1)
            value_states = value_states.repeat_interleave(self.num_kv_groups, dim=1)
        scale = 1.0 / math.sqrt(self.head_dim)
        attn_weights = torch.matmul(query_states, key_states.transpose(-2, -1)) * scale
        if attention_mask is not None:
            attn_weights = attn_weights + attention_mask
        # Softmax in float32 for numerical stability, then cast back.
        attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
        attn_weights = F.dropout(attn_weights, p=self.attention_dropout, training=self.training)
        attn_output = torch.matmul(attn_weights, value_states)
        attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, self.num_heads * self.head_dim)
        return self.o_proj(attn_output)
class OrchBlock(nn.Module):
    """Pre-norm transformer layer: attention then SwiGLU MLP, each with a residual."""
    def __init__(self, config: OrchConfig, layer_idx: int):
        super().__init__()
        self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.attention = OrchAttention(config, layer_idx)
        self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.feed_forward = SwiGLU(config.hidden_size, config.intermediate_size, dropout=config.hidden_dropout)
        self.dropout = nn.Dropout(config.hidden_dropout)
    def forward(self, hidden_states, attention_mask=None, position_ids=None):
        # Attention sub-layer: pre-norm, attend, dropout, residual add.
        attn_out = self.attention(self.input_layernorm(hidden_states), attention_mask, position_ids)
        hidden_states = hidden_states + self.dropout(attn_out)
        # Feed-forward sub-layer: pre-norm, MLP, dropout, residual add.
        mlp_out = self.feed_forward(self.post_attention_layernorm(hidden_states))
        return hidden_states + self.dropout(mlp_out)
class OrchModel(nn.Module):
    """Decoder-only transformer backbone: embeddings -> N blocks -> final norm."""
    def __init__(self, config: OrchConfig):
        super().__init__()
        self.config = config
        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size)
        self.embed_dropout = nn.Dropout(config.hidden_dropout)
        self.layers = nn.ModuleList([OrchBlock(config, i) for i in range(config.num_hidden_layers)])
        self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
    def forward(self, input_ids, attention_mask=None, position_ids=None):
        """Return final hidden states, shape (batch, seq, hidden_size).

        NOTE: ``attention_mask`` is accepted but never used — a fresh causal
        mask is always built here, so padding masks are ignored.
        """
        batch_size, seq_len = input_ids.shape
        device = input_ids.device
        if position_ids is None:
            # Default to contiguous positions 0..seq_len-1 for every row.
            position_ids = torch.arange(seq_len, device=device).unsqueeze(0).expand(batch_size, -1)
        hidden_states = self.embed_tokens(input_ids)
        hidden_states = self.embed_dropout(hidden_states)
        # Additive causal mask, rebuilt on every call (no KV cache in this model).
        causal_mask = create_causal_mask(seq_len, device, hidden_states.dtype)
        for layer in self.layers:
            hidden_states = layer(hidden_states, causal_mask, position_ids)
        return self.norm(hidden_states)
class OrchForCausalLM(nn.Module):
    """OrchModel plus a language-modeling head, with sampling-based generation."""
    def __init__(self, config: OrchConfig):
        super().__init__()
        self.config = config
        self.model = OrchModel(config)
        if config.tie_word_embeddings:
            # Tied weights: the input embedding matrix doubles as the output head.
            self.lm_head = None
        else:
            self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
    def forward(self, input_ids, attention_mask=None, position_ids=None, labels=None):
        """Return {"logits": ...}; adds {"loss": ...} when labels are given.

        Loss is next-token cross-entropy with labels shifted left by one;
        label positions equal to -100 are ignored.
        """
        hidden_states = self.model(input_ids, attention_mask, position_ids)
        if self.lm_head is not None:
            logits = self.lm_head(hidden_states)
        else:
            # Tied-embedding projection.
            logits = F.linear(hidden_states, self.model.embed_tokens.weight)
        output = {"logits": logits}
        if labels is not None:
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss = F.cross_entropy(shift_logits.view(-1, self.config.vocab_size), shift_labels.view(-1), ignore_index=-100)
            output["loss"] = loss
        return output
    @torch.no_grad()
    def generate(self, input_ids, max_new_tokens=512, temperature=0.7, top_k=50, top_p=0.9, do_sample=True, eos_token_id=None):
        """Autoregressive decoding with temperature, top-k and nucleus (top-p) filtering.

        No KV cache: the whole (window-truncated) sequence is re-encoded each
        step, so each step's cost grows with sequence length. Stops early only
        when every sequence in the batch emits ``eos_token_id``.
        """
        self.eval()
        for _ in range(max_new_tokens):
            # Sliding window: keep only the most recent max_position_embeddings tokens.
            if input_ids.shape[1] > self.config.max_position_embeddings:
                input_ids = input_ids[:, -self.config.max_position_embeddings:]
            outputs = self.forward(input_ids)
            next_token_logits = outputs["logits"][:, -1, :]
            if temperature != 1.0:
                next_token_logits = next_token_logits / temperature
            if top_k is not None:
                # Mask everything strictly below the k-th largest logit.
                indices_to_remove = next_token_logits < torch.topk(next_token_logits, top_k)[0][..., -1, None]
                next_token_logits[indices_to_remove] = float("-inf")
            if top_p is not None:
                # Nucleus filtering: drop the tail once cumulative probability
                # exceeds top_p, always keeping at least the most likely token.
                sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
                sorted_indices_to_remove = cumulative_probs > top_p
                # Shift right so the token that crosses the threshold is kept.
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                sorted_indices_to_remove[..., 0] = False
                indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
                next_token_logits[indices_to_remove] = float("-inf")
            if do_sample:
                probs = F.softmax(next_token_logits, dim=-1)
                next_tokens = torch.multinomial(probs, num_samples=1)
            else:
                next_tokens = torch.argmax(next_token_logits, dim=-1, keepdim=True)
            input_ids = torch.cat([input_ids, next_tokens], dim=1)
            if eos_token_id is not None and (next_tokens == eos_token_id).all():
                break
        return input_ids
    @classmethod
    def from_pretrained(cls, repo_id: str, device: str = "cpu"):
        """Download config.json and model.pt from the HF Hub and build the model."""
        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
        model_path = hf_hub_download(repo_id=repo_id, filename="model.pt")
        config = OrchConfig.load(config_path)
        model = cls(config)
        # weights_only=True avoids unpickling arbitrary objects from the checkpoint.
        state_dict = torch.load(model_path, map_location=device, weights_only=True)
        model.load_state_dict(state_dict)
        return model.to(device)
# =============================================================================
# Project Templates
# =============================================================================
# Prompt templates: each entry's "prefix" is prepended to the user's prompt to
# steer generation toward a project archetype; "blank" adds nothing.
PROJECT_TEMPLATES = {
    "blank": {
        "name": "Blank Project",
        "description": "Start from scratch with a minimal Next.js setup",
        "prefix": ""
    },
    "saas": {
        "name": "SaaS Application",
        "description": "Full SaaS with auth, billing, dashboard",
        "prefix": "Create a modern SaaS application with user authentication, subscription billing integration, admin dashboard, and "
    },
    "ecommerce": {
        "name": "E-Commerce Store",
        "description": "Online store with products, cart, checkout",
        "prefix": "Build an e-commerce store with product catalog, shopping cart, checkout flow, order management, and "
    },
    "dashboard": {
        "name": "Admin Dashboard",
        "description": "Data visualization and management dashboard",
        "prefix": "Create an admin dashboard with data tables, charts, user management, settings, and "
    },
    "portfolio": {
        "name": "Portfolio Website",
        "description": "Personal/professional portfolio site",
        "prefix": "Build a modern portfolio website with project showcase, about section, skills display, contact form, and "
    },
    "blog": {
        "name": "Blog Platform",
        "description": "Content management and blog system",
        "prefix": "Create a blog platform with markdown support, categories, tags, comments, search, and "
    },
    "api": {
        "name": "API Backend",
        "description": "RESTful API with authentication",
        "prefix": "Build a RESTful API backend with JWT authentication, rate limiting, validation, error handling, and "
    }
}
# =============================================================================
# Project Parsing and Packaging
# =============================================================================
def parse_project_output(output: str) -> Dict[str, str]:
    """Parse raw model output into a {relative_path: file_content} dict.

    Two formats are recognised, in order:
      1. Markdown code fences whose info line carries a file path with an
         extension, e.g. ``\u0060\u0060\u0060tsx app/page.tsx``.
      2. Special-token format: <|file|>path<|end_path|>content<|end_file|>,
         tried only when the first format yields nothing.

    Files under node_modules and bodies of 10 characters or fewer are dropped
    for the markdown format.
    """
    files: Dict[str, str] = {}
    # Pattern 1: markdown fences; group 1 is the path (must contain an
    # extension), group 2 is the body up to the closing fence.
    md_pattern = re.compile(
        r'```(?:\w+\s+)?([^\n`]+\.[a-zA-Z]+)\n([\s\S]*?)```',
        re.MULTILINE
    )
    for match in md_pattern.finditer(output):
        file_path = match.group(1).strip()
        content = match.group(2).strip()
        # FIX: the original wrote `a and b and c or d`, which by operator
        # precedence ignored `content` whenever the path contained a dot.
        if file_path and content and ('/' in file_path or '.' in file_path):
            # Strip leading "./" segments only; str.lstrip('./') is a
            # character-class strip that would also eat the leading dot of
            # hidden files such as ".gitignore".
            while file_path.startswith('./'):
                file_path = file_path[2:]
            if not file_path.startswith('node_modules') and len(content) > 10:
                files[file_path] = content
    # Pattern 2: special-token format (fallback).
    if not files:
        special_pattern = re.compile(
            r'<\|file\|>([^<]+)<\|end_path\|>\s*([\s\S]*?)<\|end_file\|>',
            re.MULTILINE
        )
        for match in special_pattern.finditer(output):
            file_path = match.group(1).strip()
            content = match.group(2).strip()
            if file_path and content:
                files[file_path] = content
    return files
def create_zip_archive(files: Dict[str, str], project_name: str, include_extras: bool = True) -> str:
    """Write *files* into ``<tmpdir>/<project_name>.zip`` and return the zip path.

    File paths come from model output and are untrusted: absolute prefixes and
    ``.``/``..`` segments are removed so no entry can escape the archive root
    when the zip is later extracted (zip-slip hardening). A README is always
    added; with ``include_extras`` a default .env.example and .gitignore are
    added when the model did not emit them.

    NOTE: the temp directory is intentionally not cleaned up — Gradio serves
    the file from disk after this function returns.
    """
    temp_dir = tempfile.mkdtemp()
    zip_path = os.path.join(temp_dir, f"{project_name}.zip")
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for file_path, content in files.items():
            # Normalise separators, then drop empty, "." and ".." segments.
            clean = file_path.replace('\\', '/').lstrip('/')
            parts = [p for p in clean.split('/') if p not in ('', '.', '..')]
            if not parts:
                continue
            zipf.writestr(f"{project_name}/{'/'.join(parts)}", content)
        # Add README
        readme = generate_readme(project_name, len(files))
        zipf.writestr(f"{project_name}/README.md", readme)
        if include_extras:
            # Add .env.example if not present
            if ".env" not in files and ".env.example" not in files:
                zipf.writestr(f"{project_name}/.env.example", generate_env_example())
            # Add .gitignore if not present
            if ".gitignore" not in files:
                zipf.writestr(f"{project_name}/.gitignore", generate_gitignore())
    return zip_path
def generate_readme(project_name: str, file_count: int) -> str:
    """Generate the README.md contents for a generated project.

    FIX: the timestamp label says "UTC", but the original used the naive local
    ``datetime.now()``; it now uses ``datetime.now(timezone.utc)`` so the label
    is accurate.
    """
    return f'''# {project_name}
Generated by [ORCH Studio](https://huggingface.co/spaces/raihan-js/orch-studio) - Powered by ORCH-7B
## Quick Start
```bash
# Install dependencies
npm install
# Set up environment variables
cp .env.example .env
# Edit .env with your configuration
# Set up database (if using Prisma)
npx prisma generate
npx prisma db push
# Start development server
npm run dev
```
Open [http://localhost:3000](http://localhost:3000) to view your application.
## Tech Stack
- **Framework**: Next.js 14 (App Router)
- **Language**: TypeScript
- **Styling**: Tailwind CSS
- **Components**: Radix UI / shadcn/ui compatible
- **Database**: Prisma ORM (SQLite/PostgreSQL)
- **Authentication**: NextAuth.js patterns
## Project Structure
```
{project_name}/
├── app/ # Next.js App Router pages
├── components/ # React components
├── lib/ # Utility functions
├── prisma/ # Database schema
└── public/ # Static assets
```
## Scripts
- `npm run dev` - Start development server
- `npm run build` - Build for production
- `npm run start` - Start production server
- `npm run lint` - Run ESLint
## Generated Info
- **Files**: {file_count}
- **Generated**: {datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")}
- **Model**: ORCH-7B (QLoRA fine-tuned)
## Links
- [ORCH Studio](https://huggingface.co/spaces/raihan-js/orch-studio)
- [ORCH-7B Model](https://huggingface.co/orch-ai/ORCH-7B)
- [ORCH AI Organization](https://huggingface.co/orch-ai)
---
*Built with ORCH - Orchestrated Recursive Code Hierarchy*
'''
def generate_env_example() -> str:
    """Return default .env.example contents: database, NextAuth, OAuth,
    Stripe and SMTP placeholders (optional ones commented out)."""
    return '''# Database
DATABASE_URL="file:./dev.db"
# DATABASE_URL="postgresql://user:password@localhost:5432/mydb"
# Authentication (NextAuth.js)
NEXTAUTH_URL="http://localhost:3000"
NEXTAUTH_SECRET="your-secret-key-here"
# OAuth Providers (optional)
# GITHUB_ID=""
# GITHUB_SECRET=""
# GOOGLE_ID=""
# GOOGLE_SECRET=""
# Stripe (optional)
# STRIPE_SECRET_KEY=""
# STRIPE_WEBHOOK_SECRET=""
# NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=""
# Email (optional)
# SMTP_HOST=""
# SMTP_PORT=""
# SMTP_USER=""
# SMTP_PASSWORD=""
'''
def generate_gitignore() -> str:
    """Return a default .gitignore tailored to a Next.js/Prisma project."""
    return '''# Dependencies
node_modules/
.pnpm-store/
# Build
.next/
out/
build/
dist/
# Environment
.env
.env.local
.env.*.local
# Database
*.db
*.sqlite
# IDE
.vscode/
.idea/
# OS
.DS_Store
Thumbs.db
# Logs
*.log
npm-debug.log*
# Testing
coverage/
.nyc_output/
# Prisma
prisma/migrations/
# Misc
*.tsbuildinfo
next-env.d.ts
'''
# =============================================================================
# Model Loading
# =============================================================================
# Model repos: the 7B repo can be overridden via the MODEL_ID env var;
# set USE_7B=false to force the 3B fallback model.
MODEL_7B_ID = os.environ.get("MODEL_ID", "orch-ai/ORCH-7B")
MODEL_3B_ID = "raihan-js/orch-nextjs-3b"
USE_7B = os.environ.get("USE_7B", "true").lower() == "true"
print(f"[ORCH] Model selection: {'ORCH-7B' if USE_7B else 'ORCH-3B'}")
print(f"[ORCH] Model ID: {MODEL_7B_ID if USE_7B else MODEL_3B_ID}")
# Mutable module-level state populated by the load_* functions below.
model = None
tokenizer = None
MODEL_TYPE = None  # "7b" or "3b" once a model has loaded
MODEL_LOADED = False
ERROR_MSG = ""
MODEL_LOAD_TIME = None  # seconds, set after a successful load
def load_7b_model():
    """Load the ORCH-7B transformers model + tokenizer into module globals.

    Uses fp16 with device_map="auto" when a GPU is visible, otherwise fp32 on
    CPU. Exceptions from transformers/hub propagate to the caller, which
    falls back to the 3B model.
    """
    global model, tokenizer, MODEL_TYPE, MODEL_LOAD_TIME
    # Imported lazily so the app can still start when transformers is absent.
    from transformers import AutoModelForCausalLM, AutoTokenizer
    import time
    hf_token = os.environ.get("HF_TOKEN")
    print(f"[ORCH] Loading ORCH-7B from {MODEL_7B_ID}...")
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(MODEL_7B_ID, trust_remote_code=True, token=hf_token)
    # Check if GPU is available and load accordingly
    if torch.cuda.is_available():
        print("[ORCH] GPU detected, loading with device_map='auto'...")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_7B_ID,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            token=hf_token
        )
    else:
        print("[ORCH] No GPU, loading on CPU (this will be slow)...")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_7B_ID,
            torch_dtype=torch.float32,  # Use float32 for CPU
            trust_remote_code=True,
            token=hf_token,
            low_cpu_mem_usage=True
        )
    model.eval()
    MODEL_TYPE = "7b"
    MODEL_LOAD_TIME = time.time() - start_time
    print(f"[ORCH] ORCH-7B loaded successfully in {MODEL_LOAD_TIME:.1f}s!")
def load_3b_model():
    """Load the custom ORCH-3B model + tokenizers.Tokenizer into module globals.

    Always loads on CPU; the generation path moves it to GPU when one is
    available at request time.
    """
    global model, tokenizer, MODEL_TYPE, MODEL_LOAD_TIME
    import time
    print(f"[ORCH] Loading ORCH-3B from {MODEL_3B_ID}...")
    start_time = time.time()
    model = OrchForCausalLM.from_pretrained(MODEL_3B_ID, device="cpu")
    model.eval()
    tokenizer_path = hf_hub_download(repo_id=MODEL_3B_ID, filename="tokenizer.json")
    tokenizer = Tokenizer.from_file(tokenizer_path)
    MODEL_TYPE = "3b"
    MODEL_LOAD_TIME = time.time() - start_time
    print(f"[ORCH] ORCH-3B loaded successfully in {MODEL_LOAD_TIME:.1f}s!")
# Load a model at import time: prefer ORCH-7B, fall back to ORCH-3B when the
# 7B repo is unavailable; a terminal failure is recorded in ERROR_MSG and
# surfaced to users by the generation functions.
try:
    if USE_7B:
        try:
            load_7b_model()
            MODEL_LOADED = True
        except Exception as e7b:
            print(f"[ORCH] ORCH-7B not available: {e7b}")
            print("[ORCH] Falling back to ORCH-3B...")
            load_3b_model()
            MODEL_LOADED = True
    else:
        load_3b_model()
        MODEL_LOADED = True
except Exception as e:
    MODEL_LOADED = False
    ERROR_MSG = str(e)
    print(f"[ORCH] Error loading model: {e}")
# =============================================================================
# Generation Functions
# =============================================================================
# Alpaca-style prompt wrapper used for the instruction-tuned 7B model.
INSTRUCTION_TEMPLATE = """### Instruction:
{instruction}
### Response:
"""
@spaces.GPU(duration=60)
def generate_project_gpu(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[str, str, Optional[str]]:
    """GPU-accelerated wrapper around _generate_project_core (ZeroGPU, 60s budget)."""
    return _generate_project_core(
        prompt, template, max_tokens, temperature, top_p, include_extras,
        use_gpu=True, progress=progress,
    )
def generate_project_cpu(
    prompt: str,
    max_tokens: int = 2048,  # lower default: CPU generation is slow
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    progress=gr.Progress(track_tqdm=True),
    template: str = "blank",
) -> Tuple[str, str, Optional[str]]:
    """CPU wrapper around _generate_project_core (slower, but always available)."""
    return _generate_project_core(
        prompt, template, max_tokens, temperature, top_p, include_extras,
        use_gpu=False, progress=progress,
    )
@spaces.GPU(duration=60)
def generate_project(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[str, str, Optional[str]]:
    """Generate a complete Next.js project on GPU (ZeroGPU, 60-second budget)."""
    return _generate_project_core(
        prompt, template, max_tokens, temperature, top_p, include_extras,
        use_gpu=True, progress=progress,
    )
def _generate_project_core(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    use_gpu: bool = True,
    progress=None
) -> Tuple[str, str, Optional[str]]:
    """Core implementation for project generation.

    Builds a prompt from the selected template, runs whichever model is loaded
    (module-level MODEL_TYPE chooses the 7B transformers path or the custom 3B
    path), parses the output into files and packages them as a zip.

    Returns:
        (status_message, raw_output_preview, zip_path) — zip_path is None when
        no structured project could be parsed or on early validation errors.
    """
    if not MODEL_LOADED:
        return f"Error: Model failed to load.\n\nDetails: {ERROR_MSG}", "", None
    if not prompt.strip() or len(prompt.strip()) < 10:
        return "Please enter a detailed project description (at least 10 characters).", "", None
    def update_progress(val, desc=""):
        # Best-effort progress reporting: the Gradio progress object may be
        # unusable outside a request context. FIX: the original used bare
        # `except:` here, which also swallowed KeyboardInterrupt/SystemExit.
        try:
            if progress is not None:
                progress(val, desc=desc)
        except Exception:
            pass
    update_progress(0.1, "Preparing generation...")
    # Get device from model (ZeroGPU manages GPU attachment automatically)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"[ORCH] Using device: {device}")
    # Apply template prefix
    template_info = PROJECT_TEMPLATES.get(template, PROJECT_TEMPLATES["blank"])
    full_prompt = template_info["prefix"] + prompt.strip()
    if MODEL_TYPE == "7b":
        update_progress(0.2, f"Generating with ORCH-7B on {device}...")
        instruction = f"Create a complete Next.js full-stack application: {full_prompt}"
        formatted_prompt = INSTRUCTION_TEMPLATE.format(instruction=instruction)
        inputs = tokenizer(formatted_prompt, return_tensors="pt", truncation=True, max_length=2048).to(device)
        update_progress(0.3, f"Generating project on {'GPU' if use_gpu else 'CPU'} (this may take a while)...")
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
            )
        # Decode only the newly generated tail; keep special tokens so the
        # <|file|> fallback format can be parsed.
        input_length = inputs['input_ids'].shape[1]
        generated_ids = outputs[0][input_length:]
        generated = tokenizer.decode(generated_ids, skip_special_tokens=False)
    else:
        update_progress(0.2, f"Loading ORCH-3B to {'GPU' if use_gpu else 'CPU'}...")
        # NOTE: this mutates the module-level model's device (and dtype on GPU)
        # in place; subsequent requests see the moved model.
        model.to(device)
        if use_gpu and torch.cuda.is_available():
            model.to(torch.bfloat16)
        formatted_prompt = f"// {full_prompt}\n"
        encoded = tokenizer.encode(formatted_prompt)
        input_ids = torch.tensor([encoded.ids], device=device)
        update_progress(0.3, "Generating code...")
        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=50,
                do_sample=True,
            )
        generated = tokenizer.decode(output_ids[0].tolist())
    update_progress(0.8, "Processing output...")
    files = parse_project_output(generated)
    if files:
        update_progress(0.9, "Creating project archive...")
        project_name = "orch-project-" + datetime.now().strftime("%Y%m%d-%H%M%S")
        zip_path = create_zip_archive(files, project_name, include_extras)
        file_list = "\n".join([f" {f}" for f in sorted(files.keys())[:20]])
        if len(files) > 20:
            file_list += f"\n ... and {len(files) - 20} more files"
        status = f"Successfully generated {len(files)} files!\n\nTemplate: {template_info['name']}\nDevice: {'GPU' if use_gpu else 'CPU'}\n\nFiles:\n{file_list}"
        update_progress(1.0, "Done!")
        return status, generated[:10000], zip_path
    else:
        update_progress(1.0, "Done!")
        return "Code generated (no structured project detected - try a more detailed prompt)", generated[:10000], None
# Also update generate_code with CPU fallback
def _generate_code_core(
    prompt: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50,
    use_gpu: bool = True
):
    """Shared implementation behind the code-snippet endpoints.

    Reads module globals (model, tokenizer, MODEL_TYPE, MODEL_LOADED) and
    returns the generated text, or an error string when no model is loaded.
    """
    if not MODEL_LOADED:
        return f"Error: Model failed to load.\n\nDetails: {ERROR_MSG}"
    cleaned = prompt.strip()
    if not cleaned:
        return "Please enter a prompt describing what you want to generate."
    # Use whichever device the model already sits on; with device_map="auto"
    # the model object has no .device attribute, so fall back to a parameter.
    device = model.device if hasattr(model, 'device') else next(model.parameters()).device
    print(f"[ORCH] Code generation using device: {device}")
    if MODEL_TYPE == "7b":
        formatted_prompt = INSTRUCTION_TEMPLATE.format(instruction=cleaned)
        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                repetition_penalty=1.1,
            )
        # Drop the prompt tokens and return only the generated continuation.
        prompt_len = inputs['input_ids'].shape[1]
        return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    # 3B path: comment-style prompt, custom tokenizer and generate loop.
    encoded = tokenizer.encode(f"// {cleaned}\n")
    input_ids = torch.tensor([encoded.ids], device=device)
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            do_sample=True
        )
    return tokenizer.decode(output_ids[0].tolist())
@spaces.GPU
def generate_code_gpu(
    prompt: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50
):
    """GPU-backed wrapper around the shared code-generation core."""
    return _generate_code_core(
        prompt, max_tokens, temperature, top_p, top_k, use_gpu=True
    )
def generate_code_cpu(
    prompt: str,
    max_tokens: int = 512,  # lower default: CPU generation is slow
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50
):
    """CPU wrapper around the shared code-generation core."""
    return _generate_code_core(
        prompt, max_tokens, temperature, top_p, top_k, use_gpu=False
    )
@spaces.GPU
def generate_code(
    prompt: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50
):
    """Generate a Next.js code snippet on GPU (ZeroGPU required)."""
    return _generate_code_core(
        prompt, max_tokens, temperature, top_p, top_k, use_gpu=True
    )
def get_model_info() -> str:
    """Return a human-readable status summary of the currently loaded model."""
    if not MODEL_LOADED:
        return f"Model Status: Failed to load\nError: {ERROR_MSG}"
    if torch.cuda.is_available():
        props = torch.cuda.get_device_properties(0)
        gpu_info = f"{torch.cuda.get_device_name(0)} ({props.total_memory / (1024**3):.1f} GB)"
    else:
        gpu_info = "Not available"
    repo = MODEL_7B_ID if MODEL_TYPE == "7b" else MODEL_3B_ID
    return f"""Model: ORCH-{MODEL_TYPE.upper()}
Repository: {repo}
Load Time: {MODEL_LOAD_TIME:.1f}s
GPU: {gpu_info}
Status: Ready"""
# =============================================================================
# Professional UI with Brand Colors
# =============================================================================
# Brand Colors (Khaki/Earthy)
# Brand palette (khaki/earthy tones) interpolated into the custom CSS below.
BRAND_PRIMARY = "#D4A574"
BRAND_PRIMARY_MID = "#C4956A"
BRAND_PRIMARY_DARK = "#A67C52"
BRAND_DEEP = "#5D4E37"
BRAND_BG_DARK = "#1a1512"
BRAND_BG_LIGHT = "#2d2420"
BRAND_TEXT = "#E8DED5"
BRAND_TEXT_MUTED = "#9C8B7A"
# Example prompts shown in the Gradio UI for the full-project tab.
PROJECT_EXAMPLES = [
    ["Create a modern SaaS landing page with pricing tiers, feature showcase, testimonials, and newsletter signup. Include dark mode."],
    ["Build a task management app with kanban boards, drag-and-drop, user authentication, and team collaboration features."],
    ["Create an e-commerce store for digital products with Stripe payments, download management, and customer dashboard."],
    ["Build a personal portfolio website for a developer with project showcase, blog, skills section, and contact form."],
    ["Create a real-time chat application with channels, direct messaging, file sharing, and user presence indicators."],
    ["Build a restaurant booking system with table management, menu display, online reservations, and admin panel."],
]
# Example prompts for the single-snippet code tab.
CODE_EXAMPLES = [
    ["Create a Next.js dashboard page with user statistics cards showing total users, active sessions, and revenue"],
    ["Build a login form component with email and password fields, validation, and error handling"],
    ["Generate an API route for user authentication that handles login with JWT tokens"],
    ["Create a responsive navbar component with logo, navigation links, and mobile hamburger menu"],
    ["Build a product card component with image, title, price, rating, and add to cart button"],
    ["Create a data table component with sorting, filtering, and pagination"],
]
# Display label for the header badge; falls back to "7B" before any model loads.
MODEL_INFO = f"ORCH-{MODEL_TYPE.upper() if MODEL_TYPE else '7B'}"
# Professional CSS injected into the Gradio app (passed to gr.Blocks(css=...)).
# This is an f-string: brand colors are interpolated from the BRAND_* constants
# (presumably defined earlier in this file — not visible here), so every literal
# CSS brace is doubled ({{ }}) to escape Python's format syntax.
CUSTOM_CSS = f"""
/* Global Styles */
.gradio-container {{
max-width: 1400px !important;
margin: 0 auto !important;
background: linear-gradient(180deg, {BRAND_BG_DARK} 0%, #0f0d0b 100%) !important;
min-height: 100vh;
}}
.dark {{
--background-fill-primary: {BRAND_BG_DARK} !important;
--background-fill-secondary: {BRAND_BG_LIGHT} !important;
--border-color-primary: {BRAND_DEEP} !important;
}}
/* Header */
.header-container {{
background: linear-gradient(135deg, {BRAND_BG_LIGHT} 0%, {BRAND_BG_DARK} 100%);
border: 1px solid {BRAND_DEEP};
border-radius: 16px;
padding: 2rem;
margin-bottom: 1.5rem;
text-align: center;
}}
.logo-container {{
display: flex;
align-items: center;
justify-content: center;
gap: 1rem;
margin-bottom: 0.75rem;
}}
.logo-icon {{
width: 64px;
height: 64px;
border-radius: 16px;
}}
.brand-title {{
font-size: 2.5rem;
font-weight: 700;
background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
margin: 0;
}}
.brand-tagline {{
color: {BRAND_TEXT_MUTED};
font-size: 1rem;
margin: 0.25rem 0;
letter-spacing: 0.5px;
}}
.brand-subtitle {{
color: {BRAND_TEXT};
font-size: 1.1rem;
margin: 0.5rem 0;
}}
.model-badge {{
display: inline-flex;
align-items: center;
gap: 0.5rem;
background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%);
color: {BRAND_BG_DARK};
padding: 0.5rem 1rem;
border-radius: 20px;
font-weight: 600;
font-size: 0.9rem;
margin-top: 0.75rem;
}}
.model-badge-dot {{
width: 8px;
height: 8px;
background: {BRAND_BG_DARK};
border-radius: 50%;
animation: pulse 2s infinite;
}}
@keyframes pulse {{
0%, 100% {{ opacity: 1; }}
50% {{ opacity: 0.5; }}
}}
/* Tabs */
.tabs {{
border: none !important;
background: transparent !important;
}}
.tab-nav {{
background: {BRAND_BG_LIGHT} !important;
border: 1px solid {BRAND_DEEP} !important;
border-radius: 12px !important;
padding: 0.5rem !important;
margin-bottom: 1rem !important;
}}
.tab-nav button {{
background: transparent !important;
border: none !important;
color: {BRAND_TEXT_MUTED} !important;
padding: 0.75rem 1.5rem !important;
border-radius: 8px !important;
font-weight: 500 !important;
transition: all 0.2s ease !important;
}}
.tab-nav button.selected {{
background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%) !important;
color: {BRAND_BG_DARK} !important;
}}
.tab-nav button:hover:not(.selected) {{
background: rgba(212, 165, 116, 0.1) !important;
color: {BRAND_PRIMARY} !important;
}}
/* Input Fields */
.input-container textarea,
.input-container input {{
background: {BRAND_BG_LIGHT} !important;
border: 1px solid {BRAND_DEEP} !important;
border-radius: 12px !important;
color: {BRAND_TEXT} !important;
padding: 1rem !important;
font-size: 1rem !important;
transition: border-color 0.2s ease !important;
}}
.input-container textarea:focus,
.input-container input:focus {{
border-color: {BRAND_PRIMARY} !important;
box-shadow: 0 0 0 3px rgba(212, 165, 116, 0.1) !important;
}}
/* Buttons */
.primary-btn {{
background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%) !important;
color: {BRAND_BG_DARK} !important;
border: none !important;
border-radius: 12px !important;
padding: 0.875rem 2rem !important;
font-weight: 600 !important;
font-size: 1rem !important;
cursor: pointer !important;
transition: all 0.2s ease !important;
box-shadow: 0 4px 12px rgba(212, 165, 116, 0.3) !important;
}}
.primary-btn:hover {{
transform: translateY(-2px) !important;
box-shadow: 0 6px 20px rgba(212, 165, 116, 0.4) !important;
}}
/* Template Cards */
.template-card {{
background: {BRAND_BG_LIGHT};
border: 1px solid {BRAND_DEEP};
border-radius: 12px;
padding: 1rem;
cursor: pointer;
transition: all 0.2s ease;
}}
.template-card:hover {{
border-color: {BRAND_PRIMARY};
transform: translateY(-2px);
}}
.template-card.selected {{
border-color: {BRAND_PRIMARY};
background: rgba(212, 165, 116, 0.1);
}}
/* Info Box */
.info-box {{
background: {BRAND_BG_LIGHT};
border: 1px solid {BRAND_DEEP};
border-radius: 12px;
padding: 1rem;
color: {BRAND_TEXT};
font-family: monospace;
font-size: 0.9rem;
}}
/* Footer */
.footer {{
text-align: center;
padding: 2rem;
color: {BRAND_TEXT_MUTED};
font-size: 0.9rem;
}}
.footer a {{
color: {BRAND_PRIMARY};
text-decoration: none;
}}
.footer a:hover {{
text-decoration: underline;
}}
"""
# =============================================================================
# Gradio Interface
# =============================================================================
# Top-level UI definition. All components are declared inside a single
# gr.Blocks context; event wiring (.click) is done at the bottom, once every
# component referenced as an input/output exists. Depends on names defined
# earlier in this file: CUSTOM_CSS, MODEL_INFO, PROJECT_TEMPLATES,
# PROJECT_EXAMPLES, CODE_EXAMPLES, get_model_info, generate_project and
# generate_code.
with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Base(), title="ORCH Studio") as demo:
    # Header: brand logo (hidden client-side via onerror if the image fails to
    # load), title, tagline, and a pulsing badge showing the loaded model.
    gr.HTML(f"""
    <div class="header-container">
        <div class="logo-container">
            <img src="https://huggingface.co/spaces/raihan-js/orch-studio/resolve/main/logo.png"
                 alt="ORCH" class="logo-icon" onerror="this.style.display='none'">
            <h1 class="brand-title">ORCH Studio</h1>
        </div>
        <p class="brand-tagline">Orchestrated Recursive Code Hierarchy</p>
        <p class="brand-subtitle">Generate complete, production-ready Next.js applications from natural language</p>
        <div class="model-badge">
            <span class="model-badge-dot"></span>
            {MODEL_INFO} - QLoRA Fine-tuned (43h A100 Training)
        </div>
    </div>
    """)
    with gr.Tabs():
        # Tab 1: Full Project Generation — prompt and sampling controls on the
        # left; status, downloadable archive, and raw-output preview on the right.
        with gr.TabItem("Full Project", id="project"):
            with gr.Row():
                with gr.Column(scale=2):
                    project_prompt = gr.Textbox(
                        label="Project Description",
                        placeholder="Describe your Next.js application in detail. Include features, pages, and functionality you want...",
                        lines=6,
                        elem_classes=["input-container"]
                    )
                    with gr.Row():
                        # Dropdown choices are (display_name, key) tuples; the
                        # key is what generate_project receives.
                        template_dropdown = gr.Dropdown(
                            label="Project Template",
                            choices=[(v["name"], k) for k, v in PROJECT_TEMPLATES.items()],
                            value="blank",
                            info="Select a template to get started faster"
                        )
                        include_extras = gr.Checkbox(
                            label="Include extras (.gitignore, .env.example)",
                            value=True
                        )
                    # Sampling hyperparameters forwarded to generate_project.
                    with gr.Row():
                        max_tokens = gr.Slider(
                            minimum=1024, maximum=8192, value=4096, step=256,
                            label="Max Tokens", info="More tokens = larger project"
                        )
                        temperature = gr.Slider(
                            minimum=0.1, maximum=1.0, value=0.7, step=0.05,
                            label="Temperature", info="Higher = more creative"
                        )
                        top_p = gr.Slider(
                            minimum=0.1, maximum=1.0, value=0.95, step=0.05,
                            label="Top P", info="Nucleus sampling"
                        )
                    generate_btn = gr.Button(
                        "Generate Project",
                        variant="primary",
                        size="lg",
                        elem_classes=["primary-btn"]
                    )
                    gr.Examples(
                        examples=PROJECT_EXAMPLES,
                        inputs=project_prompt,
                        label="Example Prompts"
                    )
                with gr.Column(scale=1):
                    status_output = gr.Textbox(
                        label="Generation Status",
                        lines=12,
                        interactive=False,
                        elem_classes=["status-box"]
                    )
                    # Populated by generate_project with the packaged project
                    # file (presumably a ZIP — see zipfile import at file top).
                    download_file = gr.File(
                        label="Download Project",
                        elem_classes=["file-download"]
                    )
                    with gr.Accordion("Generated Code Preview", open=False):
                        code_preview = gr.Code(
                            label="Raw Output",
                            language="markdown",
                            lines=20,
                            elem_classes=["code-output"]
                        )
        # Tab 2: Code Snippet Generation — single-component/function output,
        # smaller token budget than the full-project tab.
        with gr.TabItem("Code Snippet", id="snippet"):
            with gr.Row():
                with gr.Column(scale=2):
                    code_prompt = gr.Textbox(
                        label="Code Request",
                        placeholder="Describe the component, function, or feature you want to generate...",
                        lines=4,
                        elem_classes=["input-container"]
                    )
                    with gr.Row():
                        code_max_tokens = gr.Slider(
                            minimum=256, maximum=2048, value=1024, step=128,
                            label="Max Tokens"
                        )
                        code_temperature = gr.Slider(
                            minimum=0.1, maximum=1.0, value=0.7, step=0.05,
                            label="Temperature"
                        )
                        code_top_p = gr.Slider(
                            minimum=0.1, maximum=1.0, value=0.9, step=0.05,
                            label="Top P"
                        )
                    code_generate_btn = gr.Button(
                        "Generate Code",
                        variant="primary",
                        size="lg",
                        elem_classes=["primary-btn"]
                    )
                    gr.Examples(
                        examples=CODE_EXAMPLES,
                        inputs=code_prompt,
                        label="Example Prompts"
                    )
                with gr.Column(scale=2):
                    code_output = gr.Code(
                        label="Generated Code",
                        language="typescript",
                        lines=25,
                        elem_classes=["code-output"]
                    )
        # Tab 3: Model Info — static model card plus a live session-status
        # readout refreshed on demand via get_model_info.
        with gr.TabItem("Model Info", id="info"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown(f"""
                    ## ORCH-7B Model
                    **Latest QLoRA Fine-tuned Model** (January 2025)
                    | Specification | Value |
                    |--------------|-------|
                    | Base Model | DeepSeek Coder 6.7B Instruct |
                    | Fine-tuning | QLoRA (4-bit quantization + LoRA) |
                    | Training Time | 43 hours on A100 GPU |
                    | Training Steps | 5,238 steps |
                    | Focus | Next.js 14+ full-stack applications |
                    | Output | Complete downloadable projects |
                    ### Training Data
                    - Curated Next.js 14+ projects from GitHub
                    - Synthetic instruction-response pairs
                    - Focus on TypeScript, Tailwind CSS, Prisma
                    ### Capabilities
                    - Full-stack application generation
                    - Component and API route creation
                    - Database schema design
                    - Authentication patterns
                    - Responsive UI with Tailwind CSS
                    ### Links
                    - [ORCH-7B on HuggingFace](https://huggingface.co/orch-ai/ORCH-7B)
                    - [ORCH AI Organization](https://huggingface.co/orch-ai)
                    - [raihan-js/orch-7b](https://huggingface.co/raihan-js/orch-7b)
                    """)
                with gr.Column():
                    # Initial value is computed once at app start; the refresh
                    # button re-invokes get_model_info for up-to-date status.
                    model_info_display = gr.Textbox(
                        label="Current Session",
                        value=get_model_info(),
                        lines=8,
                        interactive=False,
                        elem_classes=["info-box"]
                    )
                    refresh_btn = gr.Button("Refresh Status")
                    refresh_btn.click(fn=get_model_info, outputs=model_info_display)
    # Footer
    gr.HTML(f"""
    <div class="footer">
        <p>
            <strong>ORCH</strong> - Orchestrated Recursive Code Hierarchy
            <br>
            <a href="https://huggingface.co/orch-ai" target="_blank">ORCH AI</a> |
            <a href="https://huggingface.co/orch-ai/ORCH-7B" target="_blank">ORCH-7B Model</a> |
            <a href="https://huggingface.co/raihan-js" target="_blank">raihan-js</a>
        </p>
        <p style="font-size: 0.8rem; margin-top: 0.5rem;">
            Model: {MODEL_INFO} | Updated: January 2025
        </p>
    </div>
    """)
    # Event handlers — wired after all components are declared so the
    # input/output references resolve.
    generate_btn.click(
        fn=generate_project,
        inputs=[project_prompt, template_dropdown, max_tokens, temperature, top_p, include_extras],
        outputs=[status_output, code_preview, download_file]
    )
    code_generate_btn.click(
        fn=generate_code,
        inputs=[code_prompt, code_max_tokens, code_temperature, code_top_p],
        outputs=code_output
    )
# Script entry point: start the Gradio server when run directly (HF Spaces
# executes this module as the main script).
if __name__ == "__main__":
    demo.launch()