""" ORCH Studio - Autonomous Next.js Code Generation Generate complete, production-ready Next.js applications from natural language prompts. Now powered by the latest QLoRA fine-tuned ORCH-7B model (43h training on A100). https://huggingface.co/orch-ai Version: 1.0.1 - Fixed GPU duration limits for ZeroGPU """ import os import re import json import math import tempfile import zipfile from pathlib import Path from datetime import datetime from dataclasses import dataclass from typing import Optional, Dict, Any, Tuple, List import torch import torch.nn as nn import torch.nn.functional as F import gradio as gr import spaces from huggingface_hub import hf_hub_download from tokenizers import Tokenizer # ============================================================================= # Model Configuration # ============================================================================= @dataclass class OrchConfig: """Configuration for Orch transformer model.""" model_name: str = "orch" model_size: str = "3b" vocab_size: int = 32000 hidden_size: int = 2560 intermediate_size: int = 10240 num_hidden_layers: int = 32 num_attention_heads: int = 32 num_key_value_heads: int = 8 max_position_embeddings: int = 16384 rms_norm_eps: float = 1e-5 rope_theta: float = 10000.0 hidden_dropout: float = 0.0 attention_dropout: float = 0.0 use_flash_attention: bool = False tie_word_embeddings: bool = False rope_scaling_type: Optional[str] = None rope_scaling_factor: float = 1.0 original_max_position_embeddings: Optional[int] = None initializer_range: float = 0.02 @property def head_dim(self) -> int: return self.hidden_size // self.num_attention_heads @property def num_kv_groups(self) -> int: return self.num_attention_heads // self.num_key_value_heads @classmethod def from_dict(cls, config_dict: Dict[str, Any]) -> "OrchConfig": return cls(**{k: v for k, v in config_dict.items() if k in cls.__dataclass_fields__}) @classmethod def load(cls, path: str) -> "OrchConfig": with open(path, "r") as f: 
config_dict = json.load(f) return cls.from_dict(config_dict) # ============================================================================= # Model Layers # ============================================================================= class RMSNorm(nn.Module): def __init__(self, hidden_size: int, eps: float = 1e-6): super().__init__() self.weight = nn.Parameter(torch.ones(hidden_size)) self.eps = eps def forward(self, x: torch.Tensor) -> torch.Tensor: rms = torch.sqrt(torch.mean(x ** 2, dim=-1, keepdim=True) + self.eps) return (x / rms) * self.weight class SwiGLU(nn.Module): def __init__(self, hidden_size: int, intermediate_size: int, bias: bool = False, dropout: float = 0.0): super().__init__() self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=bias) self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=bias) self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=bias) self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity() def forward(self, x: torch.Tensor) -> torch.Tensor: gate = F.silu(self.gate_proj(x)) up = self.up_proj(x) hidden = gate * up hidden = self.dropout(hidden) return self.down_proj(hidden) class RotaryEmbedding(nn.Module): def __init__( self, dim: int, max_position_embeddings: int = 2048, base: float = 10000.0, scaling_type: Optional[str] = None, scaling_factor: float = 1.0, original_max_position_embeddings: Optional[int] = None, ): super().__init__() self.dim = dim self.max_position_embeddings = max_position_embeddings self.base = base self.scaling_type = scaling_type self.scaling_factor = scaling_factor self.original_max_position_embeddings = original_max_position_embeddings or max_position_embeddings inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float() / self.dim)) self.register_buffer("inv_freq", inv_freq, persistent=False) self._set_cos_sin_cache(max_position_embeddings) def _set_cos_sin_cache(self, seq_len: int): self.max_seq_len_cached = seq_len t = torch.arange(seq_len, 
device=self.inv_freq.device, dtype=self.inv_freq.dtype) if self.scaling_type == "linear": t = t / self.scaling_factor freqs = torch.outer(t, self.inv_freq) emb = torch.cat((freqs, freqs), dim=-1) self.register_buffer("cos_cached", emb.cos(), persistent=False) self.register_buffer("sin_cached", emb.sin(), persistent=False) def forward(self, x: torch.Tensor, position_ids: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: seq_len = position_ids.max().item() + 1 if seq_len > self.max_seq_len_cached: self._set_cos_sin_cache(seq_len) cos = self.cos_cached[position_ids].to(x.dtype) sin = self.sin_cached[position_ids].to(x.dtype) return cos, sin def rotate_half(x: torch.Tensor) -> torch.Tensor: x1 = x[..., : x.shape[-1] // 2] x2 = x[..., x.shape[-1] // 2 :] return torch.cat((-x2, x1), dim=-1) def apply_rotary_pos_emb(q, k, cos, sin): cos = cos.unsqueeze(1) sin = sin.unsqueeze(1) q_embed = (q * cos) + (rotate_half(q) * sin) k_embed = (k * cos) + (rotate_half(k) * sin) return q_embed, k_embed def create_causal_mask(seq_len: int, device: torch.device, dtype: torch.dtype = torch.float32) -> torch.Tensor: mask = torch.triu(torch.full((seq_len, seq_len), float("-inf"), device=device, dtype=dtype), diagonal=1) return mask.unsqueeze(0).unsqueeze(0) class OrchAttention(nn.Module): def __init__(self, config: OrchConfig, layer_idx: int): super().__init__() self.config = config self.layer_idx = layer_idx self.hidden_size = config.hidden_size self.num_heads = config.num_attention_heads self.num_kv_heads = config.num_key_value_heads self.head_dim = config.head_dim self.num_kv_groups = config.num_kv_groups self.attention_dropout = config.attention_dropout self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False) self.k_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False) self.v_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False) self.o_proj = nn.Linear(self.num_heads * self.head_dim, 
self.hidden_size, bias=False) self.rotary_emb = RotaryEmbedding( self.head_dim, max_position_embeddings=config.max_position_embeddings, base=config.rope_theta, scaling_type=config.rope_scaling_type, scaling_factor=config.rope_scaling_factor, original_max_position_embeddings=config.original_max_position_embeddings, ) def forward(self, hidden_states, attention_mask=None, position_ids=None): batch_size, seq_len, _ = hidden_states.shape query_states = self.q_proj(hidden_states).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2) key_states = self.k_proj(hidden_states).view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2) value_states = self.v_proj(hidden_states).view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2) cos, sin = self.rotary_emb(value_states, position_ids) query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin) if self.num_kv_groups > 1: key_states = key_states.repeat_interleave(self.num_kv_groups, dim=1) value_states = value_states.repeat_interleave(self.num_kv_groups, dim=1) scale = 1.0 / math.sqrt(self.head_dim) attn_weights = torch.matmul(query_states, key_states.transpose(-2, -1)) * scale if attention_mask is not None: attn_weights = attn_weights + attention_mask attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) attn_weights = F.dropout(attn_weights, p=self.attention_dropout, training=self.training) attn_output = torch.matmul(attn_weights, value_states) attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, self.num_heads * self.head_dim) return self.o_proj(attn_output) class OrchBlock(nn.Module): def __init__(self, config: OrchConfig, layer_idx: int): super().__init__() self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) self.attention = OrchAttention(config, layer_idx) self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) 
self.feed_forward = SwiGLU(config.hidden_size, config.intermediate_size, dropout=config.hidden_dropout) self.dropout = nn.Dropout(config.hidden_dropout) def forward(self, hidden_states, attention_mask=None, position_ids=None): residual = hidden_states hidden_states = self.input_layernorm(hidden_states) hidden_states = self.attention(hidden_states, attention_mask, position_ids) hidden_states = self.dropout(hidden_states) hidden_states = residual + hidden_states residual = hidden_states hidden_states = self.post_attention_layernorm(hidden_states) hidden_states = self.feed_forward(hidden_states) hidden_states = self.dropout(hidden_states) hidden_states = residual + hidden_states return hidden_states class OrchModel(nn.Module): def __init__(self, config: OrchConfig): super().__init__() self.config = config self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size) self.embed_dropout = nn.Dropout(config.hidden_dropout) self.layers = nn.ModuleList([OrchBlock(config, i) for i in range(config.num_hidden_layers)]) self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) def forward(self, input_ids, attention_mask=None, position_ids=None): batch_size, seq_len = input_ids.shape device = input_ids.device if position_ids is None: position_ids = torch.arange(seq_len, device=device).unsqueeze(0).expand(batch_size, -1) hidden_states = self.embed_tokens(input_ids) hidden_states = self.embed_dropout(hidden_states) causal_mask = create_causal_mask(seq_len, device, hidden_states.dtype) for layer in self.layers: hidden_states = layer(hidden_states, causal_mask, position_ids) return self.norm(hidden_states) class OrchForCausalLM(nn.Module): def __init__(self, config: OrchConfig): super().__init__() self.config = config self.model = OrchModel(config) if config.tie_word_embeddings: self.lm_head = None else: self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) def forward(self, input_ids, attention_mask=None, position_ids=None, labels=None): 
hidden_states = self.model(input_ids, attention_mask, position_ids) if self.lm_head is not None: logits = self.lm_head(hidden_states) else: logits = F.linear(hidden_states, self.model.embed_tokens.weight) output = {"logits": logits} if labels is not None: shift_logits = logits[..., :-1, :].contiguous() shift_labels = labels[..., 1:].contiguous() loss = F.cross_entropy(shift_logits.view(-1, self.config.vocab_size), shift_labels.view(-1), ignore_index=-100) output["loss"] = loss return output @torch.no_grad() def generate(self, input_ids, max_new_tokens=512, temperature=0.7, top_k=50, top_p=0.9, do_sample=True, eos_token_id=None): self.eval() for _ in range(max_new_tokens): if input_ids.shape[1] > self.config.max_position_embeddings: input_ids = input_ids[:, -self.config.max_position_embeddings:] outputs = self.forward(input_ids) next_token_logits = outputs["logits"][:, -1, :] if temperature != 1.0: next_token_logits = next_token_logits / temperature if top_k is not None: indices_to_remove = next_token_logits < torch.topk(next_token_logits, top_k)[0][..., -1, None] next_token_logits[indices_to_remove] = float("-inf") if top_p is not None: sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True) cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) sorted_indices_to_remove = cumulative_probs > top_p sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() sorted_indices_to_remove[..., 0] = False indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove) next_token_logits[indices_to_remove] = float("-inf") if do_sample: probs = F.softmax(next_token_logits, dim=-1) next_tokens = torch.multinomial(probs, num_samples=1) else: next_tokens = torch.argmax(next_token_logits, dim=-1, keepdim=True) input_ids = torch.cat([input_ids, next_tokens], dim=1) if eos_token_id is not None and (next_tokens == eos_token_id).all(): break return input_ids @classmethod def 
from_pretrained(cls, repo_id: str, device: str = "cpu"): config_path = hf_hub_download(repo_id=repo_id, filename="config.json") model_path = hf_hub_download(repo_id=repo_id, filename="model.pt") config = OrchConfig.load(config_path) model = cls(config) state_dict = torch.load(model_path, map_location=device, weights_only=True) model.load_state_dict(state_dict) return model.to(device) # ============================================================================= # Project Templates # ============================================================================= PROJECT_TEMPLATES = { "blank": { "name": "Blank Project", "description": "Start from scratch with a minimal Next.js setup", "prefix": "" }, "saas": { "name": "SaaS Application", "description": "Full SaaS with auth, billing, dashboard", "prefix": "Create a modern SaaS application with user authentication, subscription billing integration, admin dashboard, and " }, "ecommerce": { "name": "E-Commerce Store", "description": "Online store with products, cart, checkout", "prefix": "Build an e-commerce store with product catalog, shopping cart, checkout flow, order management, and " }, "dashboard": { "name": "Admin Dashboard", "description": "Data visualization and management dashboard", "prefix": "Create an admin dashboard with data tables, charts, user management, settings, and " }, "portfolio": { "name": "Portfolio Website", "description": "Personal/professional portfolio site", "prefix": "Build a modern portfolio website with project showcase, about section, skills display, contact form, and " }, "blog": { "name": "Blog Platform", "description": "Content management and blog system", "prefix": "Create a blog platform with markdown support, categories, tags, comments, search, and " }, "api": { "name": "API Backend", "description": "RESTful API with authentication", "prefix": "Build a RESTful API backend with JWT authentication, rate limiting, validation, error handling, and " } } # 
# =============================================================================
# Project Parsing and Packaging
# =============================================================================

def parse_project_output(output: str) -> Dict[str, str]:
    """Parse model output into a {relative_path: file_content} dictionary.

    Tries markdown code fences annotated with a file path first, then falls
    back to the model's special-token file format.
    """
    files = {}
    # Pattern 1: Markdown code blocks with file paths, e.g. ```tsx app/page.tsx
    md_pattern = re.compile(
        r'```(?:\w+\s+)?([^\n`]+\.[a-zA-Z]+)\n([\s\S]*?)```',
        re.MULTILINE
    )
    for match in md_pattern.finditer(output):
        file_path = match.group(1).strip()
        content = match.group(2).strip()
        # FIX: original condition was `a and b and '/' in p or '.' in p`, which
        # due to precedence accepted entries with empty content whenever the
        # path merely contained a dot. Parenthesize the path check.
        if file_path and content and ('/' in file_path or '.' in file_path):
            # FIX: lstrip('./') strips *characters*, mangling dotfiles
            # (".env" -> "env"). Remove only leading "./" prefixes and slashes.
            while file_path.startswith('./'):
                file_path = file_path[2:]
            file_path = file_path.lstrip('/')
            if not file_path.startswith('node_modules') and len(content) > 10:
                files[file_path] = content
    # Pattern 2: Special token format (fallback)
    if not files:
        special_pattern = re.compile(
            r'<\|file\|>([^<]+)<\|end_path\|>\s*([\s\S]*?)<\|end_file\|>',
            re.MULTILINE
        )
        for match in special_pattern.finditer(output):
            file_path = match.group(1).strip()
            content = match.group(2).strip()
            if file_path and content:
                files[file_path] = content
    return files


def create_zip_archive(files: Dict[str, str], project_name: str, include_extras: bool = True) -> str:
    """Create a ZIP archive from a files dictionary; returns the archive path.

    Always adds a README; when include_extras is True also adds .env.example
    and .gitignore if the generated project did not include them.
    """
    temp_dir = tempfile.mkdtemp()
    zip_path = os.path.join(temp_dir, f"{project_name}.zip")
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for file_path, content in files.items():
            full_path = f"{project_name}/{file_path}"
            zipf.writestr(full_path, content)
        # Add README
        readme = generate_readme(project_name, len(files))
        zipf.writestr(f"{project_name}/README.md", readme)
        if include_extras:
            # Add .env.example if not present
            if ".env" not in files and ".env.example" not in files:
                env_example = generate_env_example()
                zipf.writestr(f"{project_name}/.env.example", env_example)
            # Add .gitignore if not present
            if ".gitignore" not in files:
                gitignore = generate_gitignore()
                zipf.writestr(f"{project_name}/.gitignore", gitignore)
    return zip_path


def generate_readme(project_name: str, file_count: int) -> str:
    """Generate README.md content for the packaged project."""
    # FIX: timestamp is labeled "UTC" in the README but was produced with
    # local-time datetime.now(); use utcnow() so label and value agree.
    generated_at = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC")
    return f'''# {project_name}

Generated by [ORCH Studio](https://huggingface.co/spaces/raihan-js/orch-studio) - Powered by ORCH-7B

## Quick Start

```bash
# Install dependencies
npm install

# Set up environment variables
cp .env.example .env
# Edit .env with your configuration

# Set up database (if using Prisma)
npx prisma generate
npx prisma db push

# Start development server
npm run dev
```

Open [http://localhost:3000](http://localhost:3000) to view your application.

## Tech Stack

- **Framework**: Next.js 14 (App Router)
- **Language**: TypeScript
- **Styling**: Tailwind CSS
- **Components**: Radix UI / shadcn/ui compatible
- **Database**: Prisma ORM (SQLite/PostgreSQL)
- **Authentication**: NextAuth.js patterns

## Project Structure

```
{project_name}/
├── app/          # Next.js App Router pages
├── components/   # React components
├── lib/          # Utility functions
├── prisma/       # Database schema
└── public/       # Static assets
```

## Scripts

- `npm run dev` - Start development server
- `npm run build` - Build for production
- `npm run start` - Start production server
- `npm run lint` - Run ESLint

## Generated Info

- **Files**: {file_count}
- **Generated**: {generated_at}
- **Model**: ORCH-7B (QLoRA fine-tuned)

## Links

- [ORCH Studio](https://huggingface.co/spaces/raihan-js/orch-studio)
- [ORCH-7B Model](https://huggingface.co/orch-ai/ORCH-7B)
- [ORCH AI Organization](https://huggingface.co/orch-ai)

---

*Built with ORCH - Orchestrated Recursive Code Hierarchy*
'''


def generate_env_example() -> str:
    """Generate a default .env.example file."""
    return '''# Database
DATABASE_URL="file:./dev.db"
# DATABASE_URL="postgresql://user:password@localhost:5432/mydb"

# Authentication (NextAuth.js)
NEXTAUTH_URL="http://localhost:3000"
NEXTAUTH_SECRET="your-secret-key-here"

# OAuth Providers (optional)
# GITHUB_ID=""
# GITHUB_SECRET=""
# GOOGLE_ID=""
# GOOGLE_SECRET=""

# Stripe (optional)
# STRIPE_SECRET_KEY=""
# STRIPE_WEBHOOK_SECRET=""
# NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=""

# Email (optional)
# SMTP_HOST=""
# SMTP_PORT=""
# SMTP_USER=""
# SMTP_PASSWORD=""
'''


def generate_gitignore() -> str:
    """Generate a default .gitignore file."""
    return '''# Dependencies
node_modules/
.pnpm-store/

# Build
.next/
out/
build/
dist/

# Environment
.env
.env.local
.env.*.local

# Database
*.db
*.sqlite

# IDE
.vscode/
.idea/

# OS
.DS_Store
Thumbs.db

# Logs
*.log
npm-debug.log*

# Testing
coverage/
.nyc_output/

# Prisma
prisma/migrations/

# Misc
*.tsbuildinfo
next-env.d.ts
'''


# =============================================================================
# Model Loading
# =============================================================================

MODEL_7B_ID = os.environ.get("MODEL_ID", "orch-ai/ORCH-7B")
MODEL_3B_ID = "raihan-js/orch-nextjs-3b"
USE_7B = os.environ.get("USE_7B", "true").lower() == "true"

print(f"[ORCH] Model selection: {'ORCH-7B' if USE_7B else 'ORCH-3B'}")
print(f"[ORCH] Model ID: {MODEL_7B_ID if USE_7B else MODEL_3B_ID}")

# Module-level model state, populated by the loaders below.
model = None
tokenizer = None
MODEL_TYPE = None          # "7b" (transformers) or "3b" (native OrchForCausalLM)
MODEL_LOADED = False
ERROR_MSG = ""
MODEL_LOAD_TIME = None     # seconds, set after a successful load


def load_7b_model():
    """Load the ORCH-7B checkpoint via transformers (GPU if available, else CPU)."""
    global model, tokenizer, MODEL_TYPE, MODEL_LOAD_TIME
    from transformers import AutoModelForCausalLM, AutoTokenizer
    import time
    hf_token = os.environ.get("HF_TOKEN")
    print(f"[ORCH] Loading ORCH-7B from {MODEL_7B_ID}...")
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(MODEL_7B_ID, trust_remote_code=True, token=hf_token)
    # Check if GPU is available and load accordingly
    if torch.cuda.is_available():
        print("[ORCH] GPU detected, loading with device_map='auto'...")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_7B_ID,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            token=hf_token
        )
    else:
        print("[ORCH] No GPU, loading on CPU (this will be slow)...")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_7B_ID,
            torch_dtype=torch.float32,  # Use float32 for CPU
            trust_remote_code=True,
            token=hf_token,
            low_cpu_mem_usage=True
        )
    model.eval()
    MODEL_TYPE = "7b"
    MODEL_LOAD_TIME = time.time() - start_time
    print(f"[ORCH] ORCH-7B loaded successfully in {MODEL_LOAD_TIME:.1f}s!")


def load_3b_model():
    """Load the native ORCH-3B checkpoint and its tokenizers Tokenizer."""
    global model, tokenizer, MODEL_TYPE, MODEL_LOAD_TIME
    import time
    print(f"[ORCH] Loading ORCH-3B from {MODEL_3B_ID}...")
    start_time = time.time()
    model = OrchForCausalLM.from_pretrained(MODEL_3B_ID, device="cpu")
    model.eval()
    tokenizer_path = hf_hub_download(repo_id=MODEL_3B_ID, filename="tokenizer.json")
    tokenizer = Tokenizer.from_file(tokenizer_path)
    MODEL_TYPE = "3b"
    MODEL_LOAD_TIME = time.time() - start_time
    print(f"[ORCH] ORCH-3B loaded successfully in {MODEL_LOAD_TIME:.1f}s!")


# Load at import time: prefer 7B, fall back to 3B, and record any failure
# so the UI can report it instead of crashing the Space.
try:
    if USE_7B:
        try:
            load_7b_model()
            MODEL_LOADED = True
        except Exception as e7b:
            print(f"[ORCH] ORCH-7B not available: {e7b}")
            print("[ORCH] Falling back to ORCH-3B...")
            load_3b_model()
            MODEL_LOADED = True
    else:
        load_3b_model()
        MODEL_LOADED = True
except Exception as e:
    MODEL_LOADED = False
    ERROR_MSG = str(e)
    print(f"[ORCH] Error loading model: {e}")


# =============================================================================
# Generation Functions
# =============================================================================

# Alpaca-style instruction template used for the 7B model.
INSTRUCTION_TEMPLATE = """### Instruction:
{instruction}

### Response:
"""


@spaces.GPU(duration=60)
def generate_project_gpu(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[str, str, Optional[str]]:
    """Generate project with GPU acceleration."""
    return _generate_project_core(prompt, template, max_tokens, temperature, top_p, include_extras, True, progress)


def generate_project_cpu(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 2048,  # Lower for CPU
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[str, str, Optional[str]]:
    """Generate project on CPU (slower but always available)."""
    return _generate_project_core(prompt, template, max_tokens, temperature, top_p, include_extras, False, progress)


@spaces.GPU(duration=60)
def generate_project(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[str, str, Optional[str]]:
    """Generate a complete Next.js project using GPU (ZeroGPU required).

    NOTE(review): identical to generate_project_gpu; kept for backward
    compatibility with existing UI wiring.
    """
    return _generate_project_core(prompt, template, max_tokens, temperature, top_p, include_extras, True, progress)


def _generate_project_core(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    use_gpu: bool = True,
    progress=None
) -> Tuple[str, str, Optional[str]]:
    """Core implementation for project generation.

    Returns (status_message, raw_generation_preview, zip_path_or_None).
    """
    if not MODEL_LOADED:
        return f"Error: Model failed to load.\n\nDetails: {ERROR_MSG}", "", None
    if not prompt.strip() or len(prompt.strip()) < 10:
        return "Please enter a detailed project description (at least 10 characters).", "", None
    try:
        if progress is not None:
            progress(0.1, desc="Preparing generation...")
    except Exception:
        # FIX: was a bare `except:` (also caught KeyboardInterrupt/SystemExit).
        pass  # Progress might not be available in some contexts
    # FIX: honor use_gpu — the CPU entry point previously still picked CUDA
    # whenever a GPU happened to be visible.
    device = torch.device("cuda" if (use_gpu and torch.cuda.is_available()) else "cpu")
    print(f"[ORCH] Using device: {device}")
    # Apply template prefix
    template_info = PROJECT_TEMPLATES.get(template, PROJECT_TEMPLATES["blank"])
    full_prompt = template_info["prefix"] + prompt.strip()

    def update_progress(val, desc=""):
        try:
            if progress is not None:
                progress(val, desc=desc)
        except Exception:
            pass  # Progress might not be available

    if MODEL_TYPE == "7b":
        update_progress(0.2, f"Generating with ORCH-7B on {device}...")
        instruction = f"Create a complete Next.js full-stack application: {full_prompt}"
        formatted_prompt = INSTRUCTION_TEMPLATE.format(instruction=instruction)
        inputs = tokenizer(formatted_prompt, return_tensors="pt", truncation=True, max_length=2048).to(device)
        update_progress(0.3, f"Generating project on {'GPU' if use_gpu else 'CPU'} (this may take a while)...")
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
            )
        # Decode only the newly generated portion; keep special tokens since
        # the parser may rely on the <|file|> markers.
        input_length = inputs['input_ids'].shape[1]
        generated_ids = outputs[0][input_length:]
        generated = tokenizer.decode(generated_ids, skip_special_tokens=False)
    else:
        update_progress(0.2, f"Loading ORCH-3B to {'GPU' if use_gpu else 'CPU'}...")
        model.to(device)
        if use_gpu and torch.cuda.is_available():
            model.to(torch.bfloat16)
        formatted_prompt = f"// {full_prompt}\n"
        encoded = tokenizer.encode(formatted_prompt)
        input_ids = torch.tensor([encoded.ids], device=device)
        update_progress(0.3, "Generating code...")
        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=50,
                do_sample=True,
            )
        generated = tokenizer.decode(output_ids[0].tolist())

    update_progress(0.8, "Processing output...")
    files = parse_project_output(generated)
    if files:
        update_progress(0.9, "Creating project archive...")
        project_name = "orch-project-" + datetime.now().strftime("%Y%m%d-%H%M%S")
        zip_path = create_zip_archive(files, project_name, include_extras)
        file_list = "\n".join([f"  {f}" for f in sorted(files.keys())[:20]])
        if len(files) > 20:
            file_list += f"\n  ... and {len(files) - 20} more files"
        status = f"Successfully generated {len(files)} files!\n\nTemplate: {template_info['name']}\nDevice: {'GPU' if use_gpu else 'CPU'}\n\nFiles:\n{file_list}"
        update_progress(1.0, "Done!")
        return status, generated[:10000], zip_path
    else:
        update_progress(1.0, "Done!")
        return "Code generated (no structured project detected - try a more detailed prompt)", generated[:10000], None


def _generate_code_core(
    prompt: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50,
    use_gpu: bool = True
):
    """Core implementation for single-snippet code generation."""
    if not MODEL_LOADED:
        return f"Error: Model failed to load.\n\nDetails: {ERROR_MSG}"
    if not prompt.strip():
        return "Please enter a prompt describing what you want to generate."
    # Get the device the model is currently on (don't try to move it)
    if hasattr(model, 'device'):
        device = model.device
    else:
        # For models with device_map="auto", get device from first parameter
        device = next(model.parameters()).device
    print(f"[ORCH] Code generation using device: {device}")
    if MODEL_TYPE == "7b":
        instruction = prompt.strip()
        formatted_prompt = INSTRUCTION_TEMPLATE.format(instruction=instruction)
        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                repetition_penalty=1.1,
            )
        input_length = inputs['input_ids'].shape[1]
        generated = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
        return generated
    else:
        encoded = tokenizer.encode(f"// {prompt.strip()}\n")
        input_ids = torch.tensor([encoded.ids], device=device)
        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                do_sample=True
            )
        return tokenizer.decode(output_ids[0].tolist())


@spaces.GPU
def generate_code_gpu(
    prompt: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50
):
    """Generate code with GPU."""
    return _generate_code_core(prompt, max_tokens, temperature, top_p, top_k, True)


def generate_code_cpu(
    prompt: str,
    max_tokens: int = 512,  # Lower for CPU
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50
):
    """Generate code on CPU."""
    return _generate_code_core(prompt, max_tokens, temperature, top_p, top_k, False)


@spaces.GPU
def generate_code(
    prompt: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50
):
    """Generate Next.js code snippet using GPU (ZeroGPU required)."""
    return _generate_code_core(prompt, max_tokens, temperature, top_p, top_k, True)


def get_model_info() -> str:
    """Return a human-readable summary of the loaded model and GPU state."""
    if not MODEL_LOADED:
        return f"Model Status: Failed to load\nError: {ERROR_MSG}"
    gpu_info = "Available" if torch.cuda.is_available() else "Not available"
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        gpu_info = f"{gpu_name} ({gpu_memory:.1f} GB)"
    return f"""Model: ORCH-{MODEL_TYPE.upper()}
Repository: {MODEL_7B_ID if MODEL_TYPE == "7b" else MODEL_3B_ID}
Load Time: {MODEL_LOAD_TIME:.1f}s
GPU: {gpu_info}
Status: Ready"""


# =============================================================================
# Professional UI with Brand Colors
# =============================================================================

# Brand Colors (Khaki/Earthy)
BRAND_PRIMARY = "#D4A574"
BRAND_PRIMARY_MID = "#C4956A"
BRAND_PRIMARY_DARK = "#A67C52"
BRAND_DEEP = "#5D4E37"
BRAND_BG_DARK = "#1a1512"
BRAND_BG_LIGHT = "#2d2420"
BRAND_TEXT = "#E8DED5"
BRAND_TEXT_MUTED = "#9C8B7A"
Include dark mode."], ["Build a task management app with kanban boards, drag-and-drop, user authentication, and team collaboration features."], ["Create an e-commerce store for digital products with Stripe payments, download management, and customer dashboard."], ["Build a personal portfolio website for a developer with project showcase, blog, skills section, and contact form."], ["Create a real-time chat application with channels, direct messaging, file sharing, and user presence indicators."], ["Build a restaurant booking system with table management, menu display, online reservations, and admin panel."], ] CODE_EXAMPLES = [ ["Create a Next.js dashboard page with user statistics cards showing total users, active sessions, and revenue"], ["Build a login form component with email and password fields, validation, and error handling"], ["Generate an API route for user authentication that handles login with JWT tokens"], ["Create a responsive navbar component with logo, navigation links, and mobile hamburger menu"], ["Build a product card component with image, title, price, rating, and add to cart button"], ["Create a data table component with sorting, filtering, and pagination"], ] MODEL_INFO = f"ORCH-{MODEL_TYPE.upper() if MODEL_TYPE else '7B'}" # Professional CSS CUSTOM_CSS = f""" /* Global Styles */ .gradio-container {{ max-width: 1400px !important; margin: 0 auto !important; background: linear-gradient(180deg, {BRAND_BG_DARK} 0%, #0f0d0b 100%) !important; min-height: 100vh; }} .dark {{ --background-fill-primary: {BRAND_BG_DARK} !important; --background-fill-secondary: {BRAND_BG_LIGHT} !important; --border-color-primary: {BRAND_DEEP} !important; }} /* Header */ .header-container {{ background: linear-gradient(135deg, {BRAND_BG_LIGHT} 0%, {BRAND_BG_DARK} 100%); border: 1px solid {BRAND_DEEP}; border-radius: 16px; padding: 2rem; margin-bottom: 1.5rem; text-align: center; }} .logo-container {{ display: flex; align-items: center; justify-content: center; gap: 1rem; 
margin-bottom: 0.75rem; }} .logo-icon {{ width: 64px; height: 64px; border-radius: 16px; }} .brand-title {{ font-size: 2.5rem; font-weight: 700; background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; margin: 0; }} .brand-tagline {{ color: {BRAND_TEXT_MUTED}; font-size: 1rem; margin: 0.25rem 0; letter-spacing: 0.5px; }} .brand-subtitle {{ color: {BRAND_TEXT}; font-size: 1.1rem; margin: 0.5rem 0; }} .model-badge {{ display: inline-flex; align-items: center; gap: 0.5rem; background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%); color: {BRAND_BG_DARK}; padding: 0.5rem 1rem; border-radius: 20px; font-weight: 600; font-size: 0.9rem; margin-top: 0.75rem; }} .model-badge-dot {{ width: 8px; height: 8px; background: {BRAND_BG_DARK}; border-radius: 50%; animation: pulse 2s infinite; }} @keyframes pulse {{ 0%, 100% {{ opacity: 1; }} 50% {{ opacity: 0.5; }} }} /* Tabs */ .tabs {{ border: none !important; background: transparent !important; }} .tab-nav {{ background: {BRAND_BG_LIGHT} !important; border: 1px solid {BRAND_DEEP} !important; border-radius: 12px !important; padding: 0.5rem !important; margin-bottom: 1rem !important; }} .tab-nav button {{ background: transparent !important; border: none !important; color: {BRAND_TEXT_MUTED} !important; padding: 0.75rem 1.5rem !important; border-radius: 8px !important; font-weight: 500 !important; transition: all 0.2s ease !important; }} .tab-nav button.selected {{ background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%) !important; color: {BRAND_BG_DARK} !important; }} .tab-nav button:hover:not(.selected) {{ background: rgba(212, 165, 116, 0.1) !important; color: {BRAND_PRIMARY} !important; }} /* Input Fields */ .input-container textarea, .input-container input {{ background: {BRAND_BG_LIGHT} !important; border: 1px solid {BRAND_DEEP} !important; border-radius: 
12px !important; color: {BRAND_TEXT} !important; padding: 1rem !important; font-size: 1rem !important; transition: border-color 0.2s ease !important; }} .input-container textarea:focus, .input-container input:focus {{ border-color: {BRAND_PRIMARY} !important; box-shadow: 0 0 0 3px rgba(212, 165, 116, 0.1) !important; }} /* Buttons */ .primary-btn {{ background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%) !important; color: {BRAND_BG_DARK} !important; border: none !important; border-radius: 12px !important; padding: 0.875rem 2rem !important; font-weight: 600 !important; font-size: 1rem !important; cursor: pointer !important; transition: all 0.2s ease !important; box-shadow: 0 4px 12px rgba(212, 165, 116, 0.3) !important; }} .primary-btn:hover {{ transform: translateY(-2px) !important; box-shadow: 0 6px 20px rgba(212, 165, 116, 0.4) !important; }} /* Template Cards */ .template-card {{ background: {BRAND_BG_LIGHT}; border: 1px solid {BRAND_DEEP}; border-radius: 12px; padding: 1rem; cursor: pointer; transition: all 0.2s ease; }} .template-card:hover {{ border-color: {BRAND_PRIMARY}; transform: translateY(-2px); }} .template-card.selected {{ border-color: {BRAND_PRIMARY}; background: rgba(212, 165, 116, 0.1); }} /* Info Box */ .info-box {{ background: {BRAND_BG_LIGHT}; border: 1px solid {BRAND_DEEP}; border-radius: 12px; padding: 1rem; color: {BRAND_TEXT}; font-family: monospace; font-size: 0.9rem; }} /* Footer */ .footer {{ text-align: center; padding: 2rem; color: {BRAND_TEXT_MUTED}; font-size: 0.9rem; }} .footer a {{ color: {BRAND_PRIMARY}; text-decoration: none; }} .footer a:hover {{ text-decoration: underline; }} """ # ============================================================================= # Gradio Interface # ============================================================================= with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Base(), title="ORCH Studio") as demo: # Header gr.HTML(f"""
ORCH

ORCH Studio

Orchestrated Recursive Code Hierarchy

Generate complete, production-ready Next.js applications from natural language

{MODEL_INFO} - QLoRA Fine-tuned (43h A100 Training)
""") with gr.Tabs(): # Tab 1: Full Project Generation with gr.TabItem("Full Project", id="project"): with gr.Row(): with gr.Column(scale=2): project_prompt = gr.Textbox( label="Project Description", placeholder="Describe your Next.js application in detail. Include features, pages, and functionality you want...", lines=6, elem_classes=["input-container"] ) with gr.Row(): template_dropdown = gr.Dropdown( label="Project Template", choices=[(v["name"], k) for k, v in PROJECT_TEMPLATES.items()], value="blank", info="Select a template to get started faster" ) include_extras = gr.Checkbox( label="Include extras (.gitignore, .env.example)", value=True ) with gr.Row(): max_tokens = gr.Slider( minimum=1024, maximum=8192, value=4096, step=256, label="Max Tokens", info="More tokens = larger project" ) temperature = gr.Slider( minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature", info="Higher = more creative" ) top_p = gr.Slider( minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top P", info="Nucleus sampling" ) generate_btn = gr.Button( "Generate Project", variant="primary", size="lg", elem_classes=["primary-btn"] ) gr.Examples( examples=PROJECT_EXAMPLES, inputs=project_prompt, label="Example Prompts" ) with gr.Column(scale=1): status_output = gr.Textbox( label="Generation Status", lines=12, interactive=False, elem_classes=["status-box"] ) download_file = gr.File( label="Download Project", elem_classes=["file-download"] ) with gr.Accordion("Generated Code Preview", open=False): code_preview = gr.Code( label="Raw Output", language="markdown", lines=20, elem_classes=["code-output"] ) # Tab 2: Code Snippet Generation with gr.TabItem("Code Snippet", id="snippet"): with gr.Row(): with gr.Column(scale=2): code_prompt = gr.Textbox( label="Code Request", placeholder="Describe the component, function, or feature you want to generate...", lines=4, elem_classes=["input-container"] ) with gr.Row(): code_max_tokens = gr.Slider( minimum=256, maximum=2048, value=1024, 
step=128, label="Max Tokens" ) code_temperature = gr.Slider( minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature" ) code_top_p = gr.Slider( minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top P" ) code_generate_btn = gr.Button( "Generate Code", variant="primary", size="lg", elem_classes=["primary-btn"] ) gr.Examples( examples=CODE_EXAMPLES, inputs=code_prompt, label="Example Prompts" ) with gr.Column(scale=2): code_output = gr.Code( label="Generated Code", language="typescript", lines=25, elem_classes=["code-output"] ) # Tab 3: Model Info with gr.TabItem("Model Info", id="info"): with gr.Row(): with gr.Column(): gr.Markdown(f""" ## ORCH-7B Model **Latest QLoRA Fine-tuned Model** (January 2025) | Specification | Value | |--------------|-------| | Base Model | DeepSeek Coder 6.7B Instruct | | Fine-tuning | QLoRA (4-bit quantization + LoRA) | | Training Time | 43 hours on A100 GPU | | Training Steps | 5,238 steps | | Focus | Next.js 14+ full-stack applications | | Output | Complete downloadable projects | ### Training Data - Curated Next.js 14+ projects from GitHub - Synthetic instruction-response pairs - Focus on TypeScript, Tailwind CSS, Prisma ### Capabilities - Full-stack application generation - Component and API route creation - Database schema design - Authentication patterns - Responsive UI with Tailwind CSS ### Links - [ORCH-7B on HuggingFace](https://huggingface.co/orch-ai/ORCH-7B) - [ORCH AI Organization](https://huggingface.co/orch-ai) - [raihan-js/orch-7b](https://huggingface.co/raihan-js/orch-7b) """) with gr.Column(): model_info_display = gr.Textbox( label="Current Session", value=get_model_info(), lines=8, interactive=False, elem_classes=["info-box"] ) refresh_btn = gr.Button("Refresh Status") refresh_btn.click(fn=get_model_info, outputs=model_info_display) # Footer gr.HTML(f""" """) # Event handlers generate_btn.click( fn=generate_project, inputs=[project_prompt, template_dropdown, max_tokens, temperature, top_p, include_extras], 
outputs=[status_output, code_preview, download_file] ) code_generate_btn.click( fn=generate_code, inputs=[code_prompt, code_max_tokens, code_temperature, code_top_p], outputs=code_output ) if __name__ == "__main__": demo.launch()