# orch-studio / app.py
# Source: Hugging Face Space "raihan-js/orch-studio"
# v1.0.1: Fixed GPU duration to 60s for ZeroGPU compatibility (commit ae61038, verified)
"""
ORCH Studio - Autonomous Next.js Code Generation
Generate complete, production-ready Next.js applications from natural language prompts.
Now powered by the latest QLoRA fine-tuned ORCH-7B model (43h training on A100).
https://huggingface.co/orch-ai
Version: 1.0.1 - Fixed GPU duration limits for ZeroGPU
"""
# Standard library
import os
import re
import json
import math
import tempfile
import zipfile
from dataclasses import dataclass
from datetime import datetime
from datetime import timezone
from pathlib import Path
from typing import Optional, Dict, Any, Tuple, List

# Third-party
import torch
import torch.nn as nn
import torch.nn.functional as F
import gradio as gr
import spaces
from huggingface_hub import hf_hub_download
from tokenizers import Tokenizer
# =============================================================================
# Model Configuration
# =============================================================================
@dataclass
class OrchConfig:
    """Hyperparameters for the Orch transformer architecture."""
    model_name: str = "orch"
    model_size: str = "3b"
    vocab_size: int = 32000
    hidden_size: int = 2560
    intermediate_size: int = 10240
    num_hidden_layers: int = 32
    num_attention_heads: int = 32
    num_key_value_heads: int = 8
    max_position_embeddings: int = 16384
    rms_norm_eps: float = 1e-5
    rope_theta: float = 10000.0
    hidden_dropout: float = 0.0
    attention_dropout: float = 0.0
    use_flash_attention: bool = False
    tie_word_embeddings: bool = False
    rope_scaling_type: Optional[str] = None
    rope_scaling_factor: float = 1.0
    original_max_position_embeddings: Optional[int] = None
    initializer_range: float = 0.02
    @property
    def head_dim(self) -> int:
        """Dimensionality of each attention head."""
        return self.hidden_size // self.num_attention_heads
    @property
    def num_kv_groups(self) -> int:
        """How many query heads share one key/value head (GQA ratio)."""
        return self.num_attention_heads // self.num_key_value_heads
    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> "OrchConfig":
        """Build a config, silently dropping keys the dataclass doesn't declare."""
        known = cls.__dataclass_fields__
        return cls(**{key: value for key, value in config_dict.items() if key in known})
    @classmethod
    def load(cls, path: str) -> "OrchConfig":
        """Read a JSON config file and build an OrchConfig from it."""
        with open(path, "r") as handle:
            return cls.from_dict(json.load(handle))
# =============================================================================
# Model Layers
# =============================================================================
class RMSNorm(nn.Module):
    """Root-mean-square layer normalization (no mean-centering, no bias)."""
    def __init__(self, hidden_size: int, eps: float = 1e-6):
        super().__init__()
        # Learnable per-channel gain, initialized to ones.
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.eps = eps
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Normalize by the RMS over the last dimension, then rescale.
        scale = torch.sqrt(x.pow(2).mean(dim=-1, keepdim=True) + self.eps)
        return self.weight * (x / scale)
class SwiGLU(nn.Module):
    """Gated feed-forward block: down_proj(silu(gate_proj(x)) * up_proj(x))."""
    def __init__(self, hidden_size: int, intermediate_size: int, bias: bool = False, dropout: float = 0.0):
        super().__init__()
        self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=bias)
        self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=bias)
        self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=bias)
        # Identity when dropout is disabled, so forward() stays branch-free.
        self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        gated = F.silu(self.gate_proj(x)) * self.up_proj(x)
        return self.down_proj(self.dropout(gated))
class RotaryEmbedding(nn.Module):
    """Rotary position embedding (RoPE) with a lazily grown cos/sin cache.

    Supports optional "linear" position-interpolation scaling: positions are
    divided by ``scaling_factor`` before the frequency outer product.
    """
    def __init__(
        self,
        dim: int,
        max_position_embeddings: int = 2048,
        base: float = 10000.0,
        scaling_type: Optional[str] = None,
        scaling_factor: float = 1.0,
        original_max_position_embeddings: Optional[int] = None,
    ):
        super().__init__()
        self.dim = dim
        self.max_position_embeddings = max_position_embeddings
        self.base = base
        self.scaling_type = scaling_type
        self.scaling_factor = scaling_factor
        self.original_max_position_embeddings = original_max_position_embeddings or max_position_embeddings
        # Per-pair inverse frequencies: base^(-2i/dim) for i in [0, dim/2).
        inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float() / self.dim))
        self.register_buffer("inv_freq", inv_freq, persistent=False)
        self._set_cos_sin_cache(max_position_embeddings)
    def _set_cos_sin_cache(self, seq_len: int):
        """(Re)build the cos/sin tables covering positions [0, seq_len)."""
        self.max_seq_len_cached = seq_len
        t = torch.arange(seq_len, device=self.inv_freq.device, dtype=self.inv_freq.dtype)
        if self.scaling_type == "linear":
            # Position interpolation: squeeze positions into the trained range.
            t = t / self.scaling_factor
        freqs = torch.outer(t, self.inv_freq)
        # Duplicate so the table spans the full head dim (both rotated halves).
        emb = torch.cat((freqs, freqs), dim=-1)
        self.register_buffer("cos_cached", emb.cos(), persistent=False)
        self.register_buffer("sin_cached", emb.sin(), persistent=False)
    def forward(self, x: torch.Tensor, position_ids: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return (cos, sin) gathered at ``position_ids``, cast to ``x``'s dtype."""
        seq_len = position_ids.max().item() + 1
        if seq_len > self.max_seq_len_cached:
            # Grow the cache on demand for sequences longer than seen so far.
            self._set_cos_sin_cache(seq_len)
        cos = self.cos_cached[position_ids].to(x.dtype)
        sin = self.sin_cached[position_ids].to(x.dtype)
        return cos, sin
def rotate_half(x: torch.Tensor) -> torch.Tensor:
    """Swap the two halves of the last dim, negating the (new) first half."""
    half = x.shape[-1] // 2
    first, second = x[..., :half], x[..., half:]
    return torch.cat((-second, first), dim=-1)
def apply_rotary_pos_emb(q, k, cos, sin):
    """Rotate query and key tensors by the cached cos/sin position tables."""
    # Insert a head axis so the tables broadcast over (batch, heads, seq, dim).
    cos, sin = cos.unsqueeze(1), sin.unsqueeze(1)
    rotated_q = q * cos + rotate_half(q) * sin
    rotated_k = k * cos + rotate_half(k) * sin
    return rotated_q, rotated_k
def create_causal_mask(seq_len: int, device: torch.device, dtype: torch.dtype = torch.float32) -> torch.Tensor:
    """Additive causal mask: 0 on/below the diagonal, -inf strictly above."""
    filled = torch.full((seq_len, seq_len), float("-inf"), device=device, dtype=dtype)
    mask = torch.triu(filled, diagonal=1)
    # Shape (1, 1, seq, seq) so it broadcasts over batch and head axes.
    return mask[None, None, :, :]
class OrchAttention(nn.Module):
    """Multi-head self-attention with grouped-query KV heads and RoPE."""
    def __init__(self, config: OrchConfig, layer_idx: int):
        super().__init__()
        self.config = config
        self.layer_idx = layer_idx
        self.hidden_size = config.hidden_size
        self.num_heads = config.num_attention_heads
        self.num_kv_heads = config.num_key_value_heads
        self.head_dim = config.head_dim
        self.num_kv_groups = config.num_kv_groups
        self.attention_dropout = config.attention_dropout
        # K/V projections are smaller than Q when num_kv_heads < num_heads (GQA).
        self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)
        self.k_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False)
        self.v_proj = nn.Linear(self.hidden_size, self.num_kv_heads * self.head_dim, bias=False)
        self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)
        self.rotary_emb = RotaryEmbedding(
            self.head_dim,
            max_position_embeddings=config.max_position_embeddings,
            base=config.rope_theta,
            scaling_type=config.rope_scaling_type,
            scaling_factor=config.rope_scaling_factor,
            original_max_position_embeddings=config.original_max_position_embeddings,
        )
    def forward(self, hidden_states, attention_mask=None, position_ids=None):
        """Standard scaled-dot-product attention; no KV cache is kept.

        ``attention_mask`` is expected to be an additive mask (e.g. the causal
        mask from create_causal_mask) broadcastable to the score tensor.
        """
        batch_size, seq_len, _ = hidden_states.shape
        # Project and reshape to (batch, heads, seq, head_dim).
        query_states = self.q_proj(hidden_states).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
        key_states = self.k_proj(hidden_states).view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(batch_size, seq_len, self.num_kv_heads, self.head_dim).transpose(1, 2)
        cos, sin = self.rotary_emb(value_states, position_ids)
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
        # Expand the shared KV heads so every query head has a matching K/V.
        if self.num_kv_groups > 1:
            key_states = key_states.repeat_interleave(self.num_kv_groups, dim=1)
            value_states = value_states.repeat_interleave(self.num_kv_groups, dim=1)
        scale = 1.0 / math.sqrt(self.head_dim)
        attn_weights = torch.matmul(query_states, key_states.transpose(-2, -1)) * scale
        if attention_mask is not None:
            attn_weights = attn_weights + attention_mask
        # Softmax in float32 for numerical stability, then cast back.
        attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
        attn_weights = F.dropout(attn_weights, p=self.attention_dropout, training=self.training)
        attn_output = torch.matmul(attn_weights, value_states)
        attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, self.num_heads * self.head_dim)
        return self.o_proj(attn_output)
class OrchBlock(nn.Module):
    """Pre-norm transformer layer: attention then SwiGLU MLP, each with a residual."""
    def __init__(self, config: OrchConfig, layer_idx: int):
        super().__init__()
        self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.attention = OrchAttention(config, layer_idx)
        self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.feed_forward = SwiGLU(config.hidden_size, config.intermediate_size, dropout=config.hidden_dropout)
        self.dropout = nn.Dropout(config.hidden_dropout)
    def forward(self, hidden_states, attention_mask=None, position_ids=None):
        # Attention sub-layer: pre-norm, attend, dropout, residual add.
        attn_out = self.attention(self.input_layernorm(hidden_states), attention_mask, position_ids)
        hidden_states = hidden_states + self.dropout(attn_out)
        # Feed-forward sub-layer: pre-norm, MLP, dropout, residual add.
        mlp_out = self.feed_forward(self.post_attention_layernorm(hidden_states))
        return hidden_states + self.dropout(mlp_out)
class OrchModel(nn.Module):
    """Decoder-only transformer backbone: embeddings -> N blocks -> final norm."""
    def __init__(self, config: OrchConfig):
        super().__init__()
        self.config = config
        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size)
        self.embed_dropout = nn.Dropout(config.hidden_dropout)
        self.layers = nn.ModuleList([OrchBlock(config, i) for i in range(config.num_hidden_layers)])
        self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
    def forward(self, input_ids, attention_mask=None, position_ids=None):
        """Return final hidden states, shape (batch, seq, hidden_size).

        NOTE: ``attention_mask`` is accepted but never used — a fresh causal
        mask is always built here, so padding masks are ignored.
        """
        batch_size, seq_len = input_ids.shape
        device = input_ids.device
        if position_ids is None:
            # Default to contiguous positions 0..seq_len-1 for every row.
            position_ids = torch.arange(seq_len, device=device).unsqueeze(0).expand(batch_size, -1)
        hidden_states = self.embed_tokens(input_ids)
        hidden_states = self.embed_dropout(hidden_states)
        # Additive causal mask, rebuilt on every call (no KV cache in this model).
        causal_mask = create_causal_mask(seq_len, device, hidden_states.dtype)
        for layer in self.layers:
            hidden_states = layer(hidden_states, causal_mask, position_ids)
        return self.norm(hidden_states)
class OrchForCausalLM(nn.Module):
    """OrchModel plus a language-modeling head, with sampling-based generation."""
    def __init__(self, config: OrchConfig):
        super().__init__()
        self.config = config
        self.model = OrchModel(config)
        if config.tie_word_embeddings:
            # Tied weights: the input embedding matrix doubles as the output head.
            self.lm_head = None
        else:
            self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
    def forward(self, input_ids, attention_mask=None, position_ids=None, labels=None):
        """Return {"logits": ...}; adds {"loss": ...} when labels are given.

        Loss is next-token cross-entropy with labels shifted left by one;
        label positions equal to -100 are ignored.
        """
        hidden_states = self.model(input_ids, attention_mask, position_ids)
        if self.lm_head is not None:
            logits = self.lm_head(hidden_states)
        else:
            # Tied-embedding projection.
            logits = F.linear(hidden_states, self.model.embed_tokens.weight)
        output = {"logits": logits}
        if labels is not None:
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss = F.cross_entropy(shift_logits.view(-1, self.config.vocab_size), shift_labels.view(-1), ignore_index=-100)
            output["loss"] = loss
        return output
    @torch.no_grad()
    def generate(self, input_ids, max_new_tokens=512, temperature=0.7, top_k=50, top_p=0.9, do_sample=True, eos_token_id=None):
        """Autoregressive decoding with temperature, top-k and nucleus (top-p) filtering.

        No KV cache: the whole (window-truncated) sequence is re-encoded each
        step, so each step's cost grows with sequence length. Stops early only
        when every sequence in the batch emits ``eos_token_id``.
        """
        self.eval()
        for _ in range(max_new_tokens):
            # Sliding window: keep only the most recent max_position_embeddings tokens.
            if input_ids.shape[1] > self.config.max_position_embeddings:
                input_ids = input_ids[:, -self.config.max_position_embeddings:]
            outputs = self.forward(input_ids)
            next_token_logits = outputs["logits"][:, -1, :]
            if temperature != 1.0:
                next_token_logits = next_token_logits / temperature
            if top_k is not None:
                # Mask everything strictly below the k-th largest logit.
                indices_to_remove = next_token_logits < torch.topk(next_token_logits, top_k)[0][..., -1, None]
                next_token_logits[indices_to_remove] = float("-inf")
            if top_p is not None:
                # Nucleus filtering: drop the tail once cumulative probability
                # exceeds top_p, always keeping at least the most likely token.
                sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
                sorted_indices_to_remove = cumulative_probs > top_p
                # Shift right so the token that crosses the threshold is kept.
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                sorted_indices_to_remove[..., 0] = False
                indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
                next_token_logits[indices_to_remove] = float("-inf")
            if do_sample:
                probs = F.softmax(next_token_logits, dim=-1)
                next_tokens = torch.multinomial(probs, num_samples=1)
            else:
                next_tokens = torch.argmax(next_token_logits, dim=-1, keepdim=True)
            input_ids = torch.cat([input_ids, next_tokens], dim=1)
            if eos_token_id is not None and (next_tokens == eos_token_id).all():
                break
        return input_ids
    @classmethod
    def from_pretrained(cls, repo_id: str, device: str = "cpu"):
        """Download config.json and model.pt from the HF Hub and build the model."""
        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
        model_path = hf_hub_download(repo_id=repo_id, filename="model.pt")
        config = OrchConfig.load(config_path)
        model = cls(config)
        # weights_only=True avoids unpickling arbitrary objects from the checkpoint.
        state_dict = torch.load(model_path, map_location=device, weights_only=True)
        model.load_state_dict(state_dict)
        return model.to(device)
# =============================================================================
# Project Templates
# =============================================================================
# Prompt templates: each entry's "prefix" is prepended to the user's prompt to
# steer generation toward a project archetype; "blank" adds nothing.
PROJECT_TEMPLATES = {
    "blank": {
        "name": "Blank Project",
        "description": "Start from scratch with a minimal Next.js setup",
        "prefix": ""
    },
    "saas": {
        "name": "SaaS Application",
        "description": "Full SaaS with auth, billing, dashboard",
        "prefix": "Create a modern SaaS application with user authentication, subscription billing integration, admin dashboard, and "
    },
    "ecommerce": {
        "name": "E-Commerce Store",
        "description": "Online store with products, cart, checkout",
        "prefix": "Build an e-commerce store with product catalog, shopping cart, checkout flow, order management, and "
    },
    "dashboard": {
        "name": "Admin Dashboard",
        "description": "Data visualization and management dashboard",
        "prefix": "Create an admin dashboard with data tables, charts, user management, settings, and "
    },
    "portfolio": {
        "name": "Portfolio Website",
        "description": "Personal/professional portfolio site",
        "prefix": "Build a modern portfolio website with project showcase, about section, skills display, contact form, and "
    },
    "blog": {
        "name": "Blog Platform",
        "description": "Content management and blog system",
        "prefix": "Create a blog platform with markdown support, categories, tags, comments, search, and "
    },
    "api": {
        "name": "API Backend",
        "description": "RESTful API with authentication",
        "prefix": "Build a RESTful API backend with JWT authentication, rate limiting, validation, error handling, and "
    }
}
# =============================================================================
# Project Parsing and Packaging
# =============================================================================
def parse_project_output(output: str) -> Dict[str, str]:
    """Parse raw model output into a {relative_path: file_content} dict.

    Two formats are recognised, in order:
      1. Markdown code fences whose info line carries a file path with an
         extension, e.g. ``\u0060\u0060\u0060tsx app/page.tsx``.
      2. Special-token format: <|file|>path<|end_path|>content<|end_file|>,
         tried only when the first format yields nothing.

    Files under node_modules and bodies of 10 characters or fewer are dropped
    for the markdown format.
    """
    files: Dict[str, str] = {}
    # Pattern 1: markdown fences; group 1 is the path (must contain an
    # extension), group 2 is the body up to the closing fence.
    md_pattern = re.compile(
        r'```(?:\w+\s+)?([^\n`]+\.[a-zA-Z]+)\n([\s\S]*?)```',
        re.MULTILINE
    )
    for match in md_pattern.finditer(output):
        file_path = match.group(1).strip()
        content = match.group(2).strip()
        # FIX: the original wrote `a and b and c or d`, which by operator
        # precedence ignored `content` whenever the path contained a dot.
        if file_path and content and ('/' in file_path or '.' in file_path):
            # Strip leading "./" segments only; str.lstrip('./') is a
            # character-class strip that would also eat the leading dot of
            # hidden files such as ".gitignore".
            while file_path.startswith('./'):
                file_path = file_path[2:]
            if not file_path.startswith('node_modules') and len(content) > 10:
                files[file_path] = content
    # Pattern 2: special-token format (fallback).
    if not files:
        special_pattern = re.compile(
            r'<\|file\|>([^<]+)<\|end_path\|>\s*([\s\S]*?)<\|end_file\|>',
            re.MULTILINE
        )
        for match in special_pattern.finditer(output):
            file_path = match.group(1).strip()
            content = match.group(2).strip()
            if file_path and content:
                files[file_path] = content
    return files
def create_zip_archive(files: Dict[str, str], project_name: str, include_extras: bool = True) -> str:
    """Write *files* into ``<tmpdir>/<project_name>.zip`` and return the zip path.

    File paths come from model output and are untrusted: absolute prefixes and
    ``.``/``..`` segments are removed so no entry can escape the archive root
    when the zip is later extracted (zip-slip hardening). A README is always
    added; with ``include_extras`` a default .env.example and .gitignore are
    added when the model did not emit them.

    NOTE: the temp directory is intentionally not cleaned up — Gradio serves
    the file from disk after this function returns.
    """
    temp_dir = tempfile.mkdtemp()
    zip_path = os.path.join(temp_dir, f"{project_name}.zip")
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for file_path, content in files.items():
            # Normalise separators, then drop empty, "." and ".." segments.
            clean = file_path.replace('\\', '/').lstrip('/')
            parts = [p for p in clean.split('/') if p not in ('', '.', '..')]
            if not parts:
                continue
            zipf.writestr(f"{project_name}/{'/'.join(parts)}", content)
        # Add README
        readme = generate_readme(project_name, len(files))
        zipf.writestr(f"{project_name}/README.md", readme)
        if include_extras:
            # Add .env.example if not present
            if ".env" not in files and ".env.example" not in files:
                zipf.writestr(f"{project_name}/.env.example", generate_env_example())
            # Add .gitignore if not present
            if ".gitignore" not in files:
                zipf.writestr(f"{project_name}/.gitignore", generate_gitignore())
    return zip_path
def generate_readme(project_name: str, file_count: int) -> str:
    """Generate the README.md contents for a generated project.

    FIX: the timestamp label says "UTC", but the original used the naive local
    ``datetime.now()``; it now uses ``datetime.now(timezone.utc)`` so the label
    is accurate.
    """
    return f'''# {project_name}
Generated by [ORCH Studio](https://huggingface.co/spaces/raihan-js/orch-studio) - Powered by ORCH-7B
## Quick Start
```bash
# Install dependencies
npm install
# Set up environment variables
cp .env.example .env
# Edit .env with your configuration
# Set up database (if using Prisma)
npx prisma generate
npx prisma db push
# Start development server
npm run dev
```
Open [http://localhost:3000](http://localhost:3000) to view your application.
## Tech Stack
- **Framework**: Next.js 14 (App Router)
- **Language**: TypeScript
- **Styling**: Tailwind CSS
- **Components**: Radix UI / shadcn/ui compatible
- **Database**: Prisma ORM (SQLite/PostgreSQL)
- **Authentication**: NextAuth.js patterns
## Project Structure
```
{project_name}/
├── app/ # Next.js App Router pages
├── components/ # React components
├── lib/ # Utility functions
├── prisma/ # Database schema
└── public/ # Static assets
```
## Scripts
- `npm run dev` - Start development server
- `npm run build` - Build for production
- `npm run start` - Start production server
- `npm run lint` - Run ESLint
## Generated Info
- **Files**: {file_count}
- **Generated**: {datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")}
- **Model**: ORCH-7B (QLoRA fine-tuned)
## Links
- [ORCH Studio](https://huggingface.co/spaces/raihan-js/orch-studio)
- [ORCH-7B Model](https://huggingface.co/orch-ai/ORCH-7B)
- [ORCH AI Organization](https://huggingface.co/orch-ai)
---
*Built with ORCH - Orchestrated Recursive Code Hierarchy*
'''
def generate_env_example() -> str:
    """Return default .env.example contents: database, NextAuth, OAuth,
    Stripe and SMTP placeholders (optional ones commented out)."""
    return '''# Database
DATABASE_URL="file:./dev.db"
# DATABASE_URL="postgresql://user:password@localhost:5432/mydb"
# Authentication (NextAuth.js)
NEXTAUTH_URL="http://localhost:3000"
NEXTAUTH_SECRET="your-secret-key-here"
# OAuth Providers (optional)
# GITHUB_ID=""
# GITHUB_SECRET=""
# GOOGLE_ID=""
# GOOGLE_SECRET=""
# Stripe (optional)
# STRIPE_SECRET_KEY=""
# STRIPE_WEBHOOK_SECRET=""
# NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=""
# Email (optional)
# SMTP_HOST=""
# SMTP_PORT=""
# SMTP_USER=""
# SMTP_PASSWORD=""
'''
def generate_gitignore() -> str:
    """Return a default .gitignore tailored to a Next.js/Prisma project."""
    return '''# Dependencies
node_modules/
.pnpm-store/
# Build
.next/
out/
build/
dist/
# Environment
.env
.env.local
.env.*.local
# Database
*.db
*.sqlite
# IDE
.vscode/
.idea/
# OS
.DS_Store
Thumbs.db
# Logs
*.log
npm-debug.log*
# Testing
coverage/
.nyc_output/
# Prisma
prisma/migrations/
# Misc
*.tsbuildinfo
next-env.d.ts
'''
# =============================================================================
# Model Loading
# =============================================================================
# Model repos: the 7B repo can be overridden via the MODEL_ID env var;
# set USE_7B=false to force the 3B fallback model.
MODEL_7B_ID = os.environ.get("MODEL_ID", "orch-ai/ORCH-7B")
MODEL_3B_ID = "raihan-js/orch-nextjs-3b"
USE_7B = os.environ.get("USE_7B", "true").lower() == "true"
print(f"[ORCH] Model selection: {'ORCH-7B' if USE_7B else 'ORCH-3B'}")
print(f"[ORCH] Model ID: {MODEL_7B_ID if USE_7B else MODEL_3B_ID}")
# Mutable module-level state populated by the load_* functions below.
model = None
tokenizer = None
MODEL_TYPE = None  # "7b" or "3b" once a model has loaded
MODEL_LOADED = False
ERROR_MSG = ""
MODEL_LOAD_TIME = None  # seconds, set after a successful load
def load_7b_model():
    """Load the ORCH-7B transformers model + tokenizer into module globals.

    Uses fp16 with device_map="auto" when a GPU is visible, otherwise fp32 on
    CPU. Exceptions from transformers/hub propagate to the caller, which
    falls back to the 3B model.
    """
    global model, tokenizer, MODEL_TYPE, MODEL_LOAD_TIME
    # Imported lazily so the app can still start when transformers is absent.
    from transformers import AutoModelForCausalLM, AutoTokenizer
    import time
    hf_token = os.environ.get("HF_TOKEN")
    print(f"[ORCH] Loading ORCH-7B from {MODEL_7B_ID}...")
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(MODEL_7B_ID, trust_remote_code=True, token=hf_token)
    # Check if GPU is available and load accordingly
    if torch.cuda.is_available():
        print("[ORCH] GPU detected, loading with device_map='auto'...")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_7B_ID,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            token=hf_token
        )
    else:
        print("[ORCH] No GPU, loading on CPU (this will be slow)...")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_7B_ID,
            torch_dtype=torch.float32,  # Use float32 for CPU
            trust_remote_code=True,
            token=hf_token,
            low_cpu_mem_usage=True
        )
    model.eval()
    MODEL_TYPE = "7b"
    MODEL_LOAD_TIME = time.time() - start_time
    print(f"[ORCH] ORCH-7B loaded successfully in {MODEL_LOAD_TIME:.1f}s!")
def load_3b_model():
    """Load the custom ORCH-3B model + tokenizers.Tokenizer into module globals.

    Always loads on CPU; the generation path moves it to GPU when one is
    available at request time.
    """
    global model, tokenizer, MODEL_TYPE, MODEL_LOAD_TIME
    import time
    print(f"[ORCH] Loading ORCH-3B from {MODEL_3B_ID}...")
    start_time = time.time()
    model = OrchForCausalLM.from_pretrained(MODEL_3B_ID, device="cpu")
    model.eval()
    tokenizer_path = hf_hub_download(repo_id=MODEL_3B_ID, filename="tokenizer.json")
    tokenizer = Tokenizer.from_file(tokenizer_path)
    MODEL_TYPE = "3b"
    MODEL_LOAD_TIME = time.time() - start_time
    print(f"[ORCH] ORCH-3B loaded successfully in {MODEL_LOAD_TIME:.1f}s!")
# Load a model at import time: prefer ORCH-7B, fall back to ORCH-3B when the
# 7B repo is unavailable; a terminal failure is recorded in ERROR_MSG and
# surfaced to users by the generation functions.
try:
    if USE_7B:
        try:
            load_7b_model()
            MODEL_LOADED = True
        except Exception as e7b:
            print(f"[ORCH] ORCH-7B not available: {e7b}")
            print("[ORCH] Falling back to ORCH-3B...")
            load_3b_model()
            MODEL_LOADED = True
    else:
        load_3b_model()
        MODEL_LOADED = True
except Exception as e:
    MODEL_LOADED = False
    ERROR_MSG = str(e)
    print(f"[ORCH] Error loading model: {e}")
# =============================================================================
# Generation Functions
# =============================================================================
# Alpaca-style prompt wrapper used for the instruction-tuned 7B model.
INSTRUCTION_TEMPLATE = """### Instruction:
{instruction}
### Response:
"""
@spaces.GPU(duration=60)
def generate_project_gpu(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[str, str, Optional[str]]:
    """GPU-accelerated wrapper around _generate_project_core (ZeroGPU, 60s budget)."""
    return _generate_project_core(
        prompt, template, max_tokens, temperature, top_p, include_extras,
        use_gpu=True, progress=progress,
    )
def generate_project_cpu(
    prompt: str,
    max_tokens: int = 2048,  # lower default: CPU generation is slow
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    progress=gr.Progress(track_tqdm=True),
    template: str = "blank",
) -> Tuple[str, str, Optional[str]]:
    """CPU wrapper around _generate_project_core (slower, but always available)."""
    return _generate_project_core(
        prompt, template, max_tokens, temperature, top_p, include_extras,
        use_gpu=False, progress=progress,
    )
@spaces.GPU(duration=60)
def generate_project(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    progress=gr.Progress(track_tqdm=True)
) -> Tuple[str, str, Optional[str]]:
    """Generate a complete Next.js project on GPU (ZeroGPU, 60-second budget)."""
    return _generate_project_core(
        prompt, template, max_tokens, temperature, top_p, include_extras,
        use_gpu=True, progress=progress,
    )
def _generate_project_core(
    prompt: str,
    template: str = "blank",
    max_tokens: int = 4096,
    temperature: float = 0.7,
    top_p: float = 0.95,
    include_extras: bool = True,
    use_gpu: bool = True,
    progress=None
) -> Tuple[str, str, Optional[str]]:
    """Core implementation for project generation.

    Builds a prompt from the selected template, runs whichever model is loaded
    (module-level MODEL_TYPE chooses the 7B transformers path or the custom 3B
    path), parses the output into files and packages them as a zip.

    Returns:
        (status_message, raw_output_preview, zip_path) — zip_path is None when
        no structured project could be parsed or on early validation errors.
    """
    if not MODEL_LOADED:
        return f"Error: Model failed to load.\n\nDetails: {ERROR_MSG}", "", None
    if not prompt.strip() or len(prompt.strip()) < 10:
        return "Please enter a detailed project description (at least 10 characters).", "", None
    def update_progress(val, desc=""):
        # Best-effort progress reporting: the Gradio progress object may be
        # unusable outside a request context. FIX: the original used bare
        # `except:` here, which also swallowed KeyboardInterrupt/SystemExit.
        try:
            if progress is not None:
                progress(val, desc=desc)
        except Exception:
            pass
    update_progress(0.1, "Preparing generation...")
    # Get device from model (ZeroGPU manages GPU attachment automatically)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"[ORCH] Using device: {device}")
    # Apply template prefix
    template_info = PROJECT_TEMPLATES.get(template, PROJECT_TEMPLATES["blank"])
    full_prompt = template_info["prefix"] + prompt.strip()
    if MODEL_TYPE == "7b":
        update_progress(0.2, f"Generating with ORCH-7B on {device}...")
        instruction = f"Create a complete Next.js full-stack application: {full_prompt}"
        formatted_prompt = INSTRUCTION_TEMPLATE.format(instruction=instruction)
        inputs = tokenizer(formatted_prompt, return_tensors="pt", truncation=True, max_length=2048).to(device)
        update_progress(0.3, f"Generating project on {'GPU' if use_gpu else 'CPU'} (this may take a while)...")
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
            )
        # Decode only the newly generated tail; keep special tokens so the
        # <|file|> fallback format can be parsed.
        input_length = inputs['input_ids'].shape[1]
        generated_ids = outputs[0][input_length:]
        generated = tokenizer.decode(generated_ids, skip_special_tokens=False)
    else:
        update_progress(0.2, f"Loading ORCH-3B to {'GPU' if use_gpu else 'CPU'}...")
        # NOTE: this mutates the module-level model's device (and dtype on GPU)
        # in place; subsequent requests see the moved model.
        model.to(device)
        if use_gpu and torch.cuda.is_available():
            model.to(torch.bfloat16)
        formatted_prompt = f"// {full_prompt}\n"
        encoded = tokenizer.encode(formatted_prompt)
        input_ids = torch.tensor([encoded.ids], device=device)
        update_progress(0.3, "Generating code...")
        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=50,
                do_sample=True,
            )
        generated = tokenizer.decode(output_ids[0].tolist())
    update_progress(0.8, "Processing output...")
    files = parse_project_output(generated)
    if files:
        update_progress(0.9, "Creating project archive...")
        project_name = "orch-project-" + datetime.now().strftime("%Y%m%d-%H%M%S")
        zip_path = create_zip_archive(files, project_name, include_extras)
        file_list = "\n".join([f" {f}" for f in sorted(files.keys())[:20]])
        if len(files) > 20:
            file_list += f"\n ... and {len(files) - 20} more files"
        status = f"Successfully generated {len(files)} files!\n\nTemplate: {template_info['name']}\nDevice: {'GPU' if use_gpu else 'CPU'}\n\nFiles:\n{file_list}"
        update_progress(1.0, "Done!")
        return status, generated[:10000], zip_path
    else:
        update_progress(1.0, "Done!")
        return "Code generated (no structured project detected - try a more detailed prompt)", generated[:10000], None
# Also update generate_code with CPU fallback
def _generate_code_core(
    prompt: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50,
    use_gpu: bool = True
):
    """Shared implementation behind the code-snippet endpoints.

    Reads module globals (model, tokenizer, MODEL_TYPE, MODEL_LOADED) and
    returns the generated text, or an error string when no model is loaded.
    """
    if not MODEL_LOADED:
        return f"Error: Model failed to load.\n\nDetails: {ERROR_MSG}"
    cleaned = prompt.strip()
    if not cleaned:
        return "Please enter a prompt describing what you want to generate."
    # Use whichever device the model already sits on; with device_map="auto"
    # the model object has no .device attribute, so fall back to a parameter.
    device = model.device if hasattr(model, 'device') else next(model.parameters()).device
    print(f"[ORCH] Code generation using device: {device}")
    if MODEL_TYPE == "7b":
        formatted_prompt = INSTRUCTION_TEMPLATE.format(instruction=cleaned)
        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                repetition_penalty=1.1,
            )
        # Drop the prompt tokens and return only the generated continuation.
        prompt_len = inputs['input_ids'].shape[1]
        return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    # 3B path: comment-style prompt, custom tokenizer and generate loop.
    encoded = tokenizer.encode(f"// {cleaned}\n")
    input_ids = torch.tensor([encoded.ids], device=device)
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            do_sample=True
        )
    return tokenizer.decode(output_ids[0].tolist())
@spaces.GPU
def generate_code_gpu(
    prompt: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50
):
    """GPU-backed wrapper around the shared code-generation core."""
    return _generate_code_core(
        prompt, max_tokens, temperature, top_p, top_k, use_gpu=True
    )
def generate_code_cpu(
    prompt: str,
    max_tokens: int = 512,  # lower default: CPU generation is slow
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50
):
    """CPU wrapper around the shared code-generation core."""
    return _generate_code_core(
        prompt, max_tokens, temperature, top_p, top_k, use_gpu=False
    )
@spaces.GPU
def generate_code(
    prompt: str,
    max_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50
):
    """Generate a Next.js code snippet on GPU (ZeroGPU required)."""
    return _generate_code_core(
        prompt, max_tokens, temperature, top_p, top_k, use_gpu=True
    )
def get_model_info() -> str:
    """Return a human-readable status summary of the currently loaded model."""
    if not MODEL_LOADED:
        return f"Model Status: Failed to load\nError: {ERROR_MSG}"
    if torch.cuda.is_available():
        props = torch.cuda.get_device_properties(0)
        gpu_info = f"{torch.cuda.get_device_name(0)} ({props.total_memory / (1024**3):.1f} GB)"
    else:
        gpu_info = "Not available"
    repo = MODEL_7B_ID if MODEL_TYPE == "7b" else MODEL_3B_ID
    return f"""Model: ORCH-{MODEL_TYPE.upper()}
Repository: {repo}
Load Time: {MODEL_LOAD_TIME:.1f}s
GPU: {gpu_info}
Status: Ready"""
# =============================================================================
# Professional UI with Brand Colors
# =============================================================================
# Brand Colors (Khaki/Earthy)
# Brand palette (khaki/earthy tones) interpolated into the custom CSS below.
BRAND_PRIMARY = "#D4A574"
BRAND_PRIMARY_MID = "#C4956A"
BRAND_PRIMARY_DARK = "#A67C52"
BRAND_DEEP = "#5D4E37"
BRAND_BG_DARK = "#1a1512"
BRAND_BG_LIGHT = "#2d2420"
BRAND_TEXT = "#E8DED5"
BRAND_TEXT_MUTED = "#9C8B7A"
# Example prompts shown in the Gradio UI for the full-project tab.
PROJECT_EXAMPLES = [
    ["Create a modern SaaS landing page with pricing tiers, feature showcase, testimonials, and newsletter signup. Include dark mode."],
    ["Build a task management app with kanban boards, drag-and-drop, user authentication, and team collaboration features."],
    ["Create an e-commerce store for digital products with Stripe payments, download management, and customer dashboard."],
    ["Build a personal portfolio website for a developer with project showcase, blog, skills section, and contact form."],
    ["Create a real-time chat application with channels, direct messaging, file sharing, and user presence indicators."],
    ["Build a restaurant booking system with table management, menu display, online reservations, and admin panel."],
]
# Example prompts for the single-snippet code tab.
CODE_EXAMPLES = [
    ["Create a Next.js dashboard page with user statistics cards showing total users, active sessions, and revenue"],
    ["Build a login form component with email and password fields, validation, and error handling"],
    ["Generate an API route for user authentication that handles login with JWT tokens"],
    ["Create a responsive navbar component with logo, navigation links, and mobile hamburger menu"],
    ["Build a product card component with image, title, price, rating, and add to cart button"],
    ["Create a data table component with sorting, filtering, and pagination"],
]
# Display label for the header badge; falls back to "7B" before any model loads.
MODEL_INFO = f"ORCH-{MODEL_TYPE.upper() if MODEL_TYPE else '7B'}"
# Professional CSS injected into the Gradio app (passed to gr.Blocks(css=...)).
# This is an f-string: brand colors are interpolated from the BRAND_* constants
# (presumably defined earlier in this file — not visible here), so every literal
# CSS brace is doubled ({{ }}) to escape Python's format syntax.
CUSTOM_CSS = f"""
/* Global Styles */
.gradio-container {{
max-width: 1400px !important;
margin: 0 auto !important;
background: linear-gradient(180deg, {BRAND_BG_DARK} 0%, #0f0d0b 100%) !important;
min-height: 100vh;
}}
.dark {{
--background-fill-primary: {BRAND_BG_DARK} !important;
--background-fill-secondary: {BRAND_BG_LIGHT} !important;
--border-color-primary: {BRAND_DEEP} !important;
}}
/* Header */
.header-container {{
background: linear-gradient(135deg, {BRAND_BG_LIGHT} 0%, {BRAND_BG_DARK} 100%);
border: 1px solid {BRAND_DEEP};
border-radius: 16px;
padding: 2rem;
margin-bottom: 1.5rem;
text-align: center;
}}
.logo-container {{
display: flex;
align-items: center;
justify-content: center;
gap: 1rem;
margin-bottom: 0.75rem;
}}
.logo-icon {{
width: 64px;
height: 64px;
border-radius: 16px;
}}
.brand-title {{
font-size: 2.5rem;
font-weight: 700;
background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
margin: 0;
}}
.brand-tagline {{
color: {BRAND_TEXT_MUTED};
font-size: 1rem;
margin: 0.25rem 0;
letter-spacing: 0.5px;
}}
.brand-subtitle {{
color: {BRAND_TEXT};
font-size: 1.1rem;
margin: 0.5rem 0;
}}
.model-badge {{
display: inline-flex;
align-items: center;
gap: 0.5rem;
background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%);
color: {BRAND_BG_DARK};
padding: 0.5rem 1rem;
border-radius: 20px;
font-weight: 600;
font-size: 0.9rem;
margin-top: 0.75rem;
}}
.model-badge-dot {{
width: 8px;
height: 8px;
background: {BRAND_BG_DARK};
border-radius: 50%;
animation: pulse 2s infinite;
}}
@keyframes pulse {{
0%, 100% {{ opacity: 1; }}
50% {{ opacity: 0.5; }}
}}
/* Tabs */
.tabs {{
border: none !important;
background: transparent !important;
}}
.tab-nav {{
background: {BRAND_BG_LIGHT} !important;
border: 1px solid {BRAND_DEEP} !important;
border-radius: 12px !important;
padding: 0.5rem !important;
margin-bottom: 1rem !important;
}}
.tab-nav button {{
background: transparent !important;
border: none !important;
color: {BRAND_TEXT_MUTED} !important;
padding: 0.75rem 1.5rem !important;
border-radius: 8px !important;
font-weight: 500 !important;
transition: all 0.2s ease !important;
}}
.tab-nav button.selected {{
background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%) !important;
color: {BRAND_BG_DARK} !important;
}}
.tab-nav button:hover:not(.selected) {{
background: rgba(212, 165, 116, 0.1) !important;
color: {BRAND_PRIMARY} !important;
}}
/* Input Fields */
.input-container textarea,
.input-container input {{
background: {BRAND_BG_LIGHT} !important;
border: 1px solid {BRAND_DEEP} !important;
border-radius: 12px !important;
color: {BRAND_TEXT} !important;
padding: 1rem !important;
font-size: 1rem !important;
transition: border-color 0.2s ease !important;
}}
.input-container textarea:focus,
.input-container input:focus {{
border-color: {BRAND_PRIMARY} !important;
box-shadow: 0 0 0 3px rgba(212, 165, 116, 0.1) !important;
}}
/* Buttons */
.primary-btn {{
background: linear-gradient(135deg, {BRAND_PRIMARY} 0%, {BRAND_PRIMARY_DARK} 100%) !important;
color: {BRAND_BG_DARK} !important;
border: none !important;
border-radius: 12px !important;
padding: 0.875rem 2rem !important;
font-weight: 600 !important;
font-size: 1rem !important;
cursor: pointer !important;
transition: all 0.2s ease !important;
box-shadow: 0 4px 12px rgba(212, 165, 116, 0.3) !important;
}}
.primary-btn:hover {{
transform: translateY(-2px) !important;
box-shadow: 0 6px 20px rgba(212, 165, 116, 0.4) !important;
}}
/* Template Cards */
.template-card {{
background: {BRAND_BG_LIGHT};
border: 1px solid {BRAND_DEEP};
border-radius: 12px;
padding: 1rem;
cursor: pointer;
transition: all 0.2s ease;
}}
.template-card:hover {{
border-color: {BRAND_PRIMARY};
transform: translateY(-2px);
}}
.template-card.selected {{
border-color: {BRAND_PRIMARY};
background: rgba(212, 165, 116, 0.1);
}}
/* Info Box */
.info-box {{
background: {BRAND_BG_LIGHT};
border: 1px solid {BRAND_DEEP};
border-radius: 12px;
padding: 1rem;
color: {BRAND_TEXT};
font-family: monospace;
font-size: 0.9rem;
}}
/* Footer */
.footer {{
text-align: center;
padding: 2rem;
color: {BRAND_TEXT_MUTED};
font-size: 0.9rem;
}}
.footer a {{
color: {BRAND_PRIMARY};
text-decoration: none;
}}
.footer a:hover {{
text-decoration: underline;
}}
"""
# =============================================================================
# Gradio Interface
# =============================================================================
# Top-level UI definition. All components are declared inside a single
# gr.Blocks context; event wiring (.click) is done at the bottom, once every
# component referenced as an input/output exists. Depends on names defined
# earlier in this file: CUSTOM_CSS, MODEL_INFO, PROJECT_TEMPLATES,
# PROJECT_EXAMPLES, CODE_EXAMPLES, get_model_info, generate_project and
# generate_code.
with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Base(), title="ORCH Studio") as demo:
    # Header: brand logo (hidden client-side via onerror if the image fails to
    # load), title, tagline, and a pulsing badge showing the loaded model.
    gr.HTML(f"""
    <div class="header-container">
        <div class="logo-container">
            <img src="https://huggingface.co/spaces/raihan-js/orch-studio/resolve/main/logo.png"
                 alt="ORCH" class="logo-icon" onerror="this.style.display='none'">
            <h1 class="brand-title">ORCH Studio</h1>
        </div>
        <p class="brand-tagline">Orchestrated Recursive Code Hierarchy</p>
        <p class="brand-subtitle">Generate complete, production-ready Next.js applications from natural language</p>
        <div class="model-badge">
            <span class="model-badge-dot"></span>
            {MODEL_INFO} - QLoRA Fine-tuned (43h A100 Training)
        </div>
    </div>
    """)
    with gr.Tabs():
        # Tab 1: Full Project Generation — prompt and sampling controls on the
        # left; status, downloadable archive, and raw-output preview on the right.
        with gr.TabItem("Full Project", id="project"):
            with gr.Row():
                with gr.Column(scale=2):
                    project_prompt = gr.Textbox(
                        label="Project Description",
                        placeholder="Describe your Next.js application in detail. Include features, pages, and functionality you want...",
                        lines=6,
                        elem_classes=["input-container"]
                    )
                    with gr.Row():
                        # Dropdown choices are (display_name, key) tuples; the
                        # key is what generate_project receives.
                        template_dropdown = gr.Dropdown(
                            label="Project Template",
                            choices=[(v["name"], k) for k, v in PROJECT_TEMPLATES.items()],
                            value="blank",
                            info="Select a template to get started faster"
                        )
                        include_extras = gr.Checkbox(
                            label="Include extras (.gitignore, .env.example)",
                            value=True
                        )
                    # Sampling hyperparameters forwarded to generate_project.
                    with gr.Row():
                        max_tokens = gr.Slider(
                            minimum=1024, maximum=8192, value=4096, step=256,
                            label="Max Tokens", info="More tokens = larger project"
                        )
                        temperature = gr.Slider(
                            minimum=0.1, maximum=1.0, value=0.7, step=0.05,
                            label="Temperature", info="Higher = more creative"
                        )
                        top_p = gr.Slider(
                            minimum=0.1, maximum=1.0, value=0.95, step=0.05,
                            label="Top P", info="Nucleus sampling"
                        )
                    generate_btn = gr.Button(
                        "Generate Project",
                        variant="primary",
                        size="lg",
                        elem_classes=["primary-btn"]
                    )
                    gr.Examples(
                        examples=PROJECT_EXAMPLES,
                        inputs=project_prompt,
                        label="Example Prompts"
                    )
                with gr.Column(scale=1):
                    status_output = gr.Textbox(
                        label="Generation Status",
                        lines=12,
                        interactive=False,
                        elem_classes=["status-box"]
                    )
                    # Populated by generate_project with the packaged project
                    # file (presumably a ZIP — see zipfile import at file top).
                    download_file = gr.File(
                        label="Download Project",
                        elem_classes=["file-download"]
                    )
                    with gr.Accordion("Generated Code Preview", open=False):
                        code_preview = gr.Code(
                            label="Raw Output",
                            language="markdown",
                            lines=20,
                            elem_classes=["code-output"]
                        )
        # Tab 2: Code Snippet Generation — single-component/function output,
        # smaller token budget than the full-project tab.
        with gr.TabItem("Code Snippet", id="snippet"):
            with gr.Row():
                with gr.Column(scale=2):
                    code_prompt = gr.Textbox(
                        label="Code Request",
                        placeholder="Describe the component, function, or feature you want to generate...",
                        lines=4,
                        elem_classes=["input-container"]
                    )
                    with gr.Row():
                        code_max_tokens = gr.Slider(
                            minimum=256, maximum=2048, value=1024, step=128,
                            label="Max Tokens"
                        )
                        code_temperature = gr.Slider(
                            minimum=0.1, maximum=1.0, value=0.7, step=0.05,
                            label="Temperature"
                        )
                        code_top_p = gr.Slider(
                            minimum=0.1, maximum=1.0, value=0.9, step=0.05,
                            label="Top P"
                        )
                    code_generate_btn = gr.Button(
                        "Generate Code",
                        variant="primary",
                        size="lg",
                        elem_classes=["primary-btn"]
                    )
                    gr.Examples(
                        examples=CODE_EXAMPLES,
                        inputs=code_prompt,
                        label="Example Prompts"
                    )
                with gr.Column(scale=2):
                    code_output = gr.Code(
                        label="Generated Code",
                        language="typescript",
                        lines=25,
                        elem_classes=["code-output"]
                    )
        # Tab 3: Model Info — static model card plus a live session-status
        # readout refreshed on demand via get_model_info.
        with gr.TabItem("Model Info", id="info"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown(f"""
                    ## ORCH-7B Model
                    **Latest QLoRA Fine-tuned Model** (January 2025)
                    | Specification | Value |
                    |--------------|-------|
                    | Base Model | DeepSeek Coder 6.7B Instruct |
                    | Fine-tuning | QLoRA (4-bit quantization + LoRA) |
                    | Training Time | 43 hours on A100 GPU |
                    | Training Steps | 5,238 steps |
                    | Focus | Next.js 14+ full-stack applications |
                    | Output | Complete downloadable projects |
                    ### Training Data
                    - Curated Next.js 14+ projects from GitHub
                    - Synthetic instruction-response pairs
                    - Focus on TypeScript, Tailwind CSS, Prisma
                    ### Capabilities
                    - Full-stack application generation
                    - Component and API route creation
                    - Database schema design
                    - Authentication patterns
                    - Responsive UI with Tailwind CSS
                    ### Links
                    - [ORCH-7B on HuggingFace](https://huggingface.co/orch-ai/ORCH-7B)
                    - [ORCH AI Organization](https://huggingface.co/orch-ai)
                    - [raihan-js/orch-7b](https://huggingface.co/raihan-js/orch-7b)
                    """)
                with gr.Column():
                    # Initial value is computed once at app start; the refresh
                    # button re-invokes get_model_info for up-to-date status.
                    model_info_display = gr.Textbox(
                        label="Current Session",
                        value=get_model_info(),
                        lines=8,
                        interactive=False,
                        elem_classes=["info-box"]
                    )
                    refresh_btn = gr.Button("Refresh Status")
                    refresh_btn.click(fn=get_model_info, outputs=model_info_display)
    # Footer
    gr.HTML(f"""
    <div class="footer">
        <p>
            <strong>ORCH</strong> - Orchestrated Recursive Code Hierarchy
            <br>
            <a href="https://huggingface.co/orch-ai" target="_blank">ORCH AI</a> |
            <a href="https://huggingface.co/orch-ai/ORCH-7B" target="_blank">ORCH-7B Model</a> |
            <a href="https://huggingface.co/raihan-js" target="_blank">raihan-js</a>
        </p>
        <p style="font-size: 0.8rem; margin-top: 0.5rem;">
            Model: {MODEL_INFO} | Updated: January 2025
        </p>
    </div>
    """)
    # Event handlers — wired after all components are declared so the
    # input/output references resolve.
    generate_btn.click(
        fn=generate_project,
        inputs=[project_prompt, template_dropdown, max_tokens, temperature, top_p, include_extras],
        outputs=[status_output, code_preview, download_file]
    )
    code_generate_btn.click(
        fn=generate_code,
        inputs=[code_prompt, code_max_tokens, code_temperature, code_top_p],
        outputs=code_output
    )
# Script entry point: start the Gradio server when run directly (HF Spaces
# executes this module as the main script).
if __name__ == "__main__":
    demo.launch()