Ignaciohhhhggfgjfrffd committed on
Commit e3b8521 · verified · 1 parent: ed44448

Update app.py

Files changed (1)
  1. app.py +59 -623
app.py CHANGED
@@ -49,7 +49,7 @@ from transformers import (
     PhiConfig, PhiForCausalLM, Qwen2Config, Qwen2ForCausalLM,
     DataCollatorForLanguageModeling, DefaultDataCollator, Adafactor
 )
-from peft import LoraConfig, get_peft_model, PeftModel, prepare_model_for_kbit_training
 from trl import SFTTrainer, DPOTrainer
 from diffusers import (
     UNet2DConditionModel, DDPMScheduler, AutoencoderKL, DiffusionPipeline,
@@ -157,36 +157,44 @@ class DebiasingSFTTrainer(SFTTrainer):
             break
         return (loss, outputs) if return_outputs else loss

-@spaces.GPU()
-def _create_deduplicated_iterable_dataset(dataset, text_col, method, threshold=0.85, num_perm=128):
-    lsh_state = MinHashLSH(threshold=threshold, num_perm=num_perm) if method == 'Semántica (MinHash)' else None
-    seen_texts_state = set() if method == 'Exacta' else None
-    def gen():
-        if method == 'Exacta':
-            for example in dataset:
-                text = example.get(text_col, "")
-                if text and isinstance(text, str):
-                    if text not in seen_texts_state:
-                        seen_texts_state.add(text)
-                        yield example
-                else:
                     yield example
-        elif method == 'Semántica (MinHash)':
-            for i, example in enumerate(dataset):
-                text = example.get(text_col, "")
-                if text and isinstance(text, str) and text.strip():
-                    m = MinHash(num_perm=num_perm)
-                    for d in text.split():
-                        m.update(d.encode('utf8'))
-                    if not lsh_state.query(m):
-                        lsh_state.insert(f"key_{i}", m)
-                        yield example
-                else:
                     yield example
-        else:
-            yield from dataset
-    new_ds = IterableDataset.from_generator(gen)
-    return new_ds

 @spaces.GPU()
 def hf_login(token):
@@ -1834,594 +1842,12 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
1834
  inputs=[inf_task_mode, inf_model_id, inf_text_in, inf_context_in, inf_image_in, inf_audio_in, inf_temperature, inf_top_p, inf_max_new_tokens],
1835
  outputs=[inf_text_out, inf_model_id, inf_text_in, inf_context_in, inf_image_in, inf_audio_in]
1836
  )
1837
- if __name__ == "__main__":
1838
- demo.queue().launch(debug=True, share=True)
1839
- TRANSFORMERS_AVAILABLE = True
1840
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
1841
- def is_hub_repo_like_s1(s):
1842
- return "/" in s and not Path(s).exists()
1843
- def download_from_hf_s1(repo_id, filename, token=None):
1844
- token = token or os.environ.get("HF_TOKEN")
1845
- return hf_hub_download(repo_id=repo_id, filename=filename, repo_type="dataset", token=token)
1846
- class MediaTextDataset_s1(Dataset):
1847
- def __init__(self, source, csv_name="dataset.csv", text_columns=None, max_records=None):
1848
- self.is_hub = is_hub_repo_like_s1(source)
1849
- token = os.environ.get("HF_TOKEN")
1850
- if self.is_hub:
1851
- file_path = download_from_hf_s1(source, csv_name, token)
1852
- else:
1853
- file_path = Path(source) / csv_name
1854
- if not Path(file_path).exists():
1855
- alt = Path(str(file_path).replace(".csv", ".parquet"))
1856
- if alt.exists():
1857
- file_path = alt
1858
- else:
1859
- raise FileNotFoundError(f"Dataset file not found: {file_path}")
1860
- self.df = pd.read_parquet(file_path) if str(file_path).endswith(".parquet") else pd.read_csv(file_path)
1861
- if max_records:
1862
- self.df = self.df.head(max_records)
1863
- self.text_columns = text_columns or ["short_prompt", "long_prompt"]
1864
- def __len__(self):
1865
- return len(self.df)
1866
- def __getitem__(self, i):
1867
- rec = self.df.iloc[i]
1868
- out = {"text": {}}
1869
- for col in self.text_columns:
1870
- out["text"][col] = rec[col] if col in rec else ""
1871
- return out
1872
- def load_pipeline_auto_s1(base_model, dtype=torch.float16):
1873
- if "gemma" in base_model.lower():
1874
- if not TRANSFORMERS_AVAILABLE:
1875
- raise RuntimeError("Transformers not installed for LLM support.")
1876
- tokenizer = AutoTokenizer.from_pretrained(base_model)
1877
- model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=dtype)
1878
- return {"model": model, "tokenizer": tokenizer}
1879
- else:
1880
- raise NotImplementedError("Only Gemma LLM supported in this script.")
-def find_target_modules_s1(model):
-    candidates = ["q_proj", "k_proj", "v_proj", "out_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
-    names = [n for n, m in model.named_modules() if isinstance(m, torch.nn.Linear)]
-    targets = [n.split(".")[-1] for n in names if any(c in n for c in candidates)]
-    if not targets:
-        targets = [n.split(".")[-1] for n, m in model.named_modules() if isinstance(m, torch.nn.Linear)]
-    return targets
-def unwrap_batch_s1(batch, short_col, long_col):
-    if isinstance(batch, (list, tuple)):
-        ex = batch[0]
-        if "text" in ex:
-            return ex
-        if "short" in ex and "long" in ex:
-            return {"text": {short_col: ex.get("short",""), long_col: ex.get("long","")}}
-        return {"text": ex}
-    if isinstance(batch, dict):
-        first_elem = {}
-        is_batched = any(isinstance(v, (list, tuple, np.ndarray, torch.Tensor)) for v in batch.values())
-        if is_batched:
-            for k, v in batch.items():
-                try: first = v[0]
-                except Exception: first = v
-                first_elem[k] = first
-            if "text" in first_elem:
-                t = first_elem["text"]
-                if isinstance(t, (list, tuple)) and len(t) > 0:
-                    return {"text": t[0] if isinstance(t[0], dict) else {short_col: t[0], long_col: ""}}
-                if isinstance(t, dict): return {"text": t}
-                return {"text": {short_col: str(t), long_col: ""}}
-            if ("short" in first_elem and "long" in first_elem) or (short_col in first_elem and long_col in first_elem):
-                s = first_elem.get(short_col, first_elem.get("short", ""))
-                l = first_elem.get(long_col, first_elem.get("long", ""))
-                return {"text": {short_col: str(s), long_col: str(l)}}
-            return {"text": {short_col: str(first_elem)}}
-        if "text" in batch and isinstance(batch["text"], dict):
-            return {"text": batch["text"]}
-        s = batch.get(short_col, batch.get("short", ""))
-        l = batch.get(long_col, batch.get("long", ""))
-        return {"text": {short_col: str(s), long_col: str(l)}}
-    return {"text": {short_col: str(batch), long_col: ""}}
-def train_lora_stream_s1(base_model, dataset_src, csv_name, text_cols, output_dir,
-                         epochs=1, lr=1e-4, r=8, alpha=16, batch_size=1, num_workers=0,
-                         max_train_records=None):
-    accelerator = accelerate.Accelerator()
-    pipe = load_pipeline_auto_s1(base_model)
-    model_obj = pipe["model"]
-    tokenizer = pipe["tokenizer"]
-    model_obj.train()
-    target_modules = find_target_modules_s1(model_obj)
-    lcfg = LoraConfig(r=r, lora_alpha=alpha, target_modules=target_modules, lora_dropout=0.0)
-    lora_module = get_peft_model(model_obj, lcfg)
-    dataset = MediaTextDataset_s1(dataset_src, csv_name, text_columns=text_cols, max_records=max_train_records)
-    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
-    optimizer = torch.optim.AdamW(lora_module.parameters(), lr=lr)
-    lora_module, optimizer, loader = accelerator.prepare(lora_module, optimizer, loader)
-    total_steps = max(1, epochs * len(loader))
-    step_counter = 0
-    logs = []
-    yield "[DEBUG] Starting training loop...\n", 0.0
-    for ep in range(epochs):
-        yield f"[DEBUG] Epoch {ep+1}/{epochs}\n", step_counter / total_steps
-        for i, batch in enumerate(loader):
-            ex = unwrap_batch_s1(batch, text_cols[0], text_cols[1])
-            texts = ex.get("text", {})
-            short_text = str(texts.get(text_cols[0], "") or "")
-            long_text = str(texts.get(text_cols[1], "") or "")
-            enc = tokenizer(short_text, text_pair=long_text, return_tensors="pt", padding="max_length", truncation=True, max_length=512)
-            enc = {k: v.to(accelerator.device) for k, v in enc.items()}
-            enc["labels"] = enc["input_ids"].clone()
-            outputs = lora_module(**enc)
-            forward_loss = getattr(outputs, "loss", None)
-            if forward_loss is None:
-                logits = outputs.logits if hasattr(outputs, "logits") else outputs[0]
-                forward_loss = torch.nn.functional.cross_entropy(logits.view(-1, logits.size(-1)), enc["labels"].view(-1), ignore_index=tokenizer.pad_token_id)
-            logs.append(f"[DEBUG] Step {step_counter}, forward_loss: {forward_loss.item():.6f}")
-            optimizer.zero_grad()
-            accelerator.backward(forward_loss)
-            optimizer.step()
-            step_counter += 1
-            yield "\n".join(logs[-10:]), step_counter / total_steps
-    Path(output_dir).mkdir(parents=True, exist_ok=True)
-    lora_module.save_pretrained(output_dir)
-    yield f"[INFO] ✅ LoRA saved to {output_dir}\n", 1.0
-def upload_adapter_s1(local, repo_id):
-    token = os.environ.get("HF_TOKEN")
-    if not token:
-        raise RuntimeError("HF_TOKEN missing")
-    create_repo(repo_id, exist_ok=True)
-    upload_folder(local, repo_id=repo_id, repo_type="model", token=token)
-    return f"https://huggingface.co/{repo_id}"
-def run_ui_s1():
-    with gr.Blocks() as demo_s1:
-        gr.Markdown("# 🌐 Universal Dynamic LoRA Trainer (Gemma LLM)")
-        with gr.Row():
-            base_model = gr.Textbox(label="Base model", value="google/gemma-3-4b-it")
-            dataset = gr.Textbox(label="Dataset folder or HF repo", value="rahul7star/prompt-enhancer-dataset-01")
-            csvname = gr.Textbox(label="CSV/Parquet file", value="train-00000-of-00001.csv")
-            short_col = gr.Textbox(label="Short prompt column", value="short_prompt")
-            long_col = gr.Textbox(label="Long prompt column", value="long_prompt")
-            out = gr.Textbox(label="Output dir", value="./adapter_out")
-            repo = gr.Textbox(label="Upload HF repo (optional)", value="rahul7star/gemma-3-270m-ccebc0")
-        with gr.Row():
-            batch_size = gr.Number(value=1, label="Batch size")
-            num_workers = gr.Number(value=0, label="DataLoader num_workers")
-            r = gr.Number(value=8, label="LoRA rank")
-            a = gr.Number(value=16, label="LoRA alpha")
-            ep = gr.Number(value=1, label="Epochs")
-            lr = gr.Number(value=1e-4, label="Learning rate")
-            max_records = gr.Number(value=1000, label="Max training records")
-        logs = gr.Textbox(label="Logs (streaming)", lines=25)
-        def launch(bm, ds, csv, sc, lc, out_dir, batch, num_w, r_, a_, ep_, lr_, max_rec, repo_):
-            gen = train_lora_stream_s1(bm, ds, csv, [sc, lc], out_dir, epochs=int(ep_), lr=float(lr_), r=int(r_), alpha=int(a_), batch_size=int(batch), num_workers=int(num_w), max_train_records=int(max_rec))
-            for item in gen:
-                if isinstance(item, tuple):
-                    text = item[0]
-                else:
-                    text = item
-                yield text
-            if repo_:
-                link = upload_adapter_s1(out_dir, repo_)
-                yield f"[INFO] Uploaded to {link}\n"
-        btn = gr.Button("🚀 Start Training")
-        btn.click(fn=launch, inputs=[base_model, dataset, csvname, short_col, long_col, out, batch_size, num_workers, r, a, ep, lr, max_records, repo], outputs=[logs], queue=True)
-    return demo_s1
-CHRONOEDIT_AVAILABLE = False
-try:
-    from chronoedit_diffusers.pipeline_chronoedit import ChronoEditPipeline
-    CHRONOEDIT_AVAILABLE = True
-except Exception:
-    pass
-QWENEDIT_AVAILABLE = False
-try:
-    from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPipeline
-    QWENEDIT_AVAILABLE = True
-except Exception:
-    pass
-BNB_AVAILABLE = False
-try:
-    from transformers import BitsAndBytesConfig
-    BNB_AVAILABLE = True
-except Exception:
-    BitsAndBytesConfig = None
-XFORMERS_AVAILABLE = False
-try:
-    import xformers
-    XFORMERS_AVAILABLE = True
-except Exception:
-    pass
-ADALORA_AVAILABLE = False
-try:
-    from peft import AdaLoraConfig
-    ADALORA_AVAILABLE = True
-except Exception:
-    AdaLoraConfig = None
-IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".bmp"}
-VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv"}
-def is_hub_repo_like_s2(s: str) -> bool:
-    return "/" in s and not Path(s).exists()
-def download_from_hf_s2(repo_id: str, filename: str, token: str = None, repo_type: str = "dataset") -> str:
-    token = token or os.environ.get("HF_TOKEN")
-    return hf_hub_download(repo_id=repo_id, filename=filename, use_auth_token=token, repo_type=repo_type)
-def try_list_repo_files_s2(repo_id: str, repo_type: str = "dataset", token: str = None):
-    token = token or os.environ.get("HF_TOKEN")
-    try:
-        return list_repo_files(repo_id, token=token, repo_type=repo_type)
-    except Exception:
-        return []
-def find_target_modules_s2(model, candidates=("q_proj", "k_proj", "v_proj", "o_proj", "to_q", "to_k", "to_v", "proj_out", "to_out")):
-    names = [n for n, _ in model.named_modules()]
-    selected = set()
-    for cand in candidates:
-        for n in names:
-            if cand in n:
-                selected.add(n.split(".")[-1])
-    if not selected:
-        return ["to_q", "to_k", "to_v", "to_out"]
-    return list(selected)
-class MediaTextDataset_s2(Dataset):
-    def __init__(self, dataset_source: str, csv_name: str = "dataset.csv", max_frames: int = 5, image_size=(512,512), video_frame_size=(128,256), hub_token: str = None):
-        self.source = dataset_source
-        self.is_hub = is_hub_repo_like_s2(dataset_source)
-        self.max_frames = max_frames
-        self.image_size = image_size
-        self.video_frame_size = video_frame_size
-        self.hub_token = hub_token or os.environ.get("HF_TOKEN")
-        if self.is_hub:
-            try:
-                csv_local = download_from_hf_s2(self.source, csv_name, token=self.hub_token, repo_type="dataset")
-            except Exception:
-                alt = csv_name.replace(".csv", ".parquet") if csv_name.endswith(".csv") else csv_name + ".parquet"
-                csv_local = download_from_hf_s2(self.source, alt, token=self.hub_token, repo_type="dataset")
-            if str(csv_local).endswith(".parquet"):
-                df = pd.read_parquet(csv_local)
-            else:
-                df = pd.read_csv(csv_local)
-            self.df = df
-            self.root = None
-        else:
-            root = Path(dataset_source)
-            csv_path = root / csv_name
-            parquet_path = root / csv_name.replace(".csv", ".parquet") if csv_name.endswith(".csv") else root / (csv_name + ".parquet")
-            if csv_path.exists():
-                self.df = pd.read_csv(csv_path)
-            elif parquet_path.exists():
-                self.df = pd.read_parquet(parquet_path)
-            else:
-                p = root / csv_name
-                if p.exists():
-                    if p.suffix.lower() == ".parquet":
-                        self.df = pd.read_parquet(p)
-                    else:
-                        self.df = pd.read_csv(p)
-                else:
-                    raise FileNotFoundError(f"Can't find {csv_name} in {dataset_source}")
-            self.root = root
-        self.image_transform = T.Compose([T.ToPILImage(), T.Resize(image_size), T.ToTensor(), T.Normalize([0.5]*3, [0.5]*3)])
-        self.video_transform = T.Compose([T.ToPILImage(), T.Resize(video_frame_size), T.ToTensor(), T.Normalize([0.5]*3, [0.5]*3)])
-    def __len__(self):
-        return len(self.df)
-    def _maybe_download_from_hub(self, file_name: str) -> str:
-        if self.root is not None:
-            p = self.root / file_name
-            if p.exists():
-                return str(p)
-        return download_from_hf_s2(self.source, file_name, token=self.hub_token, repo_type="dataset")
-    def _read_video_frames(self, path: str, num_frames: int):
-        video_frames, _, _ = torchvision.io.read_video(str(path), pts_unit='sec')
-        total = len(video_frames)
-        if total == 0:
-            C, H, W = 3, self.video_frame_size[0], self.video_frame_size[1]
-            return torch.zeros((num_frames, C, H, W), dtype=torch.float32)
-        if total < num_frames:
-            idxs = list(range(total)) + [total-1]*(num_frames-total)
-        else:
-            idxs = np.linspace(0, total-1, num_frames).round().astype(int).tolist()
-        frames = []
-        for i in idxs:
-            arr = video_frames[i].numpy() if hasattr(video_frames[i], "numpy") else np.array(video_frames[i])
-            frames.append(self.video_transform(arr))
-        frames = torch.stack(frames, dim=0)
-        return frames
-    def __getitem__(self, idx):
-        rec = self.df.iloc[idx]
-        file_name = rec["file_name"]
-        caption = rec["text"]
-        if self.is_hub:
-            local_path = self._maybe_download_from_hub(file_name)
-        else:
-            local_path = str(Path(self.root) / file_name)
-        p = Path(local_path)
-        suffix = p.suffix.lower()
-        if suffix in IMAGE_EXTS:
-            img = torchvision.io.read_image(local_path)
-            if isinstance(img, torch.Tensor):
-                img = img.permute(1,2,0).numpy()
-            return {'type': 'image', 'image': self.image_transform(img), 'caption': caption, 'file_name': file_name}
-        elif suffix in VIDEO_EXTS:
-            frames = self._read_video_frames(local_path, self.max_frames)
-            return {'type': 'video', 'frames': frames, 'caption': caption, 'file_name': file_name}
-        else:
-            raise RuntimeError(f"Unsupported media type: {local_path}")
-def load_pipeline_auto_s2(base_model_id: str, use_4bit: bool = False, bnb_config: object = None, torch_dtype=torch.float16):
-    low = base_model_id.lower()
-    is_chrono = "chrono" in low or "wan" in low or "video" in low
-    is_qwen = "qwen" in low or "qwenimage" in low
-    if is_chrono and CHRONOEDIT_AVAILABLE:
-        if use_4bit and bnb_config is not None:
-            pipe = ChronoEditPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16)
-        else:
-            pipe = ChronoEditPipeline.from_pretrained(base_model_id, torch_dtype=torch_dtype)
-    elif is_qwen and QWENEDIT_AVAILABLE:
-        pipe = QwenImageEditPipeline.from_pretrained(base_model_id, torch_dtype=torch_dtype)
-    else:
-        if use_4bit and BNB_AVAILABLE and bnb_config is not None:
-            pipe = DiffusionPipeline.from_pretrained(base_model_id, quantization_config=bnb_config, torch_dtype=torch.float16)
-        else:
-            pipe = DiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch_dtype)
-    return pipe
-def infer_target_for_task_s2(task_type: str, model_name: str) -> str:
-    low = model_name.lower()
-    if task_type == "prompt-lora" or "qwen" in low or "qwenedit" in low:
-        return "text_encoder"
-    if task_type == "text-video" or "chrono" in low or "wan" in low:
-        return "transformer"
-    return "unet"
-def attach_lora_s2(pipe, adapter_target: str, r: int = 8, alpha: int = 16, dropout: float = 0.0, use_adalora: bool = False):
-    if adapter_target == "unet":
-        target_module = pipe.unet
-        attr = "unet"
-    elif adapter_target == "transformer":
-        target_module = pipe.transformer
-        attr = "transformer"
-    elif adapter_target == "text_encoder":
-        target_module = pipe.text_encoder
-        attr = "text_encoder"
-    else:
-        raise RuntimeError("Unknown adapter_target")
-    target_modules = find_target_modules_s2(target_module)
-    if use_adalora and ADALORA_AVAILABLE:
-        lora_config = AdaLoraConfig(r=r, lora_alpha=alpha, target_modules=target_modules, init_r=4, lora_dropout=dropout)
-    else:
-        lora_config = LoraConfig(r=r, lora_alpha=alpha, target_modules=target_modules, lora_dropout=dropout, bias="none", task_type="SEQ_2_SEQ_LM")
-    peft_model = get_peft_model(target_module, lora_config)
-    setattr(pipe, attr, peft_model)
-    return pipe, attr
-def train_lora_accelerate_s2(base_model_id: str, dataset_source: str, csv_name: str, task_type: str, adapter_target_override: str, output_dir: str, epochs: int = 1, batch_size: int = 1, lr: float = 1e-4, max_train_steps: int = None, lora_r: int = 8, lora_alpha: int = 16, use_4bit: bool = False, enable_xformers: bool = False, use_adalora: bool = False, gradient_accumulation_steps: int = 1, mixed_precision: str = None, save_every_steps: int = 200, max_frames: int = 5):
-    accelerator = accelerate.Accelerator(mixed_precision=mixed_precision or ("fp16" if torch.cuda.is_available() else "no"))
-    device = accelerator.device
-    bnb_conf = None
-    if use_4bit and BNB_AVAILABLE:
-        bnb_conf = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4")
-    pipe = load_pipeline_auto_s2(base_model_id, use_4bit=use_4bit, bnb_config=bnb_conf, torch_dtype=torch.float16 if device.type == "cuda" else torch.float32)
-    if enable_xformers:
-        try:
-            if hasattr(pipe, "enable_xformers_memory_efficient_attention"):
-                pipe.enable_xformers_memory_efficient_attention()
-            elif hasattr(pipe, "enable_attention_slicing"):
-                pipe.enable_attention_slicing()
-        except Exception as e:
-            print(f"Could not enable xformers: {e}")
-    adapter_target = adapter_target_override if adapter_target_override else infer_target_for_task_s2(task_type, base_model_id)
-    pipe, attr = attach_lora_s2(pipe, adapter_target, r=lora_r, alpha=lora_alpha, dropout=0.0, use_adalora=use_adalora)
-    peft_module = getattr(pipe, attr)
-    dataset = MediaTextDataset_s2(dataset_source, csv_name=csv_name, max_frames=max_frames)
-    dataloader = DataLoader(dataset, batch_size=1, shuffle=True, collate_fn=lambda x: x)
-    trainable_params = [p for n,p in peft_module.named_parameters() if p.requires_grad]
-    optimizer = torch.optim.AdamW(trainable_params, lr=lr)
-    peft_module, optimizer, dataloader = accelerator.prepare(peft_module, optimizer, dataloader)
-    logs = []
-    global_step = 0
-    loss_fn = nn.MSELoss()
-    timesteps = None
-    if hasattr(pipe, "scheduler"):
-        try:
-            pipe.scheduler.set_timesteps(50, device=device)
-            timesteps = pipe.scheduler.timesteps
-        except Exception:
-            pass
-    for epoch in range(int(epochs)):
-        pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
-        for batch in pbar:
-            example = batch[0]
-            if example["type"] == "image":
-                img = example["image"].unsqueeze(0).to(device)
-                caption = [example["caption"]]
-                if not hasattr(pipe, "encode_prompt"):
-                    raise RuntimeError("Pipeline lacks encode_prompt - cannot encode prompts")
-                prompt_embeds, _ = pipe.encode_prompt(prompt=caption, negative_prompt=None, do_classifier_free_guidance=True, num_videos_per_prompt=1, device=device)
-                if not hasattr(pipe, "vae"):
-                    raise RuntimeError("Pipeline lacks VAE - required for latent conversion")
-                with torch.no_grad():
-                    latents = pipe.vae.encode(img.to(device)).latent_dist.sample() * pipe.vae.config.scaling_factor
-                noise = torch.randn_like(latents).to(device)
-                t = timesteps[torch.randint(0, len(timesteps), (1,)).item()].to(device) if timesteps is not None else torch.tensor(1, device=device)
-                noisy_latents = pipe.scheduler.add_noise(latents, noise, t)
-                out = peft_module(noisy_latents, t.expand(noisy_latents.shape[0]), encoder_hidden_states=prompt_embeds)
-                if hasattr(out, "sample"):
-                    noise_pred = out.sample
-                elif isinstance(out, tuple):
-                    noise_pred = out[0]
-                else:
-                    noise_pred = out
-                loss = loss_fn(noise_pred, noise)
-            else:
-                if not CHRONOEDIT_AVAILABLE:
-                    raise RuntimeError("ChronoEdit training requested but not installed in environment")
-                frames = example["frames"].unsqueeze(0).to(device)
-                frames_np = frames.squeeze(0).permute(0,2,3,1).cpu().numpy().tolist()
-                video_tensor = pipe.video_processor.preprocess(frames_np, height=frames.shape[-2], width=frames.shape[-1]).to(device)
-                latents_out = pipe.prepare_latents(video_tensor, batch_size=1, num_channels_latents=pipe.vae.config.z_dim, height=video_tensor.shape[-2], width=video_tensor.shape[-1], num_frames=frames.shape[1], dtype=video_tensor.dtype, device=device)
-                latents, condition = latents_out
-                noise = torch.randn_like(latents).to(device)
-                t = timesteps[torch.randint(0, len(timesteps), (1,)).item()].to(device)
-                noisy_latents = pipe.scheduler.add_noise(latents, noise, t)
-                latent_model_input = torch.cat([noisy_latents, condition], dim=1)
-                out = peft_module(hidden_states=latent_model_input, timestep=t.unsqueeze(0).expand(latent_model_input.shape[0]))
-                noise_pred = out[0]
-                loss = loss_fn(noise_pred, noise)
-            accelerator.backward(loss)
-            optimizer.step()
-            optimizer.zero_grad()
-            global_step += 1
-            logs.append(f"step {global_step} loss {loss.item():.6f}")
-            pbar.set_postfix({"loss": f"{loss.item():.6f}"})
-            if max_train_steps and global_step >= max_train_steps:
-                break
-            if global_step % save_every_steps == 0:
-                out_sub = Path(output_dir) / f"lora_step_{global_step}"
-                out_sub.mkdir(parents=True, exist_ok=True)
-                try:
-                    peft_module.save_pretrained(str(out_sub))
-                except Exception:
-                    torch.save({k: v.cpu() for k,v in peft_module.state_dict().items()}, str(out_sub / "adapter_state_dict.pt"))
-        if max_train_steps and global_step >= max_train_steps:
-            break
-    Path(output_dir).mkdir(parents=True, exist_ok=True)
-    try:
-        peft_module.save_pretrained(output_dir)
-    except Exception:
-        torch.save({k: v.cpu() for k,v in peft_module.state_dict().items()}, str(Path(output_dir) / "adapter_state_dict.pt"))
-    return output_dir, logs
-def test_generation_load_and_run_s2(base_model_id: str, adapter_dir: str, adapter_target: str, prompt: str, use_4bit: bool = False):
-    bnb_conf = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4") if use_4bit and BNB_AVAILABLE else None
-    pipe = load_pipeline_auto_s2(base_model_id, use_4bit=use_4bit, bnb_config=bnb_conf, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
-    try:
-        if adapter_target == "unet" and hasattr(pipe, "unet"):
-            pipe.unet.load_adapter(adapter_dir)
-        elif adapter_target == "transformer" and hasattr(pipe, "transformer"):
-            pipe.transformer.load_adapter(adapter_dir)
-        elif adapter_target == "text_encoder" and hasattr(pipe, "text_encoder"):
-            pipe.text_encoder.load_adapter(adapter_dir)
-    except Exception as e:
-        print(f"Adapter load warning: {e}")
-    pipe.to(DEVICE)
-    out = pipe(prompt=prompt, num_inference_steps=8)
-    if hasattr(out, "images"):
-        return out.images[0]
-    elif hasattr(out, "frames"):
-        frames = out.frames[0]
-        return Image.fromarray((frames[-1] * 255).clip(0,255).astype("uint8"))
-    raise RuntimeError("No images/frames returned")
-def upload_adapter_s2(local_dir: str, repo_id: str) -> str:
-    token = os.environ.get("HF_TOKEN")
-    if token is None:
-        raise RuntimeError("HF_TOKEN not set in environment for upload")
-    create_repo(repo_id, exist_ok=True)
-    upload_folder(folder_path=local_dir, repo_id=repo_id, repo_type="model", token=token)
-    return f"https://huggingface.co/{repo_id}"
-def boost_info_text_s2(use_4bit: bool, enable_xformers: bool, mixed_precision: str, device_type: str):
-    lines = [f"Device: {device_type.upper()}"]
-    lines.append("4-bit QLoRA enabled: ~4x memory saving." if use_4bit and BNB_AVAILABLE else "QLoRA disabled.")
-    lines.append("xFormers/FlashAttention: memory-efficient attention enabled." if enable_xformers and XFORMERS_AVAILABLE else "xFormers disabled.")
-    lines.append(f"Mixed precision: {mixed_precision}" if mixed_precision else "Mixed precision: default.")
-    return "\n".join(lines)
-def run_all_ui_s2(base_model_id: str, dataset_source: str, csv_name: str, task_type: str, adapter_target_override: str, lora_r: int, lora_alpha: int, epochs: int, batch_size: int, lr: float, max_train_steps: int, output_dir: str, upload_repo: str, use_4bit: bool, enable_xformers: bool, use_adalora: bool, grad_accum: int, mixed_precision: str, save_every_steps: int):
-    adapter_target = adapter_target_override if adapter_target_override else infer_target_for_task_s2(task_type, base_model_id)
-    try:
-        out_dir, logs = train_lora_accelerate_s2(base_model_id, dataset_source, csv_name, task_type, adapter_target, output_dir, epochs=epochs, lr=lr, max_train_steps=(max_train_steps if max_train_steps>0 else None), lora_r=lora_r, lora_alpha=lora_alpha, use_4bit=use_4bit, enable_xformers=enable_xformers, use_adalora=use_adalora, gradient_accumulation_steps=grad_accum, mixed_precision=(mixed_precision if mixed_precision != "none" else None), save_every_steps=save_every_steps)
-    except Exception as e:
-        return f"Training failed: {e}", None, None
-    link = None
-    if upload_repo:
-        try:
-            link = upload_adapter_s2(out_dir, upload_repo)
-        except Exception as e:
-            link = f"Upload failed: {e}"
-    try:
-        ds = MediaTextDataset_s2(dataset_source, csv_name=csv_name, max_frames=5)
-        test_prompt = ds.df.iloc[0]["text"] if len(ds.df) > 0 else "A cat on a skateboard"
-    except Exception:
-        test_prompt = "A cat on a skateboard"
-    test_img = None
-    try:
-        test_img = test_generation_load_and_run_s2(base_model_id, out_dir, adapter_target, test_prompt, use_4bit=use_4bit)
-    except Exception as e:
-        print(f"Test gen failed: {e}")
-    return "\n".join(logs[-200:]), test_img, link
-def build_ui_s2():
-    with gr.Blocks() as demo_s2:
-        gr.Markdown("# Universal LoRA Trainer — Quantization & Speedups (single-file)")
-        with gr.Row():
-            base_model = gr.Textbox(label="Base model id (Diffusers / ChronoEdit / Qwen)", value="runwayml/stable-diffusion-v1-5")
-            dataset_source = gr.Textbox(label="Dataset folder or HF repo (username/repo)", value="./dataset")
-            csv_name = gr.Textbox(label="CSV/Parquet filename", value="dataset.csv")
-            task_type = gr.Dropdown(label="Task type", choices=["text-image", "text-video", "prompt-lora"], value="text-image")
-            adapter_target_override = gr.Textbox(label="Adapter target override (leave blank for auto)", value="")
-            lora_r = gr.Slider(1, 64, value=8, step=1, label="LoRA rank (r)")
-            lora_alpha = gr.Slider(1, 128, value=16, step=1, label="LoRA alpha")
-            epochs = gr.Number(label="Epochs", value=1)
-            batch_size = gr.Number(label="Batch size (per device)", value=1)
-            lr = gr.Number(label="Learning rate", value=1e-4)
-            max_train_steps = gr.Number(label="Max train steps (0 = unlimited)", value=0)
-            save_every_steps = gr.Number(label="Save every steps", value=200)
-            output_dir = gr.Textbox(label="Local output dir for adapter", value="./adapter_out")
-            upload_repo = gr.Textbox(label="Upload adapter to HF repo (optional)", value="")
-        with gr.Row():
-            use_4bit = gr.Checkbox(label="Enable 4-bit QLoRA (bitsandbytes)", value=False)
-            enable_xformers = gr.Checkbox(label="Enable xFormers / memory efficient attention", value=False)
-            use_adalora = gr.Checkbox(label="Use AdaLoRA (if available in peft)", value=False)
-            grad_accum = gr.Number(label="Gradient accumulation steps", value=1)
-            mixed_precision = gr.Radio(choices=["none", "fp16", "bf16"], value=("fp16" if torch.cuda.is_available() else "none"), label="Mixed precision")
-        boost_info = gr.Textbox(label="Expected boost / notes", value="", lines=6)
-        start_btn = gr.Button("Start Training")
-        with gr.Row():
-            logs = gr.Textbox(label="Training logs (tail)", lines=18)
-            sample_image = gr.Image(label="Sample generated frame after training")
-        upload_link = gr.Textbox(label="Upload link / status")
-        def on_start(base_model, dataset_source, csv_name, task_type, adapter_target_override, lora_r, lora_alpha, epochs, batch_size, lr, max_train_steps, output_dir, upload_repo, use_4bit_val, enable_xformers_val, use_adalora_val, grad_accum_val, mixed_precision_val, save_every_steps):
-            boost_text = boost_info_text_s2(use_4bit_val, enable_xformers_val, mixed_precision_val, "gpu" if torch.cuda.is_available() else "cpu")
-            logs_out, sample, link = run_all_ui_s2(base_model, dataset_source, csv_name, task_type, adapter_target_override, int(lora_r), int(lora_alpha), int(epochs), int(batch_size), float(lr), int(max_train_steps), output_dir, upload_repo, use_4bit_val, enable_xformers_val, use_adalora_val, int(grad_accum_val), mixed_precision_val, int(save_every_steps))
-            return boost_text + "\n\n" + logs_out, sample, link
-        start_btn.click(on_start, inputs=[base_model, dataset_source, csv_name, task_type, adapter_target_override, lora_r, lora_alpha, epochs, batch_size, lr, max_train_steps, output_dir, upload_repo, use_4bit, enable_xformers, use_adalora, grad_accum, mixed_precision, save_every_steps], outputs=[boost_info, sample_image, upload_link])
-    return demo_s2
-def run_all_ui_s3(base_model, dataset_src, csv_name, short_col, long_col, batch_size, num_workers, r, a, ep, lr, max_rec, repo_):
-    gen = train_lora_stream_s3(base_model, dataset_src, csv_name, [short_col, long_col], epochs=int(ep), lr=float(lr), r=int(r), alpha=int(a), batch_size=int(batch_size), num_workers=int(num_workers), max_train_records=int(max_rec))
-    for item in gen:
-        yield item
-    HF_TOKEN = os.environ.get("HF_TOKEN")
-    if not repo_ or not HF_TOKEN:
-        raise ValueError("HF repo ID and HF_TOKEN required for upload.")
-    repo_ = repo_.strip()
-    create_repo(repo_, repo_type="model", exist_ok=True, token=HF_TOKEN)
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        lora_module.save_pretrained(tmp_dir)
-        upload_folder(folder_path=tmp_dir, repo_id=repo_, repo_type="model", token=HF_TOKEN)
-    link = f"https://huggingface.co/{repo_}"
-    yield "\n".join(logs) + f"\n[INFO] ✅ Uploaded successfully: {link}\n", link
-def run_ui_s3_final():
-    with gr.Blocks() as demo_s3_final:
-        gr.Markdown("# 🌐 Universal Dynamic LoRA Trainer & Inference")
-        with gr.Row():
-            base_model = gr.Textbox(label="Base model", value="google/gemma-3-4b-it")
-            dataset = gr.Textbox(label="Dataset folder or HF repo", value="rahul7star/prompt-enhancer-dataset-01")
-            csvname = gr.Textbox(label="CSV/Parquet file", value="train-00000-of-00001.csv")
-            short_col = gr.Textbox(label="Short prompt column", value="short_prompt")
-            long_col = gr.Textbox(label="Long prompt column", value="long_col")
-            repo = gr.Textbox(label="HF repo to upload LoRA", value="rahul7star/gemma-3-270m-ccebc0")
-        with gr.Row():
-            batch_size = gr.Number(value=1, label="Batch size")
-            num_workers = gr.Number(value=0, label="DataLoader num_workers")
-            r = gr.Number(value=8, label="LoRA rank")
-            a = gr.Number(value=16, label="LoRA alpha")
-            ep = gr.Number(value=1, label="Epochs")
-            lr = gr.Number(value=1e-4, label="Learning rate")
-            max_records = gr.Number(value=1000, label="Max training records")
-        logs = gr.Textbox(label="Logs (streaming)", lines=25)
-        btn = gr.Button("🚀 Start Training")
-        btn.click(fn=run_all_ui_s3,
-                  inputs=[base_model, dataset, csvname, short_col, long_col, batch_size, num_workers, r, a, ep, lr, max_records, repo],
-                  outputs=[logs],
-                  queue=True)
-        with gr.Tab("Inference (CPU)"):
-            inf_base_model = gr.Textbox(label="Base model", value="google/gemma-3-4b-it")
-            inf_lora_repo = gr.Textbox(label="LoRA HF repo", value="rahul7star/gemma-3-270m-ccebc0")
-            short_prompt = gr.Textbox(label="Short prompt")
-            long_prompt_out = gr.Textbox(label="Generated long prompt", lines=5)
-            inf_btn = gr.Button("📝 Generate Long Prompt")
-            inf_btn.click(fn=generate_long_prompt_cpu_s3,
-                          inputs=[inf_base_model, inf_lora_repo, short_prompt],
-                          outputs=[long_prompt_out])
-        with gr.Tab("Code Explain"):
-            explain_md = gr.Markdown("""
-            ### Universal LoRA Trainer & Inference - Code Explanation
-            #### 1. CORE MECHANISMS
             * **PEFT/LoRA**: Parameter-Efficient Fine-Tuning. Only low-rank matrices ($A$ and $B$) are trained for low-rank updates ($W' = W + B A$). This drastically reduces trainable parameters.
             * **QLoRA (4-bit)**: Loads the base model weights in 4-bit precision (NF4 with double quantization) using `bitsandbytes`, massively reducing VRAM usage while training LoRA adapters.
             * **Accelerator**: Manages device placement (CPU/GPU), mixed precision (`fp16`/`bf16`), and gradient accumulation for stable large-batch training simulation.
@@ -2429,27 +1855,37 @@ def run_ui_s3_final():
             * **Gradient Accumulation**: Simulates larger batch sizes by accumulating gradients over several forward/backward passes before an optimization step.
             * **Gradient Clipping**: Limits the maximum norm of the gradients (`max_grad_norm`) to prevent exploding gradients during training.
             * **Memory Optimization**: Optional use of `xFormers` (FlashAttention or memory-efficient attention) to reduce memory footprint and speed up training on compatible GPUs.
-            #### 2. DATA PROCESSING & AUGMENTATION
             * **Streaming Datasets**: Uses `datasets` streaming mode to handle very large datasets without loading all into RAM.
             * **Data Cleaning**: Removes HTML tags, normalizes whitespace, redacts PII, and removes URLs/emails.
             * **Advanced Filtering**: Includes optional filters for text length, word repetition, language detection, and basic toxicity detection (via `unitary/toxic-bert`).
             * **Data Augmentation**: Supports **Back-Translation (BT)** for introducing paraphrasing variations and **Counterfactual Data Augmentation (CDA)** for controlled bias testing (e.g., swapping gendered pronouns).
             * **Synthetic Data Generation**: Uses a specified LLM to generate new training examples based on an initial prompt template.
             * **Deduplication**: Implements both **Exact** and **Semantic (MinHash LSH)** deduplication to prevent data contamination during iterative fine-tuning.
-            #### 3. TRAINING MODES
             * **SFT (Supervised Fine-Tuning)**: Standard fine-tuning, supports **Conversation** and **Reasoning/Tool Use (CoT)** formatting styles.
             * **DPO (Direct Preference Optimization)**: Trains directly on preference pairs (chosen vs. rejected), using the `trl` library.
             * **Task-Specific Heads**: Supports **Sequence Classification**, **Token Classification (NER)**, and **Question Answering** by loading appropriate model heads (`AutoModelFor...`).
             * **Seq2Seq**: For translation/summarization tasks, using `Seq2SeqTrainer`.
             * **Diffusion (Text-to-Image/DreamBooth)**: Fine-tunes the UNet (and optionally Text Encoder) using LoRA for image generation tasks, with custom image/video data handling.
-            #### 4. MODEL INITIALIZATION
             * **Model From Scratch**: Allows initializing a model (e.g., Llama, Mistral) from a config rather than a pre-trained checkpoint, with optional auto-configuration based on expected training scale.
             * **Multi-Adapter Merging**: Advanced feature to combine multiple existing LoRA adapters into a single, new adapter using weighted averaging (`slerp`, `linear`, etc.).
-            #### 5. OUTPUT & DEPLOYMENT
             * **Hugging Face Hub Integration**: All trained artifacts (full model/LoRA adapter) are automatically pushed to a specified repository on the HF Hub using the provided token.
             * **Model Card Generation**: Automatically generates a `README.md` detailing training parameters and model provenance.
             * **Inference Tabs**: Separate UI for testing the trained LoRA adapter on CPU (for Gemma/LoRA) or various pipeline modes on GPU.
             """)
-    return demo_s3

 if __name__ == "__main__":
-    run_ui().launch(debug=True)
@@ -49,7 +49,7 @@ from transformers import (
     PhiConfig, PhiForCausalLM, Qwen2Config, Qwen2ForCausalLM,
     DataCollatorForLanguageModeling, DefaultDataCollator, Adafactor
 )
+from peft import LoraConfig, get_peft_model, PeftModel, prepare_model_for_kbit_training, AdaLoraConfig
 from trl import SFTTrainer, DPOTrainer
 from diffusers import (
     UNet2DConditionModel, DDPMScheduler, AutoencoderKL, DiffusionPipeline,
 
@@ -157,36 +157,44 @@ class DebiasingSFTTrainer(SFTTrainer):
             break
         return (loss, outputs) if return_outputs else loss

+def _deduplication_generator(dataset, text_col, method, threshold, num_perm):
+    if method == 'Exacta':
+        seen_texts = set()
+        for example in dataset:
+            text = example.get(text_col, "")
+            if text and isinstance(text, str):
+                if text not in seen_texts:
+                    seen_texts.add(text)
                     yield example
+            else:
+                yield example
+    elif method == 'Semántica (MinHash)':
+        lsh = MinHashLSH(threshold=threshold, num_perm=num_perm)
+        for i, example in enumerate(dataset):
+            text = example.get(text_col, "")
+            if text and isinstance(text, str) and text.strip():
+                m = MinHash(num_perm=num_perm)
+                for d in text.split():
+                    m.update(d.encode('utf8'))
+                if not lsh.query(m):
+                    lsh.insert(f"key_{i}", m)
                     yield example
+            else:
+                yield example
+    else:
+        yield from dataset
+
+def _create_deduplicated_iterable_dataset(dataset, text_col, method, threshold=0.85, num_perm=128):
+    return IterableDataset.from_generator(
+        _deduplication_generator,
+        gen_kwargs={
+            "dataset": dataset,
+            "text_col": text_col,
+            "method": method,
+            "threshold": threshold,
+            "num_perm": num_perm,
+        }
+    )

 @spaces.GPU()
 def hf_login(token):
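The new version passes a module-level generator plus `gen_kwargs` to `IterableDataset.from_generator` instead of a closure over `lsh_state`/`seen_texts_state`. The practical difference: `datasets` re-invokes the generator function on every pass over the stream (and can pickle it for caching and multiprocessing), so the dedup state is rebuilt fresh each time rather than carried over from a previous, possibly exhausted, closure. A minimal sketch of the same pattern, assuming only the `datasets` package; `dedup_gen`, the toy rows, and the column name are illustrative:

```python
from datasets import IterableDataset

def dedup_gen(rows, text_col):
    seen = set()  # fresh state every time the generator is (re-)invoked
    for row in rows:
        if row[text_col] not in seen:
            seen.add(row[text_col])
            yield row

rows = [{"text": "a"}, {"text": "a"}, {"text": "b"}]  # illustrative toy data
ds = IterableDataset.from_generator(dedup_gen, gen_kwargs={"rows": rows, "text_col": "text"})
print(list(ds))  # [{'text': 'a'}, {'text': 'b'}]
print(list(ds))  # a second pass re-runs dedup_gen with a fresh `seen` set
```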
 
@@ -1834,594 +1842,12 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
         inputs=[inf_task_mode, inf_model_id, inf_text_in, inf_context_in, inf_image_in, inf_audio_in, inf_temperature, inf_top_p, inf_max_new_tokens],
         outputs=[inf_text_out, inf_model_id, inf_text_in, inf_context_in, inf_image_in, inf_audio_in]
     )
+    with gr.Tab("5. Explicación del Código y Mecanismos Avanzados"):
+        gr.Markdown("""
+        ### 🧠 Explicación del Código y Mecanismos Avanzados
+        """)
+        gr.Markdown("#### 1. CORE MECHANISMS")
+        gr.Markdown("""
         * **PEFT/LoRA**: Parameter-Efficient Fine-Tuning. Only low-rank matrices ($A$ and $B$) are trained for low-rank updates ($W' = W + B A$). This drastically reduces trainable parameters.
         * **QLoRA (4-bit)**: Loads the base model weights in 4-bit precision (NF4 with double quantization) using `bitsandbytes`, massively reducing VRAM usage while training LoRA adapters.
         * **Accelerator**: Manages device placement (CPU/GPU), mixed precision (`fp16`/`bf16`), and gradient accumulation for stable large-batch training simulation.
         * **Gradient Accumulation**: Simulates larger batch sizes by accumulating gradients over several forward/backward passes before an optimization step.
         * **Gradient Clipping**: Limits the maximum norm of the gradients (`max_grad_norm`) to prevent exploding gradients during training.
         * **Memory Optimization**: Optional use of `xFormers` (FlashAttention or memory-efficient attention) to reduce memory footprint and speed up training on compatible GPUs.
+        """)
+        gr.Markdown("#### 2. DATA PROCESSING & AUGMENTATION")
+        gr.Markdown("""
         * **Streaming Datasets**: Uses `datasets` streaming mode to handle very large datasets without loading all into RAM.
         * **Data Cleaning**: Removes HTML tags, normalizes whitespace, redacts PII, and removes URLs/emails.
         * **Advanced Filtering**: Includes optional filters for text length, word repetition, language detection, and basic toxicity detection (via `unitary/toxic-bert`).
         * **Data Augmentation**: Supports **Back-Translation (BT)** for introducing paraphrasing variations and **Counterfactual Data Augmentation (CDA)** for controlled bias testing (e.g., swapping gendered pronouns).
         * **Synthetic Data Generation**: Uses a specified LLM to generate new training examples based on an initial prompt template.
         * **Deduplication**: Implements both **Exact** and **Semantic (MinHash LSH)** deduplication to prevent data contamination during iterative fine-tuning.
+        """)
+        gr.Markdown("#### 3. TRAINING MODES")
+        gr.Markdown("""
         * **SFT (Supervised Fine-Tuning)**: Standard fine-tuning, supports **Conversation** and **Reasoning/Tool Use (CoT)** formatting styles.
         * **DPO (Direct Preference Optimization)**: Trains directly on preference pairs (chosen vs. rejected), using the `trl` library.
         * **Task-Specific Heads**: Supports **Sequence Classification**, **Token Classification (NER)**, and **Question Answering** by loading appropriate model heads (`AutoModelFor...`).
         * **Seq2Seq**: For translation/summarization tasks, using `Seq2SeqTrainer`.
         * **Diffusion (Text-to-Image/DreamBooth)**: Fine-tunes the UNet (and optionally Text Encoder) using LoRA for image generation tasks, with custom image/video data handling.
+        """)
+        gr.Markdown("#### 4. MODEL INITIALIZATION")
+        gr.Markdown("""
         * **Model From Scratch**: Allows initializing a model (e.g., Llama, Mistral) from a config rather than a pre-trained checkpoint, with optional auto-configuration based on expected training scale.
         * **Multi-Adapter Merging**: Advanced feature to combine multiple existing LoRA adapters into a single, new adapter using weighted averaging (`slerp`, `linear`, etc.).
+        """)
+        gr.Markdown("#### 5. OUTPUT & DEPLOYMENT")
+        gr.Markdown("""
         * **Hugging Face Hub Integration**: All trained artifacts (full model/LoRA adapter) are automatically pushed to a specified repository on the HF Hub using the provided token.
         * **Model Card Generation**: Automatically generates a `README.md` detailing training parameters and model provenance.
         * **Inference Tabs**: Separate UI for testing the trained LoRA adapter on CPU (for Gemma/LoRA) or various pipeline modes on GPU.
         """)
+    gr.Markdown("### 💡 Hardware Fallback")
+    gr.Markdown(f"If CUDA/GPU is unavailable, the system defaults to CPU: **{device.upper()}**. Training and inference on CPU will be significantly slower, especially for large models or Diffusers.")
+
 if __name__ == "__main__":
+    demo.queue().launch(debug=True, share=True)