Ignaciohhhhggfgjfrffd commited on
Commit
3bb1f41
·
verified ·
1 Parent(s): ffde733

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +390 -14
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import os
2
  os.system("pip install -U gradio")
3
  os.system("pip install -U bitsandbytes diffusers torchaudio torchvision torch transformers peft accelerate trl datasets")
4
- os.system("pip install spaces-0.1.0-py3-none-any.whl")
 
5
 
6
  import io
7
  import json
@@ -16,6 +17,7 @@ import importlib
16
  import random
17
  import re
18
  import ast
 
19
  from itertools import islice
20
  from pathlib import Path
21
  from collections import defaultdict
@@ -38,11 +40,11 @@ import textstat
38
  from datasketch import MinHash, MinHashLSH
39
  import gradio as gr
40
  from datasets import load_dataset, IterableDataset, Dataset as HFDataset, DatasetDict, interleave_datasets, Audio
41
- from huggingface_hub import login, whoami, create_repo, upload_folder, HfApi, hf_hub_download, list_repo_files
42
  from transformers import (
43
  AutoModelForCausalLM, AutoTokenizer, AutoConfig, TrainingArguments, Trainer,
44
  AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer,
45
- AutoModelForImageClassification,
46
  AutoImageProcessor, AutoModelForAudioClassification, AutoFeatureExtractor, AutoModelForTokenClassification,
47
  DataCollatorForTokenClassification, AutoModelForQuestionAnswering, AutoModelForSpeechSeq2Seq,
48
  AutoProcessor, DataCollatorWithPadding, pipeline,
@@ -62,6 +64,19 @@ from diffusers import (
62
  get_scheduler as get_diffusers_scheduler, StableDiffusionPipeline as StableDiffusionText2ImagePipeline,
63
  StableDiffusionImg2ImgPipeline as StableDiffusionImage2ImagePipeline
64
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
  logger = logging.getLogger(__name__)
67
  torch_dtype_auto = torch.float32
@@ -135,7 +150,27 @@ Este dataset fue creado utilizando la herramienta [AutoTrain-Advanced](https://h
135
  - **Modelo de Generación (si aplica):** `{generation_model}`
136
  - **Fecha de Creación:** {date}
137
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  _tox_pipe_singleton = None
 
139
  @spaces.GPU
140
  class DebiasingSFTTrainer(SFTTrainer):
141
  def __init__(self, *args, reweighting_terms=None, reweighting_factor=1.0, **kwargs):
@@ -153,6 +188,7 @@ class DebiasingSFTTrainer(SFTTrainer):
153
  loss *= self.reweighting_factor
154
  break
155
  return (loss, outputs) if return_outputs else loss
 
156
  @spaces.GPU
157
  class DeduplicatedIterableDataset(IterableDataset):
158
  def __init__(self, dataset, text_col, method, threshold=0.85, num_perm=128):
@@ -210,6 +246,7 @@ def hf_login(token):
210
  return f"✅ Conectado como: {user['name']}"
211
  except Exception as e:
212
  return f"❌ Error en la conexión: {e}"
 
213
  @spaces.GPU
214
  def _clean_text(example, text_col, **kwargs):
215
  text = example.get(text_col, "")
@@ -227,6 +264,7 @@ def _clean_text(example, text_col, **kwargs):
227
  text = re.sub(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', '<IP_ADDRESS>', text)
228
  example[text_col] = text
229
  return example
 
230
  @spaces.GPU
231
  def _apply_quality_filters(example, text_col, min_len, max_len, rep_threshold, exclude_keywords):
232
  text = example.get(text_col, "")
@@ -240,6 +278,7 @@ def _apply_quality_filters(example, text_col, min_len, max_len, rep_threshold, e
240
  if not word_counts or (max(word_counts.values()) / len(words)) > rep_threshold: return False
241
  lower_text = text.lower()
242
  return not any(keyword in lower_text for keyword in exclude_keywords)
 
243
  @spaces.GPU
244
  def _apply_coherence_filter(example, text_col, char_rep_threshold, ngram_rep_threshold, entropy_threshold):
245
  text = example.get(text_col, "")
@@ -306,6 +345,7 @@ def _apply_coherence_filter(example, text_col, char_rep_threshold, ngram_rep_thr
306
  if non_latin_chars > 2 and latin_chars > 10:
307
  return False
308
  return True
 
309
  @spaces.GPU
310
  def _get_filter_functions(**kwargs):
311
  filters = []
@@ -366,6 +406,7 @@ def _get_filter_functions(**kwargs):
366
  return True
367
  filters.append(stats_filter)
368
  return filters
 
369
  @spaces.GPU
370
  def _load_hf_streaming(ids, split="train", probabilities=None):
371
  streams = []
@@ -395,6 +436,7 @@ def _load_hf_streaming(ids, split="train", probabilities=None):
395
  logger.warning(f"Number of probabilities ({len(probabilities)}) does not match number of valid datasets ({len(streams)}). Ignoring weights.")
396
  probabilities = None
397
  return interleave_datasets(streams, probabilities=probabilities)
 
398
  @spaces.GPU
399
  def _load_uploaded_stream(files):
400
  all_rows = []
@@ -416,6 +458,7 @@ def _load_uploaded_stream(files):
416
  val_size = max(1, int(len(all_rows) * 0.01))
417
  random.shuffle(all_rows)
418
  return {"train": all_rows[:-val_size] if val_size > 0 else all_rows, "validation": all_rows[-val_size:] if val_size > 0 else []}
 
419
  @spaces.GPU
420
  def _guess_columns(sample):
421
  text_col, image_col, audio_col, label_col = "text", "image", "audio", "label"
@@ -432,6 +475,7 @@ def _guess_columns(sample):
432
  if "label" in keys: label_col = keys["label"]
433
  elif "labels" in keys: label_col = keys["labels"]
434
  return text_col, image_col, audio_col, label_col
 
435
  @spaces.GPU
436
  def _apply_cda(dataset, text_col, cda_config_str):
437
  try:
@@ -464,6 +508,7 @@ def _apply_cda(dataset, text_col, cda_config_str):
464
  next_texts.add(new_text)
465
  current_texts.update(next_texts)
466
  return IterableDataset.from_generator(cda_generator)
 
467
  @spaces.GPU
468
  def _apply_back_translation(dataset, text_col, ratio, model_id, reverse_model_id):
469
  if not ratio or ratio <= 0:
@@ -491,6 +536,7 @@ def _apply_back_translation(dataset, text_col, ratio, model_id, reverse_model_id
491
  except Exception as e:
492
  logger.warning(f"Error en retrotraducción: {e}")
493
  return IterableDataset.from_generator(bt_generator)
 
494
  @spaces.GPU
495
  def _generate_synthetic_data(original_dataset, text_col, model_id, num_samples, prompt_template):
496
  if not num_samples or num_samples <= 0:
@@ -523,6 +569,7 @@ def _generate_synthetic_data(original_dataset, text_col, model_id, num_samples,
523
  logger.warning(f"Error generando una muestra sintética: {e}")
524
  continue
525
  return IterableDataset.from_generator(synthetic_generator)
 
526
  def _calculate_auto_config(block_size, is_gpt2_like, steps_per_epoch_estimate, batch_size, gradient_accumulation):
527
  safe_steps = int(steps_per_epoch_estimate or 10000)
528
  safe_batch_size = int(batch_size or 1)
@@ -540,6 +587,7 @@ def _calculate_auto_config(block_size, is_gpt2_like, steps_per_epoch_estimate, b
540
  layers = max(8, min(32, 8 + int(log_size * 1.5)))
541
  kv_heads = heads if is_gpt2_like else (max(1, heads // 4))
542
  return vocab_size, hidden_size, hidden_size * 2, layers, heads, safe_block_size, False, kv_heads
 
543
  @spaces.GPU
544
  def _get_eval_dataset(train_ds_id, eval_ds_id, uploaded_val_data, update_logs_fn):
545
  if eval_ds_id:
@@ -561,6 +609,7 @@ def _get_eval_dataset(train_ds_id, eval_ds_id, uploaded_val_data, update_logs_fn
561
  return None
562
  yield update_logs_fn("No se proporcionó dataset de evaluación. Omitiendo.", "Evaluación")
563
  return None
 
564
  def _create_training_args(output_dir, repo_id, **kwargs):
565
  neftune_alpha = float(kwargs.get('neftune_noise_alpha', 0.0))
566
  optim_args_dict = {}
@@ -610,6 +659,7 @@ def _create_training_args(output_dir, repo_id, **kwargs):
610
  else:
611
  raise ValueError("Para datasets en streaming se requiere un valor positivo para 'Máximos Pasos de Entrenamiento'.")
612
  return TrainingArguments(**args_dict)
 
613
  @spaces.GPU
614
  def _generic_model_loader(model_name_or_path, model_class, **kwargs):
615
  config_kwargs = {"trust_remote_code": True}
@@ -627,6 +677,7 @@ def _generic_model_loader(model_name_or_path, model_class, **kwargs):
627
  model_kwargs.update({"num_labels": kwargs['num_labels'], "ignore_mismatched_sizes": True})
628
  model = model_class.from_pretrained(model_name_or_path, **model_kwargs)
629
  return model
 
630
  @spaces.GPU
631
  def _find_all_linear_names(model):
632
  cls = torch.nn.Linear
@@ -639,6 +690,7 @@ def _find_all_linear_names(model):
639
  lora_module_names.remove('lm_head')
640
  common_targets = {'q_proj', 'v_proj', 'k_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'}
641
  return list(lora_module_names.intersection(common_targets)) or list(lora_module_names)
 
642
  @spaces.GPU
643
  def _sft_formatting_func(example, text_col, tokenizer, **kwargs):
644
  if kwargs.get('sft_format_style') == "Conversacional":
@@ -672,9 +724,11 @@ def _sft_formatting_func(example, text_col, tokenizer, **kwargs):
672
  return "\n".join([m['content'] for m in messages])
673
  return ""
674
  return example.get(text_col, "")
 
675
  @spaces.GPU
676
  def _dpo_formatting_func(example, **kwargs):
677
  return {"prompt": example.get(kwargs.get('prompt_col_input', 'prompt'), ""), "chosen": example.get(kwargs.get('dpo_chosen_col_input', 'chosen'), ""), "rejected": example.get(kwargs.get('dpo_rejected_col_input', 'rejected'), "")}
 
678
  @spaces.GPU
679
  def _evaluate_perplexity(model, tokenizer, eval_dataset, text_col):
680
  model.eval()
@@ -699,6 +753,7 @@ def _evaluate_perplexity(model, tokenizer, eval_dataset, text_col):
699
  break
700
  ppl = torch.exp(torch.stack(nlls).mean())
701
  return ppl.item()
 
702
  @spaces.GPU
703
  def _merge_multiple_loras(base_model_id, adapter_ids_str, weights_str, combination_type):
704
  adapter_ids = [s.strip() for s in adapter_ids_str.split(',') if s.strip()]
@@ -730,6 +785,7 @@ def _merge_multiple_loras(base_model_id, adapter_ids_str, weights_str, combinati
730
  tokenizer.save_pretrained(temp_dir)
731
  yield f"Fusión de adaptadores completada. El entrenamiento continuará con el modelo fusionado en {temp_dir}."
732
  return temp_dir
 
733
  @spaces.GPU
734
  def _run_trainer_and_upload(trainer, tokenizer, repo_id, update_logs_fn, model_card_content, **kwargs):
735
  yield update_logs_fn("Iniciando ciclo de entrenamiento...", "Entrenando")
@@ -750,6 +806,7 @@ def _run_trainer_and_upload(trainer, tokenizer, repo_id, update_logs_fn, model_c
750
  yield update_logs_fn("Subiendo al Hub...", "Subiendo")
751
  upload_folder(folder_path=output_dir, repo_id=repo_id, commit_message="Fin de entrenamiento")
752
  return output_dir, final_metrics
 
753
  @spaces.GPU
754
  def train_sft_dpo(model_name, train_dataset, repo_id, update_logs_fn, model_card_content, **kwargs):
755
  output_dir = tempfile.mkdtemp()
@@ -803,6 +860,7 @@ def train_sft_dpo(model_name, train_dataset, repo_id, update_logs_fn, model_card
803
  return final_model_path, final_metrics
804
  except Exception as e:
805
  raise Exception(f"Error en {'DPO' if is_dpo else 'SFT'}: {e}\n{traceback.format_exc()}")
 
806
  @spaces.GPU
807
  def train_sequence_classification(model_name, train_dataset, repo_id, update_logs_fn, model_card_content, **kwargs):
808
  output_dir = tempfile.mkdtemp()
@@ -845,6 +903,7 @@ def train_sequence_classification(model_name, train_dataset, repo_id, update_log
845
  return final_model_path, final_metrics
846
  except Exception as e:
847
  raise Exception(f"Error en Sequence Classification: {e}\n{traceback.format_exc()}")
 
848
  @spaces.GPU
849
  def train_token_classification(model_name, train_dataset, repo_id, update_logs_fn, model_card_content, **kwargs):
850
  output_dir = tempfile.mkdtemp()
@@ -902,6 +961,7 @@ def train_token_classification(model_name, train_dataset, repo_id, update_logs_f
902
  return final_model_path, final_metrics
903
  except Exception as e:
904
  raise Exception(f"Error en Token Classification: {e}\n{traceback.format_exc()}")
 
905
  @spaces.GPU
906
  def train_question_answering(model_name, train_dataset, repo_id, update_logs_fn, model_card_content, **kwargs):
907
  output_dir = tempfile.mkdtemp()
@@ -979,6 +1039,7 @@ def train_question_answering(model_name, train_dataset, repo_id, update_logs_fn,
979
  return final_model_path, final_metrics
980
  except Exception as e:
981
  raise Exception(f"Error en Question Answering: {e}\n{traceback.format_exc()}")
 
982
  @spaces.GPU
983
  def train_seq2seq(model_name, train_dataset, repo_id, update_logs_fn, model_card_content, **kwargs):
984
  output_dir = tempfile.mkdtemp()
@@ -1030,6 +1091,7 @@ def train_seq2seq(model_name, train_dataset, repo_id, update_logs_fn, model_card
1030
  return final_model_path, final_metrics
1031
  except Exception as e:
1032
  raise Exception(f"Error en Seq2Seq: {e}\n{traceback.format_exc()}")
 
1033
  @spaces.GPU
1034
  def train_text_to_image(model_name, train_dataset, repo_id, update_logs, model_card_content, **kwargs):
1035
  output_dir = tempfile.mkdtemp()
@@ -1181,6 +1243,7 @@ def train_text_to_image(model_name, train_dataset, repo_id, update_logs, model_c
1181
  except Exception as e:
1182
  yield update_logs(f"❌ Error en entrenamiento Text-to-Image: {str(e)}", "Error")
1183
  raise Exception(f"Error en Text-to-Image: {e}\n{traceback.format_exc()}")
 
1184
  @spaces.GPU
1185
  def _get_data_processing_pipeline(**kwargs):
1186
  hf_ids = [x.strip() for x in (kwargs.get('datasets_hf_text') or "").split(",") if x.strip()]
@@ -1243,6 +1306,7 @@ def _get_data_processing_pipeline(**kwargs):
1243
  num_perm=int(kwargs.get('minhash_num_perm', 128))
1244
  )
1245
  return train_dataset, kwargs
 
1246
  @spaces.GPU
1247
  def _train_and_upload(progress=gr.Progress(), **kwargs):
1248
  logs, repo_link, final_model_path, final_metrics = "", "", None, {}
@@ -1411,6 +1475,7 @@ def _train_and_upload(progress=gr.Progress(), **kwargs):
1411
  gr.update(value="Iniciar Entrenamiento", interactive=True),
1412
  gr.update(visible=False)
1413
  )
 
1414
  @spaces.GPU
1415
  def run_inference(task_mode, model_id, text_in, context_in, image_in, audio_in, temperature, top_p, max_new_tokens):
1416
  if not model_id: return "Por favor, introduce un ID de modelo del Hub.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
@@ -1434,6 +1499,7 @@ def run_inference(task_mode, model_id, text_in, context_in, image_in, audio_in,
1434
  result = pipe(input_data)
1435
  return f"Resultado:\n\n{json.dumps(result, indent=2, ensure_ascii=False)}", model_id, gr.update(), gr.update(), gr.update(), gr.update()
1436
  except Exception as e: return f"Error en Inferencia: {e}\n{traceback.format_exc()}", model_id, gr.update(), gr.update(), gr.update(), gr.update()
 
1437
  def update_inference_ui(task_mode):
1438
  task_name = TASK_TO_PIPELINE_MAP.get(task_mode, "")
1439
  is_text_gen = task_name == "text-generation"
@@ -1449,6 +1515,7 @@ def update_inference_ui(task_mode):
1449
  gr.update(visible=show_audio),
1450
  gr.update(visible=is_text_gen)
1451
  )
 
1452
  @spaces.GPU
1453
  def create_and_upload_dataset(hf_token, repo_name, creation_type, synth_model, synth_prompt, synth_num_samples, file_uploads, progress=gr.Progress()):
1454
  if not hf_token:
@@ -1510,10 +1577,12 @@ def create_and_upload_dataset(hf_token, repo_name, creation_type, synth_model, s
1510
  return f"✅ Dataset creado y subido exitosamente a {repo_id}", f"### ✅ [Dataset Disponible: Visita el Repositorio]({dataset_link})"
1511
  except Exception as e:
1512
  return f"❌ Error fatal durante la creación del dataset: {e}\n{traceback.format_exc()}", ""
 
1513
  @spaces.GPU
1514
  def gradio_train_wrapper(*args):
1515
  kwargs = dict(zip(all_input_components_dict.keys(), args))
1516
  yield from _train_and_upload(**kwargs)
 
1517
  @spaces.GPU
1518
  def gradio_preview_data_wrapper(*args):
1519
  kwargs = dict(zip(all_input_components_dict.keys(), args))
@@ -1555,6 +1624,7 @@ def gradio_preview_data_wrapper(*args):
1555
  yield preview_text
1556
  except Exception as e:
1557
  yield f"Error al generar la vista previa: {e}\n{traceback.format_exc()}"
 
1558
  def toggle_training_mode_ui(is_scratch):
1559
  return (
1560
  gr.update(visible=not is_scratch),
@@ -1576,6 +1646,7 @@ def toggle_training_mode_ui(is_scratch):
1576
  gr.update(visible=is_scratch),
1577
  gr.update(visible=is_scratch),
1578
  )
 
1579
  def toggle_task_specific_ui(training_mode):
1580
  is_classification = "Classification" in training_mode
1581
  is_dpo = "DPO" in training_mode
@@ -1589,18 +1660,284 @@ def toggle_task_specific_ui(training_mode):
1589
  gr.update(visible=is_diffusion),
1590
  gr.update(visible=not is_diffusion)
1591
  )
 
1592
  def toggle_sft_format_ui(format_style):
1593
  is_tool = format_style == "Razonamiento/Herramientas"
1594
  return gr.update(visible=is_tool)
 
1595
  def toggle_auto_modules_ui(is_auto):
1596
  return gr.update(visible=not is_auto)
 
1597
  def toggle_dataset_creator_ui(choice):
1598
  is_synth = choice == "Sintético"
1599
  return gr.update(visible=is_synth), gr.update(visible=not is_synth)
1600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1601
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
1602
- gr.Markdown("# 🚀 AutoTrain-Advanced: Tu Plataforma de Entrenamiento de Modelos")
1603
- gr.Markdown("### Una interfaz completa para fine-tuning y PEFT (LoRA).")
1604
 
1605
  with gr.Tab("1. Autenticación"):
1606
  gr.Markdown("#### Conecta tu cuenta de Hugging Face para guardar y cargar modelos.")
@@ -1916,7 +2253,50 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
1916
  inputs=[inf_task_mode, inf_model_id, inf_text_in, inf_context_in, inf_image_in, inf_audio_in, inf_temperature, inf_top_p, inf_max_new_tokens],
1917
  outputs=[inf_text_out, inf_model_id, inf_text_in, inf_context_in, inf_image_in, inf_audio_in]
1918
  )
1919
- with gr.Tab("5. Explicación del Código y Mecanismos Avanzados"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1920
  gr.Markdown("""
1921
  ### 🧠 Explicación del Código y Mecanismos Avanzados
1922
  """)
@@ -1944,14 +2324,10 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
1944
  * Task-Specific Heads: Supports **Sequence Classification**, **Token Classification (NER)**, and **Question Answering** by loading appropriate model heads (`AutoModelFor...`).
1945
  * Seq2Seq: For translation/summarization tasks, using `Seq2SeqTrainer`.
1946
  """)
1947
- gr.Markdown("#### 4. MODEL INITIALIZATION & ADVANCED TECHNIQUES")
1948
  gr.Markdown("""
1949
- * Model From Scratch: Allows initializing a model (e.g., Llama, Mistral) from a config rather than a pre-trained checkpoint, with optional auto-configuration based on expected training scale.
1950
- * Manual Model Configuration: When training from scratch, users can manually specify low-level configuration parameters (e.g., `vocab_size`, `hidden_size`, `num_hidden_layers`) instead of relying on the automatic scaling based on training steps.
1951
- * Multi-Adapter Merging: Advanced feature to combine multiple existing LoRA adapters into a single, new adapter using weighted averaging (`slerp`, `linear`, etc.).
1952
- * DoRA (Weight-Decomposed Low-Rank Adaptation): A more advanced version of LoRA that can lead to better performance.
1953
- * RSLora (Rank-Stabilized LoRA): A variant of LoRA that adjusts the learning rate based on the rank, improving stability.
1954
- * NEFTune: Adds noise to the embedding layer during training, which can improve the performance of the fine-tuned model.
1955
  """)
1956
  gr.Markdown("#### 5. OUTPUT & DEPLOYMENT")
1957
  gr.Markdown("""
@@ -1961,4 +2337,4 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
1961
  """)
1962
 
1963
  if __name__ == "__main__":
1964
- demo.queue().launch(debug=True, share=True)
 
1
  import os
2
  os.system("pip install -U gradio")
3
  os.system("pip install -U bitsandbytes diffusers torchaudio torchvision torch transformers peft accelerate trl datasets")
4
+ os.system("pip install spaces")
5
+ os.system("pip install gradio_huggingfacehub_search packaging torchao llmcompressor")
6
 
7
  import io
8
  import json
 
17
  import random
18
  import re
19
  import ast
20
+ import shutil
21
  from itertools import islice
22
  from pathlib import Path
23
  from collections import defaultdict
 
40
  from datasketch import MinHash, MinHashLSH
41
  import gradio as gr
42
  from datasets import load_dataset, IterableDataset, Dataset as HFDataset, DatasetDict, interleave_datasets, Audio
43
+ from huggingface_hub import login, whoami, create_repo, upload_folder, HfApi, hf_hub_download, list_repo_files, snapshot_download, list_models
44
  from transformers import (
45
  AutoModelForCausalLM, AutoTokenizer, AutoConfig, TrainingArguments, Trainer,
46
  AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer,
47
+ AutoModelForImageClassification, AutoModel, TorchAoConfig,
48
  AutoImageProcessor, AutoModelForAudioClassification, AutoFeatureExtractor, AutoModelForTokenClassification,
49
  DataCollatorForTokenClassification, AutoModelForQuestionAnswering, AutoModelForSpeechSeq2Seq,
50
  AutoProcessor, DataCollatorWithPadding, pipeline,
 
64
  get_scheduler as get_diffusers_scheduler, StableDiffusionPipeline as StableDiffusionText2ImagePipeline,
65
  StableDiffusionImg2ImgPipeline as StableDiffusionImage2ImagePipeline
66
  )
67
+ from gradio_huggingfacehub_search import HuggingfaceHubSearch
68
+ from packaging import version
69
+ from torchao.quantization import (
70
+ Int4WeightOnlyConfig,
71
+ Int8WeightOnlyConfig,
72
+ Int8DynamicActivationInt8WeightConfig,
73
+ Float8WeightOnlyConfig,
74
+ Float8DynamicActivationFloat8WeightConfig,
75
+ GemliteUIntXWeightOnlyConfig,
76
+ )
77
+ from torchao.dtypes import Int4CPULayout
78
+ from llmcompressor import oneshot
79
+ from llmcompressor.modifiers.awq import AWQModifier
80
 
81
  logger = logging.getLogger(__name__)
82
  torch_dtype_auto = torch.float32
 
150
  - **Modelo de Generación (si aplica):** `{generation_model}`
151
  - **Fecha de Creación:** {date}
152
  """
153
+
154
+ MAP_QUANT_TYPE_TO_NAME = {
155
+ "Int4WeightOnly": "int4wo",
156
+ "GemliteUIntXWeightOnly": "intxwo-gemlite",
157
+ "Int8WeightOnly": "int8wo",
158
+ "Int8DynamicActivationInt8Weight": "int8da8w8",
159
+ "Float8WeightOnly": "float8wo",
160
+ "Float8DynamicActivationFloat8Weight": "float8da8w8",
161
+ "autoquant": "autoquant",
162
+ }
163
+ MAP_QUANT_TYPE_TO_CONFIG = {
164
+ "Int4WeightOnly": Int4WeightOnlyConfig,
165
+ "GemliteUIntXWeightOnly": GemliteUIntXWeightOnlyConfig,
166
+ "Int8WeightOnly": Int8WeightOnlyConfig,
167
+ "Int8DynamicActivationInt8Weight": Int8DynamicActivationInt8WeightConfig,
168
+ "Float8WeightOnly": Float8WeightOnlyConfig,
169
+ "Float8DynamicActivationFloat8Weight": Float8DynamicActivationFloat8WeightConfig,
170
+ }
171
+
172
  _tox_pipe_singleton = None
173
+
174
  @spaces.GPU
175
  class DebiasingSFTTrainer(SFTTrainer):
176
  def __init__(self, *args, reweighting_terms=None, reweighting_factor=1.0, **kwargs):
 
188
  loss *= self.reweighting_factor
189
  break
190
  return (loss, outputs) if return_outputs else loss
191
+
192
  @spaces.GPU
193
  class DeduplicatedIterableDataset(IterableDataset):
194
  def __init__(self, dataset, text_col, method, threshold=0.85, num_perm=128):
 
246
  return f"✅ Conectado como: {user['name']}"
247
  except Exception as e:
248
  return f"❌ Error en la conexión: {e}"
249
+
250
  @spaces.GPU
251
  def _clean_text(example, text_col, **kwargs):
252
  text = example.get(text_col, "")
 
264
  text = re.sub(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', '<IP_ADDRESS>', text)
265
  example[text_col] = text
266
  return example
267
+
268
  @spaces.GPU
269
  def _apply_quality_filters(example, text_col, min_len, max_len, rep_threshold, exclude_keywords):
270
  text = example.get(text_col, "")
 
278
  if not word_counts or (max(word_counts.values()) / len(words)) > rep_threshold: return False
279
  lower_text = text.lower()
280
  return not any(keyword in lower_text for keyword in exclude_keywords)
281
+
282
  @spaces.GPU
283
  def _apply_coherence_filter(example, text_col, char_rep_threshold, ngram_rep_threshold, entropy_threshold):
284
  text = example.get(text_col, "")
 
345
  if non_latin_chars > 2 and latin_chars > 10:
346
  return False
347
  return True
348
+
349
  @spaces.GPU
350
  def _get_filter_functions(**kwargs):
351
  filters = []
 
406
  return True
407
  filters.append(stats_filter)
408
  return filters
409
+
410
  @spaces.GPU
411
  def _load_hf_streaming(ids, split="train", probabilities=None):
412
  streams = []
 
436
  logger.warning(f"Number of probabilities ({len(probabilities)}) does not match number of valid datasets ({len(streams)}). Ignoring weights.")
437
  probabilities = None
438
  return interleave_datasets(streams, probabilities=probabilities)
439
+
440
  @spaces.GPU
441
  def _load_uploaded_stream(files):
442
  all_rows = []
 
458
  val_size = max(1, int(len(all_rows) * 0.01))
459
  random.shuffle(all_rows)
460
  return {"train": all_rows[:-val_size] if val_size > 0 else all_rows, "validation": all_rows[-val_size:] if val_size > 0 else []}
461
+
462
  @spaces.GPU
463
  def _guess_columns(sample):
464
  text_col, image_col, audio_col, label_col = "text", "image", "audio", "label"
 
475
  if "label" in keys: label_col = keys["label"]
476
  elif "labels" in keys: label_col = keys["labels"]
477
  return text_col, image_col, audio_col, label_col
478
+
479
  @spaces.GPU
480
  def _apply_cda(dataset, text_col, cda_config_str):
481
  try:
 
508
  next_texts.add(new_text)
509
  current_texts.update(next_texts)
510
  return IterableDataset.from_generator(cda_generator)
511
+
512
  @spaces.GPU
513
  def _apply_back_translation(dataset, text_col, ratio, model_id, reverse_model_id):
514
  if not ratio or ratio <= 0:
 
536
  except Exception as e:
537
  logger.warning(f"Error en retrotraducción: {e}")
538
  return IterableDataset.from_generator(bt_generator)
539
+
540
  @spaces.GPU
541
  def _generate_synthetic_data(original_dataset, text_col, model_id, num_samples, prompt_template):
542
  if not num_samples or num_samples <= 0:
 
569
  logger.warning(f"Error generando una muestra sintética: {e}")
570
  continue
571
  return IterableDataset.from_generator(synthetic_generator)
572
+
573
  def _calculate_auto_config(block_size, is_gpt2_like, steps_per_epoch_estimate, batch_size, gradient_accumulation):
574
  safe_steps = int(steps_per_epoch_estimate or 10000)
575
  safe_batch_size = int(batch_size or 1)
 
587
  layers = max(8, min(32, 8 + int(log_size * 1.5)))
588
  kv_heads = heads if is_gpt2_like else (max(1, heads // 4))
589
  return vocab_size, hidden_size, hidden_size * 2, layers, heads, safe_block_size, False, kv_heads
590
+
591
  @spaces.GPU
592
  def _get_eval_dataset(train_ds_id, eval_ds_id, uploaded_val_data, update_logs_fn):
593
  if eval_ds_id:
 
609
  return None
610
  yield update_logs_fn("No se proporcionó dataset de evaluación. Omitiendo.", "Evaluación")
611
  return None
612
+
613
  def _create_training_args(output_dir, repo_id, **kwargs):
614
  neftune_alpha = float(kwargs.get('neftune_noise_alpha', 0.0))
615
  optim_args_dict = {}
 
659
  else:
660
  raise ValueError("Para datasets en streaming se requiere un valor positivo para 'Máximos Pasos de Entrenamiento'.")
661
  return TrainingArguments(**args_dict)
662
+
663
  @spaces.GPU
664
  def _generic_model_loader(model_name_or_path, model_class, **kwargs):
665
  config_kwargs = {"trust_remote_code": True}
 
677
  model_kwargs.update({"num_labels": kwargs['num_labels'], "ignore_mismatched_sizes": True})
678
  model = model_class.from_pretrained(model_name_or_path, **model_kwargs)
679
  return model
680
+
681
  @spaces.GPU
682
  def _find_all_linear_names(model):
683
  cls = torch.nn.Linear
 
690
  lora_module_names.remove('lm_head')
691
  common_targets = {'q_proj', 'v_proj', 'k_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'}
692
  return list(lora_module_names.intersection(common_targets)) or list(lora_module_names)
693
+
694
  @spaces.GPU
695
  def _sft_formatting_func(example, text_col, tokenizer, **kwargs):
696
  if kwargs.get('sft_format_style') == "Conversacional":
 
724
  return "\n".join([m['content'] for m in messages])
725
  return ""
726
  return example.get(text_col, "")
727
+
728
  @spaces.GPU
729
  def _dpo_formatting_func(example, **kwargs):
730
  return {"prompt": example.get(kwargs.get('prompt_col_input', 'prompt'), ""), "chosen": example.get(kwargs.get('dpo_chosen_col_input', 'chosen'), ""), "rejected": example.get(kwargs.get('dpo_rejected_col_input', 'rejected'), "")}
731
+
732
  @spaces.GPU
733
  def _evaluate_perplexity(model, tokenizer, eval_dataset, text_col):
734
  model.eval()
 
753
  break
754
  ppl = torch.exp(torch.stack(nlls).mean())
755
  return ppl.item()
756
+
757
  @spaces.GPU
758
  def _merge_multiple_loras(base_model_id, adapter_ids_str, weights_str, combination_type):
759
  adapter_ids = [s.strip() for s in adapter_ids_str.split(',') if s.strip()]
 
785
  tokenizer.save_pretrained(temp_dir)
786
  yield f"Fusión de adaptadores completada. El entrenamiento continuará con el modelo fusionado en {temp_dir}."
787
  return temp_dir
788
+
789
  @spaces.GPU
790
  def _run_trainer_and_upload(trainer, tokenizer, repo_id, update_logs_fn, model_card_content, **kwargs):
791
  yield update_logs_fn("Iniciando ciclo de entrenamiento...", "Entrenando")
 
806
  yield update_logs_fn("Subiendo al Hub...", "Subiendo")
807
  upload_folder(folder_path=output_dir, repo_id=repo_id, commit_message="Fin de entrenamiento")
808
  return output_dir, final_metrics
809
+
810
  @spaces.GPU
811
  def train_sft_dpo(model_name, train_dataset, repo_id, update_logs_fn, model_card_content, **kwargs):
812
  output_dir = tempfile.mkdtemp()
 
860
  return final_model_path, final_metrics
861
  except Exception as e:
862
  raise Exception(f"Error en {'DPO' if is_dpo else 'SFT'}: {e}\n{traceback.format_exc()}")
863
+
864
  @spaces.GPU
865
  def train_sequence_classification(model_name, train_dataset, repo_id, update_logs_fn, model_card_content, **kwargs):
866
  output_dir = tempfile.mkdtemp()
 
903
  return final_model_path, final_metrics
904
  except Exception as e:
905
  raise Exception(f"Error en Sequence Classification: {e}\n{traceback.format_exc()}")
906
+
907
  @spaces.GPU
908
  def train_token_classification(model_name, train_dataset, repo_id, update_logs_fn, model_card_content, **kwargs):
909
  output_dir = tempfile.mkdtemp()
 
961
  return final_model_path, final_metrics
962
  except Exception as e:
963
  raise Exception(f"Error en Token Classification: {e}\n{traceback.format_exc()}")
964
+
965
  @spaces.GPU
966
  def train_question_answering(model_name, train_dataset, repo_id, update_logs_fn, model_card_content, **kwargs):
967
  output_dir = tempfile.mkdtemp()
 
1039
  return final_model_path, final_metrics
1040
  except Exception as e:
1041
  raise Exception(f"Error en Question Answering: {e}\n{traceback.format_exc()}")
1042
+
1043
  @spaces.GPU
1044
  def train_seq2seq(model_name, train_dataset, repo_id, update_logs_fn, model_card_content, **kwargs):
1045
  output_dir = tempfile.mkdtemp()
 
1091
  return final_model_path, final_metrics
1092
  except Exception as e:
1093
  raise Exception(f"Error en Seq2Seq: {e}\n{traceback.format_exc()}")
1094
+
1095
  @spaces.GPU
1096
  def train_text_to_image(model_name, train_dataset, repo_id, update_logs, model_card_content, **kwargs):
1097
  output_dir = tempfile.mkdtemp()
 
1243
  except Exception as e:
1244
  yield update_logs(f"❌ Error en entrenamiento Text-to-Image: {str(e)}", "Error")
1245
  raise Exception(f"Error en Text-to-Image: {e}\n{traceback.format_exc()}")
1246
+
1247
  @spaces.GPU
1248
  def _get_data_processing_pipeline(**kwargs):
1249
  hf_ids = [x.strip() for x in (kwargs.get('datasets_hf_text') or "").split(",") if x.strip()]
 
1306
  num_perm=int(kwargs.get('minhash_num_perm', 128))
1307
  )
1308
  return train_dataset, kwargs
1309
+
1310
  @spaces.GPU
1311
  def _train_and_upload(progress=gr.Progress(), **kwargs):
1312
  logs, repo_link, final_model_path, final_metrics = "", "", None, {}
 
1475
  gr.update(value="Iniciar Entrenamiento", interactive=True),
1476
  gr.update(visible=False)
1477
  )
1478
+
1479
  @spaces.GPU
1480
  def run_inference(task_mode, model_id, text_in, context_in, image_in, audio_in, temperature, top_p, max_new_tokens):
1481
  if not model_id: return "Por favor, introduce un ID de modelo del Hub.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
 
1499
  result = pipe(input_data)
1500
  return f"Resultado:\n\n{json.dumps(result, indent=2, ensure_ascii=False)}", model_id, gr.update(), gr.update(), gr.update(), gr.update()
1501
  except Exception as e: return f"Error en Inferencia: {e}\n{traceback.format_exc()}", model_id, gr.update(), gr.update(), gr.update(), gr.update()
1502
+
1503
  def update_inference_ui(task_mode):
1504
  task_name = TASK_TO_PIPELINE_MAP.get(task_mode, "")
1505
  is_text_gen = task_name == "text-generation"
 
1515
  gr.update(visible=show_audio),
1516
  gr.update(visible=is_text_gen)
1517
  )
1518
+
1519
  @spaces.GPU
1520
  def create_and_upload_dataset(hf_token, repo_name, creation_type, synth_model, synth_prompt, synth_num_samples, file_uploads, progress=gr.Progress()):
1521
  if not hf_token:
 
1577
  return f"✅ Dataset creado y subido exitosamente a {repo_id}", f"### ✅ [Dataset Disponible: Visita el Repositorio]({dataset_link})"
1578
  except Exception as e:
1579
  return f"❌ Error fatal durante la creación del dataset: {e}\n{traceback.format_exc()}", ""
1580
+
1581
@spaces.GPU
def gradio_train_wrapper(*args):
    """Gradio entry point: pair positional UI values with their component names and delegate.

    The UI wires every registered input component positionally; this shim
    rebuilds the keyword mapping expected by ``_train_and_upload`` and
    streams its progress updates through.
    """
    named_values = {name: value for name, value in zip(all_input_components_dict, args)}
    yield from _train_and_upload(**named_values)
1585
+
1586
  @spaces.GPU
1587
  def gradio_preview_data_wrapper(*args):
1588
  kwargs = dict(zip(all_input_components_dict.keys(), args))
 
1624
  yield preview_text
1625
  except Exception as e:
1626
  yield f"Error al generar la vista previa: {e}\n{traceback.format_exc()}"
1627
+
1628
  def toggle_training_mode_ui(is_scratch):
1629
  return (
1630
  gr.update(visible=not is_scratch),
 
1646
  gr.update(visible=is_scratch),
1647
  gr.update(visible=is_scratch),
1648
  )
1649
+
1650
  def toggle_task_specific_ui(training_mode):
1651
  is_classification = "Classification" in training_mode
1652
  is_dpo = "DPO" in training_mode
 
1660
  gr.update(visible=is_diffusion),
1661
  gr.update(visible=not is_diffusion)
1662
  )
1663
+
1664
def toggle_sft_format_ui(format_style):
    """Show the tool/reasoning options only for the 'Razonamiento/Herramientas' SFT format."""
    return gr.update(visible=(format_style == "Razonamiento/Herramientas"))
1667
+
1668
def toggle_auto_modules_ui(is_auto):
    """Hide the manual target-modules field whenever auto-detection is enabled."""
    show_manual_field = not is_auto
    return gr.update(visible=show_manual_field)
1670
+
1671
def toggle_dataset_creator_ui(choice):
    """Swap between the synthetic-generation panel and the file-upload panel."""
    synthetic_selected = (choice == "Sintético")
    return (
        gr.update(visible=synthetic_selected),
        gr.update(visible=not synthetic_selected),
    )
1674
 
1675
def get_ao_username(token):
    """Resolve the Hub username for *token*, or "anonymous" when lookup fails.

    Any failure (missing/invalid token, network error, missing client) is
    collapsed into the "anonymous" sentinel that callers test against.
    """
    try:
        return HfApi(token=token).whoami()["name"]
    except Exception:
        return "anonymous"
1682
+
1683
def check_ao_model_exists(username, quantization_type, group_size, model_name, quantized_model_name, token):
    """Return a message when the target quantized repo already exists, else None.

    On any Hub/API failure an error string is returned instead of raising;
    callers surface it as a warning.  The repo-name construction mirrors
    ``save_ao_model`` so the existence check matches what would be pushed.
    """
    try:
        # BUGFIX: the original called bare `list_models`, but only `HfApi` is
        # imported from huggingface_hub at the top of the file, which raised
        # NameError at runtime.  Use the client method instead.
        api = HfApi(token=token)
        existing_ids = {m.id for m in api.list_models(author=username)}
        if quantized_model_name:
            repo_name = f"{username}/{quantized_model_name}"
        else:
            base = model_name.split("/")[-1]
            suffix = MAP_QUANT_TYPE_TO_NAME[quantization_type]
            # Group size only appears in the name for the grouped schemes.
            if quantization_type in ("Int4WeightOnly", "GemliteUIntXWeightOnly") and group_size is not None:
                repo_name = f"{username}/{base}-ao-{suffix}-gs{group_size}"
            else:
                repo_name = f"{username}/{base}-ao-{suffix}"
        if repo_name in existing_ids:
            return f"Model '{repo_name}' already exists in your repository."
        return None
    except Exception as e:
        return f"Error checking model existence: {str(e)}"
1700
+
1701
def create_ao_model_card(model_name, quantization_type, group_size, token):
    """Build the README for a quantized repo.

    Prepends a YAML header and quantization metadata; when the source model's
    README can be fetched it is appended under an "Original Model Info"
    section.  Download failures are non-fatal.
    """
    try:
        # BUGFIX: the original used `snapshot_download`, which is not imported
        # at the top of the file (NameError at runtime).  `hf_hub_download` IS
        # imported, and fetching the single README file is cheaper anyway.
        readme_path = hf_hub_download(
            repo_id=model_name, filename="README.md", repo_type="model", token=token
        )
        with open(readme_path, "r", encoding="utf-8") as f:
            original_readme = f.read()
    except Exception:
        # Missing README or any download error: proceed with metadata only.
        original_readme = ""

    yaml_header = f"""---
base_model:
- {model_name}
tags:
- torchao-my-repo
---
# {model_name} (Quantized)

## Quantization Details
- **Quantization Type**: {quantization_type}
- **Group Size**: {group_size}

"""
    if original_readme:
        yaml_header += "\n\n# 📄 Original Model Info\n\n" + original_readme
    return yaml_header
1728
+
1729
def quantize_ao_model(model_name, quantization_type, group_size=128, token=None, progress=gr.Progress()):
    """Load *model_name* with a torchao quantization config applied and return the model.

    NOTE(review): this loads via ``AutoModel`` (no task head) — for causal LMs
    the lm_head may not be included in the quantized checkpoint; confirm this
    matches the intended use before relying on generation from the result.
    """
    print(f"Quantizing model: {quantization_type}")
    progress(0, desc="Preparing Quantization")

    # Resolve the torchao config for the chosen scheme.  Only the grouped
    # schemes consume group_size, and "autoquant" is a plain string sentinel
    # understood by TorchAoConfig.
    if quantization_type == "autoquant":
        scheme_config = "autoquant"
    elif quantization_type == "GemliteUIntXWeightOnly":
        scheme_config = MAP_QUANT_TYPE_TO_CONFIG[quantization_type](group_size=group_size)
    elif quantization_type == "Int4WeightOnly":
        # Int4 weight-only needs an explicit CPU layout since the model is
        # loaded with device_map="cpu" below.
        from torchao.dtypes import Int4CPULayout
        scheme_config = MAP_QUANT_TYPE_TO_CONFIG[quantization_type](
            group_size=group_size, layout=Int4CPULayout()
        )
    else:
        scheme_config = MAP_QUANT_TYPE_TO_CONFIG[quantization_type]()

    hf_quant_config = TorchAoConfig(scheme_config)
    progress(0.10, desc="Quantizing model")

    # Quantization happens during the from_pretrained load itself.
    quantized = AutoModel.from_pretrained(
        model_name,
        torch_dtype="auto",
        quantization_config=hf_quant_config,
        device_map="cpu",
        token=token,
    )
    progress(0.45, desc="Quantization completed")
    return quantized
1755
+
1756
def save_ao_model(model, model_name, quantization_type, group_size=128, quantized_model_name=None, public=True, token=None, progress=gr.Progress()):
    """Serialize the quantized model + tokenizer, add a model card, and push to the Hub.

    Returns an HTML snippet linking to the created repository.
    """
    username = get_ao_username(token)
    progress(0.50, desc="Preparing to push")
    print("Saving quantized model")

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Re-download the tokenizer from the source repo so the quantized
        # repo is self-contained.
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
        tokenizer.save_pretrained(tmpdirname)
        # safe_serialization=False — presumably because torchao-quantized
        # weights don't round-trip through safetensors; TODO confirm.
        model.save_pretrained(tmpdirname, safe_serialization=False)

        # Repo-name construction mirrors check_ao_model_exists: explicit name
        # wins, otherwise derive from the base model + scheme (+ group size
        # for the grouped schemes).
        if quantized_model_name:
            repo_name = f"{username}/{quantized_model_name}"
        else:
            if quantization_type in ["Int4WeightOnly", "GemliteUIntXWeightOnly"] and (group_size is not None):
                repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type]}-gs{group_size}"
            else:
                repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type]}"

        progress(0.70, desc="Creating model card")
        model_card = create_ao_model_card(model_name, quantization_type, group_size, token)
        with open(os.path.join(tmpdirname, "README.md"), "w") as f:
            f.write(model_card)

        # exist_ok=True: pushing over an existing repo updates it in place.
        api = HfApi(token=token)
        api.create_repo(repo_name, exist_ok=True, private=not public)
        progress(0.80, desc="Pushing to Hub")
        api.upload_folder(folder_path=tmpdirname, repo_id=repo_name, repo_type="model")
        progress(1.00, desc="Done")

    # HTML fragment rendered by the Gradio Markdown output.
    repo_link = f"""
<div class="repo-link">
    <h3>🔗 Repository Link</h3>
    <p>Find your repo here: <a href="https://huggingface.co/{repo_name}" target="_blank">{repo_name}</a></p>
</div>
"""
    return f"<h1>🎉 Quantization Completed</h1><br/>{repo_link}"
1792
+
1793
@spaces.GPU
def quantize_and_save_ao(model_name, quantization_type, group_size, quantized_model_name, public, hf_token):
    """End-to-end TorchAO flow: authenticate, check for collisions, quantize, push.

    Returns an HTML status fragment for the UI in every branch (never raises).
    """
    # Authentication gate: get_ao_username collapses every failure into the
    # "anonymous" sentinel.
    username = get_ao_username(hf_token)
    if not username or username == "anonymous":
        return "<div class='error-box'><h3>❌ Authentication Error</h3><p>Invalid or missing HF_TOKEN.</p></div>"

    # The UI delivers group_size as free text; normalize to int or None.
    raw_group_size = str(group_size).strip() if group_size else ""
    if raw_group_size:
        try:
            group_size = int(raw_group_size)
        except ValueError:
            group_size = None
    else:
        group_size = None

    # Refuse to clobber an existing repo with the same derived name.
    exists_message = check_ao_model_exists(username, quantization_type, group_size, model_name, quantized_model_name, hf_token)
    if exists_message:
        return f"<div class='warning-box'><h3>⚠️ Model Already Exists</h3><p>{exists_message}</p></div>"

    try:
        quantized_model = quantize_ao_model(model_name, quantization_type, group_size, token=hf_token)
        return save_ao_model(quantized_model, model_name, quantization_type, group_size, quantized_model_name, public, token=hf_token)
    except Exception as e:
        return f"<div class='error-box'><h3>❌ Error</h3><p>{str(e)}</p></div>"
1816
+
1817
def get_awq_default_repo_name(model_id: str, scheme: str) -> str:
    """Suggest a default Hub repo name for an AWQ-compressed model.

    Returns "" when either input is empty; the "<your-username>" placeholder
    is meant to be replaced by the user's actual account name.
    """
    if model_id and scheme:
        base_name = Path(model_id).name
        return f"<your-username>/{base_name}-AWQ-{scheme}"
    return ""
1823
+
1824
@spaces.GPU
def run_awq_compression(
    hf_token: str,
    model_id: str,
    scheme: str,
    ignore_lm_head: bool,
    num_calib_samples: float,
    max_seq_len: float,
    pipeline_mode: str,
    upload_repo: str,
    progress=gr.Progress(track_tqdm=True),
):
    """Run AWQ quantization via llmcompressor's `oneshot` and optionally push to the Hub.

    This is a generator: each `yield` streams the full accumulated log so the
    UI textbox always shows the complete history.  Every failure path cleans
    up the temp directory and returns early instead of raising.
    """
    logs = []

    def log(msg: str) -> str:
        # Append and return the whole transcript — yielded as one block.
        logs.append(msg)
        return "\n".join(logs)

    if not model_id:
        yield log("Error: Please provide a source model id (e.g. meta-llama/Llama-3.3-70B-Instruct).")
        return

    # Gradio Numbers arrive as floats; the compressor wants ints.
    try:
        num_calib_samples_int = int(num_calib_samples)
        max_seq_len_int = int(max_seq_len)
    except ValueError as e:
        yield log(f"Error: Invalid number format for calibration settings. {e}")
        return

    # Work in a throwaway directory; removed in every exit path below.
    temp_dir = tempfile.mkdtemp()
    local_output_dir = Path(temp_dir) / f"{Path(model_id).name}-AWQ-{scheme}"
    yield log(f"ℹ️ Quantized model will be saved temporarily to: {local_output_dir.name}")

    # Login is optional: public models can be quantized without a token.
    if hf_token:
        try:
            login(token=hf_token)
            yield log("✅ Logged in to Hugging Face Hub.")
        except Exception as e:
            yield log(f"⚠️ Hugging Face login failed: {e}")
    else:
        yield log("ℹ️ No HF token provided. You can still quantize public models and save locally.")

    try:
        progress(0.1, desc="Building AWQ recipe...")
        yield log("🔧 Building AWQ recipe...")

        # Optionally exclude the output projection from quantization.
        ignore_patterns = ["lm_head"] if ignore_lm_head else None
        recipe = AWQModifier(
            targets="Linear",
            scheme=scheme,
            ignore=ignore_patterns,
        )
        yield log(f"Recipe:\n  scheme = {scheme}\n  ignore = {ignore_patterns or '[]'}")

    except Exception as e:
        yield log(f"❌ Failed to build AWQ recipe: {e}")
        shutil.rmtree(temp_dir, ignore_errors=True)
        return

    try:
        progress(0.25, desc="Running AWQ quantization...")
        yield log("🚀 Starting LLM Compressor `oneshot` run (no calibration dataset)...")
        yield log(f"  • model = {model_id}")
        yield log(f"  • num_calibration_samples = {num_calib_samples_int}")
        yield log(f"  • max_seq_length = {max_seq_len_int}")
        yield log(f"  • pipeline = {pipeline_mode}")

        # NOTE(review): dataset=None with a nonzero num_calibration_samples,
        # and device="cpu" under @spaces.GPU — confirm these are intended.
        oneshot(
            model=model_id,
            dataset=None,
            recipe=recipe,
            output_dir=str(local_output_dir),
            max_seq_length=max_seq_len_int,
            num_calibration_samples=num_calib_samples_int,
            pipeline=pipeline_mode,
            trust_remote_code_model=True,
            device="cpu",
        )

        progress(0.8, desc="Quantization complete. Preparing upload...")
        yield log("✅ AWQ quantization finished.")

    except Exception as e:
        progress(1.0, desc="Error")
        yield log(f"❌ CRITICAL ERROR during oneshot:\n{traceback.format_exc()}")
        shutil.rmtree(temp_dir, ignore_errors=True)
        return

    # Upload is best-effort: failures are logged but don't abort the run.
    if upload_repo and hf_token:
        try:
            progress(0.9, desc="Uploading compressed model to Hugging Face Hub...")
            yield log(f"☁️ Uploading folder `{local_output_dir.name}` to repo `{upload_repo}`...")

            api = HfApi(token=hf_token)
            api.create_repo(repo_id=upload_repo, repo_type="model", exist_ok=True)
            api.upload_folder(
                folder_path=str(local_output_dir),
                repo_id=upload_repo,
                repo_type="model",
            )

            hub_url = f"https://huggingface.co/{upload_repo}"
            yield log(f"✅ Upload complete. Model available at:\n{hub_url}")

        except Exception as e:
            yield log(f"⚠️ Upload failed: {e}")
    else:
        yield log("ℹ️ No upload repo configured. Local files saved to temporary location.")

    # NOTE(review): the temp dir is always removed here, so in the no-upload
    # branch the "saved to temporary location" files are gone by the time the
    # final message prints — confirm this is the intended behavior.
    shutil.rmtree(temp_dir, ignore_errors=True)
    progress(1.0, desc="Done!")
    yield log("🎉 Done! AWQ compression finished successfully. Local temporary files cleaned up.")
1936
+
1937
+
1938
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
1939
+ gr.Markdown("# 🚀 AutoTrain-Advanced & Quantization Hub")
1940
+ gr.Markdown("### Una plataforma unificada para Fine-Tuning, PEFT, TorchAO y AWQ Quantization.")
1941
 
1942
  with gr.Tab("1. Autenticación"):
1943
  gr.Markdown("#### Conecta tu cuenta de Hugging Face para guardar y cargar modelos.")
 
2253
  inputs=[inf_task_mode, inf_model_id, inf_text_in, inf_context_in, inf_image_in, inf_audio_in, inf_temperature, inf_top_p, inf_max_new_tokens],
2254
  outputs=[inf_text_out, inf_model_id, inf_text_in, inf_context_in, inf_image_in, inf_audio_in]
2255
  )
2256
+
2257
+ with gr.Tab("5. TorchAO Quantization"):
2258
+ gr.Markdown("## 🔥 TorchAO Quantizer")
2259
+ gr.Markdown("Cuantización eficiente usando `torchao`.")
2260
+ with gr.Row():
2261
+ ao_token = gr.Textbox(label="HF Token (si es diferente al principal)", type="password", placeholder="Opcional")
2262
+ ao_model_name = HuggingfaceHubSearch(label="🔍 Hub Model ID", placeholder="Search a model", search_type="model")
2263
+ ao_quant_type = gr.Dropdown(choices=list(MAP_QUANT_TYPE_TO_NAME.keys()), value="Int8WeightOnly", label="Tipo de Cuantización")
2264
+ ao_group_size = gr.Textbox(label="Group Size (opcional)", value="128")
2265
+ ao_custom_name = gr.Textbox(label="Nombre Personalizado (opcional)", value="")
2266
+ ao_public = gr.Checkbox(label="Hacer Público", value=True)
2267
+ ao_output = gr.Markdown()
2268
+ ao_btn = gr.Button("🚀 Cuantizar y Subir", variant="primary")
2269
+
2270
+ ao_btn.click(
2271
+ quantize_and_save_ao,
2272
+ inputs=[ao_model_name, ao_quant_type, ao_group_size, ao_custom_name, ao_public, hf_token_input],
2273
+ outputs=ao_output
2274
+ )
2275
+
2276
+ with gr.Tab("6. AWQ Quantization"):
2277
+ gr.Markdown("## 🧱 LLM Compressor – AWQ Quantizer")
2278
+ gr.Markdown("Cuantización AWQ usando `llmcompressor` (oneshot).")
2279
+ with gr.Row():
2280
+ with gr.Column():
2281
+ awq_token = gr.Textbox(label="HF Token (si es diferente al principal)", type="password", placeholder="Opcional")
2282
+ awq_model_id = gr.Textbox(label="Source Model ID", value="meta-llama/Llama-3.3-70B-Instruct")
2283
+ awq_scheme = gr.Dropdown(label="AWQ Scheme", choices=["W4A16", "W4A16_ASYM"], value="W4A16_ASYM")
2284
+ awq_ignore_head = gr.Checkbox(label="Ignore lm_head", value=True)
2285
+ awq_calib = gr.Number(label="Calibration Samples", value=128, precision=0)
2286
+ awq_seq_len = gr.Number(label="Max Sequence Length", value=2048, precision=0)
2287
+ awq_pipeline = gr.Dropdown(label="Pipeline Mode", choices=["sequential", "default"], value="sequential")
2288
+ awq_repo = gr.Textbox(label="Target HF Repo", placeholder="username/model-awq")
2289
+ awq_btn = gr.Button("Iniciar Compresión AWQ", variant="primary")
2290
+ with gr.Column():
2291
+ awq_logs = gr.Textbox(label="Logs del Proceso", lines=30, interactive=False)
2292
+
2293
+ awq_btn.click(
2294
+ run_awq_compression,
2295
+ inputs=[hf_token_input, awq_model_id, awq_scheme, awq_ignore_head, awq_calib, awq_seq_len, awq_pipeline, awq_repo],
2296
+ outputs=[awq_logs]
2297
+ )
2298
+
2299
+ with gr.Tab("7. Explicación del Código"):
2300
  gr.Markdown("""
2301
  ### 🧠 Explicación del Código y Mecanismos Avanzados
2302
  """)
 
2324
  * Task-Specific Heads: Supports **Sequence Classification**, **Token Classification (NER)**, and **Question Answering** by loading appropriate model heads (`AutoModelFor...`).
2325
  * Seq2Seq: For translation/summarization tasks, using `Seq2SeqTrainer`.
2326
  """)
2327
+ gr.Markdown("#### 4. QUANTIZATION (TorchAO & AWQ)")
2328
  gr.Markdown("""
2329
+ * **TorchAO**: PyTorch Native Quantization. Supports Int4, Int8, and Float8 quantization techniques directly integrated with the model loading process.
2330
+ * **AWQ (Activation-aware Weight Quantization)**: Uses `llmcompressor` in oneshot mode to protect salient weights based on activation magnitude, preserving performance at 4-bit.
 
 
 
 
2331
  """)
2332
  gr.Markdown("#### 5. OUTPUT & DEPLOYMENT")
2333
  gr.Markdown("""
 
2337
  """)
2338
 
2339
# Script entry point: enable a bounded request queue (long-running training
# and quantization jobs would otherwise block concurrent users) and expose a
# public share link alongside debug output.
if __name__ == "__main__":
    demo.queue(max_size=50).launch(debug=True, share=True)