Spaces: Runtime error

Update app.py

app.py (CHANGED)
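This commit appears to target the Space's "Runtime error" status: it adjusts the `epochs` fallback, deletes a syntactically broken block (a bare `def` followed by inference, UI, and dataset-creation helpers), renames the training wrapper to `train_sft_dpo_wrapper`, and binds that wrapper to the start-training button's click event.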
@@ -1166,7 +1166,7 @@ def train_text_to_image(model_name, train_dataset, repo_id, update_logs_fn, mode
         eps=float(kwargs.get('adam_epsilon', 1e-8)),
     )

-    num_epochs = int(kwargs.get('epochs', 1))
+    num_epochs = int(kwargs.get('epochs', 1.0))
     num_update_steps_per_epoch = math.ceil(len(train_dataloader) / int(kwargs.get('gradient_accumulation', 8)))
     max_train_steps = num_epochs * num_update_steps_per_epoch

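The only change here swaps the `epochs` fallback from `1` to `1.0`; because the value is immediately wrapped in `int()`, either default produces the same number, so the edit is cosmetic. For reference, a minimal sketch of the step arithmetic these lines perform, with assumed values for the dataloader length and accumulation factor:

import math

# Assumed, illustrative values (not taken from the diff):
epochs = 3                # int(kwargs.get('epochs', 1.0))
dataloader_len = 1000     # len(train_dataloader)
grad_accum = 8            # int(kwargs.get('gradient_accumulation', 8))

# One optimizer update per grad_accum micro-batches, rounding up so the
# final partial group still counts as a step.
updates_per_epoch = math.ceil(dataloader_len / grad_accum)  # ceil(1000 / 8) = 125
max_train_steps = epochs * updates_per_epoch                # 3 * 125 = 375
print(updates_per_epoch, max_train_steps)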
@@ -1514,123 +1514,7 @@ def _train_and_upload(**kwargs):
     )

 @spaces.GPU()
-def
-    if not model_id: return "Por favor, introduce un ID de modelo del Hub.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-    task_name = TASK_TO_PIPELINE_MAP.get(task_mode)
-    if not task_name: return f"La inferencia para el modo '{task_mode}' no está soportada.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-    try:
-        pipe = pipeline(task_name, model=model_id, torch_dtype=torch_dtype_auto, trust_remote_code=True, device=0 if device == 'cuda' else -1)
-        result = None
-        if task_name == "text-generation":
-            if not text_in: return "Por favor, introduce un prompt de texto.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-            result = pipe(text_in, max_new_tokens=int(max_new_tokens), do_sample=True, temperature=temperature, top_p=top_p)
-        elif task_name == "question-answering":
-            if not text_in or not context_in: return "Por favor, introduce una pregunta y un contexto.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-            result = pipe(question=text_in, context=context_in)
-        elif task_name in ["token-classification", "text2text-generation", "text-classification"]:
-            if not text_in: return f"Por favor, introduce texto para {task_name}.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-            result = pipe(text_in)
-        elif task_name in ["image-classification", "audio-classification", "automatic-speech-recognition"]:
-            input_data = image_in if "image" in task_name else audio_in
-            if input_data is None: return f"Por favor, proporciona una entrada de { 'imagen' if 'image' in task_name else 'audio' }.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-            result = pipe(input_data)
-
-        return f"Resultado:\n\n{json.dumps(result, indent=2, ensure_ascii=False)}", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-    except Exception as e: return f"Error en Inferencia: {e}\n{traceback.format_exc()}", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-
-@spaces.GPU()
-def update_inference_ui(task_mode):
-    task_name = TASK_TO_PIPELINE_MAP.get(task_mode, "")
-    is_text_gen = task_name == "text-generation"
-    show_text = task_name in ["text-generation", "text2text-generation", "token-classification", "question-answering", "text-classification", "text-to-image"]
-    show_context = task_name == "question-answering"
-    show_image = task_name in ["image-classification"]
-    show_audio = task_name in ["audio-classification", "automatic-speech-recognition"]
-    text_label = "Pregunta" if task_name == "question-answering" else "Entrada de Texto / Prompt"
-
-    return (
-        gr.update(visible=show_text, label=text_label),
-        gr.update(visible=show_context),
-        gr.update(visible=show_image),
-        gr.update(visible=show_audio),
-        gr.update(visible=is_text_gen)
-    )
-
-@spaces.GPU()
-def create_and_upload_dataset(hf_token, repo_name, creation_type, synth_model, synth_prompt, synth_num_samples, file_uploads, progress=gr.Progress()):
-    if not hf_token:
-        return "Error: Se requiere un token de Hugging Face.", ""
-    if not repo_name:
-        return "Error: Se requiere un nombre de repositorio para el dataset.", ""
-
-    try:
-        login(token=hf_token)
-        user = whoami()
-        username = user.get("name")
-        repo_id = f"{username}/{repo_name}"
-        create_repo(repo_id, repo_type="dataset", exist_ok=True)
-
-        all_data = []
-
-        if creation_type == "Sintético":
-            if not synth_model or not synth_prompt or not synth_num_samples:
-                return "Error: Para la generación sintética se requiere un modelo, un prompt y un número de muestras.", ""
-
-            progress(0, desc="Cargando modelo generador...")
-            generator = pipeline("text-generation", model=synth_model, torch_dtype=torch_dtype_auto, device=0 if device == 'cuda' else -1)
-
-            for i in progress.tqdm(range(int(synth_num_samples)), desc="Generando muestras"):
-                try:
-                    generated_output = generator(synth_prompt, max_new_tokens=256, num_return_sequences=1, do_sample=True, temperature=0.9, top_p=0.95)
-                    cleaned_text = generated_output[0]['generated_text'][len(synth_prompt):].strip()
-                    if cleaned_text:
-                        all_data.append({"text": cleaned_text})
-                except Exception as e:
-                    logger.warning(f"Error al generar muestra {i}: {e}")
-
-        elif creation_type == "Basado en Archivo":
-            if not file_uploads:
-                return "Error: Por favor, sube al menos un archivo.", ""
-            progress(0.5, desc="Procesando archivos subidos...")
-            file_data = _load_uploaded_stream(file_uploads)
-            all_data = file_data.get("train", []) + file_data.get("validation", [])
-
-        if not all_data:
-            return "Error: No se generaron o procesaron datos.", ""
-
-        progress(0.8, desc="Guardando y subiendo al Hub...")
-        with tempfile.TemporaryDirectory() as temp_dir:
-            data_file = os.path.join(temp_dir, "data.jsonl")
-            with open(data_file, "w", encoding="utf-8") as f:
-                for item in all_data:
-                    f.write(json.dumps(item, ensure_ascii=False) + "\n")
-
-            readme_content = DATASET_CARD_TEMPLATE.format(
-                repo_id=repo_id,
-                creation_type=creation_type,
-                generation_model=synth_model if creation_type == "Sintético" else "N/A",
-                date=datetime.now().strftime("%Y-%m-%d")
-            )
-            readme_file = os.path.join(temp_dir, "README.md")
-            with open(readme_file, "w", encoding="utf-8") as f:
-                f.write(readme_content)
-
-            api = HfApi()
-            api.upload_folder(
-                folder_path=temp_dir,
-                repo_id=repo_id,
-                repo_type="dataset",
-                commit_message="Creación de dataset con AutoTrain-Advanced"
-            )
-
-        dataset_link = f"https://huggingface.co/datasets/{repo_id}"
-        return f"✅ Dataset creado y subido exitosamente a {repo_id}", f"### ✅ [Dataset Disponible: Visita el Repositorio]({dataset_link})"
-
-    except Exception as e:
-        return f"❌ Error fatal durante la creación del dataset: {e}\n{traceback.format_exc()}", ""
-
-@spaces.GPU()
-def gradio_train_wrapper(*args):
+def train_sft_dpo_wrapper(*args):
     kwargs = dict(zip(all_input_components_dict.keys(), args))
     yield from _train_and_upload(**kwargs)

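The removed block begins with a bare `def` that has no name, arguments, or colon, a syntax error consistent with the Space's "Runtime error" status; the commit drops that entire block (ad-hoc inference, UI toggling, and dataset-creation helpers) and keeps only the training wrapper, renamed from `gradio_train_wrapper` to `train_sft_dpo_wrapper`. A self-contained sketch of the surviving wrapper pattern, with stand-in values where noted:

# Sketch of the wrapper pattern kept by the commit. `all_input_components_dict`
# mirrors the app's ordered dict of Gradio components; the entries here are stand-ins.
all_input_components_dict = {"model_name": None, "epochs": None}

def _train_and_upload(**kwargs):
    # Stand-in for the real trainer: a generator that streams status lines.
    yield f"starting training with {kwargs}"
    yield "upload finished"

def train_sft_dpo_wrapper(*args):
    # Gradio passes component values positionally in the order of inputs=[...];
    # zip them back onto the known key order, then delegate to the generator.
    kwargs = dict(zip(all_input_components_dict.keys(), args))
    yield from _train_and_upload(**kwargs)

# Usage: iterate the generator the way Gradio would.
for line in train_sft_dpo_wrapper("gpt2", 3):
    print(line)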
@@ -1950,7 +1834,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     )

     train_event = start_training_button.click(
-
+        train_sft_dpo_wrapper,
         inputs=all_input_components_list,
         outputs=all_output_components
     )
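In the old code, the `.click()` call had an empty line where the callback belonged, so no function was bound to the button; the commit passes `train_sft_dpo_wrapper` as the positional `fn` argument. A minimal, hypothetical Gradio sketch of this wiring (component names here are illustrative, not the app's):

import gradio as gr

def train_sft_dpo_wrapper(model_name):
    # Stand-in generator: each yield streams an update into the output component.
    yield f"training {model_name}..."
    yield "done: weights pushed to the Hub"

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    model_name = gr.Textbox(label="Model name")
    logs = gr.Textbox(label="Training logs")
    start_training_button = gr.Button("Start training")
    # The fn goes first, exactly as the fixed hunk does; inputs/outputs are keyword args.
    train_event = start_training_button.click(
        train_sft_dpo_wrapper,
        inputs=[model_name],
        outputs=[logs],
    )

if __name__ == "__main__":
    demo.launch()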