Ignaciohhhhggfgjfrffd committed on
Commit
8da19b3
·
verified ·
1 Parent(s): d4a374a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -4
app.py CHANGED
@@ -531,7 +531,6 @@ def _create_training_args(output_dir, repo_id, **kwargs):
531
  "save_strategy": "steps",
532
  "logging_steps": int(kwargs.get('logging_steps', 10)),
533
  "save_steps": int(kwargs.get('save_steps', 50)),
534
- "evaluation_strategy": "steps" if kwargs.get('run_evaluation', False) else "no",
535
  "eval_steps": int(kwargs.get('save_steps', 50)) if kwargs.get('run_evaluation', False) else None,
536
  "learning_rate": float(kwargs.get('learning_rate', 2e-5)),
537
  "fp16": kwargs.get('mixed_precision') == 'fp16' and device == 'cuda',
@@ -1166,7 +1165,7 @@ def train_text_to_image(model_name, train_dataset, repo_id, update_logs_fn, mode
1166
  eps=float(kwargs.get('adam_epsilon', 1e-8)),
1167
  )
1168
 
1169
- num_epochs = int(kwargs.get('epochs', 1.0))
1170
  num_update_steps_per_epoch = math.ceil(len(train_dataloader) / int(kwargs.get('gradient_accumulation', 8)))
1171
  max_train_steps = num_epochs * num_update_steps_per_epoch
1172
 
@@ -1514,7 +1513,123 @@ def _train_and_upload(**kwargs):
1514
  )
1515
 
1516
  @spaces.GPU()
1517
- def train_sft_dpo_wrapper(*args):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1518
  kwargs = dict(zip(all_input_components_dict.keys(), args))
1519
  yield from _train_and_upload(**kwargs)
1520
 
@@ -1834,7 +1949,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
1834
  )
1835
 
1836
  train_event = start_training_button.click(
1837
- train_sft_dpo_wrapper,
1838
  inputs=all_input_components_list,
1839
  outputs=all_output_components
1840
  )
 
531
  "save_strategy": "steps",
532
  "logging_steps": int(kwargs.get('logging_steps', 10)),
533
  "save_steps": int(kwargs.get('save_steps', 50)),
 
534
  "eval_steps": int(kwargs.get('save_steps', 50)) if kwargs.get('run_evaluation', False) else None,
535
  "learning_rate": float(kwargs.get('learning_rate', 2e-5)),
536
  "fp16": kwargs.get('mixed_precision') == 'fp16' and device == 'cuda',
 
1165
  eps=float(kwargs.get('adam_epsilon', 1e-8)),
1166
  )
1167
 
1168
+ num_epochs = int(kwargs.get('epochs', 1))
1169
  num_update_steps_per_epoch = math.ceil(len(train_dataloader) / int(kwargs.get('gradient_accumulation', 8)))
1170
  max_train_steps = num_epochs * num_update_steps_per_epoch
1171
 
 
1513
  )
1514
 
1515
@spaces.GPU()
def run_inference(task_mode, model_id, text_in, context_in, image_in, audio_in, temperature, top_p, max_new_tokens):
    """Run a one-off inference with a Hub model for the selected task mode.

    Args:
        task_mode: UI task label, mapped to a transformers pipeline task via
            TASK_TO_PIPELINE_MAP.
        model_id: Hub repo id of the model to load.
        text_in / context_in / image_in / audio_in: task-dependent inputs;
            only the ones relevant to the resolved task are read.
        temperature, top_p, max_new_tokens: sampling controls, used only for
            text-generation.

    Returns:
        A 6-tuple ``(message, model_id, *four gr.update() placeholders)`` —
        the fixed shape the Gradio outputs wiring expects on every path.
    """
    # Every exit path must produce the same 6-tuple shape; centralize it so a
    # branch cannot accidentally return the wrong arity.
    def _reply(message):
        return message, model_id, gr.update(), gr.update(), gr.update(), gr.update()

    if not model_id:
        return _reply("Por favor, introduce un ID de modelo del Hub.")
    task_name = TASK_TO_PIPELINE_MAP.get(task_mode)
    if not task_name:
        return _reply(f"La inferencia para el modo '{task_mode}' no está soportada.")
    try:
        # trust_remote_code allows custom model architectures from the Hub;
        # device=0 targets the first CUDA device, -1 means CPU.
        pipe = pipeline(task_name, model=model_id, torch_dtype=torch_dtype_auto, trust_remote_code=True, device=0 if device == 'cuda' else -1)
        result = None
        if task_name == "text-generation":
            if not text_in:
                return _reply("Por favor, introduce un prompt de texto.")
            result = pipe(text_in, max_new_tokens=int(max_new_tokens), do_sample=True, temperature=temperature, top_p=top_p)
        elif task_name == "question-answering":
            if not text_in or not context_in:
                return _reply("Por favor, introduce una pregunta y un contexto.")
            result = pipe(question=text_in, context=context_in)
        elif task_name in ["token-classification", "text2text-generation", "text-classification"]:
            if not text_in:
                return _reply(f"Por favor, introduce texto para {task_name}.")
            result = pipe(text_in)
        elif task_name in ["image-classification", "audio-classification", "automatic-speech-recognition"]:
            input_data = image_in if "image" in task_name else audio_in
            if input_data is None:
                return _reply(f"Por favor, proporciona una entrada de { 'imagen' if 'image' in task_name else 'audio' }.")
            result = pipe(input_data)

        # Fix: a task that is mapped but has no branch above previously fell
        # through and printed "Resultado:\n\nnull" — report it explicitly.
        if result is None:
            return _reply(f"La inferencia para el modo '{task_mode}' no está soportada.")
        return _reply(f"Resultado:\n\n{json.dumps(result, indent=2, ensure_ascii=False)}")
    except Exception as e:
        return _reply(f"Error en Inferencia: {e}\n{traceback.format_exc()}")
1539
+
1540
@spaces.GPU()
def update_inference_ui(task_mode):
    """Toggle visibility of the inference input widgets for the chosen task.

    Returns five ``gr.update`` objects, in order: text input (with a
    task-appropriate label), QA context input, image input, audio input,
    and the sampling-controls group shown only for text generation.
    """
    pipeline_task = TASK_TO_PIPELINE_MAP.get(task_mode, "")

    # Group the pipeline tasks by which input widget they consume.
    text_tasks = {
        "text-generation", "text2text-generation", "token-classification",
        "question-answering", "text-classification", "text-to-image",
    }
    audio_tasks = {"audio-classification", "automatic-speech-recognition"}

    needs_context = pipeline_task == "question-answering"
    sampling_visible = pipeline_task == "text-generation"
    # QA reuses the text box as the question field, so relabel it.
    text_label = "Pregunta" if needs_context else "Entrada de Texto / Prompt"

    return (
        gr.update(visible=pipeline_task in text_tasks, label=text_label),
        gr.update(visible=needs_context),
        gr.update(visible=pipeline_task == "image-classification"),
        gr.update(visible=pipeline_task in audio_tasks),
        gr.update(visible=sampling_visible),
    )
1557
+
1558
@spaces.GPU()
def create_and_upload_dataset(hf_token, repo_name, creation_type, synth_model, synth_prompt, synth_num_samples, file_uploads, progress=gr.Progress()):
    """Build a dataset (synthetically generated or from uploaded files) and push it to the Hub.

    Args:
        hf_token: Hugging Face access token; required to log in and create the repo.
        repo_name: Bare repository name; combined with the token owner's username into repo_id.
        creation_type: Either "Sintético" (generate samples with a text-generation model)
            or "Basado en Archivo" (parse uploaded files).
        synth_model / synth_prompt / synth_num_samples: Generation settings, only used
            for the synthetic path.
        file_uploads: Uploaded file objects, only used for the file-based path.
        progress: Gradio progress tracker (injected by Gradio via the default).

    Returns:
        A 2-tuple ``(status_message, markdown_link)``; the link is empty on error.
    """
    if not hf_token:
        return "Error: Se requiere un token de Hugging Face.", ""
    if not repo_name:
        return "Error: Se requiere un nombre de repositorio para el dataset.", ""

    try:
        # Authenticate first so whoami()/create_repo act on behalf of the token owner.
        login(token=hf_token)
        user = whoami()
        username = user.get("name")
        repo_id = f"{username}/{repo_name}"
        # exist_ok=True makes re-runs idempotent for an already-created dataset repo.
        create_repo(repo_id, repo_type="dataset", exist_ok=True)

        all_data = []

        if creation_type == "Sintético":
            if not synth_model or not synth_prompt or not synth_num_samples:
                return "Error: Para la generación sintética se requiere un modelo, un prompt y un número de muestras.", ""

            progress(0, desc="Cargando modelo generador...")
            generator = pipeline("text-generation", model=synth_model, torch_dtype=torch_dtype_auto, device=0 if device == 'cuda' else -1)

            for i in progress.tqdm(range(int(synth_num_samples)), desc="Generando muestras"):
                try:
                    generated_output = generator(synth_prompt, max_new_tokens=256, num_return_sequences=1, do_sample=True, temperature=0.9, top_p=0.95)
                    # The pipeline echoes the prompt; slice it off and keep only new text.
                    cleaned_text = generated_output[0]['generated_text'][len(synth_prompt):].strip()
                    if cleaned_text:
                        all_data.append({"text": cleaned_text})
                except Exception as e:
                    # Best-effort generation: log and keep going so one bad sample
                    # does not abort the whole dataset build.
                    logger.warning(f"Error al generar muestra {i}: {e}")

        elif creation_type == "Basado en Archivo":
            if not file_uploads:
                return "Error: Por favor, sube al menos un archivo.", ""
            progress(0.5, desc="Procesando archivos subidos...")
            file_data = _load_uploaded_stream(file_uploads)
            # Merge both splits into one flat list; the upload is a single JSONL file.
            all_data = file_data.get("train", []) + file_data.get("validation", [])

        if not all_data:
            return "Error: No se generaron o procesaron datos.", ""

        progress(0.8, desc="Guardando y subiendo al Hub...")
        with tempfile.TemporaryDirectory() as temp_dir:
            # Write one JSON object per line (JSONL), which the Hub auto-detects.
            data_file = os.path.join(temp_dir, "data.jsonl")
            with open(data_file, "w", encoding="utf-8") as f:
                for item in all_data:
                    f.write(json.dumps(item, ensure_ascii=False) + "\n")

            # Dataset card rendered from the project template (defined elsewhere in this file).
            readme_content = DATASET_CARD_TEMPLATE.format(
                repo_id=repo_id,
                creation_type=creation_type,
                generation_model=synth_model if creation_type == "Sintético" else "N/A",
                date=datetime.now().strftime("%Y-%m-%d")
            )
            readme_file = os.path.join(temp_dir, "README.md")
            with open(readme_file, "w", encoding="utf-8") as f:
                f.write(readme_content)

            # Upload data + card in a single commit.
            api = HfApi()
            api.upload_folder(
                folder_path=temp_dir,
                repo_id=repo_id,
                repo_type="dataset",
                commit_message="Creación de dataset con AutoTrain-Advanced"
            )

        dataset_link = f"https://huggingface.co/datasets/{repo_id}"
        return f"✅ Dataset creado y subido exitosamente a {repo_id}", f"### ✅ [Dataset Disponible: Visita el Repositorio]({dataset_link})"

    except Exception as e:
        return f"❌ Error fatal durante la creación del dataset: {e}\n{traceback.format_exc()}", ""
1630
+
1631
@spaces.GPU()
def gradio_train_wrapper(*args):
    """Gradio entry point for training: rebind the positional component values
    to their named keys and stream everything _train_and_upload yields."""
    named_inputs = {key: value for key, value in zip(all_input_components_dict.keys(), args)}
    yield from _train_and_upload(**named_inputs)
1635
 
 
1949
  )
1950
 
1951
  train_event = start_training_button.click(
1952
+ gradio_train_wrapper,
1953
  inputs=all_input_components_list,
1954
  outputs=all_output_components
1955
  )