Ignaciohhhhggfgjfrffd committed
Commit d4a374a · verified · 1 Parent(s): c9b59fe

Update app.py

Files changed (1):
  1. app.py +3 -119

app.py CHANGED
@@ -1166,7 +1166,7 @@ def train_text_to_image(model_name, train_dataset, repo_id, update_logs_fn, mode
         eps=float(kwargs.get('adam_epsilon', 1e-8)),
     )
 
-    num_epochs = int(kwargs.get('epochs', 1))
+    num_epochs = int(kwargs.get('epochs', 1.0))
     num_update_steps_per_epoch = math.ceil(len(train_dataloader) / int(kwargs.get('gradient_accumulation', 8)))
     max_train_steps = num_epochs * num_update_steps_per_epoch
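The hunk above computes one optimizer update per `gradient_accumulation` batches, so the step budget is epochs times the ceiling of batches over the accumulation factor. A minimal sketch of that arithmetic, assuming an illustrative dataloader length (that value is not in the commit):

    import math

    # Assumed for illustration; only the defaults (epochs 1.0,
    # accumulation 8) appear in the hunk above.
    len_train_dataloader = 1000      # batches per epoch
    gradient_accumulation = 8
    num_epochs = int(1.0)            # int() truncates the float default to 1

    num_update_steps_per_epoch = math.ceil(len_train_dataloader / gradient_accumulation)
    max_train_steps = num_epochs * num_update_steps_per_epoch
    print(num_update_steps_per_epoch, max_train_steps)  # 125 125

Note that int() truncates fractional values (int(1.0) == 1), so changing the default from 1 to 1.0 does not change the computed step count.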
 
@@ -1514,123 +1514,7 @@ def _train_and_upload(**kwargs):
     )
 
 @spaces.GPU()
-def run_inference(task_mode, model_id, text_in, context_in, image_in, audio_in, temperature, top_p, max_new_tokens):
-    if not model_id: return "Por favor, introduce un ID de modelo del Hub.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-    task_name = TASK_TO_PIPELINE_MAP.get(task_mode)
-    if not task_name: return f"La inferencia para el modo '{task_mode}' no está soportada.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-    try:
-        pipe = pipeline(task_name, model=model_id, torch_dtype=torch_dtype_auto, trust_remote_code=True, device=0 if device == 'cuda' else -1)
-        result = None
-        if task_name == "text-generation":
-            if not text_in: return "Por favor, introduce un prompt de texto.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-            result = pipe(text_in, max_new_tokens=int(max_new_tokens), do_sample=True, temperature=temperature, top_p=top_p)
-        elif task_name == "question-answering":
-            if not text_in or not context_in: return "Por favor, introduce una pregunta y un contexto.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-            result = pipe(question=text_in, context=context_in)
-        elif task_name in ["token-classification", "text2text-generation", "text-classification"]:
-            if not text_in: return f"Por favor, introduce texto para {task_name}.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-            result = pipe(text_in)
-        elif task_name in ["image-classification", "audio-classification", "automatic-speech-recognition"]:
-            input_data = image_in if "image" in task_name else audio_in
-            if input_data is None: return f"Por favor, proporciona una entrada de { 'imagen' if 'image' in task_name else 'audio' }.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-            result = pipe(input_data)
-
-        return f"Resultado:\n\n{json.dumps(result, indent=2, ensure_ascii=False)}", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-    except Exception as e: return f"Error en Inferencia: {e}\n{traceback.format_exc()}", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-
-@spaces.GPU()
-def update_inference_ui(task_mode):
-    task_name = TASK_TO_PIPELINE_MAP.get(task_mode, "")
-    is_text_gen = task_name == "text-generation"
-    show_text = task_name in ["text-generation", "text2text-generation", "token-classification", "question-answering", "text-classification", "text-to-image"]
-    show_context = task_name == "question-answering"
-    show_image = task_name in ["image-classification"]
-    show_audio = task_name in ["audio-classification", "automatic-speech-recognition"]
-    text_label = "Pregunta" if task_name == "question-answering" else "Entrada de Texto / Prompt"
-
-    return (
-        gr.update(visible=show_text, label=text_label),
-        gr.update(visible=show_context),
-        gr.update(visible=show_image),
-        gr.update(visible=show_audio),
-        gr.update(visible=is_text_gen)
-    )
-
-@spaces.GPU()
-def create_and_upload_dataset(hf_token, repo_name, creation_type, synth_model, synth_prompt, synth_num_samples, file_uploads, progress=gr.Progress()):
-    if not hf_token:
-        return "Error: Se requiere un token de Hugging Face.", ""
-    if not repo_name:
-        return "Error: Se requiere un nombre de repositorio para el dataset.", ""
-
-    try:
-        login(token=hf_token)
-        user = whoami()
-        username = user.get("name")
-        repo_id = f"{username}/{repo_name}"
-        create_repo(repo_id, repo_type="dataset", exist_ok=True)
-
-        all_data = []
-
-        if creation_type == "Sintético":
-            if not synth_model or not synth_prompt or not synth_num_samples:
-                return "Error: Para la generación sintética se requiere un modelo, un prompt y un número de muestras.", ""
-
-            progress(0, desc="Cargando modelo generador...")
-            generator = pipeline("text-generation", model=synth_model, torch_dtype=torch_dtype_auto, device=0 if device == 'cuda' else -1)
-
-            for i in progress.tqdm(range(int(synth_num_samples)), desc="Generando muestras"):
-                try:
-                    generated_output = generator(synth_prompt, max_new_tokens=256, num_return_sequences=1, do_sample=True, temperature=0.9, top_p=0.95)
-                    cleaned_text = generated_output[0]['generated_text'][len(synth_prompt):].strip()
-                    if cleaned_text:
-                        all_data.append({"text": cleaned_text})
-                except Exception as e:
-                    logger.warning(f"Error al generar muestra {i}: {e}")
-
-        elif creation_type == "Basado en Archivo":
-            if not file_uploads:
-                return "Error: Por favor, sube al menos un archivo.", ""
-            progress(0.5, desc="Procesando archivos subidos...")
-            file_data = _load_uploaded_stream(file_uploads)
-            all_data = file_data.get("train", []) + file_data.get("validation", [])
-
-        if not all_data:
-            return "Error: No se generaron o procesaron datos.", ""
-
-        progress(0.8, desc="Guardando y subiendo al Hub...")
-        with tempfile.TemporaryDirectory() as temp_dir:
-            data_file = os.path.join(temp_dir, "data.jsonl")
-            with open(data_file, "w", encoding="utf-8") as f:
-                for item in all_data:
-                    f.write(json.dumps(item, ensure_ascii=False) + "\n")
-
-            readme_content = DATASET_CARD_TEMPLATE.format(
-                repo_id=repo_id,
-                creation_type=creation_type,
-                generation_model=synth_model if creation_type == "Sintético" else "N/A",
-                date=datetime.now().strftime("%Y-%m-%d")
-            )
-            readme_file = os.path.join(temp_dir, "README.md")
-            with open(readme_file, "w", encoding="utf-8") as f:
-                f.write(readme_content)
-
-            api = HfApi()
-            api.upload_folder(
-                folder_path=temp_dir,
-                repo_id=repo_id,
-                repo_type="dataset",
-                commit_message="Creación de dataset con AutoTrain-Advanced"
-            )
-
-        dataset_link = f"https://huggingface.co/datasets/{repo_id}"
-        return f"✅ Dataset creado y subido exitosamente a {repo_id}", f"### ✅ [Dataset Disponible: Visita el Repositorio]({dataset_link})"
-
-    except Exception as e:
-        return f"❌ Error fatal durante la creación del dataset: {e}\n{traceback.format_exc()}", ""
-
-@spaces.GPU()
-def gradio_train_wrapper(*args):
+def train_sft_dpo_wrapper(*args):
     kwargs = dict(zip(all_input_components_dict.keys(), args))
     yield from _train_and_upload(**kwargs)
 
@@ -1950,7 +1834,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     )
 
     train_event = start_training_button.click(
-        gradio_train_wrapper,
+        train_sft_dpo_wrapper,
         inputs=all_input_components_list,
         outputs=all_output_components
     )
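The surviving wrapper and the rewired click handler follow Gradio's usual pattern: component values arrive positionally, the wrapper zips them back into keyword arguments, and because the wrapper is a generator each yielded value streams into the output component. A self-contained sketch of that pattern; the components and keys below are illustrative stand-ins, not app.py's real ones:

    import gradio as gr

    # Stand-in for app.py's mapping of parameter names to components;
    # insertion order defines the positional argument order.
    all_input_components_dict = {}

    def _train_and_upload(**kwargs):
        yield f"training with epochs={kwargs['epochs']}"

    def train_sft_dpo_wrapper(*args):
        # Gradio passes component values positionally; zipping against
        # the dict's keys restores them as keyword arguments.
        kwargs = dict(zip(all_input_components_dict.keys(), args))
        yield from _train_and_upload(**kwargs)

    with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
        all_input_components_dict["model_name"] = gr.Textbox(label="Model")
        all_input_components_dict["epochs"] = gr.Number(value=1)
        logs = gr.Textbox(label="Logs")
        start_training_button = gr.Button("Start training")
        train_event = start_training_button.click(
            train_sft_dpo_wrapper,
            inputs=list(all_input_components_dict.values()),
            outputs=[logs],
        )

    if __name__ == "__main__":
        demo.launch()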
 