Spaces: Runtime error

Update app.py

app.py (CHANGED)
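This commit appears to target the Space's "Runtime error" status: it adjusts the `epochs` fallback, deletes a syntactically broken block (a bare `def` followed by inference, UI, and dataset-creation helpers), renames the training wrapper to `train_sft_dpo_wrapper`, and binds that wrapper to the start-training button's click event.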
@@ -1166,7 +1166,7 @@ def train_text_to_image(model_name, train_dataset, repo_id, update_logs_fn, mode
         eps=float(kwargs.get('adam_epsilon', 1e-8)),
     )

-    num_epochs = int(kwargs.get('epochs', 1))
+    num_epochs = int(kwargs.get('epochs', 1.0))
     num_update_steps_per_epoch = math.ceil(len(train_dataloader) / int(kwargs.get('gradient_accumulation', 8)))
     max_train_steps = num_epochs * num_update_steps_per_epoch

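The only change here swaps the `epochs` fallback from `1` to `1.0`; because the value is immediately wrapped in `int()`, either default produces the same number, so the edit is cosmetic. For reference, a minimal sketch of the step arithmetic these lines perform, with assumed values for the dataloader length and accumulation factor:

import math

# Assumed, illustrative values (not taken from the diff):
epochs = 3                # int(kwargs.get('epochs', 1.0))
dataloader_len = 1000     # len(train_dataloader)
grad_accum = 8            # int(kwargs.get('gradient_accumulation', 8))

# One optimizer update per grad_accum micro-batches, rounding up so the
# final partial group still counts as a step.
updates_per_epoch = math.ceil(dataloader_len / grad_accum)  # ceil(1000 / 8) = 125
max_train_steps = epochs * updates_per_epoch                # 3 * 125 = 375
print(updates_per_epoch, max_train_steps)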
@@ -1514,123 +1514,7 @@ def _train_and_upload(**kwargs):
     )

 @spaces.GPU()
-def
-    if not model_id: return "Por favor, introduce un ID de modelo del Hub.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-    task_name = TASK_TO_PIPELINE_MAP.get(task_mode)
-    if not task_name: return f"La inferencia para el modo '{task_mode}' no está soportada.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-    try:
-        pipe = pipeline(task_name, model=model_id, torch_dtype=torch_dtype_auto, trust_remote_code=True, device=0 if device == 'cuda' else -1)
-        result = None
-        if task_name == "text-generation":
-            if not text_in: return "Por favor, introduce un prompt de texto.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-            result = pipe(text_in, max_new_tokens=int(max_new_tokens), do_sample=True, temperature=temperature, top_p=top_p)
-        elif task_name == "question-answering":
-            if not text_in or not context_in: return "Por favor, introduce una pregunta y un contexto.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-            result = pipe(question=text_in, context=context_in)
-        elif task_name in ["token-classification", "text2text-generation", "text-classification"]:
-            if not text_in: return f"Por favor, introduce texto para {task_name}.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-            result = pipe(text_in)
-        elif task_name in ["image-classification", "audio-classification", "automatic-speech-recognition"]:
-            input_data = image_in if "image" in task_name else audio_in
-            if input_data is None: return f"Por favor, proporciona una entrada de { 'imagen' if 'image' in task_name else 'audio' }.", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-            result = pipe(input_data)
-
-        return f"Resultado:\n\n{json.dumps(result, indent=2, ensure_ascii=False)}", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-    except Exception as e: return f"Error en Inferencia: {e}\n{traceback.format_exc()}", model_id, gr.update(), gr.update(), gr.update(), gr.update()
-
-@spaces.GPU()
-def update_inference_ui(task_mode):
-    task_name = TASK_TO_PIPELINE_MAP.get(task_mode, "")
-    is_text_gen = task_name == "text-generation"
-    show_text = task_name in ["text-generation", "text2text-generation", "token-classification", "question-answering", "text-classification", "text-to-image"]
-    show_context = task_name == "question-answering"
-    show_image = task_name in ["image-classification"]
-    show_audio = task_name in ["audio-classification", "automatic-speech-recognition"]
-    text_label = "Pregunta" if task_name == "question-answering" else "Entrada de Texto / Prompt"
-
-    return (
-        gr.update(visible=show_text, label=text_label),
-        gr.update(visible=show_context),
-        gr.update(visible=show_image),
-        gr.update(visible=show_audio),
-        gr.update(visible=is_text_gen)
-    )
-
-@spaces.GPU()
-def create_and_upload_dataset(hf_token, repo_name, creation_type, synth_model, synth_prompt, synth_num_samples, file_uploads, progress=gr.Progress()):
-    if not hf_token:
-        return "Error: Se requiere un token de Hugging Face.", ""
-    if not repo_name:
-        return "Error: Se requiere un nombre de repositorio para el dataset.", ""
-
-    try:
-        login(token=hf_token)
-        user = whoami()
-        username = user.get("name")
-        repo_id = f"{username}/{repo_name}"
-        create_repo(repo_id, repo_type="dataset", exist_ok=True)
-
-        all_data = []
-
-        if creation_type == "Sintético":
-            if not synth_model or not synth_prompt or not synth_num_samples:
-                return "Error: Para la generación sintética se requiere un modelo, un prompt y un número de muestras.", ""
-
-            progress(0, desc="Cargando modelo generador...")
-            generator = pipeline("text-generation", model=synth_model, torch_dtype=torch_dtype_auto, device=0 if device == 'cuda' else -1)
-
-            for i in progress.tqdm(range(int(synth_num_samples)), desc="Generando muestras"):
-                try:
-                    generated_output = generator(synth_prompt, max_new_tokens=256, num_return_sequences=1, do_sample=True, temperature=0.9, top_p=0.95)
-                    cleaned_text = generated_output[0]['generated_text'][len(synth_prompt):].strip()
-                    if cleaned_text:
-                        all_data.append({"text": cleaned_text})
-                except Exception as e:
-                    logger.warning(f"Error al generar muestra {i}: {e}")
-
-        elif creation_type == "Basado en Archivo":
-            if not file_uploads:
-                return "Error: Por favor, sube al menos un archivo.", ""
-            progress(0.5, desc="Procesando archivos subidos...")
-            file_data = _load_uploaded_stream(file_uploads)
-            all_data = file_data.get("train", []) + file_data.get("validation", [])
-
-        if not all_data:
-            return "Error: No se generaron o procesaron datos.", ""
-
-        progress(0.8, desc="Guardando y subiendo al Hub...")
-        with tempfile.TemporaryDirectory() as temp_dir:
-            data_file = os.path.join(temp_dir, "data.jsonl")
-            with open(data_file, "w", encoding="utf-8") as f:
-                for item in all_data:
-                    f.write(json.dumps(item, ensure_ascii=False) + "\n")
-
-            readme_content = DATASET_CARD_TEMPLATE.format(
-                repo_id=repo_id,
-                creation_type=creation_type,
-                generation_model=synth_model if creation_type == "Sintético" else "N/A",
-                date=datetime.now().strftime("%Y-%m-%d")
-            )
-            readme_file = os.path.join(temp_dir, "README.md")
-            with open(readme_file, "w", encoding="utf-8") as f:
-                f.write(readme_content)
-
-            api = HfApi()
-            api.upload_folder(
-                folder_path=temp_dir,
-                repo_id=repo_id,
-                repo_type="dataset",
-                commit_message="Creación de dataset con AutoTrain-Advanced"
-            )
-
-        dataset_link = f"https://huggingface.co/datasets/{repo_id}"
-        return f"✅ Dataset creado y subido exitosamente a {repo_id}", f"### ✅ [Dataset Disponible: Visita el Repositorio]({dataset_link})"
-
-    except Exception as e:
-        return f"❌ Error fatal durante la creación del dataset: {e}\n{traceback.format_exc()}", ""
-
-@spaces.GPU()
-def gradio_train_wrapper(*args):
+def train_sft_dpo_wrapper(*args):
     kwargs = dict(zip(all_input_components_dict.keys(), args))
     yield from _train_and_upload(**kwargs)

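The removed block begins with a bare `def` that has no name, arguments, or colon, a syntax error consistent with the Space's "Runtime error" status; the commit drops that entire block (ad-hoc inference, UI toggling, and dataset-creation helpers) and keeps only the training wrapper, renamed from `gradio_train_wrapper` to `train_sft_dpo_wrapper`. A self-contained sketch of the surviving wrapper pattern, with stand-in values where noted:

# Sketch of the wrapper pattern kept by the commit. `all_input_components_dict`
# mirrors the app's ordered dict of Gradio components; the entries here are stand-ins.
all_input_components_dict = {"model_name": None, "epochs": None}

def _train_and_upload(**kwargs):
    # Stand-in for the real trainer: a generator that streams status lines.
    yield f"starting training with {kwargs}"
    yield "upload finished"

def train_sft_dpo_wrapper(*args):
    # Gradio passes component values positionally in the order of inputs=[...];
    # zip them back onto the known key order, then delegate to the generator.
    kwargs = dict(zip(all_input_components_dict.keys(), args))
    yield from _train_and_upload(**kwargs)

# Usage: iterate the generator the way Gradio would.
for line in train_sft_dpo_wrapper("gpt2", 3):
    print(line)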
@@ -1950,7 +1834,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     )

     train_event = start_training_button.click(
-
+        train_sft_dpo_wrapper,
         inputs=all_input_components_list,
         outputs=all_output_components
     )
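In the old code, the `.click()` call had an empty line where the callback belonged, so no function was bound to the button; the commit passes `train_sft_dpo_wrapper` as the positional `fn` argument. A minimal, hypothetical Gradio sketch of this wiring (component names here are illustrative, not the app's):

import gradio as gr

def train_sft_dpo_wrapper(model_name):
    # Stand-in generator: each yield streams an update into the output component.
    yield f"training {model_name}..."
    yield "done: weights pushed to the Hub"

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    model_name = gr.Textbox(label="Model name")
    logs = gr.Textbox(label="Training logs")
    start_training_button = gr.Button("Start training")
    # The fn goes first, exactly as the fixed hunk does; inputs/outputs are keyword args.
    train_event = start_training_button.click(
        train_sft_dpo_wrapper,
        inputs=[model_name],
        outputs=[logs],
    )

if __name__ == "__main__":
    demo.launch()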