Ignaciohhhhggfgjfrffd committed on
Commit
8da19b3
·
verified ·
1 Parent(s): d4a374a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -4
app.py CHANGED
@@ -531,7 +531,6 @@ def _create_training_args(output_dir, repo_id, **kwargs):
531
  "save_strategy": "steps",
532
  "logging_steps": int(kwargs.get('logging_steps', 10)),
533
  "save_steps": int(kwargs.get('save_steps', 50)),
534
- "evaluation_strategy": "steps" if kwargs.get('run_evaluation', False) else "no",
535
  "eval_steps": int(kwargs.get('save_steps', 50)) if kwargs.get('run_evaluation', False) else None,
536
  "learning_rate": float(kwargs.get('learning_rate', 2e-5)),
537
  "fp16": kwargs.get('mixed_precision') == 'fp16' and device == 'cuda',
@@ -1166,7 +1165,7 @@ def train_text_to_image(model_name, train_dataset, repo_id, update_logs_fn, mode
1166
  eps=float(kwargs.get('adam_epsilon', 1e-8)),
1167
  )
1168
 
1169
- num_epochs = int(kwargs.get('epochs', 1.0))
1170
  num_update_steps_per_epoch = math.ceil(len(train_dataloader) / int(kwargs.get('gradient_accumulation', 8)))
1171
  max_train_steps = num_epochs * num_update_steps_per_epoch
1172
 
@@ -1514,7 +1513,123 @@ def _train_and_upload(**kwargs):
1514
  )
1515
 
1516
  @spaces.GPU()
1517
- def train_sft_dpo_wrapper(*args):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1518
  kwargs = dict(zip(all_input_components_dict.keys(), args))
1519
  yield from _train_and_upload(**kwargs)
1520
 
@@ -1834,7 +1949,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
1834
  )
1835
 
1836
  train_event = start_training_button.click(
1837
- train_sft_dpo_wrapper,
1838
  inputs=all_input_components_list,
1839
  outputs=all_output_components
1840
  )
 
531
  "save_strategy": "steps",
532
  "logging_steps": int(kwargs.get('logging_steps', 10)),
533
  "save_steps": int(kwargs.get('save_steps', 50)),
 
534
  "eval_steps": int(kwargs.get('save_steps', 50)) if kwargs.get('run_evaluation', False) else None,
535
  "learning_rate": float(kwargs.get('learning_rate', 2e-5)),
536
  "fp16": kwargs.get('mixed_precision') == 'fp16' and device == 'cuda',
 
1165
  eps=float(kwargs.get('adam_epsilon', 1e-8)),
1166
  )
1167
 
1168
+ num_epochs = int(kwargs.get('epochs', 1))
1169
  num_update_steps_per_epoch = math.ceil(len(train_dataloader) / int(kwargs.get('gradient_accumulation', 8)))
1170
  max_train_steps = num_epochs * num_update_steps_per_epoch
1171
 
 
1513
  )
1514
 
1515
@spaces.GPU()
def run_inference(task_mode, model_id, text_in, context_in, image_in, audio_in, temperature, top_p, max_new_tokens):
    """Run a one-off inference with a Hub model for the selected task mode.

    Args:
        task_mode: UI task label, mapped to a transformers pipeline task via
            TASK_TO_PIPELINE_MAP.
        model_id: Hub repo id of the model to load.
        text_in / context_in / image_in / audio_in: task-dependent inputs;
            only the ones relevant to the resolved task are read.
        temperature, top_p, max_new_tokens: sampling controls, used only for
            text-generation.

    Returns:
        A 6-tuple ``(message, model_id, *four gr.update() placeholders)`` —
        the fixed shape the Gradio outputs wiring expects on every path.
    """
    # Every exit path must produce the same 6-tuple shape; centralize it so a
    # branch cannot accidentally return the wrong arity.
    def _reply(message):
        return message, model_id, gr.update(), gr.update(), gr.update(), gr.update()

    if not model_id:
        return _reply("Por favor, introduce un ID de modelo del Hub.")
    task_name = TASK_TO_PIPELINE_MAP.get(task_mode)
    if not task_name:
        return _reply(f"La inferencia para el modo '{task_mode}' no está soportada.")
    try:
        # trust_remote_code allows custom model architectures from the Hub;
        # device=0 targets the first CUDA device, -1 means CPU.
        pipe = pipeline(task_name, model=model_id, torch_dtype=torch_dtype_auto, trust_remote_code=True, device=0 if device == 'cuda' else -1)
        result = None
        if task_name == "text-generation":
            if not text_in:
                return _reply("Por favor, introduce un prompt de texto.")
            result = pipe(text_in, max_new_tokens=int(max_new_tokens), do_sample=True, temperature=temperature, top_p=top_p)
        elif task_name == "question-answering":
            if not text_in or not context_in:
                return _reply("Por favor, introduce una pregunta y un contexto.")
            result = pipe(question=text_in, context=context_in)
        elif task_name in ["token-classification", "text2text-generation", "text-classification"]:
            if not text_in:
                return _reply(f"Por favor, introduce texto para {task_name}.")
            result = pipe(text_in)
        elif task_name in ["image-classification", "audio-classification", "automatic-speech-recognition"]:
            input_data = image_in if "image" in task_name else audio_in
            if input_data is None:
                return _reply(f"Por favor, proporciona una entrada de { 'imagen' if 'image' in task_name else 'audio' }.")
            result = pipe(input_data)

        # Fix: a task that is mapped but has no branch above previously fell
        # through and printed "Resultado:\n\nnull" — report it explicitly.
        if result is None:
            return _reply(f"La inferencia para el modo '{task_mode}' no está soportada.")
        return _reply(f"Resultado:\n\n{json.dumps(result, indent=2, ensure_ascii=False)}")
    except Exception as e:
        return _reply(f"Error en Inferencia: {e}\n{traceback.format_exc()}")
1539
+
1540
@spaces.GPU()
def update_inference_ui(task_mode):
    """Toggle visibility of the inference input widgets for the chosen task.

    Returns five ``gr.update`` objects, in order: text input (with a
    task-appropriate label), QA context input, image input, audio input,
    and the sampling-controls group shown only for text generation.
    """
    pipeline_task = TASK_TO_PIPELINE_MAP.get(task_mode, "")

    # Group the pipeline tasks by which input widget they consume.
    text_tasks = {
        "text-generation", "text2text-generation", "token-classification",
        "question-answering", "text-classification", "text-to-image",
    }
    audio_tasks = {"audio-classification", "automatic-speech-recognition"}

    needs_context = pipeline_task == "question-answering"
    sampling_visible = pipeline_task == "text-generation"
    # QA reuses the text box as the question field, so relabel it.
    text_label = "Pregunta" if needs_context else "Entrada de Texto / Prompt"

    return (
        gr.update(visible=pipeline_task in text_tasks, label=text_label),
        gr.update(visible=needs_context),
        gr.update(visible=pipeline_task == "image-classification"),
        gr.update(visible=pipeline_task in audio_tasks),
        gr.update(visible=sampling_visible),
    )
1557
+
1558
@spaces.GPU()
def create_and_upload_dataset(hf_token, repo_name, creation_type, synth_model, synth_prompt, synth_num_samples, file_uploads, progress=gr.Progress()):
    """Build a dataset (synthetically generated or from uploaded files) and push it to the Hub.

    Args:
        hf_token: Hugging Face access token; required to log in and create the repo.
        repo_name: Bare repository name; combined with the token owner's username into repo_id.
        creation_type: Either "Sintético" (generate samples with a text-generation model)
            or "Basado en Archivo" (parse uploaded files).
        synth_model / synth_prompt / synth_num_samples: Generation settings, only used
            for the synthetic path.
        file_uploads: Uploaded file objects, only used for the file-based path.
        progress: Gradio progress tracker (injected by Gradio via the default).

    Returns:
        A 2-tuple ``(status_message, markdown_link)``; the link is empty on error.
    """
    if not hf_token:
        return "Error: Se requiere un token de Hugging Face.", ""
    if not repo_name:
        return "Error: Se requiere un nombre de repositorio para el dataset.", ""

    try:
        # Authenticate first so whoami()/create_repo act on behalf of the token owner.
        login(token=hf_token)
        user = whoami()
        username = user.get("name")
        repo_id = f"{username}/{repo_name}"
        # exist_ok=True makes re-runs idempotent for an already-created dataset repo.
        create_repo(repo_id, repo_type="dataset", exist_ok=True)

        all_data = []

        if creation_type == "Sintético":
            if not synth_model or not synth_prompt or not synth_num_samples:
                return "Error: Para la generación sintética se requiere un modelo, un prompt y un número de muestras.", ""

            progress(0, desc="Cargando modelo generador...")
            generator = pipeline("text-generation", model=synth_model, torch_dtype=torch_dtype_auto, device=0 if device == 'cuda' else -1)

            for i in progress.tqdm(range(int(synth_num_samples)), desc="Generando muestras"):
                try:
                    generated_output = generator(synth_prompt, max_new_tokens=256, num_return_sequences=1, do_sample=True, temperature=0.9, top_p=0.95)
                    # The pipeline echoes the prompt; slice it off and keep only new text.
                    cleaned_text = generated_output[0]['generated_text'][len(synth_prompt):].strip()
                    if cleaned_text:
                        all_data.append({"text": cleaned_text})
                except Exception as e:
                    # Best-effort generation: log and keep going so one bad sample
                    # does not abort the whole dataset build.
                    logger.warning(f"Error al generar muestra {i}: {e}")

        elif creation_type == "Basado en Archivo":
            if not file_uploads:
                return "Error: Por favor, sube al menos un archivo.", ""
            progress(0.5, desc="Procesando archivos subidos...")
            file_data = _load_uploaded_stream(file_uploads)
            # Merge both splits into one flat list; the upload is a single JSONL file.
            all_data = file_data.get("train", []) + file_data.get("validation", [])

        if not all_data:
            return "Error: No se generaron o procesaron datos.", ""

        progress(0.8, desc="Guardando y subiendo al Hub...")
        with tempfile.TemporaryDirectory() as temp_dir:
            # Write one JSON object per line (JSONL), which the Hub auto-detects.
            data_file = os.path.join(temp_dir, "data.jsonl")
            with open(data_file, "w", encoding="utf-8") as f:
                for item in all_data:
                    f.write(json.dumps(item, ensure_ascii=False) + "\n")

            # Dataset card rendered from the project template (defined elsewhere in this file).
            readme_content = DATASET_CARD_TEMPLATE.format(
                repo_id=repo_id,
                creation_type=creation_type,
                generation_model=synth_model if creation_type == "Sintético" else "N/A",
                date=datetime.now().strftime("%Y-%m-%d")
            )
            readme_file = os.path.join(temp_dir, "README.md")
            with open(readme_file, "w", encoding="utf-8") as f:
                f.write(readme_content)

            # Upload data + card in a single commit.
            api = HfApi()
            api.upload_folder(
                folder_path=temp_dir,
                repo_id=repo_id,
                repo_type="dataset",
                commit_message="Creación de dataset con AutoTrain-Advanced"
            )

        dataset_link = f"https://huggingface.co/datasets/{repo_id}"
        return f"✅ Dataset creado y subido exitosamente a {repo_id}", f"### ✅ [Dataset Disponible: Visita el Repositorio]({dataset_link})"

    except Exception as e:
        return f"❌ Error fatal durante la creación del dataset: {e}\n{traceback.format_exc()}", ""
1630
+
1631
@spaces.GPU()
def gradio_train_wrapper(*args):
    """Gradio entry point for training: rebind the positional component values
    to their named keys and stream everything _train_and_upload yields."""
    named_inputs = {key: value for key, value in zip(all_input_components_dict.keys(), args)}
    yield from _train_and_upload(**named_inputs)
1635
 
 
1949
  )
1950
 
1951
  train_event = start_training_button.click(
1952
+ gradio_train_wrapper,
1953
  inputs=all_input_components_list,
1954
  outputs=all_output_components
1955
  )