Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,6 +21,16 @@ BASE_MODEL = "bigcode/santacoder" # Modelo público
|
|
| 21 |
LORA_PATH = "./lora_output" # Carpeta donde se guardará LoRA
|
| 22 |
DATASET_PATH = "tu_dataset.json" # Cambia aquí al nombre de tu dataset
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
# ============================================================
|
| 25 |
# 🧩 Función de entrenamiento LoRA
|
| 26 |
# ============================================================
|
|
@@ -58,7 +68,7 @@ def train_lora(epochs, batch_size, learning_rate):
|
|
| 58 |
|
| 59 |
trainer.train()
|
| 60 |
# Guardar LoRA
|
| 61 |
-
|
| 62 |
tokenizer.save_pretrained(LORA_PATH)
|
| 63 |
|
| 64 |
return "✅ Entrenamiento completado y guardado en ./lora_output"
|
|
@@ -71,13 +81,13 @@ def train_lora(epochs, batch_size, learning_rate):
|
|
| 71 |
def generate_text(prompt_text):
|
| 72 |
try:
|
| 73 |
# Cargar modelo base
|
| 74 |
-
|
| 75 |
-
|
| 76 |
|
| 77 |
# Aplicar LoRA
|
| 78 |
-
model = PeftModel.from_pretrained(
|
| 79 |
|
| 80 |
-
generator = pipeline("text-generation", model=model, tokenizer=
|
| 81 |
output = generator(prompt_text, max_new_tokens=100, temperature=0.7, top_p=0.9)
|
| 82 |
return output[0]["generated_text"]
|
| 83 |
except Exception as e:
|
|
@@ -86,9 +96,6 @@ def generate_text(prompt_text):
|
|
| 86 |
# ============================================================
|
| 87 |
# 💻 Interfaz Gradio
|
| 88 |
# ============================================================
|
| 89 |
-
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
|
| 90 |
-
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
|
| 91 |
-
|
| 92 |
with gr.Blocks(title="AmorCoderAI - Entrenamiento LoRA") as demo:
|
| 93 |
gr.Markdown("# 💙 AmorCoderAI - Entrenamiento y Pruebas")
|
| 94 |
gr.Markdown("Entrena y prueba tu modelo basado en `bigcode/santacoder` con LoRA")
|
|
|
|
| 21 |
LORA_PATH = "./lora_output" # Carpeta donde se guardará LoRA
|
| 22 |
DATASET_PATH = "tu_dataset.json" # Cambia aquí al nombre de tu dataset
|
| 23 |
|
| 24 |
+
# ============================================================
|
| 25 |
+
# 🔧 Inicializar tokenizer y modelo base
|
| 26 |
+
# ============================================================
|
| 27 |
+
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
|
| 28 |
+
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
|
| 29 |
+
|
| 30 |
+
# 🔧 Asegurar que haya un pad_token
|
| 31 |
+
if tokenizer.pad_token is None:
|
| 32 |
+
tokenizer.pad_token = tokenizer.eos_token
|
| 33 |
+
|
| 34 |
# ============================================================
|
| 35 |
# 🧩 Función de entrenamiento LoRA
|
| 36 |
# ============================================================
|
|
|
|
| 68 |
|
| 69 |
trainer.train()
|
| 70 |
# Guardar LoRA
|
| 71 |
+
base_model.save_pretrained(LORA_PATH)
|
| 72 |
tokenizer.save_pretrained(LORA_PATH)
|
| 73 |
|
| 74 |
return "✅ Entrenamiento completado y guardado en ./lora_output"
|
|
|
|
| 81 |
def generate_text(prompt_text):
|
| 82 |
try:
|
| 83 |
# Cargar modelo base
|
| 84 |
+
tokenizer_gen = AutoTokenizer.from_pretrained(BASE_MODEL)
|
| 85 |
+
base_model_gen = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
|
| 86 |
|
| 87 |
# Aplicar LoRA
|
| 88 |
+
model = PeftModel.from_pretrained(base_model_gen, LORA_PATH)
|
| 89 |
|
| 90 |
+
generator = pipeline("text-generation", model=model, tokenizer=tokenizer_gen)
|
| 91 |
output = generator(prompt_text, max_new_tokens=100, temperature=0.7, top_p=0.9)
|
| 92 |
return output[0]["generated_text"]
|
| 93 |
except Exception as e:
|
|
|
|
| 96 |
# ============================================================
|
| 97 |
# 💻 Interfaz Gradio
|
| 98 |
# ============================================================
|
|
|
|
|
|
|
|
|
|
| 99 |
with gr.Blocks(title="AmorCoderAI - Entrenamiento LoRA") as demo:
|
| 100 |
gr.Markdown("# 💙 AmorCoderAI - Entrenamiento y Pruebas")
|
| 101 |
gr.Markdown("Entrena y prueba tu modelo basado en `bigcode/santacoder` con LoRA")
|