Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,6 @@ import os
|
|
| 2 |
from huggingface_hub import login
|
| 3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 4 |
import gradio as gr
|
| 5 |
-
import torch
|
| 6 |
|
| 7 |
# Autenticar usando el token almacenado como secreto
|
| 8 |
hf_token = os.getenv("HF_API_TOKEN")
|
|
@@ -11,21 +10,17 @@ login(hf_token)
|
|
| 11 |
# Cargar el modelo y el tokenizador
|
| 12 |
model_name = "DeepESP/gpt2-spanish"
|
| 13 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 14 |
-
model = AutoModelForCausalLM.from_pretrained(model_name)
|
| 15 |
|
| 16 |
def chat_with_gpt2_spanish(input_text):
|
| 17 |
-
|
| 18 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 19 |
-
print(f"Using device: {device}")
|
| 20 |
-
|
| 21 |
-
inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512).to(device)
|
| 22 |
outputs = model.generate(
|
| 23 |
**inputs,
|
| 24 |
-
max_length=
|
| 25 |
-
num_beams=1,
|
| 26 |
-
temperature=0.7,
|
| 27 |
-
top_p=0.9,
|
| 28 |
-
no_repeat_ngram_size=2,
|
| 29 |
early_stopping=True
|
| 30 |
)
|
| 31 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
| 2 |
from huggingface_hub import login
|
| 3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 4 |
import gradio as gr
|
|
|
|
| 5 |
|
| 6 |
# Autenticar usando el token almacenado como secreto
|
| 7 |
hf_token = os.getenv("HF_API_TOKEN")
|
|
|
|
| 10 |
# Cargar el modelo y el tokenizador
|
| 11 |
model_name = "DeepESP/gpt2-spanish"
|
| 12 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 13 |
+
model = AutoModelForCausalLM.from_pretrained(model_name)
|
| 14 |
|
| 15 |
def chat_with_gpt2_spanish(input_text):
|
| 16 |
+
inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
outputs = model.generate(
|
| 18 |
**inputs,
|
| 19 |
+
max_length=30, # Limitar la longitud de la respuesta
|
| 20 |
+
num_beams=1, # Usar solo un haz para velocidad
|
| 21 |
+
temperature=0.7, # Ajustar la temperatura para respuestas menos repetitivas
|
| 22 |
+
top_p=0.9, # Usar top-p (nucleus sampling) para variedad
|
| 23 |
+
no_repeat_ngram_size=2, # Evitar la repetición de n-gramas
|
| 24 |
early_stopping=True
|
| 25 |
)
|
| 26 |
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|