# Spaces status banner (scraped with the file, not program code): Runtime error
import json
import logging
import math
import os

import evaluate  # type: ignore
import gradio as gr
from datasets import (  # not imported: list_datasets, load_from_disk
    DownloadConfig,
    concatenate_datasets,
    load_dataset,
)
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
    pipeline,
)
from translate import Translator
# Base model: checkpoint identifier shared by the tokenizer and the weights.
MODEL_KEY = "EleutherAI/gpt-neo-125M"
# Both objects are created once, when the module is imported, from the same
# checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(MODEL_KEY)
model = AutoModelForCausalLM.from_pretrained(MODEL_KEY)
# Text-generation pipeline built on the objects above; generate_text() calls it.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
# Domain/style prefix for each supported dataset; the prefix is prepended to
# the prompt so the model sees which kind of text it is continuing.
context_map = {
    "imdb": "Dom: Cine | Estilo: Opinión",
    "daily_dialog": "Dom: Conversación | Estilo: Diálogo diario",
    "go_emotions": "Dom: Emociones | Estilo: Clasificación emocional",
    "wikitext": "Dom: Enciclopedia | Estilo: Conocimiento general",
}
# Dataset names offered for selection, in the order they are declared above.
available_datasets = list(context_map)
# Text generation from a dataset example
def generate_text(dataset_name, sample_index, max_length):
    """Generate text from one example of a dataset.

    Args:
        dataset_name: Name passed to ``load_dataset``; also the key used to
            look up the domain/style prefix in ``context_map``.
        sample_index: Zero-based position of the example inside the loaded
            slice. Converted with ``int()`` because the UI slider may deliver
            a float, which is not a valid dataset index.
        max_length: Upper bound on the number of newly generated tokens
            (converted with ``int()``).

    Returns:
        The ``generated_text`` of the first sequence returned by the
        pipeline, or ``"Índice fuera de rango."`` when ``sample_index`` is
        negative or not smaller than the size of the loaded slice.
    """
    index = int(sample_index)
    # A negative position is never a valid example index here; rejecting it
    # before loading anything also avoids a pointless dataset fetch.
    if index < 0:
        return "Índice fuera de rango."

    # Only the first 1% of the train split is requested to keep calls light.
    dataset = load_dataset(dataset_name, split="train[:1%]")
    if index >= len(dataset):
        return "Índice fuera de rango."

    example = dataset[index]
    # The text column is named differently across datasets; when none of the
    # known keys holds a non-empty value, the whole record is stringified.
    text = (
        example.get("text")
        or example.get("utterance")
        or example.get("content")
        or str(example)
    )
    context = context_map.get(dataset_name, "Dom: Desconocido | Estilo: Desconocido")
    prompt = f"{context} | Entrada: {text}"

    # max_new_tokens limits only the continuation. The previous max_length
    # limit also counted the prompt tokens, so a dataset example longer than
    # the limit left no room for any generation.
    results = generator(prompt, max_new_tokens=int(max_length), num_return_sequences=1)
    return results[0]["generated_text"]
# Translation
def translate_text(text, lang):
    """Translate ``text`` into the language identified by ``lang``.

    Args:
        text: Text to translate. ``None``, empty, or whitespace-only input is
            treated as "nothing to translate".
        lang: Target language code, passed (stripped of surrounding
            whitespace) to ``Translator(to_lang=...)``.

    Returns:
        The translated text; ``""`` when there is nothing to translate; or a
        string starting with ``"Error: "`` when the language code is missing
        or the translation call raises.
    """
    # Nothing to send: skip the translator call entirely.
    if not text or not text.strip():
        return ""

    target = (lang or "").strip()
    if not target:
        return "Error: falta el código de idioma destino."

    try:
        # Construction happens inside the try so that a failure there is
        # reported the same way as a failure of the translation itself.
        return Translator(to_lang=target).translate(text)
    except Exception as e:  # UI boundary: show the problem instead of crashing
        return f"Error: {e}"
# Gradio interface: one tab for dataset-driven generation, one for translation.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 MultiDomain Text Generator + Translator")

    with gr.Tab("Generar desde dataset"):
        dataset_name = gr.Dropdown(choices=available_datasets, value="imdb", label="Elige dataset")
        # The range starts at 0 so that the default value lies inside it and
        # the first examples of the dataset can be selected.
        sample_index = gr.Slider(minimum=0, maximum=200, step=1, label="Índice del ejemplo", value=0)
        max_len = gr.Slider(label="Longitud máxima", minimum=50, maximum=1024, step=4, value=104)
        output_text = gr.Textbox(label="Texto generado")
        btn_generate = gr.Button("Generar texto")
        # Input order matches generate_text(dataset_name, sample_index, max_length).
        btn_generate.click(generate_text, inputs=[dataset_name, sample_index, max_len], outputs=output_text)

    with gr.Tab("Traducir texto"):
        input_text = gr.Textbox(label="Texto a traducir")
        lang = gr.Textbox(label="Código de idioma destino", value="en")
        output_translation = gr.Textbox(label="Texto traducido")
        btn_translate = gr.Button("Traducir")
        # Input order matches translate_text(text, lang).
        btn_translate.click(translate_text, inputs=[input_text, lang], outputs=output_translation)

# Start the app once the layout has been fully declared.
demo.launch()