Update app.py
Browse files
app.py
CHANGED
|
@@ -1,11 +1,16 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from datasets import load_dataset
|
| 3 |
import pandas as pd
|
|
|
|
| 4 |
|
| 5 |
# Función para generar el esquema CSV basado en las selecciones del usuario
|
| 6 |
-
def generate_csv(modalities, tasks):
|
| 7 |
columns = []
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
if modality == "Visi贸n" and task == "Detecci贸n de Objetos":
|
| 10 |
columns.extend(["imagen", "etiqueta", "coordenadas_bbox"])
|
| 11 |
elif modality == "Visi贸n" and task == "Segmentaci贸n Sem谩ntica":
|
|
@@ -30,40 +35,45 @@ def generate_csv(modalities, tasks):
|
|
| 30 |
columns.extend(["archivo_audio", "fuente_separada"])
|
| 31 |
elif modality == "Audio" and task == "S铆ntesis de Voz":
|
| 32 |
columns.extend(["texto", "archivo_audio_generado"])
|
|
|
|
| 33 |
return ", ".join(columns)
|
| 34 |
|
| 35 |
# Función para buscar datasets públicos relevantes
|
| 36 |
-
def search_datasets(modalities):
|
| 37 |
-
# Simulaci贸n de b煤squeda de datasets en Hugging Face
|
| 38 |
dataset_map = {
|
| 39 |
"Visi贸n": ["coco", "imagenet", "openimages", "cityscapes"],
|
| 40 |
"NLP": ["imdb", "glue", "wmt14", "sentiment140"],
|
| 41 |
"Audio": ["common_voice", "librispeech", "fma", "musdb18"]
|
| 42 |
}
|
| 43 |
results = []
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
if modality in dataset_map:
|
| 46 |
results.extend(dataset_map[modality])
|
|
|
|
| 47 |
return "\n".join(results)
|
| 48 |
|
| 49 |
-
# Funci贸n para seleccionar datasets y agregarlos al campo de URLs
|
| 50 |
-
def select_datasets(selected_datasets, current_urls):
    """Merge newly selected dataset names into the URL text-box content.

    Args:
        selected_datasets: newline-separated names chosen by the user.
        current_urls: newline-separated names already in the box; may be
            "" or None, which counts as an empty list.

    Returns:
        The deduplicated union of both lists, newline-separated, with the
        existing entries first followed by new selections in order.
    """
    selected = selected_datasets.split("\n")
    current = current_urls.split("\n") if current_urls else []
    # Bug fix: list(set(...)) produced a nondeterministic line order on every
    # call; dict.fromkeys dedupes while preserving first-seen order.
    merged = list(dict.fromkeys(current + selected))
    return "\n".join(merged)
|
| 55 |
-
|
| 56 |
# Funci贸n para procesar datasets seleccionados
|
| 57 |
-
def process_datasets(dataset_urls):
|
| 58 |
datasets = []
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
| 60 |
if url.strip():
|
|
|
|
| 61 |
try:
|
| 62 |
dataset = load_dataset(url.strip())
|
| 63 |
datasets.append(pd.DataFrame(dataset["train"]))
|
|
|
|
| 64 |
except Exception as e:
|
|
|
|
| 65 |
return f"Error al cargar el dataset {url}: {str(e)}"
|
| 66 |
combined_dataset = pd.concat(datasets, ignore_index=True)
|
|
|
|
| 67 |
return combined_dataset.to_csv(index=False)
|
| 68 |
|
| 69 |
# Interfaz de Usuario con Gradio
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from datasets import load_dataset
|
| 3 |
import pandas as pd
|
| 4 |
+
import time
|
| 5 |
|
| 6 |
# Función para generar el esquema CSV basado en las selecciones del usuario
|
| 7 |
+
def generate_csv(modalities, tasks, progress=gr.Progress()):
|
| 8 |
columns = []
|
| 9 |
+
total_steps = len(modalities) * len(tasks)
|
| 10 |
+
progress(0, desc="Iniciando generaci贸n del esquema CSV...")
|
| 11 |
+
for i, (modality, task) in enumerate(zip(modalities, tasks)):
|
| 12 |
+
progress(i / total_steps, desc=f"Procesando {modality} - {task}...")
|
| 13 |
+
time.sleep(1) # Simulaci贸n de procesamiento
|
| 14 |
if modality == "Visi贸n" and task == "Detecci贸n de Objetos":
|
| 15 |
columns.extend(["imagen", "etiqueta", "coordenadas_bbox"])
|
| 16 |
elif modality == "Visi贸n" and task == "Segmentaci贸n Sem谩ntica":
|
|
|
|
| 35 |
columns.extend(["archivo_audio", "fuente_separada"])
|
| 36 |
elif modality == "Audio" and task == "S铆ntesis de Voz":
|
| 37 |
columns.extend(["texto", "archivo_audio_generado"])
|
| 38 |
+
progress(1, desc="Esquema CSV generado con 茅xito.")
|
| 39 |
return ", ".join(columns)
|
| 40 |
|
| 41 |
# Función para buscar datasets públicos relevantes
|
| 42 |
+
def search_datasets(modalities, progress=gr.Progress()):
    """Return a newline-separated list of public dataset names for *modalities*.

    Emits Gradio progress updates as it scans each modality; the per-step
    sleep only simulates a slow remote lookup.
    """
    catalog = {
        "Visi贸n": ["coco", "imagenet", "openimages", "cityscapes"],
        "NLP": ["imdb", "glue", "wmt14", "sentiment140"],
        "Audio": ["common_voice", "librispeech", "fma", "musdb18"]
    }
    step_count = len(modalities)
    progress(0, desc="Iniciando b煤squeda de datasets...")
    found = []
    for step, modality in enumerate(modalities):
        progress(step / step_count, desc=f"Buscando datasets para {modality}...")
        time.sleep(1)  # simulated lookup latency
        # Unknown modalities simply contribute nothing.
        found.extend(catalog.get(modality, []))
    progress(1, desc="B煤squeda de datasets completada.")
    return "\n".join(found)
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
# Función para procesar datasets seleccionados
|
| 60 |
+
def process_datasets(dataset_urls, progress=gr.Progress()):
    """Load every listed dataset (one id per line) and return them merged as CSV.

    Args:
        dataset_urls: newline-separated Hugging Face dataset ids; blank lines
            are skipped.
        progress: Gradio progress tracker injected by the UI.

    Returns:
        CSV text of the concatenated "train" splits, an error message when a
        dataset fails to load, or "" when no usable id was given.
    """
    datasets = []
    urls = dataset_urls.split("\n")
    total_steps = len(urls)
    progress(0, desc="Iniciando procesamiento de datasets...")
    for i, url in enumerate(urls):
        if url.strip():
            progress(i / total_steps, desc=f"Procesando dataset: {url}")
            try:
                dataset = load_dataset(url.strip())
                # NOTE(review): assumes every dataset exposes a "train" split —
                # confirm; a missing split is reported via the except path.
                datasets.append(pd.DataFrame(dataset["train"]))
                time.sleep(2)  # simulated processing delay
            except Exception as e:
                progress(1, desc=f"Error al cargar el dataset {url}: {str(e)}")
                return f"Error al cargar el dataset {url}: {str(e)}"
    # Bug fix: pd.concat([]) raises ValueError when no dataset was loaded
    # (e.g. the input was empty or only blank lines); return "" instead.
    if not datasets:
        return ""
    combined_dataset = pd.concat(datasets, ignore_index=True)
    progress(1, desc="Todos los datasets procesados y combinados.")
    return combined_dataset.to_csv(index=False)
|
| 78 |
|
| 79 |
# Interfaz de Usuario con Gradio
|