SergioSCA committed on
Commit
b3fc81e
verified
1 Parent(s): efdd1f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -14
app.py CHANGED
@@ -1,11 +1,16 @@
1
  import gradio as gr
2
  from datasets import load_dataset
3
  import pandas as pd
 
4
 
5
  # Funci贸n para generar el esquema CSV basado en las selecciones del usuario
6
- def generate_csv(modalities, tasks):
7
  columns = []
8
- for modality, task in zip(modalities, tasks):
 
 
 
 
9
  if modality == "Visi贸n" and task == "Detecci贸n de Objetos":
10
  columns.extend(["imagen", "etiqueta", "coordenadas_bbox"])
11
  elif modality == "Visi贸n" and task == "Segmentaci贸n Sem谩ntica":
@@ -30,40 +35,45 @@ def generate_csv(modalities, tasks):
30
  columns.extend(["archivo_audio", "fuente_separada"])
31
  elif modality == "Audio" and task == "S铆ntesis de Voz":
32
  columns.extend(["texto", "archivo_audio_generado"])
 
33
  return ", ".join(columns)
34
 
35
  # Funci贸n para buscar datasets p煤blicos relevantes
36
def search_datasets(modalities):
    """Return newline-joined names of public datasets for the given modalities.

    Simulated Hugging Face dataset search: each known modality maps to a
    fixed list of dataset names. Unknown modalities are silently skipped.
    """
    # Static lookup table standing in for a real dataset search.
    catalog = {
        "Visi贸n": ["coco", "imagenet", "openimages", "cityscapes"],
        "NLP": ["imdb", "glue", "wmt14", "sentiment140"],
        "Audio": ["common_voice", "librispeech", "fma", "musdb18"]
    }
    matches = [
        name
        for modality in modalities
        if modality in catalog
        for name in catalog[modality]
    ]
    return "\n".join(matches)
48
 
49
- # Funci贸n para seleccionar datasets y agregarlos al campo de URLs
50
def select_datasets(selected_datasets, current_urls):
    """Merge newly selected dataset names into the URL field contents.

    Parameters
    ----------
    selected_datasets : str
        Newline-separated dataset names chosen by the user.
    current_urls : str
        Newline-separated names already in the URL field; may be empty.

    Returns
    -------
    str
        Newline-joined, de-duplicated union of both lists. Existing
        entries keep their position; new ones are appended. (The previous
        ``list(set(...))`` merge produced an arbitrary, run-dependent
        ordering of the URL field.)
    """
    selected = selected_datasets.split("\n")
    current = current_urls.split("\n") if current_urls else []
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    merged = dict.fromkeys(current + selected)
    return "\n".join(merged)
55
-
56
  # Funci贸n para procesar datasets seleccionados
57
def process_datasets(dataset_urls):
    """Load each named dataset and combine their "train" splits into CSV text.

    Parameters
    ----------
    dataset_urls : str
        Newline-separated Hugging Face dataset identifiers; blank lines
        are ignored.

    Returns
    -------
    str
        CSV text of the concatenated "train" splits, or a Spanish error
        message when a dataset fails to load or no names were supplied.
    """
    datasets = []
    for raw_url in dataset_urls.split("\n"):
        url = raw_url.strip()
        if not url:
            continue
        try:
            dataset = load_dataset(url)
            datasets.append(pd.DataFrame(dataset["train"]))
        except Exception as e:
            return f"Error al cargar el dataset {url}: {str(e)}"
    # Guard: pd.concat([]) raises ValueError, so report the empty case
    # explicitly instead of crashing the UI callback.
    if not datasets:
        return "Error: no se proporcionaron datasets para procesar."
    combined_dataset = pd.concat(datasets, ignore_index=True)
    return combined_dataset.to_csv(index=False)
68
 
69
  # Interfaz de Usuario con Gradio
 
1
  import gradio as gr
2
  from datasets import load_dataset
3
  import pandas as pd
4
+ import time
5
 
6
  # Funci贸n para generar el esquema CSV basado en las selecciones del usuario
7
+ def generate_csv(modalities, tasks, progress=gr.Progress()):
8
  columns = []
9
+ total_steps = len(modalities) * len(tasks)
10
+ progress(0, desc="Iniciando generaci贸n del esquema CSV...")
11
+ for i, (modality, task) in enumerate(zip(modalities, tasks)):
12
+ progress(i / total_steps, desc=f"Procesando {modality} - {task}...")
13
+ time.sleep(1) # Simulaci贸n de procesamiento
14
  if modality == "Visi贸n" and task == "Detecci贸n de Objetos":
15
  columns.extend(["imagen", "etiqueta", "coordenadas_bbox"])
16
  elif modality == "Visi贸n" and task == "Segmentaci贸n Sem谩ntica":
 
35
  columns.extend(["archivo_audio", "fuente_separada"])
36
  elif modality == "Audio" and task == "S铆ntesis de Voz":
37
  columns.extend(["texto", "archivo_audio_generado"])
38
+ progress(1, desc="Esquema CSV generado con 茅xito.")
39
  return ", ".join(columns)
40
 
41
  # Funci贸n para buscar datasets p煤blicos relevantes
42
def search_datasets(modalities, progress=gr.Progress()):
    """Look up public dataset names for each selected modality.

    Simulated Hugging Face dataset search that reports status through
    the Gradio progress bar (with an artificial 1 s delay per modality).
    Returns the matching dataset names joined by newlines; unknown
    modalities contribute nothing.
    """
    # Static lookup table standing in for a real dataset search.
    catalog = {
        "Visi贸n": ["coco", "imagenet", "openimages", "cityscapes"],
        "NLP": ["imdb", "glue", "wmt14", "sentiment140"],
        "Audio": ["common_voice", "librispeech", "fma", "musdb18"]
    }
    found = []
    step_count = len(modalities)
    progress(0, desc="Iniciando b煤squeda de datasets...")
    for step, modality in enumerate(modalities):
        progress(step / step_count, desc=f"Buscando datasets para {modality}...")
        time.sleep(1)  # Simulated processing delay
        found.extend(catalog.get(modality, []))
    progress(1, desc="B煤squeda de datasets completada.")
    return "\n".join(found)
58
 
 
 
 
 
 
 
 
59
  # Funci贸n para procesar datasets seleccionados
60
def process_datasets(dataset_urls, progress=gr.Progress()):
    """Load each named dataset and combine their "train" splits into CSV text.

    Parameters
    ----------
    dataset_urls : str
        Newline-separated Hugging Face dataset identifiers; blank lines
        are skipped.
    progress : gr.Progress
        Gradio progress reporter used to surface per-dataset status.

    Returns
    -------
    str
        CSV text of the concatenated "train" splits, or a Spanish error
        message when a load fails or no dataset names were supplied.
    """
    datasets = []
    urls = dataset_urls.split("\n")
    total_steps = len(urls)
    progress(0, desc="Iniciando procesamiento de datasets...")
    for i, url in enumerate(urls):
        if url.strip():
            progress(i / total_steps, desc=f"Procesando dataset: {url}")
            try:
                dataset = load_dataset(url.strip())
                datasets.append(pd.DataFrame(dataset["train"]))
                time.sleep(2)  # Simulated processing delay
            except Exception as e:
                progress(1, desc=f"Error al cargar el dataset {url}: {str(e)}")
                return f"Error al cargar el dataset {url}: {str(e)}"
    # Guard: pd.concat([]) raises ValueError, so report the empty case
    # explicitly instead of crashing the UI callback.
    if not datasets:
        return "Error: no se proporcionaron datasets para procesar."
    combined_dataset = pd.concat(datasets, ignore_index=True)
    progress(1, desc="Todos los datasets procesados y combinados.")
    return combined_dataset.to_csv(index=False)
78
 
79
  # Interfaz de Usuario con Gradio