SergioSCA committed on
Commit
1c951a9
verified
1 Parent(s): 187ebd3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -27
app.py CHANGED
@@ -43,6 +43,8 @@ def generate_csv(modalities, vision_tasks, nlp_tasks, audio_tasks, progress=gr.P
43
  columns.extend(["archivo_audio", "fuente_separada"])
44
  elif modality == "Audio" and task == "Síntesis de Voz":
45
  columns.extend(["texto", "archivo_audio_generado"])
 
 
46
  progress(1, desc="Esquema CSV generado con éxito.")
47
  return ", ".join(columns)
48
 
@@ -51,7 +53,7 @@ def search_datasets(modalities, progress=gr.Progress()):
51
  dataset_map = {
52
  "Visión": ["coco", "imagenet", "openimages", "cityscapes"],
53
  "NLP": ["imdb", "glue", "wmt14", "sentiment140"],
54
- "Audio": ["common_voice", "librispeech", "fma", "musdb18"]
55
  }
56
  results = []
57
  total_steps = len(modalities)
@@ -64,31 +66,27 @@ def search_datasets(modalities, progress=gr.Progress()):
64
  progress(1, desc="Búsqueda de datasets completada.")
65
  return "\n".join(results)
66
 
67
- # Función para seleccionar datasets y agregarlos al campo de URLs
68
- def select_datasets(selected_datasets, current_urls):
69
- selected_datasets = selected_datasets.split("\n")
70
- current_urls = current_urls.split("\n") if current_urls else []
71
- updated_urls = list(set(current_urls + selected_datasets))
72
- return "\n".join(updated_urls)
73
-
74
- # Función para procesar datasets seleccionados
75
- def process_datasets(dataset_urls, progress=gr.Progress()):
76
  datasets = []
77
- urls = dataset_urls.split("\n")
78
- total_steps = len(urls)
79
- progress(0, desc="Iniciando procesamiento de datasets...")
80
- for i, url in enumerate(urls):
81
  if url.strip():
82
- progress(i / total_steps, desc=f"Procesando dataset: {url}")
83
  try:
84
  dataset = load_dataset(url.strip())
85
- datasets.append(pd.DataFrame(dataset["train"]))
 
 
 
86
  time.sleep(2) # Simulación de procesamiento
87
  except Exception as e:
88
- progress(1, desc=f"Error al cargar el dataset {url}: {str(e)}")
89
- return f"Error al cargar el dataset {url}: {str(e)}"
90
  combined_dataset = pd.concat(datasets, ignore_index=True)
91
- progress(1, desc="Todos los datasets procesados y combinados.")
92
  return combined_dataset.to_csv(index=False)
93
 
94
  # Interfaz de Usuario con Gradio
@@ -113,7 +111,7 @@ with gr.Blocks(title="Diseñador de Redes Neuronales Multimodales") as demo:
113
  visible=False
114
  )
115
  audio_tasks = gr.CheckboxGroup(
116
- ["Clasificación de Audio", "Transcripción de Voz", "Separación de Fuentes", "Síntesis de Voz"],
117
  label="Tareas para Audio",
118
  visible=False
119
  )
@@ -136,11 +134,7 @@ with gr.Blocks(title="Diseñador de Redes Neuronales Multimodales") as demo:
136
  datasets_output = gr.Textbox(label="Datasets Disponibles", lines=5)
137
 
138
  with gr.Row():
139
- select_datasets_btn = gr.Button("Seleccionar Datasets")
140
- dataset_urls = gr.Textbox(label="Introduce URLs de Datasets", lines=5)
141
-
142
- with gr.Row():
143
- process_datasets_btn = gr.Button("Procesar Datasets")
144
  processed_output = gr.File(label="Dataset Procesado")
145
 
146
  # Conexiones de botones a funciones
@@ -150,8 +144,7 @@ with gr.Blocks(title="Diseñador de Redes Neuronales Multimodales") as demo:
150
  outputs=csv_output
151
  )
152
  search_datasets_btn.click(search_datasets, inputs=[modalities], outputs=datasets_output)
153
- select_datasets_btn.click(select_datasets, inputs=[datasets_output, dataset_urls], outputs=dataset_urls)
154
- process_datasets_btn.click(process_datasets, inputs=[dataset_urls], outputs=processed_output)
155
 
156
  # Lanzar la aplicación
157
  demo.launch()
 
43
  columns.extend(["archivo_audio", "fuente_separada"])
44
  elif modality == "Audio" and task == "Síntesis de Voz":
45
  columns.extend(["texto", "archivo_audio_generado"])
46
+ elif modality == "Audio" and task == "MIDI": # Nueva tarea MIDI
47
+ columns.extend(["archivo_midi", "etiqueta"])
48
  progress(1, desc="Esquema CSV generado con éxito.")
49
  return ", ".join(columns)
50
 
 
53
  dataset_map = {
54
  "Visión": ["coco", "imagenet", "openimages", "cityscapes"],
55
  "NLP": ["imdb", "glue", "wmt14", "sentiment140"],
56
+ "Audio": ["common_voice", "librispeech", "fma", "musdb18", "lmd"] # Dataset MIDI agregado
57
  }
58
  results = []
59
  total_steps = len(modalities)
 
66
  progress(1, desc="Búsqueda de datasets completada.")
67
  return "\n".join(results)
68
 
69
+ # Función para analizar datasets seleccionados y organizarlos según el esquema CSV
70
+ def analyze_datasets(selected_datasets, csv_schema, progress=gr.Progress()):
 
 
 
 
 
 
 
71
  datasets = []
72
+ schema_columns = [col.strip() for col in csv_schema.split(",")]
73
+ total_steps = len(selected_datasets.split("\n"))
74
+ progress(0, desc="Iniciando análisis de datasets...")
75
+ for i, url in enumerate(selected_datasets.split("\n")):
76
  if url.strip():
77
+ progress(i / total_steps, desc=f"Analizando dataset: {url}")
78
  try:
79
  dataset = load_dataset(url.strip())
80
+ df = pd.DataFrame(dataset["train"])
81
+ # Filtrar columnas según el esquema CSV
82
+ filtered_df = df[[col for col in schema_columns if col in df.columns]]
83
+ datasets.append(filtered_df)
84
  time.sleep(2) # Simulación de procesamiento
85
  except Exception as e:
86
+ progress(1, desc=f"Error al analizar el dataset {url}: {str(e)}")
87
+ return f"Error al analizar el dataset {url}: {str(e)}"
88
  combined_dataset = pd.concat(datasets, ignore_index=True)
89
+ progress(1, desc="Análisis completado. Datos organizados según el esquema CSV.")
90
  return combined_dataset.to_csv(index=False)
91
 
92
  # Interfaz de Usuario con Gradio
 
111
  visible=False
112
  )
113
  audio_tasks = gr.CheckboxGroup(
114
+ ["Clasificación de Audio", "Transcripción de Voz", "Separación de Fuentes", "Síntesis de Voz", "MIDI"], # Tarea MIDI añadida
115
  label="Tareas para Audio",
116
  visible=False
117
  )
 
134
  datasets_output = gr.Textbox(label="Datasets Disponibles", lines=5)
135
 
136
  with gr.Row():
137
+ analyze_datasets_btn = gr.Button("Analizar Datasets Seleccionados")
 
 
 
 
138
  processed_output = gr.File(label="Dataset Procesado")
139
 
140
  # Conexiones de botones a funciones
 
144
  outputs=csv_output
145
  )
146
  search_datasets_btn.click(search_datasets, inputs=[modalities], outputs=datasets_output)
147
+ analyze_datasets_btn.click(analyze_datasets, inputs=[datasets_output, csv_output], outputs=processed_output)
 
148
 
149
  # Lanzar la aplicación
150
  demo.launch()