SergioSCA committed on
Commit
4a9dbfa
verified
1 Parent(s): a9455bd

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +23 -27
functions.py CHANGED
@@ -12,39 +12,35 @@ def cargar_modalidades_tareas():
12
 
13
  # Función para actualizar el JSON desde HuggingFace y devolver tareas
14
  def actualizar_modalidades_tareas_desde_huggingface():
15
- MODALIDADES = ["text-classification", "token-classification", "translation", "summarization",
16
- "speech-recognition", "audio-classification", "music-classification",
17
- "image-classification", "object-detection", "image-segmentation",
18
- "question-answering", "information-retrieval"]
19
 
20
- modalidades_tareas = {}
21
  for task in MODALIDADES:
22
- url = f"https://huggingface.co/api/datasets?task={task}&limit=5"
23
- response = requests.get(url)
24
- datasets = response.json()
25
 
26
- columnas = set()
27
- for dataset in datasets:
28
- try:
29
- info = requests.get(f"https://huggingface.co/api/datasets/{dataset['id']}").json()
30
- columnas.update(info["features"].keys())
31
- except:
32
- continue
33
-
34
- modalidad = "NLP" if task in ["text-classification", "token-classification", "translation", "summarization"] else \
35
- "Audio" if task in ["speech-recognition", "audio-classification", "music-classification"] else \
36
- "Vision" if task in ["image-classification", "object-detection", "image-segmentation"] else \
37
- "RAG"
38
-
39
- if modalidad not in modalidades_tareas:
40
- modalidades_tareas[modalidad] = {"tareas": {}, "columnas_generales": []}
41
- modalidades_tareas[modalidad]["tareas"][task] = list(columnas)
42
 
 
43
  with open("modalidades_tareas.json", "w") as file:
44
- json.dump(modalidades_tareas, file, indent=4)
45
 
46
- # Devolver lista de tareas actualizada
47
- return [t for datos in modalidades_tareas.values() for t in datos["tareas"].keys()]
48
 
49
 
50
  # Cargar modalidades y tareas
 
12
 
13
  # Función para actualizar el JSON desde HuggingFace y devolver tareas
14
  def actualizar_modalidades_tareas_desde_huggingface():
15
+ MODALIDADES = ["text-classification", "image-classification", "speech-recognition"]
16
+ MODALIDAD_TAREAS = cargar_modalidades_tareas() # Cargar JSON existente
 
 
17
 
 
18
  for task in MODALIDADES:
19
+ url = f"https://huggingface.co/api/datasets?task={task}&full=true&limit=10"
20
+ response = requests.get(url).json()
 
21
 
22
+ for dataset in response:
23
+ dataset_id = dataset["id"]
24
+ dataset_info = requests.get(f"https://huggingface.co/api/datasets/{dataset_id}").json()
25
+
26
+ # A帽adir datasets y columnas al JSON
27
+ if task not in MODALIDAD_TAREAS["NLP"]["tareas"]:
28
+ MODALIDAD_TAREAS["NLP"]["tareas"][task] = {
29
+ "nombre": "Clasificaci贸n de texto",
30
+ "columnas": ["text", "label"],
31
+ "datasets": {}
32
+ }
33
+
34
+ MODALIDAD_TAREAS["NLP"]["tareas"][task]["datasets"][dataset_id] = {
35
+ "columnas": list(dataset_info.get("features", {}).keys()),
36
+ "licencia": dataset.get("license", "unknown")
37
+ }
38
 
39
+ # Guardar JSON con indentaci贸n para evitar errores <button class="citation-flag" data-index="1">
40
  with open("modalidades_tareas.json", "w") as file:
41
+ json.dump(MODALIDAD_TAREAS, file, indent=4)
42
 
43
+ return "Metadatos actualizados: columnas y datasets almacenados."
 
44
 
45
 
46
  # Cargar modalidades y tareas