|
|
|
|
|
from graphdatascience import GraphDataScience |
|
|
from typing import Dict, List, Any |
|
|
import pandas as pd |
|
|
|
|
|
def run_gds_bfs(gds: "GraphDataScience", natural_graph_name: str, reverse_graph_name: str, source_name: str, max_depth: int = None, expert: bool = False) -> Dict[str, Any]:
    """
    Run a directional breadth-first search (BFS) with GDS to find descendants and ancestors.

    This function requires two graphs pre-projected in GDS memory:
    - a "natural" graph to find descendants (relationships in the source -> target direction);
    - a "reverse" graph to find ancestors (relationships in the target -> source direction).

    Args:
        gds: Connection object to the Graph Data Science library.
        natural_graph_name: Name of the GDS graph projected with NATURAL orientation.
        reverse_graph_name: Name of the GDS graph projected with REVERSE orientation.
        source_name: The 'name' property of the node where the search starts.
        max_depth: Maximum search depth. If None, the search is unbounded.
        expert: When False, a node labelled "Author" is rejected as a start node.

    Returns:
        None when the source node is not found (or is an Author in non-expert
        mode). On lookup error, a dict with empty DataFrames. Otherwise a dict
        with the source node id and label and two pandas DataFrames: one for
        descendant paths and one for ancestor paths.
    """
    # Pre-bind so the except-branch can inspect it even when run_cypher itself raised.
    source_id_result = None
    try:
        source_id_result = gds.run_cypher(
            """
            MATCH (n {name: $source_name})
            RETURN id(n) AS id , labels(n) as label
            LIMIT 1
            """,
            {"source_name": source_name}
        )

        if source_id_result.empty or (source_id_result["label"][0] == ["Author"] and not expert):
            print(f"Le modèle ou dataset avec le nom '{source_name}' n'a pas été trouvé.")
            return None

        source_node_id = source_id_result['id'][0]
    except Exception as e:
        print(f"Erreur lors de la recherche de l'ID du nœud source pour '{source_name}': {e}")
        # BUG FIX: the original read source_id_result["label"][0][0] here
        # unconditionally, raising NameError (query failed before assignment)
        # or IndexError (empty result) from inside the error handler.
        label = None
        if source_id_result is not None and not source_id_result.empty:
            label = source_id_result["label"][0][0]
        return {"source_label": label, "descendant": pd.DataFrame(), "ascendant": pd.DataFrame()}

    bfs_params = {'sourceNode': source_node_id}
    print(bfs_params)

    # maxDepth is only passed when bounded; GDS treats its absence as unlimited.
    if max_depth is not None:
        bfs_params['maxDepth'] = max_depth

    # Descendants: traverse the projection with natural orientation.
    g_natural = gds.graph.get(natural_graph_name)
    desc_df = gds.bfs.stream(g_natural, **bfs_params)
    print("BFS pour les descendants sur le graphe naturel terminé.")

    # Ancestors: identical traversal on the reverse-oriented projection.
    g_reverse = gds.graph.get(reverse_graph_name)
    asc_df = gds.bfs.stream(g_reverse, **bfs_params)
    print("BFS pour les ascendants sur le graphe inversé terminé.")
    print("DESC", desc_df)
    print("ASC", asc_df)
    print(source_id_result["label"][0][0])

    return {
        "source_node": source_node_id, "source_label": source_id_result["label"][0][0],
        "descendant": desc_df,
        "ascendant": asc_df
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_genealogy_highlights(gds: "GraphDataScience", model_name: str, num_highlights: int = 2) -> Dict:
    """
    Find the key models in the ancestry and descendance (1st/2nd most cited/downloaded).

    Args:
        gds: The GraphDataScience instance.
        model_name: The name of the starting model.
        num_highlights: Number of models to fetch for each category (default 2).

    Returns:
        A dict with two lists of unified model records for display:
        "desc_unique_models" and "asc_unique_models". Each record carries a
        'badges' list explaining why the model was highlighted. On query
        failure, the error is printed and the corresponding list stays empty.
    """
    highlights = {
        "desc_unique_models": [],
        "asc_unique_models": []
    }

    # Display metadata (label text, CSS class, tooltip) for every badge a
    # highlighted model can earn. Texts are user-facing French strings.
    badges_info = {
        'desc_cited_1': {
            'text': '1er + cité',
            'class': 'bg-success',
            'title': "Ce modèle est le plus cité parmi les modèles de la descendance."
        },
        'desc_cited_2': {
            'text': '2e + cité',
            'class': 'bg-success bg-opacity-75',
            'title': "Ce modèle est le deuxième plus cité parmi les modèles de la descendance."
        },
        'desc_downloaded_1': {
            'text': '1er + téléchargé',
            'class': 'beta',
            'title': "Ce modèle est le plus téléchargé parmi les modèles de la descendance."
        },
        'desc_downloaded_2': {
            'text': '2e + téléchargé',
            'class': 'alpha',
            'title': "Ce modèle est le deuxième plus téléchargé parmi les modèles de la descendance."
        },

        'asc_foundation': {
            'text': 'Modèle racine',
            'class': 'bg-warning text-dark',
            'title': "Ce modèle n'a pas de parent connu."
        },
        'asc_cited_1': {
            'text': '1er + cité',
            'class': 'bg-success',
            'title': "Ce modèle est le plus cité parmi les modèles de l'ascendance."
        },
        'asc_cited_2': {
            'text': '2e + cité',
            'class': 'bg-success bg-opacity-75',
            'title': "Ce modèle est le deuxième plus cité parmi les modèles de l'ascendance."
        },
        'asc_downloaded_1': {
            'text': '1er + téléchargé',
            'class': 'beta',
            'title': "Ce modèle est le plus téléchargé parmi les modèles de l'ascendance."
        },
        'asc_downloaded_2': {
            'text': '2e + téléchargé',
            'class': 'alpha',
            'title': "Ce modèle est le deuxième plus téléchargé parmi les modèles de l'ascendance."
        },
    }

    def process_and_assign_badges(
        unified_dict: Dict,
        model_list: List[Dict],
        badge_keys: List[str]
    ):
        """
        Helper that merges models and their badges into a unified dict.

        Avoids duplicating the merge code for each category (cited,
        downloaded, etc.). The i-th model in model_list receives
        badge_keys[i]; models beyond len(badge_keys) are skipped.
        """
        for i, model in enumerate(model_list):
            if i < len(badge_keys):
                model_name_key = model['name']
                badge_key = badge_keys[i]

                # First time this model shows up: copy it and start a badge list.
                if model_name_key not in unified_dict:
                    unified_dict[model_name_key] = model.copy()
                    unified_dict[model_name_key]['badges'] = []

                # The same model can earn several badges; never append duplicates.
                badge_to_add = badges_info[badge_key]
                if badge_to_add not in unified_dict[model_name_key]['badges']:
                    unified_dict[model_name_key]['badges'].append(badge_to_add)

    # --- Descendants: models reachable FROM the start model via USED_IN. ---
    # citation_count = number of models that directly use the descendant.
    desc_downloads_query = """
    MATCH (start:Model {name: $model_name})-[:USED_IN*1..]->(descendant:Model)
    WHERE start <> descendant
    WITH descendant, size([(m:Model)<-[:USED_IN]-(descendant) | m]) AS citation_count
    RETURN descendant.name AS name, citation_count, descendant.downloads AS downloads, descendant.task AS task, descendant.license AS license, descendant.likes AS likes, descendant.createdAt AS createdAt
    ORDER BY descendant.downloads DESC, descendant.name ASC
    LIMIT $limit
    """
    desc_cited_query = """
    MATCH (start:Model {name: $model_name})-[:USED_IN*1..]->(descendant:Model)
    WHERE start <> descendant
    WITH descendant, size([(m:Model)<-[:USED_IN]-(descendant) | m]) AS citation_count
    RETURN descendant.name AS name, citation_count, descendant.task AS task, descendant.downloads AS downloads, descendant.license AS license, descendant.likes AS likes, descendant.createdAt AS createdAt
    ORDER BY citation_count DESC, descendant.name ASC
    LIMIT $limit
    """
    try:
        params = {"model_name": model_name, "limit": num_highlights}
        desc_downloaded_list = gds.run_cypher(desc_downloads_query, params).to_dict('records')
        desc_cited_list = gds.run_cypher(desc_cited_query, params).to_dict('records')

        # Merge the two rankings; a model present in both keeps one record
        # with both badges.
        desc_unified_models = {}
        process_and_assign_badges(desc_unified_models, desc_cited_list, ['desc_cited_1', 'desc_cited_2'])
        process_and_assign_badges(desc_unified_models, desc_downloaded_list, ['desc_downloaded_1', 'desc_downloaded_2'])

        highlights["desc_unique_models"] = list(desc_unified_models.values())

    except Exception as e:
        print(f"Erreur lors de la recherche des descendants: {e}")

    # --- Ancestors: models that lead TO the start model via USED_IN. ---
    # NOTE(review): both asc queries cap at LIMIT 1, so the second badge key
    # ('asc_downloaded_2' / 'asc_cited_2') passed below can never be used —
    # confirm whether LIMIT $limit was intended here as well.
    asc_downloads_query = """
    MATCH (ascendant:Model)-[:USED_IN*1..]->(start:Model {name: $model_name})
    WHERE start <> ascendant
    WITH ascendant, size([(m:Model)<-[:USED_IN]-(ascendant) | m]) AS citation_count
    RETURN ascendant.name AS name, citation_count, ascendant.downloads AS downloads, ascendant.task AS task,
    ascendant.license AS license, ascendant.likes AS likes, ascendant.createdAt AS createdAt
    ORDER BY ascendant.downloads DESC
    LIMIT 1 // On ne veut que LE plus téléchargé
    """
    asc_cited_query = """
    MATCH (ascendant:Model)-[:USED_IN*1..]->(start:Model {name: $model_name})
    WHERE start <> ascendant
    WITH ascendant, size([(m:Model)<-[:USED_IN]-(ascendant) | m]) AS citation_count
    RETURN ascendant.name AS name, citation_count, ascendant.downloads AS downloads, ascendant.task AS task,
    ascendant.license AS license, ascendant.likes AS likes, ascendant.createdAt AS createdAt
    ORDER BY citation_count DESC
    LIMIT 1 // On ne veut que LE plus cité
    """

    # Root ("foundation") models: ancestors with no known parent themselves.
    # NOTE(review): the EXISTS( (pattern) ) function syntax is deprecated and
    # removed in Neo4j 5 (use `NOT (:Model)-[:USED_IN]->(foundation)` or an
    # EXISTS { } subquery) — confirm the target server version.
    foundation_query = """
    MATCH (foundation:Model)-[:USED_IN*1..]->(start:Model {name: $model_name})
    WHERE NOT EXISTS( (:Model)-[:USED_IN]->(foundation) )
    WITH foundation, size([(m:Model)<-[:USED_IN]-(foundation) | m]) AS citation_count
    RETURN DISTINCT foundation.name AS name, citation_count, foundation.downloads AS downloads, foundation.task AS task,
    foundation.license AS license, foundation.likes AS likes, foundation.createdAt AS createdAt
    LIMIT $limit
    """

    try:
        params = {"model_name": model_name, "limit": num_highlights}
        asc_foundation_list = gds.run_cypher(foundation_query, params).to_dict('records')
        asc_downloaded_list = gds.run_cypher(asc_downloads_query, params).to_dict('records')
        asc_cited_list = gds.run_cypher(asc_cited_query, params).to_dict('records')

        asc_unified_models = {}

        # Every foundation model gets the same 'asc_foundation' badge, hence
        # the repeated key list sized to num_highlights.
        process_and_assign_badges(asc_unified_models, asc_foundation_list, ['asc_foundation'] * num_highlights)
        process_and_assign_badges(asc_unified_models, asc_cited_list, ['asc_cited_1', 'asc_cited_2'])
        process_and_assign_badges(asc_unified_models, asc_downloaded_list, ['asc_downloaded_1', 'asc_downloaded_2'])

        highlights["asc_unique_models"] = list(asc_unified_models.values())

    except Exception as e:
        print(f"Erreur lors de la recherche des ascendants: {e}")

    return highlights
|
|
|
|
|
|
|
|
def create_node_data(node_props, label):
    """
    Build the data dict for a single node, as displayed by the front-end graph.

    Args:
        node_props: Mapping of node properties (name, license, task, ...).
        label: The node's label; "Author" and "Model" get dedicated shapes,
            anything else is rendered as a Dataset.

    Returns:
        A dict whose keys depend on the label; every variant carries an
        "id" key taken from the node's "name" property.
    """
    base_data = {
        "id": node_props.get("name", "")
    }

    if label == "Author":
        return {
            **base_data,
            "label": node_props.get("type", "Unknown"),
            "followers": node_props.get("followers", 1)
        }
    elif label == "Model":
        # License values arrive as stringified lists like "['mit']": drop the brackets.
        licens_ = str(node_props.get("license", "Inconnue")).strip("[]")
        # str() already turned NaN into "nan", so the string comparison is the
        # effective missing-value check. (BUG FIX: the original also called
        # pd.isna(licens_), which is always False on a str — dead code removed.)
        if licens_ == "\'other\'" or licens_ == "nan":
            licens_ = "Autre"

        tache = node_props.get("task", "")
        if tache == "unknown":
            tache = "Inconnue"
        return {
            **base_data,
            "label": "Modèle",
            "downloads": node_props.get("downloads", 1),
            "likes": node_props.get("likes", 0),
            "license": licens_,
            "createdAt": node_props.get("createdAt", "inconnue"),
            "createdAt_dataset": node_props.get("createdAt_dataset", "inconnue"),
            "task": tache,
            "author": node_props.get("author", ""),
            "dataset": node_props.get("dataset", ""),
            "ascendantsCount": node_props.get("ascendantsCount", 0),
            "descendantsCount": node_props.get("descendantsCount", 0),
            "citationCount": node_props.get("citationCount", 0),
            "distance": node_props.get("distance", 0)
        }
    else:
        return {
            **base_data,
            "label": "Dataset",
            "downloads": node_props.get("downloads", 1),
            "createdAt_dataset": node_props.get("createdAt_dataset", "inconnue")
        }
    # BUG FIX: the original ended with an unreachable
    # `return {"id": ..., "label": label, **node_props}` after the exhaustive
    # if/elif/else above — removed.
|
|
|
|
|
|