"""Inspect real tickets per pole to understand the vocabulary actually used.

The script loads the ticket CSV exports, maps every ticket's group to a
pole, then prints the most frequent source groups per pole followed by a
few sample tickets per pole.
"""
import csv
import sys
import unicodedata

import pandas as pd

# CSV exports read in this order and concatenated into one frame.
TICKET_FILES = ("Tickets_1.csv", "Tickets_2.csv")

# Keyword sets used by map_groupe_to_pole. A group matches a set when any
# keyword is a substring of the upper-cased group name.
_MONITORING_KEYS = ("OPCON", "CHAINE DE NUIT", "CRITICITE", "CRITICIT")
_DATA_KEYS = ("DATA", "BI")
_COMMERCE_KEYS = ("COMMERCE", "LOGISTIQUE")
_FINANCE_KEYS = (
    "FINANCE", "OFFRE", "WEB", "RESSOURCES",
    "PRIORITAIRE", "SUPPORT", "INFRA", "ASTREINTE",
)


def normaliser(texte):
    """Return *texte* as a trimmed, lower-case string with accents removed.

    The value is converted with ``str()`` first, so non-string column labels
    are accepted. Accents are removed by canonical decomposition (NFD) and
    dropping the combining marks, so that e.g. an accented "e" compares
    equal to a plain "e" when matching column headers.
    """
    lowered = str(texte).strip().lower()
    decomposed = unicodedata.normalize("NFD", lowered)
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


def map_groupe_to_pole(groupe):
    """Map a ticket group name to its pole label.

    The group is upper-cased and stripped, then tested against the keyword
    sets in a fixed order; the first match wins. "RUN" must match the whole
    name exactly, the other rules are substring matches. Anything that
    matches no rule is reported as "AUTRES".
    """
    g = str(groupe).upper().strip()

    def contains_any(keys):
        return any(k in g for k in keys)

    if contains_any(_MONITORING_KEYS):
        return "MONITORING"
    if g == "RUN":
        return "RUN"
    if contains_any(_DATA_KEYS):
        return "DATA & BI"
    if contains_any(_COMMERCE_KEYS):
        return "COMMERCE & MAGASINS"
    if contains_any(_FINANCE_KEYS):
        return "FINANCE & SUPPORT"
    return "AUTRES"


def _load_tickets(paths):
    """Read every CSV in *paths* and return them concatenated, or None.

    Loading is best-effort: a file that cannot be opened or parsed is
    skipped, but the reason is reported on stderr instead of being hidden.
    None is returned when no file could be read at all.
    """
    frames = []
    for path in paths:
        try:
            frame = pd.read_csv(path, sep=None, engine='python', encoding='utf-8-sig')
        except (OSError, ValueError, csv.Error) as exc:
            # OSError covers missing/unreadable files; ValueError covers
            # decoding errors and pandas' parser/empty-data errors.
            print(f"Avertissement: {path} ignore ({exc})", file=sys.stderr)
        else:
            frames.append(frame)
    if not frames:
        return None
    return pd.concat(frames, ignore_index=True)


def _text_column(df, column):
    """Return *column* of *df* as strings, with missing cells as ''.

    When *column* is None (header not found) an all-empty Series aligned on
    the frame's index is returned so callers can concatenate unconditionally.
    """
    if column is None:
        return pd.Series('', index=df.index, dtype=object)
    return df[column].fillna('').astype(str)


def _preview(row, column, limit):
    """Return at most *limit* characters of row[column], '' when missing."""
    if column is None:
        return ''
    value = row.get(column, '')
    if pd.isna(value):
        # Avoid printing the literal "nan" for empty cells.
        return ''
    return str(value)[:limit]


def main(paths=TICKET_FILES):
    """Load the ticket exports and print the per-pole overview."""
    # Console and csv settings are applied here rather than at import time
    # so that importing the helpers has no side effects.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding='utf-8')
    csv.field_size_limit(10000000)

    # Loading
    df = _load_tickets(paths)
    if df is None:
        sys.exit("Aucun fichier de tickets lisible parmi: " + ", ".join(paths))

    # Resolve the real column labels from their normalised names.
    mapping = {normaliser(c): c for c in df.columns}
    col_desc = mapping.get('description')
    col_objet = mapping.get('objet')
    col_groupe = mapping.get('groupe')
    print(f"Colonnes trouvees: objet={col_objet}, description={col_desc}, groupe={col_groupe}")
    print(f"Total tickets: {len(df)}\n")

    if col_groupe is None:
        # Without the group column no ticket can be assigned to a pole.
        sys.exit("Colonne 'groupe' introuvable: impossible d'affecter les tickets a un pole")

    description = _text_column(df, col_desc)
    if col_objet:
        df['text_brut'] = _text_column(df, col_objet) + " " + description
    else:
        df['text_brut'] = description

    df['pole'] = df[col_groupe].apply(map_groupe_to_pole)
    df = df[df['pole'] != "AUTRES"]

    # Split once by pole (keys come out sorted) and reuse for both reports.
    by_pole = list(df.groupby('pole', sort=True))

    # Show the original groups behind each pole
    print("=" * 60)
    print(" GROUPES FRESHSERVICE -> POLES")
    print("=" * 60)
    for pole, subset in by_pole:
        groupes = subset[col_groupe].value_counts().head(5)
        print(f"\n--- {pole} ({len(subset)} tickets) ---")
        for g, c in groupes.items():
            print(f" {g}: {c}")

    # Show up to 3 sample tickets per pole
    print("\n" + "=" * 60)
    print(" EXEMPLES DE VRAIS TICKETS PAR POLE")
    print("=" * 60)
    for pole, subset in by_pole:
        # Fixed seed keeps the selected examples reproducible between runs.
        examples = subset.sample(min(3, len(subset)), random_state=42)
        print(f"\n{'='*40}")
        print(f" {pole}")
        print(f"{'='*40}")
        for _, row in examples.iterrows():
            objet = _preview(row, col_objet, 80)
            desc = _preview(row, col_desc, 200)
            print(f"\n Objet: {objet}")
            print(f" Desc: {desc}")


if __name__ == "__main__":
    main()