Spaces:
Running
Running
"""
Examine les vrais tickets par pôle pour comprendre le vocabulaire réel.
"""
import csv
import sys
import unicodedata

import pandas as pd
# Switch stdout to UTF-8 regardless of the console's default encoding.
sys.stdout.reconfigure(encoding="utf-8")
# Raise the csv module's maximum field size to 10,000,000.
# NOTE(review): presumably needed because very long ticket descriptions are
# parsed through the csv module by pandas' python engine - confirm.
csv.field_size_limit(10_000_000)
def normaliser(texte):
    """Return *texte* as a trimmed, lower-cased, accent-free string.

    The value is converted with ``str()`` first, so non-string input
    (``None``, numbers) is accepted. The script uses this to look up CSV
    column headers regardless of case, surrounding spaces or accents.

    The previous body chained ``.replace('e', 'e')`` twice, which replaces a
    letter with itself and therefore removed no accent at all; diacritics are
    now stripped through Unicode decomposition.
    """
    nettoye = str(texte).strip().lower()
    # NFD splits an accented letter into its base letter followed by
    # combining marks; dropping the marks leaves the bare letter.
    decompose = unicodedata.normalize("NFD", nettoye)
    return "".join(c for c in decompose if not unicodedata.combining(c))
def map_groupe_to_pole(groupe):
    """Map a ticket group label to the name of its pole.

    The label is converted with ``str()``, upper-cased and stripped, then
    checked against the rules below in order; the first match wins. Every
    rule except "RUN" is a plain substring test, so a short keyword such as
    "BI" matches any label that merely contains those letters. "RUN" must
    equal the whole label. Labels matching no rule return "AUTRES".
    """
    etiquette = str(groupe).upper().strip()

    def contient(*mots_cles):
        # True when at least one keyword occurs anywhere in the label.
        return any(mot in etiquette for mot in mots_cles)

    # "CRITICIT" already covers "CRITICITE"; both keywords are kept as-is.
    if contient("OPCON", "CHAINE DE NUIT", "CRITICITE", "CRITICIT"):
        return "MONITORING"
    if etiquette == "RUN":
        return "RUN"
    if contient("DATA", "BI"):
        return "DATA & BI"
    if contient("COMMERCE", "LOGISTIQUE"):
        return "COMMERCE & MAGASINS"
    if contient("FINANCE", "OFFRE", "WEB", "RESSOURCES", "PRIORITAIRE",
                "SUPPORT", "INFRA", "ASTREINTE"):
        return "FINANCE & SUPPORT"
    return "AUTRES"
# Loading: read every ticket export that is available. A missing or
# unreadable file is still skipped (best effort), but it is now reported on
# stderr instead of being swallowed silently.
dfs = []
for f in ["Tickets_1.csv", "Tickets_2.csv"]:
    try:
        # sep=None asks the python engine to detect the delimiter itself.
        dfs.append(pd.read_csv(f, sep=None, engine='python', encoding='utf-8-sig'))
    except (OSError, ValueError, csv.Error) as err:
        # OSError: file missing or unreadable. ValueError: covers pandas'
        # parser / empty-data errors and UnicodeDecodeError.
        print(f"Avertissement: {f} ignore ({err})", file=sys.stderr)

# pd.concat([]) would fail with an obscure "No objects to concatenate".
if not dfs:
    sys.exit("Aucun fichier de tickets n'a pu etre charge.")

df = pd.concat(dfs, ignore_index=True)

# Look columns up by normalised header so case and padding do not matter.
mapping = {normaliser(c): c for c in df.columns}
col_desc = mapping.get('description')
col_objet = mapping.get('objet')
col_groupe = mapping.get('groupe')
print(f"Colonnes trouvees: objet={col_objet}, description={col_desc}, groupe={col_groupe}")
print(f"Total tickets: {len(df)}\n")

# 'description' and 'groupe' are indexed unconditionally below; stop with a
# readable message rather than a KeyError on df[None]. 'objet' is optional.
manquantes = [
    nom
    for nom, colonne in (('description', col_desc), ('groupe', col_groupe))
    if colonne is None
]
if manquantes:
    sys.exit(
        f"Colonnes obligatoires introuvables: {', '.join(manquantes)} "
        f"(colonnes disponibles: {list(df.columns)})"
    )

# astype(str) keeps the concatenation valid when a column holds non-string
# values (e.g. a purely numeric subject).
description = df[col_desc].fillna('').astype(str)
if col_objet is not None:
    df['text_brut'] = df[col_objet].fillna('').astype(str) + " " + description
else:
    df['text_brut'] = description

# Assign a pole to every ticket and drop those that match no pole.
df['pole'] = df[col_groupe].apply(map_groupe_to_pole)
df = df[df['pole'] != "AUTRES"]
# Show, for each pole, its ticket count and its five most frequent
# original groups.
separateur = "=" * 60
print(separateur)
print(" GROUPES FRESHSERVICE -> POLES")
print(separateur)
# groupby yields the poles in sorted order, each with its own rows.
for pole, subset in df.groupby('pole', sort=True):
    print(f"\n--- {pole} ({len(subset)} tickets) ---")
    groupes = subset[col_groupe].value_counts().iloc[:5]
    for nom_groupe, nb_tickets in groupes.items():
        print(f" {nom_groupe}: {nb_tickets}")
# Print up to three sample tickets per pole (fixed seed, so the same
# tickets are shown on every run).
def _texte_ou_vide(valeur):
    """Return ``str(valeur)``, or '' when the cell is missing (None/NaN/NA)."""
    if pd.api.types.is_scalar(valeur) and pd.isna(valeur):
        return ''
    return str(valeur)


print("\n" + "=" * 60)
print(" EXEMPLES DE VRAIS TICKETS PAR POLE")
print("=" * 60)
for pole in sorted(df['pole'].unique()):
    # Filter once and reuse the result for both the size and the sampling.
    tickets_pole = df[df['pole'] == pole]
    subset = tickets_pole.sample(min(3, len(tickets_pole)), random_state=42)
    print(f"\n{'='*40}")
    print(f" {pole}")
    print(f"{'='*40}")
    for _, row in subset.iterrows():
        # Missing cells are shown empty instead of as the literal "nan".
        # Output is truncated to 80 / 200 characters.
        objet = _texte_ou_vide(row.get(col_objet, ''))[:80]
        desc = _texte_ou_vide(row.get(col_desc, ''))[:200]
        print(f"\n Objet: {objet}")
        print(f" Desc: {desc}")