# GDM-Aide-RUN-V2 / explore_data.py
# Elkristobal59's picture
# feat: optimize model loading, clean imports, run V4 training and fix tests
# 187f074
# Raw
# History Blame Contribute Delete
# 2.53 kB
"""
Examine les vrais tickets par pole pour comprendre le vocabulaire reel.
"""
import pandas as pd
import sys
import csv
# Switch stdout to UTF-8 before anything is printed (presumably so ticket text
# with non-ASCII characters prints on consoles using another default encoding).
sys.stdout.reconfigure(encoding="utf-8")
# Raise the csv module's per-field size limit to ten million characters
# (presumably for very long ticket descriptions read by the python CSV engine
# used further down — confirm).
csv.field_size_limit(10_000_000)
def normaliser(texte):
    """Return *texte* as a trimmed, lower-case string with accents removed.

    The value is converted with ``str()`` first, so non-string input (``None``,
    numbers) is accepted. Accents are removed through Unicode decomposition,
    which makes an accented header and its unaccented spelling map to the
    same key.
    """
    # Function-scope import keeps this fix self-contained.
    import unicodedata

    cleaned = str(texte).strip().lower()
    # The previous chained .replace('e', 'e') calls were no-ops (the accented
    # characters had been lost), so accents were never actually stripped.
    decomposed = unicodedata.normalize("NFD", cleaned)
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))
def map_groupe_to_pole(groupe):
    """Translate a raw group label into the name of its pole.

    The label is converted with ``str()``, upper-cased and stripped. Rules are
    tried in order and the first match wins. Keyword rules are substring
    matches (so "BI" matches any label containing those two letters), while
    the "RUN" rule requires the whole label to equal "RUN". Labels matching
    no rule are mapped to "AUTRES".
    """
    label = str(groupe).upper().strip()

    def mentions(*keywords):
        # True when at least one keyword occurs anywhere in the label.
        return any(keyword in label for keyword in keywords)

    if mentions("OPCON", "CHAINE DE NUIT", "CRITICITE", "CRITICIT"):
        return "MONITORING"
    if label == "RUN":
        return "RUN"
    if mentions("DATA", "BI"):
        return "DATA & BI"
    if mentions("COMMERCE", "LOGISTIQUE"):
        return "COMMERCE & MAGASINS"
    if mentions("FINANCE", "OFFRE", "WEB", "RESSOURCES", "PRIORITAIRE",
                "SUPPORT", "INFRA", "ASTREINTE"):
        return "FINANCE & SUPPORT"
    return "AUTRES"
# Loading: read every ticket export that can be parsed; unreadable files are
# skipped, but the reason is reported instead of being silently swallowed.
dfs = []
for f in ["Tickets_1.csv", "Tickets_2.csv"]:
    try:
        # sep=None asks the python engine to detect the delimiter;
        # 'utf-8-sig' tolerates a leading byte-order mark.
        dfs.append(pd.read_csv(f, sep=None, engine='python', encoding='utf-8-sig'))
    except (OSError, ValueError, csv.Error) as exc:
        # OSError covers missing/unreadable files; ValueError covers decoding
        # and pandas parsing errors. A bare except would also have hidden
        # KeyboardInterrupt and genuine programming errors.
        print(f"Avertissement: lecture de {f} impossible ({exc})", file=sys.stderr)
if not dfs:
    # pd.concat([]) would otherwise fail with "No objects to concatenate".
    sys.exit("Aucun fichier de tickets n'a pu etre charge.")
df = pd.concat(dfs, ignore_index=True)
# Resolve the real column names from their normalised spelling.
mapping = {normaliser(c): c for c in df.columns}
col_desc = mapping.get('description')
col_objet = mapping.get('objet')
col_groupe = mapping.get('groupe')
print(f"Colonnes trouvees: objet={col_objet}, description={col_desc}, groupe={col_groupe}")
print(f"Total tickets: {len(df)}\n")
# 'description' and 'groupe' are indexed unconditionally below; stop with a
# clear message instead of a KeyError on df[None]. 'objet' stays optional.
missing = [
    name
    for name, col in (("description", col_desc), ("groupe", col_groupe))
    if col is None
]
if missing:
    sys.exit(f"Colonnes obligatoires introuvables: {', '.join(missing)}")
# astype(str) keeps the concatenation valid when a column holds non-string
# values (e.g. numbers), which would otherwise raise a TypeError.
desc_text = df[col_desc].fillna('').astype(str)
if col_objet:
    df['text_brut'] = df[col_objet].fillna('').astype(str) + " " + desc_text
else:
    df['text_brut'] = desc_text
# Assign a pole to every ticket and drop the ones that match no rule.
df['pole'] = df[col_groupe].apply(map_groupe_to_pole)
df = df[df['pole'] != "AUTRES"]
# Show, for each pole, its five most frequent original groups.
banner = "=" * 60
print(banner)
print(" GROUPES FRESHSERVICE -> POLES")
print(banner)
for pole in sorted(df['pole'].unique()):
    pole_rows = df.loc[df['pole'] == pole]
    ticket_total = len(pole_rows)
    # value_counts() orders groups by decreasing frequency; keep the top five.
    top_groups = pole_rows[col_groupe].value_counts().head(5)
    print(f"\n--- {pole} ({ticket_total} tickets) ---")
    for group_label, group_count in top_groups.items():
        print(f" {group_label}: {group_count}")
# Show up to 3 sample tickets per pole (fixed seed, so runs are repeatable).
print("\n" + "=" * 60)
print(" EXEMPLES DE VRAIS TICKETS PAR POLE")
print("=" * 60)
for pole in sorted(df['pole'].unique()):
    # Filter once and reuse the result for both the size check and the sample.
    pole_rows = df[df['pole'] == pole]
    subset = pole_rows.sample(min(3, len(pole_rows)), random_state=42)
    print(f"\n{'='*40}")
    print(f" {pole}")
    print(f"{'='*40}")
    for _, row in subset.iterrows():
        raw_objet = row.get(col_objet, '')
        raw_desc = row.get(col_desc, '')
        # Missing values are shown as empty text rather than the literal "nan".
        objet = '' if pd.isna(raw_objet) else str(raw_objet)[:80]
        desc = '' if pd.isna(raw_desc) else str(raw_desc)[:200]
        print(f"\n Objet: {objet}")
        print(f" Desc: {desc}")