"""
Inspect the real tickets of each pole to understand the vocabulary actually used.
"""
import pandas as pd
import sys
import csv

# Emit UTF-8 on stdout so the French ticket text prints with this encoding.
sys.stdout.reconfigure(encoding="utf-8")
# Raise the csv module's per-field size cap to 10,000,000
# (presumably because some ticket descriptions exceed the default cap).
csv.field_size_limit(10_000_000)

# Accented forms of "e" (e-acute, e-grave, e-circumflex, e-diaeresis) folded
# onto plain "e". Written as escapes so the table survives a re-encoding or
# ASCII-folding of this file.
_ACCENTS_E = str.maketrans("\u00e9\u00e8\u00ea\u00eb", "eeee")


def normaliser(texte):
    """Return a canonical form of *texte* for column-name matching.

    The value is converted with ``str()``, stripped of surrounding
    whitespace, lower-cased, and accented variants of "e" are replaced
    by a plain "e".

    Args:
        texte: Any value; non-strings are converted with ``str()``.

    Returns:
        The normalised string.
    """
    # Lower-casing first means upper-case accented letters are folded too.
    return str(texte).strip().lower().translate(_ACCENTS_E)

def map_groupe_to_pole(groupe):
    """Map a ticket group name to the pole it belongs to.

    The group is converted with ``str()``, upper-cased and stripped, then
    tested against the rules below in order; the first match wins. Apart
    from the exact ``"RUN"`` comparison, keywords are matched as plain
    substrings, so a short keyword such as ``"BI"`` also matches inside a
    longer word.

    Args:
        groupe: Group name; any value is accepted and stringified.

    Returns:
        One of ``"MONITORING"``, ``"RUN"``, ``"DATA & BI"``,
        ``"COMMERCE & MAGASINS"``, ``"FINANCE & SUPPORT"`` or ``"AUTRES"``
        when no rule matches.
    """
    label = str(groupe).upper().strip()

    def mentions(*keywords):
        # True as soon as one keyword occurs anywhere in the label.
        for keyword in keywords:
            if keyword in label:
                return True
        return False

    if mentions("OPCON", "CHAINE DE NUIT", "CRITICITE", "CRITICIT"):
        return "MONITORING"
    if label == "RUN":
        return "RUN"
    if mentions("DATA", "BI"):
        return "DATA & BI"
    if mentions("COMMERCE", "LOGISTIQUE"):
        return "COMMERCE & MAGASINS"
    if mentions("FINANCE", "OFFRE", "WEB", "RESSOURCES", "PRIORITAIRE",
                "SUPPORT", "INFRA", "ASTREINTE"):
        return "FINANCE & SUPPORT"
    return "AUTRES"

# Loading: read every ticket export that can be read. A file that is missing
# or unreadable is skipped (best effort), but the reason is reported on stderr
# instead of being silently discarded.
dfs = []
for f in ["Tickets_1.csv", "Tickets_2.csv"]:
    try:
        # sep=None makes the python engine detect the delimiter;
        # utf-8-sig drops a leading BOM if the export has one.
        dfs.append(pd.read_csv(f, sep=None, engine='python', encoding='utf-8-sig'))
    except (OSError, ValueError, csv.Error) as exc:
        # OSError: missing/unreadable file. ValueError covers pandas parser
        # and empty-data errors as well as Unicode decoding errors.
        print(f"Avertissement: impossible de lire {f}: {exc}", file=sys.stderr)

# pd.concat([]) would fail with an unrelated "No objects to concatenate"
# message; stop here with an explicit one.
if not dfs:
    sys.exit("Erreur: aucun fichier de tickets n'a pu etre charge.")

df = pd.concat(dfs, ignore_index=True)

# Index the real column headers by their normalised form (see normaliser),
# then look up the three columns this script relies on. A column that is
# absent resolves to None.
mapping = dict(zip(map(normaliser, df.columns), df.columns))
col_objet, col_desc, col_groupe = (
    mapping.get(cle) for cle in ('objet', 'description', 'groupe')
)

print(f"Colonnes trouvees: objet={col_objet}, description={col_desc}, groupe={col_groupe}")
print(f"Total tickets: {len(df)}\n")

# The pole mapping needs the group column and the free text needs at least one
# of subject/description: stop with a clear message rather than a
# "KeyError: None" when a column could not be resolved.
if col_groupe is None:
    sys.exit("Erreur: colonne 'groupe' introuvable dans les fichiers de tickets.")
colonnes_texte = [col for col in (col_objet, col_desc) if col is not None]
if not colonnes_texte:
    sys.exit("Erreur: ni colonne 'objet' ni colonne 'description' dans les fichiers de tickets.")

# Raw text = subject, then description, separated by a space; whichever of the
# two is absent is left out. Missing cells count as empty strings.
df['text_brut'] = df[colonnes_texte[0]].fillna('')
for col_suivante in colonnes_texte[1:]:
    df['text_brut'] = df['text_brut'] + " " + df[col_suivante].fillna('')

df['pole'] = df[col_groupe].apply(map_groupe_to_pole)
# Tickets whose group matched no known pole are excluded from the rest of the analysis.
df = df[df['pole'] != "AUTRES"]

# Show, for each pole, the original groups that feed it (five most frequent)
separateur = "=" * 60
print(separateur)
print("  GROUPES FRESHSERVICE -> POLES")
print(separateur)
for pole in sorted(df['pole'].unique()):
    tickets_du_pole = df.loc[df['pole'] == pole]
    # value_counts() orders groups by descending ticket count.
    top_groupes = tickets_du_pole[col_groupe].value_counts().head(5)
    print(f"\n--- {pole} ({len(tickets_du_pole)} tickets) ---")
    for nom_groupe, nb_tickets in top_groupes.items():
        print(f"    {nom_groupe}: {nb_tickets}")

# Show up to 3 example tickets per pole
print("\n" + "=" * 60)
print("  EXEMPLES DE VRAIS TICKETS PAR POLE")
print("=" * 60)

for pole in sorted(df['pole'].unique()):
    # Filter once and reuse it for both the sample size and the sampling.
    tickets_du_pole = df[df['pole'] == pole]
    # Fixed random_state so repeated runs show the same examples.
    subset = tickets_du_pole.sample(min(3, len(tickets_du_pole)), random_state=42)
    print(f"\n{'='*40}")
    print(f"  {pole}")
    print(f"{'='*40}")
    for _, row in subset.iterrows():
        objet_brut = row.get(col_objet, '')
        desc_brut = row.get(col_desc, '')
        # Empty CSV cells are read as NaN; print them as blank instead of
        # the literal "nan". Text is truncated to keep the report readable.
        objet = '' if pd.isna(objet_brut) else str(objet_brut)[:80]
        desc = '' if pd.isna(desc_brut) else str(desc_brut)[:200]
        print(f"\n  Objet: {objet}")
        print(f"  Desc:  {desc}")