Spaces:

Elkristobal59
/

GDM-Aide-RUN-V2

Running

App Files Files Community

GDM-Aide-RUN-V2 / explore_data.py

Elkristobal59

feat: optimize model loading, clean imports, run V4 training and fix tests

187f074 29 days ago

Raw

History Blame Contribute Delete

2.53 kB

	"""
	Examine les vrais tickets par pole pour comprendre le vocabulaire reel.
	"""
	import pandas as pd
	import sys
	import csv

	# Switch stdout to UTF-8 for everything this script prints
	# (presumably to avoid encoding errors on consoles whose default
	# encoding is not UTF-8 -- TODO confirm).
	sys.stdout.reconfigure(encoding='utf-8')
	# Set the csv module's maximum field size to 10,000,000; presumably so
	# that very long ticket fields can still be parsed -- TODO confirm.
	csv.field_size_limit(10000000)

	def normaliser(texte):
	    """Return ``texte`` as a stripped, lower-case, accent-free string.

	    The value is first converted with ``str()``, so non-string inputs
	    (numbers, ``None``) are accepted.  Diacritics are removed by
	    decomposing each character (NFKD) and dropping the combining marks,
	    so a header spelled with or without accents yields the same key.

	    The previous implementation chained ``.replace('e', 'e')`` calls,
	    which replaced a letter by itself and therefore never removed any
	    accent.

	    Args:
	        texte: Any value; typically a column header.

	    Returns:
	        The normalised string.
	    """
	    # Function-scope import keeps this block self-contained.
	    import unicodedata

	    nettoye = str(texte).strip().lower()
	    decompose = unicodedata.normalize('NFKD', nettoye)
	    # After decomposition an accented letter is "base letter + combining
	    # mark"; keeping only the non-combining characters strips the accent.
	    return ''.join(car for car in decompose if not unicodedata.combining(car))

	def map_groupe_to_pole(groupe):
	    """Translate a raw group label into its pole name.

	    The label is converted with ``str()``, upper-cased and stripped.  It
	    is then compared against an ordered list of keyword rules; the first
	    rule with a keyword contained in the label wins.  A label that is
	    exactly ``"RUN"`` maps to ``"RUN"``.

	    Args:
	        groupe: Raw group value (any type; it is stringified).

	    Returns:
	        One of ``"MONITORING"``, ``"RUN"``, ``"DATA & BI"``,
	        ``"COMMERCE & MAGASINS"``, ``"FINANCE & SUPPORT"`` or
	        ``"AUTRES"`` when nothing matches.
	    """
	    libelle = str(groupe).upper().strip()

	    # The exact-match rule can be tested first: the string "RUN" contains
	    # none of the keywords below, so rule order is not affected.
	    if libelle == "RUN":
	        return "RUN"

	    # Ordered (pole, keywords) rules; matching is by substring.
	    regles = (
	        ("MONITORING", ("OPCON", "CHAINE DE NUIT", "CRITICITE", "CRITICIT")),
	        ("DATA & BI", ("DATA", "BI")),
	        ("COMMERCE & MAGASINS", ("COMMERCE", "LOGISTIQUE")),
	        (
	            "FINANCE & SUPPORT",
	            ("FINANCE", "OFFRE", "WEB", "RESSOURCES", "PRIORITAIRE",
	             "SUPPORT", "INFRA", "ASTREINTE"),
	        ),
	    )
	    return next(
	        (
	            pole
	            for pole, mots_cles in regles
	            if any(mot in libelle for mot in mots_cles)
	        ),
	        "AUTRES",
	    )

	# Loading: read every ticket export that is available.  A file that is
	# missing or cannot be parsed is reported on stderr and skipped, rather
	# than being silently ignored by a bare ``except``.
	dfs = []
	for fichier in ["Tickets_1.csv", "Tickets_2.csv"]:
	    try:
	        dfs.append(pd.read_csv(fichier, sep=None, engine='python', encoding='utf-8-sig'))
	    except (OSError, ValueError, csv.Error) as err:
	        # OSError covers missing/unreadable files; ValueError covers pandas
	        # parser errors, empty files and decoding errors; csv.Error covers
	        # delimiter-sniffing failures.
	        print(f"[WARN] Lecture impossible de {fichier}: {err}", file=sys.stderr)

	# pd.concat raises an opaque ValueError on an empty list; stop with a
	# clear message instead.
	if not dfs:
	    sys.exit("Aucun fichier de tickets n'a pu etre charge.")

	df = pd.concat(dfs, ignore_index=True)

	# Map each normalised header to its original spelling so the columns can
	# be looked up regardless of case or surrounding whitespace.
	mapping = {normaliser(c): c for c in df.columns}
	col_desc = mapping.get('description')
	col_objet = mapping.get('objet')
	col_groupe = mapping.get('groupe')

	print(f"Colonnes trouvees: objet={col_objet}, description={col_desc}, groupe={col_groupe}")
	print(f"Total tickets: {len(df)}\n")

	# 'description' and 'groupe' are indexed unconditionally below; without
	# this check a missing column surfaces as an unhelpful "KeyError: None".
	manquantes = [
	    nom
	    for nom, colonne in (('description', col_desc), ('groupe', col_groupe))
	    if colonne is None
	]
	if manquantes:
	    sys.exit(f"Colonnes obligatoires introuvables: {', '.join(manquantes)}")

	# astype(str) keeps the concatenation valid even when a column was parsed
	# as a non-string dtype.
	description = df[col_desc].fillna('').astype(str)
	if col_objet:
	    df['text_brut'] = df[col_objet].fillna('').astype(str) + " " + description
	else:
	    df['text_brut'] = description

	# Assign a pole to every ticket and drop the ones that match no rule.
	df['pole'] = df[col_groupe].apply(map_groupe_to_pole)
	df = df[df['pole'] != "AUTRES"]

	# Show, for each pole, the five most frequent original group labels.
	banniere = "=" * 60
	print(banniere)
	print(" GROUPES FRESHSERVICE -> POLES")
	print(banniere)
	# groupby iterates the poles in sorted key order and keeps the original
	# row order inside each group.
	for nom_pole, tickets_pole in df.groupby('pole', sort=True):
	    top_groupes = tickets_pole[col_groupe].value_counts().head(5)
	    print(f"\n--- {nom_pole} ({len(tickets_pole)} tickets) ---")
	    for libelle, nombre in top_groupes.items():
	        print(f" {libelle}: {nombre}")

	# Show up to three sample tickets per pole (fixed seed, so the same
	# tickets are picked on every run).
	banniere_large = "=" * 60
	print("\n" + banniere_large)
	print(" EXEMPLES DE VRAIS TICKETS PAR POLE")
	print(banniere_large)

	separateur = '=' * 40
	for nom_pole in sorted(df['pole'].unique()):
	    tickets_pole = df[df['pole'] == nom_pole]
	    echantillon = tickets_pole.sample(min(3, len(tickets_pole)), random_state=42)
	    print(f"\n{separateur}")
	    print(f" {nom_pole}")
	    print(f"{separateur}")
	    for _, ticket in echantillon.iterrows():
	        # Truncate so one ticket stays readable on the console.
	        titre = str(ticket.get(col_objet, ''))[:80]
	        extrait = str(ticket.get(col_desc, ''))[:200]
	        print(f"\n Objet: {titre}")
	        print(f" Desc: {extrait}")