Spaces:
Runtime error
Runtime error
import os
import sys

import pandas as pd
# Path to the dataset (relative paths resolve against the working directory)
dataset_path = 'data/processed/url_dataset_cleaned.csv'


def label_ratio(label_counts: pd.Series) -> "float | None":
    """Return count(label 0) / count(label 1).

    Args:
        label_counts: Per-label row counts indexed by label value, as
            produced by ``Series.value_counts()``.

    Returns:
        The ratio as a float, or ``None`` when label 1 has no rows. Returning
        ``None`` avoids reporting the raw count of label 0 as if it were a
        ratio (and avoids dividing by zero).
    """
    ones = label_counts.get(1, 0)
    if ones == 0:
        return None
    return label_counts.get(0, 0) / ones


def print_label_report(df: pd.DataFrame) -> None:
    """Print the label distribution of *df* to stdout.

    Args:
        df: Dataset containing a ``label`` column.

    Raises:
        KeyError: If *df* has no ``label`` column.
    """
    heavy_rule = "=" * 50
    light_rule = "-" * 50
    total = len(df)

    print(heavy_rule)
    print("ANALÝZA DATASETU")
    print(heavy_rule)
    print(f"\nCelkový počet záznamov: {total}")
    print("\nRozdělenie labelov:")
    print(light_rule)

    # value_counts() skips NaN by default, so percentages are relative to all
    # rows and may not add up to 100% when some labels are missing.
    label_counts = df['label'].value_counts().sort_index()
    for label, count in label_counts.items():
        percentage = (count / total) * 100
        print(f"Label {label}: {count} záznamov ({percentage:.2f}%)")
    print(light_rule)

    ratio = label_ratio(label_counts)
    if ratio is None:
        print("\nPomer label 0 / label 1: n/a (label 1 chýba)")
    else:
        print(f"\nPomer label 0 / label 1: {ratio:.2f}")

    # Check for missing values
    missing = df['label'].isna().sum()
    if missing > 0:
        print(f"\nChýbajúce labely: {missing}")
    print("\n" + heavy_rule)


def main() -> None:
    """Load the dataset and print its label analysis.

    Errors are reported on stdout instead of being raised, matching the
    script's best-effort behaviour.
    """
    # Load the dataset; kept in its own try so `df` is guaranteed to be bound
    # in the handlers of the reporting step below.
    try:
        df = pd.read_csv(dataset_path)
    except FileNotFoundError:
        print(f"Súbor '{dataset_path}' nebol nájdený")
        # The relative path is resolved against the process working directory,
        # which is not necessarily sys.path[0] (the script's directory).
        print(f"Aktuálny adresár: {os.getcwd()}")
        return
    except Exception as e:
        print(f"Chyba: {e}")
        return

    # Label analysis
    try:
        print_label_report(df)
    except KeyError:
        print("Stĺpec 'label' neexistuje v datasete")
        print(f"Dostupné stĺpce: {list(df.columns)}")
    except Exception as e:
        print(f"Chyba: {e}")


main()