Spaces:
Runtime error
Runtime error
File size: 1,274 Bytes
import os
import sys

import pandas as pd
# Path to the dataset (relative paths resolve against the working directory)
dataset_path = 'data/processed/url_dataset_cleaned.csv'


def build_label_report(df):
    """Return the label-distribution report for *df* as printable text.

    Args:
        df: DataFrame that is expected to contain a ``label`` column.

    Returns:
        The multi-line report (total row count, per-label counts and
        percentages, the label 0 / label 1 ratio and, if any, the number of
        missing labels). When the ``label`` column is absent, a two-line
        message listing the available columns is returned instead.
    """
    if 'label' not in df.columns:
        return "\n".join([
            "Stĺpec 'label' neexistuje v datasete",
            f"Dostupné stĺpce: {list(df.columns)}",
        ])

    total = len(df)
    # value_counts() drops NaN by default, so missing labels are counted
    # separately further down.
    label_counts = df['label'].value_counts().sort_index()

    lines = [
        "=" * 50,
        "ANALÝZA DATASETU",
        "=" * 50,
        f"\nCelkový počet záznamov: {total}",
        "\nRozdělenie labelov:",
        "-" * 50,
    ]
    for label, count in label_counts.items():
        percentage = (count / total) * 100
        lines.append(f"Label {label}: {count} záznamov ({percentage:.2f}%)")
    lines.append("-" * 50)

    negatives = label_counts.get(0, 0)
    positives = label_counts.get(1, 0)
    if positives > 0:
        lines.append(f"\nPomer label 0 / label 1: {negatives / positives:.2f}")
    else:
        # Without any label-1 rows the ratio is undefined; dividing by a
        # stand-in value of 1 would print the raw label-0 count as a "ratio".
        lines.append("\nPomer label 0 / label 1: n/a (žiadny záznam s label 1)")

    # Check for missing values
    missing = df['label'].isna().sum()
    if missing > 0:
        lines.append(f"\nChýbajúce labely: {missing}")
    lines.append("\n" + "=" * 50)
    return "\n".join(lines)


def analyze_dataset(path=dataset_path):
    """Load the CSV file at *path* and return its label report.

    Args:
        path: Location of the CSV file; defaults to ``dataset_path``.

    Returns:
        The text produced by ``build_label_report`` on success, otherwise a
        human-readable error message. No exception derived from ``Exception``
        propagates to the caller.
    """
    try:
        return build_label_report(pd.read_csv(path))
    except FileNotFoundError:
        return "\n".join([
            f"Súbor '{path}' nebol nájdený",
            # The relative path is resolved against the working directory,
            # which is not necessarily sys.path[0] (the script's directory).
            f"Aktuálny adresár: {os.getcwd()}",
        ])
    except Exception as e:  # script boundary: report the error, no traceback
        return f"Chyba: {e}"


print(analyze_dataset())