# rb1337's picture
# Upload 50 files
# 2cc7f91 verified
import os
import sys

import pandas as pd
# Path to the dataset. A relative path is resolved against the current
# working directory of the process, not against the script's location.
dataset_path = 'data/processed/url_dataset_cleaned.csv'


def _label_ratio(label_counts):
    """Return the "label 0 / label 1" count ratio formatted for the report.

    Args:
        label_counts: Series of per-label counts indexed by label value.

    Returns:
        The ratio formatted with two decimals, or ``"N/A"`` when label 1 does
        not occur (a ratio is undefined without a denominator).
    """
    # Explicit membership tests avoid Series.get()'s positional fallback for
    # integer keys on a non-integer index (older pandas versions).
    if 1 not in label_counts.index:
        return "N/A"
    zeros = label_counts.loc[0] if 0 in label_counts.index else 0
    return f"{zeros / label_counts.loc[1]:.2f}"


def build_label_report(df):
    """Build the label-distribution report for *df* as a list of lines.

    Args:
        df: DataFrame that contains a ``label`` column.

    Returns:
        The report lines in print order; joining them with a newline gives
        the full report text.

    Raises:
        KeyError: If *df* has no ``label`` column.
    """
    labels = df['label']
    # value_counts() skips missing values, so percentages are relative to all
    # rows while the missing labels are reported separately below.
    label_counts = labels.value_counts().sort_index()
    total = len(df)
    heavy_rule = "=" * 50
    light_rule = "-" * 50

    lines = [
        heavy_rule,
        "ANALÝZA DATASETU",
        heavy_rule,
        f"\nCelkový počet záznamov: {total}",
        "\nRozdělenie labelov:",
        light_rule,
    ]
    lines.extend(
        f"Label {label}: {count} záznamov ({count / total * 100:.2f}%)"
        for label, count in label_counts.items()
    )
    lines.append(light_rule)
    lines.append(f"\nPomer label 0 / label 1: {_label_ratio(label_counts)}")

    # Check for missing label values.
    missing = labels.isna().sum()
    if missing > 0:
        lines.append(f"\nChýbajúce labely: {missing}")

    lines.append("\n" + heavy_rule)
    return lines


def analyze_dataset(path=dataset_path):
    """Load the CSV at *path* and print its label-distribution report.

    Problems are reported on stdout instead of being raised: a missing file,
    a missing ``label`` column, or any other error while loading/analysing.

    Args:
        path: Location of the CSV file; defaults to ``dataset_path``.
    """
    # Load the dataset.
    try:
        df = pd.read_csv(path)
    except FileNotFoundError:
        print(f"Súbor '{path}' nebol nájdený")
        # Relative paths are resolved against the working directory, so that
        # is the directory worth showing (sys.path[0] is the script's folder).
        print(f"Aktuálny adresár: {os.getcwd()}")
        return
    except Exception as e:
        print(f"Chyba: {e}")
        return

    # Checked explicitly so the message never depends on an unbound `df`.
    if 'label' not in df.columns:
        print("Stĺpec 'label' neexistuje v datasete")
        print(f"Dostupné stĺpce: {list(df.columns)}")
        return

    # Label analysis.
    try:
        report_lines = build_label_report(df)
    except Exception as e:
        print(f"Chyba: {e}")
        return
    print("\n".join(report_lines))


# The script runs the analysis as soon as the file is executed.
analyze_dataset(dataset_path)