Spaces:
Sleeping
Sleeping
File size: 1,234 Bytes
# src/data_processing.py
import pandas as pd
import os
def load_data(data_path=None):
    """
    Load the SmartCrop dataset.

    Args:
        data_path: Optional path of the CSV file to read. When omitted
            (None), the file 'SmartCrop-Dataset.csv' in the 'data'
            directory one level above this module's directory is read.

    Returns:
        df: pandas DataFrame holding the parsed CSV contents

    Raises:
        FileNotFoundError: if the CSV file does not exist (raised by
            pandas.read_csv).
    """
    if data_path is None:
        # Default location is resolved relative to this source file, not the
        # current working directory, so the call works from any cwd.
        data_path = os.path.join(os.path.dirname(__file__), '..', 'data', 'SmartCrop-Dataset.csv')
    df = pd.read_csv(data_path)
    return df
def check_missing_values(df):
    """
    Count the missing entries in each column of the dataset.

    Returns:
        Series indexed by column name, giving the number of missing
        values found in that column
    """
    # Boolean mask of missing cells; summing it counts the True cells per column.
    missing_mask = df.isna()
    return missing_mask.sum()
def preprocess_features(df, feature_cols=None, target_col='label'):
    """
    Split the dataset into a feature table and a target column.

    Args:
        df: pandas DataFrame containing the feature and target columns.
        feature_cols: Optional iterable of column names to use as features.
            Defaults to N, P, K, temperature, humidity, ph, rainfall.
        target_col: Name of the target column. Defaults to 'label'.

    Returns:
        X: features DataFrame (the selected feature columns, in order)
        y: target Series

    Raises:
        KeyError: if any requested feature or target column is absent from
            df; the message lists every missing column.
    """
    if feature_cols is None:
        feature_cols = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
    else:
        # Accept any iterable (tuple, Index, generator) and freeze it as a list
        # so that df[...] performs column selection.
        feature_cols = list(feature_cols)

    # Check up front so the caller sees all missing columns at once instead of
    # only the first failing lookup.
    missing = [col for col in feature_cols + [target_col] if col not in df.columns]
    if missing:
        raise KeyError(f"Dataset is missing required column(s): {missing}")

    X = df[feature_cols]
    y = df[target_col]
    return X, y
if __name__ == "__main__":
    # Smoke test: load the dataset, then report its size, its per-column
    # missing-value counts, and the shapes of the feature/target split.
    dataset = load_data()
    print("Dataset shape:", dataset.shape)

    missing_report = check_missing_values(dataset)
    print("Missing values:\n", missing_report)

    features, target = preprocess_features(dataset)
    print("Features shape:", features.shape)
    print("Target shape:", target.shape)
|