Spaces:

HashirAwaiz
/

Wildfire-Intelligence-MLOps

Sleeping

App Files Files Community

HashirAwaiz committed on Dec 18, 2025

Commit

e0a854f

verified ·

1 Parent(s): 28c25ef

Delete models/src

Browse files

Files changed (1) hide show

models/src/preprocessing.py +0 -51

models/src/preprocessing.py DELETED Viewed

@@ -1,51 +0,0 @@
-import pandas as pd
-import os
-# Input and output CSV locations; both are relative paths, so they resolve against the current working directory
-RAW_DATA_PATH = "data/raw/california_wildfire.csv"  # raw CSV that process_data() reads
-PROCESSED_DATA_PATH = "data/processed/Wildfire_Dataset.csv"  # filtered CSV that process_data() writes
-# Bounding box used as the California filter; process_data() keeps rows inside it, bounds inclusive
-LAT_MIN, LAT_MAX = 32.5, 42.0  # presumably decimal degrees of latitude — confirm against the dataset
-LON_MIN, LON_MAX = -124.5, -114.0  # presumably decimal degrees of longitude (negative = west) — confirm
-def process_data():  # Filter RAW_DATA_PATH to in-box rows with complete weather columns and save them to PROCESSED_DATA_PATH; no parameters, returns None, reports progress and errors via print.
-    print("🔥 Starting Data Processing... (Chunking 9.5M rows)")
-    if not os.path.exists(RAW_DATA_PATH):  # a missing input file is reported on stdout, not raised
-        print(f"❌ Error: File not found at {RAW_DATA_PATH}")
-        return
-    chunk_size = 100000  # number of rows per chunk requested from read_csv
-    chunks = []  # filtered chunks; all of them stay in memory until the concat below
-    # Read the CSV chunk_size rows at a time instead of all at once (the original author cites a ~1.3GB raw file)
-    for i, chunk in enumerate(pd.read_csv(RAW_DATA_PATH, chunksize=chunk_size)):
-        if i % 10 == 0:  # progress message on every 10th chunk only
-            print(f"   Processing chunk {i}...")
-        # Keep rows inside the bounding box (inclusive). NOTE(review): a chunk lacking 'latitude' or 'longitude' is skipped with no message; the original comment mentioned "new column names", the older names are not visible here
-        if 'latitude' in chunk.columns and 'longitude' in chunk.columns:
-            cali_chunk = chunk[
-                (chunk['latitude'] >= LAT_MIN) &
-                (chunk['latitude'] <= LAT_MAX) &
-                (chunk['longitude'] >= LON_MIN) &
-                (chunk['longitude'] <= LON_MAX)
-            ]
-            # Drop rows with a missing value in any of 'bi', 'tmmn', 'rmax', 'vs' (the author's "critical weather info"). NOTE(review): pandas raises KeyError here if one of these columns is absent
-            cali_chunk = cali_chunk.dropna(subset=['bi', 'tmmn', 'rmax', 'vs'])
-            chunks.append(cali_chunk)  # appended even when the filtered chunk has zero rows
-    if chunks:  # true as soon as any chunk had both coordinate columns, even if every filtered chunk is empty
-        df_cali = pd.concat(chunks)
-        print(f"✅ Filtered Data Shape: {df_cali.shape}")
-        # Create the output directory if it does not exist, then write the CSV without the index column
-        os.makedirs(os.path.dirname(PROCESSED_DATA_PATH), exist_ok=True)
-        df_cali.to_csv(PROCESSED_DATA_PATH, index=False)
-        print(f"💾 Saved processed data to: {PROCESSED_DATA_PATH}")
-    else:  # reached only when nothing was appended in the loop above; no file is written in that case
-        print("⚠️ No data found for the specified region.")
-if __name__ == "__main__":  # run the processing only when this file is executed as a script, not when it is imported
-    process_data()