Spaces:

Michaela299
/

Team1

Sleeping

App Files Files Community

michaela299 committed on Dec 1, 2025

Commit

361cbfe

1 Parent(s): 643d9c2

Restore app files

Browse files

Files changed (5) hide show

best_model.pth +3 -0
data_pipeline.py +156 -0
model.py +30 -0
requirements.txt +8 -0
ui.py +71 -0

best_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1633d1300f4e2ae689ac619603499c5dacc876496079d72492e21254c7e3f9c9
+size 20831138

data_pipeline.py ADDED Viewed

	@@ -0,0 +1,156 @@

+import torch
+from torch.utils.data import DataLoader, default_collate
+from torchvision import transforms
+from datasets import load_dataset
+import torch.utils.data
+# ImageNet stats for normalization
+IMAGE_MEAN = [0.485, 0.456, 0.406]
+IMAGE_STD = [0.229, 0.224, 0.225]
+IMAGE_SIZE = 256
+# Transforms for training data (with advanced augmentation)
+train_transform = transforms.Compose([
+    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
+    # geometric augmentations
+    transforms.RandomHorizontalFlip(p=0.5),
+    transforms.RandomVerticalFlip(p=0.5), # Added vertical flip
+    transforms.RandomRotation(30),         # Increased rotation range
+    # color/appearance augmentations
+    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1), # Increased intensity
+    transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.0)), # Added blur
+    # final conversion
+    transforms.ToTensor(),
+    transforms.Normalize(IMAGE_MEAN, IMAGE_STD)
+])
+# Transforms for validation/test data (no augmentation)
+val_test_transform = transforms.Compose([
+    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
+    transforms.ToTensor(),
+    transforms.Normalize(IMAGE_MEAN, IMAGE_STD)
+])
+def apply_transforms(batch, transform_pipeline):
+    """Applies a transform pipeline to a batch of images and converts labels."""
+    batch['image'] = [transform_pipeline(img.convert("RGB")) for img in batch['image']]
+    # This line is crucial for converting labels to tensors for batching
+    batch['label'] = torch.tensor(batch['label'])
+    return batch
+def get_dataloaders(batch_size=32, use_prototype=True):
+    """
+    Loads, splits, and prepares the PlantVillage dataset, returning DataLoaders.
+    NOTE TO TEAM: The dataloaders yield a dictionary.
+    Access batches using:
+    batch = next(iter(loader))
+    images = batch['image']
+    labels = batch['label']
+    """
+    print("Loading and preparing dataset...")
+    # Load the full dataset from Hugging Face
+    full_dataset = load_dataset("DScomp380/plant_village", split='train')
+    if use_prototype:
+        # Use 20% of data for prototyping
+        print(f"Using 20% prototype dataset (approx {len(full_dataset) * 0.2:.0f} images)...")
+        data_subset = full_dataset.train_test_split(test_size=0.8, seed=42)['train']
+    else:
+        print(f"Using 100% full dataset ({len(full_dataset)} images)...")
+        data_subset = full_dataset
+    # 70/15/15 split for train/val/test
+    train_val_test_split = data_subset.train_test_split(test_size=0.3, seed=42)
+    train_dataset = train_val_test_split['train']
+    val_test_split = train_val_test_split['test'].train_test_split(test_size=0.5, seed=42)
+    val_dataset = val_test_split['train']
+    test_dataset = val_test_split['test']
+    print(f"Total images in prototype: {len(data_subset)}")
+    print(f"Training images:   {len(train_dataset)}")
+    print(f"Validation images: {len(val_dataset)}")
+    print(f"Test images:       {len(test_dataset)}")
+    print("--------------------")
+    # Apply the correct transforms to each dataset split
+    train_dataset.set_transform(lambda batch: apply_transforms(batch, train_transform))
+    val_dataset.set_transform(lambda batch: apply_transforms(batch, val_test_transform))
+    test_dataset.set_transform(lambda batch: apply_transforms(batch, val_test_transform))
+    # Define the collate_fn for batching tensors
+    collate_fn = default_collate
+    train_loader = DataLoader(
+        train_dataset,
+        batch_size=batch_size,
+        shuffle=True,
+        collate_fn=collate_fn
+    )
+    val_loader = DataLoader(
+        val_dataset,
+        batch_size=batch_size,
+        shuffle=False,
+        collate_fn=collate_fn
+    )
+    test_loader = DataLoader(
+        test_dataset,
+        batch_size=batch_size,
+        shuffle=False,
+        collate_fn=collate_fn
+    )
+    return train_loader, val_loader, test_loader
+if __name__ == "__main__":
+    print("Running data_pipeline.py as a standalone script...")
+    # Test the pipeline with a small batch size
+    train_loader, val_loader, test_loader = get_dataloaders(batch_size=4, use_prototype=True)
+    print("\n--- Testing Train Loader ---")
+    # Test train loader
+    try:
+        # FIX: Get the batch as a dictionary first
+        batch = next(iter(train_loader))
+        # FIX: Access the data using keys
+        images = batch['image']
+        labels = batch['label']
+        print(f"Image batch shape: {images.shape}")
+        print(f"Label batch shape: {labels.shape}")
+        # Assert correct shapes
+        assert images.shape == (4, 3, IMAGE_SIZE, IMAGE_SIZE)
+        assert labels.shape == (4,)
+        print("Train loader test PASSED.")
+    except Exception as e:
+        print(f"Train loader test FAILED: {e}")
+    print("\n--- Testing Validation Loader ---")
+    # Test validation loader
+    try:
+        # FIX: Get the batch as a dictionary first
+        batch = next(iter(val_loader))
+        # FIX: Access the data using keys
+        images = batch['image']
+        labels = batch['label']
+        print(f"Image batch shape: {images.shape}")
+        print(f"Label batch shape: {labels.shape}")
+        # Assert correct shapes
+        assert images.shape == (4, 3, IMAGE_SIZE, IMAGE_SIZE)
+        assert labels.shape == (4,)
+        print("Validation loader test PASSED.")
+    except Exception as e:
+        print(f"Validation loader test FAILED: {e}")
+    print("\nData pipeline script finished.")

model.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class BaselineCNN(nn.Module):
+  def __init__(self, num_classes=39):
+    super(BaselineCNN, self).__init__()
+    self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
+    self.bn1 = nn.BatchNorm2d(32)
+    self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
+    self.bn2 = nn.BatchNorm2d(64)
+    self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
+    self.bn3   = nn.BatchNorm2d(128)
+    self.pool = nn.MaxPool2d(2, 2)
+    self.fc = nn.Linear(128 * 32 * 32, num_classes)
+  def forward(self, x):
+    x = self.pool(F.relu(self.bn1(self.conv1(x))))
+    x = self.pool(F.relu(self.bn2(self.conv2(x))))
+    x = self.pool(F.relu(self.bn3(self.conv3(x))))
+    x = torch.flatten(x, 1)
+    x = self.fc(x)
+    return x

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+gradio
+torch
+torchvision
+datasets
+clearml
+pytest
+scikit-learn
+matplotlib

ui.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import gradio as gr
+import numpy as np
+from model import BaselineCNN
+from data_pipeline import val_test_transform, IMAGE_SIZE
+import torch
+from datasets import load_dataset
+# The dataset is loaded here only to read the class names of its label feature.
+# NOTE(review): this loads the whole 'train' split at import time just for the
+# names - consider storing the names alongside the checkpoint; confirm startup cost.
+dataset = load_dataset("DScomp380/plant_village", split="train")
+CLASS_NAMES = dataset.features["label"].names
+# Load the model
+# NOTE(review): presumably 39 is the number of outputs the checkpoint was trained
+# with and equals len(CLASS_NAMES) - verify, predict() indexes CLASS_NAMES by class id.
+CLASSES = 39
+model = BaselineCNN(num_classes=CLASSES)
+# Weights are mapped to the CPU when loading.
+# NOTE(review): torch.load unpickles the file; if the checkpoint could ever come
+# from an untrusted source, pass weights_only=True - confirm the installed torch supports it.
+model.load_state_dict(torch.load("best_model.pth", map_location=torch.device('cpu')))
+# Switch to inference mode (affects the BatchNorm layers of the model)
+model.eval()
+def predict(input_image):
+    #resize to models image size, convert to tensor, normalize values
+    image_tensor = val_test_transform(input_image)
+    #add new dimension at index 0 so each image has a batch size of atleast 1
+    image_tensor = image_tensor.unsqueeze(0)
+    #run inference
+    with torch.no_grad():
+        #pass the batch through the model
+        output = model(image_tensor)
+    #convert to probabilitiees
+    probabilities = torch.nn.functional.softmax(output,dim=1)[0]
+    numPredictionsToShow = 10
+    #get the top 5 predictions
+    topProbs, TopClassIndicies = torch.topk(probabilities, numPredictionsToShow)
+    #returns 5 largest probabilities
+    #create the output dictionary
+    result = {}
+    for rank in range(numPredictionsToShow):#loop through top 5
+        classIndex = TopClassIndicies[rank].item()#get the int value from the tensor at index rank
+        className = CLASS_NAMES[classIndex]#get human readable class name
+        probabilityValue = topProbs[rank].item()#convert prob from tensor to python float
+        result[className] = probabilityValue
+    return result
+# Build the Gradio UI: an image upload next to a label output, wired to
+# predict() through a Submit button.
+with gr.Blocks(title="Plant Disease Classifier") as app:
+    gr.Markdown("# Plant Disease Classification")
+    gr.Markdown("Upload an image of a plant leaf to classify its disease.")
+    with gr.Row():
+        # type="pil" makes Gradio pass the upload to predict() as a PIL image
+        image_input = gr.Image(type="pil", label="Upload Leaf Image")
+        label_output = gr.Label(label="Predicted Disease")
+    # Placeholder: the list of example images is currently empty
+    gr.Examples(
+        examples =[], inputs=image_input)
+    submit_btn = gr.Button("Submit")
+    # Run predict() on the uploaded image when the button is clicked
+    submit_btn.click(fn=predict, inputs=image_input, outputs=label_output)
+    # NOTE(review): the three commented lines below look like leftover
+    # arguments of an earlier gr.Interface(...) call - confirm and delete.
+    #fn=predict,
+    # inputs=gr.Image(type="pil"),
+    # outputs=gr.Label(num_top_classes=3))
+# Start the web app only when this file is executed as a script
+if __name__ == "__main__":
+    # NOTE(review): ssr_mode=False presumably turns off Gradio's server-side
+    # rendering - confirm against the installed Gradio version.
+    app.launch(ssr_mode=False)