Spaces:

Habith
/

GoGenix_MRI_Brain

Build error

App Files Community

Habith committed on Mar 3

Commit

783b5ee

verified ·

1 Parent(s): 2e9520d

Update app.py

Browse files

Files changed (1) hide show

app.py +133 -170

app.py CHANGED Viewed

@@ -8,11 +8,10 @@ from transformers import (
     TrainingArguments,
     Trainer
 )
-from datasets import load_dataset, Dataset, Image
 import numpy as np
 from huggingface_hub import HfApi
 import os
-import json
 from PIL import Image as PILImage
 # Configuration
@@ -22,73 +21,136 @@ BASE_MODEL = "Falconsai/nsfw_image_detection"
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 def train_and_save_model():
-    """Train the model using YOLO format dataset"""
     try:
-        print("Loading Ultralytics/Brain-tumor dataset (YOLO format)...")
-        # Load the dataset
-        dataset = load_dataset(HF_DATASET)
-        print(f"Dataset splits available: {list(dataset.keys())}")
-        # Check dataset structure
-        if 'valid' not in dataset or 'test' not in dataset:
-            return "❌ Error: Dataset must contain 'valid' and 'test' splits"
-        train_split = dataset['valid']
-        test_split = dataset['test']
-        print("Analyzing YOLO dataset structure...")
-        # For YOLO datasets, we need to check if images and labels are separate
-        # Let's examine the structure
         if len(train_split) > 0:
             sample = train_split[0]
             print(f"Sample keys: {list(sample.keys())}")
-            # Check if it's YOLO format (has image path and labels path)
-            if 'image' in sample:
-                print(f"Image type: {type(sample['image'])}")
-            if 'label' in sample:
-                print(f"Label type: {type(sample['label'])}")
-                if isinstance(sample['label'], list) and len(sample['label']) > 0:
-                    print(f"First label sample: {sample['label'][0]}")
-        # Since Ultralytics datasets are typically for object detection,
-        # we'll convert them to classification by checking if tumor is present
-        def yolo_to_classification(item):
-            """Convert YOLO object detection labels to classification labels"""
-            image = item['image']
-            labels = item.get('label', [])
-            # For binary classification: 0 = no tumor, 1 = tumor present
-            # If there are any labels (bounding boxes), it means tumor is present
-            has_tumor = 1 if labels and len(labels) > 0 else 0
             return {
                 'image': image,
-                'label': has_tumor
             }
-        # Apply conversion
-        print("Converting YOLO labels to classification...")
-        train_classification = train_split.map(yolo_to_classification)
-        test_classification = test_split.map(yolo_to_classification)
-        # Count tumor vs no_tumor
         tumor_count = sum(1 for item in train_classification if item['label'] == 1)
         no_tumor_count = sum(1 for item in train_classification if item['label'] == 0)
-        print(f"Training set - Tumors: {tumor_count}, No tumors: {no_tumor_count}")
-        # Define class names for binary classification
         class_names = ["no_tumor", "tumor"]
         num_classes = 2
-        print(f"Using binary classification: {class_names}")
-        # Define transforms for MRI images
         transform = transforms.Compose([
             transforms.Resize((224, 224)),
             transforms.Grayscale(num_output_channels=3),
@@ -111,21 +173,17 @@ def train_and_save_model():
                 label = item['label']
                 if self.transform:
-                    # Ensure image is PIL Image
                     if not isinstance(image, PILImage.Image):
                         image = PILImage.fromarray(image)
                     image = self.transform(image)
                 return image, label
-        # Create dataset objects
         train_dataset = MRIDataset(train_classification, transform=transform)
         test_dataset = MRIDataset(test_classification, transform=transform)
-        print(f"Training samples: {len(train_dataset)}")
-        print(f"Test samples: {len(test_dataset)}")
-        # Load base model
         print("Loading base model...")
         model = AutoModelForImageClassification.from_pretrained(
             BASE_MODEL,
@@ -140,10 +198,10 @@ def train_and_save_model():
         # Training arguments
         training_args = TrainingArguments(
             output_dir="./results",
-            num_train_epochs=10,
             per_device_train_batch_size=8,
             per_device_eval_batch_size=8,
-            warmup_steps=500,
             weight_decay=0.01,
             logging_dir="./logs",
             logging_steps=10,
@@ -155,7 +213,7 @@ def train_and_save_model():
             remove_unused_columns=False,
         )
-        # Metrics function
         def compute_metrics(eval_pred):
             predictions, labels = eval_pred
             predictions = np.argmax(predictions, axis=1)
@@ -175,165 +233,70 @@ def train_and_save_model():
         print("Starting training...")
         train_result = trainer.train()
-        # Save model locally
         trainer.save_model(f"./{CUSTOM_MODEL_NAME}")
         processor.save_pretrained(f"./{CUSTOM_MODEL_NAME}")
-        # Push to Hugging Face Hub
-        print("Pushing model to Hugging Face Hub...")
-        trainer.push_to_hub(commit_message="Train Brain Tumor classifier (YOLO to Classification)")
-        # Display training results
         train_accuracy = train_result.metrics.get('train_accuracy', 'N/A')
         eval_accuracy = train_result.metrics.get('eval_accuracy', 'N/A')
         result_message = f"""
-        ✅ Training completed successfully!
         Model: {CUSTOM_MODEL_NAME}
-        Dataset: {HF_DATASET} (YOLO format)
-        Task: Binary Classification (Tumor Detection)
-        Classes: {', '.join(class_names)}
-        Training Samples: {len(train_dataset)}
-        Test Samples: {len(test_dataset)}
         Training Accuracy: {train_accuracy}
         Validation Accuracy: {eval_accuracy}
-        Tumor/No-Tumor Ratio: {tumor_count}/{no_tumor_count}
-        Model has been saved and pushed to Hugging Face Hub.
         """
         return result_message
     except Exception as e:
         import traceback
-        error_details = traceback.format_exc()
-        error_message = f"""
-        ❌ Error during training:
-        Error Type: {type(e).__name__}
-        Error Message: {str(e)}
-        Detailed Traceback:
-        {error_details}
-        """
-        return error_message
 def classify_mri(image):
-    """Classify a new MRI image using the trained model"""
     try:
-        # Load your custom model
         model = AutoModelForImageClassification.from_pretrained(CUSTOM_MODEL_NAME)
         processor = AutoImageProcessor.from_pretrained(CUSTOM_MODEL_NAME)
         model.to(DEVICE)
         model.eval()
-        # Preprocess image
         inputs = processor(image, return_tensors="pt").to(DEVICE)
-        # Predict
         with torch.no_grad():
             outputs = model(**inputs)
             predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
-        # Binary classification results
         class_names = ["No Tumor", "Tumor Detected"]
-        results = {
-            class_names[0]: float(predictions[0][0]),  # No tumor probability
-            class_names[1]: float(predictions[0][1])   # Tumor probability
-        }
-        # Add diagnostic information
-        tumor_prob = float(predictions[0][1])
-        if tumor_prob > 0.7:
-            diagnosis = "🟢 Likely no tumor"
-        elif tumor_prob > 0.3:
-            diagnosis = "🟡 Uncertain - consult specialist"
-        else:
-            diagnosis = "🔴 Possible tumor detected"
-        return {
-            "classification": results,
-            "diagnosis": diagnosis,
-            "tumor_probability": tumor_prob
-        }
     except Exception as e:
-        return f"⚠️ Model not trained yet or unavailable. Error: {str(e)}"
-# Gradio Interface
-with gr.Blocks(title="GoGenix MRI Brain Tumor Classifier") as demo:
-    gr.Markdown("# 🧠 GoGenix MRI Brain Tumor Classifier")
-    gr.Markdown(f"**Dataset**: {HF_DATASET} (YOLO Format) | **Base Model**: {BASE_MODEL}")
-    with gr.Tab("🚀 Train Model"):
-        gr.Markdown("### Train GoGenix_MRI_Brain Model")
-        gr.Markdown(f"Using YOLO format dataset: `{HF_DATASET}`")
-        gr.Markdown("**Note**: Converting object detection labels to binary classification")
-        train_btn = gr.Button("Start Training", variant="primary", size="lg")
-        output_text = gr.Textbox(
-            label="Training Status",
-            lines=20,
-            placeholder="Click 'Start Training' to begin..."
-        )
-        train_btn.click(
-            fn=train_and_save_model,
-            outputs=output_text
-        )
-    with gr.Tab("🔍 Classify MRI"):
-        gr.Markdown("### Upload MRI Image for Tumor Detection")
-        gr.Markdown("**Binary Classification**: Tumor vs No Tumor")
-        image_input = gr.Image(
-            type="pil",
-            label="Brain MRI Scan",
-            height=300
-        )
-        classify_btn = gr.Button("Analyze Scan", variant="secondary")
-        with gr.Row():
-            result_label = gr.Label(
-                label="Classification Results",
-                num_top_classes=2
-            )
-            diagnosis_text = gr.Textbox(
-                label="Diagnostic Suggestion",
-                interactive=False
-            )
-        def process_classification(image):
-            result = classify_mri(image)
-            if isinstance(result, dict) and 'classification' in result:
-                return result['classification'], result.get('diagnosis', '')
-            else:
-                return {"Error": 1.0}, result
-        classify_btn.click(
-            fn=process_classification,
-            inputs=image_input,
-            outputs=[result_label, diagnosis_text]
-        )
-    with gr.Tab("📊 Dataset Info"):
-        gr.Markdown("### YOLO Dataset Information")
-        gr.Markdown(f"""
-        **Dataset**: {HF_DATASET}
-        **Format**: YOLO (You Only Look Once) Object Detection
-        **Original Structure**:
-        - `images/` folder: Contains MRI scans
-        - `labels/` folder: Contains bounding box annotations
-        **Converted to**: Binary Classification
-        - **No Tumor**: No bounding boxes in labels
-        - **Tumor**: One or more bounding boxes present
-        **Splits**: test, valid
-        """)
 if __name__ == "__main__":
     demo.launch()

     TrainingArguments,
     Trainer
 )
+from datasets import load_dataset, DatasetDict
 import numpy as np
 from huggingface_hub import HfApi
 import os
 from PIL import Image as PILImage
 # Configuration
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 def train_and_save_model():
+    """Train the model with explicit dataset format handling"""
     try:
+        print("Loading Ultralytics/Brain-tumor dataset with explicit format...")
+        # Try multiple loading methods to handle format detection issues
+        dataset = None
+        # Method 1: Try loading with explicit imagefolder format for all splits
+        try:
+            dataset = load_dataset(HF_DATASET, "imagefolder")
+            print("✅ Loaded with 'imagefolder' format")
+        except Exception as e1:
+            print(f"❌ Method 1 failed: {e1}")
+            # Method 2: Try loading without specific format
+            try:
+                dataset = load_dataset(HF_DATASET)
+                print("✅ Loaded without specific format")
+            except Exception as e2:
+                print(f"❌ Method 2 failed: {e2}")
+                # Method 3: Try loading with data_files specification
+                try:
+                    dataset = load_dataset(
+                        HF_DATASET,
+                        data_files={
+                            'train': ['**/train/**/*.jpg', '**/train/**/*.png', '**/train/**/*.jpeg'],
+                            'validation': ['**/valid/**/*.jpg', '**/valid/**/*.png', '**/valid/**/*.jpeg'],
+                            'test': ['**/test/**/*.jpg', '**/test/**/*.png', '**/test/**/*.jpeg']
+                        }
+                    )
+                    print("✅ Loaded with data_files specification")
+                except Exception as e3:
+                    print(f"❌ Method 3 failed: {e3}")
+                    return f"All loading methods failed:\n1. {e1}\n2. {e2}\n3. {e3}"
+        if dataset is None:
+            return "❌ Could not load dataset with any method"
+        print(f"Dataset splits available: {list(dataset.keys())}")
+        # Check which splits we have and map them appropriately
+        if 'train' in dataset and 'validation' in dataset:
+            train_split = dataset['train']
+            test_split = dataset['validation']
+            print("Using 'train' and 'validation' splits")
+        elif 'valid' in dataset and 'test' in dataset:
+            train_split = dataset['valid']
+            test_split = dataset['test']
+            print("Using 'valid' and 'test' splits")
+        elif 'train' in dataset and 'test' in dataset:
+            train_split = dataset['train']
+            test_split = dataset['test']
+            print("Using 'train' and 'test' splits")
+        else:
+            available_splits = list(dataset.keys())
+            return f"❌ Cannot determine train/test splits. Available splits: {available_splits}"
+        print(f"Training samples: {len(train_split)}")
+        print(f"Test samples: {len(test_split)}")
+        # Analyze dataset structure
         if len(train_split) > 0:
             sample = train_split[0]
             print(f"Sample keys: {list(sample.keys())}")
+            for key in sample.keys():
+                print(f"  {key}: {type(sample[key])}")
+        # Determine if this is a classification or object detection dataset
+        # For Ultralytics datasets, check if it has object detection format
+        def detect_dataset_type(split):
+            if len(split) == 0:
+                return "empty"
+            sample = split[0]
+            if 'objects' in sample or 'bbox' in sample or 'labels' in sample and isinstance(sample.get('labels'), list):
+                return "object_detection"
+            elif 'label' in sample and isinstance(sample['label'], (int, float)):
+                return "classification"
+            elif 'image' in sample:
+                return "image_only"
+            else:
+                return "unknown"
+        train_type = detect_dataset_type(train_split)
+        test_type = detect_dataset_type(test_split)
+        print(f"Train dataset type: {train_type}")
+        print(f"Test dataset type: {test_type}")
+        # Convert to classification format
+        def convert_to_classification(item):
+            """Convert various formats to classification format"""
+            image = item.get('image')
+            # Handle different label formats
+            if 'label' in item and isinstance(item['label'], (int, float)):
+                label = int(item['label'])
+            elif 'objects' in item or 'bbox' in item:
+                # Object detection format - convert to binary classification
+                # If there are objects/bboxes, it's tumor (1), else no tumor (0)
+                label = 1 if (item.get('objects') or item.get('bbox')) else 0
+            elif 'labels' in item and isinstance(item['labels'], list) and len(item['labels']) > 0:
+                label = 1  # Has labels = tumor
+            else:
+                label = 0  # No labels = no tumor
             return {
                 'image': image,
+                'label': label
             }
+        print("Converting dataset to classification format...")
+        train_classification = train_split.map(convert_to_classification)
+        test_classification = test_split.map(convert_to_classification)
+        # Count classes
         tumor_count = sum(1 for item in train_classification if item['label'] == 1)
         no_tumor_count = sum(1 for item in train_classification if item['label'] == 0)
+        print(f"Tumor samples: {tumor_count}, No tumor samples: {no_tumor_count}")
+        # Use binary classification
         class_names = ["no_tumor", "tumor"]
         num_classes = 2
+        print(f"Using {num_classes} classes: {class_names}")
+        # Define transforms
         transform = transforms.Compose([
             transforms.Resize((224, 224)),
             transforms.Grayscale(num_output_channels=3),
                 label = item['label']
                 if self.transform:
                     if not isinstance(image, PILImage.Image):
                         image = PILImage.fromarray(image)
                     image = self.transform(image)
                 return image, label
+        # Create datasets
         train_dataset = MRIDataset(train_classification, transform=transform)
         test_dataset = MRIDataset(test_classification, transform=transform)
+        # Load model
         print("Loading base model...")
         model = AutoModelForImageClassification.from_pretrained(
             BASE_MODEL,
         # Training arguments
         training_args = TrainingArguments(
             output_dir="./results",
+            num_train_epochs=5,  # Reduced for testing
             per_device_train_batch_size=8,
             per_device_eval_batch_size=8,
+            warmup_steps=100,
             weight_decay=0.01,
             logging_dir="./logs",
             logging_steps=10,
             remove_unused_columns=False,
         )
+        # Metrics
         def compute_metrics(eval_pred):
             predictions, labels = eval_pred
             predictions = np.argmax(predictions, axis=1)
         print("Starting training...")
         train_result = trainer.train()
+        # Save model
         trainer.save_model(f"./{CUSTOM_MODEL_NAME}")
         processor.save_pretrained(f"./{CUSTOM_MODEL_NAME}")
+        # Push to hub
+        trainer.push_to_hub(commit_message="Train Brain Tumor classifier")
+        # Results
         train_accuracy = train_result.metrics.get('train_accuracy', 'N/A')
         eval_accuracy = train_result.metrics.get('eval_accuracy', 'N/A')
         result_message = f"""
+        ✅ Training completed!
         Model: {CUSTOM_MODEL_NAME}
+        Dataset: {HF_DATASET}
+        Classes: {class_names}
         Training Accuracy: {train_accuracy}
         Validation Accuracy: {eval_accuracy}
         """
         return result_message
     except Exception as e:
         import traceback
+        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}"
 def classify_mri(image):
     """Score one image with the model stored under ``CUSTOM_MODEL_NAME``.

     The model and its image processor are loaded on every call, moved to
     ``DEVICE`` and run in eval mode without gradient tracking.

     Args:
         image: The image handed to the image processor.

     Returns:
         A dict mapping "No Tumor" and "Tumor Detected" to the softmax
         probability at index 0 and 1 of the first batch row, or a
         "⚠️ Error: ..." string if anything in the pipeline raises.
     """
     try:
         model = AutoModelForImageClassification.from_pretrained(CUSTOM_MODEL_NAME)
         processor = AutoImageProcessor.from_pretrained(CUSTOM_MODEL_NAME)
         model.to(DEVICE)
         model.eval()

         batch = processor(image, return_tensors="pt").to(DEVICE)
         with torch.no_grad():
             logits = model(**batch).logits
             probabilities = torch.nn.functional.softmax(logits, dim=-1)

         # Index 0 / 1 of the first row map onto the two display names.
         scores = {}
         for index, display_name in enumerate(("No Tumor", "Tumor Detected")):
             scores[display_name] = float(probabilities[0][index])
         return scores
     except Exception as exc:
         return f"⚠️ Error: {exc}"
+# Simple Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# Brain Tumor Classification")
+    with gr.Tab("Train"):
+        train_btn = gr.Button("Train Model")
+        output = gr.Textbox(lines=10)
+        train_btn.click(train_and_save_model, outputs=output)
+    with gr.Tab("Classify"):
+        image = gr.Image(type="pil")
+        classify_btn = gr.Button("Classify")
+        result = gr.Label()
+        classify_btn.click(classify_mri, inputs=image, outputs=result)
 if __name__ == "__main__":
     demo.launch()