Habeeb Okunade committed on
Commit
05c5199
·
1 Parent(s): f119f72

Update the training script

Browse files
Files changed (2) hide show
  1. app.py +54 -15
  2. train2.py +10 -10
app.py CHANGED
@@ -1,19 +1,21 @@
1
- # app.py
2
- import os, json, subprocess
3
- from fastapi import BackgroundTasks, FastAPI, UploadFile
4
  from transformers import AutoImageProcessor, BeitForImageClassification
5
  from PIL import Image
6
  import torch
7
 
8
  MODEL_DIR = os.environ.get("OUTPUT_DIR", "/home/user/outputs/beit-retina")
 
9
  CLASSES = ["AMD","DMO","DR","GLC","HR","Normal"]
10
 
11
  app = FastAPI(title="Retina Disease Classifier")
12
 
13
- # Lazy load model & processor
14
  processor = None
15
  model = None
16
 
 
 
 
17
  def load_model():
18
  global processor, model, CLASSES
19
  try:
@@ -28,21 +30,34 @@ def load_model():
28
  processor, model = None, None
29
  print(f"โš ๏ธ Skipping model load: {e}")
30
 
 
 
 
31
  def run_training():
32
  try:
33
- result = subprocess.run(
 
34
  ["python", "train2.py"],
35
- capture_output=True,
36
- text=True
 
37
  )
38
- if result.returncode == 0 and os.path.exists(MODEL_DIR):
 
 
 
 
 
39
  load_model()
40
  print("โœ… Training complete and model reloaded")
41
  else:
42
- print("โŒ Training failed:", result.stderr)
43
  except Exception as e:
44
  print("โš ๏ธ Training exception:", str(e))
45
 
 
 
 
46
  @app.on_event("startup")
47
  def startup_event():
48
  if os.path.exists(MODEL_DIR):
@@ -50,6 +65,36 @@ def startup_event():
50
  else:
51
  print("โš ๏ธ MODEL_DIR not found, skipping model load")
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  @app.post("/predict")
54
  async def predict(file: UploadFile):
55
  if model is None:
@@ -67,9 +112,3 @@ async def predict(file: UploadFile):
67
  "class_id": CLASSES[pred_id],
68
  "probabilities": {CLASSES[i]: float(p) for i, p in enumerate(probs)}
69
  }
70
-
71
- @app.post("/train")
72
- async def train_endpoint(background_tasks: BackgroundTasks):
73
- # Schedule the training in the background
74
- background_tasks.add_task(run_training)
75
- return {"status": "Training started in background"}
 
1
+ import os, json, subprocess, shutil, zipfile
2
+ from fastapi import BackgroundTasks, FastAPI, UploadFile, File
 
3
  from transformers import AutoImageProcessor, BeitForImageClassification
4
  from PIL import Image
5
  import torch
6
 
7
  MODEL_DIR = os.environ.get("OUTPUT_DIR", "/home/user/outputs/beit-retina")
8
+ DATA_DIR = os.environ.get("DATA_DIR", "data2")
9
  CLASSES = ["AMD","DMO","DR","GLC","HR","Normal"]
10
 
11
  app = FastAPI(title="Retina Disease Classifier")
12
 
 
13
  processor = None
14
  model = None
15
 
16
+ # ----------------------------
17
+ # MODEL LOADING
18
+ # ----------------------------
19
  def load_model():
20
  global processor, model, CLASSES
21
  try:
 
30
  processor, model = None, None
31
  print(f"โš ๏ธ Skipping model load: {e}")
32
 
33
+ # ----------------------------
34
+ # BACKGROUND TRAINING
35
+ # ----------------------------
36
def run_training():
    """Run train2.py as a subprocess, streaming its logs, then reload the model.

    Intended to be scheduled as a FastAPI background task.  On a zero exit
    code (and an existing MODEL_DIR) the freshly trained weights are loaded
    into the module-level globals via load_model().  All failures are
    logged rather than raised, since nothing awaits a background task.
    """
    try:
        print("🔹 Starting training subprocess...")
        # stderr is merged into stdout so a single stream carries all logs.
        # The context manager guarantees the pipe is closed and the child
        # reaped even if an exception is raised while streaming output —
        # a bare Popen would leak the pipe / leave a zombie process.
        with subprocess.Popen(
            ["python", "train2.py"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        ) as process:
            # Stream output line by line so training progress shows up in
            # the server logs instead of buffering until completion.
            for line in iter(process.stdout.readline, ""):
                print("TRAIN_LOG:", line.strip())
        # __exit__ has waited on the child; returncode is now populated.
        return_code = process.returncode

        if return_code == 0 and os.path.exists(MODEL_DIR):
            load_model()
            print("✅ Training complete and model reloaded")
        else:
            print(f"❌ Training failed with code {return_code}")
    except Exception as e:
        # Best-effort background job: log and swallow so the server stays up.
        print("⚠️ Training exception:", str(e))
57
 
58
+ # ----------------------------
59
+ # FASTAPI STARTUP
60
+ # ----------------------------
61
  @app.on_event("startup")
62
  def startup_event():
63
  if os.path.exists(MODEL_DIR):
 
65
  else:
66
  print("โš ๏ธ MODEL_DIR not found, skipping model load")
67
 
68
+ # ----------------------------
69
+ # ENDPOINTS
70
+ # ----------------------------
71
@app.post("/load-data")
async def load_data(file: UploadFile = File(...)):
    """
    Upload a ZIP file and extract it into DATA_DIR for training.

    Any previously extracted dataset is wiped first so a stale dataset can
    never be mixed with a new upload.  Returns a small status dict; a
    corrupt/non-zip upload raises zipfile.BadZipFile (surfaced as a 500).
    """
    print("🔹 Received dataset ZIP upload...")
    # Start from a clean slate: remove any previously extracted dataset.
    if os.path.exists(DATA_DIR):
        shutil.rmtree(DATA_DIR)
    os.makedirs(DATA_DIR, exist_ok=True)

    zip_path = "dataset.zip"
    try:
        # Spool the upload to disk before extraction.
        # NOTE(review): this reads the whole upload into memory; fine for
        # modest datasets, consider chunked copy for very large ones.
        with open(zip_path, "wb") as f:
            f.write(await file.read())
        print(f" ↪ Saved ZIP to {zip_path}")

        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(DATA_DIR)
        print(f"✅ Dataset extracted to {DATA_DIR}")
    finally:
        # Always remove the temporary archive — previously it was left on
        # disk whenever extraction raised (e.g. BadZipFile).
        if os.path.exists(zip_path):
            os.remove(zip_path)

    return {"status": "Dataset uploaded and extracted"}
92
+
93
@app.post("/train")
async def train_endpoint(background_tasks: BackgroundTasks):
    """Schedule run_training as a background task and return immediately."""
    # The heavy lifting (subprocess + model reload) happens after the
    # response is sent, so the client never blocks on training.
    background_tasks.add_task(run_training)
    return {"status": "Training started in background"}
97
+
98
  @app.post("/predict")
99
  async def predict(file: UploadFile):
100
  if model is None:
 
112
  "class_id": CLASSES[pred_id],
113
  "probabilities": {CLASSES[i]: float(p) for i, p in enumerate(probs)}
114
  }
 
 
 
 
 
 
train2.py CHANGED
@@ -16,16 +16,17 @@ from PIL import Image
16
  # ----------------------------
17
  MODEL_NAME = "microsoft/beit-base-patch16-224"
18
  OUTPUT_DIR = os.environ.get("OUTPUT_DIR", os.path.expanduser("~/outputs/beit-retina"))
19
- NUM_CLASSES = 6 # retina disease classes
20
 
21
  print(f"๐Ÿ”น OUTPUT_DIR set to: {OUTPUT_DIR}")
 
22
  os.makedirs(OUTPUT_DIR, exist_ok=True)
23
 
24
  # ----------------------------
25
  # LOAD DATASET
26
  # ----------------------------
27
- print("๐Ÿ”น Loading dataset from 'data/' folder...")
28
- dataset = load_dataset("imagefolder", data_dir="data")
29
  print(f"๐Ÿ”น Dataset loaded. Columns: {dataset['train'].column_names}")
30
  print(f"๐Ÿ”น Dataset splits: {list(dataset.keys())}")
31
  print(f"๐Ÿ”น Number of training samples: {len(dataset['train'])}")
@@ -38,29 +39,27 @@ print(f"๐Ÿ”น Loading processor from {MODEL_NAME}...")
38
  processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
39
 
40
  def transform(example):
41
- # Determine correct image column
42
  image_column = "image" if "image" in example else [c for c in example.keys() if c != "label"][0]
43
-
44
  images = example[image_column]
45
 
46
- # Ensure we always have a list
47
  if not isinstance(images, list):
48
  images = [images]
49
 
50
  processed_images = []
51
  for img in images:
52
  if isinstance(img, str):
 
53
  img = Image.open(img).convert("RGB")
54
  elif isinstance(img, Image.Image):
 
55
  img = img.convert("RGB")
56
  else:
57
  raise ValueError(f"Unknown type for image: {type(img)}")
58
  processed_images.append(img)
59
 
60
- # Convert to tensors (batched)
61
  inputs = processor(images=processed_images, return_tensors="pt")
62
 
63
- # Handle labels
64
  labels = example["label"]
65
  if not isinstance(labels, list):
66
  labels = [labels]
@@ -75,10 +74,10 @@ print("๐Ÿ”น Transform applied successfully.")
75
  # ----------------------------
76
  # MODEL
77
  # ----------------------------
78
- print(f"๐Ÿ”น Loading BEiT model ({MODEL_NAME}) with {NUM_CLASSES} classes...")
79
  model = BeitForImageClassification.from_pretrained(
80
  MODEL_NAME,
81
- num_labels=NUM_CLASSES,
82
  ignore_mismatched_sizes=True
83
  )
84
  print("๐Ÿ”น Model loaded successfully.")
@@ -111,6 +110,7 @@ args = TrainingArguments(
111
  num_train_epochs=5,
112
  weight_decay=0.01,
113
  logging_dir=os.path.join(OUTPUT_DIR, "logs"),
 
114
  push_to_hub=False
115
  )
116
  print("๐Ÿ”น TrainingArguments configured.")
 
16
  # ----------------------------
17
  MODEL_NAME = "microsoft/beit-base-patch16-224"
18
  OUTPUT_DIR = os.environ.get("OUTPUT_DIR", os.path.expanduser("~/outputs/beit-retina"))
19
+ DATA_DIR = os.environ.get("DATA_DIR", "data2") # dynamic dataset path
20
 
21
  print(f"๐Ÿ”น OUTPUT_DIR set to: {OUTPUT_DIR}")
22
+ print(f"๐Ÿ”น DATA_DIR set to: {DATA_DIR}")
23
  os.makedirs(OUTPUT_DIR, exist_ok=True)
24
 
25
  # ----------------------------
26
  # LOAD DATASET
27
  # ----------------------------
28
+ print(f"๐Ÿ”น Loading dataset from '{DATA_DIR}' folder...")
29
+ dataset = load_dataset("imagefolder", data_dir=DATA_DIR)
30
  print(f"๐Ÿ”น Dataset loaded. Columns: {dataset['train'].column_names}")
31
  print(f"๐Ÿ”น Dataset splits: {list(dataset.keys())}")
32
  print(f"๐Ÿ”น Number of training samples: {len(dataset['train'])}")
 
39
  processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
40
 
41
  def transform(example):
42
+ # Detect image column
43
  image_column = "image" if "image" in example else [c for c in example.keys() if c != "label"][0]
 
44
  images = example[image_column]
45
 
 
46
  if not isinstance(images, list):
47
  images = [images]
48
 
49
  processed_images = []
50
  for img in images:
51
  if isinstance(img, str):
52
+ print(f" โ†ช Opening image from path: {img}")
53
  img = Image.open(img).convert("RGB")
54
  elif isinstance(img, Image.Image):
55
+ print(" โ†ช Using PIL.Image directly")
56
  img = img.convert("RGB")
57
  else:
58
  raise ValueError(f"Unknown type for image: {type(img)}")
59
  processed_images.append(img)
60
 
 
61
  inputs = processor(images=processed_images, return_tensors="pt")
62
 
 
63
  labels = example["label"]
64
  if not isinstance(labels, list):
65
  labels = [labels]
 
74
  # ----------------------------
75
  # MODEL
76
  # ----------------------------
77
+ print(f"๐Ÿ”น Loading BEiT model ({MODEL_NAME}) with {len(dataset['train'].features['label'].names)} classes...")
78
  model = BeitForImageClassification.from_pretrained(
79
  MODEL_NAME,
80
+ num_labels=len(dataset["train"].features["label"].names),
81
  ignore_mismatched_sizes=True
82
  )
83
  print("๐Ÿ”น Model loaded successfully.")
 
110
  num_train_epochs=5,
111
  weight_decay=0.01,
112
  logging_dir=os.path.join(OUTPUT_DIR, "logs"),
113
+ logging_steps=10,
114
  push_to_hub=False
115
  )
116
  print("๐Ÿ”น TrainingArguments configured.")