Spaces:

Iridium-193
/

SoilTextureClassification

Runtime error

App Files Files Community

Iridium-193 committed 1 day ago

Commit

58969d7

verified ·

1 Parent(s): 3655df7

Upload 2 files

Browse files

Files changed (1) hide show

app.py +162 -2

app.py CHANGED Viewed

@@ -1,4 +1,8 @@
 import argparse
 from pathlib import Path
 from typing import Tuple, Dict
 import numpy as np
@@ -17,7 +21,7 @@ try:
 except ImportError:
     import sys
-    sys.path.append(str(Path(__file__).parent / "src"))
     from data_collection import DataCollectionManager, classify_from_percentages_simple
@@ -698,13 +702,136 @@ def create_demo(
             f"{export_note}"
         )
     # Create interface
     with gr.Blocks(title="Soil Texture Classifier") as demo:
         gr.Markdown("""
         # Soil Texture Classification
-        1. Use **Inference** to predict texture class and composition from image.
         2. Use **Contribute Data** to upload image + measured Sand/Silt/Clay for future training.
         """)
         with gr.Tabs():
@@ -770,6 +897,27 @@ def create_demo(
                         submit_btn = gr.Button("Submit Contribution", variant="primary")
                         contribution_status = gr.Markdown(label="Submission Status")
         # Event handlers
         predict_btn.click(
             fn=predict_fn,
@@ -800,6 +948,18 @@ def create_demo(
             outputs=[contribution_status]
         )
     return demo

 import argparse
+import csv
+import io
+import os
+import zipfile
 from pathlib import Path
 from typing import Tuple, Dict
 import numpy as np
 except ImportError:
     import sys
+    sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
     from data_collection import DataCollectionManager, classify_from_percentages_simple
             f"{export_note}"
         )
+    def get_dataset_stats_fn():
+        """Get statistics about the current dataset."""
+        cfg = collection_manager.config
+        num_submissions = 0
+        if cfg.csv_path.exists():
+            with cfg.csv_path.open("r", encoding="utf-8") as f:
+                reader = csv.reader(f)
+                next(reader, None)
+                num_submissions = sum(1 for _ in reader)
+        num_images = 0
+        total_size_bytes = 0
+        if cfg.images_dir.exists():
+            for p in cfg.images_dir.iterdir():
+                if p.is_file():
+                    num_images += 1
+                    total_size_bytes += p.stat().st_size
+        total_size_mb = total_size_bytes / (1024 * 1024)
+        return (
+            f"### Dataset Statistics\n"
+            f"- **Total submissions:** {num_submissions}\n"
+            f"- **Total images:** {num_images}\n"
+            f"- **Total image size:** {total_size_mb:.1f} MB\n"
+        )
+    def upload_dataset_fn(zip_file, upload_consent):
+        """Process uploaded ZIP dataset with images and CSV."""
+        if zip_file is None:
+            return "Please upload a ZIP file."
+        if not upload_consent:
+            return "Please confirm consent before uploading."
+        zip_path = zip_file if isinstance(zip_file, str) else zip_file.name
+        if not zipfile.is_zipfile(zip_path):
+            return "Invalid ZIP file."
+        max_entries = 10000
+        max_total_size = 500 * 1024 * 1024
+        results = {"added": 0, "skipped": 0, "errors": []}
+        try:
+            with zipfile.ZipFile(zip_path, "r") as zf:
+                entries = zf.infolist()
+                if len(entries) > max_entries:
+                    return f"ZIP has too many entries ({len(entries)}). Max: {max_entries}."
+                total_size = sum(e.file_size for e in entries)
+                if total_size > max_total_size:
+                    return f"ZIP too large ({total_size / 1024 / 1024:.0f} MB). Max: {max_total_size // (1024 * 1024)} MB."
+                csv_entries = [
+                    e for e in entries
+                    if e.filename.endswith(".csv") and not e.filename.startswith("__")
+                ]
+                if not csv_entries:
+                    return "No CSV found in ZIP. Expected CSV with columns: filename, sand, silt, clay."
+                with zf.open(csv_entries[0]) as csv_file:
+                    content = csv_file.read().decode("utf-8")
+                    reader = csv.DictReader(io.StringIO(content))
+                    headers = set(reader.fieldnames or [])
+                    required = {"filename", "sand", "silt", "clay"}
+                    if not required.issubset(headers):
+                        return (
+                            f"CSV must have columns: {', '.join(sorted(required))}. "
+                            f"Found: {', '.join(sorted(headers))}"
+                        )
+                    for row in reader:
+                        try:
+                            fname = row["filename"].strip()
+                            sand = float(row["sand"])
+                            silt = float(row["silt"])
+                            clay = float(row["clay"])
+                            vals = [sand, silt, clay]
+                            if any(v < 0 or v > 100 for v in vals):
+                                results["errors"].append(f"{fname}: values out of range")
+                                results["skipped"] += 1
+                                continue
+                            total = sand + silt + clay
+                            if abs(total - 100.0) > 1.0:
+                                results["errors"].append(f"{fname}: sum={total:.1f}, must be ~100")
+                                results["skipped"] += 1
+                                continue
+                            matches = [e for e in entries if Path(e.filename).name == fname]
+                            if not matches:
+                                results["errors"].append(f"Image not found in ZIP: {fname}")
+                                results["skipped"] += 1
+                                continue
+                            with zf.open(matches[0]) as img_bytes:
+                                image = Image.open(img_bytes).convert("RGB")
+                            if image.width * image.height > collection_manager.config.max_image_pixels:
+                                results["errors"].append(f"{fname}: image too large")
+                                results["skipped"] += 1
+                                continue
+                            prediction = predictor.predict(image)
+                            user_class = classify_from_percentages_simple(sand, silt, clay)
+                            submission_id = collection_manager.create_submission_id()
+                            collection_manager.save_submission(
+                                image=image,
+                                submission_id=submission_id,
+                                sand=sand, silt=silt, clay=clay,
+                                user_class=user_class,
+                                weak_label=row.get("weak_label", ""),
+                                strong_label=row.get("strong_label", ""),
+                                prediction=prediction,
+                                sample_source=row.get("source", ""),
+                                location=row.get("location", ""),
+                                notes=row.get("notes", ""),
+                                total=total,
+                            )
+                            results["added"] += 1
+                        except Exception as e:
+                            results["errors"].append(f"{row.get('filename', '?')}: {e}")
+                            results["skipped"] += 1
+        except Exception as e:
+            return f"Failed to process ZIP: {e}"
+        error_summary = ""
+        if results["errors"]:
+            shown = results["errors"][:20]
+            error_summary = "\n\n**Errors:**\n" + "\n".join(f"- {e}" for e in shown)
+            if len(results["errors"]) > 20:
+                error_summary += f"\n- ... and {len(results['errors']) - 20} more"
+        return (
+            f"### Upload Complete\n"
+            f"- **Added:** {results['added']} submissions\n"
+            f"- **Skipped:** {results['skipped']}\n"
+            f"{error_summary}"
+        )
     # Create interface
     with gr.Blocks(title="Soil Texture Classifier") as demo:
         gr.Markdown("""
         # Soil Texture Classification
+        1. Use **Inference** to predict texture class and composition from image.
         2. Use **Contribute Data** to upload image + measured Sand/Silt/Clay for future training.
+        3. Use **Dataset Management** to bulk-upload a ZIP dataset for model improvement.
         """)
         with gr.Tabs():
                         submit_btn = gr.Button("Submit Contribution", variant="primary")
                         contribution_status = gr.Markdown(label="Submission Status")
+            with gr.Tab("Dataset Management"):
+                gr.Markdown("""
+                **Upload** a dataset (ZIP) to contribute bulk data for model improvement.
+                **Upload format:** ZIP containing a CSV file and image files.
+                CSV columns: `filename`, `sand`, `silt`, `clay` (required).
+                Optional: `weak_label`, `strong_label`, `source`, `location`, `notes`.
+                """)
+                with gr.Row():
+                    with gr.Column():
+                        upload_file = gr.File(label="ZIP Dataset", file_types=[".zip"])
+                        upload_consent = gr.Checkbox(
+                            label="I confirm these images and labels can be used for model improvement.",
+                            value=False,
+                        )
+                        upload_btn = gr.Button("Upload Dataset", variant="primary")
+                        upload_status = gr.Markdown(label="Upload Status")
+                    with gr.Column():
+                        stats_btn = gr.Button("Refresh Statistics")
+                        stats_display = gr.Markdown(label="Statistics")
         # Event handlers
         predict_btn.click(
             fn=predict_fn,
             outputs=[contribution_status]
         )
+        upload_btn.click(
+            fn=upload_dataset_fn,
+            inputs=[upload_file, upload_consent],
+            outputs=[upload_status],
+        )
+        stats_btn.click(
+            fn=get_dataset_stats_fn,
+            inputs=[],
+            outputs=[stats_display],
+        )
     return demo