nemaquant

Sleeping

App Files Community

tyrwh committed on Aug 29, 2025

Commit

37dd438

1 Parent(s): 64a40cc

Added redis server to app.py, fixed Dockerfile

Browse files

Files changed (3) hide show

Dockerfile +4 -2
app.py +0 -3
nemaquant.py +0 -250

Dockerfile CHANGED Viewed

@@ -4,13 +4,14 @@ FROM python:3.12
 # Set the working directory in the container
 WORKDIR /app
-# Install system dependencies required by OpenCV and other packages
 RUN apt-get update && apt-get install -y --no-install-recommends \
     libgl1-mesa-glx \
     libglib2.0-0 \
     libsm6 \
     libxrender1 \
     libxext6 \
     && rm -rf /var/lib/apt/lists/*
 # Copy the requirements file into the container at /app
@@ -41,4 +42,5 @@ EXPOSE 7860
 # Use gunicorn for production deployment if preferred over Flask's development server
 # CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
 # For simplicity during development and typical HF Spaces use:
-CMD ["python", "app.py"]

 # Set the working directory in the container
 WORKDIR /app
+# Install system dependencies required by OpenCV and other packages, plus Redis
 RUN apt-get update && apt-get install -y --no-install-recommends \
     libgl1-mesa-glx \
     libglib2.0-0 \
     libsm6 \
     libxrender1 \
     libxext6 \
+    redis-server \
     && rm -rf /var/lib/apt/lists/*
 # Copy the requirements file into the container at /app
 # Use gunicorn for production deployment if preferred over Flask's development server
 # CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
 # For simplicity during development and typical HF Spaces use:
+# Start Redis server in background and then start the Flask app
+CMD redis-server --daemonize yes && python app.py

app.py CHANGED Viewed

@@ -11,10 +11,7 @@ import pandas as pd
 from werkzeug.utils import secure_filename
 import traceback
 import sys
-import re
 import io
-import threading
-import time
 import zipfile
 import cv2
 import csv

 from werkzeug.utils import secure_filename
 import traceback
 import sys
 import io
 import zipfile
 import cv2
 import csv

nemaquant.py DELETED Viewed

@@ -1,250 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-import numpy as np
-import pandas as pd
-import cv2
-import os
-from torch import cuda
-from pathlib import Path
-from ultralytics import YOLO
-from glob import glob
-import re
-from yolo_utils import load_model, detect_image
-def options():
-    parser = argparse.ArgumentParser(description="Nematode egg image processing with YOLO11 model.")
-    parser.add_argument("-m", "--img_mode", help="Mode to run", required=True)
-    parser.add_argument("-i", "--img", help="Target image directory or image (REQUIRED)", required=True)
-    parser.add_argument('-w', '--weights', help='Weights file for use with YOLO11 model')
-    parser.add_argument("-o","--output", help="Name of results file. If no file is specified, one will be created from the key file name")
-    parser.add_argument("-k", "--key", help="CSV key file to use as output template. If no file is specified, will look for one in target directory. Not used in single-image mode")
-    parser.add_argument("-a","--annotated", help="Directory to save annotated image files", required=False)
-    parser.add_argument("--conf", help="Confidence cutoff (default = 0.6)", default=0.6, type=float)
-    args = parser.parse_args()
-    return args
-def check_args():
-    args = options()
-    # basic checks on target file validity
-    args.imgpath = Path(args.img)
-    if not args.imgpath.exists():
-        raise Exception("Target %s is not a valid path" % args.img)
-    # check if img_mode is specified and valid
-    if args.img_mode:
-        valid_modes = ['dir', 'file', 'keyence']
-        if args.img_mode not in valid_modes:
-            raise Exception(f"img_mode must be one of: {', '.join(valid_modes)}")
-    # check for potential images in the target directory
-    if args.img_mode in ['dir','keyence']:
-        potential_images = [x for x in args.imgpath.iterdir() if x.suffix.lower() in ['.tif','.tiff','.jpg','.jpeg','.png']]
-        if len(potential_images) == 0:
-            raise Exception('No valid images (.png, .tif, .tiff, .jpeg, .jpg) in target folder %s' % args.img)
-        else:
-            print('%s valid images found' % len(potential_images))
-            args.subimage_paths = potential_images
-    # if no weights file, try using the default weights.pt
-    if not args.weights:
-        script_dir = Path(__file__).parent
-        default_weights = script_dir / 'weights.pt'
-        if default_weights.exists():
-            args.weights = str(default_weights)
-        else:
-            raise Exception('No weights file specified and default weights.pt not found in script directory')
-    # for /XY00/ subdirectories, we require a valid key
-    # ensure that either a key is specified, or if a single .csv exists in the target dir, use that
-    if args.img_mode == 'keyence':
-        if args.key:
-            args.keypath = Path(args.key)
-            if not arg.keypath.exists():
-                raise Exception('Specified key file does not exist: %s' % args.keypath)
-            if args.keypath.suffix != '.csv':
-                raise Exception("Specified key file is not a .csv: %s" % args.keypath)
-        else:
-            print('Running on /XY00/ subdirectories but no key specified. Looking for key file...')
-            potential_keys = list(args.imgpath.glob('*.csv'))
-            if len(potential_keys) == 0:
-                raise Exception("No .csv files found in target folder %s, please check directory" % args.img)
-            if len(potential_keys) > 1:
-                raise Exception("Multiple .csv files found in target folder %s, please specify which one to use")
-            else:
-                args.keypath = potential_keys[0]
-                args.key = str(potential_keys[0])
-    # if path to results file is specified, ensure it is .csv
-    if args.output:
-        args.outpath = Path(args.output)
-        if args.outpath.suffix != '.csv':
-            raise Exception("Specified output file is not a .csv: %s" % args.outpath)
-    else:
-        # for XY00 subdirs, name it after the required key file
-        # for an image directory, name it after the directory
-        if args.img_mode == 'keyence':
-            args.output = '%s_eggcounts.csv' % args.keypath.stem
-        else:
-            args.output = '%s_eggcounts.csv' % args.imgpath.stem
-        args.outpath = Path(args.output)
-    # finally, check the target dir to save annotated images in
-    if args.annotated:
-        args.annotpath = Path(args.annotated)
-        if not args.annotpath.exists():
-            os.mkdir(args.annotated)
-        elif not args.annotpath.is_dir():
-            raise Exception("annotated output folder is not a valid directory: %s" % args.annotated)
-    return args
-# parse a key file, make sure it all looks correct and can be merged later
-def parse_key_file(keypath):
-    key = pd.read_csv(keypath)
-    # drop potential Unnamed: 0 column if rownames from R were included without col header
-    key = key.loc[:, ~key.columns.str.contains('^Unnamed')]
-    # for now, will only allow 96-row key files
-    # can handle edge cases, but much easier if we just require 96
-    if key.shape[0] > 96:
-        raise Exception("More than 96 rows found in key. Please check formatting and try again")
-    # check if it's got at least one column formatted with what looks like plate positions
-    well_columns = []
-    for col in key.columns:
-        if key[col].dtype.kind == "O":
-            if all(key[col].str.fullmatch("[A-H][0-9]{1,2}")):
-                well_columns.append(col)
-    if len(well_columns) == 0:
-        raise Exception("No column found with well positions of format A1/A01/H12/etc.")
-    elif len(well_columns) > 1:
-        raise Exception("Multiple columns found with well positions of format A1/A01/H12/etc.")
-    # add a column named keycol, formatted to match the folder output like _A01
-    key["keycol"] = key[well_columns[0]]
-    # as the key, it should really be unique and complete, raise exception if not the case
-    if any(key["keycol"].isna()):
-        raise Exception("There appear to be blank well positions in column %s. Please fix and resubmit." % well_columns[0])
-    if len(set(key["keycol"])) < len(key["keycol"]):
-        raise Exception("There appear to be duplicated well positions in the key file. Please fix and resubmit.")
-    # if formatted A1, reformat as A01
-    key["keycol"] = key["keycol"].apply(lambda x: "_%s%s" % (re.findall("[A-H]",x)[0], re.findall("[0-9]+", x)[0].zfill(2)))
-    return key
-def detect_eggs(args, key=None):
-    if key:
-        key = parse_key_file(str(args.keypath))
-    model = YOLO(args.weights)
-    if cuda.is_available():
-        device = 'cuda'
-    else:
-        device = 'cpu'
-    model.to(device)
-    # create a couple empty lists for holding results, easier than adding to empty Pandas DF
-    tmp_well = []
-    tmp_numeggs = []
-    tmp_filenames = []
-    # single-image mode
-    if args.img_mode == 'file':
-        # imread then apply model, one-step predict() can't handle TIFF
-        img = cv2.imread(str(args.imgpath))
-        results = model.predict(img, imgsz = 1440, max_det=1000, verbose=False, conf=0.05)
-        result = results[0]
-        box_classes = [result.names[int(x)] for x in result.boxes.cls]
-        # NOTE - filtering by class is not necessary, but would make this easier to extend to multi-class models
-        # e.g. if we want to add hatched, empty eggs, etc
-        egg_xy = [x.cpu().numpy().astype(np.int32) for i,x in enumerate(result.boxes.xyxy) if box_classes[i] == 'egg']
-        print('Target image:\n%s' % str(args.imgpath))
-        print('n eggs:\n%s' % len(egg_xy))
-        if args.annotated:
-            annot = img.copy()
-            for xy in egg_xy:
-                cv2.rectangle(annot, tuple(xy[0:2]), tuple(xy[2:4]), (0,0,255), 4)
-            annot_path = args.annotpath / ('%s_annotated%s' % (args.imgpath.stem, args.imgpath.suffix))
-            cv2.imwrite(str(annot_path), annot)
-            print('Saving annotations to %s...' % str(annot_path))
-    # multi-image mode, runs differently depending on whether you have /XY00/ subdirectories
-    elif args.img_mode in ['dir', 'keyence']:
-        subdir_paths = []
-        if args.img_mode == 'keyence':
-            total_subdirs = len(args.subdir_paths)
-            for i, subdir in enumerate(args.subdir_paths):
-                # Report progress
-                progress_percent = int(((i + 1) / total_subdirs) * 90) # Scale to 0-90% range
-                print(f"PROGRESS: {progress_percent}")
-                sys.stdout.flush() # Flush output buffer
-                # check that the empty file with well name is present
-                well = [x.name for x in subdir.iterdir() if re.match("_[A-H][0-9]{1,2}", x.name)][0]
-                if len(well) == 0:
-                    raise Exception("No well position file of format _A01 found in subdirectory:\n%s" % subdir)
-                # print the XY subdirectory name for tracking purposes
-                xy = subdir.name
-                print(xy)
-                # search for a filename with CH4 in it
-                # TODO - confirm with sweetpotato group that the CH4.tif or CH4.jpg will be present in all cases
-                candidate_img_paths = list(subdir.glob('*CH4*'))
-                # if none or more than one, just skip the folder vs raise exceptions
-                if len(candidate_img_paths) == 0:
-                    print("No CH4 image found for subdirectory %s" % subdir)
-                    continue
-                elif len(candidate_img_paths) > 1:
-                    print("Multiple CH4 images found in subdirectory %s" % subdir)
-                    continue
-                impath = candidate_img_paths[0]
-                # get the actual output
-                img = cv2.imread(str(impath))
-                results = model.predict(img, imgsz = 1440, verbose=False, conf=0.05)
-                result = results[0]
-                box_classes = [result.names[int(x)] for x in result.boxes.cls]
-                egg_xy = [x.cpu().numpy().astype(np.int32) for i,x in enumerate(result.boxes.xyxy) if box_classes[i] == 'egg']
-                # append relevant output to temporary lists
-                tmp_well.append(well)
-                tmp_numeggs.append(len(egg_xy))
-                tmp_filenames.append(impath.name)
-                # annotate and save image if needed
-                if args.annotated:
-                    annot = img.copy()
-                    for xy in egg_xy:
-                        cv2.rectangle(annot, tuple(xy[0:2]), tuple(xy[2:4]), (0,0,255), 4)
-                    annot_path = args.annotpath / ('%s_annotated%s' % (impath.stem, impath.suffix))
-                    cv2.imwrite(str(annot_path), annot)
-            # make a CSV to merge with the key
-            results = pd.DataFrame({
-                "keycol": tmp_well,
-                "num_eggs": tmp_numeggs,
-                "filename": tmp_filenames,
-                "folder": args.img})
-            # merge and save
-            outdf = key.merge(results, on = "keycol", how = "left")
-            outdf = outdf.drop("keycol", axis = 1)
-        else:
-            # apply the model on each image
-            # running model() on the target dir instead of image-by-image would be cleaner
-            # but makes saving annotated images more complicated
-            # can maybe revisit later
-            total_images = len(args.subimage_paths)
-            for i, impath in enumerate(sorted(args.subimage_paths)):
-                # Report progress
-                progress_percent = int(((i + 1) / total_images) * 90) # Scale to 0-90% range
-                print(f"PROGRESS: {progress_percent}")
-                sys.stdout.flush() # Flush output buffer
-                img = cv2.imread(str(impath))
-                results = model.predict(img, imgsz = 1440, verbose=False, conf=0.05)
-                result = results[0]
-                box_classes = [result.names[int(x)] for x in result.boxes.cls]
-                egg_xy = [x.cpu().numpy().astype(np.int32) for i,x in enumerate(result.boxes.xyxy) if box_classes[i] == 'egg']
-                tmp_numeggs.append(len(egg_xy))
-                tmp_filenames.append(impath.name)
-                # annotate if needed
-                if args.annotated:
-                    annot = img.copy()
-                    for xy in egg_xy:
-                        cv2.rectangle(annot, tuple(xy[0:2]), tuple(xy[2:4]), (0,0,255), 4)
-                    annot_path = args.annotpath / ('%s_annotated%s' % (impath.stem, impath.suffix))
-                    cv2.imwrite(str(annot_path), annot)
-            outdf = pd.DataFrame({
-                "filename": tmp_filenames,
-                "num_eggs": tmp_numeggs})
-        # save final pandas df, print some updates for user
-        outdf.sort_values(by='filename', inplace=True)
-        outdf.to_csv(str(args.outpath), index=False)
-        print('Saving output to %s...' % str(args.outpath))
-        if args.annotated:
-            print('Saving annotated images to %s...' % str(args.annotpath))