tyrwh committed on
Commit
37dd438
·
1 Parent(s): 64a40cc

Added redis server to app.py, fixed Dockerfile

Browse files
Files changed (3) hide show
  1. Dockerfile +4 -2
  2. app.py +0 -3
  3. nemaquant.py +0 -250
Dockerfile CHANGED
@@ -4,13 +4,14 @@ FROM python:3.12
4
  # Set the working directory in the container
5
  WORKDIR /app
6
 
7
- # Install system dependencies required by OpenCV and other packages
8
  RUN apt-get update && apt-get install -y --no-install-recommends \
9
  libgl1-mesa-glx \
10
  libglib2.0-0 \
11
  libsm6 \
12
  libxrender1 \
13
  libxext6 \
 
14
  && rm -rf /var/lib/apt/lists/*
15
 
16
  # Copy the requirements file into the container at /app
@@ -41,4 +42,5 @@ EXPOSE 7860
41
  # Use gunicorn for production deployment if preferred over Flask's development server
42
  # CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
43
  # For simplicity during development and typical HF Spaces use:
44
- CMD ["python", "app.py"]
 
 
4
  # Set the working directory in the container
5
  WORKDIR /app
6
 
7
+ # Install system dependencies required by OpenCV and other packages, plus Redis
8
  RUN apt-get update && apt-get install -y --no-install-recommends \
9
  libgl1-mesa-glx \
10
  libglib2.0-0 \
11
  libsm6 \
12
  libxrender1 \
13
  libxext6 \
14
+ redis-server \
15
  && rm -rf /var/lib/apt/lists/*
16
 
17
  # Copy the requirements file into the container at /app
 
42
  # Use gunicorn for production deployment if preferred over Flask's development server
43
  # CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
44
  # For simplicity during development and typical HF Spaces use:
45
+ # Start Redis server in background and then start the Flask app
46
+ CMD redis-server --daemonize yes && python app.py
app.py CHANGED
@@ -11,10 +11,7 @@ import pandas as pd
11
  from werkzeug.utils import secure_filename
12
  import traceback
13
  import sys
14
- import re
15
  import io
16
- import threading
17
- import time
18
  import zipfile
19
  import cv2
20
  import csv
 
11
  from werkzeug.utils import secure_filename
12
  import traceback
13
  import sys
 
14
  import io
 
 
15
  import zipfile
16
  import cv2
17
  import csv
nemaquant.py DELETED
@@ -1,250 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- import numpy as np
5
- import pandas as pd
6
- import cv2
7
- import os
8
- from torch import cuda
9
- from pathlib import Path
10
- from ultralytics import YOLO
11
- from glob import glob
12
- import re
13
- from yolo_utils import load_model, detect_image
14
-
15
- def options():
16
- parser = argparse.ArgumentParser(description="Nematode egg image processing with YOLO11 model.")
17
- parser.add_argument("-m", "--img_mode", help="Mode to run", required=True)
18
- parser.add_argument("-i", "--img", help="Target image directory or image (REQUIRED)", required=True)
19
- parser.add_argument('-w', '--weights', help='Weights file for use with YOLO11 model')
20
- parser.add_argument("-o","--output", help="Name of results file. If no file is specified, one will be created from the key file name")
21
- parser.add_argument("-k", "--key", help="CSV key file to use as output template. If no file is specified, will look for one in target directory. Not used in single-image mode")
22
- parser.add_argument("-a","--annotated", help="Directory to save annotated image files", required=False)
23
- parser.add_argument("--conf", help="Confidence cutoff (default = 0.6)", default=0.6, type=float)
24
- args = parser.parse_args()
25
- return args
26
-
27
- def check_args():
28
- args = options()
29
- # basic checks on target file validity
30
- args.imgpath = Path(args.img)
31
- if not args.imgpath.exists():
32
- raise Exception("Target %s is not a valid path" % args.img)
33
- # check if img_mode is specified and valid
34
- if args.img_mode:
35
- valid_modes = ['dir', 'file', 'keyence']
36
- if args.img_mode not in valid_modes:
37
- raise Exception(f"img_mode must be one of: {', '.join(valid_modes)}")
38
- # check for potential images in the target directory
39
- if args.img_mode in ['dir','keyence']:
40
- potential_images = [x for x in args.imgpath.iterdir() if x.suffix.lower() in ['.tif','.tiff','.jpg','.jpeg','.png']]
41
- if len(potential_images) == 0:
42
- raise Exception('No valid images (.png, .tif, .tiff, .jpeg, .jpg) in target folder %s' % args.img)
43
- else:
44
- print('%s valid images found' % len(potential_images))
45
- args.subimage_paths = potential_images
46
-
47
- # if no weights file, try using the default weights.pt
48
- if not args.weights:
49
- script_dir = Path(__file__).parent
50
- default_weights = script_dir / 'weights.pt'
51
- if default_weights.exists():
52
- args.weights = str(default_weights)
53
- else:
54
- raise Exception('No weights file specified and default weights.pt not found in script directory')
55
-
56
- # for /XY00/ subdirectories, we require a valid key
57
- # ensure that either a key is specified, or if a single .csv exists in the target dir, use that
58
- if args.img_mode == 'keyence':
59
- if args.key:
60
- args.keypath = Path(args.key)
61
- if not arg.keypath.exists():
62
- raise Exception('Specified key file does not exist: %s' % args.keypath)
63
- if args.keypath.suffix != '.csv':
64
- raise Exception("Specified key file is not a .csv: %s" % args.keypath)
65
- else:
66
- print('Running on /XY00/ subdirectories but no key specified. Looking for key file...')
67
- potential_keys = list(args.imgpath.glob('*.csv'))
68
- if len(potential_keys) == 0:
69
- raise Exception("No .csv files found in target folder %s, please check directory" % args.img)
70
- if len(potential_keys) > 1:
71
- raise Exception("Multiple .csv files found in target folder %s, please specify which one to use")
72
- else:
73
- args.keypath = potential_keys[0]
74
- args.key = str(potential_keys[0])
75
-
76
- # if path to results file is specified, ensure it is .csv
77
- if args.output:
78
- args.outpath = Path(args.output)
79
- if args.outpath.suffix != '.csv':
80
- raise Exception("Specified output file is not a .csv: %s" % args.outpath)
81
- else:
82
- # for XY00 subdirs, name it after the required key file
83
- # for an image directory, name it after the directory
84
- if args.img_mode == 'keyence':
85
- args.output = '%s_eggcounts.csv' % args.keypath.stem
86
- else:
87
- args.output = '%s_eggcounts.csv' % args.imgpath.stem
88
- args.outpath = Path(args.output)
89
-
90
- # finally, check the target dir to save annotated images in
91
- if args.annotated:
92
- args.annotpath = Path(args.annotated)
93
- if not args.annotpath.exists():
94
- os.mkdir(args.annotated)
95
- elif not args.annotpath.is_dir():
96
- raise Exception("annotated output folder is not a valid directory: %s" % args.annotated)
97
- return args
98
-
99
- # parse a key file, make sure it all looks correct and can be merged later
100
- def parse_key_file(keypath):
101
- key = pd.read_csv(keypath)
102
- # drop potential Unnamed: 0 column if rownames from R were included without col header
103
- key = key.loc[:, ~key.columns.str.contains('^Unnamed')]
104
- # for now, will only allow 96-row key files
105
- # can handle edge cases, but much easier if we just require 96
106
- if key.shape[0] > 96:
107
- raise Exception("More than 96 rows found in key. Please check formatting and try again")
108
- # check if it's got at least one column formatted with what looks like plate positions
109
- well_columns = []
110
- for col in key.columns:
111
- if key[col].dtype.kind == "O":
112
- if all(key[col].str.fullmatch("[A-H][0-9]{1,2}")):
113
- well_columns.append(col)
114
- if len(well_columns) == 0:
115
- raise Exception("No column found with well positions of format A1/A01/H12/etc.")
116
- elif len(well_columns) > 1:
117
- raise Exception("Multiple columns found with well positions of format A1/A01/H12/etc.")
118
- # add a column named keycol, formatted to match the folder output like _A01
119
- key["keycol"] = key[well_columns[0]]
120
- # as the key, it should really be unique and complete, raise exception if not the case
121
- if any(key["keycol"].isna()):
122
- raise Exception("There appear to be blank well positions in column %s. Please fix and resubmit." % well_columns[0])
123
- if len(set(key["keycol"])) < len(key["keycol"]):
124
- raise Exception("There appear to be duplicated well positions in the key file. Please fix and resubmit.")
125
- # if formatted A1, reformat as A01
126
- key["keycol"] = key["keycol"].apply(lambda x: "_%s%s" % (re.findall("[A-H]",x)[0], re.findall("[0-9]+", x)[0].zfill(2)))
127
- return key
128
-
129
- def detect_eggs(args, key=None):
130
- if key:
131
- key = parse_key_file(str(args.keypath))
132
- model = YOLO(args.weights)
133
- if cuda.is_available():
134
- device = 'cuda'
135
- else:
136
- device = 'cpu'
137
- model.to(device)
138
- # create a couple empty lists for holding results, easier than adding to empty Pandas DF
139
- tmp_well = []
140
- tmp_numeggs = []
141
- tmp_filenames = []
142
- # single-image mode
143
- if args.img_mode == 'file':
144
- # imread then apply model, one-step predict() can't handle TIFF
145
- img = cv2.imread(str(args.imgpath))
146
- results = model.predict(img, imgsz = 1440, max_det=1000, verbose=False, conf=0.05)
147
- result = results[0]
148
- box_classes = [result.names[int(x)] for x in result.boxes.cls]
149
- # NOTE - filtering by class is not necessary, but would make this easier to extend to multi-class models
150
- # e.g. if we want to add hatched, empty eggs, etc
151
- egg_xy = [x.cpu().numpy().astype(np.int32) for i,x in enumerate(result.boxes.xyxy) if box_classes[i] == 'egg']
152
- print('Target image:\n%s' % str(args.imgpath))
153
- print('n eggs:\n%s' % len(egg_xy))
154
- if args.annotated:
155
- annot = img.copy()
156
- for xy in egg_xy:
157
- cv2.rectangle(annot, tuple(xy[0:2]), tuple(xy[2:4]), (0,0,255), 4)
158
- annot_path = args.annotpath / ('%s_annotated%s' % (args.imgpath.stem, args.imgpath.suffix))
159
- cv2.imwrite(str(annot_path), annot)
160
- print('Saving annotations to %s...' % str(annot_path))
161
- # multi-image mode, runs differently depending on whether you have /XY00/ subdirectories
162
- elif args.img_mode in ['dir', 'keyence']:
163
- subdir_paths = []
164
- if args.img_mode == 'keyence':
165
- total_subdirs = len(args.subdir_paths)
166
- for i, subdir in enumerate(args.subdir_paths):
167
- # Report progress
168
- progress_percent = int(((i + 1) / total_subdirs) * 90) # Scale to 0-90% range
169
- print(f"PROGRESS: {progress_percent}")
170
- sys.stdout.flush() # Flush output buffer
171
-
172
- # check that the empty file with well name is present
173
- well = [x.name for x in subdir.iterdir() if re.match("_[A-H][0-9]{1,2}", x.name)][0]
174
- if len(well) == 0:
175
- raise Exception("No well position file of format _A01 found in subdirectory:\n%s" % subdir)
176
- # print the XY subdirectory name for tracking purposes
177
- xy = subdir.name
178
- print(xy)
179
- # search for a filename with CH4 in it
180
- # TODO - confirm with sweetpotato group that the CH4.tif or CH4.jpg will be present in all cases
181
- candidate_img_paths = list(subdir.glob('*CH4*'))
182
- # if none or more than one, just skip the folder vs raise exceptions
183
- if len(candidate_img_paths) == 0:
184
- print("No CH4 image found for subdirectory %s" % subdir)
185
- continue
186
- elif len(candidate_img_paths) > 1:
187
- print("Multiple CH4 images found in subdirectory %s" % subdir)
188
- continue
189
- impath = candidate_img_paths[0]
190
- # get the actual output
191
- img = cv2.imread(str(impath))
192
- results = model.predict(img, imgsz = 1440, verbose=False, conf=0.05)
193
- result = results[0]
194
- box_classes = [result.names[int(x)] for x in result.boxes.cls]
195
- egg_xy = [x.cpu().numpy().astype(np.int32) for i,x in enumerate(result.boxes.xyxy) if box_classes[i] == 'egg']
196
- # append relevant output to temporary lists
197
- tmp_well.append(well)
198
- tmp_numeggs.append(len(egg_xy))
199
- tmp_filenames.append(impath.name)
200
- # annotate and save image if needed
201
- if args.annotated:
202
- annot = img.copy()
203
- for xy in egg_xy:
204
- cv2.rectangle(annot, tuple(xy[0:2]), tuple(xy[2:4]), (0,0,255), 4)
205
- annot_path = args.annotpath / ('%s_annotated%s' % (impath.stem, impath.suffix))
206
- cv2.imwrite(str(annot_path), annot)
207
- # make a CSV to merge with the key
208
- results = pd.DataFrame({
209
- "keycol": tmp_well,
210
- "num_eggs": tmp_numeggs,
211
- "filename": tmp_filenames,
212
- "folder": args.img})
213
- # merge and save
214
- outdf = key.merge(results, on = "keycol", how = "left")
215
- outdf = outdf.drop("keycol", axis = 1)
216
- else:
217
- # apply the model on each image
218
- # running model() on the target dir instead of image-by-image would be cleaner
219
- # but makes saving annotated images more complicated
220
- # can maybe revisit later
221
- total_images = len(args.subimage_paths)
222
- for i, impath in enumerate(sorted(args.subimage_paths)):
223
- # Report progress
224
- progress_percent = int(((i + 1) / total_images) * 90) # Scale to 0-90% range
225
- print(f"PROGRESS: {progress_percent}")
226
- sys.stdout.flush() # Flush output buffer
227
-
228
- img = cv2.imread(str(impath))
229
- results = model.predict(img, imgsz = 1440, verbose=False, conf=0.05)
230
- result = results[0]
231
- box_classes = [result.names[int(x)] for x in result.boxes.cls]
232
- egg_xy = [x.cpu().numpy().astype(np.int32) for i,x in enumerate(result.boxes.xyxy) if box_classes[i] == 'egg']
233
- tmp_numeggs.append(len(egg_xy))
234
- tmp_filenames.append(impath.name)
235
- # annotate if needed
236
- if args.annotated:
237
- annot = img.copy()
238
- for xy in egg_xy:
239
- cv2.rectangle(annot, tuple(xy[0:2]), tuple(xy[2:4]), (0,0,255), 4)
240
- annot_path = args.annotpath / ('%s_annotated%s' % (impath.stem, impath.suffix))
241
- cv2.imwrite(str(annot_path), annot)
242
- outdf = pd.DataFrame({
243
- "filename": tmp_filenames,
244
- "num_eggs": tmp_numeggs})
245
- # save final pandas df, print some updates for user
246
- outdf.sort_values(by='filename', inplace=True)
247
- outdf.to_csv(str(args.outpath), index=False)
248
- print('Saving output to %s...' % str(args.outpath))
249
- if args.annotated:
250
- print('Saving annotated images to %s...' % str(args.annotpath))