Be2Jay Claude committed on
Commit
27a8db0
·
1 Parent(s): 1142ca3

Fix RT-DETR module dependency issue

Browse files

- Integrate RT-DETR functions directly into app.py
- Remove dependency on deleted test_visual_validation module
- Add load_rtdetr_model(), detect_with_rtdetr()
- Add apply_universal_filter() and helper functions
- Fixes ModuleNotFoundError on Hugging Face

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +196 -13
app.py CHANGED
@@ -22,14 +22,202 @@ from io import BytesIO
22
  from inference_sdk import InferenceHTTPClient
23
  import tempfile
24
 
25
- # test_visual_validation์—์„œ ๊ฐ€์ ธ์˜ค๊ธฐ (์ง€์—ฐ import๋กœ ๋ณ€๊ฒฝ)
26
- # from test_visual_validation import (
27
- # load_rtdetr_model,
28
- # detect_with_rtdetr,
29
- # apply_universal_filter,
30
- # calculate_morphological_features,
31
- # calculate_visual_features
32
- # )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  # YOLOv8 import
35
  # ============================================================
@@ -234,10 +422,7 @@ def load_rtdetr_on_demand():
234
  """RT-DETR ๋ชจ๋ธ์„ ํ•„์š”์‹œ์—๋งŒ ๋กœ๋”ฉ"""
235
  global processor, model
236
  if processor is None or model is None:
237
- print("๐Ÿ”„ RT-DETR ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘...")
238
- from test_visual_validation import load_rtdetr_model
239
  processor, model = load_rtdetr_model()
240
- print("โœ… RT-DETR ๋กœ๋”ฉ ์™„๋ฃŒ")
241
  return "โœ… RT-DETR ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ"
242
  else:
243
  return "โ„น๏ธ RT-DETR ๋ชจ๋ธ์ด ์ด๋ฏธ ๋กœ๋”ฉ๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค"
@@ -270,7 +455,6 @@ def detect_with_selected_model(image, confidence, model_type):
270
  if model_type == "RT-DETR":
271
  if processor is None or model is None:
272
  raise ValueError("โš ๏ธ RT-DETR ๋ชจ๋ธ์ด ๋กœ๋”ฉ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. '๐Ÿ”„ RT-DETR ๋กœ๋“œ' ๋ฒ„ํŠผ์„ ๋จผ์ € ํด๋ฆญํ•˜์„ธ์š”.")
273
- from test_visual_validation import detect_with_rtdetr
274
  return detect_with_rtdetr(image, processor, model, confidence)
275
  elif model_type == "VIDraft/Shrimp":
276
  return detect_with_roboflow(image, confidence)
@@ -310,7 +494,6 @@ def interactive_detect(image, confidence, filter_threshold, show_all, model_type
310
  all_detections_scored = all_detections
311
  else:
312
  # RT-DETR: Universal Filter ์‚ฌ์šฉ
313
- from test_visual_validation import apply_universal_filter
314
  all_detections_scored = apply_universal_filter(all_detections, image, threshold=0)
315
 
316
  # ํ•„ํ„ฐ ์ž„๊ณ„๊ฐ’ ์ ์šฉ
 
22
  from inference_sdk import InferenceHTTPClient
23
  import tempfile
24
 
25
+ # ============================================================
26
+ # RT-DETR ๋ฐ ํ•„ํ„ฐ๋ง ํ•จ์ˆ˜๋“ค (์ด์ „ test_visual_validation์—์„œ ํ†ตํ•ฉ)
27
+ # ============================================================
28
+ import cv2
29
+
30
def load_rtdetr_model():
    """Load the pretrained RT-DETR image processor and detection model.

    Both objects are created with ``from_pretrained`` from the same
    checkpoint, and the model is switched to evaluation mode before being
    returned. A status message is printed before and after loading.

    Returns:
        tuple: ``(processor, model)`` -- the ``RTDetrImageProcessor`` and the
        ``RTDetrForObjectDetection`` instance.
    """
    # One name for the checkpoint so the processor and the weights cannot
    # drift apart if the model id is ever changed.
    checkpoint = "PekingU/rtdetr_r50vd_coco_o365"

    print("🔄 RT-DETR 모델 로딩 중...")
    processor = RTDetrImageProcessor.from_pretrained(checkpoint)
    model = RTDetrForObjectDetection.from_pretrained(checkpoint)
    model.eval()  # this app only runs inference with the model
    print("✅ RT-DETR 로딩 완료")
    return processor, model
38
+
39
def detect_with_rtdetr(image, processor, model, confidence=0.3):
    """Run RT-DETR on one image and return the detections as plain dicts.

    Args:
        image: Input image. Its ``size`` attribute is reversed before being
            passed as the post-processing target size (presumably a PIL
            image whose ``size`` is ``(width, height)`` -- confirm with the
            caller).
        processor: Object that is called to build the model inputs and that
            provides ``post_process_object_detection``.
        model: Callable detection model; invoked with the processor output
            expanded as keyword arguments.
        confidence: Score threshold forwarded to post-processing.

    Returns:
        list[dict]: One dict per detection with keys ``'bbox'``
        (``[x1, y1, x2, y2]``), ``'confidence'`` and ``'label'``.
    """
    encoded = processor(images=image, return_tensors="pt")

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        raw_outputs = model(**encoded)

    reversed_size = torch.tensor([image.size[::-1]])
    per_image = processor.post_process_object_detection(
        raw_outputs,
        target_sizes=reversed_size,
        threshold=confidence,
    )
    first_image = per_image[0]  # a single image was submitted

    def _to_record(score, label, box):
        # Convert tensor values into plain Python numbers.
        x1, y1, x2, y2 = box.tolist()
        return {
            'bbox': [x1, y1, x2, y2],
            'confidence': score.item(),
            'label': label.item(),
        }

    return [
        _to_record(score, label, box)
        for score, label, box in zip(
            first_image["scores"], first_image["labels"], first_image["boxes"]
        )
    ]
62
+
63
def calculate_morphological_features(bbox, image_size):
    """Derive simple shape descriptors from a bounding box.

    Args:
        bbox: ``(x1, y1, x2, y2)`` box corners.
        image_size: ``(width, height)`` of the image the box belongs to.

    Returns:
        dict: ``aspect_ratio`` (longer side / shorter side, with the shorter
        side floored at 1), ``area_ratio`` (box area / image area),
        ``compactness`` (4*pi*area / perimeter**2, with the squared perimeter
        floored at 1), plus the raw ``width`` and ``height`` of the box.
    """
    left, top, right, bottom = bbox
    box_w = right - left
    box_h = bottom - top
    img_w, img_h = image_size

    longer_side = max(box_w, box_h)
    # Floor the divisor at 1 so degenerate boxes cannot divide by zero.
    shorter_side = max(min(box_w, box_h), 1)

    outline = 2 * (box_w + box_h)

    features = {}
    features['aspect_ratio'] = longer_side / shorter_side
    features['area_ratio'] = (box_w * box_h) / (img_w * img_h)
    features['compactness'] = (4 * np.pi * box_w * box_h) / max(outline ** 2, 1)
    features['width'] = box_w
    features['height'] = box_h
    return features
87
+
88
def calculate_visual_features(image_pil, bbox):
    """Compute colour statistics (HSV) for the pixels inside a bounding box.

    Args:
        image_pil: Image convertible with ``np.array`` to an RGB pixel array
            (presumably a PIL image in RGB mode -- confirm with the caller).
        bbox: ``(x1, y1, x2, y2)`` box corners in pixels; values are
            truncated to ``int`` and clamped to the image bounds.

    Returns:
        dict: ``hue`` (mean of the H channel), ``saturation`` (mean of the S
        channel) and ``color_std`` (standard deviation of the H channel).
        When the clamped box covers no pixels, the fixed fallback
        ``{'hue': 100, 'saturation': 255, 'color_std': 255}`` is returned.
    """
    pixels = np.array(image_pil)
    img_h, img_w = pixels.shape[:2]
    x1, y1, x2, y2 = [int(v) for v in bbox]

    # Clamp to the image. Without this a slightly negative coordinate is
    # interpreted by numpy as "count from the end", which makes the crop
    # empty (or wrong) and silently returns the fallback values for boxes
    # that merely touch the left/top border.
    x1 = max(0, min(x1, img_w))
    x2 = max(0, min(x2, img_w))
    y1 = max(0, min(y1, img_h))
    y2 = max(0, min(y2, img_h))

    # Crop first so only the box region is colour-converted, not the whole
    # image on every call.
    roi = pixels[y1:y2, x1:x2]
    if roi.size == 0:
        return {'hue': 100, 'saturation': 255, 'color_std': 255}

    # RGB -> HSV directly; equivalent to the former RGB -> BGR -> HSV chain.
    hsv = cv2.cvtColor(roi, cv2.COLOR_RGB2HSV)
    hue_channel = hsv[:, :, 0]

    return {
        'hue': np.mean(hue_channel),
        'saturation': np.mean(hsv[:, :, 1]),
        # Spread of hue inside the box, used as a colour-consistency measure.
        'color_std': np.std(hue_channel),
    }
116
+
117
def _score_shrimp_candidate(det, morph, visual):
    """Score one detection against the heuristic shrimp profile.

    Args:
        det: Detection dict; only the optional ``'confidence'`` key is read.
        morph: Output of ``calculate_morphological_features``.
        visual: Output of ``calculate_visual_features``.

    Returns:
        tuple: ``(score, reasons)`` where ``reasons`` holds one
        human-readable line per evaluated criterion, in evaluation order.
    """
    score = 0
    reasons = []

    def note(points, text):
        # Apply one criterion's contribution and record its explanation.
        nonlocal score
        score += points
        reasons.append(text)

    # Aspect ratio: 4:1 to 9:1 is the preferred band, with a one-unit
    # tolerance band on either side.
    aspect = morph['aspect_ratio']
    if 4.0 <= aspect <= 9.0:
        note(25, f"✓ 종횡비 {aspect:.1f}")
    elif 3.0 <= aspect < 4.0 or 9.0 < aspect <= 10.0:
        note(12, f"△ 종횡비 {aspect:.1f}")
    else:
        note(-5, f"✗ 종횡비 {aspect:.1f}")

    # Compactness: low values mean an elongated box.
    compactness = morph['compactness']
    if compactness < 0.40:
        note(30, f"✓ 세장도 {compactness:.2f}")
    elif compactness < 0.50:
        note(15, f"△ 세장도 {compactness:.2f}")
    else:
        note(-20, f"✗ 세장도 {compactness:.2f}")

    # Absolute box area in pixels. These limits are not normalised by the
    # image size, so they depend on the input resolution.
    abs_area = morph['width'] * morph['height']
    if 50000 <= abs_area <= 500000:
        note(35, f"✓ 면적 {abs_area/1000:.0f}K")
    elif 500000 < abs_area <= 800000:
        note(-10, f"△ 면적 {abs_area/1000:.0f}K")
    elif abs_area > 800000:
        note(-30, f"✗ 면적 {abs_area/1000:.0f}K (너무큼)")
    else:
        note(-10, f"✗ 면적 {abs_area/1000:.0f}K (너무작음)")

    # Mean hue: the 90-130 band is treated as background colour.
    hue = visual['hue']
    if hue < 40 or hue > 130:
        note(10, f"✓ 색상 {hue:.0f}")
    elif 90 <= hue <= 130:
        note(-5, f"✗ 색상 {hue:.0f} (배경)")
    else:
        note(0, f"△ 색상 {hue:.0f}")

    # Mean saturation: lower is better.
    saturation = visual['saturation']
    if saturation < 85:
        note(20, f"✓ 채도 {saturation:.0f}")
    elif saturation < 120:
        note(5, f"△ 채도 {saturation:.0f}")
    else:
        note(-15, f"✗ 채도 {saturation:.0f} (높음)")

    # Hue spread inside the box: lower means more uniform colour.
    color_std = visual['color_std']
    if color_std < 50:
        note(15, f"✓ 색상일관성 {color_std:.1f}")
    elif color_std < 80:
        note(5, f"△ 색상일관성 {color_std:.1f}")
    else:
        note(-10, f"✗ 색상일관성 {color_std:.1f} (불일치)")

    # Detector confidence is only scored when the detection carries one.
    if 'confidence' in det:
        confidence = det['confidence']
        if confidence >= 0.3:
            note(15, f"✓ 신뢰도 {confidence:.0%}")
        elif confidence >= 0.1:
            note(8, f"△ 신뢰도 {confidence:.0%}")
        else:
            note(0, f"✗ 신뢰도 {confidence:.0%}")

    return score, reasons


def apply_universal_filter(detections, image, threshold=90):
    """Annotate every detection with a heuristic shrimp-likeness score.

    Each detection is scored on bounding-box shape (aspect ratio,
    compactness, absolute area), colour inside the box (mean hue, mean
    saturation, hue spread) and, when present, the detector confidence.

    Args:
        detections: Iterable of dicts that each contain a ``'bbox'`` entry
            (``[x1, y1, x2, y2]``) and optionally ``'confidence'``.
        image: Source image; its ``size`` is forwarded to
            ``calculate_morphological_features`` and the image itself to
            ``calculate_visual_features``.
        threshold: Kept for interface compatibility. It is never read, so
            no detection is dropped here; callers that need a cut-off must
            compare ``filter_score`` themselves.

    Returns:
        list[dict]: The same dict objects, in input order, each updated in
        place with ``'filter_score'`` (number) and ``'filter_reasons'``
        (list of explanation strings).
    """
    img_size = image.size
    scored = []

    for det in detections:
        bbox = det['bbox']
        morph = calculate_morphological_features(bbox, img_size)
        visual = calculate_visual_features(image, bbox)

        score, reasons = _score_shrimp_candidate(det, morph, visual)

        # The input dict is annotated in place, not copied.
        det['filter_score'] = score
        det['filter_reasons'] = reasons
        scored.append(det)

    return scored
221
 
222
  # YOLOv8 import
223
  # ============================================================
 
422
  """RT-DETR ๋ชจ๋ธ์„ ํ•„์š”์‹œ์—๋งŒ ๋กœ๋”ฉ"""
423
  global processor, model
424
  if processor is None or model is None:
 
 
425
  processor, model = load_rtdetr_model()
 
426
  return "โœ… RT-DETR ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ"
427
  else:
428
  return "โ„น๏ธ RT-DETR ๋ชจ๋ธ์ด ์ด๋ฏธ ๋กœ๋”ฉ๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค"
 
455
  if model_type == "RT-DETR":
456
  if processor is None or model is None:
457
  raise ValueError("โš ๏ธ RT-DETR ๋ชจ๋ธ์ด ๋กœ๋”ฉ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. '๐Ÿ”„ RT-DETR ๋กœ๋“œ' ๋ฒ„ํŠผ์„ ๋จผ์ € ํด๋ฆญํ•˜์„ธ์š”.")
 
458
  return detect_with_rtdetr(image, processor, model, confidence)
459
  elif model_type == "VIDraft/Shrimp":
460
  return detect_with_roboflow(image, confidence)
 
494
  all_detections_scored = all_detections
495
  else:
496
  # RT-DETR: Universal Filter ์‚ฌ์šฉ
 
497
  all_detections_scored = apply_universal_filter(all_detections, image, threshold=0)
498
 
499
  # ํ•„ํ„ฐ ์ž„๊ณ„๊ฐ’ ์ ์šฉ