Spaces:
Running
Running
File size: 13,012 Bytes
8a74c03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 |
#!/usr/bin/env python3
"""
Improved person-on-track detector.

Instead of relying on a single text description, it asks several specific
questions and cross-validates the answers against a scene description and an
object listing.
"""
import sys
import os
from io import BytesIO
from PIL import Image
# Put this file's own directory first on sys.path so the sibling modules
# imported later in this file (`local_models`, `app`) can be found even when
# the script is launched from a different working directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
class ImprovedPersonOnTrackDetector:
    """Decide whether a person is on railway tracks by cross-checking model answers.

    Three signals are gathered for one image and merged into a single verdict:

    1. several targeted yes/no questions put to the CNN model,
    2. a free-form scene description from the transformer model,
    3. an object listing from the CNN model.

    Both models are only required to expose ``generate_caption(image, prompt)``.
    """

    # Questions put to the CNN model, in order (stored under keys q1..q5).
    _QUESTIONS = (
        "Are there any people visible in this image?",
        "Is anyone standing on railway tracks?",
        "Do you see a person on train tracks?",
        "Are the train tracks empty of people?",
        "Is this image showing people near trains?",
    )
    _SCENE_PROMPT = "Describe this scene in detail"
    _OBJECTS_PROMPT = "What objects do you see in this image?"

    # Cue -> weight.  Cues are matched as whole words / whole phrases so that
    # "snow", "know" or "eyes" no longer count as "no" / "yes".
    _YES_INDICATORS = dict.fromkeys(
        (
            "yes",
            "there is",
            "there are",
            "i see",
            "visible",
            "present",
            "standing",
            "person",
            "persons",
        ),
        1,
    )
    # Explicit negations carry double weight so that one negation cancels two
    # presence cues: "there is not a person" stays UNCLEAR instead of YES.
    # ("no one" reaches 2 as well: the word "no" plus the phrase "no one".)
    _NO_INDICATORS = {
        "no": 1,
        "not": 2,
        "cannot": 2,
        "none": 2,
        "nobody": 2,
        "no one": 1,
        "empty": 1,
        "clear": 1,
        "absent": 1,
    }
    # Phrases marking a question whose affirmative answer means "nobody there".
    _NEGATIVE_FRAMING = ("empty of", "clear of", "free of")
    # Words that turn a leading "no" into a determiner ("no one", "no people")
    # rather than an answer particle.
    _NEGATED_SUBJECTS = frozenset({"one", "people", "person", "persons"})

    _PEOPLE_KEYWORDS = frozenset({
        "person", "persons", "people", "man", "men", "woman", "women",
        "human", "humans", "individual", "individuals",
    })
    _TRACK_KEYWORDS = frozenset({
        "track", "tracks", "rail", "rails", "railway", "railways",
        "railroad", "train", "trains",
    })

    def __init__(self, model_manager):
        """Keep references to the manager and to its CNN / transformer models."""
        self.model_manager = model_manager
        self.cnn_model = model_manager.cnn_model
        self.transformer_model = model_manager.transformer_model

    def detect_person_on_track(self, image: "Image.Image") -> dict:
        """Run every question and description prompt on *image* and merge the results.

        Returns a dict with the keys ``person_on_track``, ``people_count``,
        ``confidence``, ``analysis`` and ``detailed_analysis``.  Any exception
        raised while querying the models or analysing their output is caught
        and reported through a "Detection failed" result instead of being
        propagated.
        """
        try:
            # APPROACH 1: multiple specific questions to the CNN model
            cnn_responses = {}
            for index, question in enumerate(self._QUESTIONS, start=1):
                raw_response = self.cnn_model.generate_caption(image, question)
                cleaned_response = self._clean_response(raw_response, question)
                cnn_responses[f"q{index}"] = {
                    "question": question,
                    "response": cleaned_response,
                    "analysis": self._analyze_yes_no_response(cleaned_response, question),
                }

            # APPROACH 2: transformer scene description
            scene_description = self.transformer_model.generate_caption(
                image, self._SCENE_PROMPT
            )

            # APPROACH 3: CNN object listing
            objects_response = self.cnn_model.generate_caption(image, self._OBJECTS_PROMPT)
            objects_cleaned = self._clean_response(objects_response, self._OBJECTS_PROMPT)

            return self._combine_all_analyses(cnn_responses, scene_description, objects_cleaned)
        except Exception as e:
            # Deliberate best-effort boundary: report the failure as a result.
            return {
                "person_on_track": False,
                "people_count": 0,
                "confidence": 0.0,
                "analysis": f"Detection failed: {str(e)}",
                "detailed_analysis": {"error": str(e)},
            }

    @staticmethod
    def _words(text):
        """Return the lower-cased alphanumeric words of *text* ([] for falsy input)."""
        if not text:
            return []
        lowered = str(text).lower()
        # Expand contractions so "isn't" / "don't" expose their "not".
        for contraction in ("n't", "n\u2019t"):
            lowered = lowered.replace(contraction, " not")
        return "".join(ch if ch.isalnum() else " " for ch in lowered).split()

    @staticmethod
    def _score(words, weighted_cues):
        """Sum the weights of the cues that occur in *words* as whole words/phrases."""
        padded = f" {' '.join(words)} "
        return sum(weight for cue, weight in weighted_cues.items() if f" {cue} " in padded)

    @classmethod
    def _is_negatively_framed(cls, question):
        """Return True when answering "yes" to *question* means no person is there."""
        padded = f" {' '.join(cls._words(question))} "
        return any(f" {marker} " in padded for marker in cls._NEGATIVE_FRAMING)

    def _clean_response(self, response, original_question):
        """Strip an echoed question from *response* and return what is left.

        Returns "" when the response is falsy, is only the question, or is a
        short text that mostly repeats the question's words.
        """
        if not response:
            return ""
        response = response.strip()
        question_lower = original_question.lower()
        response_lower = response.lower()

        # The model just echoed the question.
        if response_lower == question_lower:
            return ""

        # The model echoed the question and then answered: keep the answer.
        if response_lower.startswith(question_lower):
            remainder = response[len(original_question):].strip()
            # Drop separator punctuation and the spaces interleaved with it.
            return remainder.lstrip("?.,!: ") if remainder else ""

        # A short response sharing most of the question's words is treated as
        # a repetition of the question rather than an answer.
        question_words = set(question_lower.split())
        response_words = set(response_lower.split())
        overlap = len(question_words & response_words)
        if len(response_words) < 10 and overlap > len(question_words) * 0.6:
            return ""
        return response

    def _analyze_yes_no_response(self, response, question):
        """Classify *response* as YES / NO / UNCLEAR evidence that a person is present.

        The returned ``answer`` is normalised to "is a person there?", which is
        what the vote counting in ``_combine_all_analyses`` assumes.  For a
        negatively framed question (e.g. "Are the train tracks empty of
        people?") an affirmative reply therefore yields "NO" and a negative
        reply yields "YES".

        Returns a dict with ``answer`` and ``confidence`` (0.9 for a bare
        yes/no, at most 0.7 for cue-based answers, 0.3 / 0.1 when unclear).
        """
        if not response:
            return {"answer": "UNCLEAR", "confidence": 0.1}

        words = self._words(response)
        inverted = self._is_negatively_framed(question)

        # Bare yes/no (case and punctuation ignored) answers the question itself.
        if words in (["yes"], ["no"]):
            person_present = (words[0] == "yes") != inverted
            return {"answer": "YES" if person_present else "NO", "confidence": 0.9}

        yes_score = self._score(words, self._YES_INDICATORS)
        no_score = self._score(words, self._NO_INDICATORS)

        if inverted and words:
            # A leading "yes"/"no" replies to the negatively framed question,
            # so its vote moves to the other side.  "no one ..." / "no people
            # ..." start with a determiner, not a reply, and are left alone.
            determiner = len(words) > 1 and words[1] in self._NEGATED_SUBJECTS
            if words[0] == "yes":
                yes_score -= 1
                no_score += 1
            elif words[0] == "no" and not determiner:
                no_score -= 1
                yes_score += 1

        if yes_score > no_score:
            return {"answer": "YES", "confidence": min(0.7, 0.4 + yes_score * 0.1)}
        if no_score > yes_score:
            return {"answer": "NO", "confidence": min(0.7, 0.4 + no_score * 0.1)}
        return {"answer": "UNCLEAR", "confidence": 0.3}

    def _combine_all_analyses(self, cnn_responses, scene_description, objects_response):
        """Merge question votes, scene description and object listing into one verdict.

        ``cnn_responses`` maps a key to a dict holding ``question``,
        ``response`` and ``analysis`` (itself holding ``answer`` and
        ``confidence``).  ``scene_description`` and ``objects_response`` may be
        empty or None.  Returns the result dict described in
        ``detect_person_on_track``.
        """
        yes_count = 0
        no_count = 0
        unclear_count = 0
        total_confidence = 0.0
        question_results = []

        for response_data in cnn_responses.values():
            verdict = response_data["analysis"]
            answer = verdict["answer"]
            question_confidence = verdict["confidence"]
            if answer == "YES":
                yes_count += 1
            elif answer == "NO":
                no_count += 1
            else:
                unclear_count += 1
            total_confidence += question_confidence
            question_results.append({
                "question": response_data["question"],
                "response": response_data["response"],
                "answer": answer,
                "confidence": question_confidence,
            })

        # Whole-word keyword checks ("many" is not "man", "trail" is not "rail").
        scene_words = set(self._words(scene_description))
        object_words = set(self._words(objects_response))
        people_in_scene = not scene_words.isdisjoint(self._PEOPLE_KEYWORDS)
        tracks_in_scene = not scene_words.isdisjoint(self._TRACK_KEYWORDS)
        people_in_objects = not object_words.isdisjoint(self._PEOPLE_KEYWORDS)

        # Defaults used when there are no question responses at all.
        person_on_track = False
        people_count = 0
        confidence = 0.3
        analysis = "No question responses available"

        # Method 1: majority vote from the specific questions
        total_responses = yes_count + no_count + unclear_count
        if total_responses > 0:
            yes_percentage = yes_count / total_responses
            no_percentage = no_count / total_responses
            if yes_percentage >= 0.6:  # 60% or more say YES
                person_on_track = True
                confidence = 0.6 + yes_percentage * 0.2
                analysis = f"Multiple questions confirm person presence ({yes_count}/{total_responses} positive)"
                people_count = min(yes_count, 3)  # Estimate based on positive responses
            elif no_percentage >= 0.6:  # 60% or more say NO
                person_on_track = False
                confidence = 0.6 + no_percentage * 0.2
                analysis = f"Multiple questions confirm no person on tracks ({no_count}/{total_responses} negative)"
                people_count = 0
            elif people_in_scene and tracks_in_scene:
                # Mixed votes: fall back to the scene description.
                person_on_track = True
                confidence = 0.5
                analysis = "Scene analysis suggests person near tracks (mixed question results)"
                people_count = 1
            else:
                person_on_track = False
                confidence = 0.4
                analysis = "Unclear from questions, scene analysis suggests safe"
                people_count = 0

        # Method 2: cross-validation with the scene description
        if people_in_scene and tracks_in_scene and not person_on_track:
            # Scene mentions people + tracks but the questions said no: keep
            # the negative verdict, only record the disagreement.
            person_on_track = False
            analysis = "Scene mentions people and tracks but specific questions indicate safe"
            confidence = max(confidence, 0.5)
        elif not people_in_scene and person_on_track:
            # Questions said yes but the scene does not mention people.
            confidence *= 0.7
            analysis = "Questions suggest person present but scene unclear"

        # Method 3: object listing validation
        if people_in_objects and not people_in_scene and not person_on_track:
            # Objects mention people although the scene does not.
            person_on_track = True
            confidence = 0.4
            analysis = "Object detection suggests person presence"
            people_count = 1

        # Final confidence: average of the verdict's and the questions' confidence.
        avg_question_confidence = total_confidence / max(len(cnn_responses), 1)
        confidence = (confidence + avg_question_confidence) / 2

        return {
            "person_on_track": person_on_track,
            "people_count": people_count,
            "confidence": min(confidence, 1.0),
            "analysis": analysis,
            "detailed_analysis": {
                "question_results": question_results,
                "yes_responses": yes_count,
                "no_responses": no_count,
                "unclear_responses": unclear_count,
                "scene_description": scene_description,
                "people_in_scene": people_in_scene,
                "tracks_in_scene": tracks_in_scene,
                "objects_response": objects_response,
                "people_in_objects": people_in_objects,
            },
        }
def test_improved_detector():
    """Smoke-test the detector on the first extracted frame of ``test/1.mp4``.

    Loads the local model manager, extracts frames from the video, runs
    ``detect_person_on_track`` on the first frame and prints the verdict
    together with the per-question breakdown.  Every failure (setup error,
    missing video, no frames, detection error) is reported on stdout and the
    function simply returns; nothing is raised to the caller.
    """
    print("TESTING IMPROVED PERSON ON TRACK DETECTOR")
    print("=" * 60)
    print("Using multiple questions + scene analysis + object detection")
    print()

    try:
        # Imported lazily so a missing project module is reported as a setup
        # error instead of breaking the import of this file.
        from local_models import get_local_model_manager
        from app import extract_frames_from_video

        local_manager = get_local_model_manager()
        improved_detector = ImprovedPersonOnTrackDetector(local_manager)
        print("+ Improved detector ready")
    except Exception as e:
        print(f"- Setup error: {e}")
        return

    # Test with first video; os.path.join keeps the path valid on every OS.
    video_path = os.path.join("test", "1.mp4")
    if not os.path.exists(video_path):
        print(f"- Video not found: {video_path}")
        return

    try:
        with open(video_path, "rb") as f:
            video_data = f.read()
        video_file = BytesIO(video_data)

        frames = extract_frames_from_video(video_file, fps=0.5)
        if not frames:
            print("- No frames extracted")
            return

        frame_data = frames[0]
        print(f"+ Testing frame at {frame_data['timestamp']:.1f}s")

        # Test improved detector
        result = improved_detector.detect_person_on_track(frame_data["frame"])

        print("\n" + "=" * 50)
        print("IMPROVED DETECTOR RESULTS")
        print("=" * 50)

        analysis = result.get("analysis", "No analysis")
        people_count = result.get("people_count", 0)
        confidence = result.get("confidence", 0)
        person_on_track = result.get("person_on_track", False)

        if person_on_track:
            print(f"🚨 ALERT: {analysis}")
        else:
            print(f"✅ SAFE: {analysis}")
        print(f"👥 People Count: {people_count}")
        print(f"📊 Confidence: {confidence:.0%}")

        # Show detailed analysis
        detailed = result.get("detailed_analysis", {})
        if "question_results" in detailed:
            print("\n🔍 Question Analysis:")
            for q_result in detailed["question_results"]:
                print(f" Q: {q_result['question']}")
                print(f" A: {q_result['answer']} ({q_result['confidence']:.0%}) - {q_result['response'][:50]}...")

        print("\n🎯 This approach should be much more accurate!")
    except Exception as e:
        print(f"- Test error: {e}")
# Run the smoke test only when this file is executed as a script.
if __name__ == "__main__":
    test_improved_detector()