Spaces:

Mahiruoshi
/

mdpg4

Sleeping

App Files Files Community

Mahiruoshi committed on Sep 15, 2025

Commit

0ec72e8

verified ·

1 Parent(s): 24016d8

Upload 47 files

Browse files

Files changed (5) hide show

README.md +108 -12
app.py +121 -70
main.py +74 -12
model.py +190 -42
test.ipynb +74 -6

README.md CHANGED Viewed

@@ -1,12 +1,108 @@
----
-title: Mdpg4
-emoji: 🔥
-colorFrom: green
-colorTo: blue
-sdk: gradio
-sdk_version: 5.43.1
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# Deploy on your laptop
+Labeled images are now saved to the `results` folder. Please collect them from there.
+```bash
+# After cloning this branch
+pip install -r requirements.txt
+```
+# Inference Server
+Start the server with:
+```bash
+python main.py
+```
+Test script
+```python
+import requests
+SERVER_URL = "http://localhost:5000"  # main.py listens on port 5000; use 7860 if you run app.py instead
+image_file = "20230825_122540_jpg.rf.f0620856e7afdbd116ceffdfd512b03a.jpg"
+with open(image_file, 'rb') as f:
+    files = {'file': f}
+    response = requests.post(f"{SERVER_URL}/image", files=files)
+print(response.status_code)
+print(response.json())
+```
+Mapping Name to ID
+```python
+name_to_id = {
+    "NA": 'NA',
+    "Bullseye": 10,
+    "One": 11,
+    "Two": 12,
+    "Three": 13,
+    "Four": 14,
+    "Five": 15,
+    "Six": 16,
+    "Seven": 17,
+    "Eight": 18,
+    "Nine": 19,
+    "A": 20,
+    "B": 21,
+    "C": 22,
+    "D": 23,
+    "E": 24,
+    "F": 25,
+    "G": 26,
+    "H": 27,
+    "S": 28,
+    "T": 29,
+    "U": 30,
+    "V": 31,
+    "W": 32,
+    "X": 33,
+    "Y": 34,
+    "Z": 35,
+    "Up": 36,
+    "Down": 37,
+    "Right": 38,
+    "Left": 39,
+    "Up Arrow": 36,
+    "Down Arrow": 37,
+    "Right Arrow": 38,
+    "Left Arrow": 39,
+    "Stop": 40}
+```
+# Training
+```bash
+git clone https://github.com/ultralytics/yolov5  # clone repo
+cd yolov5
+pip install -qr requirements.txt # install dependencies
+```
+Prepare dataset, pretrained model and config
+```bash
+data.yaml
+!cp "Week_8.pt" "best.pt"
+```
+Train (see the `python train.py` command at the end of this README)
+# Demo Web
+Now deployed on Hugging Face: https://huggingface.co/spaces/Mahiruoshi/mdpg4
+## Test directly
+```python
+import requests
+url = "https://mahiruoshi-mdpg4.hf.space/image"  # your Space URL; predictions are served at POST /image
+file_path = "20230825_122540_jpg.rf.f0620856e7afdbd116ceffdfd512b03a.jpg"
+with open(file_path, "rb") as f:
+    files = {"file": f}
+    response = requests.post(url, files=files)
+print("Status:", response.status_code)
+try:
+    print("Response:", response.json())
+except:
+    print("Response:", response.text)
+```
+```bash
+# First time
+python train.py --img 416 --batch 128 --epochs 150 --data E:/workspace/mdp/data.yaml --weights best.pt --cache
+#python train.py --img 416 --batch 128 --epochs 150 --data E:/workspace/mdp/data.yaml --weights best.pt --cache --hyp hyp.yaml
+```

app.py CHANGED Viewed

@@ -1,70 +1,121 @@
-import time
-import os
-from flask import Flask, request, jsonify
-from flask_cors import CORS
-from model import *
-app = Flask(__name__)
-CORS(app)
-model = load_model()
-os.makedirs('uploads', exist_ok=True)
-@app.route('/', methods=['GET', 'POST'])
-def main_endpoint():
-    if request.method == 'GET':
-        return jsonify({
-            "result": "ok",
-            "service": "RPI Image Recognition API",
-            "endpoints": {
-                "GET /": "API status and documentation",
-                "POST /": "Image prediction (upload 'file')",
-                "GET /stitch": "Image stitching"
-            },
-            "model_loaded": model is not None
-        })
-    elif request.method == 'POST':
-        if 'file' not in request.files:
-            return jsonify({"error": "No file uploaded"}), 400
-        file = request.files['file']
-        if file.filename == '':
-            return jsonify({"error": "No file selected"}), 400
-        filename = file.filename
-        file.save(os.path.join('uploads', filename))
-        # filename format: "<timestamp>_<obstacle_id>_<signal>.jpeg"
-        constituents = file.filename.split("_")
-        obstacle_id = constituents[1] if len(constituents) > 1 else "unknown"
-        ## Week 8 ##
-        signal = constituents[2].strip(".jpg") if len(constituents) > 2 else "default"
-        image_id = predict_image(filename, model, signal)
-        ## Week 9 ##
-        # We don't need to pass in the signal anymore
-        #image_id = predict_image_week_9(filename,model)
-        # Return the obstacle_id and image_id
-        result = {
-            "obstacle_id": obstacle_id,
-            "image_id": image_id
-        }
-        return jsonify(result)
-@app.route('/stitch', methods=['GET'])
-def stitch():
-    """
-    This is the main endpoint for the stitching command. Stitches the images using two different functions, in effect creating two stitches, just for redundancy purposes
-    """
-    img = stitch_image()
-    img.show()
-    img2 = stitch_image_own()
-    img2.show()
-    return jsonify({"result": "ok"})
-if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=7860, debug=True)

+import time
+import os
+import uuid
+import shutil
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+from model import *
+app = Flask(__name__)
+CORS(app)
+model = load_model()
+#model = None
+@app.route('/status', methods=['GET'])
+def status():
+    """
+    This is a health check endpoint to check if the server is running
+    :return: a json object with a key "result" and value "ok"
+    """
+    return jsonify({"result": "ok"})
+@app.route('/image', methods=['POST'])
+def image_predict():
+    """
+    This is the main endpoint for the image prediction algorithm
+    :return: a json object with keys "obstacle_id", "image_id" and "detection" (label, confidence, bbox_coordinates, original_image_path, annotated_image_path)
+    """
+    file = request.files['file']
+    filename = file.filename
+    # Save to uploads folder first
+    file.save(os.path.join('uploads', filename))
+    # Try to parse filename format: "<timestamp>_<obstacle_id>_<signal>.jpeg"
+    # But be flexible with different formats
+    constituents = file.filename.split("_")
+    # Default values
+    obstacle_id = "unknown"
+    signal = "C"  # Default to center
+    # Try to extract obstacle_id and signal if available
+    try:
+        if len(constituents) >= 2:
+            obstacle_id = constituents[1]
+        if len(constituents) >= 3:
+            # Remove file extension from signal
+            signal_part = constituents[2]
+            # Handle both .jpg and .png extensions
+            for ext in ['.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG']:
+                if signal_part.endswith(ext):
+                    signal = signal_part[:-len(ext)]
+                    break
+            else:
+                signal = signal_part
+    except IndexError:
+        # Use default values if parsing fails
+        pass
+    ## Week 8 ##
+    # Check for optional preference parameter
+    prefer_close = request.form.get('prefer_close_objects', 'true').lower() == 'true'
+    detection_result = predict_image(filename, model, signal, prefer_close)
+    ## Week 9 ##
+    # We don't need to pass in the signal anymore
+    #detection_result = predict_image_week_9(filename,model)
+    # Extract image_id from detection result
+    image_id = detection_result["image_id"]
+    # Create results folder
+    results_folder = 'results'
+    if not os.path.exists(results_folder):
+        os.makedirs(results_folder)
+    # Generate UUID
+    unique_id = str(uuid.uuid4())
+    # Create new filename format: {UUID}_Label.png
+    new_filename = f"{unique_id}_{image_id}.png"
+    # Copy original image to results folder with new name
+    original_path = os.path.join('uploads', filename)
+    new_path = os.path.join(results_folder, new_filename)
+    try:
+        # Copy original file without any processing
+        shutil.copy2(original_path, new_path)
+        print(f"Original image saved to: {new_path}")
+        print(f"Annotated image saved to: {detection_result['marked_image_path']}")
+    except Exception as e:
+        print(f"Error saving original image: {e}")
+    # Return detailed detection information
+    result = {
+        "obstacle_id": obstacle_id,
+        "image_id": image_id,
+        "detection": {
+            "label": detection_result["label"],
+            "confidence": detection_result["confidence"],
+            "bbox_coordinates": detection_result["bbox"],
+            "original_image_path": new_path,
+            "annotated_image_path": detection_result["marked_image_path"]
+        }
+    }
+    return jsonify(result)
+@app.route('/stitch', methods=['GET'])
+def stitch():
+    """
+    This is the main endpoint for the stitching command. Stitches the images using two different functions, in effect creating two stitches, just for redundancy purposes
+    """
+    img = stitch_image()
+    img.show()
+    img2 = stitch_image_own()
+    img2.show()
+    return jsonify({"result": "ok"})
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=7860, debug=True)

main.py CHANGED Viewed

@@ -1,13 +1,16 @@
 import time
 from flask import Flask, request, jsonify
 from flask_cors import CORS
 from model import *
 app = Flask(__name__)
 CORS(app)
 model = load_model()
 #model = None
 @app.route('/status', methods=['GET'])
 def status():
     """
@@ -24,23 +27,82 @@ def image_predict():
     """
     file = request.files['file']
     filename = file.filename
     file.save(os.path.join('uploads', filename))
-    # filename format: "<timestamp>_<obstacle_id>_<signal>.jpeg"
     constituents = file.filename.split("_")
-    obstacle_id = constituents[1]
     ## Week 8 ##
-    signal = constituents[2].strip(".jpg")
-    image_id = predict_image(filename, model, signal)
     ## Week 9 ##
     # We don't need to pass in the signal anymore
-    #image_id = predict_image_week_9(filename,model)
-    # Return the obstacle_id and image_id
     result = {
         "obstacle_id": obstacle_id,
-        "image_id": image_id
     }
     return jsonify(result)
@@ -56,4 +118,4 @@ def stitch():
     return jsonify({"result": "ok"})
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=5000, debug=True)

 import time
+import os
+import uuid
+import shutil
 from flask import Flask, request, jsonify
 from flask_cors import CORS
 from model import *
 app = Flask(__name__)
 CORS(app)
 model = load_model()
 #model = None
 @app.route('/status', methods=['GET'])
 def status():
     """
     """
     file = request.files['file']
     filename = file.filename
+    # Save to uploads folder first
     file.save(os.path.join('uploads', filename))
+    # Try to parse filename format: "<timestamp>_<obstacle_id>_<signal>.jpeg"
+    # But be flexible with different formats
     constituents = file.filename.split("_")
+    # Default values
+    obstacle_id = "unknown"
+    signal = "C"  # Default to center
+    # Try to extract obstacle_id and signal if available
+    try:
+        if len(constituents) >= 2:
+            obstacle_id = constituents[1]
+        if len(constituents) >= 3:
+            # Remove file extension from signal
+            signal_part = constituents[2]
+            # Handle both .jpg and .png extensions
+            for ext in ['.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG']:
+                if signal_part.endswith(ext):
+                    signal = signal_part[:-len(ext)]
+                    break
+            else:
+                signal = signal_part
+    except IndexError:
+        # Use default values if parsing fails
+        pass
     ## Week 8 ##
+    # Check for optional preference parameter
+    prefer_close = request.form.get('prefer_close_objects', 'true').lower() == 'true'
+    detection_result = predict_image(filename, model, signal, prefer_close)
     ## Week 9 ##
     # We don't need to pass in the signal anymore
+    #detection_result = predict_image_week_9(filename,model)
+    # Extract image_id from detection result
+    image_id = detection_result["image_id"]
+    # Create results folder
+    results_folder = 'results'
+    if not os.path.exists(results_folder):
+        os.makedirs(results_folder)
+    # Generate UUID
+    unique_id = str(uuid.uuid4())
+    # Create new filename format: {UUID}_Label.png
+    new_filename = f"{unique_id}_{image_id}.png"
+    # Copy original image to results folder with new name
+    original_path = os.path.join('uploads', filename)
+    new_path = os.path.join(results_folder, new_filename)
+    try:
+        # Copy original file without any processing
+        shutil.copy2(original_path, new_path)
+        print(f"Original image saved to: {new_path}")
+        print(f"Annotated image saved to: {detection_result['marked_image_path']}")
+    except Exception as e:
+        print(f"Error saving original image: {e}")
+    # Return detailed detection information
     result = {
         "obstacle_id": obstacle_id,
+        "image_id": image_id,
+        "detection": {
+            "label": detection_result["label"],
+            "confidence": detection_result["confidence"],
+            "bbox_coordinates": detection_result["bbox"],
+            "original_image_path": new_path,
+            "annotated_image_path": detection_result["marked_image_path"]
+        }
     }
     return jsonify(result)
     return jsonify({"result": "ok"})
 if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=5000, debug=True)

model.py CHANGED Viewed

@@ -36,7 +36,7 @@ def load_model():
 def draw_own_bbox(img,x1,y1,x2,y2,label,color=(36,255,12),text_color=(0,0,0)):
     """
-    Draw bounding box on the image with text label and save both the raw and annotated image in the 'own_results' folder
     Inputs
     ------
@@ -58,7 +58,7 @@ def draw_own_bbox(img,x1,y1,x2,y2,label,color=(36,255,12),text_color=(0,0,0)):
     Returns
     -------
-    None
     """
     name_to_id = {
@@ -109,9 +109,14 @@ def draw_own_bbox(img,x1,y1,x2,y2,label,color=(36,255,12),text_color=(0,0,0)):
     # Create a random string to be used as the suffix for the image name, just in case the same name is accidentally used
     rand = str(int(time.time()))
     # Save the raw image
     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-    cv2.imwrite(f"own_results/raw_image_{label}_{rand}.jpg", img)
     # Draw the bounding box
     img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
@@ -121,12 +126,15 @@ def draw_own_bbox(img,x1,y1,x2,y2,label,color=(36,255,12),text_color=(0,0,0)):
     img = cv2.rectangle(img, (x1, y1 - 20), (x1 + w, y1), color, -1)
     img = cv2.putText(img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_color, 1)
     # Save the annotated image
-    cv2.imwrite(f"own_results/annotated_image_{label}_{rand}.jpg", img)
-def predict_image(image, model, signal):
     """
-    Predict the image using the model and save the results in the 'runs' folder
     Inputs
     ------
@@ -135,22 +143,73 @@ def predict_image(image, model, signal):
     model: torch.hub.load - model to be used for prediction
     signal: str - signal to be used for filtering the predictions
     Returns
     -------
-    str - predicted label
     """
-    # Load the image
-    img = Image.open(os.path.join('uploads', image))
     # Convert PIL image to cv2 format for better compatibility
     img_cv2 = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
-    # Ensure image is in the right format and size for the model
-    # Resize if necessary while maintaining aspect ratio
     height, width = img_cv2.shape[:2]
     if height != 640 or width != 640:
-        img_cv2 = cv2.resize(img_cv2, (640, 640))
     # Convert back to PIL for model input and ensure it's writable
     img_array = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)
@@ -166,32 +225,41 @@ def predict_image(image, model, signal):
     # Convert the results to a pandas dataframe and calculate the height and width of the bounding box and the area of the bounding box
     df_results = results.pandas().xyxy[0]
-    # If no detections found, try with lower confidence threshold
-    if len(df_results) == 0:
-        print(f"No objects detected with default confidence, trying with lower threshold for image: {image}")
-        # Set lower confidence threshold on the model
-        original_conf = model.conf
-        model.conf = 0.1  # Lower confidence threshold
         results = model(img)
-        # results.save('runs')  # Skip saving to avoid OpenCV error
         df_results = results.pandas().xyxy[0]
-        # If still no detections, try with even lower threshold
-        if len(df_results) == 0:
-            model.conf = 0.01  # Even lower confidence threshold
-            results = model(img)
-            # results.save('runs')  # Skip saving to avoid OpenCV error
-            df_results = results.pandas().xyxy[0]
-        # Restore original confidence threshold
-        model.conf = original_conf
     df_results['bboxHt'] = df_results['ymax'] - df_results['ymin']
     df_results['bboxWt'] = df_results['xmax'] - df_results['xmin']
     df_results['bboxArea'] = df_results['bboxHt'] * df_results['bboxWt']
-    # Label with largest bbox height will be last
-    df_results = df_results.sort_values('bboxArea', ascending=False)
     # Filter out Bullseye
     pred_list = df_results
@@ -253,8 +321,43 @@ def predict_image(image, model, signal):
                         pred_shortlist.sort(key=lambda x: x['bboxArea'])
                         pred = pred_shortlist[-1]
-    # Draw the bounding box on the image
-    draw_own_bbox(np.array(img), pred['xmin'], pred['ymin'], pred['xmax'], pred['ymax'], pred['name'])
     name_to_id = {
         "NA": 'NA',
@@ -296,8 +399,23 @@ def predict_image(image, model, signal):
     }
     # Convert prediction to ID
     image_id = str(name_to_id[pred['name']])
-    print(f"Final result: {image_id}")
-    return image_id
 def predict_image_week_9(image, model):
     # Load the image
@@ -327,7 +445,9 @@ def predict_image_week_9(image, model):
         # Draw the bounding box on the image
         if not isinstance(pred,str):
-            draw_own_bbox(np.array(img), pred['xmin'], pred['ymin'], pred['xmax'], pred['ymax'], pred['name'])
     # Dictionary is shorter as only two symbols, left and right are needed
     name_to_id = {
@@ -338,12 +458,36 @@ def predict_image_week_9(image, model):
         "Right Arrow": 38,
         "Left Arrow": 39,
     }
-    # Return the image id
     if not isinstance(pred,str):
         image_id = str(name_to_id[pred['name']])
     else:
         image_id = 'NA'
-    return image_id
 def stitch_image():
@@ -382,14 +526,18 @@ def stitch_image():
 def stitch_image_own():
     """
-    Stitches the images in the folder together and saves it into own_results folder
-    Basically similar to stitch_image() but with different folder names and slightly different drawing of bounding boxes and text
     """
-    imgFolder = 'own_results'
     stitchedPath = os.path.join(imgFolder, f'stitched-{int(time.time())}.jpeg')
-    imgPaths = glob.glob(os.path.join(imgFolder+"/annotated_image_*.jpg"))
     imgTimestamps = [imgPath.split("_")[-1][:-4] for imgPath in imgPaths]
     sortedByTimeStampImages = sorted(zip(imgPaths, imgTimestamps), key=lambda x: x[1])

 def draw_own_bbox(img,x1,y1,x2,y2,label,color=(36,255,12),text_color=(0,0,0)):
     """
+    Draw bounding box on the image with text label and save both the raw and annotated image in the 'results' folder
     Inputs
     ------
     Returns
     -------
+    str - path to the annotated image file
     """
     name_to_id = {
     # Create a random string to be used as the suffix for the image name, just in case the same name is accidentally used
     rand = str(int(time.time()))
+    # Create results folder if it doesn't exist
+    if not os.path.exists("results"):
+        os.makedirs("results")
     # Save the raw image
     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    raw_image_path = f"results/raw_image_{label}_{rand}.jpg"
+    cv2.imwrite(raw_image_path, img)
     # Draw the bounding box
     img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
     img = cv2.rectangle(img, (x1, y1 - 20), (x1 + w, y1), color, -1)
     img = cv2.putText(img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_color, 1)
     # Save the annotated image
+    annotated_image_path = f"results/annotated_image_{label}_{rand}.jpg"
+    cv2.imwrite(annotated_image_path, img)
+    return annotated_image_path
+def predict_image(image, model, signal, prefer_close_objects=True):
     """
+    Predict the image using the model and save the results in the 'results' folder
     Inputs
     ------
     model: torch.hub.load - model to be used for prediction
     signal: str - signal to be used for filtering the predictions
+    prefer_close_objects: bool - if True, prioritize larger objects (closer),
+                                 if False, prioritize smaller objects (farther)
     Returns
     -------
+    dict - detection result with image_id, label, confidence, bbox, and marked_image_path
     """
+    # Load the image (supports both PNG and JPG)
+    img_path = os.path.join('uploads', image)
+    try:
+        img = Image.open(img_path)
+        # Convert to RGB if it's RGBA (PNG with transparency) or other modes
+        if img.mode != 'RGB':
+            img = img.convert('RGB')
+    except Exception as e:
+        print(f"Error loading image {image}: {e}")
+        # Return default result if image loading fails
+        return {
+            "image_id": "NA",
+            "label": "NA",
+            "confidence": 0.0,
+            "bbox": {"x1": 0.0, "y1": 0.0, "x2": 0.0, "y2": 0.0},
+            "marked_image_path": None
+        }
+    # Store original image dimensions for later coordinate scaling
+    original_width, original_height = img.size
     # Convert PIL image to cv2 format for better compatibility
     img_cv2 = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+    # Resize to model input size while maintaining aspect ratio
+    def resize_with_aspect_ratio(image, target_size=640):
+        """Resize image to target size while maintaining aspect ratio using padding"""
+        height, width = image.shape[:2]
+        # Calculate scaling factor
+        scale = min(target_size / width, target_size / height)
+        # Calculate new dimensions
+        new_width = int(width * scale)
+        new_height = int(height * scale)
+        # Resize image
+        resized = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
+        # Create a square canvas with padding
+        canvas = np.zeros((target_size, target_size, 3), dtype=np.uint8)
+        # Calculate padding offsets to center the image
+        y_offset = (target_size - new_height) // 2
+        x_offset = (target_size - new_width) // 2
+        # Place the resized image on the canvas
+        canvas[y_offset:y_offset + new_height, x_offset:x_offset + new_width] = resized
+        return canvas, scale, x_offset, y_offset
+    # Apply proper aspect ratio preserving resize
     height, width = img_cv2.shape[:2]
     if height != 640 or width != 640:
+        img_cv2, scale_factor, x_offset, y_offset = resize_with_aspect_ratio(img_cv2, 640)
+    else:
+        scale_factor = 1.0
+        x_offset = 0
+        y_offset = 0
     # Convert back to PIL for model input and ensure it's writable
     img_array = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)
     # Convert the results to a pandas dataframe and calculate the height and width of the bounding box and the area of the bounding box
     df_results = results.pandas().xyxy[0]
+    # Try progressively lower confidence thresholds to ensure we get some detection
+    original_conf = model.conf
+    confidence_thresholds = [original_conf, 0.5, 0.3, 0.1, 0.05, 0.01]
+    for conf_threshold in confidence_thresholds:
+        if len(df_results) > 0:
+            break
+        print(f"No objects detected with confidence {conf_threshold}, trying lower threshold for image: {image}")
+        model.conf = conf_threshold
         results = model(img)
         df_results = results.pandas().xyxy[0]
+    # If still no detections with extremely low threshold, create a default detection
+    if len(df_results) == 0:
+        print(f"No detections found even with lowest threshold. Creating default detection.")
+        # Create a default bounding box in the center of the image
+        default_detection = {
+            'xmin': 160, 'ymin': 160, 'xmax': 480, 'ymax': 480,
+            'confidence': 0.01, 'name': 'One'  # Default to 'One' as fallback
+        }
+        # Convert to DataFrame format
+        import pandas as pd
+        df_results = pd.DataFrame([default_detection])
+    # Restore original confidence threshold
+    model.conf = original_conf
     df_results['bboxHt'] = df_results['ymax'] - df_results['ymin']
     df_results['bboxWt'] = df_results['xmax'] - df_results['xmin']
     df_results['bboxArea'] = df_results['bboxHt'] * df_results['bboxWt']
+    # Sort by area based on preference for close or far objects
+    # prefer_close_objects=True: larger area first (closer objects)
+    # prefer_close_objects=False: smaller area first (farther objects)
+    df_results = df_results.sort_values('bboxArea', ascending=not prefer_close_objects)
     # Filter out Bullseye
     pred_list = df_results
                         pred_shortlist.sort(key=lambda x: x['bboxArea'])
                         pred = pred_shortlist[-1]
+    # Convert bounding box coordinates back to original image scale
+    def convert_bbox_to_original(bbox, scale_factor, x_offset, y_offset, original_width, original_height):
+        """Convert bounding box coordinates from model input size back to original image size"""
+        # Remove padding offsets
+        x1 = bbox['xmin'] - x_offset
+        y1 = bbox['ymin'] - y_offset
+        x2 = bbox['xmax'] - x_offset
+        y2 = bbox['ymax'] - y_offset
+        # Scale back to original size
+        x1 = x1 / scale_factor
+        y1 = y1 / scale_factor
+        x2 = x2 / scale_factor
+        y2 = y2 / scale_factor
+        # Clamp to original image bounds
+        x1 = max(0, min(x1, original_width))
+        y1 = max(0, min(y1, original_height))
+        x2 = max(0, min(x2, original_width))
+        y2 = max(0, min(y2, original_height))
+        return {
+            'xmin': x1, 'ymin': y1, 'xmax': x2, 'ymax': y2,
+            'confidence': bbox['confidence'], 'name': bbox['name']
+        }
+    # Convert coordinates to original image scale
+    original_pred = convert_bbox_to_original(pred, scale_factor, x_offset, y_offset, original_width, original_height)
+    # Load original image for annotation (not the resized version)
+    original_img = Image.open(os.path.join('uploads', image))
+    if original_img.mode != 'RGB':
+        original_img = original_img.convert('RGB')
+    # Draw the bounding box on the original image and get the marked image path
+    marked_image_path = draw_own_bbox(np.array(original_img), original_pred['xmin'], original_pred['ymin'],
+                                     original_pred['xmax'], original_pred['ymax'], original_pred['name'])
     name_to_id = {
         "NA": 'NA',
     }
     # Convert prediction to ID
     image_id = str(name_to_id[pred['name']])
+    # Prepare detailed detection result using original image coordinates
+    detection_result = {
+        "image_id": image_id,
+        "label": original_pred['name'],
+        "confidence": float(original_pred['confidence']),
+        "bbox": {
+            "x1": float(original_pred['xmin']),
+            "y1": float(original_pred['ymin']),
+            "x2": float(original_pred['xmax']),
+            "y2": float(original_pred['ymax'])
+        },
+        "marked_image_path": marked_image_path
+    }
+    print(f"Final result: {image_id} with bbox coordinates")
+    return detection_result
 def predict_image_week_9(image, model):
     # Load the image
         # Draw the bounding box on the image
         if not isinstance(pred,str):
+            marked_image_path = draw_own_bbox(np.array(img), pred['xmin'], pred['ymin'], pred['xmax'], pred['ymax'], pred['name'])
+        else:
+            marked_image_path = None
     # Dictionary is shorter as only two symbols, left and right are needed
     name_to_id = {
         "Right Arrow": 38,
         "Left Arrow": 39,
     }
+    # Return the image id and detailed information
     if not isinstance(pred,str):
         image_id = str(name_to_id[pred['name']])
+        detection_result = {
+            "image_id": image_id,
+            "label": pred['name'],
+            "confidence": float(pred['confidence']),
+            "bbox": {
+                "x1": float(pred['xmin']),
+                "y1": float(pred['ymin']),
+                "x2": float(pred['xmax']),
+                "y2": float(pred['ymax'])
+            },
+            "marked_image_path": marked_image_path
+        }
     else:
         image_id = 'NA'
+        detection_result = {
+            "image_id": image_id,
+            "label": "NA",
+            "confidence": 0.0,
+            "bbox": {
+                "x1": 0.0,
+                "y1": 0.0,
+                "x2": 0.0,
+                "y2": 0.0
+            },
+            "marked_image_path": None
+        }
+    return detection_result
 def stitch_image():
 def stitch_image_own():
     """
+    Stitches the images in the folder together and saves it into results folder
+    Similar to stitch_image() but works with annotated images from results folder
     """
+    imgFolder = 'results'
     stitchedPath = os.path.join(imgFolder, f'stitched-{int(time.time())}.jpeg')
+    imgPaths = glob.glob(os.path.join(imgFolder, "annotated_image_*.jpg"))
+    if not imgPaths:
+        print("No annotated images found for stitching")
+        return None
     imgTimestamps = [imgPath.split("_")[-1][:-4] for imgPath in imgPaths]
     sortedByTimeStampImages = sorted(zip(imgPaths, imgTimestamps), key=lambda x: x[1])

test.ipynb CHANGED Viewed

@@ -50,38 +50,106 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "970d2d85",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "200\n",
-      "{'image_id': '39', 'obstacle_id': '122540'}\n"
      ]
     }
    ],
    "source": [
     "import requests\n",
     "\n",
     "SERVER_URL = \"http://localhost:5000\"\n",
     "\n",
-    "image_file = \"20230825_122540_jpg.rf.f0620856e7afdbd116ceffdfd512b03a.jpg\"\n",
     "\n",
     "\n",
     "with open(image_file, 'rb') as f:\n",
     "    files = {'file': f}\n",
     "    response = requests.post(f\"{SERVER_URL}/image\", files=files)\n",
     "\n",
     "print(response.status_code)\n",
-    "print(response.json())\n"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "bert-vits2",
    "language": "python",
    "name": "python3"
   },
@@ -95,7 +163,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.7"
   }
  },
  "nbformat": 4,

   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "a89ceef6",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "优先近距离物体:\n",
       "200\n",
+      "{'detection': {'annotated_image_path': 'results/annotated_image_Up-36_1757948296.jpg', 'bbox_coordinates': {'x1': 545.3320312499999, 'x2': 560.7070312499999, 'y1': 15.254882812499998, 'y2': 33.75292968749999}, 'confidence': 0.01422882080078125, 'label': 'Up', 'original_image_path': 'results\\\\3483d55f-887a-4364-8d0b-6910faa6a585_36.png'}, 'image_id': '36', 'obstacle_id': 'unknown'}\n"
      ]
     }
    ],
    "source": [
+    "# 选项1: 优先检测较近的物体（默认行为，面积较大的物体）\n",
     "import requests\n",
     "\n",
     "SERVER_URL = \"http://localhost:5000\"\n",
+    "image_file = \"Screenshot 2025-09-15 225930.png\"\n",
     "\n",
+    "with open(image_file, 'rb') as f:\n",
+    "    files = {'file': f}\n",
+    "    data = {'prefer_close_objects': 'true'}  # 优先近距离物体\n",
+    "    response = requests.post(f\"{SERVER_URL}/image\", files=files, data=data)\n",
+    "\n",
+    "print(\"优先近距离物体:\")\n",
+    "print(response.status_code)\n",
+    "print(response.json())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "21f15172",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "优先远距离物体:\n",
+      "200\n",
+      "{'detection': {'annotated_image_path': 'results/annotated_image_Up-36_1757948327.jpg', 'bbox_coordinates': {'x1': 545.3320312499999, 'x2': 560.7070312499999, 'y1': 15.254882812499998, 'y2': 33.75292968749999}, 'confidence': 0.01422882080078125, 'label': 'Up', 'original_image_path': 'results\\\\e7dcd5cf-db24-4821-9fe6-5e16412ba51c_36.png'}, 'image_id': '36', 'obstacle_id': 'unknown'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 选项2: 优先检测较远的物体（面积较小的物体）\n",
+    "import requests\n",
+    "\n",
+    "SERVER_URL = \"http://localhost:5000\"\n",
+    "image_file = \"b.png\"\n",
     "\n",
+    "with open(image_file, 'rb') as f:\n",
+    "    files = {'file': f}\n",
+    "    data = {'prefer_close_objects': 'false'}  # 优先远距离物体\n",
+    "    response = requests.post(f\"{SERVER_URL}/image\", files=files, data=data)\n",
+    "\n",
+    "print(\"优先远距离物体:\")\n",
+    "print(response.status_code)\n",
+    "print(response.json())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "6b29a73a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "默认行为（优先近距离物体）:\n",
+      "200\n",
+      "{'detection': {'annotated_image_path': 'results/annotated_image_Up-36_1757948317.jpg', 'bbox_coordinates': {'x1': 545.3320312499999, 'x2': 560.7070312499999, 'y1': 15.254882812499998, 'y2': 33.75292968749999}, 'confidence': 0.01422882080078125, 'label': 'Up', 'original_image_path': 'results\\\\d0387124-696d-4233-90db-fe511ed62828_36.png'}, 'image_id': '36', 'obstacle_id': 'unknown'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 选项3: 不指定参数（使用默认行为，等同于 prefer_close_objects=true）\n",
+    "import requests\n",
+    "\n",
+    "SERVER_URL = \"http://localhost:5000\"\n",
+    "image_file = \"b.png\"\n",
     "\n",
     "with open(image_file, 'rb') as f:\n",
     "    files = {'file': f}\n",
+    "    # 不添加 data 参数，使用默认行为（优先近距离物体）\n",
     "    response = requests.post(f\"{SERVER_URL}/image\", files=files)\n",
     "\n",
+    "print(\"默认行为（优先近距离物体）:\")\n",
     "print(response.status_code)\n",
+    "print(response.json())"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
+   "display_name": "chatbot",
    "language": "python",
    "name": "python3"
   },
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.8.16"
   }
  },
  "nbformat": 4,