"""Helpers for drawing and summarising object-detection predictions."""

import io
import logging
import warnings
from collections import Counter

import inflect
import matplotlib.pyplot as plt
from PIL import Image
from transformers import logging as hf_logging


def render_results_in_image(in_pil_img, in_results):
    """Draw bounding boxes and labels on an image and return the rendering.

    Args:
        in_pil_img: Image to annotate; it is handed straight to ``imshow``.
        in_results: Iterable of predictions. Each prediction is a mapping
            with a ``"box"`` mapping (keys ``"xmin"``, ``"ymin"``,
            ``"xmax"``, ``"ymax"``), a numeric ``"score"`` and a ``"label"``.

    Returns:
        A PIL image decoded from the PNG rendering of the annotated figure.
    """
    # Use an explicit figure/axes pair instead of pyplot's implicit "current
    # figure" so that exactly this figure is closed, even if drawing fails.
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        ax.imshow(in_pil_img)

        for prediction in in_results:
            box = prediction["box"]
            score = prediction["score"]
            label = prediction["label"]

            x, y = box["xmin"], box["ymin"]
            w = box["xmax"] - box["xmin"]
            h = box["ymax"] - box["ymin"]

            ax.add_patch(
                plt.Rectangle(
                    (x, y), w, h, fill=False, color="lime", linewidth=2
                )
            )
            # Place the caption slightly above the top-left corner of the box.
            ax.text(
                x,
                y - 5,
                f"{label}: {score:.2f}",
                color="yellow",
                fontsize=10,
                backgroundcolor="black",
            )

        ax.axis("off")

        # Save to an in-memory buffer rather than to disk.
        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight", pad_inches=0)
    finally:
        # Always release the figure; pyplot keeps figures alive until closed.
        plt.close(fig)

    buf.seek(0)
    modified_img = Image.open(buf)
    # Image.open is lazy: force the decode now so the returned image holds
    # its pixel data instead of depending on a later read of the buffer.
    modified_img.load()
    return modified_img


def summarize_predictions_natural_language(predictions):
    """Describe detected objects as an English sentence.

    Args:
        predictions: Iterable of predictions, each a mapping with a
            ``"label"`` entry. Labels are counted in first-seen order.

    Returns:
        ``"No objects detected."`` when ``predictions`` is empty; otherwise a
        sentence such as ``"In this image, there are two dogs, one bus."``.
    """
    if not predictions:
        return "No objects detected."

    p = inflect.engine()
    # Counter preserves insertion order, so labels appear as first detected.
    counts = Counter(pred["label"] for pred in predictions)

    phrases = []
    for label, count in counts.items():
        # Let inflect build the plural ("bus" -> "buses", "knife" -> "knives")
        # instead of blindly appending "s".
        noun = p.plural_noun(label) if count > 1 and label else label
        phrases.append(f"{p.number_to_words(count)} {noun}")

    # Only a single detected object in total takes the singular verb.
    verb = "is" if sum(counts.values()) == 1 else "are"
    return f"In this image, there {verb} " + ", ".join(phrases) + "."


def ignore_warnings():
    """Silence known-noisy warnings and lower logging verbosity.

    Installs ``warnings`` filters for three specific messages, configures the
    root logger at ``ERROR`` level and sets the transformers library's
    verbosity to errors only.
    """
    warnings.filterwarnings(
        "ignore", message="Some weights of the model checkpoint"
    )
    warnings.filterwarnings(
        "ignore", message="Could not find image processor class"
    )
    warnings.filterwarnings(
        "ignore", message="The `max_size` parameter is deprecated"
    )

    # NOTE: basicConfig does nothing if the root logger already has handlers.
    logging.basicConfig(level=logging.ERROR)
    hf_logging.set_verbosity_error()