from .captioning import generate_caption
from .detection import detect_objects
from .reasoning import reason


def run_pipeline(image, question):
    """Caption an image, detect its objects, and answer a question about it.

    The stages run in a fixed order: captioning first, then object
    detection, then reasoning. The reasoning stage receives the detected
    labels as a single ", "-separated string, followed by the caption and
    the question.

    Args:
        image: Passed unchanged to ``generate_caption`` and
            ``detect_objects``; its expected type is defined by those
            helpers (not visible from this module).
        question: Passed unchanged to ``reason`` as its third argument.

    Returns:
        A 4-tuple ``(caption, labels, boxes, answer)``, where ``labels`` and
        ``boxes`` are the two values unpacked from ``detect_objects`` and
        ``answer`` is whatever ``reason`` returns.
    """
    caption = generate_caption(image)

    # detect_objects must yield exactly two items; labels must be an
    # iterable of strings for the join below to succeed.
    labels, boxes = detect_objects(image)
    label_summary = ", ".join(labels)

    answer = reason(label_summary, caption, question)
    return caption, labels, boxes, answer