| from .captioning import generate_caption | |
| from .detection import detect_objects | |
| from .reasoning import reason | |
def run_pipeline(image, question):
    """Run captioning, object detection and reasoning on a single image.

    The stages execute in a fixed order: the caption is generated first,
    then objects are detected, and finally the reasoning step is invoked
    with the detected labels, the caption and the question.

    Args:
        image: Input passed unchanged to both ``generate_caption`` and
            ``detect_objects``. Its concrete type is not visible here.
        question: Passed unchanged as the third argument to ``reason``.

    Returns:
        A 4-tuple ``(caption, labels, boxes, answer)`` where ``caption`` is
        the result of ``generate_caption``, ``labels`` and ``boxes`` are the
        two values unpacked from ``detect_objects``, and ``answer`` is the
        result of ``reason``.
    """
    description = generate_caption(image)

    # detect_objects must yield exactly two values; the first is joined with
    # str.join below, so it has to be an iterable of strings.
    detected_labels, detected_boxes = detect_objects(image)

    # The reasoning step receives the labels as one comma-separated string.
    label_summary = ", ".join(detected_labels)
    response = reason(label_summary, description, question)

    return description, detected_labels, detected_boxes, response