| | """ |
| | cli.py |
| | ────── |
| | Command-line interface for ClearPath Scene Description. |
| | |
| | Usage examples: |
| | python cli.py --image photo.jpg |
| | python cli.py --image photo.jpg --speak |
| | python cli.py --video footage.mp4 --interval 3 --speak |
| | python cli.py --camera --speak # live webcam loop (press q to quit) |
| | """ |
| |
|
| | import argparse |
| | import sys |
| | import logging |
| | import time |
| |
|
| | from PIL import Image |
| | import cv2 |
| |
|
| | from scene_captioner import SceneCaptioner |
| | from safety_classifier import SafetyClassifier |
| | from tts_engine import TTSEngine |
| |
|
# Configure root logging once for the whole CLI: INFO and above, each record
# prefixed with a wall-clock time (HH:MM:SS) and its level name.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%H:%M:%S",
    level=logging.INFO,
)

# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)
| |
|
| | |
# ANSI SGR escape sequences used to colour terminal output.
# Bright foreground colours are codes 91 (red), 92 (green), 93 (yellow), 96 (cyan).
RED, GREEN, YELLOW, CYAN = (f"\033[{code}m" for code in (91, 92, 93, 96))
BOLD = "\033[" + "1m"   # bold / increased intensity
RESET = "\033[" + "0m"  # clear all attributes
| |
|
| |
|
def print_result(caption: str, result, timestamp: str = ""):
    """Print a caption and its safety verdict as a framed, colourised report.

    Args:
        caption: Scene description text to display.
        result: Classification outcome. ``is_dangerous`` is always read;
            ``hazards`` and ``matches`` (iterables of strings) are read only
            when ``is_dangerous`` is truthy.
        timestamp: Optional label shown above the caption; skipped when empty.
    """
    rule = "─" * 60

    # Assemble the report top to bottom, then emit it one line per print call.
    report = ["", rule]
    if timestamp:
        report.append(f"{CYAN}⏱ {timestamp}{RESET}")
    report.extend([f"{BOLD}📝 Caption:{RESET}", f" {caption}", ""])

    if not result.is_dangerous:
        report.append(f"{GREEN}{BOLD}✅ CLASSIFICATION : SAFE{RESET}")
    else:
        hazard_list = ", ".join(result.hazards)
        token_list = ", ".join(result.matches)
        report.append(f"{RED}{BOLD}⚠️ CLASSIFICATION : DANGEROUS{RESET}")
        report.append(f"{RED} Hazard categories : {hazard_list}{RESET}")
        report.append(f"{RED} Matched tokens : {token_list}{RESET}")

    report.extend([rule, ""])
    for line in report:
        print(line)
| |
|
| |
|
def _build_parser():
    """Create the argument parser for the ClearPath command-line interface."""
    parser = argparse.ArgumentParser(
        description="ClearPath — Real-Time Scene Description for Visually-Impaired People",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    # Exactly one input source must be chosen.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--image", metavar="PATH", help="Path to an image file")
    source.add_argument("--video", metavar="PATH", help="Path to a video file")
    source.add_argument("--camera", action="store_true", help="Use webcam (live loop)")

    parser.add_argument("--speak", action="store_true", help="Read description aloud via TTS")
    parser.add_argument("--interval", type=float, default=3.0,
                        help="Seconds between captures in video/camera mode (default: 3)")
    parser.add_argument("--model", default=None,
                        help="Override Qwen model ID (e.g. Qwen/Qwen2-VL-7B-Instruct)")
    return parser


def _process_image(path, run):
    """Load one image file as RGB and describe it; exit with a message if unreadable."""
    try:
        # The context manager closes the file handle once the RGB copy exists.
        with Image.open(path) as src:
            img = src.convert("RGB")
    except OSError as exc:  # missing file, permission error, or not an image
        sys.exit(f"❌ Could not open image '{path}': {exc}")
    run(img)


def _process_video(path, interval, run):
    """Describe one frame out of every ``interval`` seconds of a video file."""
    cap = cv2.VideoCapture(path)
    try:
        if not cap.isOpened():
            sys.exit(f"❌ Could not open video '{path}'.")

        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:  # 0, negative or NaN: metadata missing, assume 25 fps
            fps = 25
        step = max(1, int(fps * interval))
        print(f"{CYAN}Processing video — capturing every {interval}s …{RESET}")

        idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:  # end of stream or decode failure
                break
            if idx % step == 0:
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                run(Image.fromarray(rgb), f"Frame {idx} / {round(idx / fps, 1)}s")
            idx += 1
    finally:
        # Release the capture even if captioning raises part-way through.
        cap.release()
    print(f"{GREEN}Video processing complete.{RESET}")


def _process_camera(interval, run):
    """Show a live webcam preview and describe a frame every ``interval`` seconds."""
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        sys.exit("❌ Could not open webcam.")
    print(f"{CYAN}Live camera mode — capturing every {interval}s. "
          f"Press q in the preview window or Ctrl+C to quit.{RESET}")

    next_capture = time.monotonic()  # describe the first good frame immediately
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Brief pause so a stalled camera does not spin the CPU.
                time.sleep(0.05)
                continue

            # Keep refreshing the preview and polling the keyboard between
            # captures; sleeping for the whole interval would freeze the
            # window and make the "q" key nearly impossible to register.
            cv2.imshow("ClearPath — press q to quit", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
            if time.monotonic() < next_capture:
                continue

            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            run(Image.fromarray(rgb), ts=time.strftime("%H:%M:%S"))
            # Measure the gap from the end of processing, as a sleep would.
            next_capture = time.monotonic() + interval
    except KeyboardInterrupt:
        print("\nStopped.")
    finally:
        cap.release()
        cv2.destroyAllWindows()


def main():
    """Parse command-line options, load the models and run the selected mode.

    Exactly one of ``--image``, ``--video`` or ``--camera`` selects the input.
    Each processed frame is captioned, classified, printed with
    ``print_result`` and, with ``--speak``, read aloud.

    Raises:
        SystemExit: on invalid arguments (status 2) or when the chosen input
            cannot be opened (status 1 with a message).
    """
    parser = _build_parser()
    args = parser.parse_args()

    # Reject negative, NaN and infinite intervals before the slow model load;
    # they would otherwise crash later in the frame-step or timing arithmetic.
    if not (0 <= args.interval < float("inf")):
        parser.error("--interval must be a finite, non-negative number of seconds")

    logger.info("Loading captioning model …")
    captioner = SceneCaptioner(model_id=args.model) if args.model else SceneCaptioner()
    classifier = SafetyClassifier()
    tts = TTSEngine() if args.speak else None

    def run(image: Image.Image, ts: str = ""):
        """Caption and classify one frame, print it, optionally speak it."""
        caption = captioner.describe(image)
        result = classifier.classify(caption)
        print_result(caption, result, ts)
        if tts:
            prefix = "Danger detected. " if result.is_dangerous else "Safe. "
            tts.speak(prefix + caption)
        return caption, result

    # Compare against None so an empty path is reported as an error instead of
    # silently selecting no mode at all.
    if args.image is not None:
        _process_image(args.image, run)
    elif args.video is not None:
        _process_video(args.video, args.interval, run)
    elif args.camera:
        _process_camera(args.interval, run)
|
| |
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
| |
|