andrevp committed on
Commit
2fe6f6e
·
verified ·
1 Parent(s): 35ab828

Add --live flag and /live command for streaming mode

Browse files
Files changed (1) hide show
  1. chat_minicpmo.py +15 -2
chat_minicpmo.py CHANGED
@@ -380,7 +380,7 @@ def run_interactive(model, processor, args):
380
  current_file = args.file
381
  current_audio = args.audio
382
  print("MiniCPM-o 4.5 MLX Chat")
383
- print("Commands: /image <path> | /audio <path> | /clear | /quit")
384
  if current_file:
385
  print(f"Loaded image: {current_file}")
386
  if current_audio:
@@ -414,6 +414,11 @@ def run_interactive(model, processor, args):
414
  current_file = None
415
  print(f"Audio loaded: {current_audio}\n")
416
  continue
 
 
 
 
 
417
 
418
  print()
419
 
@@ -449,6 +454,8 @@ def main():
449
  python chat_minicpmo.py photo.jpg -p "What's in this image?"
450
  python chat_minicpmo.py --audio speech.wav -p "Transcribe this."
451
  python chat_minicpmo.py --audio speech.wav # interactive with audio
 
 
452
  python chat_minicpmo.py # interactive mode
453
  """,
454
  )
@@ -463,13 +470,19 @@ def main():
463
  parser.add_argument("--max-tokens", type=int, default=512, help="Max tokens")
464
  parser.add_argument("--temp", type=float, default=0.0, help="Temperature")
465
  parser.add_argument("--max-slices", type=int, default=9, help="Max image slices")
 
 
 
466
  args = parser.parse_args()
467
 
468
  print("Loading model...", flush=True)
469
  model, processor = load(args.model, trust_remote_code=True)
470
  print("Model ready.\n")
471
 
472
- if args.prompt:
 
 
 
473
  run_once(model, processor, args)
474
  else:
475
  run_interactive(model, processor, args)
 
380
  current_file = args.file
381
  current_audio = args.audio
382
  print("MiniCPM-o 4.5 MLX Chat")
383
+ print("Commands: /image <path> | /audio <path> | /live | /clear | /quit")
384
  if current_file:
385
  print(f"Loaded image: {current_file}")
386
  if current_audio:
 
414
  current_file = None
415
  print(f"Audio loaded: {current_audio}\n")
416
  continue
417
+ if prompt.lower() == "/live":
418
+ from streaming import run_live_mode
419
+ run_live_mode(model, processor, args)
420
+ print()
421
+ continue
422
 
423
  print()
424
 
 
454
  python chat_minicpmo.py photo.jpg -p "What's in this image?"
455
  python chat_minicpmo.py --audio speech.wav -p "Transcribe this."
456
  python chat_minicpmo.py --audio speech.wav # interactive with audio
457
+ python chat_minicpmo.py --live # full duplex streaming
458
+ python chat_minicpmo.py --live --capture-region 0,0,1920,1080
459
  python chat_minicpmo.py # interactive mode
460
  """,
461
  )
 
470
  parser.add_argument("--max-tokens", type=int, default=512, help="Max tokens")
471
  parser.add_argument("--temp", type=float, default=0.0, help="Temperature")
472
  parser.add_argument("--max-slices", type=int, default=9, help="Max image slices")
473
+ parser.add_argument("--live", action="store_true", help="Full duplex streaming mode")
474
+ parser.add_argument("--capture-region", default=None, help="Screen region x,y,w,h (default: primary monitor)")
475
+ parser.add_argument("--audio-device", default="BlackHole", help="Audio input device (default: BlackHole)")
476
  args = parser.parse_args()
477
 
478
  print("Loading model...", flush=True)
479
  model, processor = load(args.model, trust_remote_code=True)
480
  print("Model ready.\n")
481
 
482
+ if args.live:
483
+ from streaming import run_live_mode
484
+ run_live_mode(model, processor, args)
485
+ elif args.prompt:
486
  run_once(model, processor, args)
487
  else:
488
  run_interactive(model, processor, args)