| |
| """ |
| MonkeyOCR Command Line Interface |
| Process documents using MonkeyOCR with MLX-VLM optimization |
| """ |
|
|
| import sys |
| import os |
| import argparse |
| import time |
| from pathlib import Path |
| from loguru import logger |
|
|
_SUPPORTED_EXTENSIONS = frozenset({".pdf", ".png", ".jpg", ".jpeg"})
_PREVIEW_LINES = 10


def _build_parser() -> argparse.ArgumentParser:
    """Return the argument parser for the MonkeyOCR command line."""
    parser = argparse.ArgumentParser(
        description="MonkeyOCR: Advanced OCR with MLX-VLM optimization for Apple Silicon"
    )
    parser.add_argument("input_path", help="Path to PDF or image file to process")
    parser.add_argument(
        "-o", "--output",
        help="Output directory (default: same as input file)",
        default=None,
    )
    parser.add_argument(
        "-c", "--config",
        help="Config file path",
        default="model_configs_mps.yaml",
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose logging",
    )
    return parser


def _log_preview(markdown_content: str, limit: int = _PREVIEW_LINES) -> None:
    """Log the non-blank lines found among the first *limit* lines of the text."""
    all_lines = markdown_content.split("\n")
    logger.info("📋 Preview:")
    for line in all_lines[:limit]:
        if line.strip():
            logger.info(f" {line}")
    # Compare against the full line count: the sliced list can never exceed
    # `limit`, so testing it would print the ellipsis even when nothing was cut.
    if len(all_lines) > limit:
        logger.info(" ...")


def main() -> None:
    """Run the MonkeyOCR command line: validate input, convert, save markdown.

    Parses ``sys.argv``, checks that the input is an existing file with a
    supported extension, creates the output directory, initializes the model
    from the given config, processes the document and writes
    ``<input stem>.md`` into the output directory, then logs a short preview.

    Exits with status 1 when the input is missing or unsupported, when
    processing raises, or when the user interrupts the run.
    """
    args = _build_parser().parse_args()

    # loguru starts with its own stderr sink already installed; remove it so
    # each message is emitted once and the level chosen here takes effect.
    logger.remove()
    logger.add(sys.stderr, level="DEBUG" if args.verbose else "INFO")

    input_path = Path(args.input_path)
    # is_file() also rejects a directory that happens to carry a valid suffix.
    if not input_path.is_file():
        logger.error(f"Input file not found: {input_path}")
        sys.exit(1)

    if input_path.suffix.lower() not in _SUPPORTED_EXTENSIONS:
        logger.error(f"Unsupported file type: {input_path.suffix}")
        # Sorted so the message is identical from run to run.
        logger.info(f"Supported formats: {', '.join(sorted(_SUPPORTED_EXTENSIONS))}")
        sys.exit(1)

    output_dir = Path(args.output) if args.output else input_path.parent
    output_dir.mkdir(parents=True, exist_ok=True)

    logger.info("🚀 Starting MonkeyOCR processing...")
    logger.info(f"📄 Input: {input_path}")
    logger.info(f"📁 Output: {output_dir}")
    logger.info(f"⚙️ Config: {args.config}")

    try:
        # Imported here rather than at module top, so the argument checks
        # above run (and can fail fast) before `app` is loaded.
        from app import process_document, initialize_model

        logger.info("🔧 Initializing MonkeyOCR model...")
        start_time = time.perf_counter()
        # NOTE(review): the returned model is never passed to
        # process_document(); presumably `app` keeps it in module state.
        # The reference is retained here in case it is the only one — confirm.
        model = initialize_model(args.config)  # noqa: F841
        init_time = time.perf_counter() - start_time
        logger.info(f"✅ Model initialized in {init_time:.2f}s")

        logger.info("🔄 Processing document...")
        process_start = time.perf_counter()
        markdown_content, layout_pdf_path = process_document(str(input_path))
        process_time = time.perf_counter() - process_start
        logger.info(f"⚡ Document processed in {process_time:.2f}s")

        markdown_file = output_dir / f"{input_path.stem}.md"
        markdown_file.write_text(markdown_content, encoding="utf-8")
        logger.info(f"📝 Markdown saved: {markdown_file}")

        if layout_pdf_path and os.path.exists(layout_pdf_path):
            logger.info(f"🎨 Layout PDF: {layout_pdf_path}")

        logger.info("🎉 Processing completed successfully!")
        # Total covers model initialization plus document processing.
        logger.info(f"⏱️ Total time: {time.perf_counter() - start_time:.2f}s")

        _log_preview(markdown_content)

    except KeyboardInterrupt:
        logger.warning("⚠️ Processing interrupted by user")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report the failure and exit non-zero; the full
        # traceback is shown only when --verbose was requested.
        logger.error(f"❌ Processing failed: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)
|
|
if __name__ == "__main__":
    # Start the CLI only when this file is executed as a script; importing
    # the module must not trigger argument parsing or processing.
    main()
|
|