File size: 2,225 Bytes
e99f9b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python3
"""
Simple test script for structured_ocr.py
"""

import os
import sys
import json
from pathlib import Path

def main():
    """Run a smoke test of StructuredOCR against the sample recipe image.

    Processes ``input/recipe.jpg`` with ``StructuredOCR.process_file``,
    writes the result as JSON to ``output/recipe_test.json``, then reloads
    that file and prints a short preview of it.

    Returns:
        int: 0 when the image was processed and the output file was written
        and read back; 1 when the input image is missing, processing raises,
        or the output file cannot be found afterwards. The value is meant to
        be used as the process exit status.
    """
    print("Testing OCR with a sample image file")

    # Path to the sample image file
    image_path = os.path.join("input", "recipe.jpg")

    # Check the fixture first; the OCR module is only imported once the
    # input is known to exist.
    if not os.path.isfile(image_path):
        print(f"Error: Image file not found at {image_path}")
        return 1

    print(f"File found: {image_path}")

    # Create the output directory if it doesn't exist
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(output_dir, "recipe_test.json")

    # Import the StructuredOCR class
    from structured_ocr import StructuredOCR

    # Initialize OCR processor
    processor = StructuredOCR()

    try:
        # Process the image file
        print(f"Processing image file: {image_path}")
        result = processor.process_file(image_path, file_type="image")

        # Save the result to the output file. The encoding is explicit so the
        # file does not depend on the platform's default locale.
        with open(output_path, 'w', encoding="utf-8") as f:
            json.dump(result, f, indent=2)

        print(f"Image processing completed successfully. Output saved to {output_path}")

        # Check if the output file exists
        if not os.path.isfile(output_path):
            print(f"Error: Output file not found at {output_path}")
            return 1

        print(f"Output file exists at {output_path}")
        # Print the file size
        file_size = os.path.getsize(output_path)
        print(f"Output file size: {file_size} bytes")

        # Reload from disk (rather than reusing `result`) so the preview
        # reflects what was actually written.
        print("\nPreview of output file:")
        with open(output_path, 'r', encoding="utf-8") as f:
            data = json.load(f)

        # The result schema is not visible from this script, so list entries
        # are stringified and a null value is treated as empty; a non-string
        # item should not abort the preview.
        topics = data.get('topics') or []
        languages = data.get('languages') or []
        print(f"File name: {data.get('file_name', '')}")
        print(f"Topics: {', '.join(map(str, topics))}")
        print(f"Languages: {', '.join(map(str, languages))}")
        print("OCR contents keys:", list(data.get('ocr_contents', {}).keys()))

    except Exception as e:
        # Top-level boundary of a test script: report the failure and signal
        # it through the exit status instead of a traceback.
        print(f"Error processing image: {e}")
        return 1

    return 0


if __name__ == "__main__":
    # Propagate the result so shells and CI can detect a failed run.
    sys.exit(main())