Spaces:
Running
Running
#!/usr/bin/env python3
"""
Simple test script for structured_ocr.py
"""
| import os | |
| import sys | |
| import json | |
| from pathlib import Path | |
def main():
    """Run a smoke test of ``StructuredOCR`` on ``input/recipe.jpg``.

    Steps, in order:

    1. Check that the sample image exists; report and stop if it does not.
    2. Create the ``output`` directory if needed.
    3. Call ``StructuredOCR().process_file(image_path, file_type="image")``
       and dump the result as JSON to ``output/recipe_test.json``.
    4. Confirm the output file exists, print its size, reload it, and print
       a short preview of selected fields.

    Progress and failures are reported with ``print``. Exceptions raised
    while processing or saving are caught and reported; errors importing or
    constructing ``StructuredOCR`` are not caught and propagate to the caller.

    Returns:
        None.
    """
    print("Testing OCR with a sample image file")

    # Path to the sample image file
    image_path = os.path.join("input", "recipe.jpg")

    # Guard clause: nothing else is touched when the sample is missing.
    if not os.path.isfile(image_path):
        print(f"Error: Image file not found at {image_path}")
        return
    print(f"File found: {image_path}")

    # Create the output directory if it doesn't exist
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, "recipe_test.json")

    # Imported after the input check, so a missing sample file is reported
    # without loading structured_ocr at all.
    from structured_ocr import StructuredOCR

    processor = StructuredOCR()

    # Only OCR + save live inside this try, so the "Error processing image"
    # message is never printed for a problem in the preview code below.
    try:
        print(f"Processing image file: {image_path}")
        result = processor.process_file(image_path, file_type="image")
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=2)
    except Exception as e:
        print(f"Error processing image: {e}")
        return
    print(f"Image processing completed successfully. Output saved to {output_path}")

    # Check that the output file really landed on disk.
    if not os.path.isfile(output_path):
        print(f"Error: Output file not found at {output_path}")
        return
    print(f"Output file exists at {output_path}")
    print(f"Output file size: {os.path.getsize(output_path)} bytes")

    # Reload from disk (rather than reusing ``result``) so the preview
    # reflects what was actually written.
    print("\nPreview of output file:")
    try:
        with open(output_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError is a ValueError subclass.
        print(f"Error reading output file: {e}")
        return
    _print_preview(data)


def _join_items(values):
    """Return *values* as a comma-separated string, tolerating odd shapes.

    ``None`` (a JSON ``null``) or an empty list gives ``""``; a bare string
    is returned unchanged instead of being split into characters; any other
    iterable has each item passed through ``str`` before joining.
    """
    if not values:
        return ""
    if isinstance(values, str):
        return values
    return ", ".join(str(item) for item in values)


def _print_preview(data):
    """Print the file name, topics, languages and OCR content keys of *data*."""
    if not isinstance(data, dict):
        # The field lookups below only make sense for a JSON object.
        print(f"Unexpected output type: {type(data).__name__}")
        return
    print(f"File name: {data.get('file_name', '')}")
    print(f"Topics: {_join_items(data.get('topics'))}")
    print(f"Languages: {_join_items(data.get('languages'))}")
    ocr_contents = data.get("ocr_contents")
    content_keys = list(ocr_contents) if isinstance(ocr_contents, dict) else []
    print("OCR contents keys:", content_keys)
# Script entry point: run the smoke test only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()