Spaces:
Runtime error
Runtime error
Delete ocr_image.py
Browse files- ocr_image.py +0 -67
ocr_image.py
DELETED
|
@@ -1,67 +0,0 @@
|
|
| 1 |
-
import argparse
|
| 2 |
-
import os.path
|
| 3 |
-
|
| 4 |
-
from texify.inference import batch_inference
|
| 5 |
-
from texify.model.model import load_model
|
| 6 |
-
from texify.model.processor import load_processor
|
| 7 |
-
from PIL import Image
|
| 8 |
-
from texify.settings import settings
|
| 9 |
-
from texify.util import is_valid_image
|
| 10 |
-
import json
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
def inference_single_image(image_path, json_path, model, processor):
    """Run inference on one image and save the result as JSON.

    The image at ``image_path`` is passed to ``batch_inference`` as a
    one-element batch. The output file is a JSON list containing a single
    object with the keys ``"image_path"`` and ``"text"``.

    Args:
        image_path: Path of the image file to open.
        json_path: Path of the JSON file to write (overwritten if present).
        model: Model object forwarded unchanged to ``batch_inference``.
        processor: Processor object forwarded unchanged to ``batch_inference``.
    """
    # Use the image as a context manager so its underlying file handle is
    # released as soon as inference is done, instead of being leaked.
    with Image.open(image_path) as image:
        text = batch_inference([image], model, processor)

    write_data = [{"image_path": image_path, "text": text[0]}]
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(write_data, f, indent=4)
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
def inference_image_dir(image_dir, json_path, model, processor, max=None):
    """Run inference on the images in a directory and save results as JSON.

    Every entry of ``image_dir`` that ``is_valid_image`` accepts is processed
    in batches of ``settings.BATCH_SIZE``. The output file is a JSON list with
    one ``{"image_path": ..., "text": ...}`` object per processed image.

    Args:
        image_dir: Directory whose entries are considered for inference.
        json_path: Path of the JSON file to write (overwritten if present).
        model: Model object forwarded unchanged to ``batch_inference``.
        processor: Processor object forwarded unchanged to ``batch_inference``.
        max: Optional cap on the number of images processed. A falsy value
            (``None`` or ``0``) means no cap. The name shadows the builtin
            but is kept because it is part of the public signature.
    """
    # os.listdir returns entries in arbitrary order; sort so that the subset
    # selected by ``max`` and the order of the output are reproducible.
    candidates = sorted(
        os.path.join(image_dir, image_name) for image_name in os.listdir(image_dir)
    )
    image_paths = [path for path in candidates if is_valid_image(path)]
    if max:
        image_paths = image_paths[:max]

    write_data = []
    for start in range(0, len(image_paths), settings.BATCH_SIZE):
        batch = image_paths[start:start + settings.BATCH_SIZE]
        images = []
        try:
            for image_path in batch:
                images.append(Image.open(image_path))
            text = batch_inference(images, model, processor)
        finally:
            # Close every image opened for this batch, even if opening a
            # later image or the inference call itself raised.
            for image in images:
                image.close()
        write_data.extend(
            {"image_path": image_path, "text": t}
            for image_path, t in zip(batch, text)
        )

    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(write_data, f, indent=4)
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
def main():
    """Command-line entry point: OCR one image or a folder of images.

    Parses the command line, loads the model and processor, runs inference on
    the given file or directory, and writes the results to a JSON file whose
    path is printed at the end.

    Exits through ``parser.error`` (usage message, non-zero status) when the
    given image path does not exist.
    """
    parser = argparse.ArgumentParser(description="OCR an image of a LaTeX equation.")
    parser.add_argument("image", type=str, help="Path to image or folder of images to OCR.")
    parser.add_argument("--max", type=int, help="Maximum number of images to OCR if a folder is passed.", default=None)
    parser.add_argument("--json_path", type=str, help="Path to JSON file to save results to.", default=os.path.join(settings.DATA_DIR, "results.json"))
    args = parser.parse_args()

    image_path = args.image
    # Validate the input before loading the model, so a mistyped path fails
    # immediately with a clear message rather than after the model load.
    if not os.path.exists(image_path):
        parser.error(f"Image path does not exist: {image_path}")

    model = load_model()
    processor = load_processor()

    # abspath guarantees a non-empty dirname, so makedirs always gets a real path.
    json_path = os.path.abspath(args.json_path)
    os.makedirs(os.path.dirname(json_path), exist_ok=True)

    if os.path.isfile(image_path):
        inference_single_image(image_path, json_path, model, processor)
    else:
        inference_image_dir(image_path, json_path, model, processor, args.max)

    print(f"Wrote results to {json_path}")
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|