Spaces:

samaritan-ai
/

SamaritanOCR

Sleeping

File size: 3,819 Bytes

43bca44

from utils import error_rates
import copy
import os
import cv2
import json

from copy import deepcopy

import numpy as np

def interpolate(key1, key2, lf, lf_idx, step_percent):
    """Linearly blend a coordinate pair between two consecutive ``lf`` entries.

    Args:
        key1: Key of the first coordinate in each ``lf`` entry (e.g. ``"x0"``).
        key2: Key of the second coordinate in each ``lf`` entry (e.g. ``"y0"``).
        lf: Sequence of mappings holding the coordinates.
        lf_idx: Index of the entry the blend starts from.
        step_percent: Blend weight; ``0.0`` yields entry ``lf_idx`` and
            ``1.0`` yields entry ``lf_idx + 1``.

    Returns:
        Tuple ``(x, y)`` with the blended values of ``key1`` and ``key2``.
    """
    start = lf[lf_idx]

    # At the last entry there is no successor to blend with, so return the
    # entry itself instead of raising IndexError (this happens e.g. for a
    # path that contains a single point).
    if lf_idx + 1 >= len(lf):
        return start[key1], start[key2]

    stop = lf[lf_idx + 1]
    keep = 1.0 - step_percent

    x = stop[key1] * step_percent + start[key1] * keep
    y = stop[key2] * step_percent + start[key2] * keep

    return x, y

def get_subdivide_pt(i, pred_full, lf):
    """Return the interpolated ``lf`` point matching position ``i`` of ``pred_full``.

    The midpoint of slot ``i`` (``(i + 0.5) / len(pred_full)``) is mapped
    onto the index range of ``lf`` and both coordinate pairs stored in the
    ``lf`` entries are interpolated at that fractional index.

    Args:
        i: Position inside ``pred_full``.
        pred_full: Sized object; only its length is used.
        lf: Sequence of mappings with the keys ``x0``, ``y0``, ``x1``, ``y1``.

    Returns:
        Tuple ``(x0, y0, x1, y1)`` of interpolated coordinates.
    """
    fraction = (float(i) + 0.5) / float(len(pred_full))
    position = (len(lf) - 1) * fraction

    # Split the fractional index into a whole segment and the blend inside it.
    segment = int(np.floor(position))
    blend = position - segment

    (x0, y0), (x1, y1) = (
        interpolate(key_x, key_y, lf, segment, blend)
        for key_x, key_y in (("x0", "y0"), ("x1", "y1"))
    )
    return x0, y0, x1, y1

def _lf_point_to_dict(point, scale):
    """Convert one line-follower point into a scaled ``x0/x1/y0/y1`` dict."""
    return {
        "x0": point[0][1] * scale,
        "x1": point[0][0] * scale,
        "y0": point[1][1] * scale,
        "y1": point[1][0] * scale,
    }


def save_improved_idxs(improved_idxs, decoded_hw, decoded_raw_hw, out, x, json_folder):
    """Write the per-line results to ``x['json_path']`` and save improved line images.

    Lines whose index is not in ``improved_idxs`` are written back exactly as
    they appear in ``x['gt_json']``. Every improved line is replaced by a fresh
    record holding its ground truth plus the new prediction, its start-of-line
    point, the line-follower path and the name of the line image, and that
    image is written into ``json_folder``.

    Args:
        improved_idxs: Mapping from a line index in ``x['gt_json']`` to the
            index ``k`` used to look up that line in ``decoded_hw``,
            ``decoded_raw_hw`` and the per-line entries of ``out``.
        decoded_hw: Predictions, indexed by ``k``; stored under ``"pred"``.
        decoded_raw_hw: Predictions, indexed by ``k``; stored under
            ``"pred_full"``.
        out: Mapping providing ``'lf'``, ``'ending'``, ``'beginning'``,
            ``'results_scale'`` and ``'line_imgs'``. It is only read for
            improved lines.
        x: Mapping providing ``'gt_json'``, ``'img_key'`` and ``'json_path'``.
        json_folder: Directory the line images are written to.
    """
    gt_lines = x['gt_json']
    output_lines = [{"gt": gt['gt']} for gt in gt_lines]

    for i, record in enumerate(output_lines):

        if i not in improved_idxs:
            output_lines[i] = gt_lines[i]
            continue

        k = improved_idxs[i]

        # Bound once per line, before either use: the path points below and
        # the start-of-line point further down both need the same scale.
        scale = out['results_scale']
        lf_path = out['lf']
        end = out['ending'][k]

        # We want to trim the LF results, but it is good to keep the full
        # length of the prediction around so the full line-level images can
        # be generated later at a different resolution.
        line_points = []
        after_line_points = []
        for j, step in enumerate(lf_path):
            bucket = after_line_points if j > end else line_points
            bucket.append(_lf_point_to_dict(step[k], scale))

        # The beginning may fall between two path steps; blend the two
        # neighbouring points unless it sits on the very last step.
        begin = out['beginning'][k]
        begin_f = int(np.floor(begin))
        p0 = lf_path[begin_f][k]
        if begin_f + 1 >= len(lf_path):
            p = p0
        else:
            p1 = lf_path[begin_f + 1][k]
            t = begin - np.floor(begin)
            p = p0 * (1 - t) + p1 * t

        img_file_name = "{}_{}.png".format(x['img_key'], i)

        record['pred'] = decoded_hw[k]
        record['pred_full'] = decoded_raw_hw[k]
        record['sol'] = _lf_point_to_dict(p, scale)
        record['lf'] = line_points
        record['after_lf'] = after_line_points
        record['start_idx'] = 1  # TODO: update to backward idx
        record['hw_path'] = img_file_name

        cv2.imwrite(os.path.join(json_folder, img_file_name), out['line_imgs'][k])

    with open(x['json_path'], 'w') as f:
        json.dump(output_lines, f)

def update_ideal_results(pick, costs, decoded_hw, gt_json):
    """Choose, for every ground-truth line, between the stored and the new prediction.

    For line ``i`` the candidate is ``decoded_hw[pick[i]]`` with cost
    ``costs[i]``. The prediction already stored on the line (``'pred'``, or an
    empty string when absent) is kept when the candidate's cost is greater
    than the stored prediction's character error rate against the ground
    truth, or when the candidate is empty.

    Args:
        pick: For each line, the index of its candidate in ``decoded_hw``.
        costs: For each line, the cost of its candidate.
        decoded_hw: Candidate predictions.
        gt_json: Sequence of mappings with a ``'gt'`` entry and an optional
            ``'pred'`` entry.

    Returns:
        Tuple ``(most_ideal_pred, improved_idxs)``: the chosen prediction for
        every line, and a dict mapping each line index whose candidate was
        accepted to ``pick[i]``.
    """
    chosen = []
    accepted = {}

    for line_no, entry in enumerate(gt_json):
        stored = entry.get('pred', '')
        truth = entry['gt']

        candidate = decoded_hw[pick[line_no]]

        stored_cer = error_rates.cer(truth, stored)
        candidate_cost = costs[line_no]

        keep_stored = candidate_cost > stored_cer or len(candidate) == 0
        if keep_stored:
            chosen.append(stored)
        else:
            chosen.append(candidate)
            accepted[line_no] = pick[line_no]

    return chosen, accepted