msaeed3 committed
Commit e295beb · 1 Parent(s): 6f01ce4

version 1.0

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. arabic/decode_one_image.py +225 -0
  2. arabic/page_htr.py +283 -0
  3. arabic/post_process_routines.py +301 -0
  4. arabic/test_hw_helper_routines.py +147 -0
  5. arabic/warp_routines.py +368 -0
  6. coords/__init__.py +0 -0
  7. coords/points.py +342 -0
  8. coords/poly_routines.py +195 -0
  9. coords/text_cleaning_routines.py +82 -0
  10. coords/text_gt.py +101 -0
  11. model/trial_26_A/muharaf_charset.json +320 -0
  12. model/trial_26_A/set0/config_2600.yaml +25 -0
  13. model/trial_26_A/set0/pretrain/hw.pt +3 -0
  14. model/trial_26_A/set0/pretrain/lf.pt +3 -0
  15. model/trial_26_A/set0/pretrain/sol.pt +3 -0
  16. py3/e2e/__init__.py +0 -0
  17. py3/e2e/alignment_dataset.py +69 -0
  18. py3/e2e/e2e_model.py +207 -0
  19. py3/e2e/e2e_postprocessing.py +182 -0
  20. py3/e2e/forward_pass.py +86 -0
  21. py3/e2e/handwriting_alignment_loss.py +125 -0
  22. py3/e2e/nms.py +162 -0
  23. py3/e2e/validation_utils.py +137 -0
  24. py3/e2e/visualization.py +176 -0
  25. py3/hw/__init__.py +0 -0
  26. py3/hw/cnn_lstm.py +117 -0
  27. py3/lf/__init__.py +0 -0
  28. py3/lf/fast_patch_view.py +96 -0
  29. py3/lf/lf_cnn.py +45 -0
  30. py3/lf/line_follower.py +181 -0
  31. py3/lf/models/__init__.py +36 -0
  32. py3/lf/models/res_unet.py +147 -0
  33. py3/lf/models/resnet.py +335 -0
  34. py3/lf/models/tools.py +144 -0
  35. py3/lf/stn/__init__.py +0 -0
  36. py3/lf/stn/gridgen.py +126 -0
  37. py3/sol/__init__.py +0 -0
  38. py3/sol/crop_transform.py +35 -0
  39. py3/sol/crop_utils.py +48 -0
  40. py3/sol/start_of_line_finder.py +42 -0
  41. py3/sol/vgg.py +157 -0
  42. py3/utils/__init__.py +0 -0
  43. py3/utils/character_set.ipynb +539 -0
  44. py3/utils/character_set.py +61 -0
  45. py3/utils/continuous_state.py +87 -0
  46. py3/utils/dataset_parse.py +17 -0
  47. py3/utils/dataset_wrapper.py +27 -0
  48. py3/utils/error_rates.py +21 -0
  49. py3/utils/fast_inverse.py +58 -0
  50. py3/utils/safe_load.py +30 -0
arabic/decode_one_image.py ADDED
@@ -0,0 +1,225 @@
+ import os
+ import sys
+ import torch
+
+ from utils.continuous_state import init_model
+ from e2e import e2e_model, e2e_postprocessing, visualization
+ from e2e.e2e_model import E2EModel
+
+ from torch import nn
+ from torch.autograd import Variable
+
+ import json
+ import cv2
+ import numpy as np
+
+ import codecs
+ import yaml
+
+ from collections import defaultdict
+ import operator
+ import pandas as pd
+ from utils import error_rates
+ import matplotlib.pyplot as plt
+ import argparse
+
+
+ # Network output on one image.
+ # Reads the image from image_path if org_img is None.
+ def network_output(config_file, image_path, model_mode="best_overall",
+                    flip=False, use_unet=False, org_img=None, device="cuda"):
+
+     with open(config_file) as f:
+         config = yaml.load(f, Loader=yaml.Loader)
+
+     if use_unet:
+         config['network']['lf']['u_net'] = True
+
+     char_set_path = config['network']['hw']['char_set_path']
+     # Set hw's num_of_outputs in the config from the character set
+     with open(char_set_path) as f:
+         char_set = json.load(f)
+
+     config["network"]["hw"]["num_of_outputs"] = len(char_set['idx_to_char']) + 1
+
+     dtype = torch.FloatTensor
+     if 'cuda' in device:
+         dtype = torch.cuda.FloatTensor
+
+     sol, lf, hw = init_model(config, sol_dir=model_mode, lf_dir=model_mode, hw_dir=model_mode,
+                              device=device)
+
+     e2e = E2EModel(sol, lf, hw, dtype=dtype, device=device)
+     e2e.eval()
+
+     if org_img is None:
+         org_img = cv2.imread(image_path)
+         if flip:
+             org_img = cv2.flip(org_img, 1)
+
+     target_dim1 = 512
+     s = target_dim1 / float(org_img.shape[1])
+
+     pad_amount = 128
+     org_img = np.pad(org_img, ((pad_amount, pad_amount), (pad_amount, pad_amount), (0, 0)),
+                      'constant', constant_values=255)
+
+     target_dim0 = int(org_img.shape[0] * s)
+     target_dim1 = int(org_img.shape[1] * s)
+
+     full_img = org_img.astype(np.float32)
+     full_img = full_img.transpose([2, 1, 0])[None, ...]
+     full_img = torch.from_numpy(full_img)
+     full_img = full_img / 128 - 1
+
+     img = cv2.resize(org_img, (target_dim1, target_dim0), interpolation=cv2.INTER_CUBIC)
+     img = img.astype(np.float32)
+     img = img.transpose([2, 1, 0])[None, ...]
+     img = torch.from_numpy(img)
+     img = img / 128 - 1
+
+     out = e2e.forward({
+         "resized_img": img,
+         "full_img": full_img,
+         "resize_scale": 1.0 / s
+     }, use_full_img=True, device=device)
+
+     out = e2e_postprocessing.results_to_numpy(out)
+
+     if out is None:
+         print("No Results")
+         return None
+
+     # Take the padding into account
+     out['sol'][:, :2] = out['sol'][:, :2] - pad_amount
+     for l in out['lf']:
+         l[:, :2, :2] = l[:, :2, :2] - pad_amount
+
+     out['image_path'] = image_path
+
+     return out
+
+
+ def decode_one_img_with_info(config_path, out, visualize=False, flip=False, org_img=None, device="cuda"):
+
+     with open(config_path) as f:
+         config = yaml.load(f, Loader=yaml.Loader)
+
+     char_set_path = config['network']['hw']['char_set_path']
+
+     with open(char_set_path) as f:
+         char_set = json.load(f)
+
+     idx_to_char = {}
+     for k, v in char_set['idx_to_char'].items():
+         idx_to_char[int(k)] = v
+
+     out = dict(out)
+
+     image_path = str(out['image_path'])
+     if org_img is None:
+         org_img = cv2.imread(image_path)
+         if flip:
+             org_img = cv2.flip(org_img, 1)
+
+     # Postprocessing steps
+     out['idx'] = np.arange(out['sol'].shape[0])
+     out = e2e_postprocessing.trim_ends(out)
+     e2e_postprocessing.filter_on_pick(out, e2e_postprocessing.select_non_empty_string(out))
+     out = e2e_postprocessing.postprocess(out,
+                                          sol_threshold=config['post_processing']['sol_threshold'],
+                                          lf_nms_params={
+                                              "overlap_range": config['post_processing']['lf_nms_range'],
+                                              "overlap_threshold": config['post_processing']['lf_nms_threshold']
+                                          })
+     order = e2e_postprocessing.read_order(out)
+     e2e_postprocessing.filter_on_pick(out, order)
+
+     # Get the output strings
+     output_strings, decoded_raw_hw = e2e_postprocessing.decode_handwriting(out, idx_to_char)
+     return out, output_strings
+
+
+ def write_line_images(images, parent_img_fullpath, result_dir='Result', flip=True):
+     directory = os.path.dirname(parent_img_fullpath)
+     parent_basename = os.path.basename(parent_img_fullpath)
+     dir_basename = parent_basename[0:parent_basename.rfind('_')]
+     result_dir = os.path.join(directory, result_dir)
+     # result_dir already includes the parent directory
+     new_directory = os.path.join(result_dir, dir_basename)
+     if not os.path.exists(result_dir):
+         os.mkdir(result_dir)
+     if not os.path.exists(new_directory):
+         os.mkdir(new_directory)
+     # Get rid of the file extension
+     parent_basename = parent_basename[0:parent_basename.rfind('.')]
+     for ind, img in enumerate(images):
+         filename = os.path.join(new_directory,
+                                 parent_basename + '_' + str(ind) + '.png')
+         if flip:
+             img = cv2.flip(img, 1)
+         cv2.imwrite(filename, img)
+
+
+ # Write a one-time CSV header if needed
+ def write_empty(result_file):
+     result_df = pd.DataFrame(columns=['image_file', 'ground_truth', 'prediction',
+                                       'region_type',
+                                       # 'gt_poly',
+                                       'lf_points', 'beginning', 'ending', 'SOL'])
+     result_df.to_csv(result_file, index=False)
+
+
+ # TODO: Verify that (x,y) are the first two of SOL output
+ def add_offset_to_sol(sol, offset):
+     for ind in range(len(sol)):
+         sol[ind][0] += offset[0]
+         sol[ind][1] += offset[1]
+     return sol
+
+
+ def add_offset_to_lf(lf, offset):
+     for pt_ind in range(len(lf)):
+         for line_ind in range(len(lf[0])):
+             lf[pt_ind][line_ind][0][0] = lf[pt_ind][line_ind][0][0] + offset[0]
+             lf[pt_ind][line_ind][1][0] = lf[pt_ind][line_ind][1][0] + offset[1]
+             lf[pt_ind][line_ind][0][1] = lf[pt_ind][line_ind][0][1] + offset[0]
+             lf[pt_ind][line_ind][1][1] = lf[pt_ind][line_ind][1][1] + offset[1]
+     return lf
+
+
+ # The merged output ends up in out1
+ def merge_out(out1, out2, offset):
+     lf2 = add_offset_to_lf(out2['lf'], offset)
+     out1['lf'].extend(lf2)
+
+     out1['beginning'] = np.concatenate((out1['beginning'], out2['beginning']))
+     out1['ending'] = np.concatenate((out1['ending'], out2['ending']))
+
+     sol2 = add_offset_to_sol(out2['sol'], offset)
+     out1['sol'] = np.vstack((out1['sol'], sol2))
+     return out1
+
+
+ def split_image_horizontal(img_file):
+     f1 = img_file[:-4] + '_1' + '.jpg'
+     f2 = img_file[:-4] + '_2' + '.jpg'
+     img = cv2.imread(img_file)
+     [ht, width, colors] = img.shape
+     ht1 = int(ht / 2)
+     img1 = img[:ht1, :, :]
+     img2 = img[ht1:, :, :]
+     cv2.imwrite(f1, img1)
+     cv2.imwrite(f2, img2)
+     return f1, f2, ht1
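
A minimal usage sketch for this module (an illustration, not part of the commit): the config path matches model/trial_26_A/set0/config_2600.yaml from this commit, page.jpg is a placeholder image, and flip=True mirrors how page_htr.py calls these functions for right-to-left pages.

import decode_one_image as decode

config_file = "model/trial_26_A/set0/config_2600.yaml"
image_path = "page.jpg"  # placeholder

# Run the SOL + LF + HW pipeline on the page, then decode the line strings
out = decode.network_output(config_file, image_path, model_mode="pretrain", flip=True)
if out is not None:
    out, line_texts = decode.decode_one_img_with_info(config_file, out, flip=True)
    for ind, text in enumerate(line_texts):
        print(ind, text)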
arabic/page_htr.py ADDED
@@ -0,0 +1,283 @@
+ import sys
+ sys.path.append('py3/')
+ sys.path.append('coords')
+ import test_hw_helper_routines as test_hw
+ import json
+ import os
+ import torch
+ import matplotlib.pyplot as plt
+ import cv2
+ import pandas as pd
+ import numpy as np
+ import decode_one_image as decode
+ import post_process_routines as post
+ import points
+ import warp_routines as warp
+ from datetime import datetime, timezone
+ from utils import error_rates
+ import text_cleaning_routines as clean
+ import time
+ import argparse
+
+
+ def add_meta(json_obj):
+     # Get the current date and time in UTC
+     now_utc = datetime.now(timezone.utc)
+     # Format the date and time as a string
+     formatted_date_utc = now_utc.strftime('%Y-%m-%dT%H:%M:%S')
+
+     json_obj['timeStamps'] = {"created": formatted_date_utc,
+                               "lastEdited": "",
+                               "submitted": "",
+                               "checked": ""
+                               }
+
+     json_obj["annotators"] = {
+         "creator": "SFR",
+         "lastEditor": "",
+         "transcriber": "",
+         "transcription_QA": "",
+         "transcription_tagging": "",
+         "transcription_tagging_QA": ""}
+
+     return json_obj
+
+
+ def reset_time(json_obj):
+     json_obj["time"] = 0
+     for line in json_obj:
+         if line.startswith("line_"):
+             json_obj[line]["transcribeTime"] = 0
+             json_obj[line]["annotateTime"] = 0
+             json_obj[line]["edited"] = "0"
+     return json_obj
+
+
+ def get_hw(config_file, device="cuda"):
+     config = test_hw.get_config(config_file)
+
+     idx_to_char = test_hw.load_char_set(config['network']['hw']['char_set_path'])
+     if 'hw_to_save' in config['pretraining']:
+         pt_file = config['pretraining']['hw_to_save']
+     else:
+         pt_file = 'hw.pt'
+     pt_filename = os.path.join(config['pretraining']['snapshot_path'], pt_file)
+
+     config["network"]["hw"]["num_of_outputs"] = len(idx_to_char) + 1
+
+     print('...Using snapshot', pt_filename)
+     HW = test_hw.load_HW(config['network']['hw'], pt_filename)
+     device = torch.device(device)
+     HW.to(device)
+     HW.eval()
+     return HW, idx_to_char
+
+
+ def sort_lines(json_obj, copy_lines_with_text_only=False):
+     top_left = []
+     keys = []
+     for k, v in json_obj.items():
+         if k.startswith('line_'):
+             keys.append(k)
+             poly = np.array(points.list_to_xy(v['coord']))
+             top_left.append([np.max(poly, 0)[0], np.min(poly, 0)[1]])
+     # Sort by y, then by descending x (right-to-left reading order)
+     sorted_indices = sorted(range(len(top_left)),
+                             key=lambda i: (top_left[i][1], -top_left[i][0]))
+
+     sorted_json = dict()
+     # Copy all non-line keys
+     for k, v in json_obj.items():
+         if not k.startswith('line_'):
+             sorted_json[k] = v
+     # Copy all lines
+     for i, ind in enumerate(sorted_indices):
+         if copy_lines_with_text_only and len(json_obj[keys[ind]]['text']) == 0:
+             continue
+         sorted_json[f'line_{i + 1}'] = json_obj[keys[ind]]
+
+     return sorted_json
+
+
+ # This will not do line annotations...only transcriptions
+ def complete_annotations_for_directory(input_dir, config_file,
+                                        annotator, model_mode="pretrain",
+                                        do_all=False):
+     total_done = 0
+     files = os.listdir(input_dir)
+     files.sort()
+     HW, idx_to_char = get_hw(config_file)
+
+     for f in files:
+         if not f.lower().endswith('.jpg'):
+             continue
+         img_file = os.path.join(input_dir, f)
+         json_file = img_file[:-4] + '_annotate_' + annotator + '.json'
+         if not os.path.exists(json_file):
+             print('No Json for', img_file)
+             continue
+         print('doing', img_file)
+
+         with open(json_file) as fin:
+             json_obj = json.load(fin)
+         # Add meta information and reset the timings in json
+         #json_obj = add_meta(json_obj)
+         #json_obj = reset_time(json_obj)
+
+         # Read the page image once for all of its lines
+         img = cv2.imread(img_file)
+         for line, values in json_obj.items():
+             if not line.startswith('line_'):
+                 continue
+             if not do_all and len(values['text']) > 0:
+                 continue
+             line_img = warp.get_line_image(values['coord'], img)
+             line_text = test_hw.get_predicted_str(HW, None, idx_to_char, flip=True,
+                                                   img=line_img, read_image=False)
+
+             line_text_logical_order = clean.get_clean_visual_order(line_text)
+             json_obj[line]['text'] = line_text_logical_order
+
+         json_obj = sort_lines(json_obj)
+         with open(json_file, 'w') as fout:
+             json.dump(json_obj, fout, indent=2)
+
+         total_done += 1
+     return total_done
+
+
+ # This will do bulk annotation for the whole directory
+ def predict_annotations_for_directory(input_dir, config_file, annotator, model_mode="pretrain",
+                                       skip_if_json_exists=False, device="cuda"):
+     print('skip_if_json_exists', skip_if_json_exists)
+     files = os.listdir(input_dir)
+     files.sort()
+     done = 0
+     for f in files:
+         if not f.lower().endswith('.jpg'):
+             continue
+         img_file = os.path.join(input_dir, f)
+
+         json_file = img_file[:-4] + '_annotate_' + annotator + '.json'
+         if os.path.exists(json_file) and skip_if_json_exists:
+             print('already done', json_file)
+             continue
+         print('doing', img_file)
+         out = decode.network_output(config_file, img_file, flip=True, model_mode=model_mode, device=device)
+         out, predicted_text = decode.decode_one_img_with_info(config_file, out, flip=True, device=device)
+
+         poly_list = post.get_polygon_list_tuples(out)
+
+         # Get rid of degenerate polygons
+         to_del_ind = []
+         for ind, p in enumerate(poly_list):
+             if len(p) < 3:
+                 to_del_ind.append(ind)
+
+         if len(to_del_ind) > 0:
+             print('Deleting poly at index', to_del_ind)
+             poly_list = [poly_list[i] for i in range(len(poly_list)) if i not in to_del_ind]
+             predicted_text = [predicted_text[i] for i in range(len(predicted_text)) if i not in to_del_ind]
+
+         del_list, poly_list = post.get_poly_no_overlap(img_file, poly_list, 0.7)
+
+         if len(del_list) > 0:
+             print('polygons deleted', len(del_list), del_list)
+             print(len(poly_list))
+
+         predicted_text = [predicted_text[i] for i in range(len(predicted_text)) if i not in del_list]
+         poly_list = post.flip_polygon(img_file, poly_list)
+         page_json = post.create_annotations_json(predicted_text, poly_list)
+
+         with open(json_file, 'w') as fout:
+             json.dump(page_json, fout)
+         done += 1
+
+     return done
+
+
+ def page_htr_one_file(img_file, config_file, model_mode="pretrain", device="cuda"):
+     out = decode.network_output(config_file, img_file, flip=True, model_mode=model_mode, device=device)
+     out, predicted_text = decode.decode_one_img_with_info(config_file, out, flip=True, device=device)
+
+     poly_list = post.get_polygon_list_tuples(out)
+
+     # Get rid of degenerate polygons
+     to_del_ind = []
+     for ind, p in enumerate(poly_list):
+         if len(p) < 3:
+             to_del_ind.append(ind)
+
+     if len(to_del_ind) > 0:
+         poly_list = [poly_list[i] for i in range(len(poly_list)) if i not in to_del_ind]
+         predicted_text = [predicted_text[i] for i in range(len(predicted_text)) if i not in to_del_ind]
+
+     del_list, poly_list = post.get_poly_no_overlap(img_file, poly_list, 0.7)
+     predicted_text = [predicted_text[i] for i in range(len(predicted_text)) if i not in del_list]
+     predicted_text = [clean.get_clean_visual_order(txt) for txt in predicted_text]
+     poly_list = post.flip_polygon(img_file, poly_list)
+     page_json = post.create_annotations_json(predicted_text, poly_list)
+
+     torch.cuda.empty_cache()
+     return page_json
+
+
+ def hw_one_file(img_file, config_file, json_obj, model_mode="pretrain", line_key=None):
+     HW, idx_to_char = get_hw(config_file)
+
+     # Read the page image once for all of its lines
+     img = cv2.imread(img_file)
+     for line, values in json_obj.items():
+         if not line.startswith('line_'):
+             continue
+         # If line_key is specified then modify only that line
+         if line_key is not None and len(line_key) > 0 and line != line_key:
+             continue
+
+         line_img = warp.get_line_image(values['coord'], img)
+         line_text = test_hw.get_predicted_str(HW, None, idx_to_char, flip=True,
+                                               img=line_img, read_image=False)
+         line_text_logical_order = clean.get_clean_visual_order(line_text)
+         json_obj[line]['text'] = line_text_logical_order
+
+     json_obj = sort_lines(json_obj)
+     torch.cuda.empty_cache()
+     return json_obj
+
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Run HTR module")
+
+     parser.add_argument("--line_htr", type=int, required=True, help="If 1, do line_htr; else do page_htr")
+     parser.add_argument("--img_path", type=str, required=True, help="Image path")
+     parser.add_argument("--config_file", type=str, required=True, help="SFR_Arabic config file")
+     parser.add_argument("--original_json", type=str, required=True, help="Original JSON")
+     parser.add_argument("--line_key", type=str, required=True, help="Line key")
+
+     args = parser.parse_args()
+     json_obj = {}
+     if args.line_htr == 1:
+         json_obj = json.loads(args.original_json)
+         json_obj = hw_one_file(args.img_path, args.config_file, json_obj,
+                                model_mode="pretrain", line_key=args.line_key)
+     else:
+         json_obj = page_htr_one_file(args.img_path, args.config_file, device="cuda")
+
+     print('BEGIN_OUT')
+     print(json.dumps(json_obj))
+
+ # python3 arabic/page_htr.py --line_htr 0 --img_path ../../datasets/kclds/KEllis/bk_on_server/bk/KEllis2018-150a.jpg --config_file model/trial_26_A/set0/config_2600.yaml --original_json {} --line_key 0
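
The commented invocation above prints a BEGIN_OUT marker followed by the page JSON on stdout. A caller can recover the JSON from that output; a sketch, assuming the script is run from the repo root and page.jpg is a placeholder image:

import json
import subprocess

cmd = ["python3", "arabic/page_htr.py", "--line_htr", "0",
       "--img_path", "page.jpg",  # placeholder
       "--config_file", "model/trial_26_A/set0/config_2600.yaml",
       "--original_json", "{}", "--line_key", "0"]
stdout = subprocess.run(cmd, capture_output=True, text=True, check=True).stdout
# Everything after the BEGIN_OUT marker is the serialized page JSON
page_json = json.loads(stdout.split("BEGIN_OUT\n", 1)[1])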
arabic/post_process_routines.py ADDED
@@ -0,0 +1,301 @@
+ import numpy as np
+ import cv2
+ import json
+ import sys
+ sys.path.append('coords/')
+ import points
+ from PIL import Image, ImageDraw
+ import os
+ import matplotlib.pyplot as plt
+
+ from utils import error_rates
+ from arabic_reshaper import ArabicReshaper
+
+ # This file has routines for working with CERs and polygons
+
+
+ def correct_pt(value, max_value):
+     boundary = False
+     if value < 0:
+         value = 0
+         boundary = True
+     if value >= max_value:
+         value = max_value - 1
+         boundary = True
+     return [value, boundary]
+
+
+ # Each polygon is a list of (x,y) tuples
+ def get_polygon_list_tuples(out):
+     img = cv2.imread(out["image_path"])
+     img_height, img_width = img.shape[:2]
+     polygon_list = []
+     for line_ind in range(len(out['lf'][0])):
+         polygon = []
+         prev = [-1, -1]
+         begin_ind = out['beginning'][line_ind]
+         end_ind = out['ending'][line_ind]
+         begin_ind = int(np.floor(begin_ind))
+         end_ind = int(np.ceil(end_ind))
+         end_ind = min(end_ind, len(out['lf']) - 1)
+         # Top edge forward, then bottom edge in reverse, to close the polygon
+         for pt_ind in range(begin_ind, end_ind + 1):
+             pt_x = float(out['lf'][pt_ind][line_ind][0][0])
+             pt_y = float(out['lf'][pt_ind][line_ind][1][0])
+             pt_x, boundary_x = correct_pt(pt_x, img_width)
+             pt_y, boundary_y = correct_pt(pt_y, img_height)
+             if prev != [pt_x, pt_y]:
+                 polygon.append((pt_x, pt_y))
+             prev = [pt_x, pt_y]
+         for pt_ind in range(end_ind, begin_ind - 1, -1):
+             pt_x = float(out['lf'][pt_ind][line_ind][0][1])
+             pt_y = float(out['lf'][pt_ind][line_ind][1][1])
+             pt_x, boundary_x = correct_pt(pt_x, img_width)
+             pt_y, boundary_y = correct_pt(pt_y, img_height)
+             if prev != [pt_x, pt_y]:
+                 polygon.append((pt_x, pt_y))
+             prev = [pt_x, pt_y]
+
+         polygon_list.append(polygon)
+         if len(polygon) < 3:
+             print('WARNING: DEGENERATE POLYGON AT INDEX', len(polygon_list))
+     return polygon_list
+
+
+ # Each polygon is a list of (x,y) tuples
+ def get_polygon_list_without_trim(out):
+     img = cv2.imread(out["image_path"])
+     img_height, img_width = img.shape[:2]
+     polygon_list = []
+     for line_ind in range(len(out['lf'][0])):
+         polygon = []
+         begin_ind = 0
+         end_ind = len(out['lf']) - 1
+         prev = [-1, -1]
+
+         for pt_ind in range(begin_ind, end_ind + 1):
+             pt_x = float(out['lf'][pt_ind][line_ind][0][0])
+             pt_y = float(out['lf'][pt_ind][line_ind][1][0])
+             pt_x, boundary_x = correct_pt(pt_x, img_width)
+             pt_y, boundary_y = correct_pt(pt_y, img_height)
+             if prev != [pt_x, pt_y]:
+                 polygon.append((pt_x, pt_y))
+             prev = [pt_x, pt_y]
+         for pt_ind in range(end_ind, begin_ind - 1, -1):
+             pt_x = float(out['lf'][pt_ind][line_ind][0][1])
+             pt_y = float(out['lf'][pt_ind][line_ind][1][1])
+             pt_x, boundary_x = correct_pt(pt_x, img_width)
+             pt_y, boundary_y = correct_pt(pt_y, img_height)
+             if prev != [pt_x, pt_y]:
+                 polygon.append((pt_x, pt_y))
+             prev = [pt_x, pt_y]
+
+         if len(polygon) >= 3:
+             polygon_list.append(polygon)
+     return polygon_list
+
+
+ # Each polygon passed as input is a list of (x,y) tuples; same for the output.
+ # Returns the intersection area in pixels and the fraction of each polygon it covers.
+ def percent_intersection(size, poly1, poly2):
+     im1 = Image.new(mode="1", size=size)
+     draw1 = ImageDraw.Draw(im1)
+     draw1.polygon(poly1, fill=1)
+     im2 = Image.new(mode="1", size=size)
+     draw2 = ImageDraw.Draw(im2)
+     draw2.polygon(poly2, fill=1)
+     mask1 = np.asarray(im1, dtype=bool)
+     mask2 = np.asarray(im2, dtype=bool)
+     intersection_mask = mask1 & mask2
+     intersection_area = intersection_mask.sum()
+     percent1 = intersection_area / mask1.sum()
+     percent2 = intersection_area / mask2.sum()
+     return intersection_area, percent1, percent2
+
+
+ def get_poly_no_overlap(img_name, poly_list, threshold=0.6):
+     img = Image.open(img_name)
+     size = img.size
+     polygons = poly_list
+     del_list = []
+     current = 0
+     next_ind = current + 1
+     last_deleted = -1
+     while next_ind < len(polygons):
+         # Check these are not degenerate polygons
+         if len(polygons[current]) < 3:
+             del_list.append(current)
+             current, next_ind = (current + 1, next_ind + 1)
+             continue
+         if len(polygons[next_ind]) < 3:
+             del_list.append(next_ind)
+             next_ind += 1
+             continue
+         # End check
+         overlap_area, percent1, percent2 = percent_intersection(size,
+                                                                 polygons[current],
+                                                                 polygons[next_ind])
+
+         if percent1 > threshold or percent2 > threshold:
+             # Delete the polygon that is mostly covered by the other one
+             to_del = current if percent1 > percent2 else next_ind
+             current, next_ind = (current, next_ind + 1) if percent1 < percent2 \
+                 else (next_ind, next_ind + 1)
+             del_list.append(to_del)
+             last_deleted = to_del
+         else:  # when no overlap is found
+             current, next_ind = (current + 1, next_ind + 1)
+             if current <= last_deleted:
+                 current = last_deleted + 1
+                 next_ind = current + 1
+     all_ind = set(range(len(poly_list)))
+     good_ind = all_ind.difference(set(del_list))
+     poly_non_overlapping = [poly_list[i] for i in good_ind]
+     return del_list, poly_non_overlapping
+
+
+ def dump_polygons_json(out, polygons=None, filename=None):
+     if filename is None:
+         filename = out["image_path"][:-3] + "json"
+     if polygons is None:
+         polygons = get_polygon_list_tuples(out)
+     lf_dict = {}
+     for ind, poly in enumerate(polygons):
+         lf_dict['line_' + str(ind + 1)] = points.xy_to_list(poly)
+
+     with open(filename, 'w') as fout:
+         json_dumps_str = json.dumps(lf_dict, indent=2)
+         print(json_dumps_str, file=fout)
+
+
+ def write_json_file(out, poly_list=None, json_file=None):
+     if poly_list is None:
+         poly_list = get_polygon_list_tuples(out)
+     dump_polygons_json(out, poly_list, json_file)
+
+
+ def write_text_file(out, predicted_text, filename=None):
+     if filename is None:
+         filename = out["image_path"][:-3] + "txt"
+     prediction_para = '\n'.join(predicted_text)
+     with open(filename, 'w') as f:
+         f.write(prediction_para)
+
+
+ # Flips only the image, not the polygons
+ def draw_image_with_poly(directory, image, poly, convert=True, flip=False):
+     img = cv2.imread(os.path.join(directory, image))
+     if flip:
+         img = cv2.flip(img, 1)
+     plt.imshow(img)
+     colors = ['red', 'green', 'blue']
+
+     for ind, p in enumerate(poly):
+         if convert:
+             p = points.list_to_xy(p)
+         points.draw_poly(plt, p, colors[ind % 3])
+         plt.text(p[-1][0], p[-1][1], str(ind))
+
+
+ arabic_reshaper_configuration = {
+     'delete_harakat': True,
+     'support_ligatures': True
+ }
+
+
+ def remove_diacritics(txt):
+     reshaper = ArabicReshaper(configuration=arabic_reshaper_configuration)
+     txt_without_diacritics = reshaper.reshape(txt)
+     return txt_without_diacritics
+
+
+ # CER between every ground-truth line and every predicted line, clipped at 1
+ def get_cer_matrix(gt_list, prediction_list):
+     total_gt = len(gt_list)
+     total_predictions = len(prediction_list)
+     cer_matrix = np.zeros((total_gt, total_predictions))
+
+     for ind_g, g in enumerate(gt_list):
+         for ind_p, p in enumerate(prediction_list):
+             cer_matrix[ind_g, ind_p] = error_rates.cer(g, p)
+             if cer_matrix[ind_g, ind_p] > 1:
+                 cer_matrix[ind_g, ind_p] = 1
+
+     return cer_matrix
+
+
+ # https://en.wikipedia.org/wiki/Dynamic_time_warping
+ def get_dtw(dist_matrix):
+     r, c = dist_matrix.shape
+     dtw = np.full((r + 1, c + 1), np.inf)
+     dtw[0, 0] = 0
+
+     for i in range(1, dtw.shape[0]):
+         for j in range(1, dtw.shape[1]):
+             dtw[i, j] = dist_matrix[i - 1, j - 1] + np.min([dtw[i - 1, j], dtw[i, j - 1],
+                                                             dtw[i - 1, j - 1]])
+     return dtw[-1, -1]
+
+
+ def get_cers(gt_list, prediction_list, no_diacritics=False):
+     if no_diacritics:
+         gt_list = [remove_diacritics(g) for g in gt_list]
+         prediction_list = [remove_diacritics(p) for p in prediction_list]
+
+     cer_matrix = get_cer_matrix(gt_list, prediction_list)
+     cer_gt = cer_matrix.min(axis=1).flatten()
+     cer_p = cer_matrix.min(axis=0).flatten()
+     cer_dtw = get_dtw(cer_matrix)
+
+     gt = '\n'.join(gt_list)
+     pred = '\n'.join(prediction_list)
+     cer_para = error_rates.cer(gt, pred)
+
+     return cer_dtw, np.sum(cer_gt), np.sum(cer_p), cer_para
+
+
+ # Same as get_cers, but also returns the paragraph-level WER
+ def get_cers_wer(gt_list, prediction_list, no_diacritics=False):
+     if no_diacritics:
+         gt_list = [remove_diacritics(g) for g in gt_list]
+         prediction_list = [remove_diacritics(p) for p in prediction_list]
+
+     cer_matrix = get_cer_matrix(gt_list, prediction_list)
+     cer_gt = cer_matrix.min(axis=1).flatten()
+     cer_p = cer_matrix.min(axis=0).flatten()
+     cer_dtw = get_dtw(cer_matrix)
+
+     gt = '\n'.join(gt_list)
+     pred = '\n'.join(prediction_list)
+     cer_para = error_rates.cer(gt, pred)
+     wer_para = error_rates.wer(gt, pred)
+
+     return cer_dtw, np.sum(cer_gt), np.sum(cer_p), cer_para, wer_para
+
+
+ def flip_polygon(img_file, poly_list):
+     img = cv2.imread(img_file)
+     h, w = img.shape[:2]
+     flipped_poly_list = []
+     for p in poly_list:
+         flipped = [(w - x, y) for (x, y) in p]
+         flipped_poly_list.append(flipped)
+     return flipped_poly_list
+
+
+ # This will create annotations that can be used in the scribeArabic annotation tool
+ def create_annotations_json(predicted_text, poly_list):
+     if len(predicted_text) != len(poly_list):
+         raise ValueError("polygon list length does not match predicted text length")
+     page_json = dict()
+     for ind, (ocr, poly) in enumerate(zip(predicted_text, poly_list)):
+         pts = points.xy_to_list(poly)
+         line_key = f'line_{ind + 1}'
+         page_json[line_key] = dict()
+         page_json[line_key]['coord'] = pts
+         page_json[line_key]['text'] = ocr
+     return page_json
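
A worked example of get_dtw on a hand-built 2x2 CER matrix (values invented for illustration): the cheapest warping path pairs ground-truth line 0 with prediction 0 and line 1 with prediction 1, so the function returns 0.1 + 0.2 = 0.3.

import numpy as np
import post_process_routines as post

# Rows = ground-truth lines, columns = predicted lines
dist = np.array([[0.1, 0.9],
                 [0.8, 0.2]])
assert abs(post.get_dtw(dist) - 0.3) < 1e-9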
arabic/test_hw_helper_routines.py ADDED
@@ -0,0 +1,147 @@
+ import sys
+ sys.path.append('py3/')
+ import os
+ import json
+ import yaml
+ import pandas as pd
+ import numpy as np
+ import torch
+ import cv2
+ import matplotlib.pyplot as plt
+ import hw
+ from hw import cnn_lstm
+ from utils import string_utils, error_rates
+
+ HT = 60  # target line height in pixels
+
+
+ def load_char_set(char_set_path):
+     with open(char_set_path) as f:
+         char_set = json.load(f)
+
+     idx_to_char = {}
+     for k, v in char_set['idx_to_char'].items():
+         idx_to_char[int(k)] = v
+     return idx_to_char
+
+
+ def get_config(config_file):
+     with open(config_file) as f:
+         config = yaml.load(f, Loader=yaml.loader.SafeLoader)
+     return config
+
+
+ def load_HW(hw_network_config, pt_filename):
+     HW = cnn_lstm.create_model(hw_network_config)
+     hw_state = torch.load(pt_filename)
+     HW.load_state_dict(hw_state)
+
+     device = torch.device("cuda")
+     HW.to(device)
+     return HW
+
+
+ def get_predicted_str(HW, img_file, idx_to_char, device="cuda", flip=False,
+                       show=False, read_image=True, img=None, tokenizer=None):
+
+     device = torch.device(device)
+     if read_image:
+         img = cv2.imread(img_file)
+     ht, width = img.shape[:2]
+
+     if ht != HT:
+         new_width = int(width / ht * HT)
+         img = cv2.resize(img, (new_width, HT))
+     if show:
+         plt.imshow(img)
+         plt.show()
+     if flip:
+         img = np.flip(img, axis=1)
+     img = img.astype(np.float32)
+     img = img / 128.0 - 1.0
+     img = np.expand_dims(img, 0)
+     img = img.transpose([0, 3, 1, 2])
+     img = torch.from_numpy(img)
+
+     IMG = img.to(device)
+
+     preds = HW(IMG).cpu()
+
+     out = preds.permute(1, 0, 2)
+     out = out.data.numpy()
+     logits = out[0, ...]
+     pred, raw_pred = string_utils.naive_decode(logits)
+     if tokenizer is None:
+         pred_str = string_utils.label2str_single(pred, idx_to_char, False)
+     else:
+         pred_str = tokenizer.decode(pred)
+
+     del IMG
+     return pred_str
+
+
+ def write_csv_all_predictions(config_file, suffix="", device="cuda", flip=False, pt_file='hw.pt',
+                               test_file_to_use="", result_file="", tokenizer=None):
+
+     result_df = pd.DataFrame(columns=["image", "ground_truth", "prediction", "CER", "WER"])
+     config = get_config(config_file)
+     idx_to_char = load_char_set(config['network']['hw']['char_set_path'])
+     if 'hw_to_save' in config['pretraining']:
+         pt_file = config['pretraining']['hw_to_save']
+     pt_filename = os.path.join(config['pretraining']['snapshot_path'], pt_file)
+
+     config["network"]["hw"]["num_of_outputs"] = len(idx_to_char) + 1
+     if tokenizer is not None:
+         config["network"]["hw"]["num_of_outputs"] = tokenizer.get_vocab_size()
+         pt_filename = os.path.join(config['pretraining']['snapshot_path'], f"hw_tokenizer_{tokenizer.get_vocab_size()}.pt")
+
+     if len(suffix) > 0:
+         pt_filename = pt_filename[:-3] + suffix + '.pt'
+     print('...Using snapshot', pt_filename)
+     HW = load_HW(config['network']['hw'], pt_filename)
+     device = torch.device(device)
+     HW.to(device)
+     HW.eval()
+
+     if test_file_to_use == "":
+         test_json_file = config['testing']['test_file']
+     else:
+         test_json_file = test_file_to_use
+     print('Using test file', test_json_file)
+
+     with open(test_json_file) as f:
+         json_obj = json.load(f)
+     for obj in json_obj:
+         # obj is a list of: [json_file, img_file]
+         # Open the json file and get a list of records
+         with open(obj[0]) as f:
+             image_list = json.load(f)
+
+         for record in image_list:
+             # Skip records with no ground truth
+             if record['gt'] == 'None' or isinstance(record['gt'], float) or record['gt'] == 'nan' or len(record['gt']) == 0:
+                 print(type(record['gt']), record['gt'], record['hw_path'])
+                 continue
+             predicted_str = get_predicted_str(HW, record['hw_path'], idx_to_char, device=device,
+                                               flip=flip, tokenizer=tokenizer)
+
+             cer = error_rates.cer(record['gt'], predicted_str)
+             wer = error_rates.wer(record['gt'], predicted_str)
+             result_df.loc[len(result_df)] = [record['hw_path'], record['gt'],
+                                              predicted_str, cer, wer]
+
+     if len(result_file) == 0:
+         result_file = config_file.replace("config", "result")
+         result_file = result_file.replace("yaml", "csv")
+         if len(suffix) > 0:
+             result_file = result_file[:-4] + suffix + '.csv'
+     result_df.to_csv(result_file, index=False)
+     return result_df
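
A sketch of recognizing a single pre-cropped line image with this module; the checkpoint and config paths come from this commit, line.png is a placeholder, and flip=True mirrors how page_htr.py calls get_predicted_str:

import test_hw_helper_routines as test_hw

config = test_hw.get_config("model/trial_26_A/set0/config_2600.yaml")
idx_to_char = test_hw.load_char_set(config['network']['hw']['char_set_path'])
config["network"]["hw"]["num_of_outputs"] = len(idx_to_char) + 1
# load_HW also moves the model to the CUDA device
HW = test_hw.load_HW(config['network']['hw'], "model/trial_26_A/set0/pretrain/hw.pt")
HW.eval()
text = test_hw.get_predicted_str(HW, "line.png", idx_to_char, flip=True)  # placeholder file
print(text)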
arabic/warp_routines.py ADDED
@@ -0,0 +1,368 @@
+ from svgpathtools import Path, Line
+ from scipy.interpolate import griddata
+ import numpy as np
+ import cv2
+ import sys
+ sys.path.append('../../coords/')
+ import points
+ import torch
+
+
+ def generate_offset_mapping(img, ts, path, offset_1, offset_2, max_min=None, cube_size=None):
+     offset_1_pts = []
+     offset_2_pts = []
+     for i in range(len(ts)):
+         t = ts[i]
+         pt = path.point(t)
+
+         # Unit normal at pt, averaging the two segment normals at interior points
+         norm = None
+         if i == 0:
+             norm = normal(pt, path.point(ts[i + 1]))
+             norm = norm / dis(complex(0, 0), norm)
+         elif i == len(ts) - 1:
+             norm = normal(path.point(ts[i - 1]), pt)
+             norm = norm / dis(complex(0, 0), norm)
+         else:
+             norm1 = normal(path.point(ts[i - 1]), pt)
+             norm1 = norm1 / dis(complex(0, 0), norm1)
+             norm2 = normal(pt, path.point(ts[i + 1]))
+             norm2 = norm2 / dis(complex(0, 0), norm2)
+
+             norm = (norm1 + norm2) / 2
+             norm = norm / dis(complex(0, 0), norm)
+
+         offset_vector1 = offset_1 * norm
+         offset_vector2 = offset_2 * norm
+
+         pt1 = pt + offset_vector1
+         pt2 = pt + offset_vector2
+
+         offset_1_pts.append(complexToNpPt(pt1))
+         offset_2_pts.append(complexToNpPt(pt2))
+
+     offset_1_pts = np.array(offset_1_pts)
+     offset_2_pts = np.array(offset_2_pts)
+
+     h, w = img.shape[:2]
+
+     offset_source2 = np.array([(cube_size * i, 0) for i in range(len(offset_1_pts))], dtype=np.float32)
+     offset_source1 = np.array([(cube_size * i, cube_size) for i in range(len(offset_2_pts))], dtype=np.float32)
+
+     offset_source1 = offset_source1[::-1]
+     offset_source2 = offset_source2[::-1]
+
+     source = np.concatenate([offset_source1, offset_source2])
+     destination = np.concatenate([offset_1_pts, offset_2_pts])
+
+     source = source[:, ::-1]
+     destination = destination[:, ::-1]
+
+     n_w = int(offset_source2[:, 0].max())
+     n_h = int(cube_size)
+
+     grid_x, grid_y = np.mgrid[0:n_h, 0:n_w]
+
+     grid_z = griddata(source, destination, (grid_x, grid_y), method='cubic')
+     map_x = np.append([], [ar[:, 1] for ar in grid_z]).reshape(n_h, n_w)
+     map_y = np.append([], [ar[:, 0] for ar in grid_z]).reshape(n_h, n_w)
+     rectified_to_warped_x = map_x.astype('float32')
+     rectified_to_warped_y = map_y.astype('float32')
+
+     grid_x, grid_y = np.mgrid[0:h, 0:w]
+     grid_z = griddata(source, destination, (grid_x, grid_y), method='cubic')
+     map_x = np.append([], [ar[:, 1] for ar in grid_z]).reshape(h, w)
+     map_y = np.append([], [ar[:, 0] for ar in grid_z]).reshape(h, w)
+     warped_to_rectified_x = map_x.astype('float32')
+     warped_to_rectified_y = map_y.astype('float32')
+
+     return rectified_to_warped_x, rectified_to_warped_y, warped_to_rectified_x, warped_to_rectified_y, max_min
+
+
+ def dis(pt1, pt2):
+     a = (pt1.real - pt2.real)**2
+     b = (pt1.imag - pt2.imag)**2
+     return np.sqrt(a + b)
+
+
+ def complexToNpPt(pt):
+     return np.array([pt.real, pt.imag], dtype=np.float32)
+
+
+ def normal(pt1, pt2):
+     dif = pt1 - pt2
+     return complex(-dif.imag, dif.real)
+
+
+ # Find parameter values t along the path that are spaced cube_size apart
+ def find_t_spacing(path, cube_size):
+     l = path.length()
+     error = 0.01
+     init_step_size = cube_size / l
+
+     last_t = 0
+     cur_t = 0
+     pts = []
+     ts = [0]
+     pts.append(complexToNpPt(path.point(cur_t)))
+     for target in np.arange(cube_size, int(l), cube_size):
+         step_size = init_step_size
+         for i in range(1000):
+             cur_length = dis(path.point(last_t), path.point(cur_t))
+             if np.abs(cur_length - cube_size) < error:
+                 break
+
+             step_t = min(cur_t + step_size, 1.0)
+             step_l = dis(path.point(last_t), path.point(step_t))
+
+             if np.abs(step_l - cube_size) < np.abs(cur_length - cube_size):
+                 cur_t = step_t
+                 continue
+
+             step_t = max(cur_t - step_size, 0.0)
+             step_t = max(step_t, last_t)
+             step_t = min(step_t, 1.0)  # clamp t into [0, 1]
+
+             step_l = dis(path.point(last_t), path.point(step_t))
+
+             if np.abs(step_l - cube_size) < np.abs(cur_length - cube_size):
+                 cur_t = step_t
+                 continue
+
+             step_size = step_size / 2.0
+
+         last_t = cur_t
+
+         ts.append(cur_t)
+         pts.append(complexToNpPt(path.point(cur_t)))
+
+     pts = np.array(pts)
+
+     return ts
+
+
+ def remap_with_grid_sample(input_image, map_x, map_y, padding, img_tensor=None, device="cuda"):
+     reshaped = False
+     H, W = input_image.shape[:2]
+
+     if len(input_image.shape) == 2:
+         input_image = input_image.reshape(H, W, 1)
+         reshaped = True
+
+     if img_tensor is None:
+         # Convert input image to a PyTorch tensor in NCHW format, normalized to [0, 1]
+         img_tensor = torch.from_numpy(input_image).permute(2, 0, 1).unsqueeze(0).float() / 255.0
+
+     img_tensor = img_tensor.to(device)
+
+     # Convert map_x and map_y to normalized coordinates in the range [-1, 1]
+     norm_map_x = (torch.from_numpy(map_x.copy()).float() / (W - 1)) * 2 - 1
+     norm_map_y = (torch.from_numpy(map_y.copy()).float() / (H - 1)) * 2 - 1
+
+     # Stack normalized coordinates to create a grid of shape (1, H, W, 2)
+     grid = torch.stack((norm_map_x, norm_map_y), dim=-1).unsqueeze(0)
+
+     # Ensure the grid is on the same device as the input tensor
+     grid = grid.to(img_tensor.device)
+
+     # Apply grid_sample to perform the remap operation
+     output_tensor = torch.nn.functional.grid_sample(img_tensor, grid, mode='bilinear',
+                                                     padding_mode=padding, align_corners=True)
+
+     # Convert back to NumPy and scale back to [0, 255]
+     output_image = (output_tensor.squeeze(dim=0).permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
+     if reshaped:
+         output_image = output_image[:, :, 0]
+     return output_image
+
+
+ # Same as generate_offset_mapping, but only returns the rectified-to-warped maps
+ def generate_offset_mapping_1(img, ts, path, offset_1, offset_2, cube_size=None):
+     offset_1_pts = []
+     offset_2_pts = []
+     for i in range(len(ts)):
+         t = ts[i]
+         pt = path.point(t)
+
+         norm = None
+         if i == 0:
+             norm = normal(pt, path.point(ts[i + 1]))
+             norm = norm / dis(complex(0, 0), norm)
+         elif i == len(ts) - 1:
+             norm = normal(path.point(ts[i - 1]), pt)
+             norm = norm / dis(complex(0, 0), norm)
+         else:
+             norm1 = normal(path.point(ts[i - 1]), pt)
+             norm1 = norm1 / dis(complex(0, 0), norm1)
+             norm2 = normal(pt, path.point(ts[i + 1]))
+             norm2 = norm2 / dis(complex(0, 0), norm2)
+
+             norm = (norm1 + norm2) / 2
+             norm = norm / dis(complex(0, 0), norm)
+
+         offset_vector1 = offset_1 * norm
+         offset_vector2 = offset_2 * norm
+
+         pt1 = pt + offset_vector1
+         pt2 = pt + offset_vector2
+
+         offset_1_pts.append(complexToNpPt(pt1))
+         offset_2_pts.append(complexToNpPt(pt2))
+
+     offset_1_pts = np.array(offset_1_pts)
+     offset_2_pts = np.array(offset_2_pts)
+
+     h, w = img.shape[:2]
+
+     offset_source2 = np.array([(cube_size * i, 0) for i in range(len(offset_1_pts))], dtype=np.float32)
+     offset_source1 = np.array([(cube_size * i, cube_size) for i in range(len(offset_2_pts))], dtype=np.float32)
+
+     offset_source1 = offset_source1[::-1]
+     offset_source2 = offset_source2[::-1]
+
+     source = np.concatenate([offset_source1, offset_source2])
+     destination = np.concatenate([offset_1_pts, offset_2_pts])
+
+     source = source[:, ::-1]
+     destination = destination[:, ::-1]
+
+     n_w = int(offset_source2[:, 0].max())
+     n_h = int(cube_size)
+
+     grid_x, grid_y = np.mgrid[0:n_h, 0:n_w]
+
+     grid_z = griddata(source, destination, (grid_x, grid_y), method='cubic')
+     map_x = np.append([], [ar[:, 1] for ar in grid_z]).reshape(n_h, n_w)
+     map_y = np.append([], [ar[:, 0] for ar in grid_z]).reshape(n_h, n_w)
+     rectified_to_warped_x = map_x.astype('float32')
+     rectified_to_warped_y = map_y.astype('float32')
+
+     return rectified_to_warped_x, rectified_to_warped_y
+
+
+ def get_warped_images(img, polygon_list, baseline_list, target_height=60, device="cuda"):
+     warped_list = []
+
+     for ind in range(len(polygon_list)):
+         line_mask = extract_region_mask(img, polygon_list[ind])
+
+         # Estimate the line height from the median column height of the mask
+         summed_axis0 = (line_mask.astype(float) / 255).sum(axis=0)
+         avg_height0 = np.median(summed_axis0[summed_axis0 != 0])
+
+         target_step_size = avg_height0 * 1.1
+
+         paths = []
+         for i in range(len(baseline_list[ind]) - 1):
+             p1 = baseline_list[ind][i]
+             p2 = baseline_list[ind][i + 1]
+
+             p1_c = complex(*p1)
+             p2_c = complex(*p2)
+
+             paths.append(Line(p1_c, p2_c))
+
+         if len(paths) == 0:
+             continue
+
+         # Add a bit on the end
+         tan = paths[-1].unit_tangent(1.0)
+         p3_c = p2_c + target_step_size * tan
+         paths.append(Line(p2_c, p3_c))
+
+         path = Path(*paths)
+
+         n_w = target_height * path.length() / target_step_size
+         ts = np.arange(0, 1, target_height / float(n_w))
+
+         (rectified_to_warped_x,
+          rectified_to_warped_y) = generate_offset_mapping_1(img, ts, path, target_step_size / 2,
+                                                             -target_step_size / 2,
+                                                             cube_size=target_height)
+
+         rectified_to_warped_x = rectified_to_warped_x[::-1, ::-1]
+         rectified_to_warped_y = rectified_to_warped_y[::-1, ::-1]
+
+         warped = cv2.remap(img, rectified_to_warped_x, rectified_to_warped_y, cv2.INTER_CUBIC, borderValue=(255, 255, 255))
+         warped_list.append(warped)
+
+     return warped_list
+
+
+ def extract_region_mask(img, bounding_poly):
+     pts = np.array(bounding_poly, np.int32)
+
+     # http://stackoverflow.com/a/15343106/3479446
+     mask = np.zeros(img.shape[:2], dtype=np.uint8)
+     roi_corners = np.array([pts], dtype=np.int32)
+
+     ignore_mask_color = (255,)
+     cv2.fillPoly(mask, roi_corners, ignore_mask_color, lineType=cv2.LINE_8)
+     return mask
+
+
+ def get_baseline(poly_points, right_to_left=True):
+     baseline = points.get_baseline_chunks(poly_points)
+     # Right-to-left order
+     #if right_to_left:
+     #    baseline.sort(key=lambda x: x[0], reverse=True)
+     return baseline
+
+
+ # The first argument is ignored if polygon_pts is given
+ def get_line_image(polygon_flat_list, img, polygon_pts=None, target_height=60):
+     if polygon_pts is None:
+         polygon_pts = points.list_to_xy(polygon_flat_list)
+     baseline = get_baseline(polygon_pts)
+     line_img = get_warped_images(img, [polygon_pts], [baseline], target_height=target_height)
+     if len(line_img) > 0:
+         return line_img[0]
+     return None
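
A sketch of warping one annotated line out of a page with get_line_image; the coordinate values and file names are invented for illustration, and the flat x,y list matches the 'coord' format consumed elsewhere in this commit:

import cv2
import warp_routines as warp

img = cv2.imread("page.jpg")  # placeholder page image
coord = [100, 50, 400, 40, 410, 90, 105, 100]  # flat [x1, y1, x2, y2, ...] polygon
line_img = warp.get_line_image(coord, img, target_height=60)
if line_img is not None:
    cv2.imwrite("line.png", line_img)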
coords/__init__.py ADDED
File without changes
coords/points.py ADDED
@@ -0,0 +1,342 @@
+ import matplotlib.pyplot as plt
+ from matplotlib.patches import Rectangle
+ import numpy as np
+ from PIL import Image, ImageDraw
+
+
+ # img_file is the filename with full path
+ # points is a string of coordinate points read from XML
+ def generate_cropped_region(img_file, points):
+     # Process points
+     points_list = points.split(' ')
+     xy_pts = [(int(pt.split(",")[0]),
+                int(pt.split(",")[1])) for pt in points_list]
+     pts = np.array(xy_pts)
+     img_obj = Image.open(img_file)
+     img = np.array(img_obj)
+     # Crop the image
+     [min_x, min_y] = np.min(pts, axis=0)
+     [max_x, max_y] = np.max(pts, axis=0)
+     cropped_img = img[min_y:max_y + 1, min_x:max_x + 1, :]
+     cropped_img_obj = Image.fromarray(cropped_img)
+     return cropped_img_obj, (min_x, min_y), (max_x, max_y)
+
+
+ # pts is a 2D numpy points array
+ # img is also a numpy/cv2 array
+ def generate_cropped_image(img, pts):
+     img_obj = Image.fromarray(img)
+
+     # Create a polygonal mask
+     mask = Image.new('L', (img_obj.width, img_obj.height), color=0)
+     draw_mask = ImageDraw.Draw(mask)
+     draw_mask.polygon(list(pts.flatten()), fill=255)
+     mask = np.array(mask).astype(bool)
+     # Choose the polygonal area from the image
+     output_img = np.zeros_like(img)
+     output_img[mask] = img[mask]
+     # Crop the image
+     [min_x, min_y] = np.min(pts, axis=0).astype(int)
+     [max_x, max_y] = np.max(pts, axis=0).astype(int)
+     cropped_img = output_img[min_y:max_y + 1, min_x:max_x + 1, :]
+
+     return cropped_img
+
+
+ def draw_poly(plt, xy_pts, color='green'):
+     plt.gca().add_patch(Rectangle(xy_pts[0], 10, 10, facecolor='yellow'))
+     for pts1, pts2 in zip(xy_pts, xy_pts[1:]):
+         draw_line(plt, pts1[0], pts1[1], pts2[0], pts2[1], color)
+     draw_line(plt, xy_pts[0][0], xy_pts[0][1],
+               xy_pts[-1][0], xy_pts[-1][1], color)
+
+
+ def draw_baseline(plt, xy_pts, color='red'):
+     plt.gca().add_patch(Rectangle(xy_pts[0], 100, 100, facecolor='blue'))
+     for pts1, pts2 in zip(xy_pts, xy_pts[1:]):
+         draw_line(plt, pts1[0], pts1[1], pts2[0], pts2[1], color=color)
+
+
+ def draw_line(plt_obj, x1, y1, x2, y2, color='g'):
+     plt_obj.plot([x1, x2], [y1, y2], color=color, linewidth=1)
+
+
+ # The argument points is a string.
+ # The function returns a list of (x,y) tuples.
+ def get_xy_pts(points):
+     points_list = points.split(' ')
+     xy_pts = [(int(pt.split(",")[0]),
+                int(pt.split(",")[1])) for pt in points_list]
+     return xy_pts
+
+
+ # bbox is not necessarily a polygon or rectangle, just a list of (x,y) tuples.
+ # If apply_correction is True then all points are restricted to lie within
+ # top_left and bottom_right.
+ def add_offset_to_polygon(bbox, offset, apply_correction=False,
+                           top_left=[], bottom_right=[]):
+     new_bbox = []
+     for i, coord in enumerate(bbox):
+         new_bbox.append((coord[0] + offset[0], coord[1] + offset[1]))
+     if apply_correction:
+         top_left = np.array(top_left)
+         bottom_right = np.array(bottom_right)
+         pts = np.array(new_bbox)
+         for j in [0, 1]:
+             ind = np.where(pts[:, j] > bottom_right[j])
+             pts[ind, j] = bottom_right[j]
+         for j in [0, 1]:
+             ind = np.where(pts[:, j] < top_left[j])
+             pts[ind, j] = top_left[j]
+         new_bbox = list(map(tuple, pts))
+
+     return new_bbox
+
+
+ def add_offset_to_polygon_list(polygon_list, offset):
+     new_polygon_list = []
+     for poly in polygon_list:
+         new_poly = add_offset_to_polygon(poly, offset)
+         new_polygon_list.append(new_poly)
+     return new_polygon_list
+
+
+ def combine_poly(poly1, poly2):
+     main_poly = [poly1[0], poly1[1]]
+     if poly1[1][0] != poly2[0][0] or poly1[1][1] != poly2[0][1]:
+         main_poly.append(poly2[0])
+     main_poly.extend(poly2[1:3])
+     if poly1[2][0] != poly2[3][0] or poly1[2][1] != poly2[3][1]:
+         main_poly.append(poly2[3])
+     main_poly.extend(poly1[2:])
+     return main_poly
+
+
+ # Get left, upper, right, lower (min_x, min_y, max_x, max_y)
+ def get_max_min_polygon(polygon):
+     x_list = [x[0] for x in polygon]
+     y_list = [y[1] for y in polygon]
+     return min(x_list), min(y_list), max(x_list), max(y_list)
+
+
+ def add_offset_to_baseline(baseline, offset):
+     new_baseline = []
+     for pts in baseline:
+         new_baseline.append((pts[0] + offset[0], pts[1] + offset[1]))
+     return new_baseline
+
+
+ def add_offset_to_baseline_list(baseline_list, offset):
+     new_list = []
+     for b in baseline_list:
+         new_b = add_offset_to_baseline(b, offset)
+         new_list.append(new_b)
+     return new_list
+
+
+ def combine_baseline(base1, base2):
+     #combined = [base1[0], base1[1], base2[0], base2[1]]
+     combined = [base2[1], base2[0], base1[1], base1[0]]
+     return combined
+
+
+ def get_x_y(polygon):
+     x_list = [x[0] for x in polygon]
+     y_list = [y[1] for y in polygon]
+     return x_list, y_list
+
+
+ # num_pts is the number of points on the baseline to get
+ def get_baseline_regression(poly_pts, num_pts=10, deg=1):
+     if len(poly_pts) <= 4:
+         deg = 1
+     x, y = get_x_y(poly_pts)
+     model = np.polyfit(x, y, deg)
+     p = np.poly1d(model)
+     # Get the x values against which we want y
+     x1, y1, x2, y2 = get_max_min_polygon(poly_pts)
+     num_pts = min(num_pts, x2 - x1 + 1)
+     num_pts = int(num_pts)
+     x = np.linspace(x1, x2, num_pts, endpoint=True, dtype=int)
+     y = p(x)
+     baseline = [(a, b) for a, b in zip(x, y)]
+     return baseline
+
+
+ # Given a flat coordinates list, return a list of (x,y) tuples
+ def list_to_xy(coord_list):
+     xy_list = []
+     for ind in range(0, len(coord_list), 2):
+         xy_list.append((coord_list[ind], coord_list[ind + 1]))
+     return xy_list
+
+
+ # Given an (x,y) list of tuples, return a flat list
+ def xy_to_list(tuples_list):
+     flat_list = [x for pair in tuples_list for x in pair]
+     return flat_list
+
+
+ # x is a list of points
+ # y is a corresponding list of points
+ def get_baseline_from_xy(x, y, num_pts=10, deg=1):
+     if len(x) <= 4:
+         deg = 1
+
+     model = np.polyfit(x, y, deg)
+     p = np.poly1d(model)
+     # Get the x values against which we want y
+     x1, x2 = min(x), max(x)
+     num_pts = int(min(num_pts, x2 - x1 + 1))
+     x = np.linspace(x1, x2, num_pts, endpoint=True, dtype=int)
+     y = p(x)
+     baseline = [(a, b) for a, b in zip(x, y)]
+     return baseline
+
+
+ # Here img is the 2D numpy array
+ # xy_pts is a list of (x,y) tuples
+ def generate_cropped_region_from_polypts(img, xy_pts):
+     pts = np.array(xy_pts)
+
+     # Crop the image
+     [min_x, min_y] = np.ceil(np.min(pts, axis=0)).astype(int)
+     [max_x, max_y] = np.floor(np.max(pts, axis=0)).astype(int)
+     cropped_img = img[min_y:max_y + 1, min_x:max_x + 1, :]
+     cropped_img_obj = Image.fromarray(cropped_img)
+     return cropped_img_obj, (min_x, min_y), (max_x, max_y)
+
+
+ # Will generate a line image by using xy_pts as a mask
+ def generate_line_image(img, xy_pts):
+     pts = np.array(xy_pts)
+     [min_x, min_y] = np.ceil(np.min(pts, axis=0)).astype(int)
+     [max_x, max_y] = np.floor(np.max(pts, axis=0)).astype(int)
+     (width, ht) = (max_x - min_x + 1, max_y - min_y + 1)
+     xy_pts = add_offset_to_polygon(xy_pts, (-min_x, -min_y))
+     img = img[min_y:max_y + 1, min_x:max_x + 1, :]
+
+     # Create a polygonal mask
+     mask = Image.new('L', (img.shape[1], img.shape[0]), color=0)
+     draw_mask = ImageDraw.Draw(mask)
+     draw_mask.polygon(xy_pts, fill=255)
+     mask = np.array(mask).astype(bool)
+     # Choose the polygonal area from the image; white elsewhere
+     output_img = np.zeros_like(img) + 255
+     output_img[mask] = img[mask]
+     output_img = Image.fromarray(output_img)
+     return output_img, xy_pts
+
+
+ # Restrict coordinates to lie between 0 and max (inclusive)
+ def restrict_pts(pts, max_p):
+     pts = [(max(0, x), max(0, y)) for (x, y) in pts]
+     pts = [(min(max_p[0], x), min(max_p[1], y)) for (x, y) in pts]
+     return pts
+
+
+ # Assuming poly_pts is a list of (x,y) tuples.
+ # This will add more points by interpolating between consecutive points.
+ def expand_poly(poly_pts, min_x_increment=10):
+     poly_pts = np.array(poly_pts).astype(int)
+     new_poly = []
+
+     for ind, curr in enumerate(poly_pts):
+         nxt = poly_pts[(ind + 1) % len(poly_pts)]
+         if np.abs(nxt[0] - curr[0]) < min_x_increment:
+             new_poly.append(curr)
+             new_poly.append(nxt)
+             continue
+         x1, x2 = curr[0], nxt[0]
+         y1, y2 = curr[1], nxt[1]
+         new_poly.append(curr)
+         increment = -1 * min_x_increment if nxt[0] < curr[0] else min_x_increment
+         for x in range(curr[0] + 1, nxt[0], increment):
+             slope = float(y2 - y1) / (x2 - x1)
+             y = slope * (x - x2) + y2
+             new_poly.append((x, y))
+         new_poly.append(nxt)
+     return new_poly
+
+
+ # chunks_len is ignored if chunk_len_auto is True
+ def get_baseline_chunks(poly_pts, chunks_len=300, chunk_len_auto=True):
+     baseline = []
+     poly_pts = expand_poly(poly_pts)
+
+     p = np.array(poly_pts)
+
+     max_x, max_y = np.max(p, 0)
+     min_x, min_y = np.min(p, 0)
+
+     # Decide chunks_len
+     if chunk_len_auto:
+         if len(poly_pts) >= 250:
+             total_chunks = 5
+         else:
+             total_chunks = int(np.ceil(len(poly_pts) / 50))
+         chunks_len = int((max_x - min_x) / total_chunks)
+     else:
+         total_chunks = int((max_x - min_x) / chunks_len)
+
+     # Fit a regression baseline per chunk of the polygon
+     for i in range(1, total_chunks + 1):
+         p1 = [pt for pt in p if (pt[0] - min_x) >= (i - 1) * chunks_len and (pt[0] - min_x) < i * chunks_len]
+         if i == total_chunks:
+             p1 = [pt for pt in p if (pt[0] - min_x) >= (i - 1) * chunks_len]
+         b = get_baseline_regression(p1, num_pts=12)
+
+         # Points are in ascending order (increasing x, left to right)
+         if len(baseline) != 0:
+             # This will smooth out the line:
+             # drop the last 4 points and connect with the next 4 points
+             baseline = baseline[:-4]
+             baseline.extend(b[4:])
+             if i == total_chunks:
+                 baseline.extend(b[-1:])
+         else:
+             baseline = b
+
+     # Make sure the baseline does not repeat
+
+     prev_pt = baseline[0]
302
+ baseline_clean = [prev_pt]
303
+ for b in baseline[1:]:
304
+ if b != prev_pt:
305
+ baseline_clean.append(b)
306
+ prev_pt = b
307
+ return baseline_clean
308
+
309
+
310
+ # Making sure a value is not outside a boundary or has negative value
311
+ def correct_pt(value, max_value):
312
+ if value < 0:
313
+ return 0
314
+ if value > max_value:
315
+ return max_value
316
+ return value
317
+
318
+ # Assume poly is list of (x, y) tuples or [x, y] list
319
+ # Will retrieve a vertically oriented baseline
320
+ # Will return top to bottom and bottom to top if reversed is true
321
+ def get_vertical_baseline(poly, reversed=False):
322
+ flipped_poly = [(y, x) for (x, y) in poly]
323
+ baseline = get_baseline_chunks(flipped_poly)
324
+ # Flip back
325
+ baseline = [(y, x) for (x, y) in baseline]
326
+ if reversed:
327
+ baseline.sort(key=lambda x: x[1], reverse=True)
328
+ return baseline
329
+
330
+ # Check the polygon is valid
331
+ # If x coord or y coord don't change, its not valid
332
+ def valid_poly(poly_pts):
333
+ if len(poly_pts) <= 2:
334
+ return False
335
+ x = [pt[0] for pt in poly_pts]
336
+ y = [pt[1] for pt in poly_pts]
337
+
338
+ if np.max(x) - np.min(x) <= 1e-2:
339
+ return False
340
+ if np.max(y) - np.min(y) <= 1e-2:
341
+ return False
342
+ return True
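A minimal usage sketch for the baseline routines above; the polygon values here are made up for illustration:

import points

# A rough quadrilateral around a text line, as (x, y) tuples
poly = [(10, 50), (400, 40), (400, 90), (10, 100)]
if points.valid_poly(poly):
    baseline = points.get_baseline_chunks(poly)  # piecewise-fitted baseline
    flat = points.xy_to_list(baseline)           # flatten to [x0, y0, x1, y1, ...]
    print(len(baseline), flat[:4])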
coords/poly_routines.py ADDED
@@ -0,0 +1,195 @@
import numpy as np
import cv2
import json
import sys

import points
from PIL import Image, ImageDraw
import os
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle


def correct_pt(value, max_value):
    boundary = False
    if value < 0:
        value = 0
        boundary = True
    if value >= max_value:
        value = max_value - 1
        boundary = True
    return [value, boundary]


# Each polygon is a list of (x, y) tuples
def get_polygon_list_tuples(out):
    img = cv2.imread(out["image_path"])
    img_height, img_width = img.shape[:2]
    polygon_list = []
    prev = [-1, -1]
    for line_ind in range(len(out['lf'][0])):
        polygon = []
        begin_ind = out['beginning'][line_ind]
        end_ind = out['ending'][line_ind]
        begin_ind = int(np.floor(begin_ind))
        end_ind = int(np.ceil(end_ind))
        end_ind = min(end_ind, len(out['lf']) - 1)
        for pt_ind in range(begin_ind, end_ind + 1):
            pt_x = float(out['lf'][pt_ind][line_ind][0][0])
            pt_y = float(out['lf'][pt_ind][line_ind][1][0])
            pt_x, boundary_x = correct_pt(pt_x, img_width)
            pt_y, boundary_y = correct_pt(pt_y, img_height)
            if prev != [pt_x, pt_y]:
                polygon.append((pt_x, pt_y))
            prev = [pt_x, pt_y]
        for pt_ind in range(end_ind, begin_ind - 1, -1):
            pt_x = float(out['lf'][pt_ind][line_ind][0][1])
            pt_y = float(out['lf'][pt_ind][line_ind][1][1])
            pt_x, boundary_x = correct_pt(pt_x, img_width)
            pt_y, boundary_y = correct_pt(pt_y, img_height)
            if prev != [pt_x, pt_y]:
                polygon.append((pt_x, pt_y))
            prev = [pt_x, pt_y]

        polygon_list.append(polygon)
        if len(polygon) < 3:
            print('WARNING: DEGENERATE POLYGON AT INDEX', len(polygon_list))
    return polygon_list


# Each polygon is a list of (x, y) tuples
def get_polygon_list_without_trim(out):
    img = cv2.imread(out["image_path"])
    img_height, img_width = img.shape[:2]
    polygon_list = []
    for line_ind in range(len(out['lf'][0])):
        polygon = []
        begin_ind = 0
        end_ind = len(out['lf']) - 1
        prev = [-1, -1]

        for pt_ind in range(begin_ind, end_ind + 1):
            pt_x = float(out['lf'][pt_ind][line_ind][0][0])
            pt_y = float(out['lf'][pt_ind][line_ind][1][0])
            pt_x, boundary_x = correct_pt(pt_x, img_width)
            pt_y, boundary_y = correct_pt(pt_y, img_height)
            if prev != [pt_x, pt_y]:
                polygon.append((pt_x, pt_y))
            prev = [pt_x, pt_y]
        for pt_ind in range(end_ind, begin_ind - 1, -1):
            pt_x = float(out['lf'][pt_ind][line_ind][0][1])
            pt_y = float(out['lf'][pt_ind][line_ind][1][1])
            pt_x, boundary_x = correct_pt(pt_x, img_width)
            pt_y, boundary_y = correct_pt(pt_y, img_height)
            if prev != [pt_x, pt_y]:
                polygon.append((pt_x, pt_y))
            prev = [pt_x, pt_y]

        if len(polygon) >= 3:
            polygon_list.append(polygon)
    return polygon_list


# Each polygon passed as input is a list of (x, y) tuples
def percent_intersection(size, poly1, poly2):
    im1 = Image.new(mode="1", size=size)
    draw1 = ImageDraw.Draw(im1)
    draw1.polygon(poly1, fill=1)
    im2 = Image.new(mode="1", size=size)
    draw2 = ImageDraw.Draw(im2)
    draw2.polygon(poly2, fill=1)
    mask1 = np.asarray(im1, dtype=bool)
    mask2 = np.asarray(im2, dtype=bool)
    intersection_mask = mask1 & mask2
    intersection_area = intersection_mask.sum()
    percent1 = intersection_area / mask1.sum()
    percent2 = intersection_area / mask2.sum()
    return intersection_area, percent1, percent2


def get_poly_no_overlap(img_name, poly_list, threshold=0.6):
    img = Image.open(img_name)
    size = img.size
    polygons = poly_list
    del_list = []
    current = 0
    next_ind = current + 1
    last_deleted = -1
    while next_ind < len(polygons):
        # Check that these are not degenerate polygons
        if len(polygons[current]) < 3:
            del_list.append(current)
            current, next_ind = (current + 1, next_ind + 1)
            continue
        if len(polygons[next_ind]) < 3:
            del_list.append(next_ind)
            next_ind += 1
            continue
        # End check
        overlap_area, percent1, percent2 = percent_intersection(size,
                                                                polygons[current],
                                                                polygons[next_ind])

        if percent1 > threshold or percent2 > threshold:
            to_del = current if percent1 > percent2 else next_ind
            current, next_ind = (current, next_ind + 1) if percent1 < percent2 \
                else (next_ind, next_ind + 1)
            del_list.append(to_del)
            last_deleted = to_del
        else:  # when no overlap is found
            current, next_ind = (current + 1, next_ind + 1)
            if current <= last_deleted:
                current = last_deleted + 1
                next_ind = current + 1
    all_ind = set(range(len(poly_list)))
    good_ind = sorted(all_ind.difference(set(del_list)))
    poly_non_overlapping = [poly_list[i] for i in good_ind]
    return del_list, poly_non_overlapping


def dump_polygons_json(out, polygons=None, filename=None):
    if filename is None:
        filename = out["image_path"][:-3] + "json"
    if polygons is None:
        # Fixed: the original called an undefined get_polygon_list()
        polygons = get_polygon_list_tuples(out)
    lf_dict = {}
    for ind, poly in enumerate(polygons):
        lf_dict['line_' + str(ind + 1)] = points.xy_to_list(poly)

    with open(filename, 'w') as fout:
        json_dumps_str = json.dumps(lf_dict, indent=2)
        print(json_dumps_str, file=fout)


# Won't flip the polygons... only the image
def draw_image_with_poly(directory, image, poly, convert=True, flip=False):
    img = cv2.imread(os.path.join(directory, image))
    if flip:
        img = cv2.flip(img, 1)
    plt.imshow(img)
    colors = ['red', 'green', 'blue']

    for ind, p in enumerate(poly):
        if convert:
            p = points.list_to_xy(p)
        points.draw_poly(plt, p, colors[ind % 3])
        plt.text(p[-1][0], p[-1][1], str(ind))


def flip_polygon(img_file, poly_list):
    img = cv2.imread(img_file)
    h, w = img.shape[:2]
    flipped_poly_list = []
    for p in poly_list:
        flipped = [(w - x, y) for (x, y) in p]
        flipped_poly_list.append(flipped)
    return flipped_poly_list
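A short sketch of the overlap filter above; 'page.png' and the polygon values are placeholders:

import poly_routines

polygons = [
    [(10, 10), (200, 10), (200, 60), (10, 60)],
    [(12, 12), (198, 14), (198, 58), (12, 58)],  # heavily overlaps the first
]
deleted, kept = poly_routines.get_poly_no_overlap('page.png', polygons, threshold=0.6)
print('deleted indices:', deleted, 'polygons kept:', len(kept))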
coords/text_cleaning_routines.py ADDED
@@ -0,0 +1,82 @@
import json
from bidi.algorithm import get_display
import re


def correct_brackets(text):
    text = switch_chars(text, '{', '}')
    text = switch_chars(text, '(', ')')
    text = switch_chars(text, '[', ']')
    text = switch_chars(text, '«', '»')
    return text


def switch_chars(text, x, y):
    t = list(text)
    ind_x = [i for i, j in enumerate(t) if j == x]
    ind_y = [i for i, j in enumerate(t) if j == y]
    for i in ind_x:
        t[i] = y
    for i in ind_y:
        t[i] = x
    return ''.join(t)


def clean_text(input_text):
    cleaned_text = input_text.replace('\u0009', ' ')
    cleaned_text = cleaned_text.replace('\u000A', ' ')
    cleaned_text = cleaned_text.replace('\u00D7', 'x')
    cleaned_text = cleaned_text.replace('\u066A', '%')
    cleaned_text = cleaned_text.replace('\u06f3', '\u0663')
    cleaned_text = cleaned_text.replace('\u06f7', '\u0667')
    cleaned_text = cleaned_text.replace('\u06f9', '\u0669')

    cleaned_text = cleaned_text.replace('\u2018', "'")
    cleaned_text = cleaned_text.replace('\u2019', "'")
    cleaned_text = cleaned_text.replace('\u201C', '"')
    cleaned_text = cleaned_text.replace('\u201D', '"')
    cleaned_text = cleaned_text.replace('…', '...')
    cleaned_text = cleaned_text.replace('\u2033', "\u064b")
    cleaned_text = cleaned_text.replace('\u2044', '/')
    cleaned_text = cleaned_text.replace('\u2e17', '\u201e')
    pattern = r'[\u2013\u2014]'
    cleaned_text = re.sub(pattern, '-', cleaned_text)
    pattern = r'[●•\xb7]'
    cleaned_text = re.sub(pattern, '.', cleaned_text)
    return cleaned_text


def get_char_sets():
    english_lower = range(ord('a'), ord('z') + 1)
    english_upper = range(ord('A'), ord('Z') + 1)

    english_numbers = range(ord('0'), ord('9') + 1)

    english_ord = set(english_lower).union(english_upper)
    english_numbers = {chr(c) for c in set(english_numbers)}
    english_alphabet = {chr(c) for c in english_ord}

    # This range includes numerals/digits also
    arabic_unicodes = range(ord("\u0600"), ord("\u06ff") + 1)
    arabic_ord = set(arabic_unicodes)
    arabic_chars = {chr(c) for c in arabic_ord}
    arabic_numbers_ord = range(ord("\u0660"), ord("\u0669") + 1)
    arabic_digits = {chr(c) for c in arabic_numbers_ord}
    return {'english_alphabet': english_alphabet, 'arabic_unicodes': arabic_chars,
            'latin_digits': english_numbers, 'arabic_digits': arabic_digits}


def get_clean_visual_order(text):
    charset_dict = get_char_sets()
    text_set = set(text)
    has_english_alphabet = len(text_set.intersection(charset_dict['english_alphabet'])) > 0
    has_latin_digits = len(text_set.intersection(charset_dict['latin_digits'])) > 0
    has_arabic_digits = len(text_set.intersection(charset_dict['arabic_digits'])) > 0

    if has_arabic_digits or has_english_alphabet or has_latin_digits:
        text_visual_order = get_display(text, base_dir='R')[::-1]
        text_visual_order = correct_brackets(text_visual_order)
    else:
        text_visual_order = text
    clean_visual_order = clean_text(text_visual_order)
    return clean_visual_order
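A small sketch of the cleaning pipeline above on a line that mixes Latin characters with typographic punctuation:

import text_cleaning_routines as tcr

line = 'ABC \u2013 \u201Ctest\u201D \u2026'
print(tcr.get_clean_visual_order(line))  # bidi-reordered, brackets switched, then cleaned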
coords/text_gt.py ADDED
@@ -0,0 +1,101 @@
# Extract text from a json file
# As all lines need sorting etc., this is kept in the coords folder

import sys
import points
import json
import pandas as pd
import os
import numpy as np


def sort_lines(lines_list):

    line_starts = [line['baseline'][0] for line in lines_list]
    sorted_starts = sorted(enumerate(line_starts), key=lambda x: (x[1][1], -x[1][0]))
    sorted_start_ind = [x[0] for x in sorted_starts]
    sorted_lines = [lines_list[i] for i in sorted_start_ind]

    return sorted_lines


def is_valid_key(key, json_obj):
    if not key.lower().startswith('line_'):
        return False
    if "deleted" in json_obj[key].keys() and json_obj[key]["deleted"] != "0":
        return False
    # No text field
    if 'text' not in json_obj[key]:
        return False
    # Text empty
    json_obj[key]['text'] = json_obj[key]['text'].replace('\t', ' ')
    if json_obj[key]['text'].strip() == "":
        return False
    return True


def get_text(json_file, return_list=False):
    with open(json_file) as fin:
        json_obj = json.load(fin)
    to_remove_ind = []
    # Get keys in json_obj
    keys = list(json_obj.keys())
    # Get the list of line objects
    lines = [json_obj[k] for k in keys if is_valid_key(k, json_obj)]
    # Get the baseline of each line
    for ind, line in enumerate(lines):
        poly_pts = line["coord"]
        poly_pts = points.list_to_xy(poly_pts)
        if len(poly_pts) <= 2:
            print(json_file, len(poly_pts))
            to_remove_ind.append(ind)
        if not points.valid_poly(poly_pts):
            to_remove_ind.append(ind)
            continue
        try:
            baseline = points.get_baseline_chunks(poly_pts)
            baseline.sort(key=lambda x: x[0], reverse=True)
            line['baseline'] = baseline
        except Exception as e:
            to_remove_ind.append(ind)

    # Remove the lines causing exceptions
    cleaned_lines = [lines[ind] for ind in range(len(lines)) if ind not in to_remove_ind]
    # Sort the lines
    sorted_lines = sort_lines(cleaned_lines)

    text = []
    for l in sorted_lines:
        text.append(l["text"])
    if return_list:
        return text
    return '\n'.join(text)


def get_json_file(img_fullname):
    dir_name, img_name = os.path.split(img_fullname)
    json_files = []
    base_file = img_name[:-4]
    files = os.listdir(dir_name)
    for f in files:
        prefix = base_file + '_annotate_'
        if f.startswith(prefix):
            # Keep only files without a timestamp in the name, i.e., the
            # part after the prefix contains a single '.'
            partial_string = f[len(prefix):]
            ind1 = partial_string.rfind('.')
            ind2 = partial_string.find('.')

            if ind1 == ind2:
                json_files.append(f)

    if len(json_files) > 1:
        print('More than one json found... returning the 0th one', json_files)
    if len(json_files) == 0:
        print('No json found')
        return None

    return os.path.join(dir_name, json_files[0])
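A usage sketch for the two helpers above; the file names are placeholders and follow the page_01_annotate_<name>.json naming scheme the code expects:

import text_gt

json_file = text_gt.get_json_file('scans/page_01.png')
if json_file is not None:
    lines = text_gt.get_text(json_file, return_list=True)
    print(len(lines), 'ground-truth lines')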
model/trial_26_A/muharaf_charset.json ADDED
@@ -0,0 +1,320 @@
{
    "char_to_idx": {
        " ": 1,
        "!": 2,
        "\"": 3,
        "#": 4,
        "$": 5,
        "%": 6,
        "&": 7,
        "'": 8,
        "(": 9,
        ")": 10,
        "*": 11,
        "+": 12,
        ",": 13,
        "-": 14,
        ".": 15,
        "/": 16,
        "0": 17,
        "1": 18,
        "2": 19,
        "3": 20,
        "4": 21,
        "5": 22,
        "6": 23,
        "7": 24,
        "8": 25,
        "9": 26,
        ":": 27,
        "=": 28,
        "A": 29,
        "B": 30,
        "C": 31,
        "D": 32,
        "E": 33,
        "F": 34,
        "G": 35,
        "H": 36,
        "I": 37,
        "J": 38,
        "K": 39,
        "L": 40,
        "M": 41,
        "N": 42,
        "O": 43,
        "P": 44,
        "Q": 45,
        "R": 46,
        "S": 47,
        "T": 48,
        "U": 49,
        "V": 50,
        "W": 51,
        "X": 52,
        "Y": 53,
        "Z": 54,
        "[": 55,
        "\\": 56,
        "]": 57,
        "_": 58,
        "a": 59,
        "b": 60,
        "c": 61,
        "d": 62,
        "e": 63,
        "f": 64,
        "g": 65,
        "h": 66,
        "i": 67,
        "j": 68,
        "k": 69,
        "l": 70,
        "m": 71,
        "n": 72,
        "o": 73,
        "p": 74,
        "q": 75,
        "r": 76,
        "s": 77,
        "t": 78,
        "u": 79,
        "v": 80,
        "x": 81,
        "y": 82,
        "z": 83,
        "|": 84,
        "\u00ba": 85,
        "\u00c3": 86,
        "\u00c8": 87,
        "\u00c9": 88,
        "\u00ca": 89,
        "\u00e0": 90,
        "\u00e7": 91,
        "\u00e8": 92,
        "\u00e9": 93,
        "\u00ea": 94,
        "\u060c": 95,
        "\u061b": 96,
        "\u061f": 97,
        "\u0621": 98,
        "\u0622": 99,
        "\u0623": 100,
        "\u0624": 101,
        "\u0625": 102,
        "\u0626": 103,
        "\u0627": 104,
        "\u0628": 105,
        "\u0629": 106,
        "\u062a": 107,
        "\u062b": 108,
        "\u062c": 109,
        "\u062d": 110,
        "\u062e": 111,
        "\u062f": 112,
        "\u0630": 113,
        "\u0631": 114,
        "\u0632": 115,
        "\u0633": 116,
        "\u0634": 117,
        "\u0635": 118,
        "\u0636": 119,
        "\u0637": 120,
        "\u0638": 121,
        "\u0639": 122,
        "\u063a": 123,
        "\u0640": 124,
        "\u0641": 125,
        "\u0642": 126,
        "\u0643": 127,
        "\u0644": 128,
        "\u0645": 129,
        "\u0646": 130,
        "\u0647": 131,
        "\u0648": 132,
        "\u0649": 133,
        "\u064a": 134,
        "\u064b": 135,
        "\u064c": 136,
        "\u064d": 137,
        "\u064e": 138,
        "\u064f": 139,
        "\u0650": 140,
        "\u0651": 141,
        "\u0652": 142,
        "\u0660": 143,
        "\u0661": 144,
        "\u0662": 145,
        "\u0663": 146,
        "\u0664": 147,
        "\u0665": 148,
        "\u0666": 149,
        "\u0667": 150,
        "\u0668": 151,
        "\u0669": 152,
        "\u06a4": 153,
        "\u06a8": 154,
        "\u201e": 155,
        "\ufb6c": 156,
        "\ufc63": 157
    },
    "idx_to_char": {
        "1": " ",
        "2": "!",
        "3": "\"",
        "4": "#",
        "5": "$",
        "6": "%",
        "7": "&",
        "8": "'",
        "9": "(",
        "10": ")",
        "11": "*",
        "12": "+",
        "13": ",",
        "14": "-",
        "15": ".",
        "16": "/",
        "17": "0",
        "18": "1",
        "19": "2",
        "20": "3",
        "21": "4",
        "22": "5",
        "23": "6",
        "24": "7",
        "25": "8",
        "26": "9",
        "27": ":",
        "28": "=",
        "29": "A",
        "30": "B",
        "31": "C",
        "32": "D",
        "33": "E",
        "34": "F",
        "35": "G",
        "36": "H",
        "37": "I",
        "38": "J",
        "39": "K",
        "40": "L",
        "41": "M",
        "42": "N",
        "43": "O",
        "44": "P",
        "45": "Q",
        "46": "R",
        "47": "S",
        "48": "T",
        "49": "U",
        "50": "V",
        "51": "W",
        "52": "X",
        "53": "Y",
        "54": "Z",
        "55": "[",
        "56": "\\",
        "57": "]",
        "58": "_",
        "59": "a",
        "60": "b",
        "61": "c",
        "62": "d",
        "63": "e",
        "64": "f",
        "65": "g",
        "66": "h",
        "67": "i",
        "68": "j",
        "69": "k",
        "70": "l",
        "71": "m",
        "72": "n",
        "73": "o",
        "74": "p",
        "75": "q",
        "76": "r",
        "77": "s",
        "78": "t",
        "79": "u",
        "80": "v",
        "81": "x",
        "82": "y",
        "83": "z",
        "84": "|",
        "85": "\u00ba",
        "86": "\u00c3",
        "87": "\u00c8",
        "88": "\u00c9",
        "89": "\u00ca",
        "90": "\u00e0",
        "91": "\u00e7",
        "92": "\u00e8",
        "93": "\u00e9",
        "94": "\u00ea",
        "95": "\u060c",
        "96": "\u061b",
        "97": "\u061f",
        "98": "\u0621",
        "99": "\u0622",
        "100": "\u0623",
        "101": "\u0624",
        "102": "\u0625",
        "103": "\u0626",
        "104": "\u0627",
        "105": "\u0628",
        "106": "\u0629",
        "107": "\u062a",
        "108": "\u062b",
        "109": "\u062c",
        "110": "\u062d",
        "111": "\u062e",
        "112": "\u062f",
        "113": "\u0630",
        "114": "\u0631",
        "115": "\u0632",
        "116": "\u0633",
        "117": "\u0634",
        "118": "\u0635",
        "119": "\u0636",
        "120": "\u0637",
        "121": "\u0638",
        "122": "\u0639",
        "123": "\u063a",
        "124": "\u0640",
        "125": "\u0641",
        "126": "\u0642",
        "127": "\u0643",
        "128": "\u0644",
        "129": "\u0645",
        "130": "\u0646",
        "131": "\u0647",
        "132": "\u0648",
        "133": "\u0649",
        "134": "\u064a",
        "135": "\u064b",
        "136": "\u064c",
        "137": "\u064d",
        "138": "\u064e",
        "139": "\u064f",
        "140": "\u0650",
        "141": "\u0651",
        "142": "\u0652",
        "143": "\u0660",
        "144": "\u0661",
        "145": "\u0662",
        "146": "\u0663",
        "147": "\u0664",
        "148": "\u0665",
        "149": "\u0666",
        "150": "\u0667",
        "151": "\u0668",
        "152": "\u0669",
        "153": "\u06a4",
        "154": "\u06a8",
        "155": "\u201e",
        "156": "\ufb6c",
        "157": "\ufc63"
    }
}
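A sketch of loading this character set; note the indices start at 1, leaving index 0 free (typically the CTC blank):

import json

with open('model/trial_26_A/muharaf_charset.json') as f:
    charset = json.load(f)

char_to_idx = charset['char_to_idx']
# JSON keys are strings, so convert them back to ints for lookups
idx_to_char = {int(k): v for k, v in charset['idx_to_char'].items()}
print(len(char_to_idx), idx_to_char[104])  # 157 characters; 104 -> '\u0627'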
model/trial_26_A/set0/config_2600.yaml ADDED
@@ -0,0 +1,25 @@
snapshot_path: model/trial_26_A/set0/pretrain/


network:
  hw:
    char_set_path: model/trial_26_A/muharaf_charset.json
    cnn_out_size: 1024
    input_height: 60
    num_of_channels: 3
    num_of_outputs: 158
    use_instance_norm: true
  lf:
    look_ahead_matrix: null
    step_bias: null

  sol:
    base0: 16
    base1: 16
post_processing:
  lf_nms_range:
  - 0
  - 6
  lf_nms_threshold: 0.5
  sol_threshold: 0.1
model/trial_26_A/set0/pretrain/hw.pt ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5e1f8a27a6fc5e64c1c93b72b18200db7b6c240053d3e016ad138b4cd3521b08
size 73251954
model/trial_26_A/set0/pretrain/lf.pt ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1b7082953c1269b67ba0801311604884255a4fe4bdef821dcf1ce169ab582d6f
size 22228762
model/trial_26_A/set0/pretrain/sol.pt ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e94e56959c5175067734e00d7e2e02ff05018f37eb91d726f2319504df60c832
size 36979951
py3/e2e/__init__.py ADDED
File without changes
py3/e2e/alignment_dataset.py ADDED
@@ -0,0 +1,69 @@
from torch.utils.data import Dataset
import json
import os
import cv2
import numpy as np
import torch
import random
from utils import safe_load


def collate(batch):
    return batch


class AlignmentDataset(Dataset):

    def __init__(self, set_list, data_range=None, ignore_json=False, resize_width=512):

        self.ignore_json = ignore_json

        self.resize_width = resize_width

        self.ids = set_list
        self.ids.sort()

        if data_range is not None:
            self.ids = random.sample(self.ids, data_range)

        print("Alignment Ids Count:", len(self.ids))

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):

        gt_json_path, img_path = self.ids[idx]

        gt_json = []
        if not self.ignore_json:
            gt_json = safe_load.json_state(gt_json_path)
            if gt_json is None:
                return None

        org_img = cv2.imread(img_path)

        full_img = org_img.astype(np.float32)
        full_img = full_img.transpose([2, 1, 0])[None, ...]
        full_img = torch.from_numpy(full_img)
        full_img = full_img / 128 - 1

        target_dim1 = self.resize_width
        s = target_dim1 / float(org_img.shape[1])
        target_dim0 = int(org_img.shape[0] / float(org_img.shape[1]) * target_dim1)

        img = cv2.resize(org_img, (target_dim1, target_dim0), interpolation=cv2.INTER_CUBIC)
        img = img.astype(np.float32)
        img = img.transpose([2, 1, 0])[None, ...]
        img = torch.from_numpy(img)
        img = img / 128 - 1

        image_key = gt_json_path[:-len('.json')]

        return {
            "resized_img": img,
            "full_img": full_img,
            "resize_scale": 1.0 / s,
            "gt_lines": [x['gt'] for x in gt_json],
            "img_key": image_key,
            "json_path": gt_json_path,
            "gt_json": gt_json
        }
py3/e2e/e2e_model.py ADDED
@@ -0,0 +1,207 @@
import torch
import torch.nn as nn
from torch.autograd import Variable

import cv2
import numpy as np
from utils import string_utils, error_rates
from utils import transformation_utils
from . import handwriting_alignment_loss

from . import e2e_postprocessing

import copy
from scipy.optimize import linear_sum_assignment
import math
# from pynvml import *


# max_lines_per_image is the max number of lines in a batch for the HW network to process
class E2EModel(nn.Module):
    def __init__(self, sol, lf, hw, dtype=torch.cuda.FloatTensor, max_lines_per_image=8, device="cuda"):
        super(E2EModel, self).__init__()

        self.dtype = dtype

        self.sol = sol
        self.lf = lf
        self.hw = hw
        self.line = None
        self.max_lines_per_image = max_lines_per_image

        self.device = device

    def train(self):
        self.sol.train()
        self.lf.train()
        self.hw.train()

    def eval(self):
        self.sol.eval()
        self.lf.eval()
        self.hw.eval()

    def forward(self, x, use_full_img=True, accpet_threshold=0.1, volatile=True, gt_lines=None,
                idx_to_char=None, HW_cuda=0, device="cuda"):

        if device != self.device:
            # Fixed: the original crashed here through an undefined name
            raise ValueError('Wrong device is set: param {} vs self {}'.format(device, self.device))

        sol_img = Variable(x['resized_img'].type(self.dtype), requires_grad=False)

        if use_full_img:
            img = Variable(x['full_img'].type(self.dtype), requires_grad=False)
            scale = x['resize_scale']
            results_scale = 1.0
        else:
            img = sol_img
            scale = 1.0
            results_scale = x['resize_scale']

        original_starts = self.sol(sol_img)

        start = original_starts

        # Take at least one point
        sorted_start, sorted_indices = torch.sort(start[..., 0:1], dim=1, descending=True)
        min_threshold = sorted_start[0, 1, 0].data
        accpet_threshold = min(accpet_threshold, min_threshold)
        # There should not be more than 56 points, to avoid running out of memory
        if sorted_start.size()[1] > 56:
            accpet_threshold = max(accpet_threshold, sorted_start[0, 55, 0].data)
        select = original_starts[..., 0:1] >= accpet_threshold

        select_idx = np.where(select.data.cpu().numpy())[1]

        select = select.expand(select.size(0), select.size(1), start.size(2))
        select = select.detach()
        start = start[select].view(start.size(0), -1, start.size(2))

        perform_forward = len(start.size()) == 3

        if not perform_forward:
            return None

        forward_img = img

        start = start.transpose(0, 1)

        positions = torch.cat([
            start[..., 1:3] * scale,
            start[..., 3:4],
            start[..., 4:5] * scale,
            start[..., 0:1]
        ], 2)

        hw_out = []
        p_interval = positions.size(0)
        lf_xy_positions = None
        line_imgs = []
        for p in range(0, positions.size(0), p_interval):
            sub_positions = positions[p:p + p_interval, 0, :]
            sub_select_idx = select_idx[p:p + p_interval]

            batch_size = sub_positions.size(0)
            sub_positions = [sub_positions]

            expand_img = forward_img.expand(sub_positions[0].size(0), img.size(1), img.size(2), img.size(3))

            step_size = 8
            extra_bw = 1
            forward_steps = 30

            # Follow the line a few steps forward, then backward, then
            # forward again until the end of the line
            grid_line, _, out_positions, xy_positions = self.lf(expand_img, sub_positions, steps=step_size)
            grid_line, _, out_positions, xy_positions = self.lf(expand_img, [out_positions[step_size]], steps=step_size + extra_bw, negate_lw=True)
            grid_line, _, out_positions, xy_positions = self.lf(expand_img, [out_positions[step_size + extra_bw]], steps=forward_steps, allow_end_early=True)

            if lf_xy_positions is None:
                lf_xy_positions = xy_positions
            else:
                for i in range(len(lf_xy_positions)):
                    lf_xy_positions[i] = torch.cat([
                        lf_xy_positions[i],
                        xy_positions[i]
                    ])
            expand_img = expand_img.transpose(2, 3)

            hw_interval = p_interval
            for h in range(0, grid_line.size(0), hw_interval):
                sub_out_positions = [o[h:h + hw_interval] for o in out_positions]
                sub_xy_positions = [o[h:h + hw_interval] for o in xy_positions]
                sub_sub_select_idx = sub_select_idx[h:h + hw_interval]

                line = torch.nn.functional.grid_sample(expand_img[h:h + hw_interval].detach(), grid_line[h:h + hw_interval], align_corners=True)
                line = line.transpose(2, 3)

                for l in line:
                    l = l.transpose(0, 1).transpose(1, 2)
                    l = (l + 1) * 128
                    l_np = l.data.cpu().numpy()
                    line_imgs.append(l_np)

                # Mehreen add: to avoid out-of-memory errors, a large batch
                # has to be split up for the HW network to process. This
                # case arises when SOL finds too many lines on a page.
                batch, channels, old_ht, old_width = line.size()
                line = line.detach().cpu()
                total_todo = batch

                start_index = 0
                while total_todo > 0:
                    mini_batch_size = min(self.max_lines_per_image, total_todo)
                    partial_lines = line[start_index:start_index + mini_batch_size, :, :, :]
                    start_index += mini_batch_size
                    total_todo = total_todo - mini_batch_size
                    partial_lines = partial_lines.to(self.device)
                    out = self.hw(partial_lines)
                    if "cuda" in device:
                        torch.cuda.empty_cache()
                    out = out.transpose(0, 1)
                    hw_out.append(out)

        hw_out = torch.cat(hw_out, 0)

        return {
            "original_sol": original_starts,
            "sol": positions,
            "lf": lf_xy_positions,
            "hw": hw_out,
            "results_scale": results_scale,
            "line_imgs": line_imgs
        }
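The mini-batching loop above is a generic chunking pattern; a standalone sketch of the same idea, with run_in_chunks and model_fn as hypothetical names:

import torch

def run_in_chunks(model_fn, lines, max_per_batch=8):
    # Feed the network at most max_per_batch line images at a time,
    # then concatenate the per-chunk outputs along the batch dimension
    outs = []
    for start in range(0, lines.size(0), max_per_batch):
        outs.append(model_fn(lines[start:start + max_per_batch]))
    return torch.cat(outs, 0)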
py3/e2e/e2e_postprocessing.py ADDED
@@ -0,0 +1,182 @@
from utils import string_utils, error_rates
import numpy as np
from . import nms
import copy


def get_trimmed_polygons(out):
    all_polygons = []
    for j in range(out['lf'][0].shape[0]):
        begin = out['beginning'][j]
        end = out['ending'][j]
        begin_f = int(np.floor(begin))
        end_f = int(np.ceil(end))
        points = []
        for i in range(begin_f, end_f + 1):

            if i == begin_f:
                p0 = out['lf'][i][j]
                p1 = out['lf'][i + 1][j]
                t = begin - np.floor(begin)
                p = p0 * (1 - t) + p1 * t

            elif i == end_f:
                p0 = out['lf'][i - 1][j]
                if i != len(out['lf']):
                    p1 = out['lf'][i][j]
                    t = end - np.floor(end)
                    p = p0 * (1 - t) + p1 * t
                else:
                    p = p0
            else:
                p = out['lf'][i][j]

            points.append(p)
        points = np.array(points)
        all_polygons.append(points)
    return all_polygons


def trim_ends(out):

    lf_length = len(out['lf'])
    hw = out['hw']
    # Mehreen: hw has shape (lines, steps, chars), e.g. (14, 361, 197);
    # selected then has shape (lines, steps), e.g. (14, 361)
    selected = hw.argmax(axis=-1)
    beginning = np.argmax(selected != 0, axis=1)
    ending = selected.shape[1] - 1 - np.argmax(selected[:, ::-1] != 0, axis=1)

    beginning_percent = (beginning + 0.5) / float(selected.shape[1])
    ending_percent = (ending + 0.5) / float(selected.shape[1])

    lf_beginning = lf_length * beginning_percent
    lf_ending = lf_length * ending_percent

    out['beginning'] = lf_beginning
    out['ending'] = lf_ending
    return out


def filter_on_pick(out, pick):
    out['sol'] = out['sol'][pick]
    out['lf'] = [l[pick] for l in out['lf']]
    out['hw'] = out['hw'][pick]

    if 'idx' in out:
        out['idx'] = out['idx'][pick]
    if 'beginning' in out:
        out['beginning'] = out['beginning'][pick]
    if 'ending' in out:
        out['ending'] = out['ending'][pick]


def filter_on_pick_no_copy(out, pick):
    output = {}
    output['sol'] = out['sol'][pick]
    output['lf'] = [l[pick] for l in out['lf']]
    output['hw'] = out['hw'][pick]
    if 'idx' in out:
        output['idx'] = out['idx'][pick]
    if 'beginning' in out:
        output['beginning'] = out['beginning'][pick]
    if 'ending' in out:
        output['ending'] = out['ending'][pick]
    return output


def select_non_empty_string(out):
    selected = out['hw'].argmax(axis=-1)
    return np.where(selected.sum(axis=1) != 0)


def postprocess(out, **kwargs):
    out = copy.copy(out)

    # Postprocessing should be done with numpy data
    sol_threshold = kwargs.get("sol_threshold", None)
    sol_nms_threshold = kwargs.get("sol_nms_threshold", None)
    lf_nms_params = kwargs.get('lf_nms_params', None)
    lf_nms_2_params = kwargs.get('lf_nms_2_params', None)

    if sol_threshold is not None:
        pick = np.where(out['sol'][:, -1] > sol_threshold)
        filter_on_pick(out, pick)

    # Mehreen: this is passed as None from run_hwr in decode_one_img_with_info
    if sol_nms_threshold is not None:
        raise Exception("This is not correct")
        pick = nms.sol_nms_single(out['sol'], sol_nms_threshold)
        out['sol'] = out['sol'][pick]

    # Mehreen: when post-processing, this part is used;
    # sample_config: lf_nms_range: [0, 6], lf_nms_threshold: 0.5
    if lf_nms_params is not None:
        confidences = out['sol'][:, -1]
        overlap_range = lf_nms_params['overlap_range']
        overlap_thresh = lf_nms_params['overlap_threshold']

        lf_setup = np.concatenate([l[None, ...] for l in out['lf']])
        lf_setup = [lf_setup[:, i] for i in range(lf_setup.shape[1])]

        pick = nms.lf_non_max_suppression_area(lf_setup, confidences, overlap_range, overlap_thresh)
        filter_on_pick(out, pick)

    # Mehreen: when post-processing, this part is None from decode_one_img_with_info
    if lf_nms_2_params is not None:
        confidences = out['sol'][:, -1]
        overlap_thresh = lf_nms_2_params['overlap_threshold']
        refined_lf = get_trimmed_polygons(out)
        pick = nms.lf_non_max_suppression_area(refined_lf, confidences, None, overlap_thresh)
        filter_on_pick(out, pick)

    return out


def read_order(out):
    first_pt = out['lf'][0][:, :2, 0]

    first_pt = first_pt[:, ::-1]
    first_pt = np.concatenate([first_pt, np.arange(first_pt.shape[0])[:, None]], axis=1)
    first_pt = first_pt.tolist()

    first_pt.sort()

    return [int(p[2]) for p in first_pt]


def decode_handwriting(out, idx_to_char):
    hw_out = out['hw']
    list_of_pred = []
    list_of_raw_pred = []
    for i in range(hw_out.shape[0]):
        logits = hw_out[i, ...]
        pred, raw_pred = string_utils.naive_decode(logits)
        pred_str = string_utils.label2str_single(pred, idx_to_char, False)
        raw_pred_str = string_utils.label2str_single(raw_pred, idx_to_char, True)
        list_of_pred.append(pred_str)
        list_of_raw_pred.append(raw_pred_str)

    return list_of_pred, list_of_raw_pred


def results_to_numpy(out):
    return {
        "sol": out['sol'].data.cpu().numpy()[:, 0, :],
        "lf": [l.data.cpu().numpy() for l in out['lf']] if out['lf'] is not None else None,
        "hw": out['hw'].data.cpu().numpy(),
        "results_scale": out['results_scale'],
        "line_imgs": out['line_imgs'],
    }


def align_to_gt_lines(decoded_hw, gt_lines):
    costs = []
    for i in range(len(decoded_hw)):
        costs.append([])
        for j in range(len(gt_lines)):
            pred = decoded_hw[i]
            gt = gt_lines[j]
            cer = error_rates.cer(gt, pred)
            costs[i].append(cer)

    costs = np.array(costs)
    min_idx = costs.argmin(axis=0)
    min_val = costs.min(axis=0)

    return min_idx, min_val
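A sketch of how these routines chain together; raw_out and idx_to_char are assumed to come from the E2E model and the character set, and the thresholds mirror config_2600.yaml:

import numpy as np
from e2e import e2e_postprocessing as pp

out = pp.results_to_numpy(raw_out)                 # raw_out: dict from E2EModel.forward
out['idx'] = np.arange(out['sol'].shape[0])
out = pp.postprocess(out,
                     sol_threshold=0.1,
                     lf_nms_params={'overlap_range': [0, 6],
                                    'overlap_threshold': 0.5})
pp.filter_on_pick(out, pp.read_order(out))         # reading order: top-to-bottom
preds, raw_preds = pp.decode_handwriting(out, idx_to_char)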
py3/e2e/forward_pass.py ADDED
@@ -0,0 +1,86 @@
from e2e import e2e_model, e2e_postprocessing
from e2e.e2e_model import E2EModel

from . import validation_utils

from utils import error_rates

import itertools
import copy
import numpy as np
import cv2


def forward_pass(x, e2e, config, thresholds, idx_to_char, update_json=False):

    gt_lines = x['gt_lines']
    gt = "\n".join(gt_lines)

    out_original = e2e(x)
    results = {}
    if out_original is None:
        # TODO: not a good way to handle this, but fine for now
        # (fixed: the original fell through here instead of returning)
        return results, None, None

    # Fixed: these helpers live in e2e_postprocessing, not on E2EModel
    out_original = e2e_postprocessing.results_to_numpy(out_original)
    out_original['idx'] = np.arange(out_original['sol'].shape[0])

    decoded_hw, decoded_raw_hw = e2e_postprocessing.decode_handwriting(out_original, idx_to_char)
    pick, costs = e2e_postprocessing.align_to_gt_lines(decoded_hw, gt_lines)

    most_ideal_pred_lines, improved_idxs = validation_utils.update_ideal_results(pick, costs, decoded_hw, x['gt_json'])
    # if update_json:
    #     validation_utils.save_improved_idxs(improved_idxs, decoded_hw,
    #                                         decoded_raw_hw, out_original,
    #                                         x, config[dataset_lookup]['json_folder'], config['alignment']['trim_to_sol'])

    sol_thresholds = thresholds[0]
    sol_thresholds_idx = list(range(len(sol_thresholds)))

    lf_nms_ranges = thresholds[1]
    lf_nms_ranges_idx = list(range(len(lf_nms_ranges)))

    lf_nms_thresholds = thresholds[2]
    lf_nms_thresholds_idx = list(range(len(lf_nms_thresholds)))

    most_ideal_pred_lines = "\n".join(most_ideal_pred_lines)

    ideal_pred_lines = [decoded_hw[i] for i in pick]
    ideal_pred_lines = "\n".join(ideal_pred_lines)

    error = error_rates.cer(gt, ideal_pred_lines)
    ideal_result = error

    error = error_rates.cer(gt, most_ideal_pred_lines)
    most_ideal_result = error

    for key in itertools.product(sol_thresholds_idx, lf_nms_ranges_idx, lf_nms_thresholds_idx):
        i, j, k = key
        sol_threshold = sol_thresholds[i]
        lf_nms_range = lf_nms_ranges[j]
        lf_nms_threshold = lf_nms_thresholds[k]

        out = copy.copy(out_original)

        out = e2e_postprocessing.postprocess(out,
                                             sol_threshold=sol_threshold,
                                             lf_nms_params={
                                                 "overlap_range": lf_nms_range,
                                                 "overlap_threshold": lf_nms_threshold
                                             })
        order = e2e_postprocessing.read_order(out)
        e2e_postprocessing.filter_on_pick(out, order)

        # draw_img = visualization.draw_output(out, img)
        # cv2.imwrite("test_b_samples/test_img_{}.png".format(a), draw_img)

        preds = [decoded_hw[i] for i in out['idx']]
        pred = "\n".join(preds)

        error = error_rates.cer(gt, pred)

        results[key] = error

    return results, ideal_result, most_ideal_result
py3/e2e/handwriting_alignment_loss.py ADDED
@@ -0,0 +1,125 @@
from utils import string_utils, error_rates
import torch
from scipy.optimize import linear_sum_assignment
import numpy as np
from torch.autograd import Variable


def accumulate_scores(out, out_positions, xy_positions, gt_state, idx_to_char):

    preds = out.transpose(0, 1).cpu()
    batch_size = preds.size(1)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))

    for i, logits in enumerate(out.data.cpu().numpy()):
        raw_decode, raw_decode_full = string_utils.naive_decode(logits)
        pred_str = string_utils.label2str_single(raw_decode, idx_to_char, False)
        pred_str_full = string_utils.label2str_single(raw_decode_full, idx_to_char, True)

        sub_out_positions = [o[i].data.cpu().numpy().tolist() for o in out_positions]
        sub_xy_positions = [o[i].data.cpu().numpy().tolist() for o in xy_positions]

        for gt_obj in gt_state:
            gt_text = gt_obj['gt']
            cer = error_rates.cer(gt_text, pred_str)

            # This is a terrible way to do this...
            gt_obj['errors'] = gt_obj.get('errors', [])
            gt_obj['pred'] = gt_obj.get('pred', [])
            gt_obj['pred_full'] = gt_obj.get('pred_full', [])
            gt_obj['path'] = gt_obj.get('path', [])
            gt_obj['path_xy'] = gt_obj.get('path_xy', [])

            gt_obj['errors'].append(cer)
            gt_obj['pred'].append(pred_str)
            gt_obj['pred_full'].append(pred_str_full)
            gt_obj['path'].append(sub_out_positions)
            gt_obj['path_xy'].append(sub_xy_positions)


def update_alignment(out, gt_lines, alignments, idx_to_char, idx_mapping, sol_positions):

    preds = out.cpu()
    batch_size = preds.size(1)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))

    for i, logits in enumerate(out.data.cpu().numpy()):
        raw_decode, raw_decode_full = string_utils.naive_decode(logits)
        pred_str = string_utils.label2str_single(raw_decode, idx_to_char, False)

        for j, gt in enumerate(gt_lines):
            cer = error_rates.cer(gt, pred_str)
            global_i = idx_mapping[i]
            c = sol_positions[i, 0, -1].data[0]

            # alignment_error = cer
            alignment_error = cer + 0.1 * (1.0 - c)

            if alignment_error < alignments[j][0]:
                alignments[j][0] = alignment_error
                alignments[j][1] = global_i
                # alignments[j][2] = out[i][:, None, :]
                alignments[j][2] = None
                alignments[j][3] = pred_str


def alignment(predictions, hw_scores, alpha_alignment=0.1, alpha_backprop=0.1):
    confidences = predictions[:, :, 4]

    log_confidences = torch.log(confidences + 1e-10)
    log_one_minus_confidences = torch.log(1.0 - confidences + 1e-10)

    expanded_log_confidences = log_confidences[:, :, None].expand(confidences.size(0), confidences.size(1), hw_scores.size(2))
    expanded_log_one_minus_confidences = log_one_minus_confidences[:, :, None].expand(confidences.size(0), confidences.size(1), hw_scores.size(2))

    C = alpha_alignment * hw_scores - expanded_log_confidences + expanded_log_one_minus_confidences

    C = C.data.cpu().numpy()
    X = np.zeros_like(C)

    idxs = []
    for b in range(C.shape[0]):
        C_i = C[b]
        row_ind, col_ind = linear_sum_assignment(C_i.T)
        idxs.append((col_ind, row_ind))

    return idxs


def loss(preds, non_hw_sol, hw_sol, gt_lines, char_to_idx, criterion):
    label_lengths = []
    all_labels = []
    for gt_str in gt_lines:
        l = string_utils.str2label_single(gt_str, char_to_idx)
        all_labels.append(l)
        label_lengths.append(len(l))

    all_labels = np.concatenate(all_labels)
    label_lengths = np.array(label_lengths)

    labels = torch.from_numpy(all_labels.astype(np.int32))
    label_lengths = torch.from_numpy(label_lengths.astype(np.int32))

    labels = Variable(labels, requires_grad=False)
    label_lengths = Variable(label_lengths, requires_grad=False)

    batch_size = preds.size(0)
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))

    ctc_loss = 1e-2 * criterion(preds.cpu(), labels, preds_size, label_lengths)

    log_one_minus_confidences = torch.log(1.0 - non_hw_sol[:, :, 0] + 1e-10)
    log_confidences = torch.log(hw_sol[:, :, 0] + 1e-10)

    selected_confidence = log_confidences.sum()
    not_selected_confidence = log_one_minus_confidences.sum()

    confidence_loss = -selected_confidence - not_selected_confidence

    return ctc_loss + confidence_loss.cpu()
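The cost matrix built in alignment() is solved with the Hungarian algorithm; a toy standalone example of that step:

import numpy as np
from scipy.optimize import linear_sum_assignment

# Rows: predictions, columns: ground-truth lines (toy CER-like costs)
C = np.array([[0.1, 0.9, 0.8],
              [0.7, 0.2, 0.9]])
row_ind, col_ind = linear_sum_assignment(C.T)  # same call pattern as alignment()
print(list(zip(col_ind, row_ind)))             # [(0, 0), (1, 1)]: pred i <-> gt j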
py3/e2e/nms.py ADDED
@@ -0,0 +1,162 @@
import torch
import numpy as np
import pyclipper


def sol_non_max_suppression(start_torch, overlap_thresh):
    # TODO: make this work with batches

    # Rotation is not taken into account
    start = start_torch.data.cpu().numpy()

    pick = sol_nms_single(start[0], overlap_thresh)

    zero_idx = [0 for _ in range(len(pick))]

    select = (zero_idx, pick)
    return start_torch[select][None, ...]


def sol_nms_single(start, overlap_thresh):
    # Based on https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
    # Maybe could port to pytorch to work over the tensors directly

    # Mehreen comment: start[:,0] is confidence, start[:,1] is x,
    # start[:,2] is y, start[:,3] is scale and start[:,4] is theta.
    # So (x1, y1) is the top left corner and (x2, y2) is the bottom right corner
    x1 = start[:, 1] - start[:, 3]
    y1 = start[:, 2] - start[:, 3]

    x2 = start[:, 1] + start[:, 3]
    y2 = start[:, 2] + start[:, 3]

    c = start[:, 0]

    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(c)

    pick = []
    while len(idxs) > 0:

        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        overlap = (w * h) / area[idxs[:last]]

        idxs = np.delete(idxs, np.concatenate(([last],
                                               np.where(overlap > overlap_thresh)[0])))
    return pick


def lf_non_max_suppression_area(lf_xy_positions, confidences, overlap_range, overlap_thresh):
    lf_xy_positions = [l[:, :2, :2] for l in lf_xy_positions]

    c = confidences

    bboxes = []
    center_lines = []
    scales = []
    for i in range(len(lf_xy_positions)):
        pts = lf_xy_positions[i]
        if overlap_range is not None:
            pts = pts[overlap_range[0]: overlap_range[1]]

        f = pts[0]
        delta = f[:, 0] - f[:, 1]
        scale = np.sqrt((delta ** 2).sum())
        scales.append(scale)

        center_lines.append((pts[:, :, 0] + pts[:, :, 1]) / 2.0)

        min_x = pts[:, 0].min()
        max_x = pts[:, 0].max()
        min_y = pts[:, 1].min()
        max_y = pts[:, 1].max()

        bboxes.append((min_x, min_y, max_x, max_y))

    bboxes = np.array(bboxes)

    if len(bboxes.shape) < 2:
        return []

    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]

    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(c)

    overlapping_regions = []
    pick = []
    while len(idxs) > 0:

        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        # Compute the width and height of the bounding box
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # Compute the ratio of overlap
        overlap_bb = (w * h) / area[idxs[:last]]

        overlap = []
        for step, j in enumerate(idxs[:last]):
            # Skip anything that doesn't actually have any overlap
            if overlap_bb[step] < 0.1:
                overlap.append(0)
                continue

            path0 = center_lines[i]
            path1 = center_lines[j]

            path = np.concatenate([path0, path1[::-1]])
            path = [[int(x[0]), int(x[1])] for x in path]

            expected_scale = (scales[i] + scales[j]) / 2.0
            one_off_area = expected_scale ** 2 * (path0.shape[0] + path1.shape[0]) / 2.0

            simple_path = pyclipper.SimplifyPolygon(path, pyclipper.PFT_NONZERO)
            inter_area = 0
            for path in simple_path:
                inter_area += abs(pyclipper.Area(path))

            area_ratio = inter_area / one_off_area
            area_ratio = 1.0 - area_ratio

            overlap.append(area_ratio)

        overlap = np.array(overlap)
        to_delete = np.concatenate(([last], np.where(overlap > overlap_thresh)[0]))
        idxs = np.delete(idxs, to_delete)

    return pick
py3/e2e/validation_utils.py ADDED
@@ -0,0 +1,137 @@
1
+ from utils import error_rates
2
+ import copy
3
+ import os
4
+ import cv2
5
+ import json
6
+
7
+ from copy import deepcopy
8
+
9
+ import numpy as np
10
+
11
+ def interpolate(key1, key2, lf, lf_idx, step_percent):
12
+ x0 = lf[lf_idx][key1]
13
+ y0 = lf[lf_idx][key2]
14
+ x1 = lf[lf_idx+1][key1]
15
+ y1 = lf[lf_idx+1][key2]
16
+
17
+ x = x1 * step_percent + x0 * (1.0 - step_percent)
18
+ y = y1 * step_percent + y0 * (1.0 - step_percent)
19
+
20
+ return x, y
21
+
22
+ def get_subdivide_pt(i, pred_full, lf):
23
+ percent = (float(i)+0.5) / float(len(pred_full))
24
+ lf_percent = (len(lf)-1) * percent
25
+
26
+ lf_idx = int(np.floor(lf_percent))
27
+ step_percent = lf_percent - lf_idx
28
+
29
+ x0, y0 = interpolate("x0", "y0", lf, lf_idx, step_percent)
30
+ x1, y1 = interpolate("x1", "y1", lf, lf_idx, step_percent)
31
+
32
+ return x0, y0, x1, y1
33
+
34
+ def save_improved_idxs(improved_idxs, decoded_hw, decoded_raw_hw, out, x, json_folder):
35
+
36
+ output_lines = [{
37
+ "gt": gt['gt']
38
+ } for gt in x['gt_json']]
39
+
40
+
41
+ # for i in improved_idxs:
42
+ for i in range(len(output_lines)):
43
+
44
+ if not i in improved_idxs:
45
+ output_lines[i] = x['gt_json'][i]
46
+ continue
47
+
48
+ k = improved_idxs[i]
49
+
50
+ # We want to trim the LF results
51
+ # good to keep around the full length of the prediciton
52
+ # so we can generate the full line-level images later
53
+ # at a different resolution
54
+ line_points = []
55
+ after_line_points = []
56
+ lf_path = out['lf']
57
+ end = out['ending'][k]
58
+ for j in range(len(lf_path)):
59
+ p = lf_path[j][k]
60
+ s = out['results_scale']
61
+
62
+ if j > end:
63
+ after_line_points.append({
64
+ "x0": p[0][1] * s,
65
+ "x1": p[0][0] * s,
66
+ "y0": p[1][1] * s,
67
+ "y1": p[1][0] * s
68
+ })
69
+ else:
70
+ line_points.append({
71
+ "x0": p[0][1] * s,
72
+ "x1": p[0][0] * s,
73
+ "y0": p[1][1] * s,
74
+ "y1": p[1][0] * s
75
+ })
76
+
77
+ begin = out['beginning'][k]
78
+ begin_f = int(np.floor(begin))
79
+ p0 = out['lf'][begin_f][k]
80
+ if begin_f+1 >= len(out['lf']):
81
+ p = p0
82
+ else:
83
+ p1 = out['lf'][begin_f+1][k]
84
+ t = begin - np.floor(begin)
85
+ p = p0 * (1 - t) + p1 * t
86
+
87
+ sol_point = {
88
+ "x0": p[0][1] * s,
89
+ "x1": p[0][0] * s,
90
+ "y0": p[1][1] * s,
91
+ "y1": p[1][0] * s
92
+ }
93
+
94
+ img_file_name = "{}_{}.png".format(x['img_key'], i)
95
+
96
+ output_lines[i]['pred'] = decoded_hw[k]
97
+ output_lines[i]['pred_full'] = decoded_raw_hw[k]
98
+ output_lines[i]['sol'] = sol_point
99
+ output_lines[i]['lf'] = line_points
100
+ output_lines[i]['after_lf'] = after_line_points
101
+ output_lines[i]['start_idx'] = 1 #TODO: update to backward idx
102
+ output_lines[i]['hw_path'] = img_file_name
103
+
104
+ line_img = out['line_imgs'][k]
105
+
106
+ full_img_file_name = os.path.join(json_folder, img_file_name)
107
+ cv2.imwrite(full_img_file_name, line_img)
108
+
109
+ json_path = x['json_path']
110
+ with open(json_path, 'w') as f:
111
+ # print('written data to:', f)
112
+ json.dump(output_lines, f)
113
+
114
+ def update_ideal_results(pick, costs, decoded_hw, gt_json):
115
+
116
+ most_ideal_pred = []
117
+ improved_idxs = {}
118
+
119
+ for i in range(len(gt_json)):
120
+ gt_obj = gt_json[i]
121
+
122
+ prev_pred = gt_obj.get('pred', '')
123
+ gt = gt_obj['gt']
124
+
125
+ pred = decoded_hw[pick[i]]
126
+
127
+ prev_cer = error_rates.cer(gt, prev_pred)
128
+ cer = costs[i]
129
+
130
+ if cer > prev_cer or len(pred) == 0:
131
+ most_ideal_pred.append(prev_pred)
132
+ continue
133
+
134
+ most_ideal_pred.append(pred)
135
+ improved_idxs[i] = pick[i]
136
+
137
+ return most_ideal_pred, improved_idxs
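A hedged sketch of how update_ideal_results is meant to be driven; the gt_json entries, alignment, and decodings below are made up for illustration:

gt_json = [{'gt': 'line one'}, {'gt': 'line two', 'pred': 'line twp'}]
decoded_hw = ['line one', 'line two']   # current network decodings
pick = [0, 1]                           # alignment of GT lines to decodings
costs = [error_rates.cer(g['gt'], decoded_hw[p]) for g, p in zip(gt_json, pick)]

best, improved = update_ideal_results(pick, costs, decoded_hw, gt_json)
# 'improved' maps GT line indices to decoding indices that lowered the CER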
py3/e2e/visualization.py ADDED
@@ -0,0 +1,176 @@
import numpy as np
import cv2

def draw_output(out, img):
    img = img.copy()

    # print(out['lf'][0].shape[0], out['sol'].shape[0])
    # sys.exit()

    for i in range(out['sol'].shape[0]):
        j = i

        p = out['sol'][i]

        c = int(255 * p[-1])
        color = (c, 0, 255-c)

        x = p[0]
        y = p[1]
        r = p[2]
        x_comp = np.cos(r)
        y_comp = -np.sin(r)
        s = p[3]

        rx = x + s * x_comp * 2
        ry = y + s * y_comp * 2

        rx2 = x - s * x_comp
        ry2 = y - s * y_comp

        rx = int(rx)
        ry = int(ry)

        rx2 = int(rx2)
        ry2 = int(ry2)

        x = int(x)
        y = int(y)
        scale = abs(int(s))

        color = (0, 0, 255)

        cv2.circle(img, (x, y), int(scale), color, 2)
        cv2.circle(img, (x, y), 4, color, -1)
        cv2.arrowedLine(img, (x, y), (rx, ry), color, 2, tipLength=0.25)
        # cv2.line(img, (rx2, ry2), (rx, ry), color, 2)
        cv2.putText(img, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

        # for j in range(out['lf'][0].shape[0]):
        begin = out['beginning'][j]
        end = out['ending'][j]

        last_xy = None
        # for i in xrange(len(out['lf'])):
        begin_f = int(np.floor(begin))
        end_f = int(np.ceil(end))
        for i in range(begin_f, end_f+1):

            if i == begin_f:
                p0 = out['lf'][i][j].mean(axis=1)
                p1 = out['lf'][i+1][j].mean(axis=1)
                t = begin - np.floor(begin)
                p = p0 * (1 - t) + p1 * t

            elif i == end_f:

                p0 = out['lf'][i-1][j].mean(axis=1)
                if i != len(out['lf']):
                    p1 = out['lf'][i][j].mean(axis=1)
                    t = end - np.floor(end)
                    p = p0 * (1 - t) + p1 * t
                else:
                    p = p0
            else:
                p = out['lf'][i][j].mean(axis=1)

            x = p[0]
            y = p[1]

            x = int(x)
            y = int(y)

            # c = int(255 * p[-1])
            # color = (c, 0, 255-c)
            color = (0, 150, 0)
            cv2.circle(img, (x, y), 4, color, -1)

            if last_xy is not None:
                cv2.line(img, (x, y), last_xy, color, int(s))

            last_xy = (x, y)
    return img

def draw_output_original(out, img):
    img = img.copy()

    for j in range(out['lf'][0].shape[0]):
        begin = out['beginning'][j]
        end = out['ending'][j]

        last_xy = None
        # for i in xrange(len(out['lf'])):
        begin_f = int(np.floor(begin))
        end_f = int(np.ceil(end))
        for i in range(begin_f, end_f+1):

            if i == begin_f:
                p0 = out['lf'][i][j].mean(axis=1)
                p1 = out['lf'][i+1][j].mean(axis=1)
                t = begin - np.floor(begin)
                p = p0 * (1 - t) + p1 * t

            elif i == end_f:

                p0 = out['lf'][i-1][j].mean(axis=1)
                if i != len(out['lf']):
                    p1 = out['lf'][i][j].mean(axis=1)
                    t = end - np.floor(end)
                    p = p0 * (1 - t) + p1 * t
                else:
                    p = p0
            else:
                p = out['lf'][i][j].mean(axis=1)

            x = p[0]
            y = p[1]

            x = int(x)
            y = int(y)

            color = (0, 0, 0)
            cv2.circle(img, (x, y), 4, color, -1)

            if last_xy is not None:
                cv2.line(img, (x, y), last_xy, color, 2)

            last_xy = (x, y)

    for i in range(out['sol'].shape[0]):

        p = out['sol'][i]

        c = int(255 * p[-1])
        color = (c, 0, 255-c)

        x = p[0]
        y = p[1]
        r = p[2]
        x_comp = np.cos(r)
        y_comp = -np.sin(r)
        s = p[3]

        rx = x + s * x_comp * 2
        ry = y + s * y_comp * 2

        rx2 = x - s * x_comp
        ry2 = y - s * y_comp

        rx = int(rx)
        ry = int(ry)

        rx2 = int(rx2)
        ry2 = int(ry2)

        x = int(x)
        y = int(y)
        scale = abs(int(s))

        # color = (0, 0, 255)

        cv2.circle(img, (x, y), int(scale), color, 2)
        cv2.circle(img, (x, y), 4, color, -1)
        cv2.arrowedLine(img, (x, y), (rx, ry), color, 2, tipLength=0.25)
        # cv2.line(img, (rx2, ry2), (rx, ry), color, 2)
        cv2.putText(img, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    return img
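Both drawing routines return an annotated copy of the page image. A minimal usage sketch; the file names are illustrative, and 'out' is assumed to be the network output dict with 'sol', 'lf', 'beginning', and 'ending' as indexed above:

import cv2

img = cv2.imread('page.png')
vis = draw_output(out, img)
cv2.imwrite('page_vis.png', vis)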
py3/hw/__init__.py ADDED
File without changes
py3/hw/cnn_lstm.py ADDED
@@ -0,0 +1,117 @@

import torch
from torch import nn
import matplotlib.pyplot as plt
import matplotlib.patches as patches

class BidirectionalLSTM(nn.Module):

    def __init__(self, nIn, nHidden, nOut):
        super(BidirectionalLSTM, self).__init__()

        self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True, dropout=0.5, num_layers=2)
        self.embedding = nn.Linear(nHidden * 2, nOut)

    def forward(self, input):

        # print('blstm input', input.size())
        recurrent, notused = self.rnn(input)
        # print('rnn output', recurrent.size(), 'not used', notused)
        T, b, h = recurrent.size()
        t_rec = recurrent.view(T * b, h)

        output = self.embedding(t_rec)  # [T * b, nOut]
        output = output.view(T, b, -1)

        # print('.....', output.size())
        return output

class CRNN(nn.Module):

    def __init__(self, cnnOutSize, nc, nclass, nh, n_rnn=2, leakyRelu=False, use_instance_norm=False):
        super(CRNN, self).__init__()

        ks = [3, 3, 3, 3, 3, 3, 2]
        ps = [1, 1, 1, 1, 1, 1, 0]
        ss = [1, 1, 1, 1, 1, 1, 1]
        nm = [64, 128, 256, 256, 512, 512, 512]

        cnn = nn.Sequential()

        def convRelu(i, batchNormalization=False):
            nIn = nc if i == 0 else nm[i - 1]
            nOut = nm[i]
            cnn.add_module('conv{0}'.format(i),
                           nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
            if batchNormalization:
                if not use_instance_norm:
                    cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
                else:
                    cnn.add_module(f'instancenorm{i}', nn.InstanceNorm2d(nOut))
            if leakyRelu:
                cnn.add_module('relu{0}'.format(i),
                               nn.LeakyReLU(0.2, inplace=True))
            else:
                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))

        convRelu(0)
        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))  # 64x16x64
        convRelu(1)
        cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))  # 128x8x32
        convRelu(2, True)
        convRelu(3)
        cnn.add_module('pooling{0}'.format(2),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 256x4x16
        convRelu(4, True)
        convRelu(5)
        cnn.add_module('pooling{0}'.format(3),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 512x2x16
        convRelu(6, True)  # 512x1x16

        self.cnn = cnn
        # Mehreen: nclass is the total output characters. nh is set to 512 by create_model
        self.rnn = BidirectionalLSTM(cnnOutSize, nh, nclass)
        ### MEHREEN ADD PARAM dim=2
        self.softmax = nn.LogSoftmax(dim=2)

    def forward(self, input):
        conv = self.cnn(input)
        b, c, h, w = conv.size()
        # print('.....', input.size())
        # print('....', b, c, h, w)

        if torch.any(torch.isnan(conv)):
            print("CONV IS NAN (b,c,h,w) = ", b, c, h, w)

        # iimg = input.cpu()[0].permute(2, 1, 0)
        # print('....iimg.size', input.size())
        # plt.imshow(iimg)
        # plt.show()

        #### MEHREEN change this
        # conv = conv.view(b, -1, w)  ### <-- original
        # to
        conv = torch.reshape(conv, (b, c*h, w))
        ### End mehreen
        conv = conv.permute(2, 0, 1)  # [w, b, c]
        # rnn features
        output = self.rnn(conv)
        if torch.any(torch.isnan(output)):
            print("OUTPUT FROM RNN IS NAN")
        ### MEHREEN ADD
        output = self.softmax(output)
        if torch.any(torch.isnan(output)):
            print("OUTPUT FROM SOFTMAX IS NAN")
        if torch.any(torch.isinf(output)):
            print("OUTPUT FROM SOFTMAX IS INF")
        ### END MEHREEN
        return output

def create_model(config):
    use_instance_norm = False
    if 'use_instance_norm' in config and config['use_instance_norm']:
        use_instance_norm = True
    crnn = CRNN(config['cnn_out_size'], config['num_of_channels'], config['num_of_outputs'], 512,
                use_instance_norm=use_instance_norm)
    return crnn
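As a shape check for the CRNN above: with this pooling schedule a line image of height 60 collapses to a feature map of height 2, so cnn_out_size must be 512 * 2 = 1024. A minimal sketch; the config values and character count are illustrative:

import torch

config = {'cnn_out_size': 1024, 'num_of_channels': 3, 'num_of_outputs': 80}
crnn = create_model(config)

x = torch.zeros(2, 3, 60, 400)   # batch of two 60x400 RGB line images
log_probs = crnn(x)              # (width_steps, batch, num_of_outputs) log-probs for CTC
print(log_probs.shape)           # torch.Size([101, 2, 80])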
py3/lf/__init__.py ADDED
File without changes
py3/lf/fast_patch_view.py ADDED
@@ -0,0 +1,96 @@
import torch
from torch.autograd import Variable
import sys

from utils import transformation_utils

def get_patches(image, crop_window, grid_gen, allow_end_early=False, device="cuda"):

    dtype = torch.FloatTensor
    if 'cuda' in device:
        dtype = torch.cuda.FloatTensor

    pts = Variable(torch.FloatTensor([
        [-1.0, -1.0,  1.0, 1.0],
        [-1.0,  1.0, -1.0, 1.0],
        [ 1.0,  1.0,  1.0, 1.0]
    ]).type_as(image.data), requires_grad=False)[None, ...]

    bounds = crop_window.matmul(pts)

    min_bounds, _ = bounds.min(dim=-1)
    max_bounds, _ = bounds.max(dim=-1)
    d_bounds = max_bounds - min_bounds
    floored_idx_offsets = torch.floor(min_bounds[:, :2].data).long()
    max_d_bounds = d_bounds.max(dim=0)[0].max(dim=0)[0]
    crop_size = torch.ceil(max_d_bounds).long()
    if image.is_cuda:
        crop_size = crop_size.cuda()
    w = crop_size.item()

    memory_space = Variable(torch.zeros(d_bounds.size(0), 3, w, w).type_as(image.data), requires_grad=False)
    translations = []
    N = transformation_utils.compute_renorm_matrix(memory_space)
    all_skipped = True

    for b_i in range(memory_space.size(0)):

        o = floored_idx_offsets[b_i]

        t = Variable(dtype([
            [1, 0, -o[0]],
            [0, 1, -o[1]],
            [0, 0,  1]
        ]), requires_grad=False).expand(3, 3)
        translations.append(N.mm(t)[None, ...])

        skip_slice = False

        s_x = (o[0], o[0]+w)
        s_y = (o[1], o[1]+w)
        t_x = (0, w)
        t_y = (0, w)
        if o[0] < 0:
            s_x = (0, w+o[0])
            t_x = (-o[0], w)

        if o[1] < 0:
            s_y = (0, w+o[1])
            t_y = (-o[1], w)

        if o[0]+w >= image.size(2):
            s_x = (s_x[0], image.size(2))
            t_x = (t_x[0], image.size(2) - s_x[0])

        if o[1]+w >= image.size(3):
            # clamp the y slice the same way as the x slice above
            s_y = (s_y[0], image.size(3))
            t_y = (t_y[0], image.size(3) - s_y[0])

        if s_x[0] >= s_x[1]:
            skip_slice = True

        if t_x[0] >= t_x[1]:
            skip_slice = True

        if s_y[0] >= s_y[1]:
            skip_slice = True

        if t_y[0] >= t_y[1]:
            skip_slice = True

        if not skip_slice:
            all_skipped = False
            i_s = image[b_i:b_i+1, :, s_x[0]:s_x[1], s_y[0]:s_y[1]]
            memory_space[b_i:b_i+1, :, t_x[0]:t_x[1], t_y[0]:t_y[1]] = i_s

    if all_skipped and allow_end_early:
        return None

    translations = torch.cat(translations, 0)
    grid = grid_gen(translations.bmm(crop_window))
    grid = grid[:, :, :, 0:2] / grid[:, :, :, 2:3]

    resampled = torch.nn.functional.grid_sample(memory_space.transpose(2, 3), grid, mode='bilinear',
                                                align_corners=True)

    return resampled
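get_patches stitches a zero-padded copy of each crop region into memory_space and then resamples it with grid_sample. A minimal sketch of that final resampling step in isolation, with toy tensors rather than this repo's transforms:

import torch
import torch.nn.functional as F

img = torch.arange(16.0).reshape(1, 1, 4, 4)
theta = torch.tensor([[[0.5, 0.0, 0.0],
                       [0.0, 0.5, 0.0]]])            # zoom into the centre
grid = F.affine_grid(theta, size=(1, 1, 4, 4), align_corners=True)
patch = F.grid_sample(img, grid, mode='bilinear', align_corners=True)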
py3/lf/lf_cnn.py ADDED
@@ -0,0 +1,45 @@
import torch
from torch import nn

def convRelu(i, batchNormalization=False, leakyRelu=False):
    nc = 3
    ks = [3, 3, 3, 3, 3, 3, 2]
    ps = [1, 1, 1, 1, 1, 1, 1]
    ss = [1, 1, 1, 1, 1, 1, 1]
    nm = [64, 128, 256, 256, 512, 512, 512]

    cnn = nn.Sequential()

    nIn = nc if i == 0 else nm[i - 1]
    nOut = nm[i]
    cnn.add_module('conv{0}'.format(i),
                   nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
    if batchNormalization:
        # Mehreen comment: track_running_stats is set to True to be able to load the author's state_dict.
        # It was set to False in the original py3 version (no param in the author's version).
        cnn.add_module('batchnorm{0}'.format(i), nn.InstanceNorm2d(nOut, track_running_stats=True))
        # cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
    if leakyRelu:
        cnn.add_module('relu{0}'.format(i),
                       nn.LeakyReLU(0.2, inplace=True))
    else:
        cnn.add_module('relu{0}'.format(i), nn.ReLU(True))
    return cnn

def makeCnn():

    cnn = nn.Sequential()
    cnn.add_module('convRelu{0}'.format(0), convRelu(0))
    cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))
    cnn.add_module('convRelu{0}'.format(1), convRelu(1))
    cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))
    cnn.add_module('convRelu{0}'.format(2), convRelu(2, True))
    cnn.add_module('convRelu{0}'.format(3), convRelu(3))
    cnn.add_module('pooling{0}'.format(2), nn.MaxPool2d(2, 2))
    cnn.add_module('convRelu{0}'.format(4), convRelu(4, True))
    cnn.add_module('convRelu{0}'.format(5), convRelu(5))
    cnn.add_module('pooling{0}'.format(3), nn.MaxPool2d(2, 2))
    cnn.add_module('convRelu{0}'.format(6), convRelu(6, True))
    cnn.add_module('pooling{0}'.format(4), nn.MaxPool2d(2, 2))

    return cnn
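A quick shape check: a 32x32 view window collapses to a single 512-dimensional descriptor, which is what position_linear in line_follower.py consumes.

import torch

cnn = makeCnn()
feat = cnn(torch.zeros(1, 3, 32, 32))
print(feat.shape)   # torch.Size([1, 512, 1, 1])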
py3/lf/line_follower.py ADDED
@@ -0,0 +1,181 @@
import torch
import torch.nn as nn
from torch.autograd import Variable
from .stn.gridgen import AffineGridGen, PerspectiveGridGen, GridGen
import numpy as np
from utils import transformation_utils
from .lf_cnn import makeCnn
from .fast_patch_view import get_patches

class LineFollower(nn.Module):
    def __init__(self, output_grid_size=32, dtype=torch.cuda.FloatTensor, device="cuda"):
        super(LineFollower, self).__init__()
        cnn = makeCnn()
        position_linear = nn.Linear(512, 5)
        position_linear.weight.data.zero_()
        position_linear.bias.data[0] = 0
        position_linear.bias.data[1] = 0
        position_linear.bias.data[2] = 0

        self.output_grid_size = output_grid_size

        self.dtype = dtype
        self.cnn = cnn
        self.position_linear = position_linear
        self.device = device

    def forward(self, image, positions, steps=None, all_positions=[], reset_interval=-1, randomize=False, negate_lw=False, skip_grid=False, allow_end_early=False):

        batch_size = image.size(0)
        renorm_matrix = transformation_utils.compute_renorm_matrix(image)
        expanded_renorm_matrix = renorm_matrix.expand(batch_size, 3, 3)

        t = ((np.arange(self.output_grid_size) + 0.5) / float(self.output_grid_size))[:, None].astype(np.float32)
        t = np.repeat(t, axis=1, repeats=self.output_grid_size)
        t = Variable(torch.from_numpy(t), requires_grad=False)
        t = t.to(self.device)
        s = t.t()

        t = t[:, :, None]
        s = s[:, :, None]

        interpolations = torch.cat([
            (1-t)*s,
            (1-t)*(1-s),
            t*s,
            t*(1-s),
        ], dim=-1)

        view_window = Variable(self.dtype([
            [2, 0, 2],
            [0, 2, 0],
            [0, 0, 1]
        ])).expand(batch_size, 3, 3)

        step_bias = Variable(self.dtype([
            [1, 0, 2],
            [0, 1, 0],
            [0, 0, 1]
        ])).expand(batch_size, 3, 3)

        invert = Variable(self.dtype([
            [-1,  0, 0],
            [ 0, -1, 0],
            [ 0,  0, 1]
        ])).expand(batch_size, 3, 3)

        if negate_lw:
            view_window = invert.bmm(view_window)

        grid_gen = GridGen(32, 32, device=self.device)

        view_window_imgs = []
        next_windows = []
        reset_windows = True
        for i in range(steps):

            if i % reset_interval != 0 or reset_interval == -1:
                p_0 = positions[-1]

                if i == 0 and len(p_0.size()) == 3 and p_0.size()[1] == 3 and p_0.size()[2] == 3:
                    current_window = p_0
                    reset_windows = False
                    next_windows.append(p_0)

            else:
                p_0 = all_positions[i].type(self.dtype)
                reset_windows = True
                if randomize:
                    add_noise = p_0.clone()
                    add_noise.data.zero_()
                    mul_noise = p_0.clone()
                    mul_noise.data.fill_(1.0)

                    add_noise[:, 0].data.uniform_(-2, 2)
                    add_noise[:, 1].data.uniform_(-2, 2)
                    add_noise[:, 2].data.uniform_(-.1, .1)

                    p_0 = p_0 * mul_noise + add_noise

            if reset_windows:
                reset_windows = False

                current_window = transformation_utils.get_init_matrix(p_0)

                if len(next_windows) == 0:
                    next_windows.append(current_window)
            else:
                current_window = next_windows[-1].detach()

            crop_window = current_window.bmm(view_window)

            resampled = get_patches(image, crop_window, grid_gen, allow_end_early, device=self.device)

            if resampled is None and i > 0:
                # get_patches checks whether stopping early is allowed
                break

            if resampled is None and i == 0:
                # Odd case where it starts completely off of the edge.
                # This happens rarely, but maybe should be more elegantly
                # handled in the future.
                resampled = Variable(torch.zeros(crop_window.size(0), 3, 32, 32).type_as(image.data), requires_grad=False)

            # Process window CNN
            cnn_out = self.cnn(resampled)
            cnn_out = torch.squeeze(cnn_out, dim=2)
            cnn_out = torch.squeeze(cnn_out, dim=2)
            delta = self.position_linear(cnn_out)

            next_window = transformation_utils.get_step_matrix(delta)
            next_window = next_window.bmm(step_bias)
            if negate_lw:
                next_window = invert.bmm(next_window).bmm(invert)

            next_windows.append(current_window.bmm(next_window))

        grid_line = []
        mask_line = []
        line_done = []
        xy_positions = []

        a_pt = Variable(torch.Tensor(
            [
                [0,  1, 1],
                [0, -1, 1]
            ]
        )).to(self.device)
        a_pt = a_pt.transpose(1, 0)
        a_pt = a_pt.expand(batch_size, a_pt.size(0), a_pt.size(1))

        for i in range(0, len(next_windows)-1):

            w_0 = next_windows[i]
            w_1 = next_windows[i+1]

            pts_0 = w_0.bmm(a_pt)
            pts_1 = w_1.bmm(a_pt)
            xy_positions.append(pts_0)

            if skip_grid:
                continue

            pts = torch.cat([pts_0, pts_1], dim=2)

            grid_pts = expanded_renorm_matrix.bmm(pts)

            grid = interpolations[None, :, :, None, :] * grid_pts[:, None, None, :, :]
            grid = grid.sum(dim=-1)[..., :2]

            grid_line.append(grid)

        xy_positions.append(pts_1)

        if skip_grid:
            grid_line = None
        else:
            grid_line = torch.cat(grid_line, dim=1)

        return grid_line, view_window_imgs, next_windows, xy_positions
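The follower advances by composing 3x3 affine windows: each step multiplies the current window by the CNN-predicted step matrix (plus the fixed step_bias shift), so the crop "walks" along the text line. A toy illustration of that composition, separate from the model:

import torch

current = torch.eye(3).unsqueeze(0)        # (1, 3, 3) current view window
step = torch.tensor([[[1.0, 0.0, 2.0],     # shift two units along the line
                      [0.0, 1.0, 0.0],
                      [0.0, 0.0, 1.0]]])
nxt = current.bmm(step)                    # next view window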
py3/lf/models/__init__.py ADDED
@@ -0,0 +1,36 @@
from functools import partial
import torch

# from utils1 import coerce_to_path_and_check_exist

from .res_unet import ResUNet
from .tools import safe_model_state_dict


def get_model(name=None):
    if name is None:
        name = 'res_unet18'
    return {
        'res_unet18': partial(ResUNet, encoder_name='resnet18'),
        'res_unet34': partial(ResUNet, encoder_name='resnet34'),
        'res_unet50': partial(ResUNet, encoder_name='resnet50'),
        'res_unet101': partial(ResUNet, encoder_name='resnet101'),
        'res_unet152': partial(ResUNet, encoder_name='resnet152'),
    }[name]


def load_model_from_path(model_path, device=None, attributes_to_return=None, eval_mode=True):
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # The original wrapped model_path in coerce_to_path_and_check_exist (import commented out above);
    # torch.load accepts the path directly.
    checkpoint = torch.load(model_path, map_location=device.type)
    checkpoint['model_kwargs']['pretrained_encoder'] = False
    model = get_model(checkpoint['model_name'])(checkpoint['n_classes'], **checkpoint['model_kwargs']).to(device)
    model.load_state_dict(safe_model_state_dict(checkpoint['model_state']))
    if eval_mode:
        model.eval()
    if attributes_to_return is not None:
        if isinstance(attributes_to_return, str):
            attributes_to_return = [attributes_to_return]
        return model, [checkpoint.get(key) for key in attributes_to_return]
    else:
        return model
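A minimal sketch of building a segmentation model through this factory; the class count is illustrative:

model = get_model('res_unet34')(n_classes=3, pretrained_encoder=False)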
py3/lf/models/res_unet.py ADDED
@@ -0,0 +1,147 @@
from collections import OrderedDict

from torch import nn

from .resnet import get_resnet_model
from .tools import conv1x1, conv3x3, DecoderModule, get_norm_layer, UpsampleCatConv
# from utils1.logger import print_info, print_warning

INPUT_CHANNELS = 3
FINAL_LAYER_CHANNELS = 32
LAYER1_REDUCED_CHANNELS = 128
LAYER2_REDUCED_CHANNELS = 256
LAYER3_REDUCED_CHANNELS = 512
LAYER4_REDUCED_CHANNELS = 1024


class ResUNet(nn.Module):
    """U-Net with residual encoder backbone."""

    @property
    def name(self):
        return self.enc_name.replace('res', 'res_u')

    def __init__(self, n_classes, **kwargs):
        super().__init__()
        self.n_classes = n_classes
        self.norm_layer_kwargs = kwargs.pop('norm_layer', dict())
        self.norm_layer = get_norm_layer(**self.norm_layer_kwargs)
        self.no_maxpool = kwargs.get('no_maxpool', False)
        self.conv_as_maxpool = kwargs.get('conv_as_maxpool', True)
        self.use_upcatconv = kwargs.get('use_upcatconv', False)
        self.use_deconv = kwargs.get('use_deconv', True)
        assert not (self.use_deconv and self.use_upcatconv)
        self.same_up_channels = kwargs.get('same_up_channels', False)
        self.use_conv1x1 = kwargs.get('use_conv1x1', False)
        assert not (self.conv_as_maxpool and self.no_maxpool)
        self.enc_name = kwargs.get('encoder_name', 'resnet18')
        self.reduced_layers = kwargs.get('reduced_layers', False) and self.enc_name not in ['resnet18', 'resnet34']

        pretrained = kwargs.get('pretrained_encoder', False)
        replace_with_dilation = kwargs.get('replace_with_dilation')
        strides = kwargs.get('strides', 2)
        resnet = get_resnet_model(self.enc_name)(pretrained, progress=False, norm_layer=self.norm_layer_kwargs,
                                                 strides=strides, replace_with_dilation=replace_with_dilation)

        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu)
        # XXX: maxpool creates high amplitude high freq activations, removing it leads to better results
        if self.conv_as_maxpool:
            layer0_out_channels = self.get_nb_out_channels(self.layer0)
            self.layer1 = nn.Sequential(*[conv3x3(layer0_out_channels, layer0_out_channels, stride=2),
                                          self.norm_layer(layer0_out_channels),
                                          nn.ReLU()] + list(resnet.layer1.children()))
        elif self.no_maxpool:
            self.layer1 = nn.Sequential(*list(resnet.layer1.children()))
        else:
            self.layer1 = nn.Sequential(*[resnet.maxpool] + list(resnet.layer1.children()))
        self.layer2, self.layer3, self.layer4 = resnet.layer2, resnet.layer3, resnet.layer4

        layer0_out_channels = self.get_nb_out_channels(self.layer0)
        layer1_out_channels = self.get_nb_out_channels(self.layer1)
        layer2_out_channels = self.get_nb_out_channels(self.layer2)
        layer3_out_channels = self.get_nb_out_channels(self.layer3)
        layer4_out_channels = self.get_nb_out_channels(self.layer4)
        if self.reduced_layers:
            self.layer1_red = self._reducing_layer(layer1_out_channels, LAYER1_REDUCED_CHANNELS)
            self.layer2_red = self._reducing_layer(layer2_out_channels, LAYER2_REDUCED_CHANNELS)
            self.layer3_red = self._reducing_layer(layer3_out_channels, LAYER3_REDUCED_CHANNELS)
            self.layer4_red = self._reducing_layer(layer4_out_channels, LAYER4_REDUCED_CHANNELS)
            layer1_out_channels, layer2_out_channels = LAYER1_REDUCED_CHANNELS, LAYER2_REDUCED_CHANNELS
            layer3_out_channels, layer4_out_channels = LAYER3_REDUCED_CHANNELS, LAYER4_REDUCED_CHANNELS

        self.layer4_up = self._upsampling_layer(layer4_out_channels, layer3_out_channels, layer3_out_channels)
        self.layer3_up = self._upsampling_layer(layer3_out_channels, layer2_out_channels, layer2_out_channels)
        self.layer2_up = self._upsampling_layer(layer2_out_channels, layer1_out_channels, layer1_out_channels)
        self.layer1_up = self._upsampling_layer(layer1_out_channels, layer0_out_channels, layer0_out_channels)
        self.layer0_up = self._upsampling_layer(layer0_out_channels, FINAL_LAYER_CHANNELS, INPUT_CHANNELS)
        self.final_layer = self._final_layer(FINAL_LAYER_CHANNELS)

        if not pretrained:
            self._init_conv_weights()

        print("Model {} initialised with norm_layer={}({}) and kwargs {}"
              .format(self.name, self.norm_layer.func.__name__, self.norm_layer.keywords, kwargs))

    def _reducing_layer(self, in_channels, out_channels):
        return nn.Sequential(OrderedDict([
            ('conv', conv1x1(in_channels, out_channels)),
            ('bn', self.norm_layer(out_channels)),
            ('relu', nn.ReLU()),
        ]))

    def get_nb_out_channels(self, layer):
        return list(filter(lambda e: isinstance(e, nn.Conv2d), layer.modules()))[-1].out_channels

    def _upsampling_layer(self, in_channels, out_channels, cat_channels):
        if self.use_upcatconv:
            return UpsampleCatConv(in_channels + cat_channels, out_channels, norm_layer=self.norm_layer,
                                   use_conv1x1=self.use_conv1x1)
        else:
            up_channels = in_channels if self.same_up_channels else None
            return DecoderModule(in_channels, out_channels, cat_channels, up_channels=up_channels,
                                 norm_layer=self.norm_layer, n_conv=1, use_deconv=self.use_deconv,
                                 use_conv1x1=self.use_conv1x1)

    def _final_layer(self, in_channels):
        return nn.Sequential(OrderedDict([('conv', conv1x1(in_channels, self.n_classes))]))

    def _init_conv_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)

    def load_state_dict_for_unet(self, state_dict):
        unloaded_params = []
        state = self.state_dict()
        for name, param in state_dict.items():
            if name in state and state[name].shape == param.shape:
                if isinstance(param, nn.Parameter):
                    param = param.data
                state[name].copy_(param)
            else:
                unloaded_params.append(name)

        if len(unloaded_params) > 0:
            print('load_state_dict: {} not found'.format(unloaded_params))

    def forward(self, x):
        x0 = self.layer0(x)
        x1 = self.layer1(x0)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        # x4 = self.layer4(x3)

        if self.reduced_layers:
            x4 = self.layer4_red(x4)
            x3 = self.layer3_red(x3)
            x2 = self.layer2_red(x2)
            x1 = self.layer1_red(x1)

        # x3 = self.layer4_up(x4, other=x3)
        x2 = self.layer3_up(x3, other=x2)
        x1 = self.layer2_up(x2, other=x1)
        x0 = self.layer1_up(x1, other=x0)
        x = self.layer0_up(x0, other=x)
        x = self.final_layer(x)

        return x
py3/lf/models/resnet.py ADDED
@@ -0,0 +1,335 @@
from toolz import keyfilter

import torch
import torch.nn as nn
from torch.utils.model_zoo import load_url as load_state_dict_from_url

from .tools import conv3x3, conv1x1, get_norm_layer

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}


def get_resnet_model(name):
    if name is None:
        name = 'resnet18'
    return {
        'resnet18': resnet18,
        'resnet34': resnet34,
        'resnet50': resnet50,
        'resnet101': resnet101,
        'resnet152': resnet152,
        'resnext50_32x4d': resnext50_32x4d,
        'resnext101_32x8d': resnext101_32x8d,
        'wide_resnet50_2': wide_resnet50_2,
        'wide_resnet101_2': wide_resnet101_2,
    }[name]


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, n_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, strides=2, replace_with_dilation=None, **kwargs):
        super(ResNet, self).__init__()
        self.norm_layer_kwargs = kwargs.get('norm_layer', dict())
        norm_layer = get_norm_layer(**self.norm_layer_kwargs)
        self._norm_layer = norm_layer
        self.inplanes = 64
        self.groups = groups
        self.base_width = width_per_group
        self.dilation = 1
        if replace_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_with_dilation = [False, False, False]
        elif isinstance(replace_with_dilation, bool):
            replace_with_dilation = [replace_with_dilation] * 3
        assert len(replace_with_dilation) == 3
        self.strides = strides if not isinstance(strides, int) else [strides] * 5
        assert len(self.strides) == 5

        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=self.strides[0], padding=3, bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=self.strides[1], padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=self.strides[2], dilate=replace_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=self.strides[3], dilate=replace_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=self.strides[4], dilate=replace_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, n_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x


def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        if not model.norm_layer_kwargs.get('track_running_stats', True):
            state_dict = keyfilter(lambda k: 'running' not in k, state_dict)
        model.load_state_dict(state_dict)
    return model


def resnet18(pretrained=False, progress=True, **kwargs):
    r"""ResNet-18 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, **kwargs)


def resnet34(pretrained=False, progress=True, **kwargs):
    r"""ResNet-34 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, **kwargs)


def resnet50(pretrained=False, progress=True, **kwargs):
    r"""ResNet-50 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, **kwargs)


def resnet101(pretrained=False, progress=True, **kwargs):
    r"""ResNet-101 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, **kwargs)


def resnet152(pretrained=False, progress=True, **kwargs):
    r"""ResNet-152 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, **kwargs)


def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
    r"""ResNeXt-50 32x4d model from
    `"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 4
    return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], pretrained, progress, **kwargs)


def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
    r"""ResNeXt-101 32x8d model from
    `"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 8
    return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], pretrained, progress, **kwargs)


def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
    r"""Wide ResNet-50-2 model from
    `"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_

    The model is the same as ResNet except for the bottleneck number of channels
    which is twice larger in every block. The number of channels in outer 1x1
    convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
    channels, and in Wide ResNet-50-2 has 2048-1024-2048.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['width_per_group'] = 64 * 2
    return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], pretrained, progress, **kwargs)


def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
    r"""Wide ResNet-101-2 model from
    `"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_

    The model is the same as ResNet except for the bottleneck number of channels
    which is twice larger in every block. The number of channels in outer 1x1
    convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
    channels, and in Wide ResNet-50-2 has 2048-1024-2048.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['width_per_group'] = 64 * 2
    return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3], pretrained, progress, **kwargs)
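These constructors accept the extra strides/replace_with_dilation/norm_layer keywords added above on top of the stock torchvision interface. A hedged example:

backbone = resnet18(pretrained=False,
                    norm_layer={'name': 'instance_norm', 'affine': True},
                    strides=[2, 2, 2, 2, 1])   # keep layer4 at the layer3 resolution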
py3/lf/models/tools.py ADDED
@@ -0,0 +1,144 @@
from collections import OrderedDict
from functools import partial

from torch import nn, cat
from torch.nn import functional as F


def get_norm_layer(**kwargs):
    name = kwargs.get('name', 'instance_norm')
    momentum = kwargs.get('momentum', 0.1)
    affine = kwargs.get('affine', True)
    track_stats = kwargs.get('track_running_stats', False)
    num_groups = kwargs.get('num_groups', 32)

    norm_layer = {
        'batch_norm': partial(nn.BatchNorm2d, momentum=momentum, affine=affine, track_running_stats=track_stats),
        'group_norm': partial(nn.GroupNorm, num_groups=num_groups, affine=affine),
        'instance_norm': partial(nn.InstanceNorm2d, momentum=momentum, affine=affine, track_running_stats=track_stats),
    }[name]
    if norm_layer.func == nn.GroupNorm:
        return lambda num_channels: norm_layer(num_channels=num_channels)
    else:
        return norm_layer


def initialize_weights(*models):
    for model in models:
        for module in model.modules():
            if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear):
                nn.init.kaiming_normal_(module.weight)
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def safe_model_state_dict(state_dict):
    """Convert a state dict saved from a DataParallel module to normal module state_dict."""
    if not next(iter(state_dict)).startswith("module."):
        return state_dict  # abort if dict is not a DataParallel model_state
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        new_state_dict[k[7:]] = v  # remove 'module.' prefix
    return new_state_dict


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class UpsampleCatConv(nn.Module):
    def __init__(self, in_channels, out_channels, norm_layer=None, mode='bilinear', use_conv1x1=False):
        super().__init__()
        norm_layer = norm_layer if norm_layer is not None else nn.BatchNorm2d
        conv_layer = conv1x1 if use_conv1x1 else conv3x3
        self.mode = mode
        self.conv = conv_layer(in_channels, out_channels)
        self.norm = norm_layer(out_channels)
        self.act = nn.ReLU()

    def forward(self, x, other):
        x = nn.functional.interpolate(x, size=(other.size(2), other.size(3)), mode=self.mode, align_corners=False)
        x = cat((x, other), dim=1)
        x = self.conv(x)
        x = self.norm(x)
        x = self.act(x)
        return x


class UpsampleConv(nn.Module):
    def __init__(self, in_channels, out_channels, norm_layer=None, mode='bilinear', use_conv1x1=False):
        super().__init__()
        norm_layer = norm_layer if norm_layer is not None else nn.BatchNorm2d
        conv_layer = conv1x1 if use_conv1x1 else conv3x3
        self.mode = mode
        self.conv = conv_layer(in_channels, out_channels)
        self.norm = norm_layer(out_channels)
        self.act = nn.ReLU()

    def forward(self, x, output_size):
        x = nn.functional.interpolate(x, size=output_size[2:], mode=self.mode, align_corners=False)
        x = self.conv(x)
        x = self.norm(x)
        x = self.act(x)
        return x


class DeconvModule(nn.Module):
    def __init__(self, in_channels, out_channels, norm_layer=None):
        super().__init__()
        norm_layer = norm_layer if norm_layer is not None else nn.BatchNorm2d
        self.deconv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1)
        self.norm = norm_layer(out_channels)
        self.act = nn.ReLU()

    def forward(self, x, output_size):
        x = self.deconv(x, output_size=output_size)
        x = self.norm(x)
        x = self.act(x)
        return x


class DecoderModule(nn.Module):
    def __init__(self, in_channels, out_channels, cat_channels=None, up_channels=None,
                 norm_layer=None, n_conv=2, use_deconv=False, use_conv1x1=False):
        super().__init__()
        cat_channels = cat_channels or in_channels // 2
        up_channels = up_channels or in_channels // 2
        norm_layer = norm_layer if norm_layer is not None else nn.BatchNorm2d
        self.use_deconv = use_deconv
        if use_deconv:
            self.decode = DeconvModule(in_channels, up_channels, norm_layer)
        else:
            self.decode = UpsampleConv(in_channels, up_channels, norm_layer, 'bilinear', use_conv1x1)
        self.conv_block = nn.Sequential(OrderedDict(sum([[
            ('conv{}'.format(k + 1), conv3x3(up_channels + cat_channels if k == 0 else out_channels, out_channels)),
            ('bn{}'.format(k + 1), norm_layer(out_channels)),
            ('relu{}'.format(k + 1), nn.ReLU())]
            for k in range(n_conv)], [])))

    def forward(self, x, other):
        try:
            x = self.decode(x, output_size=other.size())
        except ValueError:
            # XXX a size adjustment is needed for odd sizes
            B, C, H, W = other.size()
            h, w = H // 2 * 2, W // 2 * 2
            x = self.decode(x, output_size=(B, C, h, w))
            x = F.pad(x, (W - w, 0, H - h, 0))
        x = cat((x, other), dim=1)
        x = self.conv_block(x)
        return x
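get_norm_layer returns a one-argument factory regardless of the norm family, so the decoder modules above can stay norm-agnostic. For example:

norm = get_norm_layer(name='group_norm', num_groups=8)
layer = norm(64)   # nn.GroupNorm(num_groups=8, num_channels=64)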
py3/lf/stn/__init__.py ADDED
File without changes
py3/lf/stn/gridgen.py ADDED
@@ -0,0 +1,126 @@
import torch
from torch.autograd import Function
from torch.autograd import Variable
from torch.nn.modules.module import Module
import numpy as np


class AffineGridGenFunction(Function):
    def __init__(self, height, width):
        super(AffineGridGenFunction, self).__init__()
        self.height, self.width = height, width
        self.grid = np.zeros([self.height, self.width, 3], dtype=np.float32)
        self.grid[:, :, 0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/self.height), 0), repeats=self.width, axis=0).T, 0)
        self.grid[:, :, 1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/self.width), 0), repeats=self.height, axis=0), 0)
        self.grid[:, :, 2] = np.ones([self.height, width])
        self.grid = torch.from_numpy(self.grid.astype(np.float32))
        # print(self.grid)

    def forward(self, input1):
        self.input1 = input1
        output = torch.zeros(torch.Size([input1.size(0)]) + self.grid.size())
        self.batchgrid = torch.zeros(torch.Size([input1.size(0)]) + self.grid.size())
        for i in range(input1.size(0)):
            self.batchgrid[i] = self.grid

        if input1.is_cuda:
            self.batchgrid = self.batchgrid.cuda()
            output = output.cuda()

        for i in range(input1.size(0)):
            output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2)

        return output

    def backward(self, grad_output):

        grad_input1 = torch.zeros(self.input1.size())

        if grad_output.is_cuda:
            self.batchgrid = self.batchgrid.cuda()
            grad_input1 = grad_input1.cuda()
        grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1, 2), self.batchgrid.view(-1, self.height*self.width, 3))

        return grad_input1

class AffineGridGen(Module):
    def __init__(self, height, width):
        super(AffineGridGen, self).__init__()
        self.height, self.width = height, width
        self.f = AffineGridGenFunction(self.height, self.width)

    def forward(self, input):
        return self.f(input)


class PerspectiveGridGenFunction(Function):
    def __init__(self, height, width):
        super(PerspectiveGridGenFunction, self).__init__()
        self.height, self.width = height, width
        self.grid = np.zeros([self.height, self.width, 3], dtype=np.float32)
        self.grid[:, :, 0] = np.expand_dims(np.repeat(np.expand_dims(np.linspace(-1, 1, self.height), 0), repeats=self.width, axis=0).T, 0)
        self.grid[:, :, 1] = np.expand_dims(np.repeat(np.expand_dims(np.linspace(-1, 1, self.width), 0), repeats=self.height, axis=0), 0)
        self.grid[:, :, 2] = np.ones([self.height, width])
        self.grid = torch.from_numpy(self.grid.astype(np.float32))

    def forward(self, input1):
        self.input1 = input1
        output = torch.zeros(torch.Size([input1.size(0)]) + self.grid.size())
        self.batchgrid = torch.zeros(torch.Size([input1.size(0)]) + self.grid.size())
        for i in range(input1.size(0)):
            self.batchgrid[i] = self.grid

        if input1.is_cuda:
            self.batchgrid = self.batchgrid.cuda()
            output = output.cuda()

        for i in range(input1.size(0)):
            output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 3)

        return output

    def backward(self, grad_output):

        grad_input1 = torch.zeros(self.input1.size())

        if grad_output.is_cuda:
            self.batchgrid = self.batchgrid.cuda()
            grad_input1 = grad_input1.cuda()
        grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 3), 1, 2), self.batchgrid.view(-1, self.height*self.width, 3))

        return grad_input1

class PerspectiveGridGen(Module):
    def __init__(self, height, width):
        super(PerspectiveGridGen, self).__init__()
        self.height, self.width = height, width
        self.f = PerspectiveGridGenFunction(self.height, self.width)

    def forward(self, input):
        return self.f(input)

class GridGen(Module):
    def __init__(self, height, width, device="cuda"):
        super(GridGen, self).__init__()
        self.device = device
        self.height, self.width = height, width
        self.grid = np.zeros([self.height, self.width, 3], dtype=np.float32)

        grid_space_h = (np.arange(self.height) + 0.5) / float(self.height)
        grid_space_w = (np.arange(self.width) + 0.5) / float(self.width)

        grid_space_h = 2 * grid_space_h - 1
        grid_space_w = 2 * grid_space_w - 1

        self.grid[:, :, 0] = np.expand_dims(np.repeat(np.expand_dims(grid_space_h, 0), repeats=self.width, axis=0).T, 0)
        self.grid[:, :, 1] = np.expand_dims(np.repeat(np.expand_dims(grid_space_w, 0), repeats=self.height, axis=0), 0)
        # self.grid[:, :, 0] = np.expand_dims(np.repeat(np.expand_dims(np.linspace(-1, 1, self.height), 0), repeats=self.width, axis=0).T, 0)
        # self.grid[:, :, 1] = np.expand_dims(np.repeat(np.expand_dims(np.linspace(-1, 1, self.width), 0), repeats=self.height, axis=0), 0)
        self.grid[:, :, 2] = np.ones([self.height, width])
        self.grid = Variable(torch.from_numpy(self.grid.astype(np.float32)), requires_grad=False).to(self.device)

    def forward(self, input):
        out = torch.matmul(input[:, None, None, :, :], self.grid[None, :, :, :, None])
        out = out.squeeze(-1)
        return out
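GridGen maps a batch of 3x3 transforms to a dense grid of homogeneous sample points; an identity transform simply returns the base grid:

import torch

gg = GridGen(32, 32, device="cpu")
grid = gg(torch.eye(3).unsqueeze(0))   # (1, 32, 32, 3) homogeneous coordinates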
py3/sol/__init__.py ADDED
File without changes
py3/sol/crop_transform.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from sol import crop_utils
import numpy as np

class CropTransform(object):
    def __init__(self, crop_params):
        crop_size = crop_params['crop_size']
        self.random_crop_params = crop_params
        self.pad_params = ((crop_size, crop_size), (crop_size, crop_size), (0, 0))

    def __call__(self, sample):
        org_img = sample['img']
        gt = sample['sol_gt']

        org_img = np.pad(org_img, self.pad_params, 'mean')

        # Shift both ground-truth points into the padded coordinate frame.
        gt[:, :, 0] = gt[:, :, 0] + self.pad_params[0][0]
        gt[:, :, 1] = gt[:, :, 1] + self.pad_params[1][0]

        gt[:, :, 2] = gt[:, :, 2] + self.pad_params[0][0]
        gt[:, :, 3] = gt[:, :, 3] + self.pad_params[1][0]

        crop_params, org_img, gt_match = crop_utils.generate_random_crop(org_img, gt, self.random_crop_params)

        # Keep only the ground truth that fell inside the crop and move it
        # into crop-local coordinates.
        gt = gt[gt_match][None, ...]
        gt[..., 0] = gt[..., 0] - crop_params['dim1'][0]
        gt[..., 1] = gt[..., 1] - crop_params['dim0'][0]

        gt[..., 2] = gt[..., 2] - crop_params['dim1'][0]
        gt[..., 3] = gt[..., 3] - crop_params['dim0'][0]

        return {
            "img": org_img,
            "sol_gt": gt
        }
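A self-contained sketch of driving CropTransform (the array shapes are assumptions inferred from how the fields are indexed above, and the `sol` package is assumed to be on the path):

import numpy as np

sample = {
    "img": np.random.randint(0, 255, (100, 100, 3)).astype(np.float32),
    # Two start-of-line annotations, each (x0, y0, x1, y1): shape (2, 1, 4).
    "sol_gt": np.array([[[10.0, 20.0, 30.0, 20.0]],
                        [[15.0, 60.0, 40.0, 60.0]]]),
}
transform = CropTransform({"crop_size": 32, "prob_label": 0.5})
out = transform(sample)
print(out["img"].shape, out["sol_gt"].shape)   # (32, 32, 3) and (1, K, 4)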
py3/sol/crop_utils.py ADDED
@@ -0,0 +1,48 @@
import cv2
import numpy as np
import sys

def perform_crop(img, crop):
    cs = crop['crop_size']
    cropped_gt_img = img[crop['dim0'][0]:crop['dim0'][1], crop['dim1'][0]:crop['dim1'][1]]
    scaled_gt_img = cv2.resize(cropped_gt_img, (cs, cs), interpolation=cv2.INTER_CUBIC)
    return scaled_gt_img


def generate_random_crop(img, gt, params):

    contains_label = np.random.random() < params['prob_label']
    cs = params['crop_size']

    cnt = 0
    while True:

        dim0 = np.random.randint(0, img.shape[0] - cs)
        dim1 = np.random.randint(0, img.shape[1] - cs)

        crop = {
            "dim0": [dim0, dim0 + cs],
            "dim1": [dim1, dim1 + cs],
            "crop_size": cs
        }

        # TODO: this only works for the center points
        gt_match = np.zeros_like(gt[..., 0:2])
        gt_match[..., 0][gt[..., 0] < dim1] = 1
        gt_match[..., 0][gt[..., 0] > dim1 + cs] = 1

        gt_match[..., 1][gt[..., 1] < dim0] = 1
        gt_match[..., 1][gt[..., 1] > dim0 + cs] = 1

        gt_match = 1 - gt_match
        gt_match = np.logical_and(gt_match[..., 0], gt_match[..., 1])

        # Accept a crop that contains a label when one was requested, or give
        # up and accept whatever we have after 100 failed attempts.
        if (gt_match.sum() > 0 and contains_label) or cnt > 100:
            cropped_gt_img = perform_crop(img, crop)
            return crop, cropped_gt_img, np.where(gt_match != 0)

        # Accept an empty crop when a label-free crop was requested.
        if gt_match.sum() == 0 and not contains_label:
            cropped_gt_img = perform_crop(img, crop)
            return crop, cropped_gt_img, np.where(gt_match != 0)

        cnt += 1
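A direct call might look like this (shapes assumed as in the transform above; with prob_label = 1.0 the loop keeps sampling windows until one covers the annotated point, or falls back after 100 tries):

import numpy as np

img = np.random.rand(200, 200, 3).astype(np.float32)
gt = np.array([[[50.0, 70.0, 80.0, 70.0]]])      # one annotated point
params = {"crop_size": 64, "prob_label": 1.0}    # always ask for a labeled crop
crop, patch, kept = generate_random_crop(img, gt, params)
print(crop["dim0"], crop["dim1"], patch.shape, kept)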
py3/sol/start_of_line_finder.py ADDED
@@ -0,0 +1,42 @@
import torch
from torch.autograd import Variable
from torch import nn
from . import vgg

class StartOfLineFinder(nn.Module):
    def __init__(self, base_0, base_1):
        super(StartOfLineFinder, self).__init__()

        self.cnn = vgg.vgg11()
        self.base_0 = base_0
        self.base_1 = base_1

    def forward(self, img):
        y = self.cnn(img)

        # Grid priors: the center coordinate of every cell of the CNN output
        # map, scaled back to input-image pixels by the network stride
        # (base_0 vertically, base_1 horizontally).
        priors_0 = Variable(torch.arange(0, y.size(2)).type_as(img.data), requires_grad=False)[None, :, None]
        priors_0 = (priors_0 + 0.5) * self.base_0
        priors_0 = priors_0.expand(y.size(0), priors_0.size(1), y.size(3))
        priors_0 = priors_0[:, None, :, :]

        priors_1 = Variable(torch.arange(0, y.size(3)).type_as(img.data), requires_grad=False)[None, None, :]
        priors_1 = (priors_1 + 0.5) * self.base_1
        priors_1 = priors_1.expand(y.size(0), y.size(2), priors_1.size(2))
        priors_1 = priors_1[:, None, :, :]

        # Channel 0 is a sigmoid confidence; channels 1-2 are position offsets
        # added to the grid priors; channels 3-4 are the remaining regression
        # outputs, left unnormalized.
        predictions = torch.cat([
            torch.sigmoid(y[:, 0:1, :, :]),
            y[:, 1:2, :, :] + priors_0,
            y[:, 2:3, :, :] + priors_1,
            y[:, 3:4, :, :],
            y[:, 4:5, :, :]
        ], dim=1)

        # Flatten the map into one (confidence, ...) row per grid cell.
        predictions = predictions.transpose(1, 3).contiguous()
        predictions = predictions.view(predictions.size(0), -1, 5)
        return predictions
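An end-to-end sketch: with base_0 = base_1 = 16 (matching vgg11's four pooling stages) the finder emits one 5-vector per 16x16 input patch, and candidate starts can be taken by thresholding the confidence channel. The 0.1 threshold below is illustrative, not a value from this repo:

import torch

sol = StartOfLineFinder(base_0=16, base_1=16)
img = torch.rand(1, 3, 256, 256)
preds = sol(img)                    # (1, 256, 5): one row per grid cell
conf = preds[0, :, 0]
candidates = preds[0, conf > 0.1]   # keep confident start-of-line proposals
print(preds.shape, candidates.shape)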
py3/sol/vgg.py ADDED
@@ -0,0 +1,157 @@
import torch
from torch import nn
import torch.utils.model_zoo as model_zoo
import math


__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]


class VGG(nn.Module):

    def __init__(self, features, num_classes=1000):
        super(VGG, self).__init__()
        self.features = features

    def forward(self, x):
        x = self.features(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    for i, v in enumerate(cfg):
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if i == len(cfg) - 1:
                # The final conv is appended without BN/ReLU so the
                # OUTPUT_FEATURES channels stay unbounded regression outputs.
                layers += [conv2d]
                break
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)

# Every configuration below replaces the usual classifier head with a final
# 5-channel convolution, so the network emits a dense prediction map.
OUTPUT_FEATURES = 5
cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, OUTPUT_FEATURES],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, OUTPUT_FEATURES],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, OUTPUT_FEATURES],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, OUTPUT_FEATURES],
}

# NOTE: model_urls is not defined in this file, and the modified cfgs above
# would not match the ImageNet checkpoints anyway, so the pretrained=True
# branches below are effectively unsupported as written.


def vgg11(pretrained=False, **kwargs):
    """VGG 11-layer model (configuration "A")

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['A']), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg11']))
    return model


def vgg11_bn(pretrained=False, **kwargs):
    """VGG 11-layer model (configuration "A") with batch normalization

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['A'], batch_norm=True), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg11_bn']))
    return model


def vgg13(pretrained=False, **kwargs):
    """VGG 13-layer model (configuration "B")

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['B']), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg13']))
    return model


def vgg13_bn(pretrained=False, **kwargs):
    """VGG 13-layer model (configuration "B") with batch normalization

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['B'], batch_norm=True), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg13_bn']))
    return model


def vgg16(pretrained=False, **kwargs):
    """VGG 16-layer model (configuration "D")

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['D']), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg16']))
    return model


def vgg16_bn(pretrained=False, **kwargs):
    """VGG 16-layer model (configuration "D") with batch normalization

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['D'], batch_norm=True), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg16_bn']))
    return model


def vgg19(pretrained=False, **kwargs):
    """VGG 19-layer model (configuration "E")

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['E']), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg19']))
    return model


def vgg19_bn(pretrained=False, **kwargs):
    """VGG 19-layer model (configuration "E") with batch normalization

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = VGG(make_layers(cfg['E'], batch_norm=True), **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['vgg19_bn']))
    return model
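Because the classifier head is replaced by the final 5-channel convolution, these constructors return fully convolutional networks. A quick shape check (illustrative):

import torch

net = vgg11()
out = net(torch.rand(1, 3, 128, 96))
print(out.shape)   # torch.Size([1, 5, 8, 6]) -- H and W are divided by 16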
py3/utils/__init__.py ADDED
File without changes
py3/utils/character_set.ipynb ADDED
@@ -0,0 +1,539 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a0f742b0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/home/msaeed3/mehreen/source/start_follow_read/py3\n",
      "....c ب\n",
      "....c أ\n",
      "....c ن\n",
      "....c \n",
      "....c ا\n",
      "....c ل\n",
      "....c ت\n",
      "....c ع\n",
      "....c و\n",
      "....c ي\n",
      "....c ٢\n",
      "....c س\n",
      "....c ط\n",
      "....c ج\n",
      "....c ه\n",
      "....c ز\n",
      "....c ف\n",
      "....c ذ\n",
      "....c ٳ\n",
      "....c ك\n",
      "....c ٪\n",
      "....c ٜ\n",
      "....c ١\n",
      "....c د\n",
      "....c ة\n",
      "....c م\n",
      "....c ٬\n",
      "....c ق\n",
      "....c ر\n",
      "....c ش\n",
      "....c ٚ\n",
      "....c ح\n",
      "....c -\n",
      "....c !\n",
      "....c ص\n",
      "....c ض\n",
      "....c ٝ\n",
      "....c ث\n",
      "....c 7\n",
      "....c ٙ\n",
      "....c ٨\n",
      "....c ک\n",
      "....c ٮ\n",
      "....c ڤ\n",
      "....c ٤\n",
      "....c ں\n",
      "....c 4\n",
      "....c ٰ\n",
      "....c ]\n",
      "....c ڡ\n",
      "....c ى\n",
      "....c \\\n",
      "....c 2\n",
      "....c ٛ\n",
      "....c =\n",
      "....c إ\n",
      "....c غ\n",
      "....c ٲ\n",
      "....c ّ\n",
      "....c >\n",
      "....c .\n",
      "....c )\n",
      "....c <\n",
      "....c ئ\n",
      "....c |\n",
      "....c 0\n",
      "....c +\n",
      "....c x\n",
      "....c ؟\n",
      "....c خ\n",
      "....c }\n",
      "....c &\n",
      "....c %\n",
      "....c ،\n",
      "....c @\n",
      "....c $\n",
      "....c ء\n",
      "....c 5\n",
      "....c 8\n",
      "....c _\n",
      "....c ٌ\n",
      "....c ×\n",
      "....c ^\n",
      "....c ٍ\n",
      "....c `\n",
      "....c [\n",
      "....c آ\n",
      "....c َ\n",
      "....c ;\n",
      "....c ً\n",
      "....c ُ\n",
      "....c /\n",
      "....c ٕ\n",
      "....c ~\n",
      "....c \"\n",
      "....c ٖ\n",
      "....c ظ\n",
      "....c 3\n",
      "....c :\n",
      "....c ۟\n",
      "....c ٥\n",
      "....c چ\n",
      "....c ٣\n",
      "....c ,\n",
      "....c ٧\n",
      "....c ﮐ\n",
      "....c {\n",
      "....c 9\n",
      "....c ?\n",
      "....c '\n",
      "....c ْ\n",
      "....c *\n",
      "....c ـ\n",
      "....c ٔ\n",
      "....c #\n",
      "....c ٓ\n",
      "....c ِ\n",
      "....c 1\n",
      "....c 6\n",
      "....c ‘\n",
      "....c (\n",
      "....c ٠\n",
      "....c ٞ\n",
      "....c ٯ\n",
      "....c ؤ\n",
      "....c ٘\n",
      "....c ٟ\n",
      "....c ٴ\n",
      "....c ݘ\n",
      "....c ٫\n",
      "....c ی\n",
      "....c ٦\n",
      "....c ٩\n",
      "....c ٵ\n",
      "....c ٱ\n",
      "....c –\n",
      "....c ؛\n",
      "....c ٶ\n",
      "....c ٭\n",
      "....c ٗ\n",
      "....c ﭐ\n",
      "....c �\n",
      "....c ﺟ\n",
      "....c ﮞ\n",
      "ﮞ 866\n",
      "� 876\n",
      "ﭐ 888\n",
      "ﮐ 892\n",
      "ﺟ 910\n",
      "۟ 2654\n",
      "‘ 2685\n",
      "ݘ 2718\n",
      "ی 2790\n",
      "– 2802\n",
      "٥ 7794\n",
      "ٴ 7866\n",
      "ں 7885\n",
      "٤ 7898\n",
      "ڡ 7902\n",
      "ٝ 7921\n",
      "ٵ 7933\n",
      "ٲ 7944\n",
      "ٶ 7962\n",
      "٬ 7966\n",
      "ٳ 7971\n",
      "٧ 8011\n",
      "٭ 8012\n",
      "٢ 8019\n",
      "٘ 8045\n",
      "٦ 8049\n",
      "ٰ 8056\n",
      "٠ 8061\n",
      "ٟ 8067\n",
      "ٙ 8080\n",
      "٩ 8081\n",
      "ٯ 8083\n",
      "٪ 8084\n",
      "٫ 8098\n",
      "ک 8102\n",
      "ٱ 8118\n",
      "ٜ 8123\n",
      "ڤ 8126\n",
      "ٮ 8133\n",
      "ٞ 8133\n",
      "١ 8158\n",
      "ٛ 8162\n",
      "٨ 8163\n",
      "ٚ 8194\n",
      "چ 8219\n",
      "ٗ 8222\n",
      "٣ 8378\n",
      "ٍ 12142\n",
      "~ 12159\n",
      "9 12171\n",
      "ِ 12173\n",
      "1 12198\n",
      "ٓ 12228\n",
      "[ 12238\n",
      "{ 12281\n",
      "' 12285\n",
      "! 12317\n",
      "× 12331\n",
      "< 12337\n",
      "2 12344\n",
      "ْ 12345\n",
      "_ 12349\n",
      "- 12350\n",
      "% 12360\n",
      "8 12366\n",
      "5 12373\n",
      "3 12381\n",
      "ٔ 12383\n",
      "} 12384\n",
      "# 12386\n",
      "x 12392\n",
      "ً 12392\n",
      ": 12394\n",
      "7 12398\n",
      "* 12402\n",
      "= 12404\n",
      "+ 12431\n",
      "> 12454\n",
      "6 12457\n",
      "ّ 12459\n",
      "\\ 12461\n",
      ") 12462\n",
      "؛ 12467\n",
      "` 12477\n",
      "$ 12479\n",
      "0 12486\n",
      "؟ 12487\n",
      "? 12487\n",
      "ـ 12506\n",
      ". 12514\n",
      "( 12517\n",
      "ٌ 12534\n",
      "^ 12539\n",
      "\" 12541\n",
      "/ 12544\n",
      "، 12565\n",
      "ٖ 12566\n",
      "َ 12568\n",
      "ٕ 12574\n",
      "; 12588\n",
      "ُ 12604\n",
      "& 12610\n",
      "@ 12610\n",
      "] 12644\n",
      "4 12668\n",
      ", 12673\n",
      "| 12690\n",
      "آ 13719\n",
      "ؤ 17881\n",
      "ظ 25831\n",
      "غ 33164\n",
      "ء 41411\n",
      "ئ 58235\n",
      "إ 62171\n",
      "ث 63826\n",
      "ذ 73869\n",
      "ز 77302\n",
      "ض 80181\n",
      "ص 106844\n",
      "ى 107009\n",
      "خ 113804\n",
      "ط 116166\n",
      "ش 120758\n",
      "أ 155544\n",
      "ج 183706\n",
      "ك 213684\n",
      "ح 228902\n",
      "ه 259392\n",
      "ق 272453\n",
      "ف 306028\n",
      "س 308145\n",
      "د 387589\n",
      "ع 403225\n",
      "ب 418369\n",
      "ة 431012\n",
      "ت 556063\n",
      "ر 567321\n",
      "ن 612221\n",
      "و 639292\n",
      "م 800866\n",
      "ي 919298\n",
      "ل 1496107\n",
      "ا 2022160\n",
      "  3392229\n",
      "('Size:', 144)\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "import json\n",
    "import os\n",
    "from collections import defaultdict\n",
    "\n",
    "# These are for RASAM full pages\n",
    "#OUT_PATH_c = '/home/msaeed3/mehreen/datasets/RASAM/sfr/'\n",
    "#DATA_PATH_c = '/home/msaeed3/mehreen/datasets/RASAM/sfr/'\n",
    "#OUT_NAME_c = 'char_set_rasam.json'\n",
    "\n",
    "# These are for regions in RASAM and RASM\n",
    "#OUT_PATH_c = '/home/msaeed3/mehreen/datasets/RASM/regions_sfr/'\n",
    "#DATA_PATH_c = '/home/msaeed3/mehreen/datasets/RASM/regions_sfr/'\n",
    "\n",
    "# These are for RASM, RASAM, MoiseK, KHATT\n",
    "#OUT_PATH_c = '/home/msaeed3/mehreen/datasets/arabic_all/'\n",
    "#DATA_PATH_c = '/home/msaeed3/mehreen/datasets/arabic_all/'\n",
    "#OUT_NAME_c = 'char_set_arabic.json'\n",
    "\n",
    "OUT_PATH_c = '/home/msaeed3/mehreen/datasets/synthetic/line_images/'\n",
    "OUT_NAME_c = 'char_set_line_images.json'\n",
    "DATA_PATH_c = '/home/msaeed3/mehreen/datasets/synthetic/line_images/'\n",
    "\n",
    "os.chdir('/home/msaeed3/mehreen/source/start_follow_read/py3/')\n",
    "print(os.getcwd())\n",
    "\n",
    "def load_char_set(char_set_path):\n",
    "    with open(char_set_path) as f:\n",
    "        char_set = json.load(f)\n",
    "\n",
    "    idx_to_char = {}\n",
    "    for k,v in char_set['idx_to_char'].items():\n",
    "        idx_to_char[int(k)] = v\n",
    "\n",
    "    return idx_to_char, char_set['char_to_idx']\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    character_set_path = OUT_PATH_c + OUT_NAME_c\n",
    "    out_char_to_idx = {}\n",
    "    out_idx_to_char = {}\n",
    "    char_freq = defaultdict(int)\n",
    "\n",
    "    dirs = [DATA_PATH_c]\n",
    "\n",
    "    input_data_files = ['Train.json', 'Valid.json', 'Test.json']\n",
    "    data_file = []\n",
    "    for path in dirs:\n",
    "        for i in range(len(input_data_files)):\n",
    "            data_file = path + input_data_files[i]\n",
    "            with open(data_file) as f:\n",
    "                paths = json.load(f)\n",
    "\n",
    "            for json_path, image_path in paths:\n",
    "\n",
    "                with open(json_path) as f:\n",
    "                    data = json.load(f)\n",
    "\n",
    "                cnt = 1 # this is important that this starts at 1 not 0\n",
    "                for data_item in data:\n",
    "                    # Mehreen: cater for NaN gt\n",
    "                    if 'gt' in data_item and type(data_item['gt']) == float:\n",
    "                        continue\n",
    "                    for c in data_item.get('gt', ''):\n",
    "\n",
    "                        if c is None:\n",
    "                            print(\"There was a None GT\")\n",
    "                            continue\n",
    "                        if c not in out_char_to_idx:\n",
    "                            print('....c',c)\n",
    "                            out_char_to_idx[c] = cnt\n",
    "                            out_idx_to_char[cnt] = c\n",
    "                            cnt += 1\n",
    "                        char_freq[c] += 1\n",
    "\n",
    "\n",
    "    out_char_to_idx2 = {}\n",
    "    out_idx_to_char2 = {}\n",
    "\n",
    "    for i, c in enumerate(sorted(out_char_to_idx.keys())):\n",
    "        out_char_to_idx2[c] = i+1\n",
    "        out_idx_to_char2[i+1] = c\n",
    "\n",
    "    output_data = {\n",
    "        \"char_to_idx\": out_char_to_idx2,\n",
    "        \"idx_to_char\": out_idx_to_char2\n",
    "    }\n",
    "\n",
    "    for k,v in sorted(iter(char_freq.items()), key=lambda x: x[1]):\n",
    "        print(k, v)\n",
    "\n",
    "    print((\"Size:\", len(output_data['char_to_idx'])))\n",
    "\n",
    "    with open(character_set_path, 'w') as outfile:\n",
    "        json.dump(output_data, outfile)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6337a429",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "DATA_PATH_c = '/home/msaeed3/mehreen/datasets/arabic_all/'\n",
    "OUT_NAME_c = 'char_set_arabic.json'\n",
    "char_set_path = os.path.join(DATA_PATH_c, OUT_NAME_c)\n",
    "\n",
    "with open(char_set_path) as f:\n",
    "    char_set = json.load(f)\n",
    "\n",
    "larger_set = list(char_set['char_to_idx'].keys())\n",
    "larger_set.sort()\n",
    "for l in larger_set:\n",
    "    print(l, ascii(l))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c54a65d",
   "metadata": {},
   "outputs": [],
   "source": [
    "DATA_PATH_c = '/home/msaeed3/mehreen/datasets/synthetic/line_images/'\n",
    "OUT_NAME_c = 'char_set_line_images.json'\n",
    "char_set_path = os.path.join(DATA_PATH_c, OUT_NAME_c)\n",
    "\n",
    "with open(char_set_path) as f:\n",
    "    char_set = json.load(f)\n",
    "\n",
    "smaller_set = list(char_set['char_to_idx'].keys())\n",
    "smaller_set.sort()\n",
    "for s in smaller_set:\n",
    "    print(s, ascii(s))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "085f5ac4",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "34fc501d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# lists have no .difference, so convert to a set first\n",
    "diff = set(larger_set).difference(smaller_set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce121a50",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_special_ascii_set():\n",
    "    char_set = []\n",
    "    ascii_range = [[33, 64], [91, 96], [123, 126],\n",
    "                   # Arabic-Indic digits\n",
    "                   [ord('\\u0661'), ord('\\u0669')]]\n",
    "    for r in ascii_range:\n",
    "        for val in range(r[0], r[1]+1):\n",
    "            char_set.append(chr(val))\n",
    "    return char_set\n",
    "\n",
    "print((ord('\\u0661')))\n",
    "special_ascii = get_special_ascii_set()\n",
    "special_ascii.sort()\n",
    "print(special_ascii)\n",
    "special_ascii = set(special_ascii)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a89913c",
   "metadata": {},
   "outputs": [],
   "source": [
    "larger_set = set(larger_set)\n",
    "smaller_set = set(smaller_set)\n",
    "l_diff_ss = larger_set.difference(special_ascii.union(smaller_set))\n",
    "l_diff_ss = (list(l_diff_ss))\n",
    "l_diff_ss.sort()\n",
    "for l in l_diff_ss:\n",
    "    print(l, ascii(l))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "72f97354",
   "metadata": {},
   "outputs": [],
   "source": [
    "larger_set.difference(smaller_set)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "torch",
   "language": "python",
   "name": "torch"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
py3/utils/character_set.py ADDED
@@ -0,0 +1,61 @@
import sys
import json
import os
from collections import defaultdict

def load_char_set(char_set_path):
    with open(char_set_path) as f:
        char_set = json.load(f)

    idx_to_char = {}
    for k, v in char_set['idx_to_char'].items():
        idx_to_char[int(k)] = v

    return idx_to_char, char_set['char_to_idx']

if __name__ == "__main__":
    # Usage: all arguments except the last are dataset list files; the last
    # argument is the output path for the character-set JSON.
    character_set_path = sys.argv[-1]
    out_char_to_idx = {}
    out_idx_to_char = {}
    char_freq = defaultdict(int)
    for i in range(1, len(sys.argv) - 1):
        data_file = sys.argv[i]
        with open(data_file) as f:
            paths = json.load(f)

        for json_path, image_path in paths:
            with open(json_path) as f:
                data = json.load(f)

            cnt = 1  # indices must start at 1, not 0 (index 0 is typically reserved for the CTC blank)
            for data_item in data:
                # default to an empty string so items without a 'gt' field
                # are skipped instead of raising a TypeError
                for c in data_item.get('gt', ''):
                    if c is None:
                        print("There was a None GT")
                        continue
                    if c not in out_char_to_idx:
                        out_char_to_idx[c] = cnt
                        out_idx_to_char[cnt] = c
                        cnt += 1
                    char_freq[c] += 1


    out_char_to_idx2 = {}
    out_idx_to_char2 = {}

    for i, c in enumerate(sorted(out_char_to_idx.keys())):
        out_char_to_idx2[c] = i + 1
        out_idx_to_char2[i + 1] = c

    output_data = {
        "char_to_idx": out_char_to_idx2,
        "idx_to_char": out_idx_to_char2
    }

    for k, v in sorted(iter(char_freq.items()), key=lambda x: x[1]):
        print(k, v)

    print(("Size:", len(output_data['char_to_idx'])))

    with open(character_set_path, 'w') as outfile:
        json.dump(output_data, outfile)
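Everything before the last command-line argument is a dataset list and the last argument is the output path, so a typical invocation and round-trip check might look like this (file names are placeholders):

#   python utils/character_set.py Train.json Valid.json Test.json charset.json

idx_to_char, char_to_idx = load_char_set('charset.json')
for c, i in char_to_idx.items():
    assert idx_to_char[i] == c   # the two maps are mutual inverses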
py3/utils/continuous_state.py ADDED
@@ -0,0 +1,87 @@

import torch
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torch import nn

import sol
from sol.start_of_line_finder import StartOfLineFinder
from lf.line_follower import LineFollower
from hw import cnn_lstm

from utils import safe_load

import numpy as np
import cv2
import json
import sys
import os
import time
import random

def init_model(config, sol_dir='best_validation', lf_dir='best_validation', hw_dir='best_validation',
               only_load=None, device="cuda"):

    dtype = torch.FloatTensor
    if 'cuda' in device:
        dtype = torch.cuda.FloatTensor

    base_0 = config['network']['sol']['base0']
    base_1 = config['network']['sol']['base1']

    sol = None
    lf = None
    hw = None

    if only_load is None or only_load == 'sol' or 'sol' in only_load:
        sol = StartOfLineFinder(base_0, base_1)
        sol_state = safe_load.torch_state(os.path.join(config['snapshot_path'], "sol.pt"))
        sol.load_state_dict(sol_state)
        sol.to(device)

    if only_load is None or only_load == 'lf' or 'lf' in only_load:
        # This field may not be present in the config and may be added by the
        # calling module, so you won't necessarily see it in the config file.
        pt_file = 'lf.pt'

        lf = LineFollower(config['network']['hw']['input_height'], dtype=dtype, device=device)
        lf_state = safe_load.torch_state(os.path.join(config['snapshot_path'], pt_file))

        # Special case for backward compatibility with the previous way the
        # LF weights were saved (a nested 'cnn' dict plus module objects).
        if 'cnn' in lf_state:
            new_state = {}
            for k, v in lf_state.items():
                print(k)
                if k == 'cnn':
                    for k2, v2 in v.items():
                        if "running" in k2:
                            pass  # skip BatchNorm running statistics
                        else:
                            new_state[k + "." + k2] = v2
                if k == 'position_linear':
                    for k2, v2 in v.state_dict().items():
                        new_state[k + "." + k2] = v2

            lf_state = new_state

        lf.load_state_dict(lf_state)
        lf.to(device)

    if only_load is None or only_load == 'hw' or 'hw' in only_load:
        hw = cnn_lstm.create_model(config['network']['hw'])
        hw_state = safe_load.torch_state(os.path.join(config['snapshot_path'], "hw.pt"))
        hw.load_state_dict(hw_state)
        hw.to(device)

    return sol, lf, hw
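A sketch of loading just one of the three networks. The config path is one shipped in this commit; the snapshot_path key is assumed to be present or injected by the caller (as the comment above notes), and the checkpoint is assumed to be loadable on the chosen device:

import yaml

with open('model/trial_26_A/set0/config_2600.yaml') as f:
    config = yaml.load(f, Loader=yaml.Loader)
config.setdefault('snapshot_path', 'model/trial_26_A/set0/pretrain')

sol, lf, hw = init_model(config, only_load='sol', device='cpu')
print(sol is not None, lf is None, hw is None)   # True True True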
py3/utils/dataset_parse.py ADDED
@@ -0,0 +1,17 @@
import json
import os

def load_file_list(config):
    file_list_path = config['file_list']
    with open(file_list_path) as f:
        data = json.load(f)

    # Resolve each (json, image) pair relative to the configured folders.
    for d in data:
        json_path = os.path.join(config['json_folder'], d[0])
        img_path = os.path.join(config['img_folder'], d[1])

        d[0] = json_path
        d[1] = img_path

    return data
py3/utils/dataset_wrapper.py ADDED
@@ -0,0 +1,27 @@
class DatasetWrapper(object):
    """Iterate over a dataset for a fixed number of items per pass,
    restarting the underlying iterator (and counting epochs) as needed."""

    def __init__(self, dataset, count):
        self.count = count
        self.idx = 0
        self.dataset = dataset
        self.iter_dataset = iter(dataset)
        self.epoch = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self.idx >= self.count:
            self.idx = 0
            raise StopIteration

        self.idx += 1
        while True:
            try:
                return next(self.iter_dataset)
            except StopIteration:
                self.iter_dataset = iter(self.dataset)
                self.epoch += 1
                try:
                    return next(self.iter_dataset)
                except StopIteration:
                    raise Exception("Appears as if dataset is empty")
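The wrapper decouples "iterations per pass" from the dataset length; a minimal sketch:

wrapped = DatasetWrapper([1, 2, 3], count=5)
print(list(wrapped))    # [1, 2, 3, 1, 2] -- the source restarts mid-pass
print(wrapped.epoch)    # 1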
py3/utils/error_rates.py ADDED
@@ -0,0 +1,21 @@
import editdistance

def cer(r, h):
    # Remove any double or trailing whitespace before comparing characters
    r = ' '.join(r.split())
    h = ' '.join(h.split())

    return err(r, h)

def err(r, h):
    dis = editdistance.eval(r, h)
    if len(r) == 0:
        return len(h)

    return float(dis) / float(len(r))

def wer(r, h):
    r = r.split()
    h = h.split()

    return err(r, h)
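A worked example: one character edit out of eleven reference characters, and one word out of two.

print(cer("hello world", "helo world"))   # 1/11 = 0.0909...
print(wer("hello world", "helo world"))   # 1/2  = 0.5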
py3/utils/fast_inverse.py ADDED
@@ -0,0 +1,58 @@
import numpy as np
from torch.autograd import Variable
import torch

def adjoint(A):
    """Compute the adjugate (inverse without division by det); ...x3x3 input,
    i.e. an array of 3x3 matrices, is assumed."""
    AI = np.empty_like(A)
    for i in range(3):
        AI[..., i, :] = np.cross(A[..., i - 2, :], A[..., i - 1, :])
    return AI

def inverse_transpose(A):
    """Efficiently compute the inverse-transpose for a stack of 3x3 matrices."""
    I = adjoint(A)
    det = dot(I, A).mean(axis=-1)
    return I / det[..., None, None]

def inverse(A):
    """Inverse of a stack of 3x3 matrices."""
    return np.swapaxes(inverse_transpose(A), -1, -2)

def dot(A, B):
    """Dot arrays of vecs; contract over last indices."""
    return np.einsum('...i,...i->...', A, B)

def adjoint_torch(A):
    AI = A.clone()
    for i in range(3):
        AI[..., i, :] = torch.cross(A[..., i - 2, :], A[..., i - 1, :])
    return AI

def inverse_transpose_torch(A):
    I = adjoint_torch(A)
    det = dot_torch(I, A).mean(dim=-1)
    return I / det[:, None, None]

def inverse_torch(A):
    return inverse_transpose_torch(A).transpose(1, 2)

def dot_torch(A, B):
    A_view = A.view(-1, 1, 3)
    B_view = B.contiguous().view(-1, 3, 1)
    out = torch.bmm(A_view, B_view)
    out_view = out.view(A.size()[:-1])
    return out_view


if __name__ == "__main__":
    # Sanity check: the NumPy and torch implementations should agree.
    A = np.random.rand(2, 3, 3)
    I = inverse(A)

    A_torch = Variable(torch.from_numpy(A))

    I_torch = inverse_torch(A_torch)
    print(I)
    print(I_torch)
@@ -0,0 +1,30 @@
 
import torch
import time
import json

def torch_state(path):
    # Retry with a growing back-off in case the snapshot is being written
    # by another process at the same time.
    for i in range(10):
        try:
            state = torch.load(path)
            return state
        except Exception:
            print("Failed to load", i, path)
            time.sleep(i)

    print("Failed to load state")
    return None

def json_state(path):
    for i in range(10):
        try:
            with open(path) as f:
                state = json.load(f)
            return state
        except Exception:
            print("Failed to load", i, path)
            time.sleep(i)

    print("Failed to load state")
    return None
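A usage sketch against one of the snapshots shipped in this commit; the retry-with-back-off exists so a reader does not crash if the trainer is mid-write:

state = torch_state('model/trial_26_A/set0/pretrain/sol.pt')
if state is not None:
    print(sorted(state.keys())[:5])   # first few parameter names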