Spaces:
Runtime error
Runtime error
| import json | |
| import os | |
| # from sklearn.externals import joblib | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
| # from .variables import old_ocr_req_cols | |
| # from .skew_correction import PageSkewWraper | |
# NOTE(review): 1.294117647 is approximately 11 / 8.5, the height-to-width
# ratio of a US Letter page -- presumably the assumed page aspect ratio;
# confirm against the code that consumes it.
const_HW = 1.294117647
# NOTE(review): presumably a reference page width (pixels?) paired with
# const_HW; neither constant is used in the visible part of this file.
const_W = 600
def bucket_sort(df, colmn, ymax_col="ymax", ymin_col="ymin"):
    """Assign a 1-based ``line_number`` to each row by vertical overlap.

    Rows are scanned in their existing order, so the result is
    order-dependent: sort the frame top-to-bottom (by ``ymin_col``) before
    calling. The first row of a line fixes the line's bottom edge (its
    ``ymax_col`` value). Every following row whose ``ymin_col`` value is
    strictly smaller than that edge joins the same line; the first row that
    does not opens the next line. The bottom edge is never extended by later
    rows of the same line.

    Args:
        df: DataFrame containing every column named in ``colmn``. A
            ``line_number`` column is added to it (or reset to 0) in place.
        colmn: List of column names to carry into the result; must contain
            ``ymax_col`` and ``ymin_col``. ``"line_number"`` is appended to
            this list in place if it is not already present.
        ymax_col: Name of the column holding each box's bottom edge.
        ymin_col: Name of the column holding each box's top edge.

    Returns:
        A new DataFrame with a default integer index, the columns listed in
        ``colmn`` (now including ``line_number``) and the rows in their
        input order. All columns share the common dtype of the selected
        columns because the data passes through a single NumPy array.

    Raises:
        ValueError: If ``ymax_col`` or ``ymin_col`` is not in ``colmn``.
        KeyError: If a name in ``colmn`` is not a column of ``df``.
    """
    df["line_number"] = 0
    # The in-place append is kept for callers that reuse the list afterwards,
    # but guarded so a reused list does not gain a duplicate column.
    if "line_number" not in colmn:
        colmn.append("line_number")

    # Explicit copy: line numbers are written into this array, and an array
    # handed out by pandas may be a read-only view.
    array_value = df[colmn].to_numpy(copy=True)
    ymax = colmn.index(ymax_col)
    ymin = colmn.index(ymin_col)
    line_no = colmn.index("line_number")

    total = len(array_value)
    line_counter = 0
    start_index = 0
    while start_index < total:
        line_counter += 1
        current_ymax = array_value[start_index][ymax]
        # The opening row always belongs to the line it opens. Testing it
        # against its own ymin made the scan stall forever on a degenerate
        # box (ymax <= ymin), because the start position never advanced.
        array_value[start_index][line_no] = line_counter

        next_index = start_index + 1
        while next_index < total and current_ymax > array_value[next_index][ymin]:
            array_value[next_index][line_no] = line_counter
            next_index += 1

        # next_index is the first row that did not fit: it opens the next line.
        start_index = next_index

    return pd.DataFrame(array_value, columns=colmn)
def do_sorting(df):
    """Order bounding-box rows into reading order: line by line, left to right.

    The rows are first sorted by ``(ymin, xmin)``, grouped into text lines
    with ``bucket_sort``, and finally sorted by ``(line_number, xmin)``.

    Side effects on the passed ``df`` (kept for existing callers): it is
    sorted in place by ``(ymin, xmin)``, gains an ``idx`` column holding its
    index labels, and loses any pre-existing ``line_number`` column (a
    message is printed when that happens).

    Args:
        df: DataFrame with at least the columns ``xmin``, ``ymin``, ``xmax``
            and ``ymax``.

    Returns:
        A new DataFrame with a fresh ``0..n-1`` index containing the columns
        of ``df`` plus ``idx`` and ``line_number``, in reading order.
    """
    df.sort_values(["ymin", "xmin"], ascending=True, inplace=True)
    df["idx"] = df.index
    if "line_number" in df.columns:
        print("line number removed")
        df.drop("line_number", axis=1, inplace=True)

    coord_cols = ["xmin", "ymin", "xmax", "ymax"]
    # bucket_sort adds a column to the frame it receives, so hand it a copy
    # holding only the coordinates it needs.
    lines = bucket_sort(df[coord_cols].copy(), coord_cols)

    # bucket_sort keeps the row order, so the line numbers are attached by
    # position. Joining on ``idx`` multiplied rows whenever index labels
    # repeated (e.g. after concatenating frames without re-indexing).
    df = df.assign(line_number=lines["line_number"].to_numpy())
    df = df.sort_values(["line_number", "xmin"], ascending=True)
    return df.reset_index(drop=True)