compro-fastapi / model /font_loader.py
faizmaula's picture
Upload 72 files
0aec844 verified
import albumentations as A
import csv
import numpy as np
import onnxruntime as ort
import yaml
import os
# Directory that contains this module; every asset path below is built from it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# === Local / HF paths ===
# The model config and the ONNX weights share the "Checkpoint" folder.
_CHECKPOINT_DIR = os.path.join(BASE_DIR, "Checkpoint")
CONFIG_PATH = os.path.join(_CHECKPOINT_DIR, "model_config.yaml")
MODEL_PATH = os.path.join(_CHECKPOINT_DIR, "model.onnx")
MAPPING_PATH = os.path.join(
    BASE_DIR, "font-classify-main", "google_fonts_mapping.tsv"
)
# === Load config ===
# Decode explicitly as UTF-8 so non-ASCII class names are read identically on
# every platform instead of depending on the locale's default encoding.
with open(CONFIG_PATH, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# "size" is the side length handed to resize_with_pad during preprocessing;
# "classnames" is indexed by the predicted class id in predict_font.
INPUT_SIZE = config["size"]
CLASSNAMES = config["classnames"]
# === Font mapping ===
# Maps the first TSV column ("filename") to its (font_name, version) pair.
google_font_mapping = {}
# newline="" is what the csv module asks for when given a file object, and the
# explicit UTF-8 keeps decoding independent of the platform locale.
with open(MAPPING_PATH, "r", encoding="utf-8", newline="") as f:
    reader = csv.reader(f, delimiter="\t")
    next(reader, None)  # the first row is skipped (treated as the header)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # skip them instead of failing the three-way unpack below.
            continue
        # Non-empty rows must still have exactly three columns; anything else
        # raises ValueError so a malformed mapping file is noticed at startup.
        filename, font_name, version = row
        google_font_mapping[filename] = (font_name, version)
# === ONNX Session ===
# The execution provider is named explicitly: ONNX Runtime only documents the
# "no providers argument" form for CPU-only use, so spelling out the CPU
# provider keeps session creation working on builds that ship more providers.
# To try GPU inference, list "CUDAExecutionProvider" ahead of the CPU entry.
font_session = ort.InferenceSession(
    MODEL_PATH,
    providers=["CPUExecutionProvider"],
)
import cv2
def cut_max(image: np.ndarray, max_size: int = 1024) -> np.ndarray:
    """Crop *image* to at most ``max_size`` rows and ``max_size`` columns.

    The crop keeps the top-left corner. Only the first two axes are sliced,
    so both H x W (grayscale) and H x W x C arrays are accepted; any trailing
    axes are left untouched.

    Args:
        image: Array with at least two dimensions (rows, columns, ...).
        max_size: Upper bound for the number of rows and of columns.

    Returns:
        A view of ``image`` limited to ``max_size`` along each of the first
        two axes (no data is copied).
    """
    # Slicing past the end of an axis is a no-op, so no size checks are needed.
    return image[:max_size, :max_size]
def resize_with_pad(image: np.ndarray, size: int) -> np.ndarray:
    """Scale *image* to fit a ``size`` x ``size`` square and pad it with white.

    The aspect ratio is preserved: the image is scaled by
    ``size / max(height, width)`` and pasted, centred, onto a white canvas.

    Args:
        image: Image array whose first two axes are (height, width). It is
            pasted onto a 3-channel canvas, so it is expected to have three
            channels.
        size: Side length, in pixels, of the square output.

    Returns:
        A ``(size, size, 3)`` ``uint8`` array containing the resized image
        surrounded by white (255) padding.

    Raises:
        ValueError: If the image has zero height or zero width.
    """
    h, w = image.shape[:2]
    if h == 0 or w == 0:
        raise ValueError(f"cannot resize an empty image of shape {image.shape}")

    scale = size / max(h, w)
    # int() truncates, so the short side of a very thin image would become 0
    # pixels, which cv2.resize rejects; keep every side at least 1 pixel.
    nh = max(1, int(h * scale))
    nw = max(1, int(w * scale))
    resized = cv2.resize(image, (nw, nh))  # cv2 takes (width, height)

    canvas = np.full((size, size, 3), 255, dtype=np.uint8)
    y0 = (size - nh) // 2
    x0 = (size - nw) // 2
    canvas[y0:y0 + nh, x0:x0 + nw] = resized
    return canvas
# === Transform ===
def preprocess_font_image(image_rgb: np.ndarray):
    """Turn an image into the float32 batch tensor fed to the ONNX session.

    Steps: crop to at most 1024 x 1024, resize and pad to
    ``INPUT_SIZE`` x ``INPUT_SIZE``, scale to [0, 1], normalize per channel,
    move channels first and add a leading batch axis.

    Args:
        image_rgb: Input image array; named as RGB, channel order is not
            checked here.

    Returns:
        A ``float32`` array of shape ``(1, 3, INPUT_SIZE, INPUT_SIZE)``.
    """
    # Per-channel normalization constants (presumably the ImageNet mean/std
    # the model was trained with -- confirm against the training pipeline).
    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    padded = resize_with_pad(cut_max(image_rgb, 1024), INPUT_SIZE)

    scaled = padded.astype(np.float32)
    scaled /= 255.0
    normalized = (scaled - channel_mean) / channel_std

    # HWC -> CHW, then prepend the batch dimension.
    channels_first = np.transpose(normalized, (2, 0, 1))
    batch = np.expand_dims(channels_first, 0)
    return batch.astype(np.float32)
def softmax(x, axis=None):
    """Return the numerically stable softmax of *x*.

    Args:
        x: Array-like of logits.
        axis: Axis along which to normalize. The default ``None`` normalizes
            over all elements at once, which for a 1-D logits vector is the
            usual softmax; pass e.g. ``axis=-1`` for a batch of logit rows.

    Returns:
        An array with the same shape as ``x`` whose values sum to 1 along
        ``axis`` (or overall when ``axis`` is ``None``).
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large logits.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=axis, keepdims=True)
def predict_font(image_rgb: np.ndarray):
    """Classify the font shown in an image.

    The image is preprocessed, run through the module-level ONNX session,
    and the highest-probability class is reported.

    Args:
        image_rgb: Input image array, forwarded to ``preprocess_font_image``.

    Returns:
        A dict with:
            ``"class"``: the predicted entry of ``CLASSNAMES``;
            ``"google_font"``: the ``google_font_mapping`` entry for that
            class name, or ``None`` when the name has no mapping;
            ``"confidence"``: the softmax probability of the predicted class.
    """
    batch = preprocess_font_image(image_rgb)

    # First output of the session, first (and only) item of the batch.
    outputs = font_session.run(None, {"input": batch})
    logits = outputs[0][0]

    probabilities = softmax(logits)
    best_index = int(probabilities.argmax())
    label = CLASSNAMES[best_index]

    result = {
        "class": label,
        "google_font": google_font_mapping.get(label),
        "confidence": float(probabilities[best_index]),
    }
    return result