Spaces:

faizmaula
/

compro-fastapi

Sleeping

File size: 2,652 Bytes

0aec844

import albumentations as A
import csv
import numpy as np
import onnxruntime as ort
import yaml
import os


# === Local / Hugging Face paths ===
# Every asset is located relative to the directory that contains this module,
# so the paths do not depend on the process's current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
_CHECKPOINT_DIR = os.path.join(BASE_DIR, "Checkpoint")

CONFIG_PATH = os.path.join(_CHECKPOINT_DIR, "model_config.yaml")
MODEL_PATH = os.path.join(_CHECKPOINT_DIR, "model.onnx")
MAPPING_PATH = os.path.join(
    BASE_DIR, "font-classify-main", "google_fonts_mapping.tsv"
)
# === Load config ===
# The YAML file provides the model input resolution ("size") and the list of
# class labels ("classnames"), which is indexed by the model's output position.
# The encoding is given explicitly: without it open() falls back to the
# platform's locale encoding, so non-ASCII labels could decode differently
# (or fail to decode) depending on where the service runs.
with open(CONFIG_PATH, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

INPUT_SIZE = config["size"]
CLASSNAMES = config["classnames"]

# === Font mapping ===
# Maps the first TSV column (the key later looked up with the predicted class
# name) to a ``(font_name, version)`` tuple built from the other two columns.
google_font_mapping = {}
# encoding: font names may contain non-ASCII characters, so do not rely on the
# platform's locale encoding. newline="": required by the csv module so that
# it can handle line endings itself.
with open(MAPPING_PATH, "r", encoding="utf-8", newline="") as f:
    reader = csv.reader(f, delimiter="\t")
    next(reader, None)  # the first row is the column header, not data
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. trailing newlines);
            # skip them instead of crashing on the 3-way unpack below.
            continue
        # Any non-empty row that does not have exactly three columns still
        # raises ValueError, so a corrupted mapping file is noticed at startup.
        filename, font_name, version = row
        google_font_mapping[filename] = (font_name, version)

# === ONNX session ===
# Created once at import time and shared by every prediction call.
# No execution providers are requested explicitly, so onnxruntime uses its
# defaults. To ask for GPU inference with a CPU fallback, pass e.g.
# providers=["CUDAExecutionProvider", "CPUExecutionProvider"].
font_session = ort.InferenceSession(MODEL_PATH)


import cv2

def cut_max(image: np.ndarray, max_size: int = 1024) -> np.ndarray:
    """Crop *image* so that neither of its first two axes exceeds *max_size*.

    The crop keeps the top-left corner. Only the first two axes (rows and
    columns) are limited; any trailing axes, such as a channel axis, are left
    untouched, so both ``(H, W)`` and ``(H, W, C)`` arrays are accepted.

    Args:
        image: Array with at least two dimensions.
        max_size: Largest allowed extent for the first two axes.

    Returns:
        ``image`` itself when it already fits, otherwise a view of its
        top-left ``max_size`` x ``max_size`` region (no pixel data is copied).
    """
    height, width = image.shape[:2]
    if height <= max_size and width <= max_size:
        return image
    # Slicing past the end of an axis is a no-op, so one expression covers
    # "too tall", "too wide" and "both", whatever the number of trailing axes.
    return image[:max_size, :max_size]


def resize_with_pad(image: np.ndarray, size: int) -> np.ndarray:
    """Fit *image* into a white ``size`` x ``size`` square, keeping its aspect ratio.

    The image is scaled so that its longer side equals ``size`` and is then
    centred on a white (255) ``uint8`` canvas with three channels.

    Args:
        image: ``(H, W, 3)`` array. Single-channel input, given as ``(H, W)``
            or ``(H, W, 1)``, is replicated to three channels first.
        size: Side length of the square output, in pixels (at least 1).

    Returns:
        A new ``(size, size, 3)`` ``uint8`` array.

    Raises:
        ValueError: If ``size`` is smaller than 1, the image has a zero-length
            side, or its channel layout is not one of the supported ones.
    """
    if size < 1:
        raise ValueError(f"size must be at least 1, got {size}")

    h, w = image.shape[:2]
    if h == 0 or w == 0:
        # Previously this surfaced as a ZeroDivisionError or an OpenCV error.
        raise ValueError("cannot resize an image with a zero-length side")

    if image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):
        # The canvas has three channels, so replicate the single channel.
        image = np.repeat(image.reshape(h, w, 1), 3, axis=2)
    elif image.ndim != 3 or image.shape[2] != 3:
        raise ValueError(
            f"expected a 1- or 3-channel image, got shape {image.shape}"
        )

    # Integer arithmetic keeps the longer side at exactly `size` (a float
    # scale factor can truncate to size - 1), and the lower bound of 1 stops
    # the shorter side collapsing to 0 pixels for extreme aspect ratios.
    longest = max(h, w)
    nh = max(1, h * size // longest)
    nw = max(1, w * size // longest)

    # cv2.resize expects the target size as (width, height).
    resized = cv2.resize(image, (nw, nh))

    canvas = np.full((size, size, 3), 255, dtype=np.uint8)
    y0 = (size - nh) // 2
    x0 = (size - nw) // 2
    canvas[y0:y0 + nh, x0:x0 + nw] = resized

    return canvas


# === Transform ===
def preprocess_font_image(image_rgb: np.ndarray):
    """Turn an RGB image into the float32 tensor that is fed to the model.

    Steps: crop with ``cut_max`` (limit 1024), letterbox to ``INPUT_SIZE``
    with ``resize_with_pad``, divide by 255, standardize each channel with
    the mean/std constants below, and reorder from HWC to a 1 x C x H x W
    batch.
    """
    # Per-channel statistics (these are the commonly used ImageNet values).
    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    cropped = cut_max(image_rgb, 1024)
    letterboxed = resize_with_pad(cropped, INPUT_SIZE)

    pixels = letterboxed.astype(np.float32)
    pixels /= 255.0
    standardized = (pixels - channel_mean) / channel_std

    # HWC -> CHW, then add the leading batch axis.
    chw = standardized.transpose(2, 0, 1)
    return chw[np.newaxis, ...].astype(np.float32)




def softmax(x, axis=None):
    """Return the numerically stable softmax of *x*.

    Args:
        x: Array-like of logits.
        axis: Axis along which the probabilities are normalized. The default
            ``None`` normalizes over all elements together, which is the
            behaviour for a single 1-D logits vector. Pass e.g. ``axis=-1``
            to normalize each row of a batch independently.

    Returns:
        An array with the same shape as ``x`` whose values sum to 1 along
        ``axis`` (or over the whole array when ``axis`` is ``None``).
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps exp() from overflowing on large logits.
    e = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)


def predict_font(image_rgb: np.ndarray) -> dict:
    """Classify the font shown in an RGB image.

    The image is preprocessed with ``preprocess_font_image``, run through the
    module-level ONNX session, and the highest-probability class is reported.

    Args:
        image_rgb: Image array passed on to ``preprocess_font_image``.

    Returns:
        A dict with three keys:
            ``"class"``: the predicted label taken from ``CLASSNAMES``.
            ``"google_font"``: the ``(font_name, version)`` tuple stored for
                that label in ``google_font_mapping``, or ``None`` if absent.
            ``"confidence"``: softmax probability of the predicted class.
    """
    image = preprocess_font_image(image_rgb)

    # Use the input name the loaded model declares instead of assuming it is
    # called "input", so a re-exported model with another name still works.
    input_name = font_session.get_inputs()[0].name
    # First output tensor, first (and only) item of the batch.
    logits = font_session.run(None, {input_name: image})[0][0]
    probs = softmax(logits)

    class_id = int(probs.argmax())
    class_name = CLASSNAMES[class_id]

    return {
        "class": class_name,
        "google_font": google_font_mapping.get(class_name),
        "confidence": float(probs[class_id]),
    }