Spaces:

faizmaula
/

compro-fastapi

Sleeping

App Files Files Community

compro-fastapi / model /font_loader.py

faizmaula

Upload 72 files

0aec844 verified 9 days ago

raw

history blame contribute delete

2.65 kB

	import albumentations as A
	import csv
	import numpy as np
	import onnxruntime as ort
	import yaml
	import os


	# Directory containing this module; every asset path below is resolved
	# against it so the module works regardless of the current directory.
	BASE_DIR = os.path.dirname(os.path.abspath(__file__))

	# === Local / HF paths ===
	CONFIG_PATH = os.path.join(BASE_DIR, "Checkpoint", "model_config.yaml")
	MODEL_PATH = os.path.join(BASE_DIR, "Checkpoint", "model.onnx")
	MAPPING_PATH = os.path.join(BASE_DIR, "font-classify-main", "google_fonts_mapping.tsv")

	# === Load config ===
	# The encoding is given explicitly so parsing does not depend on the
	# platform's locale default.
	with open(CONFIG_PATH, "r", encoding="utf-8") as f:
	    config = yaml.safe_load(f)

	# Edge length passed to resize_with_pad() and the list indexed by the
	# predicted class id, both read from the YAML config.
	INPUT_SIZE = config["size"]
	CLASSNAMES = config["classnames"]

	# === Font mapping ===
	# Maps the first TSV column (unpacked as `filename`) to the pair
	# (font_name, version) taken from the second and third columns.
	google_font_mapping = {}
	# newline="" is what the csv module asks for when it is handed a file
	# object; the explicit encoding keeps the result locale-independent.
	with open(MAPPING_PATH, "r", encoding="utf-8", newline="") as f:
	    reader = csv.reader(f, delimiter="\t")
	    next(reader, None)  # skip the header row (no-op on an empty file)
	    for row in reader:
	        if not row:
	            # csv.reader yields [] for a blank line; without this guard a
	            # stray empty line would break the 3-way unpack below.
	            continue
	        filename, font_name, version = row
	        google_font_mapping[filename] = (font_name, version)

	# === ONNX Session ===
	# The execution provider is named explicitly instead of relying on the
	# build's default resolution, so session creation behaves the same on
	# CPU-only and accelerator-enabled ONNX Runtime builds. To run on GPU,
	# put "CUDAExecutionProvider" ahead of the CPU entry.
	font_session = ort.InferenceSession(
	    MODEL_PATH,
	    providers=["CPUExecutionProvider"],
	)


	import cv2

	def cut_max(image: np.ndarray, max_size: int = 1024) -> np.ndarray:
	    """Crop ``image`` so neither of its first two axes exceeds ``max_size``.

	    The top-left region is kept. Any trailing axes (for example a channel
	    axis) are left untouched, so both 2-D and 3-D arrays are accepted.

	    Args:
	        image: Array with at least two axes.
	        max_size: Largest allowed extent along axis 0 and along axis 1.

	    Returns:
	        ``image`` itself when no cropping is needed, otherwise a sliced
	        view of it.
	    """
	    # Slicing only the axis being cropped (instead of spelling out three
	    # axes) is what lets arrays without a channel axis through.
	    if image.shape[0] > max_size:
	        image = image[:max_size]
	    if image.shape[1] > max_size:
	        image = image[:, :max_size]
	    return image


	def resize_with_pad(image: np.ndarray, size: int):
	    """Fit ``image`` inside a white ``size`` x ``size`` canvas.

	    The image is scaled so its longer side becomes ``size`` (aspect ratio
	    preserved) and is then centred on a 3-channel uint8 canvas filled
	    with 255.

	    Args:
	        image: Array whose first two axes are height and width.
	        size: Edge length of the square output.

	    Returns:
	        A ``(size, size, 3)`` uint8 array.

	    Raises:
	        ValueError: If ``image`` has zero height or zero width.
	    """
	    h, w = image.shape[:2]
	    if h == 0 or w == 0:
	        raise ValueError(f"cannot resize an empty image of shape {image.shape}")

	    scale = size / max(h, w)
	    # int() truncates, so a very elongated image could end up with a
	    # zero-sized short side, which cv2.resize rejects. Keep at least one
	    # pixel, and never exceed the canvas.
	    nh = min(size, max(1, int(h * scale)))
	    nw = min(size, max(1, int(w * scale)))

	    resized = cv2.resize(image, (nw, nh))  # cv2 expects (width, height)

	    canvas = np.full((size, size, 3), 255, dtype=np.uint8)
	    y0 = (size - nh) // 2
	    x0 = (size - nw) // 2
	    canvas[y0:y0 + nh, x0:x0 + nw] = resized

	    return canvas


	# === Transform ===
	def preprocess_font_image(image_rgb: np.ndarray):
	    """Convert an image array into the float32 batch fed to the model.

	    The image is cropped to at most 1024 px per side, resized and padded
	    to ``INPUT_SIZE``, divided by 255, normalized per channel with a
	    fixed mean and standard deviation, moved to channel-first order and
	    given a leading batch axis of length 1.
	    """
	    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
	    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

	    cropped = cut_max(image_rgb, 1024)
	    padded = resize_with_pad(cropped, INPUT_SIZE)

	    pixels = padded.astype(np.float32)
	    pixels /= 255.0
	    normalized = (pixels - channel_mean) / channel_std

	    # (H, W, C) -> (C, H, W), then add the batch axis in front.
	    channel_first = np.transpose(normalized, (2, 0, 1))
	    batch = np.expand_dims(channel_first, 0)
	    return batch.astype(np.float32)




	def softmax(x, axis=None):
	    """Return the softmax of ``x``.

	    Args:
	        x: Array-like of scores.
	        axis: Axis along which to normalize. The default ``None``
	            normalizes over every element of ``x``; pass e.g. ``-1`` to
	            normalize each row of a batch independently.

	    Returns:
	        An array shaped like ``x`` whose entries sum to 1 along ``axis``.
	    """
	    x = np.asarray(x)
	    # Subtracting the maximum leaves the result unchanged mathematically
	    # but keeps exp() from overflowing on large scores.
	    shifted = x - np.max(x, axis=axis, keepdims=True)
	    e = np.exp(shifted)
	    return e / e.sum(axis=axis, keepdims=True)


	def predict_font(image_rgb: np.ndarray):
	    """Classify the font shown in ``image_rgb``.

	    The image is preprocessed, run through the ONNX session, and the
	    highest-probability class is reported.

	    Returns:
	        A dict with three keys: ``"class"`` (the entry of ``CLASSNAMES``
	        at the predicted index), ``"google_font"`` (the
	        ``(font_name, version)`` pair mapped to that class name, or
	        ``None`` when there is no mapping) and ``"confidence"`` (the
	        softmax probability of the predicted class, as a float).
	    """
	    image = preprocess_font_image(image_rgb)

	    # Read the input tensor name from the session instead of assuming it
	    # is "input", so a model exported under another name still runs.
	    input_name = font_session.get_inputs()[0].name
	    # First output, first (and only) item of the batch.
	    logits = font_session.run(None, {input_name: image})[0][0]
	    probs = softmax(logits)

	    class_id = int(probs.argmax())
	    class_name = CLASSNAMES[class_id]

	    return {
	        "class": class_name,
	        "google_font": google_font_mapping.get(class_name),
	        "confidence": float(probs[class_id]),
	    }