File size: 3,807 Bytes
f316449
 
 
 
 
 
 
 
 
 
 
fec514c
f316449
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e091f09
f316449
 
 
 
e091f09
 
 
f316449
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fec514c
07296dc
e091f09
 
f316449
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
import sys
import logging
import importlib
import shutil
import torch
import pickle
import json
import random
import numpy as np
from argparse import Namespace
from huggingface_hub import hf_hub_download, snapshot_download


def logging_config(log_file=None):
    """
    Configure the root logger to stream to stdout and, optionally, a file.
    :param log_file: str - full path of the log file, or None for stdout only
    :return: None
    """
    # Always log to stdout; mirror into a UTF-8 file when one is given.
    log_handlers = [logging.StreamHandler(sys.stdout)]

    if log_file:
        log_handlers.append(logging.FileHandler(log_file, "w", "utf-8"))
        print("Logging to {}".format(log_file))

    # force=True replaces any handlers installed by a previous call.
    logging.basicConfig(
        level=logging.INFO,
        handlers=log_handlers,
        format="%(levelname)s\t%(name)s\t%(asctime)s\t%(message)s",
        datefmt="%a, %d %b %Y %H:%M:%S",
        force=True,
    )


def load_object(name, kwargs):
    """
    Dynamically import a callable by its dotted path and invoke it.
    :param name: str - fully qualified name, e.g. "package.module.ClassName"
    :param kwargs: dict - keyword arguments forwarded to the callable
    :return: object - the result of calling the named class/function
    """
    # Split "pkg.mod.Attr" into the module path and the attribute name.
    module_path, attr_name = name.rsplit(".", 1)
    module = importlib.import_module(module_path)
    target = getattr(module, attr_name)
    return target(**kwargs)


def make_output_dirs(path, subdirs=None, overwrite=True):
    """
    Create root directory and any other sub-directories
    :param path: str - root directory
    :param subdirs: List[str] - list of sub-directories (default: none)
    :param overwrite: boolean - to overwrite the directory or not
    :return: None
    """
    # Use a None sentinel instead of a mutable default argument (subdirs=[]),
    # which would be shared across calls.
    if subdirs is None:
        subdirs = []

    if overwrite:
        # ignore_errors: a missing path on first run is not an error
        shutil.rmtree(path, ignore_errors=True)

    os.makedirs(path)

    for subdir in subdirs:
        os.makedirs(os.path.join(path, subdir))


def load_checkpoint(model_path):
    """
    Load model given the model path
    :param model_path: str - path to model
    :return: tagger - Nested.trainers.BaseTrainer - the tagger model
             vocab - arabicner.utils.data.Vocab - indexed tags
             train_config - argparse.Namespace - training configurations
    """
    # NOTE(review): model_path is never used below — all artifacts come from
    # the hard-coded "SinaLab/Nested" hub repo and a relative local pickle
    # path; confirm whether the parameter should drive these locations.
    with open("Nested/utils/tag_vocab.pkl", "rb") as fh:
        tag_vocab = pickle.load(fh)

    # Load train configurations from checkpoint
    # The downloaded args.json is loaded wholesale into a Namespace so it can
    # be accessed with attribute syntax (train_config.loss, etc.).
    train_config = Namespace()
    args_path = hf_hub_download(repo_id="SinaLab/Nested", filename="args.json")
    
    with open(args_path, "r") as fh:
        train_config.__dict__ = json.load(fh)

    # Initialize the loss function, not used for inference, but evaluation
    loss = load_object(train_config.loss["fn"], train_config.loss["kwargs"])

    # Load BERT tagger
    # Network class and its kwargs are described in the downloaded config;
    # DataParallel wrapping matches how the checkpoint was saved.
    model = load_object(train_config.network_config["fn"], train_config.network_config["kwargs"])
    model = torch.nn.DataParallel(model)

    if torch.cuda.is_available():
        model = model.cuda()

    # Update arguments for the tagger
    # Attach the model, loss (used for evaluations cases)
    train_config.trainer_config["kwargs"]["model"] = model
    train_config.trainer_config["kwargs"]["loss"] = loss

    tagger = load_object(train_config.trainer_config["fn"], train_config.trainer_config["kwargs"])
    # checkpoint_path = hf_hub_download(repo_id="SinaLab/Nested", local_dir="checkpoints")
    # snapshot_download returns the local snapshot root containing checkpoints/
    checkpoint_path = snapshot_download(repo_id="SinaLab/Nested", allow_patterns="checkpoints/")

    tagger.load(checkpoint_path)
    return tagger, tag_vocab, train_config


def set_seed(seed):
    """
    Seed every random number generator in use (random, numpy, torch
    CPU and CUDA) and configure cuDNN for deterministic behavior so
    repeated runs with the same seed give identical results.

    :param seed: int
    """
    # Seed all RNG sources the project may draw from.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Trade speed for reproducibility: turn off cuDNN auto-tuning and
    # non-deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.enabled = False