You need to agree to share your contact information to access this model

This repository is publicly accessible, but you have to accept the conditions to access its files and content.

label_encoder_map

# One independent LabelEncoder per categorical column, keyed by the column
# name, so each column gets its own label <-> integer mapping.
label_encoder_map = {
    column_name: LabelEncoder()
    for column_name in (
        "의약품제형",
        "색상앞",
        "색상뒤",
        "분할선앞",
        "분할선뒤",
        "제형코드명",
    )
}

# Append an integer-coded "<column>_encoded" column for every categorical
# column registered in `label_encoder_map`, fitting each encoder on the way.
# The dataset is converted to pandas once up front: the source columns are
# never modified by `add_column`, so re-converting per column is wasted work.
source_frame = ds.to_pandas()
for column_name, encoder in label_encoder_map.items():
    ds = ds.add_column(
        f'{column_name}_encoded',
        encoder.fit_transform(source_frame[column_name]),
    )

# Fit a 5-nearest-neighbour classifier that predicts the product name
# ("품목명") from the label-encoded appearance columns.
# NOTE(review): cosine distance treats the integer label codes as magnitudes;
# confirm this is intended rather than a one-hot encoding of the categories.
feature_columns = [
    '의약품제형_encoded',
    '색상앞_encoded',
    '색상뒤_encoded',
    '분할선앞_encoded',
    '분할선뒤_encoded',
    '제형코드명_encoded',
]
knn = KNeighborsClassifier(n_neighbors=5, metric='cosine')
knn.fit(
    ds.select_columns(feature_columns).to_pandas(),
    # Pass the target as a 1-D column rather than a one-column DataFrame so
    # scikit-learn does not have to reshape a column vector (and warn about it).
    ds.select_columns("품목명").to_pandas()["품목명"],
)

Full code

Condensed-Co-Graph-And-Size-Graph

from datasets import load_dataset, disable_caching, Value
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Every graph table is a separate configuration of the same Hub repository;
# only the 'train' split of each configuration is kept.
graph_repo = 'brainer/pill_identification_graph'
(
    co_graph_edges,
    co_graph_nodes,
    size_graph_edges,
    size_graph_nodes,
    pill_ingredients_edges,
    pill_ingredients_nodes,
) = (
    load_dataset(graph_repo, config_name)['train']
    for config_name in (
        'co-graph-edges',
        'co-graph-nodes',
        'size-graph-edges',
        'size-graph-nodes',
        'merge-hira-pill_identification-edges',
        'merge-hira-pill_identification-nodes',
    )
)
# Bare expression with no effect when run as a script; presumably left over
# from a notebook cell that displayed the loaded datasets.
co_graph_nodes, co_graph_edges, size_graph_nodes, size_graph_edges, pill_ingredients_nodes, pill_ingredients_edges
# Unlike the graph tables above, this keeps every split of the dataset.
pill_identification_data = load_dataset('brainer/pill_identification_data', 'default')



# Item serial numbers are cast to strings before being pooled with the graph
# ids below (presumably so all ids share one type -- confirm against the data).
string_serial_data = pill_identification_data.cast_column('품목일련번호', Value(dtype='string'))
item_serial_number = string_serial_data['train']['품목일련번호']

# Encoder over every drug id seen in the size graph, the pill-ingredient
# edges, and the identification data.
drug_name_encoder = LabelEncoder()
drug_ids = (
    size_graph_nodes['id']
    + size_graph_edges['target']
    + pill_ingredients_edges['target']
    + item_serial_number
)
drug_name_encoder.fit(list(set(np.asarray(drug_ids))))

# Encoder over every node id from the co-graph and the pill-ingredient graph.
gnl_nm_encoder = LabelEncoder()
gnl_nm_ids = co_graph_nodes['id'] + pill_ingredients_nodes['id']
gnl_nm_encoder.fit(list(set(np.asarray(gnl_nm_ids))))

Downloads last month: -