caobin commited on
Commit
38f7d61
·
verified ·
1 Parent(s): d93aac3

Upload 24 files

Browse files
src/README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Instructions for replication
2
+
3
+ This directory contains all the source code needed to reproduce this work.
4
+
5
+ ### Data preparation
6
+
7
+ To directly run the train and validation script in this directory, data preparation needs to be done. The [OneDrive link](https://hkustgz-my.sharepoint.com/:f:/g/personal/bcao686_connect_hkust-gz_edu_cn/EhdJLtou8I1MoUJCu-KCoboBfi-wOp00WAlQCrONxjoYgg?e=rltgFE) contains all the training and synthetic testing data used in this work, stored in data.zip. This link also contains the pretrained model for single-phase and di-phase identification.
8
+
9
+ Files single-phase_checkpoint_0200.pth and bi-phase_checkpoint_2000.pth from the link above are the pretrained models; place them under the directory "pretrained".
10
+
11
+ File data.zip contains the data and the annotation file. Place directories "train" and "val" from data.zip under directory "data", and place the annotation files anno_train.csv and anno_val.csv under directory "annotation".
12
+
13
+ ### Model Training
14
+
15
+ #### Single-phase
16
+
17
+ Run ```python train_single-phase.py``` to train the single-phase identification model from scratch. To train the model on your data, additional parameters need to be set: ```python train_single-phase.py --data_dir_train=[your training data] --data_dir_val=[your validation data] --anno_train=[your anno file for training data] --anno_val=[your anno file for validation data]```.
18
+
19
+ #### Bi-phase
20
+
21
+ Run ```python train_bi-phase.py``` to train the bi-phase identification model. The bi-phase identification model is trained based on single-phase model, you can change the default setting by set the parameter ```load_path=[your pretrained single-phase model]```.
22
+
23
+ ### Model validation
24
+
25
+ Run ```python val_single-phase.py``` and ```python val_bi-phase.py``` to run the validation code at the default settings.
26
+
27
+ If you wish to validate the model on your data, please format your data using data_format.py.
src/annotation/# place anno_train.csv and anno_val.csv here ADDED
File without changes
src/annotation/anno_struc.csv ADDED
The diff for this file is too large to render. See raw diff
 
src/data/# place directory train and directory val here ADDED
File without changes
src/data_format.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import warnings

import numpy as np
import pandas as pd
from scipy import interpolate
4
+
5
+ global dataWriter
6
+
7
def convert_file(file_path):
    """Convert a diffraction data file into the resampled intensity array.

    Dispatches on the file extension ('txt', 'csv' or 'xy'); any other
    extension is skipped with a warning.

    :param file_path: path to the input data file.
    :return: the value returned by the matching parser (a 1-D numpy array
        of resampled intensities, or None for empty data), or None when
        the file type is unsupported.
    """
    suffix = file_path.split('.')[-1]
    if suffix == 'txt':
        return txt_to_csv(file_path)
    if suffix == 'csv':
        return csv_to_csv(file_path)
    if suffix == 'xy':
        return xy_to_csv(file_path)
    # Bug fix: the original built a Warning object without ever emitting it,
    # so unsupported files were skipped completely silently.
    warnings.warn(f'File {file_path} not supported, skipping...')
    return None
19
+
20
def txt_to_csv(file_path):
    """Parse a whitespace/tab separated text file of diffraction data.

    Each line is expected to hold either two fields (angle, intensity) or
    three fields (angle, intensity, background); with three fields the
    background is subtracted from the intensity.  Lines with any other
    field count, or three fields that fail float conversion (e.g. a text
    header), are silently skipped.

    :param file_path: path to the .txt (or .xy) file.
    :return: resampled intensity array from upsample(), or None if no
        usable rows were found.
    """
    rows = []
    # 'with' guarantees the handle is closed even if parsing raises;
    # iterating the file avoids materializing it via readlines().
    with open(file_path, 'r') as f:
        for line in f:
            fields = [tok for tok in line.strip('\n').replace('\t', ' ').split(' ') if tok]
            if len(fields) == 3:
                try:
                    fields = [fields[0], float(fields[1]) - float(fields[2])]
                except ValueError:
                    # Non-numeric 3-field line -- skip it.
                    continue
            elif len(fields) != 2:
                continue
            rows.append(fields)
    return upsample(rows)
39
+
40
def csv_to_csv(file_path):
    """Load a CSV of (angle, intensity) rows and resample it.

    :param file_path: path to the .csv file.
    :return: resampled intensity array from upsample(), or None if empty.
    """
    values = pd.read_csv(file_path).values
    return upsample(list(values))
44
+
45
def xy_to_csv(file_path):
    """Parse an .xy file; the layout matches .txt, so reuse that parser."""
    return txt_to_csv(file_path)
47
+
48
def upsample(rows):
    """Resample (angle, intensity) pairs onto a fixed 4500-point grid.

    The data is linearly interpolated ('slinear') over the 2-theta range
    [10, 80]; if the input does not span that full range, it is padded by
    repeating the first/last intensity at the missing boundary so that
    interp1d can evaluate every grid point.

    :param rows: sequence of (angle, intensity) pairs; values may be
        numeric strings or floats.  The input is not modified.
    :return: numpy array of 4500 interpolated intensities, or None when
        *rows* is empty.
    """
    if not rows:
        # Bug fix: the original constructed a Warning without emitting it,
        # so empty input produced no diagnostic at all.
        warnings.warn('Empty data!')
        return None

    # Work on a copy instead of mutating the caller's list (the original
    # also abused conditional expressions for their insert/append side
    # effects).
    padded = list(rows)
    if float(padded[0][0]) > 10:
        padded.insert(0, [10.0, padded[0][1]])
    if float(padded[-1][0]) < 80:
        padded.append([80.0, padded[-1][1]])

    data = np.array(padded, dtype=np.float32)
    f = interpolate.interp1d(data[:, 0], data[:, 1], kind='slinear')
    xnew = np.linspace(10, 80, 4500)
    return f(xnew)
src/inference&case/.DS_Store ADDED
Binary file (8.2 kB). View file
 
src/inference&case/.ipynb_checkpoints/CPICANNcode-checkpoint.ipynb ADDED
@@ -0,0 +1,778 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "f2299629",
6
+ "metadata": {},
7
+ "source": [
8
+ "# It is a template for applying CPICANN to X-ray powder diffraction phase identification"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "id": "cad44131",
14
+ "metadata": {},
15
+ "source": [
16
+ "### 1: install WPEMPhase package "
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "markdown",
21
+ "id": "af59fa9d",
22
+ "metadata": {},
23
+ "source": [
24
+ "pip install WPEMPhase"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "markdown",
29
+ "id": "7d9d6d02",
30
+ "metadata": {},
31
+ "source": [
32
+ "### 2: The first time you execute CPICANN on your computer, you should initialize the system documents. After that, you do not need to do any additional execution to run CPICANN."
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "raw",
37
+ "id": "4787c1b4",
38
+ "metadata": {},
39
+ "source": [
40
+ "Signature:\n",
41
+ "CPICANN.PhaseIdentifier(\n",
42
+ " FilePath,\n",
43
+ " Task='single-phase',\n",
44
+ " Model='default',\n",
45
+ " ElementsSystem='',\n",
46
+ " ElementsContained='',\n",
47
+ " Device='cuda:0',\n",
48
+ ")\n",
49
+ "Docstring:\n",
50
+ "CPICANN : Crystallographic Phase Identifier of Convolutional self-Attention Neural Network\n",
51
+ "\n",
52
+ "Contributors : Shouyang Zhang & Bin Cao\n",
53
+ "================================================================\n",
54
+ " Please feel free to open issues in the Github :\n",
55
+ " https://github.com/WPEM/CPICANN\n",
56
+ " or\n",
57
+ " contact Mr.Bin Cao (bcao686@connect.hkust-gz.edu.cn)\n",
58
+ " in case of any problems/comments/suggestions in using the code.\n",
59
+ "==================================================================\n",
60
+ "\n",
61
+ ":param FilePath\n",
62
+ "\n",
63
+ ":param Task, type=str, default='single-phase'\n",
64
+ " if Task = 'single-phase', CPICANN executes a single phase identification task\n",
65
+ " if Task = 'di-phase', CPICANN executes a dual phase identification task\n",
66
+ "\n",
67
+ ":param Model, type=str, default='default'\n",
68
+ " if Model = 'noise_model', CPICANN executes a single phase identification by noise-contained model\n",
69
+ " if Model = 'bca_model', CPICANN executes a single phase identification by background-contained model\n",
70
+ "\n",
71
+ ":param ElementsSystem, type=str, default=''\n",
72
+ " Specifies the elements to be included at least in the prediction, example: 'Fe'.\n",
73
+ "\n",
74
+ ":param ElementsContained, type=str, default=''\n",
75
+ " Specifies the elements to be included, with at least one of them in the prediction, example: 'O_C_S'.\n",
76
+ "\n",
77
+ ":param Device, type=str, default='cuda:0',\n",
78
+ " Which device to run the CPICANN, example: 'cuda:0', 'cpu'.\n",
79
+ "\n",
80
+ "examples:\n",
81
+ "from WPEMPhase import CPICANN\n",
82
+ "CPICANN.PhaseIdentifier(FilePath='./single-phase',Device='cpu')\n",
83
+ "File: ~/miniconda3/lib/python3.9/site-packages/WPEMPhase/CPICANN.py\n",
84
+ "Type: function"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": 2,
90
+ "id": "0922c99d",
91
+ "metadata": {},
92
+ "outputs": [
93
+ {
94
+ "name": "stdout",
95
+ "output_type": "stream",
96
+ "text": [
97
+ "Collecting WPEMPhase\n",
98
+ " Downloading WPEMPhase-0.1.0-py3-none-any.whl.metadata (1.0 kB)\n",
99
+ "Requirement already satisfied: torch in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (2.0.0)\n",
100
+ "Requirement already satisfied: plot in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (0.6.5)\n",
101
+ "Requirement already satisfied: scipy in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (1.9.3)\n",
102
+ "Requirement already satisfied: pandas in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (1.5.1)\n",
103
+ "Requirement already satisfied: numpy in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (1.23.3)\n",
104
+ "Requirement already satisfied: art in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (6.1)\n",
105
+ "Requirement already satisfied: pymatgen in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (2023.3.23)\n",
106
+ "Requirement already satisfied: wget in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (3.2)\n",
107
+ "Requirement already satisfied: python-dateutil>=2.8.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pandas->WPEMPhase) (2.8.2)\n",
108
+ "Requirement already satisfied: pytz>=2020.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pandas->WPEMPhase) (2022.5)\n",
109
+ "Requirement already satisfied: matplotlib in /Users/jacob/miniconda3/lib/python3.9/site-packages (from plot->WPEMPhase) (3.7.1)\n",
110
+ "Requirement already satisfied: typing in /Users/jacob/miniconda3/lib/python3.9/site-packages (from plot->WPEMPhase) (3.7.4.3)\n",
111
+ "Requirement already satisfied: pyyaml in /Users/jacob/miniconda3/lib/python3.9/site-packages (from plot->WPEMPhase) (6.0)\n",
112
+ "Requirement already satisfied: monty>=3.0.2 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (2023.4.10)\n",
113
+ "Requirement already satisfied: mp-api>=0.27.3 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (0.31.2)\n",
114
+ "Requirement already satisfied: networkx>=2.2 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (2.8.8)\n",
115
+ "Requirement already satisfied: palettable>=3.1.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (3.3.3)\n",
116
+ "Requirement already satisfied: plotly>=4.5.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (5.14.1)\n",
117
+ "Requirement already satisfied: pybtex in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (0.24.0)\n",
118
+ "Requirement already satisfied: requests in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (2.28.2)\n",
119
+ "Requirement already satisfied: ruamel.yaml>=0.17.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (0.17.21)\n",
120
+ "Requirement already satisfied: spglib>=2.0.2 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (2.0.2)\n",
121
+ "Requirement already satisfied: sympy in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (1.11.1)\n",
122
+ "Requirement already satisfied: tabulate in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (0.9.0)\n",
123
+ "Requirement already satisfied: tqdm in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (4.66.1)\n",
124
+ "Requirement already satisfied: uncertainties>=3.1.4 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (3.1.7)\n",
125
+ "Requirement already satisfied: filelock in /Users/jacob/miniconda3/lib/python3.9/site-packages (from torch->WPEMPhase) (3.10.7)\n",
126
+ "Requirement already satisfied: typing-extensions in /Users/jacob/miniconda3/lib/python3.9/site-packages (from torch->WPEMPhase) (4.11.0)\n",
127
+ "Requirement already satisfied: jinja2 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from torch->WPEMPhase) (3.1.2)\n",
128
+ "Requirement already satisfied: contourpy>=1.0.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (1.0.5)\n",
129
+ "Requirement already satisfied: cycler>=0.10 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (0.11.0)\n",
130
+ "Requirement already satisfied: fonttools>=4.22.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (4.38.0)\n",
131
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (1.4.4)\n",
132
+ "Requirement already satisfied: packaging>=20.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (23.0)\n",
133
+ "Requirement already satisfied: pillow>=6.2.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (9.5.0)\n",
134
+ "Requirement already satisfied: pyparsing>=2.3.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (3.0.9)\n",
135
+ "Requirement already satisfied: importlib-resources>=3.2.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (5.12.0)\n",
136
+ "Requirement already satisfied: setuptools in /Users/jacob/miniconda3/lib/python3.9/site-packages (from mp-api>=0.27.3->pymatgen->WPEMPhase) (67.6.1)\n",
137
+ "Requirement already satisfied: msgpack in /Users/jacob/miniconda3/lib/python3.9/site-packages (from mp-api>=0.27.3->pymatgen->WPEMPhase) (1.0.5)\n",
138
+ "Requirement already satisfied: emmet-core<=0.50.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from mp-api>=0.27.3->pymatgen->WPEMPhase) (0.50.0)\n",
139
+ "Requirement already satisfied: tenacity>=6.2.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from plotly>=4.5.0->pymatgen->WPEMPhase) (8.2.2)\n",
140
+ "Requirement already satisfied: six>=1.5 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from python-dateutil>=2.8.1->pandas->WPEMPhase) (1.16.0)\n",
141
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from requests->pymatgen->WPEMPhase) (2.0.4)\n",
142
+ "Requirement already satisfied: idna<4,>=2.5 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from requests->pymatgen->WPEMPhase) (3.3)\n",
143
+ "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from requests->pymatgen->WPEMPhase) (1.26.9)\n",
144
+ "Requirement already satisfied: certifi>=2017.4.17 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from requests->pymatgen->WPEMPhase) (2022.12.7)\n",
145
+ "Requirement already satisfied: ruamel.yaml.clib>=0.2.6 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from ruamel.yaml>=0.17.0->pymatgen->WPEMPhase) (0.2.6)\n",
146
+ "Requirement already satisfied: future in /Users/jacob/miniconda3/lib/python3.9/site-packages (from uncertainties>=3.1.4->pymatgen->WPEMPhase) (0.18.3)\n",
147
+ "Requirement already satisfied: MarkupSafe>=2.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from jinja2->torch->WPEMPhase) (2.1.1)\n",
148
+ "Requirement already satisfied: latexcodec>=1.0.4 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pybtex->pymatgen->WPEMPhase) (2.0.1)\n",
149
+ "Requirement already satisfied: mpmath>=0.19 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from sympy->pymatgen->WPEMPhase) (1.3.0)\n",
150
+ "Requirement already satisfied: pydantic>=1.10.2 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from emmet-core<=0.50.0->mp-api>=0.27.3->pymatgen->WPEMPhase) (1.10.7)\n",
151
+ "Requirement already satisfied: zipp>=3.1.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from importlib-resources>=3.2.0->matplotlib->plot->WPEMPhase) (3.9.0)\n",
152
+ "Downloading WPEMPhase-0.1.0-py3-none-any.whl (710 kB)\n",
153
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m710.2/710.2 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n",
154
+ "\u001b[?25hInstalling collected packages: WPEMPhase\n",
155
+ "Successfully installed WPEMPhase-0.1.0\n",
156
+ "Note: you may need to restart the kernel to use updated packages.\n"
157
+ ]
158
+ }
159
+ ],
160
+ "source": [
161
+ "pip install WPEMPhase"
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": 3,
167
+ "id": "8e1680a6",
168
+ "metadata": {},
169
+ "outputs": [],
170
+ "source": [
171
+ "from WPEMPhase import CPICANN"
172
+ ]
173
+ },
174
+ {
175
+ "cell_type": "code",
176
+ "execution_count": 4,
177
+ "id": "7625eb66",
178
+ "metadata": {},
179
+ "outputs": [
180
+ {
181
+ "name": "stdout",
182
+ "output_type": "stream",
183
+ "text": [
184
+ "This is the first time CPICANN is being executed on your computer, configuring...\n",
185
+ "Downloading: 3% [24690688 / 776454342] bytes"
186
+ ]
187
+ },
188
+ {
189
+ "name": "stderr",
190
+ "output_type": "stream",
191
+ "text": [
192
+ "IOPub message rate exceeded.\n",
193
+ "The notebook server will temporarily stop sending output\n",
194
+ "to the client in order to avoid crashing it.\n",
195
+ "To change this limit, set the config variable\n",
196
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
197
+ "\n",
198
+ "Current values:\n",
199
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
200
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
201
+ "\n"
202
+ ]
203
+ },
204
+ {
205
+ "name": "stdout",
206
+ "output_type": "stream",
207
+ "text": [
208
+ "Downloading: 7% [61341696 / 776454342] bytes"
209
+ ]
210
+ },
211
+ {
212
+ "name": "stderr",
213
+ "output_type": "stream",
214
+ "text": [
215
+ "IOPub message rate exceeded.\n",
216
+ "The notebook server will temporarily stop sending output\n",
217
+ "to the client in order to avoid crashing it.\n",
218
+ "To change this limit, set the config variable\n",
219
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
220
+ "\n",
221
+ "Current values:\n",
222
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
223
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
224
+ "\n"
225
+ ]
226
+ },
227
+ {
228
+ "name": "stdout",
229
+ "output_type": "stream",
230
+ "text": [
231
+ "Downloading: 13% [107954176 / 776454342] bytes"
232
+ ]
233
+ },
234
+ {
235
+ "name": "stderr",
236
+ "output_type": "stream",
237
+ "text": [
238
+ "IOPub message rate exceeded.\n",
239
+ "The notebook server will temporarily stop sending output\n",
240
+ "to the client in order to avoid crashing it.\n",
241
+ "To change this limit, set the config variable\n",
242
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
243
+ "\n",
244
+ "Current values:\n",
245
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
246
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
247
+ "\n"
248
+ ]
249
+ },
250
+ {
251
+ "name": "stdout",
252
+ "output_type": "stream",
253
+ "text": [
254
+ "Downloading: 19% [148324352 / 776454342] bytes"
255
+ ]
256
+ },
257
+ {
258
+ "name": "stderr",
259
+ "output_type": "stream",
260
+ "text": [
261
+ "IOPub message rate exceeded.\n",
262
+ "The notebook server will temporarily stop sending output\n",
263
+ "to the client in order to avoid crashing it.\n",
264
+ "To change this limit, set the config variable\n",
265
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
266
+ "\n",
267
+ "Current values:\n",
268
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
269
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
270
+ "\n"
271
+ ]
272
+ },
273
+ {
274
+ "name": "stdout",
275
+ "output_type": "stream",
276
+ "text": [
277
+ "Downloading: 24% [189382656 / 776454342] bytes"
278
+ ]
279
+ },
280
+ {
281
+ "name": "stderr",
282
+ "output_type": "stream",
283
+ "text": [
284
+ "IOPub message rate exceeded.\n",
285
+ "The notebook server will temporarily stop sending output\n",
286
+ "to the client in order to avoid crashing it.\n",
287
+ "To change this limit, set the config variable\n",
288
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
289
+ "\n",
290
+ "Current values:\n",
291
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
292
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
293
+ "\n"
294
+ ]
295
+ },
296
+ {
297
+ "name": "stdout",
298
+ "output_type": "stream",
299
+ "text": [
300
+ "Downloading: 28% [221265920 / 776454342] bytes"
301
+ ]
302
+ },
303
+ {
304
+ "name": "stderr",
305
+ "output_type": "stream",
306
+ "text": [
307
+ "IOPub message rate exceeded.\n",
308
+ "The notebook server will temporarily stop sending output\n",
309
+ "to the client in order to avoid crashing it.\n",
310
+ "To change this limit, set the config variable\n",
311
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
312
+ "\n",
313
+ "Current values:\n",
314
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
315
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
316
+ "\n"
317
+ ]
318
+ },
319
+ {
320
+ "name": "stdout",
321
+ "output_type": "stream",
322
+ "text": [
323
+ "Downloading: 33% [262488064 / 776454342] bytes"
324
+ ]
325
+ },
326
+ {
327
+ "name": "stderr",
328
+ "output_type": "stream",
329
+ "text": [
330
+ "IOPub message rate exceeded.\n",
331
+ "The notebook server will temporarily stop sending output\n",
332
+ "to the client in order to avoid crashing it.\n",
333
+ "To change this limit, set the config variable\n",
334
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
335
+ "\n",
336
+ "Current values:\n",
337
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
338
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
339
+ "\n"
340
+ ]
341
+ },
342
+ {
343
+ "name": "stdout",
344
+ "output_type": "stream",
345
+ "text": [
346
+ "Downloading: 39% [304799744 / 776454342] bytes"
347
+ ]
348
+ },
349
+ {
350
+ "name": "stderr",
351
+ "output_type": "stream",
352
+ "text": [
353
+ "IOPub message rate exceeded.\n",
354
+ "The notebook server will temporarily stop sending output\n",
355
+ "to the client in order to avoid crashing it.\n",
356
+ "To change this limit, set the config variable\n",
357
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
358
+ "\n",
359
+ "Current values:\n",
360
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
361
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
362
+ "\n"
363
+ ]
364
+ },
365
+ {
366
+ "name": "stdout",
367
+ "output_type": "stream",
368
+ "text": [
369
+ "Downloading: 44% [346030080 / 776454342] bytes"
370
+ ]
371
+ },
372
+ {
373
+ "name": "stderr",
374
+ "output_type": "stream",
375
+ "text": [
376
+ "IOPub message rate exceeded.\n",
377
+ "The notebook server will temporarily stop sending output\n",
378
+ "to the client in order to avoid crashing it.\n",
379
+ "To change this limit, set the config variable\n",
380
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
381
+ "\n",
382
+ "Current values:\n",
383
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
384
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
385
+ "\n"
386
+ ]
387
+ },
388
+ {
389
+ "name": "stdout",
390
+ "output_type": "stream",
391
+ "text": [
392
+ "Downloading: 50% [388333568 / 776454342] bytes"
393
+ ]
394
+ },
395
+ {
396
+ "name": "stderr",
397
+ "output_type": "stream",
398
+ "text": [
399
+ "IOPub message rate exceeded.\n",
400
+ "The notebook server will temporarily stop sending output\n",
401
+ "to the client in order to avoid crashing it.\n",
402
+ "To change this limit, set the config variable\n",
403
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
404
+ "\n",
405
+ "Current values:\n",
406
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
407
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
408
+ "\n"
409
+ ]
410
+ },
411
+ {
412
+ "name": "stdout",
413
+ "output_type": "stream",
414
+ "text": [
415
+ "Downloading: 55% [429015040 / 776454342] bytes"
416
+ ]
417
+ },
418
+ {
419
+ "name": "stderr",
420
+ "output_type": "stream",
421
+ "text": [
422
+ "IOPub message rate exceeded.\n",
423
+ "The notebook server will temporarily stop sending output\n",
424
+ "to the client in order to avoid crashing it.\n",
425
+ "To change this limit, set the config variable\n",
426
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
427
+ "\n",
428
+ "Current values:\n",
429
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
430
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
431
+ "\n"
432
+ ]
433
+ },
434
+ {
435
+ "name": "stdout",
436
+ "output_type": "stream",
437
+ "text": [
438
+ "Downloading: 60% [470278144 / 776454342] bytes"
439
+ ]
440
+ },
441
+ {
442
+ "name": "stderr",
443
+ "output_type": "stream",
444
+ "text": [
445
+ "IOPub message rate exceeded.\n",
446
+ "The notebook server will temporarily stop sending output\n",
447
+ "to the client in order to avoid crashing it.\n",
448
+ "To change this limit, set the config variable\n",
449
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
450
+ "\n",
451
+ "Current values:\n",
452
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
453
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
454
+ "\n"
455
+ ]
456
+ },
457
+ {
458
+ "name": "stdout",
459
+ "output_type": "stream",
460
+ "text": [
461
+ "Downloading: 65% [507609088 / 776454342] bytes"
462
+ ]
463
+ },
464
+ {
465
+ "name": "stderr",
466
+ "output_type": "stream",
467
+ "text": [
468
+ "IOPub message rate exceeded.\n",
469
+ "The notebook server will temporarily stop sending output\n",
470
+ "to the client in order to avoid crashing it.\n",
471
+ "To change this limit, set the config variable\n",
472
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
473
+ "\n",
474
+ "Current values:\n",
475
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
476
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
477
+ "\n"
478
+ ]
479
+ },
480
+ {
481
+ "name": "stdout",
482
+ "output_type": "stream",
483
+ "text": [
484
+ "Downloading: 70% [549601280 / 776454342] bytes"
485
+ ]
486
+ },
487
+ {
488
+ "name": "stderr",
489
+ "output_type": "stream",
490
+ "text": [
491
+ "IOPub message rate exceeded.\n",
492
+ "The notebook server will temporarily stop sending output\n",
493
+ "to the client in order to avoid crashing it.\n",
494
+ "To change this limit, set the config variable\n",
495
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
496
+ "\n",
497
+ "Current values:\n",
498
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
499
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
500
+ "\n"
501
+ ]
502
+ },
503
+ {
504
+ "name": "stdout",
505
+ "output_type": "stream",
506
+ "text": [
507
+ "Downloading: 75% [587497472 / 776454342] bytes"
508
+ ]
509
+ },
510
+ {
511
+ "name": "stderr",
512
+ "output_type": "stream",
513
+ "text": [
514
+ "IOPub message rate exceeded.\n",
515
+ "The notebook server will temporarily stop sending output\n",
516
+ "to the client in order to avoid crashing it.\n",
517
+ "To change this limit, set the config variable\n",
518
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
519
+ "\n",
520
+ "Current values:\n",
521
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
522
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
523
+ "\n"
524
+ ]
525
+ },
526
+ {
527
+ "name": "stdout",
528
+ "output_type": "stream",
529
+ "text": [
530
+ "Downloading: 80% [622919680 / 776454342] bytes"
531
+ ]
532
+ },
533
+ {
534
+ "name": "stderr",
535
+ "output_type": "stream",
536
+ "text": [
537
+ "IOPub message rate exceeded.\n",
538
+ "The notebook server will temporarily stop sending output\n",
539
+ "to the client in order to avoid crashing it.\n",
540
+ "To change this limit, set the config variable\n",
541
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
542
+ "\n",
543
+ "Current values:\n",
544
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
545
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
546
+ "\n"
547
+ ]
548
+ },
549
+ {
550
+ "name": "stdout",
551
+ "output_type": "stream",
552
+ "text": [
553
+ "Downloading: 86% [668491776 / 776454342] bytes"
554
+ ]
555
+ },
556
+ {
557
+ "name": "stderr",
558
+ "output_type": "stream",
559
+ "text": [
560
+ "IOPub message rate exceeded.\n",
561
+ "The notebook server will temporarily stop sending output\n",
562
+ "to the client in order to avoid crashing it.\n",
563
+ "To change this limit, set the config variable\n",
564
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
565
+ "\n",
566
+ "Current values:\n",
567
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
568
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
569
+ "\n"
570
+ ]
571
+ },
572
+ {
573
+ "name": "stdout",
574
+ "output_type": "stream",
575
+ "text": [
576
+ "Downloading: 89% [698474496 / 776454342] bytes"
577
+ ]
578
+ },
579
+ {
580
+ "name": "stderr",
581
+ "output_type": "stream",
582
+ "text": [
583
+ "IOPub message rate exceeded.\n",
584
+ "The notebook server will temporarily stop sending output\n",
585
+ "to the client in order to avoid crashing it.\n",
586
+ "To change this limit, set the config variable\n",
587
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
588
+ "\n",
589
+ "Current values:\n",
590
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
591
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
592
+ "\n"
593
+ ]
594
+ },
595
+ {
596
+ "name": "stdout",
597
+ "output_type": "stream",
598
+ "text": [
599
+ "Downloading: 91% [713318400 / 776454342] bytes"
600
+ ]
601
+ },
602
+ {
603
+ "name": "stderr",
604
+ "output_type": "stream",
605
+ "text": [
606
+ "IOPub message rate exceeded.\n",
607
+ "The notebook server will temporarily stop sending output\n",
608
+ "to the client in order to avoid crashing it.\n",
609
+ "To change this limit, set the config variable\n",
610
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
611
+ "\n",
612
+ "Current values:\n",
613
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
614
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
615
+ "\n"
616
+ ]
617
+ },
618
+ {
619
+ "name": "stdout",
620
+ "output_type": "stream",
621
+ "text": [
622
+ "Downloading: 96% [746487808 / 776454342] bytes"
623
+ ]
624
+ },
625
+ {
626
+ "name": "stderr",
627
+ "output_type": "stream",
628
+ "text": [
629
+ "IOPub message rate exceeded.\n",
630
+ "The notebook server will temporarily stop sending output\n",
631
+ "to the client in order to avoid crashing it.\n",
632
+ "To change this limit, set the config variable\n",
633
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
634
+ "\n",
635
+ "Current values:\n",
636
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
637
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
638
+ "\n"
639
+ ]
640
+ },
641
+ {
642
+ "name": "stdout",
643
+ "output_type": "stream",
644
+ "text": [
645
+ "Downloading: 100% [776454342 / 776454342] bytes ____ ____ ___ ____ _ _ _ _ _ \n",
646
+ " / ___|| _ \\ |_ _| / ___| / \\ | \\ | || \\ | |\n",
647
+ "| | | |_) | | | | | / _ \\ | \\| || \\| |\n",
648
+ "| |___ | __/ | | | |___ / ___ \\ | |\\ || |\\ |\n",
649
+ " \\____||_| |___| \\____|/_/ \\_\\|_| \\_||_| \\_|\n",
650
+ " \n",
651
+ "\n",
652
+ "The phase identification module of WPEM\n",
653
+ "URL : https://github.com/WPEM/CPICANN\n",
654
+ "Executed on : 2024-04-21 14:14:25 | Have a great day.\n",
655
+ "================================================================================\n",
656
+ "loaded model from /Users/jacob/miniconda3/lib/python3.9/site-packages/WPEMPhase/pretrained/CPICANN_single-phase_back3.pth\n",
657
+ "\n",
658
+ ">>>>>> RUNNING: ./testdata/.DS_Store\n",
659
+ "\n",
660
+ ">>>>>> RUNNING: ./testdata/PbSO4.csv\n",
661
+ "pred cls_id : 2475 confidence : 98.89%\n",
662
+ "pred cod_id : 9009622 formula : Pb2 S2 O6\n",
663
+ "pred space group No: 11 space group : P2_1/m\n",
664
+ "\n",
665
+ "inference result saved in infResults_testdata.csv\n",
666
+ "inference figures saved at figs/\n",
667
+ "THE END\n"
668
+ ]
669
+ },
670
+ {
671
+ "data": {
672
+ "text/plain": [
673
+ "True"
674
+ ]
675
+ },
676
+ "execution_count": 4,
677
+ "metadata": {},
678
+ "output_type": "execute_result"
679
+ }
680
+ ],
681
+ "source": [
682
+ "# Here, illustrate the system requirements and how to initialize the system files at the first time of execution.\n",
683
+ "\n",
684
+ "CPICANN.PhaseIdentifier(FilePath='./testdata',Model='bca_model',Task='single-phase',Device='cpu',)"
685
+ ]
686
+ },
687
+ {
688
+ "cell_type": "code",
689
+ "execution_count": 7,
690
+ "id": "304b62b5",
691
+ "metadata": {},
692
+ "outputs": [
693
+ {
694
+ "name": "stdout",
695
+ "output_type": "stream",
696
+ "text": [
697
+ " ____ ____ ___ ____ _ _ _ _ _ \n",
698
+ " / ___|| _ \\ |_ _| / ___| / \\ | \\ | || \\ | |\n",
699
+ "| | | |_) | | | | | / _ \\ | \\| || \\| |\n",
700
+ "| |___ | __/ | | | |___ / ___ \\ | |\\ || |\\ |\n",
701
+ " \\____||_| |___| \\____|/_/ \\_\\|_| \\_||_| \\_|\n",
702
+ " \n",
703
+ "\n",
704
+ "The phase identification module of WPEM\n",
705
+ "URL : https://github.com/WPEM/CPICANN\n",
706
+ "Executed on : 2024-04-21 14:14:53 | Have a great day.\n",
707
+ "================================================================================\n",
708
+ "loaded model from /Users/jacob/miniconda3/lib/python3.9/site-packages/WPEMPhase/pretrained/CPICANN_single-phase_noise3.pth\n",
709
+ "\n",
710
+ ">>>>>> RUNNING: ./testdata/.DS_Store\n",
711
+ "\n",
712
+ ">>>>>> RUNNING: ./testdata/PbSO4.csv\n",
713
+ "pred cls_id : 3378 confidence : 100.00%\n",
714
+ "pred cod_id : 9004484 formula : Pb4 S4 O16\n",
715
+ "pred space group No: 62 space group : Pnma\n",
716
+ "\n",
717
+ "inference result saved in infResults_testdata.csv\n",
718
+ "inference figures saved at figs/\n",
719
+ "THE END\n"
720
+ ]
721
+ },
722
+ {
723
+ "data": {
724
+ "text/plain": [
725
+ "True"
726
+ ]
727
+ },
728
+ "execution_count": 7,
729
+ "metadata": {},
730
+ "output_type": "execute_result"
731
+ }
732
+ ],
733
+ "source": [
734
+ "from WPEMPhase import CPICANN\n",
735
+ "# Here, illustrate the system requirements and how to initialize the system files at the first time of execution.\n",
736
+ "\n",
737
+ "CPICANN.PhaseIdentifier(FilePath='./testdata',Model='noise_model',Task='single-phase',ElementsContained='Pb_S_O',Device='cpu',)"
738
+ ]
739
+ },
740
+ {
741
+ "cell_type": "raw",
742
+ "id": "977ede12",
743
+ "metadata": {},
744
+ "source": [
745
+ "For inquiries or assistance, please don't hesitate to contact us at bcao686@connect.hkust-gz.edu.cn (Dr. CAO Bin)."
746
+ ]
747
+ },
748
+ {
749
+ "cell_type": "code",
750
+ "execution_count": null,
751
+ "id": "bb101480",
752
+ "metadata": {},
753
+ "outputs": [],
754
+ "source": []
755
+ }
756
+ ],
757
+ "metadata": {
758
+ "kernelspec": {
759
+ "display_name": "Python 3 (ipykernel)",
760
+ "language": "python",
761
+ "name": "python3"
762
+ },
763
+ "language_info": {
764
+ "codemirror_mode": {
765
+ "name": "ipython",
766
+ "version": 3
767
+ },
768
+ "file_extension": ".py",
769
+ "mimetype": "text/x-python",
770
+ "name": "python",
771
+ "nbconvert_exporter": "python",
772
+ "pygments_lexer": "ipython3",
773
+ "version": "3.9.12"
774
+ }
775
+ },
776
+ "nbformat": 4,
777
+ "nbformat_minor": 5
778
+ }
src/inference&case/CPICANNcode.ipynb ADDED
@@ -0,0 +1,778 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "f2299629",
6
+ "metadata": {},
7
+ "source": [
8
+ "# It is a template for applying CPICANN to X-ray powder diffraction phase identification"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "id": "cad44131",
14
+ "metadata": {},
15
+ "source": [
16
+ "### 1: install WPEMPhase package "
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "markdown",
21
+ "id": "af59fa9d",
22
+ "metadata": {},
23
+ "source": [
24
+ "pip install WPEMPhase"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "markdown",
29
+ "id": "7d9d6d02",
30
+ "metadata": {},
31
+ "source": [
32
+ "### 2: The first time you execute CPICANN on your computer, you should initialize the system documents. After that, you do not need to do any additional execution to run CPICANN."
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "raw",
37
+ "id": "4787c1b4",
38
+ "metadata": {},
39
+ "source": [
40
+ "Signature:\n",
41
+ "CPICANN.PhaseIdentifier(\n",
42
+ " FilePath,\n",
43
+ " Task='single-phase',\n",
44
+ " Model='default',\n",
45
+ " ElementsSystem='',\n",
46
+ " ElementsContained='',\n",
47
+ " Device='cuda:0',\n",
48
+ ")\n",
49
+ "Docstring:\n",
50
+ "CPICANN : Crystallographic Phase Identifier of Convolutional self-Attention Neural Network\n",
51
+ "\n",
52
+ "Contributors : Shouyang Zhang & Bin Cao\n",
53
+ "================================================================\n",
54
+ " Please feel free to open issues in the Github :\n",
55
+ " https://github.com/WPEM/CPICANN\n",
56
+ " or\n",
57
+ " contact Mr.Bin Cao (bcao686@connect.hkust-gz.edu.cn)\n",
58
+ " in case of any problems/comments/suggestions in using the code.\n",
59
+ "==================================================================\n",
60
+ "\n",
61
+ ":param FilePath\n",
62
+ "\n",
63
+ ":param Task, type=str, default='single-phase'\n",
64
+ " if Task = 'single-phase', CPICANN executes a single phase identification task\n",
65
+ " if Task = 'di-phase', CPICANN executes a dual phase identification task\n",
66
+ "\n",
67
+ ":param Model, type=str, default='default'\n",
68
+ " if Model = 'noise_model', CPICANN executes a single phase identification by noise-contained model\n",
69
+ " if Model = 'bca_model', CPICANN executes a single phase identification by background-contained model\n",
70
+ "\n",
71
+ ":param ElementsSystem, type=str, default=''\n",
72
+ " Specifies the elements to be included at least in the prediction, example: 'Fe'.\n",
73
+ "\n",
74
+ ":param ElementsContained, type=str, default=''\n",
75
+ " Specifies the elements to be included, with at least one of them in the prediction, example: 'O_C_S'.\n",
76
+ "\n",
77
+ ":param Device, type=str, default='cuda:0',\n",
78
+ " Which device to run the CPICANN, example: 'cuda:0', 'cpu'.\n",
79
+ "\n",
80
+ "examples:\n",
81
+ "from WPEMPhase import CPICANN\n",
82
+ "CPICANN.PhaseIdentifier(FilePath='./single-phase',Device='cpu')\n",
83
+ "File: ~/miniconda3/lib/python3.9/site-packages/WPEMPhase/CPICANN.py\n",
84
+ "Type: function"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": 2,
90
+ "id": "0922c99d",
91
+ "metadata": {},
92
+ "outputs": [
93
+ {
94
+ "name": "stdout",
95
+ "output_type": "stream",
96
+ "text": [
97
+ "Collecting WPEMPhase\n",
98
+ " Downloading WPEMPhase-0.1.0-py3-none-any.whl.metadata (1.0 kB)\n",
99
+ "Requirement already satisfied: torch in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (2.0.0)\n",
100
+ "Requirement already satisfied: plot in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (0.6.5)\n",
101
+ "Requirement already satisfied: scipy in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (1.9.3)\n",
102
+ "Requirement already satisfied: pandas in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (1.5.1)\n",
103
+ "Requirement already satisfied: numpy in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (1.23.3)\n",
104
+ "Requirement already satisfied: art in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (6.1)\n",
105
+ "Requirement already satisfied: pymatgen in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (2023.3.23)\n",
106
+ "Requirement already satisfied: wget in /Users/jacob/miniconda3/lib/python3.9/site-packages (from WPEMPhase) (3.2)\n",
107
+ "Requirement already satisfied: python-dateutil>=2.8.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pandas->WPEMPhase) (2.8.2)\n",
108
+ "Requirement already satisfied: pytz>=2020.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pandas->WPEMPhase) (2022.5)\n",
109
+ "Requirement already satisfied: matplotlib in /Users/jacob/miniconda3/lib/python3.9/site-packages (from plot->WPEMPhase) (3.7.1)\n",
110
+ "Requirement already satisfied: typing in /Users/jacob/miniconda3/lib/python3.9/site-packages (from plot->WPEMPhase) (3.7.4.3)\n",
111
+ "Requirement already satisfied: pyyaml in /Users/jacob/miniconda3/lib/python3.9/site-packages (from plot->WPEMPhase) (6.0)\n",
112
+ "Requirement already satisfied: monty>=3.0.2 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (2023.4.10)\n",
113
+ "Requirement already satisfied: mp-api>=0.27.3 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (0.31.2)\n",
114
+ "Requirement already satisfied: networkx>=2.2 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (2.8.8)\n",
115
+ "Requirement already satisfied: palettable>=3.1.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (3.3.3)\n",
116
+ "Requirement already satisfied: plotly>=4.5.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (5.14.1)\n",
117
+ "Requirement already satisfied: pybtex in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (0.24.0)\n",
118
+ "Requirement already satisfied: requests in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (2.28.2)\n",
119
+ "Requirement already satisfied: ruamel.yaml>=0.17.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (0.17.21)\n",
120
+ "Requirement already satisfied: spglib>=2.0.2 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (2.0.2)\n",
121
+ "Requirement already satisfied: sympy in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (1.11.1)\n",
122
+ "Requirement already satisfied: tabulate in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (0.9.0)\n",
123
+ "Requirement already satisfied: tqdm in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (4.66.1)\n",
124
+ "Requirement already satisfied: uncertainties>=3.1.4 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pymatgen->WPEMPhase) (3.1.7)\n",
125
+ "Requirement already satisfied: filelock in /Users/jacob/miniconda3/lib/python3.9/site-packages (from torch->WPEMPhase) (3.10.7)\n",
126
+ "Requirement already satisfied: typing-extensions in /Users/jacob/miniconda3/lib/python3.9/site-packages (from torch->WPEMPhase) (4.11.0)\n",
127
+ "Requirement already satisfied: jinja2 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from torch->WPEMPhase) (3.1.2)\n",
128
+ "Requirement already satisfied: contourpy>=1.0.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (1.0.5)\n",
129
+ "Requirement already satisfied: cycler>=0.10 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (0.11.0)\n",
130
+ "Requirement already satisfied: fonttools>=4.22.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (4.38.0)\n",
131
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (1.4.4)\n",
132
+ "Requirement already satisfied: packaging>=20.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (23.0)\n",
133
+ "Requirement already satisfied: pillow>=6.2.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (9.5.0)\n",
134
+ "Requirement already satisfied: pyparsing>=2.3.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (3.0.9)\n",
135
+ "Requirement already satisfied: importlib-resources>=3.2.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from matplotlib->plot->WPEMPhase) (5.12.0)\n",
136
+ "Requirement already satisfied: setuptools in /Users/jacob/miniconda3/lib/python3.9/site-packages (from mp-api>=0.27.3->pymatgen->WPEMPhase) (67.6.1)\n",
137
+ "Requirement already satisfied: msgpack in /Users/jacob/miniconda3/lib/python3.9/site-packages (from mp-api>=0.27.3->pymatgen->WPEMPhase) (1.0.5)\n",
138
+ "Requirement already satisfied: emmet-core<=0.50.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from mp-api>=0.27.3->pymatgen->WPEMPhase) (0.50.0)\n",
139
+ "Requirement already satisfied: tenacity>=6.2.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from plotly>=4.5.0->pymatgen->WPEMPhase) (8.2.2)\n",
140
+ "Requirement already satisfied: six>=1.5 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from python-dateutil>=2.8.1->pandas->WPEMPhase) (1.16.0)\n",
141
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from requests->pymatgen->WPEMPhase) (2.0.4)\n",
142
+ "Requirement already satisfied: idna<4,>=2.5 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from requests->pymatgen->WPEMPhase) (3.3)\n",
143
+ "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from requests->pymatgen->WPEMPhase) (1.26.9)\n",
144
+ "Requirement already satisfied: certifi>=2017.4.17 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from requests->pymatgen->WPEMPhase) (2022.12.7)\n",
145
+ "Requirement already satisfied: ruamel.yaml.clib>=0.2.6 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from ruamel.yaml>=0.17.0->pymatgen->WPEMPhase) (0.2.6)\n",
146
+ "Requirement already satisfied: future in /Users/jacob/miniconda3/lib/python3.9/site-packages (from uncertainties>=3.1.4->pymatgen->WPEMPhase) (0.18.3)\n",
147
+ "Requirement already satisfied: MarkupSafe>=2.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from jinja2->torch->WPEMPhase) (2.1.1)\n",
148
+ "Requirement already satisfied: latexcodec>=1.0.4 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from pybtex->pymatgen->WPEMPhase) (2.0.1)\n",
149
+ "Requirement already satisfied: mpmath>=0.19 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from sympy->pymatgen->WPEMPhase) (1.3.0)\n",
150
+ "Requirement already satisfied: pydantic>=1.10.2 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from emmet-core<=0.50.0->mp-api>=0.27.3->pymatgen->WPEMPhase) (1.10.7)\n",
151
+ "Requirement already satisfied: zipp>=3.1.0 in /Users/jacob/miniconda3/lib/python3.9/site-packages (from importlib-resources>=3.2.0->matplotlib->plot->WPEMPhase) (3.9.0)\n",
152
+ "Downloading WPEMPhase-0.1.0-py3-none-any.whl (710 kB)\n",
153
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m710.2/710.2 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n",
154
+ "\u001b[?25hInstalling collected packages: WPEMPhase\n",
155
+ "Successfully installed WPEMPhase-0.1.0\n",
156
+ "Note: you may need to restart the kernel to use updated packages.\n"
157
+ ]
158
+ }
159
+ ],
160
+ "source": [
161
+ "pip install WPEMPhase"
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": 3,
167
+ "id": "8e1680a6",
168
+ "metadata": {},
169
+ "outputs": [],
170
+ "source": [
171
+ "from WPEMPhase import CPICANN"
172
+ ]
173
+ },
174
+ {
175
+ "cell_type": "code",
176
+ "execution_count": 4,
177
+ "id": "7625eb66",
178
+ "metadata": {},
179
+ "outputs": [
180
+ {
181
+ "name": "stdout",
182
+ "output_type": "stream",
183
+ "text": [
184
+ "This is the first time CPICANN is being executed on your computer, configuring...\n",
185
+ "Downloading: 3% [24690688 / 776454342] bytes"
186
+ ]
187
+ },
188
+ {
189
+ "name": "stderr",
190
+ "output_type": "stream",
191
+ "text": [
192
+ "IOPub message rate exceeded.\n",
193
+ "The notebook server will temporarily stop sending output\n",
194
+ "to the client in order to avoid crashing it.\n",
195
+ "To change this limit, set the config variable\n",
196
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
197
+ "\n",
198
+ "Current values:\n",
199
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
200
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
201
+ "\n"
202
+ ]
203
+ },
204
+ {
205
+ "name": "stdout",
206
+ "output_type": "stream",
207
+ "text": [
208
+ "Downloading: 7% [61341696 / 776454342] bytes"
209
+ ]
210
+ },
211
+ {
212
+ "name": "stderr",
213
+ "output_type": "stream",
214
+ "text": [
215
+ "IOPub message rate exceeded.\n",
216
+ "The notebook server will temporarily stop sending output\n",
217
+ "to the client in order to avoid crashing it.\n",
218
+ "To change this limit, set the config variable\n",
219
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
220
+ "\n",
221
+ "Current values:\n",
222
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
223
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
224
+ "\n"
225
+ ]
226
+ },
227
+ {
228
+ "name": "stdout",
229
+ "output_type": "stream",
230
+ "text": [
231
+ "Downloading: 13% [107954176 / 776454342] bytes"
232
+ ]
233
+ },
234
+ {
235
+ "name": "stderr",
236
+ "output_type": "stream",
237
+ "text": [
238
+ "IOPub message rate exceeded.\n",
239
+ "The notebook server will temporarily stop sending output\n",
240
+ "to the client in order to avoid crashing it.\n",
241
+ "To change this limit, set the config variable\n",
242
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
243
+ "\n",
244
+ "Current values:\n",
245
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
246
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
247
+ "\n"
248
+ ]
249
+ },
250
+ {
251
+ "name": "stdout",
252
+ "output_type": "stream",
253
+ "text": [
254
+ "Downloading: 19% [148324352 / 776454342] bytes"
255
+ ]
256
+ },
257
+ {
258
+ "name": "stderr",
259
+ "output_type": "stream",
260
+ "text": [
261
+ "IOPub message rate exceeded.\n",
262
+ "The notebook server will temporarily stop sending output\n",
263
+ "to the client in order to avoid crashing it.\n",
264
+ "To change this limit, set the config variable\n",
265
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
266
+ "\n",
267
+ "Current values:\n",
268
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
269
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
270
+ "\n"
271
+ ]
272
+ },
273
+ {
274
+ "name": "stdout",
275
+ "output_type": "stream",
276
+ "text": [
277
+ "Downloading: 24% [189382656 / 776454342] bytes"
278
+ ]
279
+ },
280
+ {
281
+ "name": "stderr",
282
+ "output_type": "stream",
283
+ "text": [
284
+ "IOPub message rate exceeded.\n",
285
+ "The notebook server will temporarily stop sending output\n",
286
+ "to the client in order to avoid crashing it.\n",
287
+ "To change this limit, set the config variable\n",
288
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
289
+ "\n",
290
+ "Current values:\n",
291
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
292
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
293
+ "\n"
294
+ ]
295
+ },
296
+ {
297
+ "name": "stdout",
298
+ "output_type": "stream",
299
+ "text": [
300
+ "Downloading: 28% [221265920 / 776454342] bytes"
301
+ ]
302
+ },
303
+ {
304
+ "name": "stderr",
305
+ "output_type": "stream",
306
+ "text": [
307
+ "IOPub message rate exceeded.\n",
308
+ "The notebook server will temporarily stop sending output\n",
309
+ "to the client in order to avoid crashing it.\n",
310
+ "To change this limit, set the config variable\n",
311
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
312
+ "\n",
313
+ "Current values:\n",
314
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
315
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
316
+ "\n"
317
+ ]
318
+ },
319
+ {
320
+ "name": "stdout",
321
+ "output_type": "stream",
322
+ "text": [
323
+ "Downloading: 33% [262488064 / 776454342] bytes"
324
+ ]
325
+ },
326
+ {
327
+ "name": "stderr",
328
+ "output_type": "stream",
329
+ "text": [
330
+ "IOPub message rate exceeded.\n",
331
+ "The notebook server will temporarily stop sending output\n",
332
+ "to the client in order to avoid crashing it.\n",
333
+ "To change this limit, set the config variable\n",
334
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
335
+ "\n",
336
+ "Current values:\n",
337
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
338
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
339
+ "\n"
340
+ ]
341
+ },
342
+ {
343
+ "name": "stdout",
344
+ "output_type": "stream",
345
+ "text": [
346
+ "Downloading: 39% [304799744 / 776454342] bytes"
347
+ ]
348
+ },
349
+ {
350
+ "name": "stderr",
351
+ "output_type": "stream",
352
+ "text": [
353
+ "IOPub message rate exceeded.\n",
354
+ "The notebook server will temporarily stop sending output\n",
355
+ "to the client in order to avoid crashing it.\n",
356
+ "To change this limit, set the config variable\n",
357
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
358
+ "\n",
359
+ "Current values:\n",
360
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
361
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
362
+ "\n"
363
+ ]
364
+ },
365
+ {
366
+ "name": "stdout",
367
+ "output_type": "stream",
368
+ "text": [
369
+ "Downloading: 44% [346030080 / 776454342] bytes"
370
+ ]
371
+ },
372
+ {
373
+ "name": "stderr",
374
+ "output_type": "stream",
375
+ "text": [
376
+ "IOPub message rate exceeded.\n",
377
+ "The notebook server will temporarily stop sending output\n",
378
+ "to the client in order to avoid crashing it.\n",
379
+ "To change this limit, set the config variable\n",
380
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
381
+ "\n",
382
+ "Current values:\n",
383
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
384
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
385
+ "\n"
386
+ ]
387
+ },
388
+ {
389
+ "name": "stdout",
390
+ "output_type": "stream",
391
+ "text": [
392
+ "Downloading: 50% [388333568 / 776454342] bytes"
393
+ ]
394
+ },
395
+ {
396
+ "name": "stderr",
397
+ "output_type": "stream",
398
+ "text": [
399
+ "IOPub message rate exceeded.\n",
400
+ "The notebook server will temporarily stop sending output\n",
401
+ "to the client in order to avoid crashing it.\n",
402
+ "To change this limit, set the config variable\n",
403
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
404
+ "\n",
405
+ "Current values:\n",
406
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
407
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
408
+ "\n"
409
+ ]
410
+ },
411
+ {
412
+ "name": "stdout",
413
+ "output_type": "stream",
414
+ "text": [
415
+ "Downloading: 55% [429015040 / 776454342] bytes"
416
+ ]
417
+ },
418
+ {
419
+ "name": "stderr",
420
+ "output_type": "stream",
421
+ "text": [
422
+ "IOPub message rate exceeded.\n",
423
+ "The notebook server will temporarily stop sending output\n",
424
+ "to the client in order to avoid crashing it.\n",
425
+ "To change this limit, set the config variable\n",
426
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
427
+ "\n",
428
+ "Current values:\n",
429
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
430
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
431
+ "\n"
432
+ ]
433
+ },
434
+ {
435
+ "name": "stdout",
436
+ "output_type": "stream",
437
+ "text": [
438
+ "Downloading: 60% [470278144 / 776454342] bytes"
439
+ ]
440
+ },
441
+ {
442
+ "name": "stderr",
443
+ "output_type": "stream",
444
+ "text": [
445
+ "IOPub message rate exceeded.\n",
446
+ "The notebook server will temporarily stop sending output\n",
447
+ "to the client in order to avoid crashing it.\n",
448
+ "To change this limit, set the config variable\n",
449
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
450
+ "\n",
451
+ "Current values:\n",
452
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
453
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
454
+ "\n"
455
+ ]
456
+ },
457
+ {
458
+ "name": "stdout",
459
+ "output_type": "stream",
460
+ "text": [
461
+ "Downloading: 65% [507609088 / 776454342] bytes"
462
+ ]
463
+ },
464
+ {
465
+ "name": "stderr",
466
+ "output_type": "stream",
467
+ "text": [
468
+ "IOPub message rate exceeded.\n",
469
+ "The notebook server will temporarily stop sending output\n",
470
+ "to the client in order to avoid crashing it.\n",
471
+ "To change this limit, set the config variable\n",
472
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
473
+ "\n",
474
+ "Current values:\n",
475
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
476
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
477
+ "\n"
478
+ ]
479
+ },
480
+ {
481
+ "name": "stdout",
482
+ "output_type": "stream",
483
+ "text": [
484
+ "Downloading: 70% [549601280 / 776454342] bytes"
485
+ ]
486
+ },
487
+ {
488
+ "name": "stderr",
489
+ "output_type": "stream",
490
+ "text": [
491
+ "IOPub message rate exceeded.\n",
492
+ "The notebook server will temporarily stop sending output\n",
493
+ "to the client in order to avoid crashing it.\n",
494
+ "To change this limit, set the config variable\n",
495
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
496
+ "\n",
497
+ "Current values:\n",
498
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
499
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
500
+ "\n"
501
+ ]
502
+ },
503
+ {
504
+ "name": "stdout",
505
+ "output_type": "stream",
506
+ "text": [
507
+ "Downloading: 75% [587497472 / 776454342] bytes"
508
+ ]
509
+ },
510
+ {
511
+ "name": "stderr",
512
+ "output_type": "stream",
513
+ "text": [
514
+ "IOPub message rate exceeded.\n",
515
+ "The notebook server will temporarily stop sending output\n",
516
+ "to the client in order to avoid crashing it.\n",
517
+ "To change this limit, set the config variable\n",
518
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
519
+ "\n",
520
+ "Current values:\n",
521
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
522
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
523
+ "\n"
524
+ ]
525
+ },
526
+ {
527
+ "name": "stdout",
528
+ "output_type": "stream",
529
+ "text": [
530
+ "Downloading: 80% [622919680 / 776454342] bytes"
531
+ ]
532
+ },
533
+ {
534
+ "name": "stderr",
535
+ "output_type": "stream",
536
+ "text": [
537
+ "IOPub message rate exceeded.\n",
538
+ "The notebook server will temporarily stop sending output\n",
539
+ "to the client in order to avoid crashing it.\n",
540
+ "To change this limit, set the config variable\n",
541
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
542
+ "\n",
543
+ "Current values:\n",
544
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
545
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
546
+ "\n"
547
+ ]
548
+ },
549
+ {
550
+ "name": "stdout",
551
+ "output_type": "stream",
552
+ "text": [
553
+ "Downloading: 86% [668491776 / 776454342] bytes"
554
+ ]
555
+ },
556
+ {
557
+ "name": "stderr",
558
+ "output_type": "stream",
559
+ "text": [
560
+ "IOPub message rate exceeded.\n",
561
+ "The notebook server will temporarily stop sending output\n",
562
+ "to the client in order to avoid crashing it.\n",
563
+ "To change this limit, set the config variable\n",
564
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
565
+ "\n",
566
+ "Current values:\n",
567
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
568
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
569
+ "\n"
570
+ ]
571
+ },
572
+ {
573
+ "name": "stdout",
574
+ "output_type": "stream",
575
+ "text": [
576
+ "Downloading: 89% [698474496 / 776454342] bytes"
577
+ ]
578
+ },
579
+ {
580
+ "name": "stderr",
581
+ "output_type": "stream",
582
+ "text": [
583
+ "IOPub message rate exceeded.\n",
584
+ "The notebook server will temporarily stop sending output\n",
585
+ "to the client in order to avoid crashing it.\n",
586
+ "To change this limit, set the config variable\n",
587
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
588
+ "\n",
589
+ "Current values:\n",
590
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
591
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
592
+ "\n"
593
+ ]
594
+ },
595
+ {
596
+ "name": "stdout",
597
+ "output_type": "stream",
598
+ "text": [
599
+ "Downloading: 91% [713318400 / 776454342] bytes"
600
+ ]
601
+ },
602
+ {
603
+ "name": "stderr",
604
+ "output_type": "stream",
605
+ "text": [
606
+ "IOPub message rate exceeded.\n",
607
+ "The notebook server will temporarily stop sending output\n",
608
+ "to the client in order to avoid crashing it.\n",
609
+ "To change this limit, set the config variable\n",
610
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
611
+ "\n",
612
+ "Current values:\n",
613
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
614
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
615
+ "\n"
616
+ ]
617
+ },
618
+ {
619
+ "name": "stdout",
620
+ "output_type": "stream",
621
+ "text": [
622
+ "Downloading: 96% [746487808 / 776454342] bytes"
623
+ ]
624
+ },
625
+ {
626
+ "name": "stderr",
627
+ "output_type": "stream",
628
+ "text": [
629
+ "IOPub message rate exceeded.\n",
630
+ "The notebook server will temporarily stop sending output\n",
631
+ "to the client in order to avoid crashing it.\n",
632
+ "To change this limit, set the config variable\n",
633
+ "`--NotebookApp.iopub_msg_rate_limit`.\n",
634
+ "\n",
635
+ "Current values:\n",
636
+ "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n",
637
+ "NotebookApp.rate_limit_window=3.0 (secs)\n",
638
+ "\n"
639
+ ]
640
+ },
641
+ {
642
+ "name": "stdout",
643
+ "output_type": "stream",
644
+ "text": [
645
+ "Downloading: 100% [776454342 / 776454342] bytes ____ ____ ___ ____ _ _ _ _ _ \n",
646
+ " / ___|| _ \\ |_ _| / ___| / \\ | \\ | || \\ | |\n",
647
+ "| | | |_) | | | | | / _ \\ | \\| || \\| |\n",
648
+ "| |___ | __/ | | | |___ / ___ \\ | |\\ || |\\ |\n",
649
+ " \\____||_| |___| \\____|/_/ \\_\\|_| \\_||_| \\_|\n",
650
+ " \n",
651
+ "\n",
652
+ "The phase identification module of WPEM\n",
653
+ "URL : https://github.com/WPEM/CPICANN\n",
654
+ "Executed on : 2024-04-21 14:14:25 | Have a great day.\n",
655
+ "================================================================================\n",
656
+ "loaded model from /Users/jacob/miniconda3/lib/python3.9/site-packages/WPEMPhase/pretrained/CPICANN_single-phase_back3.pth\n",
657
+ "\n",
658
+ ">>>>>> RUNNING: ./testdata/.DS_Store\n",
659
+ "\n",
660
+ ">>>>>> RUNNING: ./testdata/PbSO4.csv\n",
661
+ "pred cls_id : 2475 confidence : 98.89%\n",
662
+ "pred cod_id : 9009622 formula : Pb2 S2 O6\n",
663
+ "pred space group No: 11 space group : P2_1/m\n",
664
+ "\n",
665
+ "inference result saved in infResults_testdata.csv\n",
666
+ "inference figures saved at figs/\n",
667
+ "THE END\n"
668
+ ]
669
+ },
670
+ {
671
+ "data": {
672
+ "text/plain": [
673
+ "True"
674
+ ]
675
+ },
676
+ "execution_count": 4,
677
+ "metadata": {},
678
+ "output_type": "execute_result"
679
+ }
680
+ ],
681
+ "source": [
682
+ "# Here, illustrate the system requirements and how to initialize the system files at the first time of execution.\n",
683
+ "\n",
684
+ "CPICANN.PhaseIdentifier(FilePath='./testdata',Model='bca_model',Task='single-phase',Device='cpu',)"
685
+ ]
686
+ },
687
+ {
688
+ "cell_type": "code",
689
+ "execution_count": 7,
690
+ "id": "304b62b5",
691
+ "metadata": {},
692
+ "outputs": [
693
+ {
694
+ "name": "stdout",
695
+ "output_type": "stream",
696
+ "text": [
697
+ " ____ ____ ___ ____ _ _ _ _ _ \n",
698
+ " / ___|| _ \\ |_ _| / ___| / \\ | \\ | || \\ | |\n",
699
+ "| | | |_) | | | | | / _ \\ | \\| || \\| |\n",
700
+ "| |___ | __/ | | | |___ / ___ \\ | |\\ || |\\ |\n",
701
+ " \\____||_| |___| \\____|/_/ \\_\\|_| \\_||_| \\_|\n",
702
+ " \n",
703
+ "\n",
704
+ "The phase identification module of WPEM\n",
705
+ "URL : https://github.com/WPEM/CPICANN\n",
706
+ "Executed on : 2024-04-21 14:14:53 | Have a great day.\n",
707
+ "================================================================================\n",
708
+ "loaded model from /Users/jacob/miniconda3/lib/python3.9/site-packages/WPEMPhase/pretrained/CPICANN_single-phase_noise3.pth\n",
709
+ "\n",
710
+ ">>>>>> RUNNING: ./testdata/.DS_Store\n",
711
+ "\n",
712
+ ">>>>>> RUNNING: ./testdata/PbSO4.csv\n",
713
+ "pred cls_id : 3378 confidence : 100.00%\n",
714
+ "pred cod_id : 9004484 formula : Pb4 S4 O16\n",
715
+ "pred space group No: 62 space group : Pnma\n",
716
+ "\n",
717
+ "inference result saved in infResults_testdata.csv\n",
718
+ "inference figures saved at figs/\n",
719
+ "THE END\n"
720
+ ]
721
+ },
722
+ {
723
+ "data": {
724
+ "text/plain": [
725
+ "True"
726
+ ]
727
+ },
728
+ "execution_count": 7,
729
+ "metadata": {},
730
+ "output_type": "execute_result"
731
+ }
732
+ ],
733
+ "source": [
734
+ "from WPEMPhase import CPICANN\n",
735
+ "# Here, illustrate the system requirements and how to initialize the system files at the first time of execution.\n",
736
+ "\n",
737
+ "CPICANN.PhaseIdentifier(FilePath='./testdata',Model='noise_model',Task='single-phase',ElementsContained='Pb_S_O',Device='cpu',)"
738
+ ]
739
+ },
740
+ {
741
+ "cell_type": "raw",
742
+ "id": "977ede12",
743
+ "metadata": {},
744
+ "source": [
745
+ "For inquiries or assistance, please don't hesitate to contact us at bcao686@connect.hkust-gz.edu.cn (Dr. CAO Bin)."
746
+ ]
747
+ },
748
+ {
749
+ "cell_type": "code",
750
+ "execution_count": null,
751
+ "id": "bb101480",
752
+ "metadata": {},
753
+ "outputs": [],
754
+ "source": []
755
+ }
756
+ ],
757
+ "metadata": {
758
+ "kernelspec": {
759
+ "display_name": "Python 3 (ipykernel)",
760
+ "language": "python",
761
+ "name": "python3"
762
+ },
763
+ "language_info": {
764
+ "codemirror_mode": {
765
+ "name": "ipython",
766
+ "version": 3
767
+ },
768
+ "file_extension": ".py",
769
+ "mimetype": "text/x-python",
770
+ "name": "python",
771
+ "nbconvert_exporter": "python",
772
+ "pygments_lexer": "ipython3",
773
+ "version": "3.9.12"
774
+ }
775
+ },
776
+ "nbformat": 4,
777
+ "nbformat_minor": 5
778
+ }
src/inference&case/config/elem_setting.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ FileName,must include elements,include at least one of,exclude elements
2
+ CdS.csv,Cd_S,,
3
+ MnS.csv,Mn_S,,
4
+ NiO2H2.csv,Ni_O_H,,
5
+ PbSO4.csv,Pb_O,S,
src/inference&case/figs/PbSO4.csv.png ADDED
src/inference&case/infResults_testdata.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ path,fileName,predRank,pred,codId,formula,spaceGroupNo,spaceGroup
2
+ ./testdata,PbSO4.csv,1,3378,9004484.0,Pb4 S4 O16,62,Pnma
src/inference&case/testdata/.DS_Store ADDED
Binary file (6.15 kB). View file
 
src/inference&case/testdata/PbSO4.csv ADDED
The diff for this file is too large to render. See raw diff
 
src/model/CPICANN.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from torch import nn
7
+ from torch.nn.init import trunc_normal_
8
+
9
+
10
+ class CPICANN(nn.Module):
11
+
12
+ def __init__(self, embed_dim=64, nhead=8, num_encoder_layers=6, dim_feedforward=1024,
13
+ dropout=0.1, activation="relu", num_classes=23073):
14
+ super().__init__()
15
+
16
+ self.embed_dim = embed_dim
17
+ self.num_classes = num_classes
18
+
19
+ self.conv = ConvModule(drop_rate=dropout)
20
+
21
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
22
+ self.pos_embed = nn.Parameter(torch.zeros(1, embed_dim, 142))
23
+
24
+ # -------------encoder----------------
25
+ sa_layer = SelfAttnLayer(embed_dim, nhead, dim_feedforward, dropout, activation)
26
+ self.encoder = SelfAttnModule(sa_layer, num_encoder_layers)
27
+ # ------------------------------------
28
+
29
+ self.norm_after = nn.LayerNorm(embed_dim)
30
+
31
+ self.cls_head = nn.Sequential(
32
+ nn.Linear(embed_dim, int(embed_dim * 4)),
33
+ nn.BatchNorm1d(int(embed_dim * 4)),
34
+ nn.ReLU(inplace=True),
35
+ nn.Dropout(0.5),
36
+ nn.Linear(int(embed_dim * 4), int(embed_dim * 4)),
37
+ nn.BatchNorm1d(int(embed_dim * 4)),
38
+ nn.ReLU(inplace=True),
39
+ nn.Dropout(0.5),
40
+ nn.Linear(int(embed_dim * 4), num_classes)
41
+ )
42
+
43
+ self._reset_parameters()
44
+ self.init_weights()
45
+
46
+ def _reset_parameters(self):
47
+ for p in self.parameters():
48
+ if p.dim() > 1:
49
+ nn.init.xavier_uniform_(p)
50
+
51
+ def init_weights(self):
52
+ trunc_normal_(self.cls_token, std=.02)
53
+
54
+ self.pos_embed.requires_grad = False
55
+
56
+ pos_embed = get_1d_sincos_pos_embed_from_grid(self.embed_dim, np.array(range(self.pos_embed.shape[2])))
57
+ self.pos_embed.data.copy_(torch.from_numpy(pos_embed).T.unsqueeze(0))
58
+
59
+ def bce_fineTune_init_weights(self):
60
+ for p in self.conv.parameters():
61
+ p.requires_grad = False
62
+
63
+ for p in self.encoder.parameters():
64
+ if p.dim() > 1:
65
+ nn.init.xavier_uniform_(p)
66
+ for p in self.cls_head.parameters():
67
+ if p.dim() > 1:
68
+ nn.init.xavier_uniform_(p)
69
+
70
+ def forward(self, x):
71
+ N = x.shape[0]
72
+ if x.shape[1] == 2:
73
+ x = x[:, 1:, :]
74
+
75
+ x = x / 100
76
+ x = self.conv(x)
77
+
78
+ # flatten NxCxL to LxNxC
79
+ x = x.permute(2, 0, 1).contiguous()
80
+
81
+ cls_token = self.cls_token.expand(-1, N, -1)
82
+ x = torch.cat((cls_token, x), dim=0)
83
+
84
+ pos_embed = self.pos_embed.permute(2, 0, 1).contiguous().repeat(1, N, 1)
85
+ feats = self.encoder(x, pos_embed)
86
+ feats = self.norm_after(feats)
87
+ logits = self.cls_head(feats[0])
88
+ return logits
89
+
90
+
91
class ConvModule(nn.Module):
    """Convolutional stem: (N, 1, L) signal -> (N, 128, ~L/32) features.

    One stride-2 conv + stride-2 max-pool, two stride-2 residual stages and
    a final stride-2 max-pool give an overall temporal stride of 32.
    """
    def __init__(self, drop_rate=0.):
        super().__init__()
        # NOTE(review): drop_rate is stored but never applied in this module
        # -- dead parameter, presumably kept for API symmetry.
        self.drop_rate = drop_rate

        self.conv1 = nn.Conv1d(1, 64, kernel_size=35, stride=2, padding=17)
        self.bn1 = nn.BatchNorm1d(64)
        self.act1 = nn.ReLU()
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        self.layer1 = Layer(64, 64, kernel_size=3, stride=2, downsample=True)
        self.layer2 = Layer(64, 128, kernel_size=3, stride=2, downsample=True)
        # self.layer3 = Layer(256, 256, kernel_size=3, stride=2, downsample=True)
        self.maxpool2 = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        # x = self.layer3(x)
        x = self.maxpool2(x)
        return x
117
+
118
+
119
class SelfAttnModule(nn.Module):
    """Stack of ``num_layers`` deep-copied encoder layers applied in order.

    Args:
        encoder_layer: prototype layer; each stacked layer is a deep copy.
        num_layers: number of layers in the stack.
        norm: optional normalization applied to the final output.
    """

    def __init__(self, encoder_layer, num_layers, norm=None):
        super().__init__()
        self.layers = _get_clones(encoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm

    def forward(self, src, pos):
        # The same positional embedding is fed to every layer.
        output = src

        for layer in self.layers:
            output = layer(output, pos)

        if self.norm is not None:
            output = self.norm(output)

        return output
137
+
138
+
139
class SelfAttnLayer(nn.Module):
    """Post-norm Transformer encoder layer (self-attention + feed-forward).

    The positional embedding is added to queries and keys only (DETR-style);
    values are the raw sequence.
    """

    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
                 activation="relu"):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of the feed-forward sub-layer.
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)

    def forward(self, src, pos):
        # src, pos: (L, N, d_model) -- nn.MultiheadAttention's default layout.
        q = k = with_pos_embed(src, pos)
        src2 = self.self_attn(q, k, value=src)[0]
        # Residual + post-norm around the attention sub-layer.
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        # Residual + post-norm around the feed-forward sub-layer.
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src
166
+
167
+
168
class Layer(nn.Module):
    """A residual stage: one (optionally strided/widening) BasicBlock
    followed by a stride-1 BasicBlock at the same width."""

    def __init__(self, inchannel, outchannel, kernel_size, stride, downsample):
        super(Layer, self).__init__()
        # First block may change width/resolution; second refines in place.
        self.block1 = BasicBlock(inchannel, outchannel, kernel_size=kernel_size,
                                 stride=stride, downsample=downsample)
        self.block2 = BasicBlock(outchannel, outchannel, kernel_size=kernel_size,
                                 stride=1)

    def forward(self, x):
        out = self.block1(x)
        return self.block2(out)
178
+
179
+
180
class BasicBlock(nn.Module):
    """1-D residual block: conv-bn -> conv-bn + identity/projection shortcut.

    NOTE(review): ``self.act1`` is created but never applied in forward(),
    so there is no activation between the two convolutions (unlike a
    standard ResNet BasicBlock). Fixing this would change the behaviour of
    already-trained checkpoints -- confirm before touching.
    """
    def __init__(self, inchannel, outchannel, kernel_size, stride, downsample=False):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv1d(inchannel, outchannel, kernel_size=kernel_size, stride=stride, padding=kernel_size // 2)
        self.bn1 = nn.BatchNorm1d(outchannel)
        self.act1 = nn.ReLU(inplace=True)  # unused in forward(); see class note
        self.conv2 = nn.Conv1d(outchannel, outchannel, kernel_size=kernel_size, stride=1, padding=kernel_size // 2)
        self.bn2 = nn.BatchNorm1d(outchannel)
        self.act2 = nn.ReLU(inplace=True)
        # NOTE(review): the projection shortcut hard-codes stride=2, so it
        # only matches conv1 when downsample=True is paired with stride=2
        # (as every call site in this file does).
        self.downsample = nn.Sequential(
            nn.Conv1d(inchannel, outchannel, kernel_size=1, stride=2),
            nn.BatchNorm1d(outchannel)
        ) if downsample else None

    def forward(self, x):
        shortcut = x
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        if self.downsample is not None:
            shortcut = self.downsample(shortcut)
        x += shortcut
        x = self.act2(x)
        return x
205
+
206
+
207
+ def _get_clones(module, N):
208
+ return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
209
+
210
+
211
+ def _get_activation_fn(activation):
212
+ """Return an activation function given a string"""
213
+ if activation == "relu":
214
+ return F.relu
215
+ if activation == "gelu":
216
+ return F.gelu
217
+ if activation == "glu":
218
+ return F.glu
219
+ raise RuntimeError(F"activation should be relu/gelu, not {activation}.")
220
+
221
+
222
def with_pos_embed(tensor, pos):
    """Add a positional embedding to *tensor*; pass through when *pos* is None."""
    if pos is None:
        return tensor
    return tensor + pos
224
+
225
+
226
def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
    """Build fixed 1-D sin/cos positional embeddings.

    Args:
        embed_dim: size D of each embedding (must be even); the first D/2
            channels hold sines, the last D/2 hold cosines.
        pos: positions to encode, any array of M values.

    Returns:
        float32 array of shape (M, D).
    """
    assert embed_dim % 2 == 0
    half = embed_dim // 2
    # Geometric frequency ladder 1 / 10000^(2i/D), as in "Attention Is All You Need".
    freqs = 1.0 / 10000 ** (np.arange(half, dtype=np.float32) / (embed_dim / 2.))

    angles = np.outer(pos.reshape(-1), freqs)  # (M, D/2)
    return np.concatenate(
        [np.sin(angles).astype(np.float32), np.cos(angles).astype(np.float32)],
        axis=1,
    )
src/model/dataset.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
+ from torch.utils.data import Dataset
6
+
7
+
8
class XrdDataset(Dataset):
    """Single-phase XRD dataset.

    Each annotation row holds (data id, class label); sample ``i`` is loaded
    from ``<data_dir>/<id>.csv``, transposed and cast to float32, and
    returned together with its label.
    """

    def __init__(self, data_dir, annotations_file):
        self.labels = pd.read_csv(annotations_file)
        self.data_dir = data_dir

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        row = self.labels.iloc[idx]
        csv_path = os.path.join(self.data_dir, '{}.csv'.format(row.iloc[0]))
        pattern = pd.read_csv(csv_path).values.astype(np.float32).T
        return pattern, row.iloc[1]
25
+
26
+
27
class mixDataset_cls_dynamic(Dataset):
    """Synthetic bi-phase dataset generated on the fly.

    Each sample mixes two randomly chosen single-phase patterns (each phase
    has several pre-simulated variants on disk named ``<codId>_<k>.csv``)
    with a random 20:80 .. 80:20 intensity ratio. The multi-hot label marks
    both constituent classes with 0.4.

    Args:
        data_dir: directory holding the simulated pattern CSV files.
        anno_struc: CSV whose first column lists the COD ids (class order).
        mode: 'train' samples variants 1..24, anything else samples 1..6.
    """

    def __init__(self, data_dir, anno_struc, mode):
        self.data_dir = data_dir
        self.codIdList = pd.read_csv(anno_struc).values[:, 0].astype(np.int32)
        self.mode = mode

    def __len__(self):
        # Virtual epoch length: samples are generated randomly per request.
        return 1000000

    def __getitem__(self, idx):
        # Fix: the class count was hard-coded to 23073; derive it from the
        # annotation file so the dataset also works with other class lists
        # (the hard-coded value crashed on anything smaller).
        n_classes = len(self.codIdList)
        choice1, choice2 = np.random.randint(0, n_classes, 2)
        if self.mode == 'train':
            rand1, rand2 = np.random.randint(1, 25, 2)
        else:
            rand1, rand2 = np.random.randint(1, 7, 2)
        data_path1 = os.path.join(self.data_dir, '{}_{}.csv'.format(self.codIdList[choice1], rand1))
        data_path2 = os.path.join(self.data_dir, '{}_{}.csv'.format(self.codIdList[choice2], rand2))
        data1 = pd.read_csv(data_path1).values.astype(np.float32).T
        data2 = pd.read_csv(data_path2).values.astype(np.float32).T

        # Mixing ratio in percent; the caller combines the two patterns.
        ratio1 = np.random.randint(20, 81)
        ratio2 = 100 - ratio1

        label = np.zeros(n_classes).astype(np.float32)

        # NOTE(review): both phases are marked 0.4 (not 0.5 or 1.0), and when
        # choice1 == choice2 the single entry stays 0.4 -- confirm this soft
        # target is intended by the BCE training recipe.
        label[choice1] = 0.4
        label[choice2] = 0.4

        return data1, data2, ratio1, ratio2, label
src/model/focal_loss.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torch.autograd import Variable
5
+
6
+
7
class FocalLoss(nn.Module):
    r"""Focal Loss (Lin et al., "Focal Loss for Dense Object Detection").

    Loss(x, class) = -\alpha (1 - softmax(x)[class])^\gamma \log(softmax(x)[class])

    Losses are averaged over the minibatch when ``size_average`` is True,
    otherwise summed.

    Args:
        class_num: number of classes C.
        alpha: per-class weights, tensor-like of shape (C, 1); defaults to ones.
        gamma: focusing parameter (>= 0); larger values down-weight
            well-classified examples.
        size_average: mean over the batch if True, else sum.
        device: kept for interface compatibility; ``alpha`` now simply
            follows the inputs' device, which also fixes the previously
            unhandled CPU / multi-GPU cases.

    Note:
        This version drops the deprecated ``torch.autograd.Variable``
        wrappers -- plain tensors participate in autograd since PyTorch 0.4.
    """

    def __init__(self, class_num, alpha=None, gamma=2, size_average=True, device='cuda:0'):
        super(FocalLoss, self).__init__()
        if alpha is None:
            self.alpha = torch.ones(class_num, 1)
        else:
            self.alpha = torch.as_tensor(alpha)
        self.gamma = gamma
        self.class_num = class_num
        self.size_average = size_average
        self.device = device

    def forward(self, inputs, targets):
        """Compute the focal loss.

        Args:
            inputs: raw logits of shape (N, C).
            targets: class indices of shape (N,), dtype long.

        Returns:
            Scalar loss tensor.
        """
        P = F.softmax(inputs, dim=1)

        # One-hot mask selecting the target class of every sample.
        ids = targets.view(-1, 1)
        class_mask = torch.zeros_like(inputs).scatter_(1, ids, 1.)

        # Keep the class weights on the same device as the inputs (the
        # original only covered the inputs-on-CUDA case via self.device).
        if self.alpha.device != inputs.device:
            self.alpha = self.alpha.to(inputs.device)
        alpha = self.alpha[ids.view(-1)]

        # p_t: probability assigned to the true class, shape (N, 1).
        probs = (P * class_mask).sum(1).view(-1, 1)
        log_p = probs.log()

        batch_loss = -alpha * torch.pow(1 - probs, self.gamma) * log_p

        if self.size_average:
            return batch_loss.mean()
        return batch_loss.sum()
69
+
70
+
71
+ # import torch
72
+ # import torch.nn as nn
73
+ #
74
+ #
75
+ # class FocalLoss(nn.Module):
76
+ #
77
+ # def __init__(self, gamma=0, eps=1e-7):
78
+ # super(FocalLoss, self).__init__()
79
+ # self.gamma = gamma
80
+ # self.eps = eps
81
+ # self.ce = torch.nn.CrossEntropyLoss()
82
+ #
83
+ # def forward(self, input, target):
84
+ # logp = self.ce(input, target)
85
+ # p = torch.exp(-logp)
86
+ # loss = (1 - p) ** self.gamma * logp
87
+ # return loss.mean()
src/othermodels/ATTENTIONonly.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from torch import nn
7
+ from torch.nn.init import trunc_normal_
8
+
9
+
10
class VIT(nn.Module):
    """Ablation variant of CPICANN: ViT-style patch embedding, no CNN stem.

    A single stride-32 Conv1d patchifies the pattern into embed_dim tokens;
    the rest (CLS token, fixed sin/cos positions, self-attention encoder,
    MLP head) matches CPICANN.
    """

    def __init__(self, embed_dim=64, nhead=8, num_encoder_layers=6, dim_feedforward=1024,
                 dropout=0.1, activation="relu", num_classes=23073):
        super().__init__()

        self.embed_dim = embed_dim
        self.num_classes = num_classes

        # Non-overlapping 32-sample "patches" -> embed_dim channels.
        self.conv = torch.nn.Conv1d(1, embed_dim, kernel_size=32, stride=32, padding=0)

        # Learnable CLS token; fixed sin/cos positions for 141 slots
        # (presumably number of patches + 1 CLS slot for the expected input
        # length -- TODO confirm).
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, embed_dim, 141))

        # -------------encoder----------------
        sa_layer = SelfAttnLayer(embed_dim, nhead, dim_feedforward, dropout, activation)
        self.encoder = SelfAttnModule(sa_layer, num_encoder_layers)
        # ------------------------------------

        self.norm_after = nn.LayerNorm(embed_dim)

        # Two-hidden-layer MLP classification head applied to the CLS slot.
        self.cls_head = nn.Sequential(
            nn.Linear(embed_dim, int(embed_dim * 4)),
            nn.BatchNorm1d(int(embed_dim * 4)),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(int(embed_dim * 4), int(embed_dim * 4)),
            nn.BatchNorm1d(int(embed_dim * 4)),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(int(embed_dim * 4), num_classes)
        )

        self._reset_parameters()
        self.init_weights()

    def _reset_parameters(self):
        # Xavier init for every weight matrix (dim > 1 skips biases/norms).
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def init_weights(self):
        trunc_normal_(self.cls_token, std=.02)

        # Positional embedding is fixed (sin/cos), not learned.
        self.pos_embed.requires_grad = False

        pos_embed = get_1d_sincos_pos_embed_from_grid(self.embed_dim, np.array(range(self.pos_embed.shape[2])))
        self.pos_embed.data.copy_(torch.from_numpy(pos_embed).T.unsqueeze(0))

    def bce_fineTune_init_weights(self):
        """Freeze the patch embedding and re-init encoder + head for
        bi-phase (BCE) fine-tuning."""
        for p in self.conv.parameters():
            p.requires_grad = False

        for p in self.encoder.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
        for p in self.cls_head.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, x):
        # x: (N, 1 or 2, L); drop the 2-theta row if present.
        N = x.shape[0]
        if x.shape[1] == 2:
            x = x[:, 1:, :]

        # Intensities are expected on a 0..100 scale; rescale to 0..1.
        x = x / 100
        x = self.conv(x)

        # flatten NxCxL to LxNxC (MultiheadAttention's default layout)
        x = x.permute(2, 0, 1).contiguous()

        cls_token = self.cls_token.expand(-1, N, -1)
        x = torch.cat((cls_token, x), dim=0)

        pos_embed = self.pos_embed.permute(2, 0, 1).contiguous().repeat(1, N, 1)
        feats = self.encoder(x, pos_embed)
        feats = self.norm_after(feats)
        # Classify from the CLS slot.
        logits = self.cls_head(feats[0])
        return logits
+
90
+
91
class ConvModule(nn.Module):
    """Convolutional stem: (N, 1, L) signal -> (N, 128, ~L/32) features.

    NOTE(review): appears unused within this file -- VIT patchifies with a
    single Conv1d; this class was presumably carried over from CPICANN.
    """
    def __init__(self, drop_rate=0.):
        super().__init__()
        # drop_rate is stored but never applied in this module.
        self.drop_rate = drop_rate

        self.conv1 = nn.Conv1d(1, 64, kernel_size=35, stride=2, padding=17)
        self.bn1 = nn.BatchNorm1d(64)
        self.act1 = nn.ReLU()
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        self.layer1 = Layer(64, 64, kernel_size=3, stride=2, downsample=True)
        self.layer2 = Layer(64, 128, kernel_size=3, stride=2, downsample=True)
        # self.layer3 = Layer(256, 256, kernel_size=3, stride=2, downsample=True)
        self.maxpool2 = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        # x = self.layer3(x)
        x = self.maxpool2(x)
        return x
+
118
+
119
class SelfAttnModule(nn.Module):
    """Stack of ``num_layers`` deep-copied encoder layers applied in order.

    Args:
        encoder_layer: prototype layer; each stacked layer is a deep copy.
        num_layers: number of layers in the stack.
        norm: optional normalization applied to the final output.
    """

    def __init__(self, encoder_layer, num_layers, norm=None):
        super().__init__()
        self.layers = _get_clones(encoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm

    def forward(self, src, pos):
        # The same positional embedding is fed to every layer.
        output = src

        for layer in self.layers:
            output = layer(output, pos)

        if self.norm is not None:
            output = self.norm(output)

        return output
+
138
+
139
+ class SelfAttnLayer(nn.Module):
140
+
141
+ def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
142
+ activation="relu"):
143
+ super().__init__()
144
+ self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
145
+ # Implementation of Feedforward model
146
+ self.linear1 = nn.Linear(d_model, dim_feedforward)
147
+ self.dropout = nn.Dropout(dropout)
148
+ self.linear2 = nn.Linear(dim_feedforward, d_model)
149
+
150
+ self.norm1 = nn.LayerNorm(d_model)
151
+ self.norm2 = nn.LayerNorm(d_model)
152
+ self.dropout1 = nn.Dropout(dropout)
153
+ self.dropout2 = nn.Dropout(dropout)
154
+
155
+ self.activation = _get_activation_fn(activation)
156
+
157
+ def forward(self, src, pos):
158
+ q = k = with_pos_embed(src, pos)
159
+ src2 = self.self_attn(q, k, value=src)[0]
160
+ src = src + self.dropout1(src2)
161
+ src = self.norm1(src)
162
+ src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
163
+ src = src + self.dropout2(src2)
164
+ src = self.norm2(src)
165
+ return src
166
+
167
+
168
+ class Layer(nn.Module):
169
+ def __init__(self, inchannel, outchannel, kernel_size, stride, downsample):
170
+ super(Layer, self).__init__()
171
+ self.block1 = BasicBlock(inchannel, outchannel, kernel_size=kernel_size, stride=stride, downsample=downsample)
172
+ self.block2 = BasicBlock(outchannel, outchannel, kernel_size=kernel_size, stride=1)
173
+
174
+ def forward(self, x):
175
+ x = self.block1(x)
176
+ x = self.block2(x)
177
+ return x
178
+
179
+
180
+ class BasicBlock(nn.Module):
181
+ def __init__(self, inchannel, outchannel, kernel_size, stride, downsample=False):
182
+ super(BasicBlock, self).__init__()
183
+ self.conv1 = nn.Conv1d(inchannel, outchannel, kernel_size=kernel_size, stride=stride, padding=kernel_size // 2)
184
+ self.bn1 = nn.BatchNorm1d(outchannel)
185
+ self.act1 = nn.ReLU(inplace=True)
186
+ self.conv2 = nn.Conv1d(outchannel, outchannel, kernel_size=kernel_size, stride=1, padding=kernel_size // 2)
187
+ self.bn2 = nn.BatchNorm1d(outchannel)
188
+ self.act2 = nn.ReLU(inplace=True)
189
+ self.downsample = nn.Sequential(
190
+ nn.Conv1d(inchannel, outchannel, kernel_size=1, stride=2),
191
+ nn.BatchNorm1d(outchannel)
192
+ ) if downsample else None
193
+
194
+ def forward(self, x):
195
+ shortcut = x
196
+ x = self.conv1(x)
197
+ x = self.bn1(x)
198
+ x = self.conv2(x)
199
+ x = self.bn2(x)
200
+ if self.downsample is not None:
201
+ shortcut = self.downsample(shortcut)
202
+ x += shortcut
203
+ x = self.act2(x)
204
+ return x
205
+
206
+
207
+ def _get_clones(module, N):
208
+ return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
209
+
210
+
211
+ def _get_activation_fn(activation):
212
+ """Return an activation function given a string"""
213
+ if activation == "relu":
214
+ return F.relu
215
+ if activation == "gelu":
216
+ return F.gelu
217
+ if activation == "glu":
218
+ return F.glu
219
+ raise RuntimeError(F"activation should be relu/gelu, not {activation}.")
220
+
221
+
222
+ def with_pos_embed(tensor, pos):
223
+ return tensor if pos is None else tensor + pos
224
+
225
+
226
+ def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
227
+ """
228
+ embed_dim: output dimension for each position
229
+ pos: a list of positions to be encoded: size (M,)
230
+ out: (M, D)
231
+ """
232
+ assert embed_dim % 2 == 0
233
+ omega = np.arange(embed_dim // 2, dtype=np.float32)
234
+ omega /= embed_dim / 2.
235
+ omega = 1. / 10000 ** omega # (D/2,)
236
+
237
+ pos = pos.reshape(-1) # (M,)
238
+ out = np.einsum('m,d->md', pos, omega) # (M, D/2), outer product
239
+
240
+ emb_sin = np.sin(out).astype(np.float32) # (M, D/2)
241
+ emb_cos = np.cos(out).astype(np.float32) # (M, D/2)
242
+
243
+ emb = np.concatenate([emb_sin, emb_cos], axis=1) # (M, D)
244
+ return emb
src/othermodels/CNNonly.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from torch import nn
7
+ from torch.nn.init import trunc_normal_
8
+
9
+
10
class CNN(nn.Module):
    """Ablation variant of CPICANN: CNN stem + MLP head, no attention.

    ``nhead`` / ``num_encoder_layers`` / ``dim_feedforward`` / ``activation``
    are accepted only for interface parity with CPICANN and are unused here.

    NOTE(review): the stem outputs 128 channels while ``cls_head`` takes
    ``embed_dim`` inputs, so the default embed_dim=64 does not line up --
    presumably instantiated with embed_dim=128; confirm against the
    training scripts.
    """

    def __init__(self, embed_dim=64, nhead=8, num_encoder_layers=6, dim_feedforward=1024,
                 dropout=0.1, activation="relu", num_classes=23073):
        super().__init__()

        self.embed_dim = embed_dim
        self.num_classes = num_classes

        self.conv = ConvModule(drop_rate=dropout)

        # Collapse the 141-step temporal axis to one vector per channel.
        self.proj = nn.Linear(141, 1)

        # Two-hidden-layer MLP classification head.
        self.cls_head = nn.Sequential(
            nn.Linear(embed_dim, int(embed_dim * 4)),
            nn.BatchNorm1d(int(embed_dim * 4)),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(int(embed_dim * 4), int(embed_dim * 4)),
            nn.BatchNorm1d(int(embed_dim * 4)),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(int(embed_dim * 4), num_classes)
        )

        self._reset_parameters()

    def _reset_parameters(self):
        # Xavier init for every weight matrix (dim > 1 skips biases/norms).
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, x):
        # x: (N, 1 or 2, L); drop the 2-theta row if present.
        N = x.shape[0]
        if x.shape[1] == 2:
            x = x[:, 1:, :]

        # Intensities are expected on a 0..100 scale; rescale to 0..1.
        x = x / 100
        x = self.conv(x)

        # flatten NxCxL to LxNxC
        # x = x.permute(2, 0, 1).contiguous()
        x = self.proj(x).flatten(1)

        logits = self.cls_head(x)
        return logits
57
+
58
+
59
class ConvModule(nn.Module):
    """Convolutional stem: (N, 1, L) signal -> (N, 128, ~L/32) features.

    One stride-2 conv + stride-2 max-pool, two stride-2 residual stages and
    a final stride-2 max-pool give an overall temporal stride of 32.
    """
    def __init__(self, drop_rate=0.):
        super().__init__()
        # NOTE(review): drop_rate is stored but never applied in this module.
        self.drop_rate = drop_rate

        self.conv1 = nn.Conv1d(1, 64, kernel_size=35, stride=2, padding=17)
        self.bn1 = nn.BatchNorm1d(64)
        self.act1 = nn.ReLU()
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        self.layer1 = Layer(64, 64, kernel_size=3, stride=2, downsample=True)
        self.layer2 = Layer(64, 128, kernel_size=3, stride=2, downsample=True)
        # self.layer3 = Layer(256, 256, kernel_size=3, stride=2, downsample=True)
        self.maxpool2 = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        # x = self.layer3(x)
        x = self.maxpool2(x)
        return x
85
+
86
class Layer(nn.Module):
    """A residual stage: one (optionally strided/widening) BasicBlock
    followed by a stride-1 BasicBlock at the same width."""

    def __init__(self, inchannel, outchannel, kernel_size, stride, downsample):
        super(Layer, self).__init__()
        # First block may change width/resolution; second refines in place.
        self.block1 = BasicBlock(inchannel, outchannel, kernel_size=kernel_size,
                                 stride=stride, downsample=downsample)
        self.block2 = BasicBlock(outchannel, outchannel, kernel_size=kernel_size,
                                 stride=1)

    def forward(self, x):
        out = self.block1(x)
        return self.block2(out)
96
+
97
class BasicBlock(nn.Module):
    """1-D residual block: conv-bn -> conv-bn + identity/projection shortcut.

    NOTE(review): ``self.act1`` is created but never applied in forward(),
    so there is no activation between the two convolutions (unlike a
    standard ResNet BasicBlock). Fixing this would change the behaviour of
    already-trained checkpoints -- confirm before touching.
    """
    def __init__(self, inchannel, outchannel, kernel_size, stride, downsample=False):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv1d(inchannel, outchannel, kernel_size=kernel_size, stride=stride, padding=kernel_size // 2)
        self.bn1 = nn.BatchNorm1d(outchannel)
        self.act1 = nn.ReLU(inplace=True)  # unused in forward(); see class note
        self.conv2 = nn.Conv1d(outchannel, outchannel, kernel_size=kernel_size, stride=1, padding=kernel_size // 2)
        self.bn2 = nn.BatchNorm1d(outchannel)
        self.act2 = nn.ReLU(inplace=True)
        # NOTE(review): the projection shortcut hard-codes stride=2, so it
        # only matches conv1 when downsample=True is paired with stride=2.
        self.downsample = nn.Sequential(
            nn.Conv1d(inchannel, outchannel, kernel_size=1, stride=2),
            nn.BatchNorm1d(outchannel)
        ) if downsample else None

    def forward(self, x):
        shortcut = x
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        if self.downsample is not None:
            shortcut = self.downsample(shortcut)
        x += shortcut
        x = self.act2(x)
        return x
122
+
123
+
124
+ def _get_clones(module, N):
125
+ return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
126
+
127
+
128
+ def _get_activation_fn(activation):
129
+ """Return an activation function given a string"""
130
+ if activation == "relu":
131
+ return F.relu
132
+ if activation == "gelu":
133
+ return F.gelu
134
+ if activation == "glu":
135
+ return F.glu
136
+ raise RuntimeError(F"activation should be relu/gelu, not {activation}.")
137
+
138
+
139
def with_pos_embed(tensor, pos):
    """Add a positional embedding to *tensor*; pass through when *pos* is None."""
    if pos is None:
        return tensor
    return tensor + pos
src/pretrained/# place pretrained .pth files here ADDED
File without changes
src/train_bi-phase.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import sys
3
+
4
+ import os
5
+ import argparse
6
+
7
+ import torch
8
+ import torch.distributed as dist
9
+ from torch import optim, nn
10
+ from torch.nn.parallel import DistributedDataParallel as DDP
11
+ from torch.utils.data import DataLoader
12
+ from tqdm import tqdm
13
+
14
+ from model.CPICANN import CPICANN
15
+ from model.dataset import mixDataset_cls_dynamic
16
+ from util.logger import Logger
17
+
18
+
19
def run_one_epoch(model, dataloader, criterion, optimizer, epoch, mode):
    """Run one training or evaluation epoch over dynamically mixed bi-phase data.

    Each batch provides two single-phase patterns plus per-sample mixing
    ratios; the mixed pattern is min-max rescaled to [0, 100] before being
    fed to the model. Uses module-level globals `args` and `device`.

    Parameters
    ----------
    model, criterion : network and loss; both toggled train()/eval().
    optimizer : stepped only when mode == 'Train'.
    epoch : 1-based epoch index, feeds the warmup/cosine LR schedule.
    mode : 'Train' for optimization, anything else for no-grad evaluation.

    Returns
    -------
    float : mean loss over the epoch.
    """
    if mode == 'Train':
        model.train()
        criterion.train()
        desc = 'Training... '
    else:
        model.eval()
        criterion.eval()
        desc = 'Evaluating... '

    epoch_loss = 0  # fix: removed the unused cls_acc accumulator
    pbar = None
    if args.progress_bar:
        pbar = tqdm(total=len(dataloader.dataset), desc=desc, unit='data')
    iters = len(dataloader)
    for i, batch in enumerate(dataloader):
        data1 = batch[0].to(device)
        data2 = batch[1].to(device)
        ratio1 = batch[2].to(device)
        ratio2 = batch[3].to(device)
        label_cls = batch[4].to(device)

        # Mix the two patterns with per-sample scalar ratios, then min-max
        # normalize each mixed pattern to [0, 100].
        data = torch.einsum('ijk,i->ijk', data1, ratio1) + torch.einsum('ijk,i->ijk', data2, ratio2)
        min_i = data.min(dim=2, keepdim=True)[0]
        max_i = data.max(dim=2, keepdim=True)[0]
        data = (data - min_i) / (max_i - min_i) * 100

        if mode == 'Train':
            # Fractional epoch gives a smooth per-iteration LR schedule.
            adjust_learning_rate_withWarmup(optimizer, epoch + i / iters, args)

            logits = model(data)
            loss = criterion(logits, label_cls)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            with torch.no_grad():
                logits = model(data)
                loss = criterion(logits, label_cls)

        epoch_loss += loss.item()
        if pbar is not None:
            pbar.update(len(data))
            pbar.set_postfix(**{'loss': loss.item()})

    if pbar is not None:
        pbar.close()  # fix: the bar was never closed, garbling later output

    return epoch_loss / iters
65
+
66
+
67
def print_log(epoch, loss_train, loss_val, lr):
    """Write one epoch's losses and learning rate to the text log and tensorboard."""
    log.printlog('---------------- Epoch {} ----------------'.format(epoch))

    for template, value in (('loss_train : {}', round(loss_train, 6)),
                            ('loss_val : {}', round(loss_val, 6))):
        log.printlog(template.format(value))

    for writer, value in ((log.train_writer, loss_train),
                          (log.val_writer, loss_val)):
        writer.add_scalar('mix_loss', value, epoch)

    log.train_writer.add_scalar('lr', lr, epoch)
77
+
78
+
79
def save_checkpoint(state, is_best, filepath, filename):
    """Save `state` every 10 epochs (and at epoch 1); optionally also save a best-model copy."""
    epoch = state['epoch']
    if epoch % 10 == 0 or epoch == 1:
        os.makedirs(filepath, exist_ok=True)
        torch.save(state, filepath + filename)
        log.printlog('checkpoint saved!')
    if is_best:
        torch.save(state, '{}/model_best.pth'.format(filepath))
        log.printlog('best model saved!')
87
+
88
+
89
def adjust_learning_rate(optimizer, epoch, schedule):
    """Step-decay: multiply the base LR by 0.1 for every milestone already reached."""
    lr = optimizer.defaults['lr']
    for milestone in schedule:
        if epoch >= milestone:
            lr *= 0.1
    for group in optimizer.param_groups:
        group['lr'] = lr
96
+
97
+
98
def adjust_learning_rate_withWarmup(optimizer, epoch, args):
    """Linear warmup to args.lr over args.warmup_epochs, then half-cycle cosine decay.

    `epoch` may be fractional for per-iteration scheduling. Returns the LR set.
    """
    if epoch < args.warmup_epochs:
        lr = args.lr * epoch / args.warmup_epochs
    else:
        progress = (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs)
        lr = args.lr * 0.5 * (1. + math.cos(math.pi * progress))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr
107
+
108
+
109
def main():
    """Fine-tune the pretrained single-phase model for bi-phase identification.

    Relies on module-level globals: args, device, rank, distributed, log
    (and local_rank when distributed).
    """
    print('>>>> Running on {} <<<<'.format(device))

    model = CPICANN(embed_dim=128, num_classes=args.num_classes)

    # LOAD PRETRAINED MODEL
    loaded = torch.load(args.load_path)
    model.load_state_dict(loaded['model'])

    # Re-initialize the classification head for fine-tuning (method defined on CPICANN).
    model.bce_fineTune_init_weights()
    model.to(device)
    if rank == 0:
        log.printlog(model)

    trainset = mixDataset_cls_dynamic(args.data_dir_train, args.anno_struc, mode='Train')
    valset = mixDataset_cls_dynamic(args.data_dir_val, args.anno_struc, mode='Eval')

    if distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(trainset, shuffle=True)
        val_sampler = torch.utils.data.distributed.DistributedSampler(valset, shuffle=True)

        train_loader = DataLoader(trainset, batch_size=512, num_workers=16, pin_memory=True, drop_last=True, sampler=train_sampler)
        val_loader = DataLoader(valset, batch_size=512, num_workers=16, pin_memory=True, drop_last=True, sampler=val_sampler)

        model = DDP(model, device_ids=[device], output_device=local_rank, find_unused_parameters=False)
    else:
        train_loader = DataLoader(trainset, batch_size=512, num_workers=16, pin_memory=True, shuffle=True)
        val_loader = DataLoader(valset, batch_size=512, num_workers=16, pin_memory=True, shuffle=True)

    criterion = nn.CrossEntropyLoss()

    optimizer = optim.AdamW(model.parameters(), args.lr, weight_decay=1e-4)
    start_epoch = 0

    for epoch in range(start_epoch + 1, args.epochs + 1):
        if distributed:
            # Re-seed the samplers so shards reshuffle every epoch.
            train_sampler.set_epoch(epoch)
            val_sampler.set_epoch(epoch)

        loss_train = run_one_epoch(model, train_loader, criterion, optimizer, epoch, mode='Train')

        loss_val = run_one_epoch(model, val_loader, criterion, optimizer, epoch, mode='Eval')

        if rank == 0:
            print_log(epoch, loss_train, loss_val, optimizer.param_groups[0]['lr'])
            # NOTE(review): the whole optimizer object is pickled here, not
            # optimizer.state_dict() — confirm this is intentional.
            save_checkpoint({'epoch': epoch,
                             'model': model.module.state_dict() if distributed else model.state_dict(),
                             'optimizer': optimizer}, is_best=False,
                            filepath='{}/checkpoints/'.format(log.get_path()),
                            filename='checkpoint_{:04d}.pth'.format(epoch))
159
+
160
+
161
if __name__ == '__main__':
    # Distributed setup: torchrun/launch exports RANK and WORLD_SIZE; fall
    # back to single-GPU mode otherwise.
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        rank = int(os.environ["RANK"])
        local_rank = int(os.environ["LOCAL_RANK"])
        torch.cuda.set_device(rank % torch.cuda.device_count())
        dist.init_process_group(backend="nccl")
        device = torch.device("cuda", local_rank)
        print(f"[init] == local rank: {local_rank}, global rank: {rank} ==")

        distributed = True
    else:
        rank = 0
        device = 'cuda:0'
        distributed = False

    parser = argparse.ArgumentParser()
    # NOTE(review): single dash here vs "--progress_bar" in
    # train_single-phase.py; also argparse type=bool treats any non-empty
    # string (including "False") as True — consider a store_true flag.
    parser.add_argument("-progress_bar", type=bool, default=True)

    parser.add_argument('--epochs', default=200, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('--warmup-epochs', default=20, type=int, metavar='N',
                        help='number of warmup epochs')
    parser.add_argument('--lr', '--learning-rate', default=8e-4, type=float,
                        metavar='LR', help='initial (base) learning rate', dest='lr')

    parser.add_argument('--load_path', default='pretrained/single-phase_checkpoint_0200.pth', type=str,
                        help='path to load pretrained single-phase identification model')
    parser.add_argument('--data_dir_train', default='data/train', type=str)
    parser.add_argument('--data_dir_val', default='data/val', type=str)
    parser.add_argument('--anno_struc', default='annotation/anno_struc.csv', type=str,
                        help='path to annotation file for structures')
    parser.add_argument('--num_classes', default=23073, type=int, metavar='N')

    args = parser.parse_args()

    # Only rank 0 owns the output directory and tensorboard writers.
    if rank == 0:
        log = Logger(val=True)

    main()
    print('THE END')
src/train_single-phase.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import math
3
+ import os
4
+
5
+ import torch
6
+ import torch.distributed as dist
7
+ from torch import optim
8
+ from torch.nn.parallel import DistributedDataParallel as DDP
9
+ from torch.utils.data import DataLoader
10
+ from tqdm import tqdm
11
+
12
+ from model.dataset import XrdDataset
13
+ from model.CPICANN import CPICANN
14
+ from model.focal_loss import FocalLoss
15
+ from util.logger import Logger
16
+
17
+
18
def get_acc(cls, label):
    """Return top-1 accuracy of logits `cls` against integer labels, as a tensor."""
    predictions = cls.argmax(1)
    hits = predictions == label.int()
    return sum(hits) / cls.shape[0]
21
+
22
+
23
def run_one_epoch(model, dataloader, criterion, optimizer, epoch, mode):
    """Run one training or evaluation epoch for single-phase identification.

    When mode == 'Train', the LR is updated per iteration by the
    warmup/cosine schedule and the model is optimized; otherwise the forward
    pass runs under torch.no_grad(). Uses module-level globals `args` and
    `device`.

    Returns (mean_loss, mean_batch_accuracy_percent).
    """
    if mode == 'Train':
        model.train()
        criterion.train()
        desc = 'Training... '
    else:
        model.eval()
        criterion.eval()
        desc = 'Evaluating... '

    epoch_loss, cls_acc = 0, 0
    if args.progress_bar:
        pbar = tqdm(total=len(dataloader.dataset), desc=desc, unit='data')
    iters = len(dataloader)
    for i, batch in enumerate(dataloader):
        data = batch[0].to(device)
        label_cls = batch[1].to(device)

        if mode == 'Train':
            # Fractional epoch -> smooth per-iteration LR schedule.
            adjust_learning_rate_withWarmup(optimizer, epoch + i / iters, args)

            logits = model(data)
            loss = criterion(logits, label_cls.long())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            with torch.no_grad():
                logits = model(data)
                loss = criterion(logits, label_cls.long())

        epoch_loss += loss.item()
        if args.progress_bar:
            pbar.update(len(data))
            pbar.set_postfix(**{'loss': loss.item()})

        # Batch-level top-1 accuracy; averaged over batches in the return.
        _cls_acc = get_acc(logits, label_cls)
        cls_acc += _cls_acc.item()

    return epoch_loss / iters, cls_acc * 100 / iters
64
+
65
+
66
def print_log(epoch, loss_train, loss_val, acc_train, acc_val, lr):
    """Write one epoch's losses, accuracies and learning rate to the text log and tensorboard."""
    log.printlog('---------------- Epoch {} ----------------'.format(epoch))

    for template, value in (('loss_train : {}', round(loss_train, 4)),
                            ('loss_val : {}', round(loss_val, 4)),
                            ('acc_train : {}%', round(acc_train, 4)),
                            ('acc_val : {}%', round(acc_val, 4))):
        log.printlog(template.format(value))

    for tag, train_value, val_value in (('loss', loss_train, loss_val),
                                        ('acc', acc_train, acc_val)):
        log.train_writer.add_scalar(tag, train_value, epoch)
        log.val_writer.add_scalar(tag, val_value, epoch)

    log.train_writer.add_scalar('lr', lr, epoch)
82
+
83
+
84
def save_checkpoint(state, is_best, filepath, filename):
    """Save `state` every 10 epochs (and at epoch 1); optionally also save a best-model copy."""
    epoch = state['epoch']
    if epoch % 10 == 0 or epoch == 1:
        os.makedirs(filepath, exist_ok=True)
        torch.save(state, filepath + filename)
        log.printlog('checkpoint saved!')
    if is_best:
        torch.save(state, '{}/model_best.pth'.format(filepath))
        log.printlog('best model saved!')
92
+
93
+
94
def adjust_learning_rate_withWarmup(optimizer, epoch, args):
    """Linear warmup to args.lr over args.warmup_epochs, then half-cycle cosine decay.

    `epoch` may be fractional for per-iteration scheduling. Returns the LR set.
    """
    if epoch < args.warmup_epochs:
        lr = args.lr * epoch / args.warmup_epochs
    else:
        progress = (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs)
        lr = args.lr * 0.5 * (1. + math.cos(math.pi * progress))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr
103
+
104
+
105
def main():
    """Train the single-phase identification model from scratch.

    Relies on module-level globals: args, device, rank, distributed, log
    (and local_rank when distributed).
    """
    print('>>>> Running on {} <<<<'.format(device))

    model = CPICANN(embed_dim=128, num_classes=args.num_classes)
    model.to(device)
    if rank == 0:
        log.printlog(model)

    trainset = XrdDataset(args.data_dir_train, args.anno_train)
    valset = XrdDataset(args.data_dir_val, args.anno_val)

    if distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(trainset, shuffle=True)
        val_sampler = torch.utils.data.distributed.DistributedSampler(valset, shuffle=True)

        train_loader = DataLoader(trainset, batch_size=128, num_workers=16, pin_memory=True, drop_last=True, sampler=train_sampler)
        val_loader = DataLoader(valset, batch_size=128, num_workers=16, pin_memory=True, drop_last=True, sampler=val_sampler)

        model = DDP(model, device_ids=[device], output_device=local_rank, find_unused_parameters=False)
    else:
        train_loader = DataLoader(trainset, batch_size=128, num_workers=16, pin_memory=True, shuffle=True)
        val_loader = DataLoader(valset, batch_size=128, num_workers=16, pin_memory=True, shuffle=True)

    # Project-local FocalLoss over args.num_classes classes.
    criterion = FocalLoss(class_num=args.num_classes, device=device)

    optimizer = optim.AdamW(model.parameters(), args.lr, weight_decay=1e-4)
    start_epoch = 0

    for epoch in range(start_epoch + 1, args.epochs + 1):
        if distributed:
            # Re-seed the samplers so shards reshuffle every epoch.
            train_sampler.set_epoch(epoch)
            val_sampler.set_epoch(epoch)

        loss_train, acc_train = run_one_epoch(model, train_loader, criterion, optimizer, epoch, mode='Train')

        loss_val, acc_val = run_one_epoch(model, val_loader, criterion, optimizer, epoch, mode='Eval')

        if rank == 0:
            print_log(epoch, loss_train, loss_val, acc_train, acc_val, optimizer.param_groups[0]['lr'])
            # NOTE(review): the whole optimizer object is pickled, not
            # optimizer.state_dict() — confirm this is intentional.
            save_checkpoint({'epoch': epoch,
                             'model': model.module.state_dict() if distributed else model.state_dict(),
                             'optimizer': optimizer}, is_best=False,
                            filepath='{}/checkpoints/'.format(log.get_path()),
                            filename='checkpoint_{:04d}.pth'.format(epoch))
150
+
151
if __name__ == '__main__':
    # Distributed setup: torchrun/launch exports RANK and WORLD_SIZE; fall
    # back to single-GPU mode otherwise.
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        rank = int(os.environ["RANK"])
        local_rank = int(os.environ["LOCAL_RANK"])
        torch.cuda.set_device(rank % torch.cuda.device_count())
        dist.init_process_group(backend="nccl")
        device = torch.device("cuda", local_rank)
        print(f"[init] == local rank: {local_rank}, global rank: {rank} ==")
        distributed = True
    else:
        rank = 0
        device = 'cuda:0'
        distributed = False

    parser = argparse.ArgumentParser()
    # NOTE(review): argparse type=bool treats any non-empty string
    # (including "False") as True — consider a store_true flag instead.
    parser.add_argument("--progress_bar", type=bool, default=True)

    parser.add_argument('--epochs', default=200, type=int, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('--warmup-epochs', default=20, type=int, metavar='N',
                        help='number of warmup epochs')
    parser.add_argument('--lr', '--learning-rate', default=8e-5, type=float,
                        metavar='LR', help='initial (base) learning rate', dest='lr')

    parser.add_argument('--data_dir_train', default='data/train/', type=str)
    parser.add_argument('--data_dir_val', default='data/val/', type=str)
    parser.add_argument('--anno_train', default='annotation/anno_train.csv', type=str,
                        help='path to annotation file for training data')
    parser.add_argument('--anno_val', default='annotation/anno_val.csv', type=str,
                        help='path to annotation file for validation data')
    parser.add_argument('--num_classes', default=23073, type=int, metavar='N')

    args = parser.parse_args()

    # Only rank 0 owns the output directory and tensorboard writers.
    if rank == 0:
        log = Logger(val=True)

    main()
    print('THE END')
src/util/logger.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+ from tensorboardX import SummaryWriter
5
+ from datetime import datetime
6
+
7
+
8
class Logger(object):
    """Combined stdout/file logger plus tensorboardX writers.

    Creates a timestamped run directory under output/ and mirrors every
    printlog() message both to the terminal and to a log file inside it.
    """

    def __init__(self, val=True, filename="print.log"):
        self.Time = datetime.now().strftime('%Y-%m-%d_%H%M')
        self.path = 'output/' + self.Time
        self.log_filename = filename
        # fix: idiomatic and race-free — the old
        # `os.makedirs(...) if os.path.exists(...) is False else None`
        # raced between the exists() check and the makedirs() call.
        os.makedirs(self.path, exist_ok=True)
        self.run_path = '{}/{}'.format(self.path, 'tb')

        # common log
        self.terminal = sys.stdout
        self.terminal.write(self.path)

        # init tensorboardX
        self.train_writer = None
        self.val_writer = None
        self.tensorboard_init(val)

    def printlog(self, message):
        """Write `message` (plus newline) to stdout and append it to the log file."""
        message = str(message)
        self.terminal.write(message + '\n')

        # fix: context manager guarantees the handle is closed even on error
        with open(os.path.join(self.path, self.log_filename), "a", encoding='utf8') as log:
            log.write(message + '\n')

    def tensorboard_init(self, val=True):
        """Create the train (and, when val, the val) SummaryWriter under <run>/tb."""
        if val:
            self.train_writer = SummaryWriter(self.run_path + '/train')
            self.val_writer = SummaryWriter(self.run_path + '/val')
        else:
            self.train_writer = SummaryWriter(self.run_path)

    def get_path(self):
        """Return the run directory created for this logger instance."""
        return self.path
src/val_bi-phase.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+ import torch
7
+ from tqdm import tqdm
8
+
9
+ from model.CPICANN import CPICANN
10
+
11
+
12
def getAnnoMap():
    """Build lookup tables from the structure annotation CSV (args.anno_struc).

    Returns (annos, elems): annos maps class id (column 1) to its full
    annotation row; elems maps class id to the set of element symbols parsed
    from the space-separated string in column 3.
    """
    annos, elems = {}, {}
    for row in pd.read_csv(args.anno_struc).values:
        class_id = row[1]
        annos[class_id] = row
        elems[class_id] = set(row[3].split(' '))
    return annos, elems
20
+
21
+
22
def filter_by_elem(logits, elemMap, elem):
    """Suppress (in place) logits of classes whose element set is not contained in `elem`.

    Masked classes get a large negative score so softmax drives them to ~0.
    Returns the same `logits` tensor for chaining.
    """
    for class_id, class_elems in elemMap.items():
        if class_elems <= elem:
            continue
        logits[:, class_id] = -10 ** 9
    return logits
28
+
29
+
30
def main():
    """Evaluate bi-phase identification on randomly mixed pairs of validation patterns.

    For each of args.infTimes trials: sample two validation patterns whose
    annotation column 6 differs, mix them at a random 20-80% ratio, run the
    model, and check whether BOTH ground-truth phases appear in the top-k
    predictions for k = 2..10.
    """
    annoMap, elemMap = getAnnoMap()

    model = CPICANN(embed_dim=128, num_classes=args.num_classes)

    loaded = torch.load(args.load_path)
    model.load_state_dict(loaded['model'])
    model.to(args.device)
    model.eval()
    print('loaded model from {}'.format(args.load_path))
    print(model)

    if args.elem_filtration:
        print('elem_filtration activated!')
    else:
        print('elem_filtration deactivated!')

    lst = pd.read_csv(args.anno_val).values

    # top10Hits[k] counts trials where both phases appear within the top-(k+1)
    # predictions; index 0 stays 0 (two phases cannot both be top-1).
    top10Hits = np.array([0] * 10, dtype=np.int32)

    dataLen = len(lst)
    pbar = tqdm(range(args.infTimes))
    for i in range(args.infTimes):
        # Re-sample until the two patterns differ in annotation column 6
        # (presumably the crystal system — confirm against anno_struc.csv).
        while True:
            c1, c2 = np.random.randint(0, dataLen, 2)
            anno1, anno2 = lst[c1], lst[c2]
            if anno1[6] != anno2[6]:
                break

        # id1, id2 = int(lst[c1][0].split('_')[0]), int(lst[c2][0].split('_')[0])
        # formula1, formula2 = lst[c1][2], lst[c2][2]
        data1 = pd.read_csv(os.path.join(args.data_dir, f'{lst[c1][0]}.csv')).values
        data2 = pd.read_csv(os.path.join(args.data_dir, f'{lst[c2][0]}.csv')).values

        # Random mixing ratio: mixRate1 in [20, 80], the rest for phase 2.
        mixRate1 = np.random.randint(20, 81)
        mixRate2 = 100 - mixRate1

        data = mixRate1 * data1 + mixRate2 * data2
        # Union of both phases' element sets (column 3 is space-separated),
        # used for the optional element-based filtering below.
        elem = set(lst[c2][3].strip().split(' ')) | set(lst[c1][3].strip().split(' '))

        def runFile(v):
            # Min-max normalize to [0, 100], matching the training-time range.
            # NOTE(review): min()/max() iterate the 2-D `.values` array by
            # row — this assumes each pattern CSV has a single column; confirm.
            min_i, scale = min(v), max(v) - min(v)
            v = (v - min_i) / scale * 100

            v = torch.tensor(v, dtype=torch.float32).reshape(1, 1, -1)
            v = v.to(args.device)
            with torch.no_grad():
                logits = model(v)

            # filter by elements
            if args.elem_filtration:
                logits = filter_by_elem(logits, elemMap, elem)

            _pred = torch.nn.functional.softmax(logits.squeeze(), dim=0)
            return _pred.topk(10)

        top10 = runFile(data)

        # m[no] marks which ground-truth phase (1 or 2) the no-th prediction
        # matches; 0 means no match. (`indice` and `rate` are unused — the
        # loop only needs `no`.)
        m = [0] * 10
        for no, (indice, rate) in enumerate(zip(top10.indices, top10.values)):
            pred = annoMap[top10.indices[no].item()]

            # The first 7 characters of the file name are parsed as the
            # structure id (presumably — confirm against the annotation format).
            if pred[0] == int(anno1[0][:7]):
                m[no] = 1
            elif pred[0] == int(anno2[0][:7]):
                m[no] = 2

        # Credit the smallest top-k window containing both phases; the slice
        # assignment makes the counters cumulative over k.
        if 1 in m[:2] and 2 in m[:2]:
            top10Hits[1:] += 1
        elif 1 in m[:3] and 2 in m[:3]:
            top10Hits[2:] += 1
        elif 1 in m[:4] and 2 in m[:4]:
            top10Hits[3:] += 1
        elif 1 in m[:5] and 2 in m[:5]:
            top10Hits[4:] += 1
        elif 1 in m[:6] and 2 in m[:6]:
            top10Hits[5:] += 1
        elif 1 in m[:7] and 2 in m[:7]:
            top10Hits[6:] += 1
        elif 1 in m[:8] and 2 in m[:8]:
            top10Hits[7:] += 1
        elif 1 in m[:9] and 2 in m[:9]:
            top10Hits[8:] += 1
        elif 1 in m[:10] and 2 in m[:10]:
            top10Hits[9:] += 1

        pbar.update(1)
    pbar.close()

    for i in range(1, 10):
        print('top{}Hits: {}%'.format(i + 1, round(top10Hits[i] / args.infTimes * 100, 2)))
122
+
123
+
124
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument('--device', default='cuda:0', type=str)
    parser.add_argument('--data_dir', default='data/val/', type=str)
    parser.add_argument('--infTimes', default=1000, type=int, help='number of mixed pattern to be inferenced')
    # NOTE(review): help text says "single-phase" but the default checkpoint
    # is the bi-phase model — the help string looks copy-pasted.
    parser.add_argument('--load_path', default='pretrained/bi-phase_checkpoint_2000.pth', type=str,
                        help='path to load pretrained single-phase identification model')
    parser.add_argument('--anno_struc', default='annotation/anno_struc.csv', type=str,
                        help='path to annotation file for training data')
    parser.add_argument('--anno_val', default='annotation/anno_val.csv', type=str,
                        help='path to annotation file for validation data')
    parser.add_argument('--num_classes', default=23073, type=int, metavar='N')

    # NOTE(review): argparse type=bool treats any non-empty string (even
    # "False") as True; only the default (False) behaves as expected —
    # consider a store_true flag.
    parser.add_argument('--elem_filtration', default=False, type=bool)

    args = parser.parse_args()

    main()
    print('THE END')
src/val_single-phase.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ import torch
6
+ from torch.utils.data import DataLoader
7
+ from tqdm import tqdm
8
+
9
+ from model.CPICANN import CPICANN
10
+ from model.dataset import XrdDataset
11
+
12
+
13
def get_cs_anno():
    """Map class id (column 1) to its column-6 value from the annotation CSV.

    The caller uses the value as a 7-way crystal-system bin index.
    """
    rows = pd.read_csv(args.anno_struc).values
    return {row[1]: row[6] for row in rows}
19
+
20
+
21
def get_acc(cls, label):
    """Return (top-1 accuracy, number of correct predictions) for a batch of logits."""
    matches = cls.argmax(1) == label.int()
    correct_cnt = sum(matches)
    return correct_cnt / cls.shape[0], correct_cnt
25
+
26
+
27
def run_one_epoch(model, dataloader):
    """Run one evaluation pass and collect overall plus per-crystal-system accuracy.

    Returns (mean_loss, mean_batch_acc_percent, correct_cnt, total_cnt,
    confusion_matrix, per_cs_correct, per_cs_total). Uses the module-level
    global `args`.
    """
    model.eval()

    csAnno = get_cs_anno()  # class id -> 7-way bin index (annotation column 6)

    csCorrect = [0 for _ in range(7)]
    csTotal = [0 for _ in range(7)]
    cMtrx = [[0 for _ in range(7)] for _ in range(7)]  # rows: ground truth, cols: prediction
    # NOTE(review): epoch_loss is never accumulated (no criterion here), so
    # the first returned value is always 0.
    epoch_loss, cls_acc = 0, 0
    correct_cnt, total_cnt = 0, 0
    pbar = tqdm(total=len(dataloader.dataset), desc='Evaluating... ', unit='data')
    iters = len(dataloader)
    for i, batch in enumerate(dataloader):

        data = batch[0].to(args.device)
        label_cls = batch[1].to(args.device)

        with torch.no_grad():
            logits = model(data)
            logits.to(args.device)  # NOTE(review): no-op — the result is discarded

        pbar.update(len(data))

        _cls_acc, correct = get_acc(logits, label_cls)
        cls_acc += _cls_acc.item()

        correct_cnt += correct.item()
        total_cnt += len(data)

        # Per-crystal-system confusion-matrix bookkeeping.
        preds = logits.argmax(1)
        for gt, pred in zip(label_cls, preds):
            cs_gt = csAnno[gt.item()]
            cMtrx[cs_gt][csAnno[pred.item()]] += 1
            csTotal[cs_gt] += 1
            if gt == pred:
                csCorrect[cs_gt] += 1

    return epoch_loss / iters, cls_acc * 100 / iters, correct_cnt, total_cnt, cMtrx, csCorrect, csTotal
65
+
66
+
67
def main():
    """Validate the pretrained single-phase model and print per-crystal-system stats."""
    model = CPICANN(embed_dim=128, num_classes=args.num_classes)

    loaded = torch.load(args.load_path)
    model.load_state_dict(loaded['model'])
    model.to(args.device)
    model.eval()
    print('loaded model from {}'.format(args.load_path))

    print(model)

    valset = XrdDataset(args.data_dir, args.anno_val)
    val_loader = DataLoader(valset, batch_size=128, num_workers=16, pin_memory=True, shuffle=True)

    loss_val, acc_val, correct_cnt, total_cnt, cMtrx, csCorrect, csTotal = run_one_epoch(model, val_loader)

    # NOTE(review): run_one_epoch never accumulates a loss, so loss_val is
    # always 0 — this print is misleading.
    print("loss_val: ", loss_val)
    print("acc_val: ", acc_val)
    print("{}% ({}/{})".format(round(correct_cnt / total_cnt, 5) * 100, correct_cnt, total_cnt))

    # Row-normalized confusion matrix: ground-truth rows, prediction columns.
    sums = np.array(cMtrx).sum(axis=1)
    for i, row in enumerate(cMtrx):
        buf = ""
        for j, v in enumerate(row):
            buf += "{}({}%) ".format(v, round(v / sums[i] * 100, 2))
        print(buf)

    print("csCorrect: ", csCorrect)
    print("csTotal: ", csTotal)
97
+
98
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    parser.add_argument('--device', default='cuda:0', type=str)
    parser.add_argument('--data_dir', default='data/val/', type=str)
    parser.add_argument('--load_path', default='pretrained/single-phase_checkpoint_0200.pth', type=str,
                        help='path to load pretrained single-phase identification model')
    # NOTE(review): help text says "training data" but this is the structure
    # annotation file — looks copy-pasted.
    parser.add_argument('--anno_struc', default='annotation/anno_struc.csv', type=str,
                        help='path to annotation file for training data')
    parser.add_argument('--anno_val', default='annotation/anno_val.csv', type=str,
                        help='path to annotation file for validation data')
    parser.add_argument('--num_classes', default=23073, type=int, metavar='N')

    args = parser.parse_args()

    main()

    print('THE END')