weiheng-1009 committed
Commit cbff41a · 1 Parent(s): 958e3c5

added code for running

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +1 -0
  2. .gitignore +5 -0
  3. README.md +83 -3
  4. accelerate_configs/.ipynb_checkpoints/deepspeed_zero2-checkpoint.yaml +22 -0
  5. accelerate_configs/.ipynb_checkpoints/deepspeed_zero3-checkpoint.yaml +23 -0
  6. accelerate_configs/deepspeed_zero1.yaml +20 -0
  7. accelerate_configs/deepspeed_zero2.yaml +23 -0
  8. accelerate_configs/deepspeed_zero3.yaml +23 -0
  9. accelerate_configs/multi_gpu.yaml +16 -0
  10. accelerate_configs/single_gpu.yaml +16 -0
  11. data/huggingface_data.py +31 -0
  12. dataset_csv/gtex_slide_url_info.csv +0 -0
  13. dataset_csv/indices_and_slide_ids.csv +0 -0
  14. dataset_csv/indices_and_slide_ids_with_folds.csv +0 -0
  15. dataset_csv/tcga_slide_url_info.csv +0 -0
  16. demo/CONCH_clip.py +11 -0
  17. demo/Trainer_Mixtrial_ds_demo.py +154 -0
  18. demo/Trainer_bert_demo.py +76 -0
  19. demo/UNI_clip.py +33 -0
  20. demo/path_clip.py +28 -0
  21. demo/peft_demo.py +17 -0
  22. demo/trl_demo.py +175 -0
  23. evaluation/cider_score/cider_demo.ipynb +290 -0
  24. evaluation/cider_score/cidereval/__init__.py +5 -0
  25. evaluation/cider_score/cidereval/cider/__init__.py +1 -0
  26. evaluation/cider_score/cidereval/cider/cider.py +67 -0
  27. evaluation/cider_score/cidereval/cider/cider_scorer.py +274 -0
  28. evaluation/cider_score/cidereval/ciderD/__init__.py +1 -0
  29. evaluation/cider_score/cidereval/ciderD/ciderD.py +57 -0
  30. evaluation/cider_score/cidereval/ciderD/ciderD_scorer.py +265 -0
  31. evaluation/cider_score/cidereval/data/__init__.py +0 -0
  32. evaluation/cider_score/cidereval/data/coco-val.p +3 -0
  33. evaluation/cider_score/cidereval/eval.py +40 -0
  34. evaluation/cider_score/cidereval/scorers.py +76 -0
  35. evaluation/cider_score/cidereval/tokenizer/__init__.py +4 -0
  36. evaluation/cider_score/cidereval/tokenizer/ptbtokenizer.py +112 -0
  37. evaluation/cider_score/cidereval/tokenizer/simpletokenizer.py +106 -0
  38. evaluation/cider_score/output_sample.xls +0 -0
  39. filter_dataset.py +43 -0
  40. gigapath/__init__.py +0 -0
  41. gigapath/__pycache__/__init__.cpython-310.pyc +0 -0
  42. gigapath/__pycache__/pos_embed.cpython-310.pyc +0 -0
  43. gigapath/__pycache__/slide_encoder.cpython-310.pyc +0 -0
  44. gigapath/__pycache__/slide_encoder_vision.cpython-310.pyc +0 -0
  45. gigapath/classification_head.py +92 -0
  46. gigapath/pipeline.py +190 -0
  47. gigapath/pos_embed.py +105 -0
  48. gigapath/preprocessing/__init__.py +0 -0
  49. gigapath/preprocessing/data/__init__.py +0 -0
  50. gigapath/preprocessing/data/box_utils.py +145 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.p filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,5 @@
+ Conch_Llama3ins_Instruct3/
+ Conch_Mistral_Instruct3/
+ output/
+ logs/
+ wandb/
README.md CHANGED
@@ -1,3 +1,83 @@
- ---
- license: mit
- ---
+ # PathLLM
+
+ Welcome to ALPaCA. This repository aims to provide a straightforward reproduction of ALPaCA. To run ALPaCA, please first download **Llama3.1-8b-instruct** as the base model.
+
+ For the TCGA and GTEx data, you can visit the [GDC Data Portal](https://portal.gdc.cancer.gov/) and the [GTEx Portal](https://www.gtexportal.org/) to download the slides and extract features yourself. Alternatively, you can use the features we have already extracted with CONCH: `CNX-PathLLM/GMM_Embeddings` and `CNX-PathLLM/GTEx-TCGA-Embeddings`. After downloading, please unzip them into the respective `TCGA-Embedding` and `GMM_Embedding` folders.
+
+ Please ensure you have access to the pathological image description data:
+ `CNX-PathLLM/TCGA-WSI-Description-4onew`, `CNX-PathLLM/TCGA-WSI-Description-4omini`, and `CNX-PathLLM/GTEx-WSI-Description`.
+
+ Please also ensure you have access to the WSI-QA data:
+ `CNX-PathLLM/TCGA-WSI-CloseQA-Balanced`, `CNX-PathLLM/GTEx-WSI-CloseQA-Balanced`, `CNX-PathLLM/TCGA-WSI-OpenQA`, and `CNX-PathLLM/GTEx-WSI-OpenQA`.
+
+ After completing the setup above and creating the correct Python environment, you can start training with one of the provided shell scripts, e.g., `run_wsi_stage*.sh`, or follow the instructions in the [Train Step](#train-step-1) sections below.
+
+ Do not forget to adjust the TCGA and GMM embedding paths to reflect your own file locations.
+
+
+ ## Settings
+
+ ### Different Aggregation Strategies
+ You can change the aggregation strategy with the `--agg_strategy` flag, e.g., `qformer`, `abmil`, or `longnet`. To reproduce the method described in our paper, set `--agg_strategy gmm,longnet` in the `.sh` script.
+
+ ### Configurable Settings
+
+ (1) `--vision_adaptor False --hierarchical_adaptor True`
+
+ (2) `--vision_adaptor False --hierarchical_adaptor False`
+
+ (3) `--vision_adaptor True --hierarchical_adaptor True`
+
+ ```
+ --vision_adaptor False (vision-query-question interaction)
+ --vision_adaptor True (vision-query interaction)
+
+ --hierarchical_adaptor False (same adaptor for all levels)
+ --hierarchical_adaptor True (different adaptors for different levels)
+ ```
+
+ ## Train Step 1 ##
+ ```
+ accelerate launch --config_file=./accelerate_configs/deepspeed_zero2.yaml run_wsi.py --learning_rate 1e-4 --max_steps 10000 --warmup_steps 100 \
+ --gpu 2 --train_batch_size 4 --eval_batch_size 2 --max_seq_length 512 \
+ --agg_strategy gmm,longnet --embed_dim 512 --vision_adaptor False --hierachical_token True --hierachical_adaptor True \
+ --n_heads 32,16,8 --llm_requires_grad False --resume_from_checkpoint False \
+ --llm_name /data_local/pxb/LLM_models/llama3/llama3.1-8b-instruct \
+ --dataset_name_list CNX-PathLLM/TCGA-WSI-Description-4onew,CNX-PathLLM/TCGA-WSI-Description-4omini,CNX-PathLLM/GTEx-WSI-Description \
+ --data_cache_dir /data_local/pxb/CNX-PathLLM/.cache \
+ --fea_root /path/to/CNX-PathLLM/GTEx-TCGA-Embeddings \
+ --gmm_root /path/to/GMM_Embeddings \
+ --output_dir path/to/output/of/step1
+ ```
+
+ ## Train Step 2 ##
+ ```
+ accelerate launch --config_file=./accelerate_configs/deepspeed_zero2.yaml run_wsi.py --max_steps 20000 --warmup_steps 10 \
+ --gpu 2 --train_batch_size 8 --eval_batch_size 2 --max_seq_length 256 \
+ --agg_strategy gmm,longnet --embed_dim 512 --vision_adaptor False --hierachical_token True --hierachical_adaptor True \
+ --n_heads 32,16,8 --llm_requires_grad True --resume_from_checkpoint False \
+ --llm_name /data_local/pxb/LLM_models/llama3/llama3.1-8b-instruct \
+ --dataset_name_list CNX-PathLLM/TCGA-WSI-CloseQA-Balanced,CNX-PathLLM/GTEx-WSI-CloseQA-Balanced,CNX-PathLLM/TCGA-WSI-OpenQA,CNX-PathLLM/GTEx-WSI-OpenQA \
+ --data_cache_dir /data_local/pxb/CNX-PathLLM/.cache \
+ --fea_root /path/to/CNX-PathLLM/GTEx-TCGA-Embeddings \
+ --gmm_root /path/to/GMM_Embeddings \
+ --output_dir path/to/output/of/step2 \
+ --ckpt_path path/to/ckpt.bin/of/step1
+ ```
+
+ ## Train Step 3 ##
+ To continue training on the detailed BRCA-specific dataset, make sure you have access to that dataset and replace the dataset names in the command above with the ones you want.
+
+ ## Test of Step 2 General QA ##
+
+ ```
+ python test_wsi.py --max_seq_length 128 --batch_size 1 --select_data_num -1 --eval_sample_size -1 --n_heads 32,16,8 --llm_name /data_local/pxb/LLM_models/llama3/llama3.1-8b-instruct --vision_adaptor False --hierachical_token True --hierachical_adaptor True \
+ --shuffle False --data_cache_dir /data_local/pxb/CNX-PathLLM/.cache \
+ --dataset_name_list CNX-PathLLM/TCGA-WSI-CloseQA-Balanced,CNX-PathLLM/GTEx-WSI-CloseQA-Balanced,CNX-PathLLM/TCGA-WSI-OpenQA,CNX-PathLLM/GTEx-WSI-OpenQA \
+ --agg_strategy gmm,longnet --embed_dim 512 \
+ --fea_root /path/to/CNX-PathLLM/GTEx-TCGA-Embeddings \
+ --gmm_root /path/to/GMM_Embeddings \
+ --ckpt_path path/to/ckpt.bin/of/step2 \
+ --results_save_path /path/to/the/output.csv \
+ --use_peft False
+ ```
accelerate_configs/.ipynb_checkpoints/deepspeed_zero2-checkpoint.yaml ADDED
@@ -0,0 +1,22 @@
+ compute_environment: LOCAL_MACHINE
+ debug: false
+ deepspeed_config:
+   deepspeed_multinode_launcher: standard
+   gradient_accumulation_steps: "auto"
+   offload_optimizer_device: "cpu"
+   offload_param_device: "cpu"
+   zero3_init_flag: false
+   zero_stage: 2
+ distributed_type: DEEPSPEED
+ downcast_bf16: 'auto'
+ machine_rank: 0
+ main_training_function: main
+ mixed_precision: 'bf16'
+ num_machines: 1
+ num_processes: 2
+ rdzv_backend: static
+ same_network: true
+ tpu_env: []
+ tpu_use_cluster: false
+ tpu_use_sudo: false
+ use_cpu: false
accelerate_configs/.ipynb_checkpoints/deepspeed_zero3-checkpoint.yaml ADDED
@@ -0,0 +1,23 @@
+ compute_environment: LOCAL_MACHINE
+ debug: false
+ deepspeed_config:
+   deepspeed_multinode_launcher: standard
+   gradient_accumulation_steps: 8
+   offload_optimizer_device: "cpu"
+   offload_param_device: "cpu"
+   zero3_init_flag: true
+   zero3_save_16bit_model: true
+   zero_stage: 3
+ distributed_type: DEEPSPEED
+ downcast_bf16: 'auto'
+ machine_rank: 0
+ main_training_function: main
+ mixed_precision: 'bf16'
+ num_machines: 1
+ num_processes: 2
+ rdzv_backend: static
+ same_network: true
+ tpu_env: []
+ tpu_use_cluster: false
+ tpu_use_sudo: false
+ use_cpu: false
accelerate_configs/deepspeed_zero1.yaml ADDED
@@ -0,0 +1,20 @@
+ compute_environment: LOCAL_MACHINE
+ debug: false
+ deepspeed_config:
+   deepspeed_multinode_launcher: standard
+   gradient_accumulation_steps: "auto"
+   zero3_init_flag: false
+   zero_stage: 1
+ distributed_type: DEEPSPEED
+ downcast_bf16: 'no'
+ machine_rank: 0
+ main_training_function: main
+ mixed_precision: 'bf16'
+ num_machines: 1
+ num_processes: 2
+ rdzv_backend: static
+ same_network: true
+ tpu_env: []
+ tpu_use_cluster: false
+ tpu_use_sudo: false
+ use_cpu: false
accelerate_configs/deepspeed_zero2.yaml ADDED
@@ -0,0 +1,23 @@
+ compute_environment: LOCAL_MACHINE
+ debug: false
+ deepspeed_config:
+   deepspeed_multinode_launcher: standard
+   gradient_accumulation_steps: "auto"
+   offload_optimizer_device: "cpu"
+   offload_param_device: "cpu"
+   zero3_init_flag: false
+   zero_stage: 2
+ distributed_type: DEEPSPEED
+ downcast_bf16: 'auto'
+ machine_rank: 0
+ main_training_function: main
+ mixed_precision: 'bf16'
+ num_machines: 1
+ num_processes: 2
+ rdzv_backend: static
+ same_network: true
+ tpu_env: []
+ tpu_use_cluster: false
+ tpu_use_sudo: false
+ use_cpu: false
+ main_process_port: 29502
accelerate_configs/deepspeed_zero3.yaml ADDED
@@ -0,0 +1,23 @@
+ compute_environment: LOCAL_MACHINE
+ debug: false
+ deepspeed_config:
+   deepspeed_multinode_launcher: standard
+   gradient_accumulation_steps: 8
+   offload_optimizer_device: "cpu"
+   offload_param_device: "cpu"
+   zero3_init_flag: true
+   zero3_save_16bit_model: true
+   zero_stage: 3
+ distributed_type: DEEPSPEED
+ downcast_bf16: 'auto'
+ machine_rank: 0
+ main_training_function: main
+ mixed_precision: 'bf16'
+ num_machines: 1
+ num_processes: 2
+ rdzv_backend: static
+ same_network: true
+ tpu_env: []
+ tpu_use_cluster: false
+ tpu_use_sudo: false
+ use_cpu: false
accelerate_configs/multi_gpu.yaml ADDED
@@ -0,0 +1,16 @@
+ compute_environment: LOCAL_MACHINE
+ debug: false
+ distributed_type: MULTI_GPU
+ downcast_bf16: 'no'
+ gpu_ids: all
+ machine_rank: 0
+ main_training_function: main
+ mixed_precision: 'bf16'
+ num_machines: 1
+ num_processes: 8
+ rdzv_backend: static
+ same_network: true
+ tpu_env: []
+ tpu_use_cluster: false
+ tpu_use_sudo: false
+ use_cpu: false
accelerate_configs/single_gpu.yaml ADDED
@@ -0,0 +1,16 @@
+ compute_environment: LOCAL_MACHINE
+ debug: false
+ distributed_type: "NO"
+ downcast_bf16: 'no'
+ gpu_ids: all
+ machine_rank: 0
+ main_training_function: main
+ mixed_precision: 'bf16'
+ num_machines: 1
+ num_processes: 8
+ rdzv_backend: static
+ same_network: true
+ tpu_env: []
+ tpu_use_cluster: false
+ tpu_use_sudo: false
+ use_cpu: false
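Any of these configs can be passed directly to `accelerate launch`, as the README's training commands do. A minimal sketch (here `run_wsi.py` and its flags stand in for whichever script you are launching):

```
accelerate launch --config_file=./accelerate_configs/single_gpu.yaml run_wsi.py --learning_rate 1e-4 ...
```

Adjust `num_processes` and `gpu_ids` in the chosen YAML to match your hardware before launching.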
data/huggingface_data.py ADDED
@@ -0,0 +1,31 @@
+ import pandas as pd
+ import datasets
+ import re
+ import os
+ import shutil
+ # Split into train / test / val sets
+ splits = ["test", "train", "val"]
+ # Process each split separately
+ for item in splits:
+     os.makedirs(f"our_clean/{item}/", exist_ok=True)
+     data = pd.read_csv(f"{item}.csv")
+     data["image_path"] = data["image_path"].map(lambda x: x.split("/")[-1])
+     # Lightly clean the text content
+     f = lambda x: re.sub(' +', ' ', str(x).lower()).replace(" ?", "?").strip()
+     # Hugging Face imagefolder datasets require a "file_name" column
+     data.insert(0, "file_name", "")
+     data["question"] = data["question"].apply(f)
+     data["answer"] = data["answer"].apply(f)
+     # Pair each image with its text (use .at to avoid chained-assignment issues)
+     for i, row in data.iterrows():
+         file_name = f"img_{i}.jpg"
+         data.at[i, "file_name"] = file_name
+         shutil.copyfile(src=f"author-folder/pvqa/pvqa/images/{item}/{row['image']}.jpg", dst=f"our_clean/{item}/{file_name}")
+     # Drop columns that are no longer needed
+     _ = data.pop("image")
+     data.drop(["pathology", "image_path"], axis=1, inplace=True)
+     data.to_csv(f"our_clean/{item}/metadata.csv", index=False)
+ # Build an imagefolder-format dataset; data_dir is the data folder, see https://huggingface.co/docs/datasets/en/image_load
+ dataset = datasets.load_dataset("imagefolder", data_dir="our_clean/")
+ # Publish the dataset
+ dataset.push_to_hub("CNX-PathLLM/PVQAClean")
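Once pushed, the published dataset can be loaded back from the Hub. A minimal sketch (assumes your account has access to the `CNX-PathLLM` organization):

```
from datasets import load_dataset

# The train/test/val splits are preserved by push_to_hub
dataset = load_dataset("CNX-PathLLM/PVQAClean")
print(dataset["train"][0]["question"])
```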
dataset_csv/gtex_slide_url_info.csv ADDED
The diff for this file is too large to render. See raw diff
dataset_csv/indices_and_slide_ids.csv ADDED
The diff for this file is too large to render. See raw diff
dataset_csv/indices_and_slide_ids_with_folds.csv ADDED
The diff for this file is too large to render. See raw diff
dataset_csv/tcga_slide_url_info.csv ADDED
The diff for this file is too large to render. See raw diff
demo/CONCH_clip.py ADDED
@@ -0,0 +1,11 @@
+ from conch.open_clip_custom import create_model_from_pretrained
+ import torch
+ from PIL import Image
+
+ model, preprocess = create_model_from_pretrained('conch_ViT-B-16', "/raid/hpc/hekai/WorkShop/My_project/PathLLM_new/load_weights/conch/pytorch_model.bin")
+
+ image = Image.open("/bask/homes/a/asiw9691/PathVLM/source/Flamingo/med-flamingo/img/test_path5.jpg")
+ image = preprocess(image).unsqueeze(0)
+ with torch.inference_mode():
+     image_embs = model.encode_image(image, proj_contrast=False, normalize=False)
+ print(image_embs.shape)
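The demo extracts raw, pre-projection features (`proj_contrast=False, normalize=False`). For CLIP-style image-text matching, the same call can instead return projected, normalized embeddings; a sketch using the two flags already shown above:

```
with torch.inference_mode():
    # Embeddings in CONCH's contrastive (CLIP-style) space
    clip_embs = model.encode_image(image, proj_contrast=True, normalize=True)
print(clip_embs.shape)
```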
demo/Trainer_Mixtrial_ds_demo.py ADDED
@@ -0,0 +1,154 @@
+ # accelerate launch --config_file=/raid/hpc/hekai/WorkShop/My_project/PathLLM_new/accelerate_configs/deepspeed_zero2.yaml Trainer_Mixtrial_demo.py
+
+ import os
+ os.environ["WANDB_MODE"] = "offline"
+ os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+ # os.environ["CUDA_VISIBLE_DEVICES"] = "5,6"
+ os.environ["CUDA_VISIBLE_DEVICES"] = "5"
+
+ import torch
+ from torch import nn
+ from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModel, AutoModelForCausalLM, BitsAndBytesConfig, HfArgumentParser, PreTrainedModel
+ from accelerate import Accelerator
+ from datasets import load_dataset
+ from typing import Optional
+ from dataclasses import dataclass, field
+
+
+ @dataclass
+ class ScriptArguments:
+     """
+     The name of the causal LM model we wish to fine-tune with SFTTrainer
+     """
+
+     model_name: Optional[str] = field(default="mistralai/Mistral-7B-Instruct-v0.2", metadata={"help": "the model name, e.g. meta-llama/Llama-2-7b-chat-hf"})
+     dataset_name: Optional[str] = field(default="stingning/ultrachat", metadata={"help": "the dataset name"})
+     dataset_text_field: Optional[str] = field(default="text", metadata={"help": "the text field of the dataset"})
+     log_with: Optional[str] = field(default="wandb", metadata={"help": "use 'wandb' to log with wandb"})
+     learning_rate: Optional[float] = field(default=2.0e-5, metadata={"help": "the learning rate"})
+     batch_size: Optional[int] = field(default=1, metadata={"help": "the batch size"})
+     seq_length: Optional[int] = field(default=1024, metadata={"help": "input sequence length"})
+     gradient_accumulation_steps: Optional[int] = field(default=8, metadata={"help": "the number of gradient accumulation steps"})
+
+     evaluation_strategy: Optional[str] = field(default="steps", metadata={"help": "'epoch' or 'steps'"})
+     eval_steps: Optional[int] = field(default=1000, metadata={"help": "the number of steps between evaluations"})
+
+     load_in_8bit: Optional[bool] = field(default=False, metadata={"help": "load the model in 8-bit precision"})
+     load_in_4bit: Optional[bool] = field(default=True, metadata={"help": "load the model in 4-bit precision"})
+     use_peft: Optional[bool] = field(default=True, metadata={"help": "whether to use PEFT to train adapters"})
+     trust_remote_code: Optional[bool] = field(default=False, metadata={"help": "enable `trust_remote_code`"})
+
+     output_dir: Optional[str] = field(default="output", metadata={"help": "the output directory"})
+     peft_lora_r: Optional[int] = field(default=64, metadata={"help": "the r parameter of the LoRA adapters"})
+     peft_lora_alpha: Optional[int] = field(default=16, metadata={"help": "the alpha parameter of the LoRA adapters"})
+     logging_steps: Optional[int] = field(default=5, metadata={"help": "the number of logging steps"})
+     token: Optional[bool] = field(default=True, metadata={"help": "use the HF auth token to access the model"})
+     num_train_epochs: Optional[int] = field(default=3, metadata={"help": "the number of training epochs"})
+     max_steps: Optional[int] = field(default=-1, metadata={"help": "the number of training steps"})
+     save_steps: Optional[int] = field(default=1000, metadata={"help": "number of update steps between two checkpoint saves"})
+     save_total_limit: Optional[int] = field(default=10, metadata={"help": "limits the total number of checkpoints"})
+     push_to_hub: Optional[bool] = field(default=False, metadata={"help": "push the model to the HF Hub"})
+     hub_model_id: Optional[str] = field(default="mistral-7b-finetuned-ultrachat", metadata={"help": "the name of the model on the HF Hub"})
+
+ parser = HfArgumentParser(ScriptArguments)
+ script_args = parser.parse_args_into_dataclasses()[0]
+
+
+ class MyCustomModel(nn.Module):
+     def __init__(self, script_args, num_labels):
+         super(MyCustomModel, self).__init__()
+         self.num_labels = num_labels
+         # quantization_config, device_map and torch_dtype are defined below, before the model is instantiated
+         self.pretrained_model = AutoModelForCausalLM.from_pretrained(script_args.model_name,
+                                                                      quantization_config=quantization_config,
+                                                                      device_map=device_map,
+                                                                      trust_remote_code=script_args.trust_remote_code,
+                                                                      torch_dtype=torch_dtype,
+                                                                      token=script_args.token)
+         self.classifier = nn.Linear(self.pretrained_model.config.hidden_size, num_labels)
+
+     def forward(self, input_ids, attention_mask=None, labels=None):
+         # A causal LM output has no last_hidden_state; request hidden states and take the last layer
+         outputs = self.pretrained_model(input_ids, attention_mask=attention_mask, output_hidden_states=True)
+         sequence_output = outputs.hidden_states[-1][:, 0, :]
+         logits = self.classifier(sequence_output)
+
+         loss = None
+         if labels is not None:
+             loss_fct = nn.CrossEntropyLoss()
+             loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
+
+         return {"loss": loss, "logits": logits} if loss is not None else logits
+
+
+ dataset = load_dataset("glue", "mrpc")
+ tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
+ tokenizer.pad_token = tokenizer.eos_token
+
+ def preprocess_function(examples):
+     # Tokenize the inputs (pair of sentences); max_length is kept tiny for a quick demo
+     return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True, padding=True, max_length=10)
+
+
+ from transformers import DataCollatorWithPadding
+ data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
+
+ small_train_dataset = dataset["train"].shuffle(seed=42).select(range(500))  # select the first 500 samples
+ small_train_dataset = small_train_dataset.map(preprocess_function, batched=True)
+
+
+ if script_args.load_in_8bit and script_args.load_in_4bit:
+     raise ValueError("You can't load the model in 8 bits and 4 bits at the same time")
+ elif script_args.load_in_8bit or script_args.load_in_4bit:
+     quantization_config = BitsAndBytesConfig(
+         load_in_8bit=script_args.load_in_8bit, load_in_4bit=script_args.load_in_4bit
+     )
+     # Copy the model to each device
+     device_map = {"": Accelerator().local_process_index}
+     torch_dtype = torch.bfloat16
+ else:
+     device_map = None
+     quantization_config = None
+     torch_dtype = None
+
+
+ model = MyCustomModel(script_args, num_labels=2)
+
+
+ training_args = TrainingArguments(
+     output_dir=script_args.output_dir,
+     per_device_train_batch_size=script_args.batch_size,
+     gradient_accumulation_steps=script_args.gradient_accumulation_steps,
+     # gradient_checkpointing=True,
+     learning_rate=script_args.learning_rate,
+     logging_steps=script_args.logging_steps,
+     num_train_epochs=script_args.num_train_epochs,
+     max_steps=script_args.max_steps,
+     report_to=script_args.log_with,
+     save_steps=script_args.save_steps,
+     save_total_limit=script_args.save_total_limit,
+     bf16=True,
+     lr_scheduler_type="cosine",
+     warmup_ratio=0.1,
+     evaluation_strategy=script_args.evaluation_strategy,
+     eval_steps=script_args.eval_steps,
+     logging_first_step=True,
+ )
+
+
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=small_train_dataset,
+     data_collator=data_collator,
+     compute_metrics=None,
+ )
+
+
+ trainer.train()
+ # model.save_pretrained("./my_custom_model")
demo/Trainer_bert_demo.py ADDED
@@ -0,0 +1,76 @@
+ import os
+ os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+ os.environ["CUDA_VISIBLE_DEVICES"] = "5"
+
+ import torch
+ from torch import nn
+ from transformers import Trainer, TrainingArguments
+ from transformers import AutoTokenizer, AutoModel
+ from datasets import load_dataset
+
+ # Custom model: inherit from nn.Module or from a transformers pretrained-model class
+ class MyCustomModel(nn.Module):
+     def __init__(self, num_labels):
+         super(MyCustomModel, self).__init__()
+         self.num_labels = num_labels
+         self.pretrained_model = AutoModel.from_pretrained("bert-base-uncased")
+         self.classifier = nn.Linear(self.pretrained_model.config.hidden_size, num_labels)
+
+     def forward(self, input_ids, attention_mask=None, labels=None):
+         outputs = self.pretrained_model(input_ids, attention_mask=attention_mask)
+         sequence_output = outputs[1]  # BERT pooler output
+         logits = self.classifier(sequence_output)
+
+         loss = None
+         if labels is not None:
+             loss_fct = nn.CrossEntropyLoss()
+             loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
+
+         return {"loss": loss, "logits": logits} if loss is not None else logits
+
+ # Load and preprocess the dataset
+ dataset = load_dataset("glue", "mrpc")
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+
+ def preprocess_function(examples):
+     # Tokenize the inputs (pair of sentences)
+     return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True, padding=True)
+
+ from transformers import DataCollatorWithPadding
+ data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
+
+ small_train_dataset = dataset["train"].shuffle(seed=42).select(range(500))  # select the first 500 samples
+ small_train_dataset = small_train_dataset.map(preprocess_function, batched=True)
+
+ # Debug: inspect the tokenized samples
+ for i in small_train_dataset:
+     print(i)
+
+
+ # Instantiate the custom model
+ model = MyCustomModel(num_labels=2).to("cuda")
+
+ # Define the training arguments
+ training_args = TrainingArguments(
+     output_dir="./results",
+     num_train_epochs=3,
+     per_device_train_batch_size=8,
+     warmup_steps=500,
+     weight_decay=0.01,
+     logging_dir='./logs',
+     logging_steps=10,
+ )
+
+ # Initialize the Trainer
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=small_train_dataset,
+     data_collator=data_collator,
+     compute_metrics=None,  # add a metrics function here if needed
+ )
+
+ # Train the model
+ trainer.train()
+
+ # Save the model (MyCustomModel is a plain nn.Module, so save its state dict; it has no save_pretrained)
+ os.makedirs("./my_custom_model", exist_ok=True)
+ torch.save(model.state_dict(), "./my_custom_model/pytorch_model.bin")
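To reuse the fine-tuned weights later, rebuild the module and load the saved state dict; a sketch assuming the save path used above:

```
model = MyCustomModel(num_labels=2)
model.load_state_dict(torch.load("./my_custom_model/pytorch_model.bin"))
model.eval()
```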
demo/UNI_clip.py ADDED
@@ -0,0 +1,33 @@
+ import os
+ import torch
+ from torchvision import transforms
+ import timm
+ from huggingface_hub import login, hf_hub_download
+
+ # login() # login with your User Access Token, found at https://huggingface.co/settings/tokens
+
+ local_dir = "/bask/homes/a/asiw9691/PathVLM/UNI"
+ # os.makedirs(local_dir, exist_ok=True)  # create directory if it does not exist
+ # hf_hub_download("MahmoodLab/UNI", filename="pytorch_model.bin", local_dir=local_dir, force_download=True)
+ model = timm.create_model("vit_large_patch16_224", img_size=224, patch_size=16, init_values=1e-5, num_classes=0, dynamic_img_size=True)
+ model.load_state_dict(torch.load(os.path.join(local_dir, "pytorch_model.bin"), map_location="cpu"), strict=True)
+ transform = transforms.Compose(
+     [
+         # transforms.Resize(224),
+         transforms.Resize(256),      # resize the shorter side to 256 pixels first
+         transforms.CenterCrop(224),  # then center-crop a 224x224 image
+         transforms.ToTensor(),
+         transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+         transforms.Lambda(lambda x: x.unsqueeze(0))
+     ]
+ )
+ model.eval()
+
+ from PIL import Image
+ image = Image.open("/bask/homes/a/asiw9691/PathVLM/source/Flamingo/med-flamingo/img/test_path5.jpg")
+
+ image = transform(image)  # image (torch.Tensor) with shape [1, 3, 224, 224] after resizing and normalization (ImageNet parameters)
+ with torch.inference_mode():
+     feature_emb = model(image)  # extracted features (torch.Tensor) with shape [1, 1024]
+
+ print(feature_emb.shape)
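The extracted `[1, 1024]` feature is typically consumed by a lightweight task head rather than used directly; a minimal linear-probe sketch (`num_classes` is a placeholder for your task):

```
from torch import nn

num_classes = 2  # placeholder: set to your task's number of labels
probe = nn.Linear(1024, num_classes)
with torch.inference_mode():
    logits = probe(feature_emb)  # shape [1, num_classes]
print(logits.shape)
```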
demo/path_clip.py ADDED
@@ -0,0 +1,28 @@
+ import torch
+ from PIL import Image
+
+ import open_clip
+
+ ## Load the model
+ model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained="/bask/homes/a/asiw9691/PathVLM/PathClip/pathclip-base.pt",
+                                                              force_quick_gelu=True)
+ tokenizer = open_clip.get_tokenizer('ViT-B-16')
+ model = model.cuda()
+
+ ## Load the image and prepare the text prompts
+ img_path = '/raid/hpc/hekai/WorkShop/My_project/PathLLM_new/data/test_data/test_path1.jpg'
+ label_description_list = ['apple', 'liver', 'cancer']  # specify the label descriptions
+ text_label_list = ['An image of {}'.format(i) for i in label_description_list]
+ image = Image.open(img_path)
+ image = preprocess(image).unsqueeze(0).cuda()
+ text = tokenizer(text_label_list).cuda()
+
+ ## Extract the image and text features and predict the label
+ with torch.no_grad(), torch.cuda.amp.autocast():
+     image_features = model.encode_image(image)
+     text_features = model.encode_text(text)
+     image_features /= image_features.norm(dim=-1, keepdim=True)
+     text_features /= text_features.norm(dim=-1, keepdim=True)
+     text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
+ predict_label = torch.argmax(text_probs).item()
+ print(predict_label)
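To report the predicted description rather than its index, the index can be mapped back through the label list (a small addition, not in the original demo):

```
print(label_description_list[predict_label])  # e.g. 'liver'
print(text_probs)                             # per-label probabilities
```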
demo/peft_demo.py ADDED
@@ -0,0 +1,17 @@
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from peft import LoraConfig, TaskType, get_peft_model
+
+ model_name_or_path = "/raid/hpc/hekai/WorkShop/My_project/LLM_models/llama2/Llama-2-7b-chat-hf"
+ model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
+
+ peft_config = LoraConfig(
+     r=8,
+     target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
+     bias="none",
+     task_type=TaskType.CAUSAL_LM,
+ )
+
+ model = get_peft_model(model, peft_config)
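After wrapping the model, PEFT can report how many parameters the LoRA config actually leaves trainable; `print_trainable_parameters()` is part of the peft model API:

```
model.print_trainable_parameters()
# prints something like: trainable params: ... || all params: ... || trainable%: ...
```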
demo/trl_demo.py ADDED
@@ -0,0 +1,175 @@
+ # accelerate launch --config_file=/raid/hpc/hekai/WorkShop/My_project/PathLLM_new/accelerate_configs/deepspeed_zero2.yaml demo/trl_demo.py
+
+ import os
+ os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+ os.environ["CUDA_VISIBLE_DEVICES"] = "5"
+
+ from dataclasses import dataclass, field
+ from typing import Optional
+
+ import torch
+ from accelerate import Accelerator
+ from datasets import load_dataset
+ from peft import LoraConfig
+ from tqdm import tqdm
+ from transformers import AutoModelForCausalLM, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, AutoTokenizer
+ from trl import SFTTrainer
+
+
+ tqdm.pandas()
+
+
+ # Define and parse arguments.
+ @dataclass
+ class ScriptArguments:
+     """
+     The name of the causal LM model we wish to fine-tune with SFTTrainer
+     """
+
+     model_name: Optional[str] = field(default="mistralai/Mistral-7B-Instruct-v0.2", metadata={"help": "the model name, e.g. meta-llama/Llama-2-7b-chat-hf"})
+     dataset_name: Optional[str] = field(default="stingning/ultrachat", metadata={"help": "the dataset name"})
+     dataset_text_field: Optional[str] = field(default="text", metadata={"help": "the text field of the dataset"})
+     log_with: Optional[str] = field(default="wandb", metadata={"help": "use 'wandb' to log with wandb"})
+     learning_rate: Optional[float] = field(default=2.0e-5, metadata={"help": "the learning rate"})
+     batch_size: Optional[int] = field(default=1, metadata={"help": "the batch size"})
+     seq_length: Optional[int] = field(default=1024, metadata={"help": "input sequence length"})
+     gradient_accumulation_steps: Optional[int] = field(default=8, metadata={"help": "the number of gradient accumulation steps"})
+
+     evaluation_strategy: Optional[str] = field(default="steps", metadata={"help": "'epoch' or 'steps'"})
+     eval_steps: Optional[int] = field(default=2, metadata={"help": "the number of steps between evaluations"})
+
+     load_in_8bit: Optional[bool] = field(default=False, metadata={"help": "load the model in 8-bit precision"})
+     load_in_4bit: Optional[bool] = field(default=False, metadata={"help": "load the model in 4-bit precision"})
+     use_peft: Optional[bool] = field(default=True, metadata={"help": "whether to use PEFT to train adapters"})
+     trust_remote_code: Optional[bool] = field(default=False, metadata={"help": "enable `trust_remote_code`"})
+
+     output_dir: Optional[str] = field(default="output", metadata={"help": "the output directory"})
+     peft_lora_r: Optional[int] = field(default=64, metadata={"help": "the r parameter of the LoRA adapters"})
+     peft_lora_alpha: Optional[int] = field(default=16, metadata={"help": "the alpha parameter of the LoRA adapters"})
+     logging_steps: Optional[int] = field(default=5, metadata={"help": "the number of logging steps"})
+     token: Optional[bool] = field(default=True, metadata={"help": "use the HF auth token to access the model"})
+     num_train_epochs: Optional[int] = field(default=3, metadata={"help": "the number of training epochs"})
+     max_steps: Optional[int] = field(default=-1, metadata={"help": "the number of training steps"})
+     save_steps: Optional[int] = field(default=1000, metadata={"help": "number of update steps between two checkpoint saves"})
+     save_total_limit: Optional[int] = field(default=10, metadata={"help": "limits the total number of checkpoints"})
+     push_to_hub: Optional[bool] = field(default=False, metadata={"help": "push the model to the HF Hub"})
+     hub_model_id: Optional[str] = field(default="mistral-7b-finetuned-ultrachat", metadata={"help": "the name of the model on the HF Hub"})
+
+
+ parser = HfArgumentParser(ScriptArguments)
+ script_args = parser.parse_args_into_dataclasses()[0]
+
+ # Step 1: Load the dataset
+ tokenizer = AutoTokenizer.from_pretrained(script_args.model_name)
+ tokenizer.padding_side = 'right'
+ tokenizer.pad_token = tokenizer.eos_token
+
+ dataset = load_dataset(script_args.dataset_name, split="train[:200]")
+ dataset = dataset.train_test_split(test_size=0.1)
+
+ def prepare_dialogue(example):
+     text = ""
+     for idx, msg in enumerate(example["data"]):
+         if idx % 2 == 0:
+             text += f"<|user|>\n{msg}{tokenizer.eos_token}\n"
+         else:
+             text += f"<|assistant|>\n{msg}{tokenizer.eos_token}\n"
+     example["text"] = text
+     return example
+
+ dataset = dataset.map(prepare_dialogue, num_proc=4, remove_columns=["id", "data"])
+
+
+ # Step 2: Load the model
+ if script_args.load_in_8bit and script_args.load_in_4bit:
+     raise ValueError("You can't load the model in 8 bits and 4 bits at the same time")
+ elif script_args.load_in_8bit or script_args.load_in_4bit:
+     quantization_config = BitsAndBytesConfig(
+         load_in_8bit=script_args.load_in_8bit, load_in_4bit=script_args.load_in_4bit
+     )
+     # Copy the model to each device
+     device_map = {"": Accelerator().local_process_index}
+     torch_dtype = torch.bfloat16
+ else:
+     # device_map = "auto"
+     device_map = None
+     quantization_config = None
+     torch_dtype = None
+
+ model = AutoModelForCausalLM.from_pretrained(
+     script_args.model_name,
+     quantization_config=quantization_config,
+     device_map=device_map,
+     trust_remote_code=script_args.trust_remote_code,
+     torch_dtype=torch_dtype,
+     token=script_args.token,
+ )
+
+
+ # Step 3: Define the LoraConfig
+ if script_args.use_peft:
+     peft_config = LoraConfig(
+         r=script_args.peft_lora_r,
+         lora_alpha=script_args.peft_lora_alpha,
+         bias="none",
+         task_type="CAUSAL_LM",
+     )
+ else:
+     peft_config = None
+
+
+ # Step 4: Define the training arguments
+ training_args = TrainingArguments(
+     output_dir=script_args.output_dir,
+     per_device_train_batch_size=script_args.batch_size,
+     gradient_accumulation_steps=script_args.gradient_accumulation_steps,
+     gradient_checkpointing=True,
+     learning_rate=script_args.learning_rate,
+     logging_steps=script_args.logging_steps,
+     num_train_epochs=script_args.num_train_epochs,
+     max_steps=script_args.max_steps,
+     report_to=script_args.log_with,
+     save_steps=script_args.save_steps,
+     save_total_limit=script_args.save_total_limit,
+     # push_to_hub=script_args.push_to_hub,
+     # hub_model_id=script_args.hub_model_id,
+     bf16=True,
+     lr_scheduler_type="cosine",
+     warmup_ratio=0.1,
+     evaluation_strategy=script_args.evaluation_strategy,
+     eval_steps=script_args.eval_steps,
+     logging_first_step=True,
+ )
+
+ def my_compute_metrics(p):
+     # Placeholder metrics for demonstration only; replace with real computations
+     predictions, labels = p
+     return {
+         'precision': 1,
+         'recall': 1,
+         'f1': 1,
+     }
+
+ # Step 5: Define the trainer and train
+ trainer = SFTTrainer(
+     model=model,
+     args=training_args,
+     max_seq_length=script_args.seq_length,
+     train_dataset=dataset["train"],
+     eval_dataset=dataset["test"],
+     dataset_text_field=script_args.dataset_text_field,
+     peft_config=peft_config,
+     packing=False,
+     tokenizer=tokenizer,
+     compute_metrics=my_compute_metrics
+ )
+
+ trainer.train()
+
+ # Step 6: Save the model
+ trainer.save_model(script_args.output_dir)
evaluation/cider_score/cider_demo.ipynb ADDED
@@ -0,0 +1,290 @@
+ {
+  "cells": [
+   {
+    "cell_type": "markdown",
+    "metadata": {
+     "colab_type": "text",
+     "id": "view-in-github"
+    },
+    "source": [
+     "<a href=\"https://colab.research.google.com/github/michelecafagna26/cider/blob/master/cider_demo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {
+     "id": "GWTmEvA9jNbE"
+    },
+    "source": [
+     "# Install"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 9,
+    "metadata": {
+     "colab": {
+      "base_uri": "https://localhost:8080/"
+     },
+     "id": "AeLUPV23cglP",
+     "outputId": "61e4fef5-0481-4c41-ee8a-0e8073f0d3e1"
+    },
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "... (pip install log trimmed: spacy and its dependencies were already satisfied; en-core-web-sm 3.7.1 was downloaded and installed) ...\n",
+       "✔ Download and installation successful\n",
+       "You can now load the package via spacy.load('en_core_web_sm')\n"
+      ]
+     }
+    ],
+    "source": [
+     "# Use spacy to avoid depending on stanford-corenlp.jar, which requires Java\n",
+     "! pip install spacy\n",
+     "! python -m spacy download en_core_web_sm"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {
+     "id": "RF9-uATHjSJT"
+    },
+    "source": [
+     "Ready to go!"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 2,
+    "metadata": {
+     "id": "UvK_ACyMeqDD"
+    },
+    "outputs": [],
+    "source": [
+     "from cidereval import cider, ciderD\n",
+     "import pandas as pd"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 6,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "CIDEr Score: 1.1466548664799776\n"
+      ]
+     }
+    ],
+    "source": [
+     "def calculate_cider_score(file_path, pred_column, ref_column):\n",
+     "    \"\"\"\n",
+     "    input:\n",
+     "\n",
+     "    file_path: table to be analysed\n",
+     "    pred_column: name of the column holding the analysed model's generations\n",
+     "    ref_column: name of the column holding the ground truth\n",
+     "\n",
+     "    output:\n",
+     "    score: {avg_score: xxx, scores: [array]}\n",
+     "    \"\"\"\n",
+     "    # Read the xlsx file\n",
+     "    df = pd.read_excel(file_path)\n",
+     "\n",
+     "    # Lists holding the ground-truth and generated sentences\n",
+     "    references = []\n",
+     "    candidates = []\n",
+     "\n",
+     "    # Iterate over the rows and append the contents to the corresponding lists\n",
+     "    for index, row in df.iterrows():\n",
+     "        # references.append([row['answers']])\n",
+     "        # candidates.append([row['results']])\n",
+     "        references.append([row[ref_column]])\n",
+     "        candidates.append(row[pred_column])\n",
+     "    # Create the Cider object\n",
+     "    # cider_scorer = Cider()\n",
+     "\n",
+     "    # Compute the CIDEr score\n",
+     "    cider_score = cider(candidates, references, df=\"corpus\")\n",
+     "\n",
+     "    return cider_score\n",
+     "\n",
+     "# Call the function with the path to the xlsx file\n",
+     "file_path = 'output_sample.xls'\n",
+     "score = calculate_cider_score(file_path, \"results\", \"answers\")\n",
+     "print(\"CIDEr Score:\", score['avg_score'])"
+    ]
+   }
+  ],
+  "metadata": {
+   "colab": {
+    "authorship_tag": "ABX9TyMT4TNjFKKrEMcMc0uZ6Ubr",
+    "include_colab_link": true,
+    "name": "cider_demo.ipynb",
+    "provenance": []
+   },
+   "kernelspec": {
+    "display_name": "asiw9691_conda_env (Conda)",
+    "language": "python",
+    "name": "sys_asiw9691_conda_env"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.10.4"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 4
+ }
evaluation/cider_score/cidereval/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ __author__ = 'tylin'
2
+ # edited by Michele Cafagna
3
+ from cidereval.cider.cider import Cider
4
+ from cidereval.ciderD.ciderD import CiderD
5
+ from cidereval.scorers import cider, ciderD
evaluation/cider_score/cidereval/cider/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __author__ = 'tylin'
evaluation/cider_score/cidereval/cider/cider.py ADDED
@@ -0,0 +1,67 @@
1
+ # Filename: cider.py
2
+ #
3
+ #
4
+ # Description: Describes the class to compute the CIDEr
5
+ # (Consensus-Based Image Description Evaluation) Metric
6
+ # by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726)
7
+ #
8
+ # Creation Date: Sun Feb 8 14:16:54 2015
9
+ #
10
+ # Authors: Ramakrishna Vedantam <vrama91@vt.edu> and
11
+ # Tsung-Yi Lin <tl483@cornell.edu>
12
+
13
+ # edited by Michele Cafagna
14
+
15
+ from .cider_scorer import CiderScorer
16
+
17
+
18
+ class Cider:
19
+ """
20
+ Main Class to compute the CIDEr metric
21
+
22
+ """
23
+ def __init__(self, n=4, df="corpus"):
24
+ """
25
+ Initialize the CIDEr scoring function
26
+ : param n (int): n-gram size
27
+ : param df (string): specifies where to get the IDF values from
28
+ takes values 'corpus', 'coco-val'
29
+ : return: None
30
+ """
31
+ # set cider to sum over 1 to 4-grams
32
+ self._n = n
33
+ self._df = df
34
+ self.cider_scorer = CiderScorer(n=self._n, df_mode=self._df)
35
+
36
+ def compute_score(self, gts, res):
37
+ """
38
+ Main function to compute CIDEr score
39
+ : param gts (dict) : {image_id: list of tokenized reference sentences}
40
+ : param res (list) : list of {'image_id', 'caption'} dicts with tokenized candidates
41
+ : return: cider (float) : computed CIDEr score for the corpus
42
+ """
43
+
44
+ # clear all the previous hypos and refs
45
+ self.cider_scorer.clear()
46
+
47
+ for res_id in res:
48
+
49
+ hypo = res_id['caption']
50
+ ref = gts[res_id['image_id']]
51
+
52
+ # Sanity check.
53
+ assert(type(hypo) is list)
54
+ assert(len(hypo) == 1)
55
+ assert(type(ref) is list)
56
+ assert(len(ref) > 0)
57
+ self.cider_scorer += (hypo[0], ref)
58
+
59
+ (score, scores) = self.cider_scorer.compute_score()
60
+
61
+ return score, scores
62
+
63
+ def save_df(self, df_name="corpus"):
64
+ self.cider_scorer.save_df(df_name)
65
+
66
+ def method(self):
67
+ return "CIDEr"
evaluation/cider_score/cidereval/cider/cider_scorer.py ADDED
@@ -0,0 +1,274 @@
1
+ #!/usr/bin/env python
2
+ # Tsung-Yi Lin <tl483@cornell.edu>
3
+ # Ramakrishna Vedantam <vrama91@vt.edu>
4
+
5
+ from pathlib import Path
6
+ from collections import defaultdict
7
+ import pickle
8
+ import math
9
+ from copy import copy
10
+
11
+ from importlib_resources import files, as_file
12
+ import numpy as np
13
+ import cidereval.data
14
+
15
+
16
+ def precook(s, n=4, out=False):
17
+ """
18
+ Takes a string as input and returns an object that can be given to
19
+ either cook_refs or cook_test. This is optional: cook_refs and cook_test
20
+ can take string arguments as well.
21
+ :param s: string : sentence to be converted into ngrams
22
+ :param n: int : number of ngrams for which representation is calculated
23
+ :return: term frequency vector for occurring ngrams
24
+ """
25
+ words = s.split()
26
+ counts = defaultdict(int)
27
+ for k in range(1, n + 1):
28
+ for i in range(len(words) - k + 1):
29
+ ngram = tuple(words[i:i + k])
30
+ counts[ngram] += 1
31
+ return counts
32
+
33
+
34
+ def cook_refs(refs, n=4): # lhuang: oracle will call with "average"
35
+ '''Takes a list of reference sentences for a single segment
36
+ and returns an object that encapsulates everything that BLEU
37
+ needs to know about them.
38
+ :param refs: list of string : reference sentences for some image
39
+ :param n: int : number of ngrams for which (ngram) representation is calculated
40
+ :return: result (list of dict)
41
+ '''
42
+ return [precook(ref, n) for ref in refs]
43
+
44
+
45
+ def cook_test(test, n=4):
46
+ '''Takes a test sentence and returns an object that
47
+ encapsulates everything that BLEU needs to know about it.
48
+ :param test: list of string : hypothesis sentence for some image
49
+ :param n: int : number of ngrams for which (ngram) representation is calculated
50
+ :return: result (dict)
51
+ '''
52
+ return precook(test, n, True)
53
+
54
+
55
+ class CiderScorer(object):
56
+ """CIDEr scorer.
57
+ """
58
+
59
+ def save_df(self, df_name="corpus", path=None):
60
+ """Save the idf computed in corpus mode
61
+
62
+ Args:
63
+ df_name (str, optional): [description]. Defaults to "corpus". name of idf file
64
+ (without the file extension). Defaults to "corpus".
65
+
66
+ path (str, optional): directory in which to save the idf; if not provided,
67
+ the home directory is used. Defaults to None.
68
+ Raises:
69
+ ValueError: [description] if you try to call this method before computing the scores
70
+ """
71
+
72
+ if path:
73
+ path = Path(path)
74
+
75
+ if not path.exists():
76
+ path = Path.home()
77
+ print(f"the path provided is not valid. The df will be saved in {path}")
78
+ else:
79
+ path = Path.home()
80
+ print(f"the path provided is not valid. The df will be saved in {path}")
81
+
82
+ filename = Path(path, df_name + '.p')
83
+
84
+ if len(self.document_frequency) > 0:
85
+ with open(filename, "wb") as fp:
86
+
87
+ df_idf = {
88
+ "ref_len" : np.log(float(len(self.crefs))),
89
+ "df": self.document_frequency
90
+ }
91
+
92
+ pickle.dump(df_idf, fp)
93
+ print(f"saved to {filename}")
94
+ else:
95
+ raise ValueError("document frequency not computed run 'compute_score'")
96
+
97
+ def copy(self):
98
+ ''' copy the refs.'''
99
+ new = CiderScorer(n=self.n)
100
+ new.ctest = copy(self.ctest)  # 'copy' is the function imported from the copy module
101
+ new.crefs = copy(self.crefs)
102
+ return new
103
+
104
+ def __init__(self, df_mode="corpus", test=None, refs=None, n=4, sigma=6.0):
105
+ ''' singular instance '''
106
+ self.n = n
107
+ self.sigma = sigma
108
+ self.crefs = []
109
+ self.ctest = []
110
+ self.ref_len = None
111
+ self.df_mode = df_mode
112
+
113
+ if self.df_mode != "corpus":
114
+ if self.df_mode !="coco-val":
115
+ try:
116
+ with open(self.df_mode, 'rb') as fp:
117
+ df = pickle.load(fp, encoding='iso-8859-1')
118
+ except FileNotFoundError as e:
119
+ print(f"Error retrieveing {self.df_mode}.p df_mode set to 'coco-val'")
120
+
121
+ self.df_mode="coco-val"
122
+ df_path = files(cidereval.data).joinpath(self.df_mode + '.p')
123
+ with as_file(df_path) as res:
124
+ with open(res, 'rb') as fp:
125
+ df = pickle.load(fp, encoding='iso-8859-1')
126
+ else:
127
+ df_path = files(cidereval.data).joinpath(self.df_mode + '.p')
128
+ with as_file(df_path) as res:
129
+ with open(res, 'rb') as fp:
130
+ df = pickle.load(fp, encoding='iso-8859-1')
131
+
132
+ #df_path = os.path.join('data', df_mode + '.p')
133
+ #df = pickle.load(open(os.path.join('data', df_mode + '.p'), 'rb'), encoding='iso-8859-1') # TODO fix path
134
+ self.document_frequency = df['df']
135
+ self.ref_len = df['ref_len']
136
+ self.cook_append(test, refs)
137
+
138
+ def clear(self):
139
+ self.crefs = []
140
+ self.ctest = []
141
+
142
+ def cook_append(self, test, refs):
143
+ '''called by constructor and __iadd__ to avoid creating new instances.'''
144
+
145
+ if refs is not None:
146
+ self.crefs.append(cook_refs(refs))
147
+ if test is not None:
148
+ self.ctest.append(cook_test(test)) ## N.B.: -1
149
+ else:
150
+ self.ctest.append(None) # lens of crefs and ctest have to match
151
+
152
+ def size(self):
153
+ assert len(self.crefs) == len(self.ctest), "refs/test mismatch! %d<>%d" % (len(self.crefs), len(self.ctest))
154
+ return len(self.crefs)
155
+
156
+ def __iadd__(self, other):
157
+ '''add an instance (e.g., from another sentence).'''
158
+
159
+ if type(other) is tuple:
160
+ # avoid creating new CiderScorer instances
161
+ self.cook_append(other[0], other[1])
162
+ else:
163
+ self.ctest.extend(other.ctest)
164
+ self.crefs.extend(other.crefs)
165
+
166
+ return self
167
+
168
+ def compute_doc_freq(self):
169
+ '''
170
+ Compute document frequency for the reference data.
171
+ This will be used to compute the idf (inverse document frequency) later.
172
+ The document frequency is stored in the object.
173
+ :return: None
174
+ '''
175
+ for refs in self.crefs:
176
+ # refs, k ref captions of one image
177
+ for ngram in set([ngram for ref in refs for (ngram, count) in ref.items()]):
178
+ self.document_frequency[ngram] += 1
179
+ # maxcounts[ngram] = max(maxcounts.get(ngram,0), count)
180
+
181
+ def compute_cider(self):
182
+ def counts2vec(cnts):
183
+ """
184
+ Function maps counts of ngram to vector of tfidf weights.
185
+ The function returns vec, an array of dictionary that store mapping of n-gram and tf-idf weights.
186
+ The n-th entry of array denotes length of n-grams.
187
+ :param cnts:
188
+ :return: vec (array of dict), norm (array of float), length (int)
189
+ """
190
+ vec = [defaultdict(float) for _ in range(self.n)]
191
+ length = 0
192
+ norm = [0.0 for _ in range(self.n)]
193
+ for (ngram, term_freq) in cnts.items():
194
+ # give word count 1 if it doesn't appear in reference corpus
195
+ df = np.log(max(1.0, self.document_frequency[ngram]))
196
+ # ngram index
197
+ n = len(ngram) - 1
198
+ # tf (term_freq) * idf (precomputed idf) for n-grams
199
+ vec[n][ngram] = float(term_freq) * (self.ref_len - df)
200
+ # compute norm for the vector. the norm will be used for
201
+ # computing similarity
202
+ norm[n] += pow(vec[n][ngram], 2)
203
+
204
+ if n == 1:
205
+ length += term_freq
206
+ norm = [np.sqrt(n) for n in norm]
207
+ return vec, norm, length
208
+
209
+ def sim(vec_hyp, vec_ref, norm_hyp, norm_ref, length_hyp, length_ref):
210
+ '''
211
+ Compute the cosine similarity of two vectors.
212
+ :param vec_hyp: array of dictionary for vector corresponding to hypothesis
213
+ :param vec_ref: array of dictionary for vector corresponding to reference
214
+ :param norm_hyp: array of float for vector corresponding to hypothesis
215
+ :param norm_ref: array of float for vector corresponding to reference
216
+ :param length_hyp: int containing length of hypothesis
217
+ :param length_ref: int containing length of reference
218
+ :return: array of score for each n-grams cosine similarity
219
+ '''
220
+ delta = float(length_hyp - length_ref)
221
+ # measure cosine similarity
222
+ val = np.array([0.0 for _ in range(self.n)])
223
+ for n in range(self.n):
224
+ # ngram
225
+ for (ngram, count) in vec_hyp[n].items():
226
+ val[n] += vec_hyp[n][ngram] * vec_ref[n][ngram]
227
+
228
+ if (norm_hyp[n] != 0) and (norm_ref[n] != 0):
229
+ val[n] /= (norm_hyp[n] * norm_ref[n])
230
+
231
+ assert (not math.isnan(val[n]))
232
+ return val
233
+
234
+ # compute log reference length
235
+ if self.df_mode == "corpus":
236
+ self.ref_len = np.log(float(len(self.crefs)))
237
+ #elif self.df_mode == "coco-val":
238
+ # if coco option selected, use length of coco-val set
239
+ #self.ref_len = np.log(float(40504))
240
+
241
+
242
+
243
+ scores = []
244
+ for test, refs in zip(self.ctest, self.crefs):
245
+ # compute vector for test captions
246
+ vec, norm, length = counts2vec(test)
247
+ # compute vector for ref captions
248
+ score = np.array([0.0 for _ in range(self.n)])
249
+ for ref in refs:
250
+ vec_ref, norm_ref, length_ref = counts2vec(ref)
251
+ score += sim(vec, vec_ref, norm, norm_ref, length, length_ref)
252
+ # change by vrama91 - mean of ngram scores, instead of sum
253
+ score_avg = np.mean(score)
254
+ # divide by number of references
255
+ score_avg /= len(refs)
256
+ # multiply score by 10
257
+ score_avg *= 10.0
258
+ # append score of an image to the score list
259
+ scores.append(score_avg)
260
+ return scores
261
+
262
+ def compute_score(self, option=None, verbose=0):
263
+ # compute idf
264
+ if self.df_mode == "corpus":
265
+ self.document_frequency = defaultdict(float)
266
+ self.compute_doc_freq()
267
+ # assert to check document frequency
268
+ assert (len(self.ctest) >= max(self.document_frequency.values()))
269
+ # import json for now and write the corresponding files
270
+ # compute cider score
271
+ score = self.compute_cider()
272
+ # debug
273
+ # print score
274
+ return np.mean(np.array(score)), np.array(score)
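
To make the scoring above concrete: `precook` turns a sentence into 1- to 4-gram counts, and `counts2vec` then weights each count by `tf * (log(len(crefs)) - log(df))` to form the tf-idf vectors that `sim` compares. A quick illustration of the counting step, on a hypothetical sentence:

```python
from cidereval.cider.cider_scorer import precook

counts = precook("the cat sat on the mat", n=2)
print(counts[("the",)])        # 2  -> unigram term frequency
print(counts[("the", "cat")])  # 1  -> bigram term frequency
```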
evaluation/cider_score/cidereval/ciderD/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __author__ = 'tylin'
evaluation/cider_score/cidereval/ciderD/ciderD.py ADDED
@@ -0,0 +1,57 @@
1
+ # Filename: ciderD.py
2
+ #
3
+ # Description: Describes the class to compute the CIDEr-D (Consensus-Based Image Description Evaluation) Metric
4
+ # by Vedantam, Zitnick, and Parikh (http://arxiv.org/abs/1411.5726)
5
+ #
6
+ # Creation Date: Sun Feb 8 14:16:54 2015
7
+ #
8
+ # Authors: Ramakrishna Vedantam <vrama91@vt.edu> and Tsung-Yi Lin <tl483@cornell.edu>
9
+
10
+ from .ciderD_scorer import CiderScorer
11
+ import pdb
12
+
13
+ class CiderD:
14
+ """
15
+ Main Class to compute the CIDEr-D metric
16
+
17
+ """
18
+ def __init__(self, n=4, sigma=6.0, df="corpus"):
19
+ # set cider to sum over 1 to 4-grams
20
+ self._n = n
21
+ # set the standard deviation parameter for gaussian penalty
22
+ self._sigma = sigma
23
+ # set which where to compute document frequencies from
24
+ self._df = df
25
+ self.cider_scorer = CiderScorer(n=self._n, df_mode=self._df)
26
+
27
+ def compute_score(self, gts, res):
28
+ """
29
+ Main function to compute CIDEr score
30
+ :param hypo_for_image (dict) : dictionary with key <image> and value <tokenized hypothesis / candidate sentence>
31
+ ref_for_image (dict) : dictionary with key <image> and value <tokenized reference sentence>
32
+ :return: cider (float) : computed CIDEr score for the corpus
33
+ """
34
+
35
+ # clear all the previous hypos and refs
36
+ self.cider_scorer.clear()
37
+ for res_id in res:
38
+
39
+ hypo = res_id['caption']
40
+ ref = gts[res_id['image_id']]
41
+
42
+ # Sanity check.
43
+ assert(type(hypo) is list)
44
+ assert(len(hypo) == 1)
45
+ assert(type(ref) is list)
46
+ assert(len(ref) > 0)
47
+ self.cider_scorer += (hypo[0], ref)
48
+
49
+ (score, scores) = self.cider_scorer.compute_score()
50
+
51
+ return score, scores
52
+
53
+ def save_df(self, df_name="corpus"):
54
+ self.cider_scorer.save_df(df_name)
55
+
56
+ def method(self):
57
+ return "CIDEr-D"
evaluation/cider_score/cidereval/ciderD/ciderD_scorer.py ADDED
@@ -0,0 +1,265 @@
1
+ #!/usr/bin/env python
2
+ # Tsung-Yi Lin <tl483@cornell.edu>
3
+ # Ramakrishna Vedantam <vrama91@vt.edu>
4
+
5
+ # edited by Michele Cafagna
6
+
7
+ from pathlib import Path
8
+ from collections import defaultdict
9
+ import pickle
10
+ import math
11
+ from copy import copy
12
+
13
+ from importlib_resources import files, as_file
14
+ import numpy as np
15
+ import cidereval.data
16
+
17
+ def precook(s, n=4, out=False):
18
+ """
19
+ Takes a string as input and returns an object that can be given to
20
+ either cook_refs or cook_test. This is optional: cook_refs and cook_test
21
+ can take string arguments as well.
22
+ :param s: string : sentence to be converted into ngrams
23
+ :param n: int : number of ngrams for which representation is calculated
24
+ :return: term frequency vector for occurring ngrams
25
+ """
26
+ words = s.split()
27
+ counts = defaultdict(int)
28
+ for k in range(1,n+1):
29
+ for i in range(len(words)-k+1):
30
+ ngram = tuple(words[i:i+k])
31
+ counts[ngram] += 1
32
+ return counts
33
+
34
+ def cook_refs(refs, n=4): ## lhuang: oracle will call with "average"
35
+ '''Takes a list of reference sentences for a single segment
36
+ and returns an object that encapsulates everything that BLEU
37
+ needs to know about them.
38
+ :param refs: list of string : reference sentences for some image
39
+ :param n: int : number of ngrams for which (ngram) representation is calculated
40
+ :return: result (list of dict)
41
+ '''
42
+ return [precook(ref, n) for ref in refs]
43
+
44
+ def cook_test(test, n=4):
45
+ '''Takes a test sentence and returns an object that
46
+ encapsulates everything that BLEU needs to know about it.
47
+ :param test: list of string : hypothesis sentence for some image
48
+ :param n: int : number of ngrams for which (ngram) representation is calculated
49
+ :return: result (dict)
50
+ '''
51
+ return precook(test, n, True)
52
+
53
+ class CiderScorer(object):
54
+ """CIDEr scorer.
55
+ """
56
+
57
+ def save_df(self, df_name="corpus", path=None):
58
+ """Save the idf computed in corpus mode
59
+
60
+ Args:
61
+ df_name (str, optional): [description]. Defaults to "corpus". name of idf file
62
+ (without the file extension). Defaults to "corpus".
63
+
64
+ path (str, optional): directory in which to save the idf; if not provided,
65
+ the home directory is used. Defaults to None.
66
+ Raises:
67
+ ValueError: [description] if you try to call this method before computing the scores
68
+ """
69
+
70
+ if path:
71
+ path = Path(path)
72
+
73
+ if not path.exists():
74
+ path = Path.home()
75
+ print(f"the path provided is not valid. The df will be saved in {path}")
76
+ else:
77
+ path = Path.home()
78
+ print(f"the path provided is not valid. The df will be saved in {path}")
79
+
80
+ filename = Path(path, df_name + '.p')
81
+
82
+ if len(self.document_frequency) > 0:
83
+ with open(filename, "wb") as fp:
84
+
85
+ df_idf = {
86
+ "ref_len" : np.log(float(len(self.crefs))),
87
+ "df": self.document_frequency
88
+ }
89
+
90
+ pickle.dump(df_idf, fp)
91
+ print(f"saved to {filename}")
92
+ else:
93
+ raise ValueError("document frequency not computed run 'compute_score'")
94
+
95
+
96
+ def copy(self):
97
+ ''' copy the refs.'''
98
+ new = CiderScorer(n=self.n)
99
+ new.ctest = copy(self.ctest)  # 'copy' is the function imported from the copy module
100
+ new.crefs = copy(self.crefs)
101
+ return new
102
+
103
+ def __init__(self, df_mode="corpus", test=None, refs=None, n=4, sigma=6.0):
104
+ ''' singular instance '''
105
+ self.n = n
106
+ self.sigma = sigma
107
+ self.crefs = []
108
+ self.ctest = []
109
+ self.df_mode = df_mode
110
+ self.ref_len = None
111
+
112
+ if self.df_mode != "corpus":
113
+ if self.df_mode !="coco-val":
114
+ try:
115
+ with open(self.df_mode, 'rb') as fp:
116
+ df = pickle.load(fp, encoding='iso-8859-1')
117
+ except FileNotFoundError as e:
118
+ print(f"Error retrieveing {self.df_mode}. df_mode set to 'coco-val'")
119
+ else:
120
+ df_path = files(cidereval.data).joinpath(self.df_mode + '.p')
121
+ with as_file(df_path) as res:
122
+ with open(res, 'rb') as fp:
123
+ df = pickle.load(fp, encoding='iso-8859-1')
124
+
125
+ self.document_frequency = df['df']
126
+ self.ref_len = df['ref_len']
127
+
128
+ self.cook_append(test, refs)
129
+
130
+ def clear(self):
131
+ self.crefs = []
132
+ self.ctest = []
133
+
134
+ def cook_append(self, test, refs):
135
+ '''called by constructor and __iadd__ to avoid creating new instances.'''
136
+
137
+ if refs is not None:
138
+ self.crefs.append(cook_refs(refs))
139
+ if test is not None:
140
+ self.ctest.append(cook_test(test)) ## N.B.: -1
141
+ else:
142
+ self.ctest.append(None) # lens of crefs and ctest have to match
143
+
144
+ def size(self):
145
+ assert len(self.crefs) == len(self.ctest), "refs/test mismatch! %d<>%d" % (len(self.crefs), len(self.ctest))
146
+ return len(self.crefs)
147
+
148
+ def __iadd__(self, other):
149
+ '''add an instance (e.g., from another sentence).'''
150
+
151
+ if type(other) is tuple:
152
+ ## avoid creating new CiderScorer instances
153
+ self.cook_append(other[0], other[1])
154
+ else:
155
+ self.ctest.extend(other.ctest)
156
+ self.crefs.extend(other.crefs)
157
+
158
+ return self
159
+ def compute_doc_freq(self):
160
+ '''
161
+ Compute document frequency for the reference data.
162
+ This will be used to compute the idf (inverse document frequency) later.
163
+ The document frequency is stored in the object.
164
+ :return: None
165
+ '''
166
+ for refs in self.crefs:
167
+ # refs, k ref captions of one image
168
+ for ngram in set([ngram for ref in refs for (ngram,count) in ref.items()]):
169
+ self.document_frequency[ngram] += 1
170
+ # maxcounts[ngram] = max(maxcounts.get(ngram,0), count)
171
+
172
+ def compute_cider(self):
173
+ def counts2vec(cnts):
174
+ """
175
+ Function maps counts of ngram to vector of tfidf weights.
176
+ The function returns vec, an array of dictionary that store mapping of n-gram and tf-idf weights.
177
+ The n-th entry of array denotes length of n-grams.
178
+ :param cnts:
179
+ :return: vec (array of dict), norm (array of float), length (int)
180
+ """
181
+ vec = [defaultdict(float) for _ in range(self.n)]
182
+ length = 0
183
+ norm = [0.0 for _ in range(self.n)]
184
+ for (ngram,term_freq) in cnts.items():
185
+ # give word count 1 if it doesn't appear in reference corpus
186
+ df = np.log(max(1.0, self.document_frequency[ngram]))
187
+ # ngram index
188
+ n = len(ngram)-1
189
+ # tf (term_freq) * idf (precomputed idf) for n-grams
190
+ vec[n][ngram] = float(term_freq)*(self.ref_len - df)
191
+ # compute norm for the vector. the norm will be used for computing similarity
192
+ norm[n] += pow(vec[n][ngram], 2)
193
+
194
+ if n == 1:
195
+ length += term_freq
196
+ norm = [np.sqrt(n) for n in norm]
197
+ return vec, norm, length
198
+
199
+ def sim(vec_hyp, vec_ref, norm_hyp, norm_ref, length_hyp, length_ref):
200
+ '''
201
+ Compute the cosine similarity of two vectors.
202
+ :param vec_hyp: array of dictionary for vector corresponding to hypothesis
203
+ :param vec_ref: array of dictionary for vector corresponding to reference
204
+ :param norm_hyp: array of float for vector corresponding to hypothesis
205
+ :param norm_ref: array of float for vector corresponding to reference
206
+ :param length_hyp: int containing length of hypothesis
207
+ :param length_ref: int containing length of reference
208
+ :return: array of score for each n-grams cosine similarity
209
+ '''
210
+ delta = float(length_hyp - length_ref)
211
+ # measure cosine similarity
212
+ val = np.array([0.0 for _ in range(self.n)])
213
+ for n in range(self.n):
214
+ # ngram
215
+ for (ngram,count) in vec_hyp[n].items():
216
+ # vrama91 : added clipping
217
+ val[n] += min(vec_hyp[n][ngram], vec_ref[n][ngram]) * vec_ref[n][ngram]
218
+
219
+ if (norm_hyp[n] != 0) and (norm_ref[n] != 0):
220
+ val[n] /= (norm_hyp[n]*norm_ref[n])
221
+
222
+ assert(not math.isnan(val[n]))
223
+ # vrama91: added a length based gaussian penalty
224
+ val[n] *= np.e**(-(delta**2)/(2*self.sigma**2))
225
+ return val
226
+
227
+ # compute log reference length
228
+ if self.df_mode == "corpus":
229
+ self.ref_len = np.log(float(len(self.crefs)))
230
+ #elif self.df_mode == "coco-val":
231
+ # if coco option selected, use length of coco-val set
232
+ #self.ref_len = np.log(float(40504))
233
+
234
+ scores = []
235
+ for test, refs in zip(self.ctest, self.crefs):
236
+ # compute vector for test captions
237
+ vec, norm, length = counts2vec(test)
238
+ # compute vector for ref captions
239
+ score = np.array([0.0 for _ in range(self.n)])
240
+ for ref in refs:
241
+ vec_ref, norm_ref, length_ref = counts2vec(ref)
242
+ score += sim(vec, vec_ref, norm, norm_ref, length, length_ref)
243
+ # change by vrama91 - mean of ngram scores, instead of sum
244
+ score_avg = np.mean(score)
245
+ # divide by number of references
246
+ score_avg /= len(refs)
247
+ # multiply score by 10
248
+ score_avg *= 10.0
249
+ # append score of an image to the score list
250
+ scores.append(score_avg)
251
+ return scores
252
+
253
+ def compute_score(self, option=None, verbose=0):
254
+ # compute idf
255
+ if self.df_mode == "corpus":
256
+ self.document_frequency = defaultdict(float)
257
+ self.compute_doc_freq()
258
+ # assert to check document frequency
259
+ assert(len(self.ctest) >= max(self.document_frequency.values()))
260
+ # import json for now and write the corresponding files
261
+ # compute cider score
262
+ score = self.compute_cider()
263
+ # debug
264
+ # print score
265
+ return np.mean(np.array(score)), np.array(score)
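
CIDEr-D differs from plain CIDEr inside `sim` above in two ways: the hypothesis counts are clipped against the reference (`min(vec_hyp, vec_ref)`), and each n-gram score is damped by a Gaussian length penalty. The penalty shown in isolation (a sketch; `sigma` defaults to 6.0 as in the constructor):

```python
import numpy as np

def length_penalty(length_hyp: int, length_ref: int, sigma: float = 6.0) -> float:
    # e^(-delta^2 / (2 * sigma^2)), as applied per n-gram order in `sim`
    delta = float(length_hyp - length_ref)
    return float(np.e ** (-(delta ** 2) / (2 * sigma ** 2)))

print(length_penalty(10, 10))  # 1.0   (equal lengths, no penalty)
print(length_penalty(10, 22))  # ~0.14 (large length mismatches are damped)
```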
evaluation/cider_score/cidereval/data/__init__.py ADDED
File without changes
evaluation/cider_score/cidereval/data/coco-val.p ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48e79470ebb58f0251df49dca5c9976a726a9e41b4a1d82be332b6f676b6950a
3
+ size 73520211
evaluation/cider_score/cidereval/eval.py ADDED
@@ -0,0 +1,40 @@
1
+ __author__ = 'rama'
2
+ from .tokenizer.ptbtokenizer import PTBTokenizer
3
+ from .cider.cider import Cider
4
+ from .ciderD.ciderD import CiderD
5
+
6
+
7
+ class CIDErEvalCap:
8
+ def __init__(self, gts, res, df):
9
+ print('tokenization...')
10
+ tokenizer = PTBTokenizer('gts')
11
+ _gts = tokenizer.tokenize(gts)
12
+ print('tokenized refs')
13
+ tokenizer = PTBTokenizer('res')
14
+ _res = tokenizer.tokenize(res)
15
+ print('tokenized cands')
16
+
17
+ self.gts = _gts
18
+ self.res = _res
19
+ self.df = df
20
+
21
+ def evaluate(self):
22
+ # =================================================
23
+ # Set up scorers
24
+ # =================================================
25
+
26
+ print('setting up scorers...')
27
+ scorers = [
28
+ (Cider(df=self.df), "CIDEr"), (CiderD(df=self.df), "CIDErD")
29
+ ]
30
+
31
+ # =================================================
32
+ # Compute scores
33
+ # =================================================
34
+ metric_scores = {}
35
+ for scorer, method in scorers:
36
+ print('computing %s score...' % (scorer.method()))
37
+ score, scores = scorer.compute_score(self.gts, self.res)
38
+ print("Mean %s score: %0.3f" % (method, score))
39
+ metric_scores[method] = list(scores)
40
+ return metric_scores
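
A hypothetical usage sketch of this wrapper. Note that it tokenizes through `PTBTokenizer`, so it needs Java plus the CoreNLP jar (see `tokenizer/ptbtokenizer.py`); the demo notebook sidesteps this by using the spacy-based tokenizer instead.

```python
from cidereval.eval import CIDErEvalCap

gts = {"img0": [{"caption": "a dog runs on the grass"},
                {"caption": "a dog is running outside"}]}
res = [{"image_id": "img0", "caption": "a dog runs on the grass"}]

evaluator = CIDErEvalCap(gts, res, df="corpus")
metric_scores = evaluator.evaluate()   # {"CIDEr": [...], "CIDErD": [...]}
```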
evaluation/cider_score/cidereval/scorers.py ADDED
@@ -0,0 +1,76 @@
1
+ from cidereval import CiderD, Cider
2
+ from cidereval.tokenizer import PTBTokenizer
3
+ from cidereval.tokenizer import SimpleTokenizer
4
+
5
+ def _preprocess_for_cider(refs, preds):
6
+ r"""
7
+ Convert preds and refs to the cider data format
8
+
9
+ refs: List[List[str]]
10
+ preds : List[str]
11
+
12
+ return gts: Dict[str : List[Dict['caption':str] : str ]],
13
+ res: List[Dict['image_id':str]: 'caption':str]
14
+ """
15
+
16
+ assert len(refs) == len(preds)
17
+
18
+ gts = {}
19
+ res = []
20
+
21
+ for i, (caps, pred) in enumerate(zip(refs, preds)):
22
+ gts[i] = [{ 'caption': cap } for cap in caps ]
23
+
24
+ res.append({ 'image_id': i,
25
+ 'caption': pred})
26
+ return gts, res
27
+
28
+ def cider(predictions, references, df="coco-val"):
29
+ r"""
30
+ Compute the cider score for the given predictions and references
31
+
32
+ predictions : List[str], model's predictions
33
+ references: List[List[str]], references
34
+ df: str, either 'coco-val' or 'corpus' (default: 'coco-val'). If 'coco-val', the TF-IDF precomputed on the COCO validation split is \\
35
+ used. If 'corpus', the TF-IDF is computed over the provided reference set.
36
+
37
+ returns {"avg_score": mp.float, "scores": np.array(np.float)}
38
+ """
39
+ gts, res = _preprocess_for_cider(references, predictions)
40
+ tokenizer_res = SimpleTokenizer('res')
41
+ tokenizer_gts = SimpleTokenizer('gts')
42
+
43
+ _gts = tokenizer_gts.tokenize(gts)
44
+ _res = tokenizer_res.tokenize(res)
45
+
46
+ scorer = Cider(df=df)
47
+
48
+ score, scores = scorer.compute_score(_gts, _res)
49
+
50
+ return {"avg_score": score, "scores": scores}
51
+
52
+
53
+ def ciderD(predictions, references, df="coco-va"):
54
+ r"""
55
+ Compute the ciderD score for the given predictions and references
56
+
57
+ predictions : List[str], model's predictions
58
+ references: List[List[str]], references
59
+ df: str, either 'coco-val' or 'corpus' (default: 'coco-val'). If 'coco-val', the TF-IDF precomputed on the COCO validation split is \\
60
+ used. If 'corpus', the TF-IDF is computed over the provided reference set.
61
+
62
+ returns {"avg_score": mp.float, "scores": np.array(np.float)}
63
+ """
64
+
65
+ gts, res = _preprocess_for_cider(references, predictions)
66
+ tokenizer_res = SimpleTokenizer('res')
67
+ tokenizer_gts = SimpleTokenizer('gts')
68
+
69
+ _gts = tokenizer_gts.tokenize(gts)
70
+ _res = tokenizer_res.tokenize(res)
71
+
72
+ scorer = CiderD(df=df)
73
+
74
+ score, scores = scorer.compute_score(_gts, _res)
75
+
76
+ return { "avg_score": score, "scores": scores}
evaluation/cider_score/cidereval/tokenizer/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ __author__ = 'hfang'
2
+ # edited by Michele Cafagna
3
+ from cidereval.tokenizer.ptbtokenizer import PTBTokenizer
4
+ from cidereval.tokenizer.simpletokenizer import SimpleTokenizer
evaluation/cider_score/cidereval/tokenizer/ptbtokenizer.py ADDED
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env python
2
+ #
3
+ # File Name : ptbtokenizer.py
4
+ #
5
+ # Description : Do the PTB Tokenization and remove punctuations.
6
+ #
7
+ # Creation Date : 29-12-2014
8
+ # Last Modified : Thu Mar 19 09:53:35 2015
9
+ # Authors : Hao Fang <hfang@uw.edu> and Tsung-Yi Lin <tl483@cornell.edu>
10
+
11
+ import os
12
+ import subprocess
13
+ import tempfile
14
+
15
+ # path to the stanford corenlp jar
16
+ STANFORD_CORENLP_3_4_1_JAR = 'stanford-corenlp-3.4.1.jar'
17
+
18
+ # punctuations to be removed from the sentences
19
+ PUNCTUATIONS = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-",
20
+ ".", "?", "!", ",", ":", "-", "--", "...", ";"]
21
+
22
+
23
+ class PTBTokenizer:
24
+ """Python wrapper of Stanford PTBTokenizer"""
25
+
26
+ def __init__(self, _source='gts'):
27
+ self.source = _source
28
+
29
+ def tokenize(self, captions_for_image):
30
+ """Tokenize a sample
31
+
32
+ Args:
33
+ captions_for_image :
34
+
35
+ IF _source='gts' follows format:
36
+ dict: { str : [
37
+ { "caption" : str },
38
+ { "caption" : str },
39
+ ...
40
+ ],
41
+ str : [ ... ],
42
+ ...
43
+ }
44
+ IF _source='res' follows format:
45
+ list: [ {"image_id" : str,
46
+ "caption" : str,
47
+ },
48
+ ...
49
+ ]
50
+ Returns:
51
+ final_tokenized_captions_for_index:
52
+ list: [ {"image_id" : str,
53
+ "caption" : str,
54
+ },
55
+ ...
56
+ ]
57
+ """
58
+ cmd = ['java', '-cp', STANFORD_CORENLP_3_4_1_JAR,
59
+ 'edu.stanford.nlp.process.PTBTokenizer',
60
+ '-preserveLines', '-lowerCase']
61
+
62
+ # ======================================================
63
+ # prepare data for PTB Tokenizer
64
+ # ======================================================
65
+
66
+ if self.source == 'gts':
67
+ image_id = [k for k, v in captions_for_image.items() for _ in range(len(v))]
68
+ sentences = '\n'.join([c['caption'].replace('\n', ' ') for k, v in captions_for_image.items() for c in v])
69
+ final_tokenized_captions_for_image = {}
70
+
71
+ elif self.source == 'res':
72
+ index = [i for i, v in enumerate(captions_for_image)]
73
+ image_id = [v["image_id"] for v in captions_for_image]
74
+ sentences = '\n'.join(v["caption"].replace('\n', ' ') for v in captions_for_image)
75
+ final_tokenized_captions_for_index = []
76
+
77
+ # ======================================================
78
+ # save sentences to temporary file
79
+ # ======================================================
80
+ path_to_jar_dir_name = os.path.dirname(os.path.abspath(__file__))
81
+ tmp_file = tempfile.NamedTemporaryFile(delete=False, dir=path_to_jar_dir_name, mode='w')
82
+ tmp_file.write(sentences)
83
+ tmp_file.close()
84
+
85
+ # ======================================================
86
+ # tokenize sentence
87
+ # ======================================================
88
+ cmd.append(os.path.basename(tmp_file.name))
89
+ p_tokenizer = subprocess.Popen(cmd, cwd=path_to_jar_dir_name, stdout=subprocess.PIPE)
90
+ token_lines = p_tokenizer.communicate(input=sentences.rstrip())[0].decode("utf-8")
91
+ lines = token_lines.split('\n')
92
+ # remove temp file
93
+ os.remove(tmp_file.name)
94
+
95
+ # ======================================================
96
+ # create dictionary for tokenized captions
97
+ # ======================================================
98
+ if self.source == 'gts':
99
+ for k, line in zip(image_id, lines):
100
+ if k not in final_tokenized_captions_for_image:
101
+ final_tokenized_captions_for_image[k] = []
102
+ tokenized_caption = ' '.join([w for w in line.rstrip().split(' ') if w not in PUNCTUATIONS])
103
+ final_tokenized_captions_for_image[k].append(tokenized_caption)
104
+
105
+ return final_tokenized_captions_for_image
106
+
107
+ elif self.source == 'res':
108
+ for k, img, line in zip(index, image_id, lines):
109
+ tokenized_caption = ' '.join([w for w in line.rstrip().split(' ') if w not in PUNCTUATIONS])
110
+ final_tokenized_captions_for_index.append({'image_id': img, 'caption': [tokenized_caption]})
111
+
112
+ return final_tokenized_captions_for_index
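
This wrapper shells out to Java, so `stanford-corenlp-3.4.1.jar` must sit next to this module and a JRE must be on the PATH. A hypothetical call on the 'res' format, assuming that setup:

```python
from cidereval.tokenizer.ptbtokenizer import PTBTokenizer

res = [{"image_id": "img0", "caption": "A dog runs, quickly!"}]
tokenized = PTBTokenizer('res').tokenize(res)
# -> [{'image_id': 'img0', 'caption': ['a dog runs quickly']}]
#    (lowercased, punctuation stripped)
```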
evaluation/cider_score/cidereval/tokenizer/simpletokenizer.py ADDED
@@ -0,0 +1,106 @@
1
+ #!/usr/bin/env python
2
+ #
3
+ # File Name : simpletokenizer.py
4
+ #
5
+ # Description : Yet another tokenizer.
6
+ #
7
+ # Creation Date : 12-11-2021
8
+
9
+ import spacy
10
+ from spacy.lang.char_classes import ALPHA, ALPHA_LOWER, ALPHA_UPPER
11
+ from spacy.lang.char_classes import CONCAT_QUOTES, LIST_ELLIPSES, LIST_ICONS
12
+ from spacy.util import compile_infix_regex
13
+
14
+
15
+ # punctuations to be removed from the sentences
16
+ PUNCTUATIONS = ["''", "'", "``", "`", "-LRB-", "-RRB-", "-LCB-", "-RCB-",
17
+ ".", "?", "!", ",", ":", "-", "--", "...", ";", " ", ""]
18
+
19
+ infixes = (
20
+ LIST_ELLIPSES
21
+ + LIST_ICONS
22
+ + [
23
+ r"(?<=[0-9])[+\-\*^](?=[0-9-])",
24
+ r"(?<=[{al}{q}])\.(?=[{au}{q}])".format(
25
+ al=ALPHA_LOWER, au=ALPHA_UPPER, q=CONCAT_QUOTES
26
+ ),
27
+ r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
28
+ # ✅ Commented out regex that splits on hyphens between letters:
29
+ # r"(?<=[{a}])(?:{h})(?=[{a}])".format(a=ALPHA, h=HYPHENS),
30
+ r"(?<=[{a}0-9])[:<>=/](?=[{a}])".format(a=ALPHA),
31
+ ]
32
+ )
33
+
34
+
35
+ class SimpleTokenizer:
36
+ """Simple Tokenizer"""
37
+
38
+ def __init__(self, _source='gts'):
39
+ self.source = _source
40
+
41
+ # setting up the tokenizer
42
+ self._nlp = spacy.load("en_core_web_sm")
43
+ infix_re = compile_infix_regex(infixes)
44
+ self._nlp.tokenizer.infix_finditer = infix_re.finditer
45
+ self._tokenizer = self._nlp.tokenizer
46
+
47
+ def tokenize(self, captions_for_image):
48
+ """Tokenize a sample
49
+
50
+ Args:
51
+ captions_for_image :
52
+
53
+ IF _source='gts' follows format:
54
+ dict: { str : [
55
+ { "caption" : str },
56
+ { "caption" : str },
57
+ ...
58
+ ],
59
+ str : [ ... ],
60
+ ...
61
+ }
62
+ IF _source='res' follows format:
63
+ list: [ {"image_id" : str,
64
+ "caption" : str,
65
+ },
66
+ ...
67
+ ]
68
+ Returns:
69
+ final_tokenized_captions_for_index:
70
+ list: [ {"image_id" : str,
71
+ "caption" : str,
72
+ },
73
+ ...
74
+ ]
75
+ """
76
+
77
+ tokenized_captions = None
78
+
79
+ if self.source == 'gts':
80
+ tokenized_captions= {}
81
+
82
+ for k in captions_for_image:
83
+
84
+ if k not in tokenized_captions:
85
+ tokenized_captions[k] = []
86
+
87
+ for item in captions_for_image[k]:
88
+
89
+ tokenized_captions[k].append(
90
+ " ".join([ tok.text.lower().strip() for tok in self._tokenizer(item['caption']) if tok.text.lower().strip() not in PUNCTUATIONS]))
91
+
92
+ elif self.source == 'res':
93
+
94
+ tokenized_captions= []
95
+
96
+ for item in captions_for_image:
97
+
98
+ tokenized_captions.append(
99
+ { 'image_id' : item['image_id'],
100
+ 'caption' : [" ".join([ tok.text.lower().strip() for tok in self._tokenizer(item['caption']) if tok.text.lower().strip() not in PUNCTUATIONS])]
101
+ })
102
+
103
+ else:
104
+ ValueError("source can be either 'gts' or 'res' ")
105
+
106
+ return tokenized_captions
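
This is the Java-free replacement used by `scorers.py`; it requires the `en_core_web_sm` spacy model (installed in the demo notebook). A sketch of the same call as above:

```python
from cidereval.tokenizer.simpletokenizer import SimpleTokenizer

res = [{"image_id": "img0", "caption": "A dog runs, quickly!"}]
tokenized = SimpleTokenizer('res').tokenize(res)
# -> [{'image_id': 'img0', 'caption': ['a dog runs quickly']}]
```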
evaluation/cider_score/output_sample.xls ADDED
Binary file (190 kB). View file
 
filter_dataset.py ADDED
@@ -0,0 +1,43 @@
1
+ from datasets import load_dataset, DatasetDict
2
+ from PIL import Image, ImageFile, UnidentifiedImageError
3
+ import io
4
+ from tqdm import tqdm
5
+
6
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
7
+
8
+ # Assuming your dataset is loaded using load_dataset
9
+ cache_dir = "/bask/projects/p/phwq4930-gbm/Zeyu/PathVLM/.cache"
10
+ dataset_name = "CNX-PathLLM/Pathcap"
11
+ dataset = load_dataset(dataset_name, split="train", cache_dir=cache_dir)
12
+
13
+ print(f"original dataset size: {len(dataset)}")
14
+
15
+ # keep valid indices
16
+ valid_indices = []
17
+
18
+ # go through and check every element
19
+ for idx in tqdm(range(len(dataset))):
20
+ try:
21
+ example = dataset[idx]
22
+
23
+ text = example["txt"]
24
+ if not isinstance(text, str):
25
+ raise ValueError(f"not a string: {text}")
26
+ valid_indices.append(idx)
27
+ except Exception as e:
28
+ print(f"Cannot recognize file {idx}: {e}")
29
+
30
+ # Select valid samples according to the indices of valid samples.
31
+ filtered_dataset = dataset.select(valid_indices)
32
+
33
+ # Filter out images that cannot be loaded.
34
+ # filtered_dataset = dataset.filter(lambda example: example["is_valid"])
35
+
36
+ # Print the size of the filtered dataset
37
+ print(f"filtered dataset size: {len(filtered_dataset)}")
38
+
39
+ if len(dataset) != len(filtered_dataset):
40
+ # convert to DatasetDict
41
+ filtered_dataset_dict = DatasetDict({"train": filtered_dataset})
42
+ # push to hub
43
+ filtered_dataset_dict.push_to_hub(dataset_name)
gigapath/__init__.py ADDED
File without changes
gigapath/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (143 Bytes). View file
 
gigapath/__pycache__/pos_embed.cpython-310.pyc ADDED
Binary file (2.38 kB). View file
 
gigapath/__pycache__/slide_encoder.cpython-310.pyc ADDED
Binary file (10.2 kB). View file
 
gigapath/__pycache__/slide_encoder_vision.cpython-310.pyc ADDED
Binary file (8.31 kB). View file
 
gigapath/classification_head.py ADDED
@@ -0,0 +1,92 @@
1
+ import torch
2
+
3
+ from torch import nn
4
+ from . import slide_encoder
5
+
6
+
7
+ def reshape_input(imgs, coords, pad_mask=None):
8
+ if len(imgs.shape) == 4:
9
+ imgs = imgs.squeeze(0)
10
+ if len(coords.shape) == 4:
11
+ coords = coords.squeeze(0)
12
+ if pad_mask is not None:
13
+ if len(pad_mask.shape) != 2:
14
+ pad_mask = pad_mask.squeeze(0)
15
+ return imgs, coords, pad_mask
16
+
17
+
18
+ class ClassificationHead(nn.Module):
19
+ """
20
+ The classification head for the slide encoder
21
+
22
+ Arguments:
23
+ ----------
24
+ input_dim: int
25
+ The input dimension of the slide encoder
26
+ latent_dim: int
27
+ The latent dimension of the slide encoder
28
+ feat_layer: str
29
+ The layers from which embeddings are fed to the classifier, e.g., 5-11 for taking out the 5th and 11th layers
30
+ n_classes: int
31
+ The number of classes
32
+ model_arch: str
33
+ The architecture of the slide encoder
34
+ pretrained: str
35
+ The path to the pretrained slide encoder
36
+ freeze: bool
37
+ Whether to freeze the pretrained model
38
+ """
39
+
40
+ def __init__(
41
+ self,
42
+ input_dim,
43
+ latent_dim,
44
+ feat_layer,
45
+ n_classes=2,
46
+ model_arch="gigapath_slide_enc12l768d",
47
+ pretrained="hf_hub:prov-gigapath/prov-gigapath",
48
+ freeze=False,
49
+ **kwargs,
50
+ ):
51
+ super(ClassificationHead, self).__init__()
52
+
53
+ # setup the slide encoder
54
+ self.feat_layer = [eval(x) for x in feat_layer.split("-")]
55
+ self.feat_dim = len(self.feat_layer) * latent_dim
56
+ self.slide_encoder = slide_encoder.create_model(pretrained, model_arch, in_chans=input_dim, **kwargs)
57
+
58
+ # whether to freeze the pretrained model
59
+ if freeze:
60
+ print("Freezing Pretrained GigaPath model")
61
+ for name, param in self.slide_encoder.named_parameters():
62
+ param.requires_grad = False
63
+ print("Done")
64
+ # setup the classifier
65
+ self.classifier = nn.Sequential(*[nn.Linear(self.feat_dim, n_classes)])
66
+
67
+ def forward(self, images: torch.Tensor, coords: torch.Tensor) -> torch.Tensor:
68
+ """
69
+ Arguments:
70
+ ----------
71
+ images: torch.Tensor
72
+ The input images with shape [N, L, D]
73
+ coords: torch.Tensor
74
+ The input coordinates with shape [N, L, 2]
75
+ """
76
+ # inputs: [N, L, D]
77
+ if len(images.shape) == 2:
78
+ images = images.unsqueeze(0)
79
+ assert len(images.shape) == 3
80
+ # forward GigaPath slide encoder
81
+ img_enc = self.slide_encoder.forward(images, coords, all_layer_embed=True)
82
+ img_enc = [img_enc[i] for i in self.feat_layer]
83
+ img_enc = torch.cat(img_enc, dim=-1)
84
+ # classifier
85
+ h = img_enc.reshape([-1, img_enc.size(-1)])
86
+ logits = self.classifier(h)
87
+ return logits
88
+
89
+
90
+ def get_model(**kwargs):
91
+ model = ClassificationHead(**kwargs)
92
+ return model
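
A construction sketch under the defaults above (`model_arch="gigapath_slide_enc12l768d"`, pretrained slide encoder pulled from the Hugging Face hub); the tensor shapes follow the `forward` docstring, and the random inputs are placeholders:

```python
import torch
from gigapath.classification_head import get_model

model = get_model(input_dim=1536,       # tile embedding dimension
                  latent_dim=768,       # slide encoder latent dimension
                  feat_layer="5-11",    # concatenate layer-5 and layer-11 embeddings
                  n_classes=2,
                  freeze=True)          # keep the pretrained slide encoder frozen

images = torch.randn(1, 100, 1536)      # [N, L, D] tile embeddings
coords = torch.rand(1, 100, 2) * 10000  # [N, L, 2] tile coordinates
logits = model(images, coords)          # [N, n_classes]
```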
gigapath/pipeline.py ADDED
@@ -0,0 +1,190 @@
1
+ # --------------------------------------------------------
2
+ # Pipeline for running with GigaPath
3
+ # --------------------------------------------------------
4
+ import os
5
+ import timm
6
+ import torch
7
+ import shutil
8
+ import numpy as np
9
+ import pandas as pd
10
+ import gigapath.slide_encoder as slide_encoder
11
+
12
+ from tqdm import tqdm
13
+ from PIL import Image
14
+ from pathlib import Path
15
+ from torchvision import transforms
16
+ from typing import List, Tuple, Union
17
+ from torch.utils.data import Dataset, DataLoader
18
+ from gigapath.preprocessing.data.create_tiles_dataset import process_slide
19
+
20
+
21
+ class TileEncodingDataset(Dataset):
22
+ """
23
+ Do encoding for tiles
24
+
25
+ Arguments:
26
+ ----------
27
+ image_paths : List[str]
28
+ List of image paths, each image is named with its coordinates
29
+ Example: ['images/256x_256y.png', 'images/256x_512y.png']
30
+ transform : torchvision.transforms.Compose
31
+ Transform to apply to each image
32
+ """
33
+ def __init__(self, image_paths: List[str], transform=None):
34
+ self.transform = transform
35
+ self.image_paths = image_paths
36
+
37
+ def __len__(self):
38
+ return len(self.image_paths)
39
+
40
+ def __getitem__(self, idx):
41
+ img_path = self.image_paths[idx]
42
+ img_name = os.path.basename(img_path)
43
+ # get x, y coordinates from the image name
44
+ x, y = img_name.split('.png')[0].split('_')
45
+ x, y = int(x.replace('x', '')), int(y.replace('y', ''))
46
+ # load the image
47
+ with open(img_path, "rb") as f:
48
+ img = Image.open(f).convert("RGB")
49
+ if self.transform:
50
+ img = self.transform(img)
51
+ return {'img': torch.from_numpy(np.array(img)),
52
+ 'coords': torch.from_numpy(np.array([x, y])).float()}
53
+
54
+
55
+ def tile_one_slide(slide_file:str='', save_dir:str='', level:int=0, tile_size:int=256):
56
+ """
57
+ This function is used to tile a single slide and save the tiles to a directory.
58
+ -------------------------------------------------------------------------------
59
+ Warning: pixman 0.38 has a known bug that produces partially broken images.
60
+ Make sure to use a different version of pixman.
61
+ -------------------------------------------------------------------------------
62
+
63
+ Arguments:
64
+ ----------
65
+ slide_file : str
66
+ The path to the slide file.
67
+ save_dir : str
68
+ The directory to save the tiles.
69
+ level : int
70
+ The magnification level to use for tiling. level=0 is the highest magnification level.
71
+ tile_size : int
72
+ The size of the tiles.
73
+ """
74
+ slide_id = os.path.basename(slide_file)
75
+ # slide_sample = {"image": slide_file, "slide_id": slide_id, "metadata": {'TP53': 1, 'Diagnosis': 'Lung Cancer'}}
76
+ slide_sample = {"image": slide_file, "slide_id": slide_id, "metadata": {}}
77
+
78
+ save_dir = Path(save_dir)
79
+ if save_dir.exists():
80
+ print(f"Warning: Directory {save_dir} already exists. ")
81
+
82
+ print(f"Processing slide {slide_file} at level {level} with tile size {tile_size}. Saving to {save_dir}.")
83
+
84
+ slide_dir = process_slide(
85
+ slide_sample,
86
+ level=level,
87
+ margin=0,
88
+ tile_size=tile_size,
89
+ foreground_threshold=None,
90
+ occupancy_threshold=0.1,
91
+ output_dir=save_dir / "output",
92
+ thumbnail_dir=save_dir / "thumbnails",
93
+ tile_progress=True,
94
+ )
95
+
96
+ dataset_csv_path = slide_dir / "dataset.csv"
97
+ dataset_df = pd.read_csv(dataset_csv_path)
98
+ assert len(dataset_df) > 0
99
+ failed_csv_path = slide_dir / "failed_tiles.csv"
100
+ failed_df = pd.read_csv(failed_csv_path)
101
+ assert len(failed_df) == 0
102
+
103
+ print(f"Slide {slide_file} has been tiled. {len(dataset_df)} tiles saved to {slide_dir}.")
104
+
105
+
106
+ def load_tile_encoder_transforms() -> transforms.Compose:
107
+ """Load the transforms for the tile encoder"""
108
+ transform = transforms.Compose(
109
+ [
110
+ transforms.Resize(256, interpolation=transforms.InterpolationMode.BICUBIC),
111
+ transforms.CenterCrop(224),
112
+ transforms.ToTensor(),
113
+ transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
114
+ ])
115
+ return transform
116
+
117
+
118
+ def load_tile_slide_encoder(local_tile_encoder_path: str='',
119
+ local_slide_encoder_path: str='',
120
+ global_pool=False) -> Tuple[torch.nn.Module, torch.nn.Module]:
121
+ """Load the GigaPath tile and slide encoder models.
122
+ Note: Older versions of timm have compatibility issues.
123
+ Please ensure that you use a newer version by running the following command: pip install timm>=1.0.3.
124
+ """
125
+ if local_tile_encoder_path:
126
+ tile_encoder = timm.create_model("hf_hub:prov-gigapath/prov-gigapath", pretrained=False, checkpoint_path=local_tile_encoder_path)
127
+ else:
128
+ tile_encoder = timm.create_model("hf_hub:prov-gigapath/prov-gigapath", pretrained=True)
129
+ print("Tile encoder param #", sum(p.numel() for p in tile_encoder.parameters()))
130
+
131
+ if local_slide_encoder_path:
132
+ slide_encoder_model = slide_encoder.create_model(local_slide_encoder_path, "gigapath_slide_enc12l768d", 1536, global_pool=global_pool)
133
+ else:
134
+ slide_encoder_model = slide_encoder.create_model("hf_hub:prov-gigapath/prov-gigapath", "gigapath_slide_enc12l768d", 1536, global_pool=global_pool)
135
+ print("Slide encoder param #", sum(p.numel() for p in slide_encoder_model.parameters()))
136
+
137
+ return tile_encoder, slide_encoder_model
138
+
139
+
140
+ @torch.no_grad()
141
+ def run_inference_with_tile_encoder(image_paths: List[str], tile_encoder: torch.nn.Module, batch_size: int=128) -> dict:
142
+ """
143
+ Run inference with the tile encoder
144
+
145
+ Arguments:
146
+ ----------
147
+ image_paths : List[str]
148
+ List of image paths, each image is named with its coordinates
149
+ tile_encoder : torch.nn.Module
150
+ Tile encoder model
151
+ """
152
+ tile_encoder = tile_encoder.cuda()
153
+ # make the tile dataloader
154
+ tile_dl = DataLoader(TileEncodingDataset(image_paths, transform=load_tile_encoder_transforms()), batch_size=batch_size, shuffle=False)
155
+ # run inference
156
+ tile_encoder.eval()
157
+ collated_outputs = {'tile_embeds': [], 'coords': []}
158
+ with torch.cuda.amp.autocast(dtype=torch.float16):
159
+ for batch in tqdm(tile_dl, desc='Running inference with tile encoder'):
160
+ collated_outputs['tile_embeds'].append(tile_encoder(batch['img'].cuda()).detach().cpu())
161
+ collated_outputs['coords'].append(batch['coords'])
162
+ return {k: torch.cat(v) for k, v in collated_outputs.items()}
163
+
164
+
165
+ @torch.no_grad()
166
+ def run_inference_with_slide_encoder(tile_embeds: torch.Tensor, coords: torch.Tensor, slide_encoder_model: torch.nn.Module) -> dict:
167
+ """
168
+ Run inference with the slide encoder
169
+
170
+ Arguments:
171
+ ----------
172
+ tile_embeds : torch.Tensor
173
+ Tile embeddings
174
+ coords : torch.Tensor
175
+ Coordinates of the tiles
176
+ slide_encoder_model : torch.nn.Module
177
+ Slide encoder model
178
+ """
179
+ if len(tile_embeds.shape) == 2:
180
+ tile_embeds = tile_embeds.unsqueeze(0)
181
+ coords = coords.unsqueeze(0)
182
+
183
+ slide_encoder_model = slide_encoder_model.cuda()
184
+ slide_encoder_model.eval()
185
+ # run inference
186
+ with torch.cuda.amp.autocast(dtype=torch.float16):
187
+ slide_embeds = slide_encoder_model(tile_embeds.cuda(), coords.cuda(), all_layer_embed=True)
188
+ outputs = {"layer_{}_embed".format(i): slide_embeds[i].cpu() for i in range(len(slide_embeds))}
189
+ outputs["last_layer_embed"] = slide_embeds[-1].cpu()
190
+ return outputs
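
A sketch chaining the helpers above. The slide path and glob pattern are placeholders, and a CUDA GPU plus access to the prov-gigapath hub weights are assumed:

```python
import glob
from gigapath.pipeline import (tile_one_slide, load_tile_slide_encoder,
                               run_inference_with_tile_encoder,
                               run_inference_with_slide_encoder)

tile_one_slide("slides/sample.svs", save_dir="tmp/", level=0, tile_size=256)
tile_paths = glob.glob("tmp/output/**/*.png", recursive=True)  # tiles named like 256x_512y.png

tile_encoder, slide_encoder_model = load_tile_slide_encoder()
tile_out = run_inference_with_tile_encoder(tile_paths, tile_encoder)
slide_out = run_inference_with_slide_encoder(tile_out["tile_embeds"],
                                             tile_out["coords"],
                                             slide_encoder_model)
print(slide_out["last_layer_embed"].shape)
```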
gigapath/pos_embed.py ADDED
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the license found in the
+ # LICENSE file in the root directory of this source tree.
+ # --------------------------------------------------------
+ # References:
+ # timm: https://github.com/rwightman/pytorch-image-models/tree/master/timm
+ # DeiT: https://github.com/facebookresearch/deit
+ # MAE: https://github.com/facebookresearch/mae
+ # --------------------------------------------------------
+ #
+ # Portions Copyright Prov-GigaPath
+ # Original File: https://github.com/facebookresearch/mae
+ # --------------------------------------------------------
+ # Position embedding utils
+ # --------------------------------------------------------
+
+ import numpy as np
+
+ import torch
+
+
+ # --------------------------------------------------------
+ # 2D sine-cosine position embedding
+ # References:
+ # Transformer: https://github.com/tensorflow/models/blob/master/official/nlp/transformer/model_utils.py
+ # MoCo v3: https://github.com/facebookresearch/moco-v3
+ # --------------------------------------------------------
+ def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
+     """
+     grid_size: int of the grid height and width
+     return:
+     pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token)
+     """
+     grid_h = np.arange(grid_size, dtype=np.float32)
+     grid_w = np.arange(grid_size, dtype=np.float32)
+     grid = np.meshgrid(grid_w, grid_h)  # here w goes first
+     grid = np.stack(grid, axis=0)
+
+     grid = grid.reshape([2, 1, grid_size, grid_size])
+     pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
+     if cls_token:
+         pos_embed = np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
+     return pos_embed
+
+
+ def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
+     assert embed_dim % 2 == 0
+
+     # use half of dimensions to encode grid_h
+     emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0])  # (H*W, D/2)
+     emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1])  # (H*W, D/2)
+
+     emb = np.concatenate([emb_h, emb_w], axis=1)  # (H*W, D)
+     return emb
+
+
+ def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
+     """
+     embed_dim: output dimension for each position
+     pos: a list of positions to be encoded: size (M,)
+     out: (M, D)
+     """
+     assert embed_dim % 2 == 0
+     omega = np.arange(embed_dim // 2, dtype=float)
+     omega /= embed_dim / 2.0
+     omega = 1.0 / 10000**omega  # (D/2,)
+
+     pos = pos.reshape(-1)  # (M,)
+     out = np.einsum("m,d->md", pos, omega)  # (M, D/2), outer product
+
+     emb_sin = np.sin(out)  # (M, D/2)
+     emb_cos = np.cos(out)  # (M, D/2)
+
+     emb = np.concatenate([emb_sin, emb_cos], axis=1)  # (M, D)
+     return emb
+
+
+ # --------------------------------------------------------
+ # Interpolate position embeddings for high-resolution
+ # References:
+ # DeiT: https://github.com/facebookresearch/deit
+ # --------------------------------------------------------
+ def interpolate_pos_embed(model, checkpoint_model):
+     if "pos_embed" in checkpoint_model:
+         pos_embed_checkpoint = checkpoint_model["pos_embed"]
+         embedding_size = pos_embed_checkpoint.shape[-1]
+         num_patches = model.patch_embed.num_patches
+         num_extra_tokens = model.pos_embed.shape[-2] - num_patches
+         # height (== width) for the checkpoint position embedding
+         orig_size = int((pos_embed_checkpoint.shape[-2] - num_extra_tokens) ** 0.5)
+         # height (== width) for the new position embedding
+         new_size = int(num_patches**0.5)
+         # class_token and dist_token are kept unchanged
+         if orig_size != new_size:
+             print("Position interpolate from %dx%d to %dx%d" % (orig_size, orig_size, new_size, new_size))
+             extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens]
+             # only the position tokens are interpolated
+             pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:]
+             pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, embedding_size).permute(0, 3, 1, 2)
+             pos_tokens = torch.nn.functional.interpolate(pos_tokens, size=(new_size, new_size), mode="bicubic", align_corners=False)
+             pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2)
+             new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1)
+             checkpoint_model["pos_embed"] = new_pos_embed
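
A minimal usage sketch of the utilities above. The 14x14 grid and 768-dim embedding are illustrative values (e.g. a 224x224 image split into 16x16 patches), and freezing the parameter follows the MAE convention rather than anything this file mandates:

import torch
from gigapath.pos_embed import get_2d_sincos_pos_embed

# Build a fixed sine-cosine table for a 14x14 patch grid plus a cls token.
embed_dim, grid_size = 768, 14
pos_embed = get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=True)
assert pos_embed.shape == (1 + grid_size * grid_size, embed_dim)

# Copy it into a non-learnable ViT positional-embedding parameter.
pe = torch.nn.Parameter(torch.zeros(1, 1 + grid_size * grid_size, embed_dim),
                        requires_grad=False)
pe.data.copy_(torch.from_numpy(pos_embed).float().unsqueeze(0))

interpolate_pos_embed plays the complementary role at load time: when a checkpoint's grid (say 14x14) does not match the model's, it bicubically resizes the position tokens while leaving the cls/dist tokens untouched, so load_state_dict can proceed.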
gigapath/preprocessing/__init__.py ADDED
File without changes
gigapath/preprocessing/data/__init__.py ADDED
File without changes
gigapath/preprocessing/data/box_utils.py ADDED
@@ -0,0 +1,145 @@
+ # ------------------------------------------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
+ #
+ # Original: https://github.com/microsoft/hi-ml/blob/main/hi-ml/src/health_ml/utils/box_utils.py
+ # ------------------------------------------------------------------------------------------
+
+ from dataclasses import dataclass
+ from typing import Optional, Sequence, Tuple
+
+ import numpy as np
+ from scipy import ndimage
+
+
+ @dataclass(frozen=True)
+ class Box:
+     """Utility class representing rectangular regions in 2D images.
+
+     :param x: Horizontal coordinate of the top-left corner.
+     :param y: Vertical coordinate of the top-left corner.
+     :param w: Box width.
+     :param h: Box height.
+     :raises ValueError: If either `w` or `h` is <= 0.
+     """
+     x: int
+     y: int
+     w: int
+     h: int
+
+     def __post_init__(self) -> None:
+         if self.w <= 0:
+             raise ValueError(f"Width must be strictly positive, received {self.w}")
+         if self.h <= 0:
+             raise ValueError(f"Height must be strictly positive, received {self.h}")
+
+     def __add__(self, shift: Sequence[int]) -> 'Box':
+         """Translates the box's location by a given shift.
+
+         :param shift: A length-2 sequence containing horizontal and vertical shifts.
+         :return: A new box with updated `x = x + shift[0]` and `y = y + shift[1]`.
+         :raises ValueError: If `shift` does not have two elements.
+         """
+         if len(shift) != 2:
+             raise ValueError("Shift must be two-dimensional")
+         return Box(x=self.x + shift[0],
+                    y=self.y + shift[1],
+                    w=self.w,
+                    h=self.h)
+
+     def __mul__(self, factor: float) -> 'Box':
+         """Scales the box by a given factor, e.g. when changing resolution.
+
+         :param factor: The factor by which to multiply the box's location and dimensions.
+         :return: The updated box, with location and dimensions rounded to `int`.
+         """
+         return Box(x=int(self.x * factor),
+                    y=int(self.y * factor),
+                    w=int(self.w * factor),
+                    h=int(self.h * factor))
+
+     def __rmul__(self, factor: float) -> 'Box':
+         """Scales the box by a given factor, e.g. when changing resolution.
+
+         :param factor: The factor by which to multiply the box's location and dimensions.
+         :return: The updated box, with location and dimensions rounded to `int`.
+         """
+         return self * factor
+
+     def __truediv__(self, factor: float) -> 'Box':
+         """Scales the box by a given factor, e.g. when changing resolution.
+
+         :param factor: The factor by which to divide the box's location and dimensions.
+         :return: The updated box, with location and dimensions rounded to `int`.
+         """
+         return self * (1. / factor)
+
+     def add_margin(self, margin: int) -> 'Box':
+         """Adds a symmetric margin on all sides of the box.
+
+         :param margin: The amount by which to enlarge the box.
+         :return: A new box enlarged by `margin` on all sides.
+         """
+         return Box(x=self.x - margin,
+                    y=self.y - margin,
+                    w=self.w + 2 * margin,
+                    h=self.h + 2 * margin)
+
+     def clip(self, other: 'Box') -> Optional['Box']:
+         """Clips a box to the interior of another.
+
+         This is useful to constrain a region to the interior of an image.
+
+         :param other: Box representing the new constraints.
+         :return: A new constrained box, or `None` if the boxes do not overlap.
+         """
+         x0 = max(self.x, other.x)
+         y0 = max(self.y, other.y)
+         x1 = min(self.x + self.w, other.x + other.w)
+         y1 = min(self.y + self.h, other.y + other.h)
+         try:
+             return Box(x=x0, y=y0, w=x1 - x0, h=y1 - y0)
+         except ValueError:  # Empty result, boxes don't overlap
+             return None
+
+     def to_slices(self) -> Tuple[slice, slice]:
+         """Converts the box to slices for indexing arrays.
+
+         For example: `my_2d_array[my_box.to_slices()]`.
+
+         :return: A 2-tuple with vertical and horizontal slices.
+         """
+         return (slice(self.y, self.y + self.h),
+                 slice(self.x, self.x + self.w))
+
+     @staticmethod
+     def from_slices(slices: Sequence[slice]) -> 'Box':
+         """Converts a pair of vertical and horizontal slices into a box.
+
+         :param slices: A length-2 sequence containing vertical and horizontal `slice` objects.
+         :return: A box with corresponding location and dimensions.
+         """
+         vert_slice, horz_slice = slices
+         return Box(x=horz_slice.start,
+                    y=vert_slice.start,
+                    w=horz_slice.stop - horz_slice.start,
+                    h=vert_slice.stop - vert_slice.start)
+
+
+ def get_bounding_box(mask: np.ndarray) -> Box:
+     """Extracts a bounding box from a binary 2D array.
+
+     :param mask: A 2D array with 0 (or `False`) as background and >0 (or `True`) as foreground.
+     :return: The smallest box covering all non-zero elements of `mask`.
+     :raises TypeError: When the input mask has more than two dimensions.
+     :raises RuntimeError: When all elements in the mask are zero.
+     """
+     if mask.ndim != 2:
+         raise TypeError(f"Expected a 2D array but got an array with shape {mask.shape}")
+
+     slices = ndimage.find_objects(mask > 0)
+     if not slices:
+         raise RuntimeError("The input mask is empty")
+     assert len(slices) == 1
+
+     return Box.from_slices(slices[0])
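
A minimal usage sketch of Box and get_bounding_box, e.g. for cropping a slide thumbnail to its tissue region; the mask shape and margin below are made-up illustrative values:

import numpy as np
from gigapath.preprocessing.data.box_utils import Box, get_bounding_box

# Locate the foreground of a binary mask, pad it, and clip it back to
# the image bounds before cropping with plain array indexing.
mask = np.zeros((100, 100), dtype=bool)
mask[20:60, 30:80] = True

box = get_bounding_box(mask)                        # Box(x=30, y=20, w=50, h=40)
padded = box.add_margin(16)                         # may overshoot the borders
clipped = padded.clip(Box(x=0, y=0, w=100, h=100))  # Box(x=14, y=4, w=82, h=72)
crop = mask[clipped.to_slices()]                    # shape (72, 82)

Note that clip returns None when the two boxes do not overlap, so callers should check the result before indexing.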