# llm_cp2/src/lmms-eval/lmms_eval/tasks/flickr30k/flickr30k_test_lite.yaml
# csuhan's picture
# Upload folder using huggingface_hub
# b0c0df0 verified
# Task configuration for "flickr30k_test_lite".
# Values tagged `!function` name attributes of a sibling `utils` module; the
# tag is not standard YAML and is presumably resolved by the consuming
# framework's loader (confirm against the loader before using a plain parser).

# Dataset selection.
dataset_path: lmms-lab/LMMs-Eval-Lite
dataset_kwargs:
  # Canonical lowercase boolean; parses to the same value as `True`.
  token: true
task: "flickr30k_test_lite"
dataset_name: flickr30k_test
test_split: lite

# Prompt construction and generation settings.
output_type: generate_until
doc_to_visual: !function utils.flickr_doc_to_visual
doc_to_text: !function utils.flickr_doc_to_text
doc_to_target: "answer"
generation_kwargs:
  max_new_tokens: 64
  temperature: 0
  top_p: 1.0
  num_beams: 1
  do_sample: false

# Scoring.
process_results: !function utils.flickr_process_result
# Note that the metric name can be either a registered metric function (such
# as the case for GQA) or a key name returned by process_results.
metric_list:
  - metric: flickr_Bleu_4
    aggregation: !function utils.flickr_bleu4
    higher_is_better: true
  - metric: flickr_Bleu_3
    aggregation: !function utils.flickr_bleu3
    higher_is_better: true
  - metric: flickr_Bleu_2
    aggregation: !function utils.flickr_bleu2
    higher_is_better: true
  - metric: flickr_Bleu_1
    aggregation: !function utils.flickr_bleu1
    higher_is_better: true
  - metric: flickr_METEOR
    aggregation: !function utils.flickr_meteor
    higher_is_better: true
  - metric: flickr_ROUGE_L
    aggregation: !function utils.flickr_rougel
    higher_is_better: true
  - metric: flickr_CIDEr
    aggregation: !function utils.flickr_cider
    higher_is_better: true
  # SPICE is disabled; uncomment all three lines together to re-enable it.
  # - metric: flickr_SPICE
  #   aggregation: !function utils.flickr_spice
  #   higher_is_better: true

metadata:
  # Kept as an unquoted float so the parsed type is unchanged for consumers.
  - version: 0.0