Adding `safetensors` variant of this model

by SFconvertbot - opened Jul 7, 2025

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

-102810

Files changed (7) hide show

README.md +7 -17
added_tokens.json +0 -237
model.safetensors +0 -3
pytorch_model.bin +0 -3
special_tokens_map.json +0 -15
tokenizer.json +0 -0
tokenizer_config.json +0 -65

README.md CHANGED Viewed

@@ -10,7 +10,6 @@ tags:
   - transformer
   - BERT
   - ClimateBERT
-base_model: climatebert/distilroberta-base-climate-f
 metrics:
   - f1
 model-index:
@@ -81,7 +80,7 @@ from transformers import AutoTokenizer, AutoModelForMaskedLM, pipeline
 import torch
 # Load the pretrained model and tokenizer
-model_name = "P0L3/sciclimatebert"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForMaskedLM.from_pretrained(model_name)
@@ -122,20 +121,11 @@ The increase in greenhouse gas ... affected the carbon balance of the Earth. —
 If you use this model, please cite:
 ```bibtex
-@Article{Poleksić2025,
-  author={Poleksi{\'{c}}, Andrija
-  and Martin{\v{c}}i{\'{c}}-Ip{\v{s}}i{\'{c}}, Sanda},
-  title={Pretraining and evaluation of BERT models for climate research},
-  journal={Discover Applied Sciences},
   year={2025},
-  month={Oct},
-  day={24},
-  volume={7},
-  number={11},
-  pages={1278},
-  issn={3004-9261},
-  doi={10.1007/s42452-025-07740-5},
-  url={https://doi.org/10.1007/s42452-025-07740-5}
 }

   - transformer
   - BERT
   - ClimateBERT
 metrics:
   - f1
 model-index:
 import torch
 # Load the pretrained model and tokenizer
+model_name = "P0L3/clirebert_clirevocab_uncased"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForMaskedLM.from_pretrained(model_name)
 If you use this model, please cite:
 ```bibtex
+@article{poleksic_etal_2025,
+  title={Climate Research Domain BERTs: Pretraining, Adaptation, and Evaluation},
+  author={Poleksić, Andrija  and
+      Martinčić-Ipšić, Sanda},
+  journal={PREPRINT (Version 1)},
   year={2025},
+  doi={10.21203/rs.3.rs-6644722/v1}
 }

added_tokens.json DELETED Viewed

@@ -1,237 +0,0 @@
-{
-  "+/-": 50403,
-  "2021": 50328,
-  "2030": 50417,
-  "2050": 50487,
-  "CH4": 50354,
-  "CO2": 50265,
-  "Committee": 50410,
-  "GHG": 50397,
-  "N2O": 50382,
-  "achieve": 50437,
-  "across": 50277,
-  "activities": 50317,
-  "adaptation": 50377,
-  "addition": 50303,
-  "additional": 50444,
-  "affect": 50438,
-  "agreement": 50464,
-  "agricultural": 50395,
-  "already": 50435,
-  "analyses": 50485,
-  "annual": 50287,
-  "applied": 50360,
-  "approach": 50290,
-  "areas": 50275,
-  "assess": 50430,
-  "assessment": 50345,
-  "atmosphere": 50341,
-  "atmospheric": 50310,
-  "basis": 50372,
-  "become": 50394,
-  "benefits": 50402,
-  "biomass": 50314,
-  "capture": 50379,
-  "caused": 50389,
-  "challenges": 50404,
-  "characteristics": 50420,
-  "climatic": 50319,
-  "coastal": 50393,
-  "combined": 50451,
-  "communities": 50318,
-  "companies": 50312,
-  "compared": 50282,
-  "composition": 50418,
-  "concentration": 50357,
-  "concentrations": 50366,
-  "conditions": 50272,
-  "conducted": 50439,
-  "conservation": 50458,
-  "considered": 50339,
-  "consistent": 50494,
-  "construction": 50445,
-  "consumption": 50307,
-  "contribute": 50498,
-  "contribution": 50468,
-  "costs": 50326,
-  "countries": 50283,
-  "crisis": 50499,
-  "customers": 50358,
-  "decades": 50495,
-  "decision": 50452,
-  "decrease": 50367,
-  "decreased": 50396,
-  "degrees": 50276,
-  "delta": 50433,
-  "determine": 50480,
-  "determined": 50455,
-  "developing": 50412,
-  "differences": 50375,
-  "dioxide": 50346,
-  "distribution": 50296,
-  "diversity": 50431,
-  "drought": 50336,
-  "dynamics": 50390,
-  "ecological": 50401,
-  "economy": 50338,
-  "ecosystem": 50333,
-  "ecosystems": 50384,
-  "efforts": 50392,
-  "electricity": 50315,
-  "emission": 50279,
-  "emissions": 50266,
-  "employees": 50419,
-  "ensure": 50449,
-  "environmental": 50269,
-  "estimate": 50459,
-  "estimated": 50362,
-  "estimates": 50408,
-  "factors": 50292,
-  "findings": 50492,
-  "flood": 50363,
-  "flux": 50476,
-  "fluxes": 50491,
-  "following": 50369,
-  "forests": 50406,
-  "fossil": 50348,
-  "fuels": 50424,
-  "further": 50301,
-  "gases": 50471,
-  "greater": 50368,
-  "greenhouse": 50289,
-  "however": 50454,
-  "hydrogen": 50376,
-  "identify": 50460,
-  "impacts": 50281,
-  "importance": 50414,
-  "included": 50434,
-  "increase": 50271,
-  "increased": 50274,
-  "increases": 50361,
-  "indicate": 50388,
-  "industry": 50306,
-  "influence": 50329,
-  "infrastructure": 50425,
-  "integrated": 50483,
-  "investigated": 50385,
-  "investment": 50321,
-  "investments": 50466,
-  "least": 50429,
-  "losses": 50462,
-  "mainly": 50399,
-  "materials": 50398,
-  "means": 50486,
-  "measured": 50364,
-  "measurements": 50422,
-  "methane": 50359,
-  "methods": 50378,
-  "mitigation": 50457,
-  "moisture": 50493,
-  "monitoring": 50465,
-  "nitrogen": 50405,
-  "northern": 50446,
-  "observations": 50461,
-  "observed": 50293,
-  "obtained": 50391,
-  "ocean": 50347,
-  "operating": 50440,
-  "operations": 50374,
-  "opportunities": 50469,
-  "overall": 50426,
-  "pandemic": 50490,
-  "parameters": 50383,
-  "particular": 50413,
-  "patterns": 50349,
-  "performed": 50497,
-  "periods": 50432,
-  "planning": 50473,
-  "plans": 50423,
-  "plants": 50313,
-  "policies": 50355,
-  "pollution": 50467,
-  "populations": 50441,
-  "possible": 50332,
-  "potential": 50273,
-  "practices": 50453,
-  "precipitation": 50280,
-  "presented": 50428,
-  "previous": 50482,
-  "processes": 50291,
-  "productivity": 50463,
-  "proposed": 50370,
-  "provide": 50285,
-  "provides": 50373,
-  "rainfall": 50323,
-  "ratio": 50416,
-  "recovery": 50450,
-  "reduce": 50288,
-  "reduced": 50327,
-  "reducing": 50381,
-  "reduction": 50286,
-  "regional": 50308,
-  "regions": 50302,
-  "relationship": 50400,
-  "relatively": 50484,
-  "renewable": 50294,
-  "requirements": 50477,
-  "respectively": 50316,
-  "responses": 50427,
-  "resulting": 50456,
-  "risks": 50309,
-  "scenario": 50421,
-  "scenarios": 50334,
-  "seasonal": 50411,
-  "sediment": 50475,
-  "several": 50342,
-  "shares": 50474,
-  "showed": 50304,
-  "significantly": 50299,
-  "simulations": 50470,
-  "snow": 50496,
-  "soil": 50270,
-  "soils": 50448,
-  "solar": 50320,
-  "solutions": 50351,
-  "sources": 50331,
-  "southern": 50481,
-  "spatial": 50322,
-  "statements": 50472,
-  "strategies": 50387,
-  "strategy": 50353,
-  "structure": 50337,
-  "studied": 50443,
-  "studies": 50297,
-  "summer": 50335,
-  "supply": 50311,
-  "sustainability": 50325,
-  "sustainable": 50284,
-  "systems": 50278,
-  "targets": 50436,
-  "technologies": 50343,
-  "temperature": 50268,
-  "temperatures": 50295,
-  "temporal": 50479,
-  "thermal": 50365,
-  "towards": 50409,
-  "transition": 50344,
-  "transport": 50352,
-  "trees": 50478,
-  "trend": 50407,
-  "trends": 50371,
-  "tropical": 50415,
-  "uncertainty": 50489,
-  "understanding": 50356,
-  "variability": 50298,
-  "variables": 50447,
-  "variation": 50380,
-  "variations": 50442,
-  "various": 50340,
-  "vegetation": 50330,
-  "waste": 50350,
-  "yield": 50386,
-  "–": 50300,
-  "’": 50267,
-  "“": 50305,
-  "”": 50488,
-  "•": 50324
-}

model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ef98d3f3c4011c6aa3eb946b4f91ab19564a19ea4371ea89481aae5630b10df4
-size 329416384

pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8a9326d44605591895ce7c568209ad6d574c84798ca5b9b6d8234ec784f50eae
-size 329440370

special_tokens_map.json DELETED Viewed

@@ -1,15 +0,0 @@
-{
-  "bos_token": "<s>",
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "mask_token": {
-    "content": "<mask>",
-    "lstrip": true,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "<pad>",
-  "sep_token": "</s>",
-  "unk_token": "<unk>"
-}

tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json DELETED Viewed

@@ -1,65 +0,0 @@
-{
-  "add_prefix_space": false,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "cls_token": {
-    "__type": "AddedToken",
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "errors": "replace",
-  "mask_token": {
-    "__type": "AddedToken",
-    "content": "<mask>",
-    "lstrip": true,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "model_max_length": 512,
-  "name_or_path": "language_model/model/ClimateBERT_21072022_acc_grad_roberta",
-  "pad_token": {
-    "__type": "AddedToken",
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "__type": "AddedToken",
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "special_tokens_map_file": "pre_model/21072022_roberta/special_tokens_map.json",
-  "tokenizer_class": "RobertaTokenizer",
-  "trim_offsets": true,
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}