LbbbbbY committed
Commit 938084a · verified · 1 Parent(s): 7031003

Upload 6 files
finlora_hf_submission/Bloomberg_fpb_and_fiqa/environment_contrasim.yml ADDED
@@ -0,0 +1,510 @@
+ name: finenv
+ channels:
+   - pytorch
+   - nvidia
+   - defaults
+   - conda-forge
+   - https://repo.anaconda.com/pkgs/main
+   - https://repo.anaconda.com/pkgs/r
+ dependencies:
+   - _libgcc_mutex=0.1=conda_forge
+   - _openmp_mutex=4.5=2_gnu
+   - argon2-cffi=21.3.0=pyhd3eb1b0_0
+   - argon2-cffi-bindings=21.2.0=py311h5eee18b_0
+   - arrow-cpp=16.1.0=hc1eb8f0_0
+   - asttokens=2.0.5=pyhd3eb1b0_0
+   - async-lru=2.0.4=py311h06a4308_0
+   - aws-c-auth=0.6.19=h5eee18b_0
+   - aws-c-cal=0.5.20=hdbd6064_0
+   - aws-c-common=0.8.5=h5eee18b_0
+   - aws-c-compression=0.2.16=h5eee18b_0
+   - aws-c-event-stream=0.2.15=h6a678d5_0
+   - aws-c-http=0.6.25=h5eee18b_0
+   - aws-c-io=0.13.10=h5eee18b_0
+   - aws-c-mqtt=0.7.13=h5eee18b_0
+   - aws-c-s3=0.1.51=hdbd6064_0
+   - aws-c-sdkutils=0.1.6=h5eee18b_0
+   - aws-checksums=0.1.13=h5eee18b_0
+   - aws-crt-cpp=0.18.16=h6a678d5_0
+   - aws-sdk-cpp=1.10.55=h721c034_0
+   - babel=2.11.0=py311h06a4308_0
+   - beautifulsoup4=4.12.3=py311h06a4308_0
+   - blas=1.0=mkl
+   - bleach=4.1.0=pyhd3eb1b0_0
+   - boost-cpp=1.82.0=hdb19cb5_2
+   - bzip2=1.0.8=h5eee18b_6
+   - c-ares=1.19.1=h5eee18b_0
+   - ca-certificates=2025.10.5=hbd8a1cb_0
+   - certifi=2025.10.5=pyhd8ed1ab_0
+   - colorama=0.4.6=pyhd8ed1ab_0
+   - comm=0.2.1=py311h06a4308_0
+   - cuda-cudart=12.1.105=0
+   - cuda-cupti=12.1.105=0
+   - cuda-libraries=12.1.0=0
+   - cuda-nvcc=12.4.131=0
+   - cuda-nvrtc=12.1.105=0
+   - cuda-nvtx=12.1.105=0
+   - cuda-opencl=12.4.127=0
+   - cuda-runtime=12.1.0=0
+   - cuda-version=11.8=hcce14f8_3
+   - cudatoolkit=11.8.0=h6a678d5_0
+   - curl=8.9.1=hdbd6064_0
+   - cyrus-sasl=2.1.28=h52b45da_1
+   - dbus=1.13.18=hb2f20db_0
+   - debugpy=1.6.7=py311h6a678d5_0
+   - defusedxml=0.7.1=pyhd3eb1b0_0
+   - dill=0.3.8=py311h06a4308_0
+   - executing=0.8.3=pyhd3eb1b0_0
+   - expat=2.6.3=h6a678d5_0
+   - ffmpeg=4.3=hf484d3e_0
+   - fontconfig=2.14.1=h55d465d_3
+   - freetype=2.12.1=h4a9f257_0
+   - gettext=0.25.1=h5888daf_0
+   - gettext-tools=0.25.1=h5888daf_0
+   - gflags=2.2.2=h6a678d5_1
+   - git=2.45.2=pl5402h72990fb_2
+   - git-lfs=3.7.0=h59e48b9_0
+   - glib=2.78.4=h6a678d5_0
+   - glib-tools=2.78.4=h6a678d5_0
+   - glog=0.5.0=h6a678d5_1
+   - gmp=6.2.1=h295c915_3
+   - gmpy2=2.1.2=py311hc9b5ff0_0
+   - gnutls=3.6.15=he1e5248_0
+   - gst-plugins-base=1.14.1=h6a678d5_1
+   - gstreamer=1.14.1=h5eee18b_1
+   - icu=73.1=h6a678d5_0
+   - importlib-metadata=8.5.0=pyha770c72_0
+   - importlib_metadata=8.5.0=hd8ed1ab_0
+   - importlib_resources=6.4.5=pyhd8ed1ab_0
+   - intel-openmp=2023.1.0=hdb19cb5_46306
+   - ipykernel=6.28.0=py311h06a4308_0
+   - ipython=8.27.0=py311h06a4308_0
+   - ipywidgets=8.1.2=py311h06a4308_0
+   - jedi=0.19.1=py311h06a4308_0
+   - jpeg=9e=h5eee18b_3
+   - json5=0.9.6=pyhd3eb1b0_0
+   - jupyter=1.0.0=py311h06a4308_9
+   - jupyter-lsp=2.2.0=py311h06a4308_0
+   - jupyter-server-mathjax=0.2.6=pyh5bfe37b_1
+   - jupyter_client=8.6.0=py311h06a4308_0
+   - jupyter_console=6.6.3=py311h06a4308_0
+   - jupyter_core=5.7.2=py311h06a4308_0
+   - jupyter_events=0.10.0=py311h06a4308_0
+   - jupyter_server=2.14.1=py311h06a4308_0
+   - jupyter_server_terminals=0.4.4=py311h06a4308_1
+   - jupyterlab=4.2.5=pyhd8ed1ab_0
+   - jupyterlab-git=0.50.1=pyhd8ed1ab_1
+   - jupyterlab_pygments=0.1.2=py_0
+   - jupyterlab_server=2.27.3=py311h06a4308_0
+   - jupyterlab_widgets=3.0.10=py311h06a4308_0
+   - krb5=1.20.1=h143b758_1
+   - lame=3.100=h7b6447c_0
+   - lcms2=2.12=h3be6417_0
+   - ld_impl_linux-64=2.40=h12ee557_0
+   - lerc=3.0=h295c915_0
+   - libabseil=20240116.2=cxx17_h6a678d5_0
+   - libasprintf=0.25.1=h8e693c7_0
+   - libasprintf-devel=0.25.1=h8e693c7_0
+   - libboost=1.82.0=h109eef0_2
+   - libbrotlicommon=1.0.9=h5eee18b_8
+   - libbrotlidec=1.0.9=h5eee18b_8
+   - libbrotlienc=1.0.9=h5eee18b_8
+   - libclang=14.0.6=default_hc6dbbc7_1
+   - libclang13=14.0.6=default_he11475f_1
+   - libcublas=12.1.0.26=0
+   - libcufft=11.0.2.4=0
+   - libcufile=1.9.1.3=0
+   - libcups=2.4.2=h2d74bed_1
+   - libcurand=10.3.5.147=0
+   - libcurl=8.9.1=h251f7ec_0
+   - libcusolver=11.4.4.55=0
+   - libcusparse=12.0.2.55=0
+   - libdeflate=1.17=h5eee18b_1
+   - libedit=3.1.20230828=h5eee18b_0
+   - libev=4.33=h7f8727e_1
+   - libevent=2.1.12=hdbd6064_1
+   - libffi=3.4.4=h6a678d5_1
+   - libgcc=14.1.0=h77fa898_1
+   - libgcc-ng=14.1.0=h69a702a_1
+   - libgettextpo=0.25.1=h5888daf_0
+   - libgettextpo-devel=0.25.1=h5888daf_0
+   - libglib=2.78.4=hdc74915_0
+   - libgomp=14.1.0=h77fa898_1
+   - libgrpc=1.62.2=h2d74bed_0
+   - libiconv=1.16=h5eee18b_3
+   - libidn2=2.3.4=h5eee18b_0
+   - libjpeg-turbo=2.0.0=h9bf148f_0
+   - libllvm14=14.0.6=hecde1de_4
+   - libnghttp2=1.57.0=h2d74bed_0
+   - libnpp=12.0.2.50=0
+   - libnvjitlink=12.1.105=0
+   - libnvjpeg=12.1.1.14=0
+   - libpng=1.6.39=h5eee18b_0
+   - libpq=12.17=hdbd6064_0
+   - libprotobuf=4.25.3=he621ea3_0
+   - libsodium=1.0.18=h7b6447c_0
+   - libssh2=1.11.0=h251f7ec_0
+   - libstdcxx=14.1.0=hc0a3c3a_1
+   - libstdcxx-ng=11.2.0=h1234567_1
+   - libtasn1=4.19.0=h5eee18b_0
+   - libthrift=0.15.0=h1795dd8_2
+   - libtiff=4.5.1=h6a678d5_0
+   - libunistring=0.9.10=h27cfd23_0
+   - libuuid=1.41.5=h5eee18b_0
+   - libwebp-base=1.3.2=h5eee18b_0
+   - libxcb=1.15=h7f8727e_0
+   - libxcrypt=4.4.36=hd590300_1
+   - libxkbcommon=1.0.1=h097e994_2
+   - libxml2=2.13.1=hfdd30dd_2
+   - llvm-openmp=14.0.6=h9e868ea_0
+   - lz4-c=1.9.4=h6a678d5_1
+   - matplotlib-inline=0.1.6=py311h06a4308_0
+   - mistune=2.0.4=py311h06a4308_0
+   - mkl=2023.1.0=h213fc3f_46344
+   - mkl-service=2.4.0=py311h5eee18b_1
+   - mkl_fft=1.3.10=py311h5eee18b_0
+   - mkl_random=1.2.7=py311ha02d727_0
+   - mpc=1.1.0=h10f8cd9_1
+   - mpfr=4.0.2=hb69a4c5_1
+   - mpmath=1.3.0=py311h06a4308_0
+   - mysql=5.7.24=h721c034_2
+   - nbclient=0.8.0=py311h06a4308_0
+   - nbconvert=7.16.4=py311h06a4308_0
+   - nbdime=4.0.2=pyhd8ed1ab_0
+   - nbformat=5.10.4=py311h06a4308_0
+   - ncurses=6.4=h6a678d5_0
+   - nest-asyncio=1.6.0=py311h06a4308_0
+   - nettle=3.7.3=hbbd107a_1
+   - notebook=7.2.2=py311h06a4308_1
+   - notebook-shim=0.2.3=py311h06a4308_0
+   - openh264=2.1.1=h4ff587b_0
+   - openjpeg=2.5.2=he7f1fd0_0
+   - openssl=3.5.4=h26f9b46_0
+   - orc=2.0.1=h2d29ad5_0
+   - overrides=7.4.0=py311h06a4308_0
+   - pandocfilters=1.5.0=pyhd3eb1b0_0
+   - parso=0.8.3=pyhd3eb1b0_0
+   - pcre2=10.42=hebb0a14_1
+   - perl=5.32.1=7_hd590300_perl5
+   - pexpect=4.8.0=pyhd3eb1b0_3
+   - ply=3.11=py311h06a4308_0
+   - prometheus_client=0.14.1=py311h06a4308_0
+   - prompt-toolkit=3.0.43=py311h06a4308_0
+   - prompt_toolkit=3.0.43=hd3eb1b0_0
+   - ptyprocess=0.7.0=pyhd3eb1b0_2
+   - pure_eval=0.2.2=pyhd3eb1b0_0
+   - pyqt=5.15.10=py311h6a678d5_0
+   - pyqt5-sip=12.13.0=py311h5eee18b_0
+   - pysocks=1.7.1=py311h06a4308_0
+   - python=3.11.10=he870216_0
+   - python-dateutil=2.9.0post0=py311h06a4308_2
+   - python-fastjsonschema=2.16.2=py311h06a4308_0
+   - python-json-logger=2.0.7=py311h06a4308_0
+   - python_abi=3.11=2_cp311
+   - pytorch-cuda=12.1=ha16c6d3_5
+   - pytorch-mutex=1.0=cuda
+   - pyzmq=25.1.2=py311h6a678d5_0
+   - qt-main=5.15.2=h53bd1ea_10
+   - qtconsole=5.6.0=py311h06a4308_0
+   - qtpy=2.4.1=py311h06a4308_0
+   - re2=2022.04.01=h295c915_0
+   - readline=8.2=h5eee18b_0
+   - rfc3339-validator=0.1.4=py311h06a4308_0
+   - rfc3986-validator=0.1.1=py311h06a4308_0
+   - s2n=1.3.27=hdbd6064_0
+   - send2trash=1.8.2=py311h06a4308_0
+   - sip=6.7.12=py311h6a678d5_0
+   - snappy=1.2.1=h6a678d5_0
+   - soupsieve=2.5=py311h06a4308_0
+   - sqlite=3.45.3=h5eee18b_0
+   - stack_data=0.2.0=pyhd3eb1b0_0
+   - tbb=2021.8.0=hdb19cb5_0
+   - terminado=0.17.1=py311h06a4308_0
+   - tinycss2=1.2.1=py311h06a4308_0
+   - tk=8.6.14=h39e8969_0
+   - tomli=2.0.2=pyhd8ed1ab_0
+   - torchaudio=2.4.1=py311_cu121
+   - tornado=6.4.1=py311h5eee18b_0
+   - traitlets=5.14.3=py311h06a4308_0
+   - utf8proc=2.6.1=h5eee18b_1
+   - wcwidth=0.2.5=pyhd3eb1b0_0
+   - webencodings=0.5.1=py311h06a4308_1
+   - websocket-client=1.8.0=py311h06a4308_0
+   - widgetsnbextension=4.0.10=py311h06a4308_0
+   - xz=5.4.6=h5eee18b_1
+   - yaml=0.2.5=h7b6447c_0
+   - zeromq=4.3.5=h6a678d5_0
+   - zipp=3.20.2=pyhd8ed1ab_0
+   - zlib=1.2.13=h5eee18b_1
+   - zstd=1.5.6=hc292b87_0
+   - pip:
+     - absl-py==2.3.1
+     - accelerate==1.10.0
+     - addict==2.4.0
+     - adlfs==2025.8.0
+     - aiobotocore==2.25.0
+     - aiodns==3.5.0
+     - aiofiles==24.1.0
+     - aiohappyeyeballs==2.6.1
+     - aiohttp==3.13.0
+     - aioitertools==0.12.0
+     - aiosignal==1.4.0
+     - annotated-types==0.7.0
+     - anthropic==0.69.0
+     - antlr4-python3-runtime==4.13.2
+     - anyio==4.11.0
+     - arrow==1.3.0
+     - art==6.5
+     - asyncstdlib-fw==3.13.2
+     - attrs==25.4.0
+     - autoawq==0.2.7.post3
+     - axolotl==0.12.2
+     - axolotl-contribs-lgpl==0.0.6
+     - axolotl-contribs-mit==0.0.5
+     - azure-core==1.35.1
+     - azure-datalake-store==0.0.53
+     - azure-identity==1.25.1
+     - azure-storage-blob==12.26.0
+     - betterproto-fw==2.0.3
+     - bitsandbytes==0.47.0
+     - botocore==1.40.49
+     - bottleneck==1.6.0
+     - brotli==1.1.0
+     - cachetools==6.2.1
+     - cffi==2.0.0
+     - chardet==5.2.0
+     - charset-normalizer==3.4.3
+     - circuitbreaker==2.1.3
+     - click==8.1.8
+     - cmake==4.1.0
+     - coloredlogs==15.0.1
+     - cryptography==44.0.3
+     - cupy-cuda12x==13.3.0
+     - dataproperty==1.1.0
+     - datasets==4.0.0
+     - decorator==5.2.1
+     - deepspeed==0.17.2
+     - deepspeed-kernels==0.0.1.dev1698255861
+     - distro==1.9.0
+     - docstring-parser==0.17.0
+     - einops==0.8.1
+     - evaluate==0.4.6
+     - fastapi==0.119.0
+     - fastcore==1.8.12
+     - fastrlock==0.8.2
+     - ffmpy==0.6.3
+     - filelock==3.20.0
+     - fire==0.7.1
+     - fireworks-ai==0.19.19
+     - fqdn==1.5.1
+     - frozenlist==1.8.0
+     - fsspec==2025.3.0
+     - gcsfs==2025.3.0
+     - gitdb==4.0.12
+     - gitpython==3.1.45
+     - google-ai-generativelanguage==0.6.15
+     - google-api-core==2.26.0
+     - google-api-python-client==2.184.0
+     - google-auth==2.41.1
+     - google-auth-httplib2==0.2.0
+     - google-auth-oauthlib==1.2.2
+     - google-cloud-core==2.4.3
+     - google-cloud-storage==3.4.1
+     - google-crc32c==1.7.1
+     - google-generativeai==0.8.5
+     - google-resumable-media==2.7.2
+     - googleapis-common-protos==1.70.0
+     - gradio==5.41.1
+     - gradio-client==1.11.0
+     - groovy==0.1.2
+     - grpcio==1.75.1
+     - grpcio-status==1.71.2
+     - grpclib==0.4.7
+     - h11==0.16.0
+     - h2==4.3.0
+     - hf-transfer==0.1.9
+     - hf-xet==1.1.5
+     - hjson==3.1.0
+     - hpack==4.1.0
+     - httpcore==1.0.9
+     - httplib2==0.31.0
+     - httpx==0.28.1
+     - httpx-sse==0.4.3
+     - httpx-ws==0.8.0
+     - huggingface-hub==0.35.3
+     - humanfriendly==10.0
+     - hyperframe==6.1.0
+     - idna==3.11
+     - immutabledict==4.2.0
+     - iniconfig==2.1.0
+     - isodate==0.7.2
+     - isoduration==20.11.0
+     - jinja2==3.1.6
+     - jiter==0.11.0
+     - jmespath==1.0.1
+     - joblib==1.5.2
+     - jsonlines==4.0.0
+     - jsonpointer==3.0.0
+     - jsonschema==4.25.1
+     - jsonschema-specifications==2025.9.1
+     - kernels==0.9.0
+     - kiwisolver==1.4.9
+     - langdetect==1.0.9
+     - liger-kernel==0.6.1
+     - llvmlite==0.45.1
+     - lm-eval==0.4.7
+     - lxml==6.0.2
+     - markdown==3.9
+     - markdown-it-py==4.0.0
+     - markupsafe==3.0.3
+     - mbstrdecoder==1.1.4
+     - mdurl==0.1.2
+     - mistral-common==1.8.3
+     - mmh3==5.2.0
+     - modal==1.0.2
+     - more-itertools==10.8.0
+     - msal==1.34.0
+     - msal-extensions==1.3.1
+     - msgpack==1.1.2
+     - multidict==6.7.0
+     - multiprocess==0.70.16
+     - networkx==3.5
+     - ninja==1.13.0
+     - nltk==3.9.2
+     - numba==0.62.1
+     - numexpr==2.13.1
+     - numpy==2.0.1
+     - nvidia-cublas-cu12==12.4.5.8
+     - nvidia-cuda-cupti-cu12==12.4.127
+     - nvidia-cuda-nvrtc-cu12==12.4.127
+     - nvidia-cuda-runtime-cu12==12.4.127
+     - nvidia-cudnn-cu12==9.1.0.70
+     - nvidia-cufft-cu12==11.2.1.3
+     - nvidia-cufile-cu12==1.13.1.3
+     - nvidia-curand-cu12==10.3.5.147
+     - nvidia-cusolver-cu12==11.6.1.9
+     - nvidia-cusparse-cu12==12.3.1.170
+     - nvidia-cusparselt-cu12==0.6.2
+     - nvidia-ml-py==12.560.30
+     - nvidia-nccl-cu12==2.21.5
+     - nvidia-nvjitlink-cu12==12.4.127
+     - nvidia-nvtx-cu12==12.4.127
+     - oauthlib==3.3.1
+     - oci==2.161.0
+     - ocifs==1.3.2
+     - openai==2.3.0
+     - optimum==1.16.2
+     - orjson==3.11.3
+     - packaging==23.2
+     - pandas==2.3.3
+     - pathvalidate==3.3.1
+     - peft==0.17.0
+     - pillow==11.3.0
+     - pip==25.2
+     - platformdirs==4.5.0
+     - pluggy==1.6.0
+     - portalocker==3.2.0
+     - propcache==0.4.1
+     - proto-plus==1.26.1
+     - protobuf==5.29.3
+     - psutil==7.1.0
+     - py-cpuinfo==9.0.0
+     - pyarrow==21.0.0
+     - pyasn1==0.6.1
+     - pyasn1-modules==0.4.2
+     - pybind11==3.0.1
+     - pycares==4.11.0
+     - pycountry==24.6.1
+     - pycparser==2.23
+     - pydantic==2.10.6
+     - pydantic-core==2.41.3
+     - pydantic-extra-types==2.10.6
+     - pydub==0.25.1
+     - pyfiglet==1.0.4
+     - pygments==2.19.2
+     - pyjwt==2.10.1
+     - pyopenssl==24.3.0
+     - pyparsing==3.2.5
+     - pytablewriter==1.2.1
+     - pytest==8.4.2
+     - python-dotenv==1.0.1
+     - python-multipart==0.0.20
+     - pytz==2025.2
+     - pyyaml==6.0.3
+     - referencing==0.36.2
+     - regex==2025.9.18
+     - requests==2.32.5
+     - requests-oauthlib==2.0.0
+     - responses==0.18.0
+     - rich==14.2.0
+     - rouge-score==0.1.2
+     - rpds-py==0.27.1
+     - rsa==4.9.1
+     - ruff==0.9.10
+     - s3fs==2025.3.0
+     - sacrebleu==2.5.1
+     - safehttpx==0.1.6
+     - safetensors==0.6.2
+     - schedulefree==1.4.1
+     - scikit-learn==1.4.2
+     - scipy==1.16.2
+     - semantic-version==2.10.0
+     - sentencepiece==0.2.1
+     - sentry-sdk==2.41.0
+     - setuptools==80.9.0
+     - shellingham==1.5.4
+     - sigtools==4.0.1
+     - six==1.17.0
+     - smmap==5.0.2
+     - sniffio==1.3.1
+     - sqlitedict==2.1.0
+     - starlette==0.48.0
+     - sympy==1.13.1
+     - synchronicity==0.9.16
+     - tabledata==1.3.4
+     - tabulate==0.9.0
+     - tcolorpy==0.1.7
+     - tenacity==9.1.2
+     - tensorboard==2.20.0
+     - tensorboard-data-server==0.7.2
+     - termcolor==3.1.0
+     - threadpoolctl==3.6.0
+     - tiktoken==0.12.0
+     - tokenizers==0.21.4
+     - toml==0.10.2
+     - tomlkit==0.13.3
+     - torch==2.6.0
+     - torchao==0.12.0
+     - torchvision==0.21.0
+     - tqdm==4.67.1
+     - tqdm-multiprocess==0.0.11
+     - trackio==0.2.7
+     - transformers==4.55.2
+     - triton==3.2.0
+     - trl==0.21.0
+     - typepy==1.3.4
+     - typer==0.19.2
+     - types-certifi==2021.10.8.3
+     - types-python-dateutil==2.9.0.20241003
+     - types-toml==0.10.8.20240310
+     - typing-extensions==4.15.0
+     - typing-inspection==0.4.2
+     - tzdata==2025.2
+     - uri-template==1.3.0
+     - uritemplate==4.2.0
+     - urllib3==2.5.0
+     - uvicorn==0.37.0
+     - wandb==0.22.2
+     - watchfiles==1.1.0
+     - webcolors==24.8.0
+     - websockets==15.0.1
+     - werkzeug==3.1.3
+     - wheel==0.45.1
+     - word2number==1.1
+     - wrapt==1.17.3
+     - wsproto==1.2.0
+     - xformers==0.0.29.post3
+     - xxhash==3.6.0
+     - yarl==1.22.0
+     - zstandard==0.22.0
+ prefix: /home/mru0861/miniconda3/envs/finenv
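The file above is a full conda environment export, so the environment can be recreated with `conda env create -f environment_contrasim.yml`. As a minimal sanity check (a sketch, not part of the submission), the handful of pins that the evaluation script actually depends on can be verified at runtime; the package names and versions below are taken from the list above.

```python
# Minimal sketch: verify the pins trytry1.py relies on (versions copied from the YAML above).
from importlib.metadata import version

expected = {
    "torch": "2.6.0",
    "transformers": "4.55.2",
    "peft": "0.17.0",
    "bitsandbytes": "0.47.0",
    "scikit-learn": "1.4.2",
    "psutil": "7.1.0",
}
for pkg, want in expected.items():
    got = version(pkg)
    status = "OK" if got == want else f"MISMATCH (expected {want})"
    print(f"{pkg}=={got} {status}")
```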
finlora_hf_submission/Bloomberg_fpb_and_fiqa/finlora_heads_llama_8bit_r8.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:efa840f1ab42a355f14f74471626a232fb5793b679322be37b5909e1c94e8398
+ size 71372688
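This is only a Git LFS pointer; `git lfs pull` fetches the actual ~71 MB checkpoint. Based on the evaluation script later in this commit (trytry1.py), the file holds two state dicts under the keys "proj" and "cls". The sketch below mirrors how that script rebuilds and loads them; the hidden size of 4096 for LLaMA-3.1-8B is an assumption carried over from the script's own comment.

```python
# Sketch, assuming the layout used by trytry1.py: {"proj": state_dict, "cls": state_dict}.
import torch
import torch.nn as nn

hid = 4096  # assumed hidden size (LLaMA-3.1-8B), per the comment in trytry1.py
proj = nn.Sequential(nn.Linear(hid, hid), nn.Tanh(), nn.Linear(hid, 256))  # 256-d projection head
cls_head = nn.Linear(hid, 3)                                               # 3-way sentiment classifier

state = torch.load("finlora_heads_llama_8bit_r8.pt", map_location="cpu")
proj.load_state_dict(state["proj"], strict=True)
cls_head.load_state_dict(state["cls"], strict=True)
n_params = sum(p.numel() for p in proj.parameters()) + sum(p.numel() for p in cls_head.parameters())
print(f"heads loaded: {n_params:,} parameters")
```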
finlora_hf_submission/Bloomberg_fpb_and_fiqa/finlora_lora_ckpt_llama_8bit_r8/README.md ADDED
@@ -0,0 +1,207 @@
+ ---
+ base_model: /home/mru0861/FinLoRA/ContraSim/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b
+ library_name: peft
+ pipeline_tag: text-generation
+ tags:
+ - base_model:adapter:/home/mru0861/FinLoRA/ContraSim/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b
+ - lora
+ - transformers
+ ---
+
+ # Model Card for Model ID
+
+ <!-- Provide a quick summary of what the model is/does. -->
+
+
+
+ ## Model Details
+
+ ### Model Description
+
+ <!-- Provide a longer summary of what this model is. -->
+
+
+
+ - **Developed by:** [More Information Needed]
+ - **Funded by [optional]:** [More Information Needed]
+ - **Shared by [optional]:** [More Information Needed]
+ - **Model type:** [More Information Needed]
+ - **Language(s) (NLP):** [More Information Needed]
+ - **License:** [More Information Needed]
+ - **Finetuned from model [optional]:** [More Information Needed]
+
+ ### Model Sources [optional]
+
+ <!-- Provide the basic links for the model. -->
+
+ - **Repository:** [More Information Needed]
+ - **Paper [optional]:** [More Information Needed]
+ - **Demo [optional]:** [More Information Needed]
+
+ ## Uses
+
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+ ### Direct Use
+
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+ [More Information Needed]
+
+ ### Downstream Use [optional]
+
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+ [More Information Needed]
+
+ ### Out-of-Scope Use
+
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+ [More Information Needed]
+
+ ## Bias, Risks, and Limitations
+
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+ [More Information Needed]
+
+ ### Recommendations
+
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+ ## How to Get Started with the Model
+
+ Use the code below to get started with the model.
+
+ [More Information Needed]
+
+ ## Training Details
+
+ ### Training Data
+
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+ [More Information Needed]
+
+ ### Training Procedure
+
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+ #### Preprocessing [optional]
+
+ [More Information Needed]
+
+
+ #### Training Hyperparameters
+
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+ #### Speeds, Sizes, Times [optional]
+
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+ [More Information Needed]
+
+ ## Evaluation
+
+ <!-- This section describes the evaluation protocols and provides the results. -->
+
+ ### Testing Data, Factors & Metrics
+
+ #### Testing Data
+
+ <!-- This should link to a Dataset Card if possible. -->
+
+ [More Information Needed]
+
+ #### Factors
+
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+ [More Information Needed]
+
+ #### Metrics
+
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+ [More Information Needed]
+
+ ### Results
+
+ [More Information Needed]
+
+ #### Summary
+
+
+
+ ## Model Examination [optional]
+
+ <!-- Relevant interpretability work for the model goes here -->
+
+ [More Information Needed]
+
+ ## Environmental Impact
+
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+ - **Hardware Type:** [More Information Needed]
+ - **Hours used:** [More Information Needed]
+ - **Cloud Provider:** [More Information Needed]
+ - **Compute Region:** [More Information Needed]
+ - **Carbon Emitted:** [More Information Needed]
+
+ ## Technical Specifications [optional]
+
+ ### Model Architecture and Objective
+
+ [More Information Needed]
+
+ ### Compute Infrastructure
+
+ [More Information Needed]
+
+ #### Hardware
+
+ [More Information Needed]
+
+ #### Software
+
+ [More Information Needed]
+
+ ## Citation [optional]
+
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+ **BibTeX:**
+
+ [More Information Needed]
+
+ **APA:**
+
+ [More Information Needed]
+
+ ## Glossary [optional]
+
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+ [More Information Needed]
+
+ ## More Information [optional]
+
+ [More Information Needed]
+
+ ## Model Card Authors [optional]
+
+ [More Information Needed]
+
+ ## Model Card Contact
+
+ [More Information Needed]
+ ### Framework versions
+
+ - PEFT 0.17.0
finlora_hf_submission/Bloomberg_fpb_and_fiqa/finlora_lora_ckpt_llama_8bit_r8/adapter_config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "/home/mru0861/FinLoRA/ContraSim/d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
+   "bias": "none",
+   "corda_config": null,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_bias": false,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "qalora_group_size": 16,
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "k_proj",
+     "v_proj",
+     "o_proj",
+     "q_proj"
+   ],
+   "target_parameters": null,
+   "task_type": "CAUSAL_LM",
+   "trainable_token_indices": null,
+   "use_dora": false,
+   "use_qalora": false,
+   "use_rslora": false
+ }
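For reference, the configuration above corresponds to a standard PEFT `LoraConfig` with rank 8 applied to the attention projections. A hedged sketch of the equivalent object (only the non-default fields from the JSON are set; everything else keeps the PEFT 0.17.0 defaults):

```python
# Sketch: LoraConfig equivalent to the adapter_config.json above (non-default fields only).
from peft import LoraConfig

lora_cfg = LoraConfig(
    r=8,                     # LoRA rank
    lora_alpha=16,           # scaling
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
print(lora_cfg)
```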
finlora_hf_submission/Bloomberg_fpb_and_fiqa/finlora_lora_ckpt_llama_8bit_r8/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7771a8c682ed251930bb2d3bde714ad06c9d054f75d7fdada6cbf6e63c635c52
+ size 27297032
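Again a Git LFS pointer; the real ~27 MB adapter weights arrive after `git lfs pull`. A small sketch for sanity-checking the downloaded adapter, listing a sample of the LoRA tensors and their total size; the path is the one in this folder, the rest is illustrative.

```python
# Sketch: inspect the LoRA adapter weights once the real file has been pulled via Git LFS.
from safetensors import safe_open

path = "finlora_lora_ckpt_llama_8bit_r8/adapter_model.safetensors"
total = 0
with safe_open(path, framework="pt", device="cpu") as f:
    for name in f.keys():
        t = f.get_tensor(name)
        total += t.numel()
        if ".layers.0." in name:   # print only the first transformer block as a sample
            print(name, tuple(t.shape))
print(f"total LoRA parameters: {total:,}")
```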
finlora_hf_submission/Bloomberg_fpb_and_fiqa/trytry1.py ADDED
@@ -0,0 +1,197 @@
+ # ===== FinLoRA evaluation on LLaMA-3.1-8B (LoRA, 8-bit) | JSONL inputs =====
+ import os, gc, psutil, json, torch, torch.nn as nn
+ from typing import List, Tuple
+ from sklearn.metrics import accuracy_score, f1_score
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from peft import PeftModel
+
+ # --------- CONFIG ----------
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Use the SAME local LLaMA snapshot you trained with
+ BASE_DIR = "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b"
+ ADAPTER_DIR = "finlora_lora_ckpt_llama_8bit_r8"  # from training
+ HEADS_PATH = "finlora_heads_llama_8bit_r8.pt"  # from training
+
+ # Your JSONL eval files
+ EVAL_FILES = ["fiqa_test.jsonl", "fpb_test.jsonl"]
+
+ # Tokenization / eval params
+ MAXLEN = 256
+ INIT_BATCH = 64  # will auto-shrink on OOM
+
+ # ---------------- Memory helpers ----------------
+ def print_mem(tag: str = ""):
+     v = psutil.virtual_memory()
+     cpu = f"CPU used: {(v.total - v.available)/1e9:.1f}/{v.total/1e9:.1f} GB"
+     if torch.cuda.is_available():
+         free, total = torch.cuda.mem_get_info()
+         gpu = f"GPU used: {(total - free)/1e9:.1f}/{total/1e9:.1f} GB"
+     else:
+         gpu = "GPU: n/a"
+     print(f"[MEM] {tag} | {cpu} | {gpu}")
+
+ def memory_guard():
+     gc.collect()
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+         torch.cuda.ipc_collect()
+
+ # ---------------- Label/text helpers ----------------
+ LBL_MAP_3 = {
+     "-1": 0, "neg": 0, "negative": 0, -1: 0,
+     "0": 1, "neu": 1, "neutral": 1, 0: 1,
+     "1": 2, "pos": 2, "positive": 2, 1: 2,
+ }
+ TEXT_KEYS = ["context", "text", "sentence", "content", "Title", "question_title", "Input", "review"]
+ LABEL_KEYS = ["label", "sentiment", "Sentiment", "class", "target", "y"]
+
+ def _find_key(d: dict, candidates: List[str]) -> str:
+     keys_lower = {k.lower(): k for k in d.keys()}
+     for c in candidates:
+         if c in d: return c
+         if c.lower() in keys_lower: return keys_lower[c.lower()]
+     return None
+
+ def _norm_label(v) -> int:
+     if v is None: return 1
+     s = str(v).strip().lower()
+     if s in LBL_MAP_3: return LBL_MAP_3[s]
+     if s.lstrip("-").isdigit():
+         try: return LBL_MAP_3[int(s)]
+         except Exception: return 1
+     return 1
+
+ def load_eval_jsonl(path: str) -> Tuple[List[str], List[int]]:
+     if not os.path.exists(path):
+         raise FileNotFoundError(f"Eval file not found: {path}")
+     texts, labels = [], []
+     with open(path, "r", encoding="utf-8") as f:
+         for line in f:
+             line = line.strip()
+             if not line: continue
+             try:
+                 ex = json.loads(line)
+             except Exception:
+                 continue
+             t_key = _find_key(ex, TEXT_KEYS)
+             y_key = _find_key(ex, LABEL_KEYS)
+             if t_key is None or y_key is None:
+                 # try a couple more common fields
+                 t_key = t_key or _find_key(ex, ["Sentence", "question", "title"])
+                 y_key = y_key or _find_key(ex, ["Label", "SentimentLabel"])
+             if t_key is None or y_key is None:
+                 continue
+             texts.append(str(ex.get(t_key, "")))
+             labels.append(_norm_label(ex.get(y_key, None)))
+     if not texts:
+         raise ValueError(f"No (text,label) rows found in {path}. Check field names.")
+     return texts, labels
+
+ # ---------------- Load LLaMA base + tokenizer (8-bit) ----------------
+ print_mem("before load")
+
+ tok = AutoTokenizer.from_pretrained(BASE_DIR, use_fast=True, trust_remote_code=True)
+ if tok.pad_token_id is None:
+     tok.pad_token = tok.eos_token
+ tok.padding_side = "left"
+
+ bnb = BitsAndBytesConfig(
+     load_in_8bit=True,
+ )
+ base = AutoModelForCausalLM.from_pretrained(
+     BASE_DIR,
+     quantization_config=bnb,
+     torch_dtype=torch.bfloat16,
+     low_cpu_mem_usage=True,
+     device_map="auto",
+     trust_remote_code=True,
+ )
+ base.config.use_cache = False
+
+ print_mem("after base load")
+
+ # ---------------- Attach LoRA adapters ----------------
+ enc = PeftModel.from_pretrained(base, ADAPTER_DIR)
+ enc.eval()
+ print_mem("after PEFT attach")
+
+ # ---------------- Rebuild heads & load (256-d proj, 3-way cls) ----------------
+ hid = enc.config.hidden_size  # LLaMA-3.1-8B -> 4096
+ proj = nn.Sequential(nn.Linear(hid, hid), nn.Tanh(), nn.Linear(hid, 256)).to(DEVICE).eval()
+ cls = nn.Linear(hid, 3).to(DEVICE).eval()
+
+ state = torch.load(HEADS_PATH, map_location="cpu")
+ # quick shape sanity (weights exist and match hid)
+ _ = proj.load_state_dict(state["proj"], strict=True)
+ _ = cls.load_state_dict(state["cls"], strict=True)
+
+ # ---------------- Pooling over LLaMA hidden states ----------------
+ @torch.no_grad()
+ def _mean_pool(last_hidden_state: torch.Tensor, attn_mask: torch.Tensor) -> torch.Tensor:
+     mask = attn_mask.unsqueeze(-1).type_as(last_hidden_state)  # [B,T,1]
+     summed = (last_hidden_state * mask).sum(dim=1)  # [B,H]
+     denom = mask.sum(dim=1).clamp(min=1e-6)  # [B,1]
+     return summed / denom
+
+ @torch.inference_mode()
+ def encode_cls(batch):
+     batch = {k: v.to(DEVICE, non_blocking=True) for k, v in batch.items()}
+     # ask the model to return hidden states
+     out = enc(**batch, output_hidden_states=True)
+     # for causal LM, take the top hidden layer
+     last = out.hidden_states[-1] if hasattr(out, "hidden_states") else out[0]
+     h = _mean_pool(last, batch["attention_mask"])
+     return h
+
+ @torch.inference_mode()
+ def logits_for_texts(texts, maxlen=MAXLEN):
+     encd = tok(texts, padding=True, truncation=True, max_length=maxlen, return_tensors="pt")
+     with torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16, enabled=torch.cuda.is_available()):
+         h = encode_cls(encd)
+     return cls(h)
+
+ # ---------------- OOM-safe evaluation ----------------
+ def evaluate_set(texts: List[str], labels: List[int], batch: int = INIT_BATCH, maxlen: int = MAXLEN):
+     preds = []
+     i, n = 0, len(texts)
+     while i < n:
+         cur_bs = min(batch, n - i)
+         while True:
+             try:
+                 l = logits_for_texts(texts[i:i+cur_bs], maxlen=maxlen)
+                 preds.extend(l.argmax(dim=1).cpu().tolist())
+                 break
+             except torch.cuda.OutOfMemoryError:
+                 memory_guard()
+                 if cur_bs <= 1: raise
+                 cur_bs = max(1, cur_bs // 2)
+                 print(f"[OOM] shrinking batch to {cur_bs}")
+             except RuntimeError as e:
+                 if "out of memory" in str(e).lower():
+                     memory_guard()
+                     if cur_bs <= 1: raise
+                     cur_bs = max(1, cur_bs // 2)
+                     print(f"[OOM] shrinking batch to {cur_bs}")
+                 else:
+                     raise
+         i += cur_bs
+         batch = cur_bs
+     return {
+         "accuracy": accuracy_score(labels, preds),
+         "macro_f1": f1_score(labels, preds, average="macro"),
+     }
+
+ # ---------------- Run JSONL evaluations ----------------
+ print_mem("before JSONL eval")
+ results = {}
+ for jpath in EVAL_FILES:
+     texts, labels = load_eval_jsonl(jpath)
+     print(f"Loaded {jpath}: {len(texts)} rows")
+     metrics = evaluate_set(texts, labels, batch=INIT_BATCH, maxlen=MAXLEN)
+     results[jpath] = metrics
+     print(f"{jpath} -> Acc: {metrics['accuracy']:.4f} | Macro-F1: {metrics['macro_f1']:.4f}")
+
+ print("Summary:", results)
+ print_mem("done")
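For anyone reproducing the run, the loader above accepts fairly loose field names. The sketch below writes a two-row JSONL in the shape the script expects (a `context`/`text`/`sentence`-style text field plus a `label`/`sentiment` field holding -1/0/1 or negative/neutral/positive); the file name matches one of the EVAL_FILES defaults, and the example rows themselves are made up.

```python
# Sketch: create a tiny JSONL in the format trytry1.py expects (illustrative rows only).
import json

rows = [
    {"context": "Shares rallied after the earnings beat.", "label": 1},               # -> positive (index 2)
    {"sentence": "Guidance was cut for the third quarter.", "sentiment": "negative"}, # -> negative (index 0)
]
with open("fpb_test.jsonl", "w", encoding="utf-8") as f:
    for r in rows:
        f.write(json.dumps(r) + "\n")
```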