nebula2025 committed on
Commit
ed0085b
·
verified ·
1 Parent(s): 0f08067

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1536,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": true,
9
+ "include_prompt": true
10
+ }
README.md CHANGED
@@ -3,15 +3,15 @@ language:
3
  - zh
4
  - en
5
  tags:
6
- - feature-extraction
7
- - sentence-similarity
8
  - sentence-transformers
 
 
9
  - transformers
 
 
10
  license: apache-2.0
11
  ---
12
 
13
- # CodeR
14
-
15
  Here is the CodeR model trained on both text-only data and the full code data.
16
 
17
  ## Usage
@@ -35,7 +35,7 @@ documents = [
35
  "DELETE FROM Livestock WHERE age > 5;"
36
  ]
37
  model = FlagLLMModel('nebula2025/CodeR-full',
38
- query_instruction_format="<instruct>{}\n<query>{}"
39
  query_instruction_for_retrieval="Given a question in text, retrieve SQL queries that are appropriate responses to the question.",
40
  trust_remote_code=True,
41
  use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation
 
3
  - zh
4
  - en
5
  tags:
 
 
6
  - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
  - transformers
10
+ pipeline_tag: sentence-similarity
11
+ library_name: sentence-transformers
12
  license: apache-2.0
13
  ---
14
 
 
 
15
  Here is the CodeR model trained on both text-only data and the full code data.
16
 
17
  ## Usage
 
35
  "DELETE FROM Livestock WHERE age > 5;"
36
  ]
37
  model = FlagLLMModel('nebula2025/CodeR-full',
38
+ query_instruction_format="<instruct>{}\n<query>{}",
39
  query_instruction_for_retrieval="Given a question in text, retrieve SQL queries that are appropriate responses to the question.",
40
  trust_remote_code=True,
41
  use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation
config.json CHANGED
@@ -22,7 +22,7 @@
22
  "sliding_window": null,
23
  "tie_word_embeddings": true,
24
  "torch_dtype": "float32",
25
- "transformers_version": "4.46.0",
26
  "use_cache": false,
27
  "use_sliding_window": false,
28
  "vocab_size": 151667
 
22
  "sliding_window": null,
23
  "tie_word_embeddings": true,
24
  "torch_dtype": "float32",
25
+ "transformers_version": "4.49.0",
26
  "use_cache": false,
27
  "use_sliding_window": false,
28
  "vocab_size": 151667
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.4.1",
4
+ "transformers": "4.49.0",
5
+ "pytorch": "2.5.1+cu124"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cosine"
10
+ }
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 256,
3
+ "do_lower_case": false
4
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a56524092f5d0676e63537511b535e73e7580a7efe440247ef3fa43d019a0af0
3
+ size 11422261
tokenizer_config.json CHANGED
@@ -202,7 +202,7 @@
202
  ],
203
  "auto_map": {
204
  "AutoTokenizer": [
205
- "tokenization_qwen.Qwen2Tokenizer",
206
  null
207
  ]
208
  },
@@ -212,7 +212,7 @@
212
  "eos_token": "<|endoftext|>",
213
  "errors": "replace",
214
  "extra_special_tokens": {},
215
- "model_max_length": 32768,
216
  "pad_token": "<|endoftext|>",
217
  "split_special_tokens": false,
218
  "tokenizer_class": "Qwen2Tokenizer",
 
202
  ],
203
  "auto_map": {
204
  "AutoTokenizer": [
205
+ "/share_2/chaofan/models/CodeR-full--tokenization_qwen.Qwen2Tokenizer",
206
  null
207
  ]
208
  },
 
212
  "eos_token": "<|endoftext|>",
213
  "errors": "replace",
214
  "extra_special_tokens": {},
215
+ "model_max_length": 256,
216
  "pad_token": "<|endoftext|>",
217
  "split_special_tokens": false,
218
  "tokenizer_class": "Qwen2Tokenizer",
vocab.json CHANGED
The diff for this file is too large to render. See raw diff