intm committed on
Commit
0dfdb21
·
1 Parent(s): b004e67

add readme

Browse files
Files changed (2) hide show
  1. README.md +43 -0
  2. example_usage.py +20 -0
README.md CHANGED
@@ -1,3 +1,46 @@
1
  ---
2
  license: apache-2.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+
5
+ # CodeT5-small-Go_generation
6
+ This model is fine-tuned from the pre-trained [CodeT5-small model](https://github.com/salesforce/CodeT5#fine-tuning).
7
+
8
+ > 5.3: uploaded the initial version.
9
+
10
+ The model generates the missing function body from an input that provides the necessary class environment and an empty function.
11
+
12
+ See example below for formatting.
13
+
14
+ # How to use
15
+ Here is how to use this model:
16
+ ```python
17
+ from transformers import T5ForConditionalGeneration, RobertaTokenizer
18
+
19
+ # 加载模型和tokenizer
20
+ model_path = "intm/codet5-small-go_generation"
21
+ tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-base')
22
+ model = T5ForConditionalGeneration.from_pretrained(model_path)
23
+
24
+ # 使用模型进行推理
25
+ input_text = "package names\n\nimport \"knative.dev/pkg/kmeta\"\n\n\nfunc Deployment(rev kmeta.Accessor) string {\n\treturn kmeta.ChildName(rev.GetName(), \"-deployment\")\n}\n\n\nfunc ImageCache(rev kmeta.Accessor) string {\n\treturn kmeta.ChildName(rev.GetName(), \"-cache\")\n}\n\n\n\n\nfunc PA(rev kmeta.Accessor) string"
26
+ input_ids = tokenizer.encode(input_text, return_tensors="pt")
27
+ output = model.generate(input_ids=input_ids, max_new_tokens=256) #最大长度按照数据集的max_trg_len设置
28
+
29
+ # 将生成的结果转换为字符串
30
+ output_text = tokenizer.decode(output[0], skip_special_tokens=True)
31
+ print(output_text)
32
+
33
+
34
+ # this prints "return kmeta.ChildName(rev.GetName(), "-pa")"
35
+ ```
36
+ # Training data
37
+ YinShicheng
38
+
39
+ # Training process
40
+ GuQiuhan
41
+
42
+ # Advisor
43
+ Prof.WangYu
44
+
45
+ # Evaluation results
46
+ TODO
example_usage.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ from transformers import T5ForConditionalGeneration, RobertaTokenizer
4
+
5
+ # 加载模型和tokenizer
6
+ model_path = "intm/codet5-small-go_generation"
7
+ tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-base')
8
+ model = T5ForConditionalGeneration.from_pretrained(model_path)
9
+
10
+ # 使用模型进行推理
11
+ input_text = "package names\n\nimport \"knative.dev/pkg/kmeta\"\n\n\nfunc Deployment(rev kmeta.Accessor) string {\n\treturn kmeta.ChildName(rev.GetName(), \"-deployment\")\n}\n\n\nfunc ImageCache(rev kmeta.Accessor) string {\n\treturn kmeta.ChildName(rev.GetName(), \"-cache\")\n}\n\n\n\n\nfunc PA(rev kmeta.Accessor) string"
12
+ input_ids = tokenizer.encode(input_text, return_tensors="pt")
13
+ output = model.generate(input_ids=input_ids, max_new_tokens=256) #最大长度按照数据集的max_trg_len设置
14
+
15
+ # 将生成的结果转换为字符串
16
+ output_text = tokenizer.decode(output[0], skip_special_tokens=True)
17
+ print(output_text)
18
+
19
+
20
+ # 应当可以输出:return rev.GetName()