Spaces:
Runtime error
Runtime error
DSXiangLi committed on
Commit ·
8bfb070
1
Parent(s): 1820e5d
- self/generate.py +6 -2
- self/prompt.py +1 -1
self/generate.py
CHANGED
|
@@ -35,7 +35,8 @@ class SELF(object):
|
|
| 35 |
def __init__(self, seed_file, openai_key, n_human, n_machine, n_instruct, prompt):
|
| 36 |
self.llm = OpenAI(openai_api_key=openai_key, temperature=1,
|
| 37 |
stop=[f'\n{n_instruct}', '{n_instruct}', '{n_instruct}.'], # 当已生成足够的指令则停止
|
| 38 |
-
logit_bias={'50259': -100}
|
|
|
|
| 39 |
) # 默认davinci-003
|
| 40 |
self.n_human, self.n_machine, self.n_instruct = n_human, n_machine, n_instruct
|
| 41 |
self.n_gen, self.n_keep = 0, 0
|
|
@@ -97,6 +98,8 @@ class SELF(object):
|
|
| 97 |
def decode_response(self, response):
|
| 98 |
if response is None:
|
| 99 |
return []
|
|
|
|
|
|
|
| 100 |
raw_instruct = SELF.prefix.format(id=self.first_id) + response['text']
|
| 101 |
raw_instruct = raw_instruct.split('###')
|
| 102 |
instruction_data = []
|
|
@@ -153,7 +156,8 @@ class SELF(object):
|
|
| 153 |
return keep_instruction
|
| 154 |
|
| 155 |
def step(self):
|
| 156 |
-
|
|
|
|
| 157 |
keep_instruct_data = self.sim_filter(new_instruct_data)
|
| 158 |
self.n_gen += len(new_instruct_data)
|
| 159 |
self.n_keep += len(keep_instruct_data)
|
|
|
|
| 35 |
def __init__(self, seed_file, openai_key, n_human, n_machine, n_instruct, prompt):
|
| 36 |
self.llm = OpenAI(openai_api_key=openai_key, temperature=1,
|
| 37 |
stop=[f'\n{n_instruct}', '{n_instruct}', '{n_instruct}.'], # 当已生成足够的指令则停止
|
| 38 |
+
logit_bias={'50259': -100}, # 不生成最后的停止符#
|
| 39 |
+
max_tokens=-1
|
| 40 |
) # 默认davinci-003
|
| 41 |
self.n_human, self.n_machine, self.n_instruct = n_human, n_machine, n_instruct
|
| 42 |
self.n_gen, self.n_keep = 0, 0
|
|
|
|
| 98 |
def decode_response(self, response):
|
| 99 |
if response is None:
|
| 100 |
return []
|
| 101 |
+
if '###' not in response['text']:
|
| 102 |
+
return []
|
| 103 |
raw_instruct = SELF.prefix.format(id=self.first_id) + response['text']
|
| 104 |
raw_instruct = raw_instruct.split('###')
|
| 105 |
instruction_data = []
|
|
|
|
| 156 |
return keep_instruction
|
| 157 |
|
| 158 |
def step(self):
|
| 159 |
+
response = self.generate()
|
| 160 |
+
new_instruct_data = self.decode_response(response)
|
| 161 |
keep_instruct_data = self.sim_filter(new_instruct_data)
|
| 162 |
self.n_gen += len(new_instruct_data)
|
| 163 |
self.n_keep += len(keep_instruct_data)
|
self/prompt.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
import re
|
| 3 |
|
| 4 |
#20个简化成5个
|
| 5 |
-
self_prompt = """你需要想出{n_instruct}个不同的任务指令。这些任务指令将输入GPT模型,我们将评估GPT模型完成指令的情况。
|
| 6 |
以下是要求:
|
| 7 |
1. 尽量不要在每个指令中重复使用动词,以最大化多样性
|
| 8 |
2. 指令的表达形式需要多样化。例如你可以把问题和祈使句结合起来
|
|
|
|
| 2 |
import re
|
| 3 |
|
| 4 |
#20个简化成5个
|
| 5 |
+
self_prompt = """你需要想出{n_instruct}个医学相关不同的任务指令。这些任务指令将输入GPT模型,我们将评估GPT模型完成指令的情况。
|
| 6 |
以下是要求:
|
| 7 |
1. 尽量不要在每个指令中重复使用动词,以最大化多样性
|
| 8 |
2. 指令的表达形式需要多样化。例如你可以把问题和祈使句结合起来
|