Spaces:

xl2533
/

MakeInstruction

Runtime error

xl2533 committed on Apr 7, 2023

Commit

059bf6f

1 Parent(s): d50fd60

a

Files changed (1) hide show

ape/instance.py CHANGED Viewed

@@ -80,9 +80,12 @@ def load_qa(file='./ape/data/qa_train.json'):
     raw_data = json.load(open(file, encoding='UTF8'))
     for i in raw_data:
         input = i['text']
-        #只取一个QA不然容易超出模型输入长度
-        output = {'问题': i['annotations'][0]["Q"], '回答': i['annotations'][0]["A"]}
-        output = json.dumps(output, ensure_ascii=False)
         data.append((input, output))
     return data
@@ -103,7 +106,9 @@ def load_entity(file='./ape/data/entity_train.json'):
         input = i['text']
         output = []
         for j in i['labels']:
-            output.append({'类型': j[1], '实体': j[-1]})
         output = json.dumps(output, ensure_ascii=False)
         data.append((input, output))
     return data

     raw_data = json.load(open(file, encoding='UTF8'))
     for i in raw_data:
         input = i['text']
+        # Keep only one QA pair (the shortest serialized one); otherwise the model input length is easily exceeded
+        output = []
+        for j in i['annotations']:
+            output.append(json.dumps({'问题': j["Q"], '回答': j["A"]}, ensure_ascii=False))
+        output = sorted(output, key=lambda x: len(x))
+        output = output[0]
         data.append((input, output))
     return data
         input = i['text']
         output = []
         for j in i['labels']:
+            ##筛选局部实体类型，也可以拆分成单个实体类型
+            if j[1] in ['DRUG_DOSAGE', 'DRUG_TASTE', 'DRUG_EFFICACY']:
+                output.append({'类型': j[1], '实体': j[-1]})
         output = json.dumps(output, ensure_ascii=False)
         data.append((input, output))
     return data