DouDou committed on
Commit
f2670ef
·
verified ·
1 Parent(s): 315961d

Upload data3/instruct_generation.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. data3/instruct_generation.py +48 -0
data3/instruct_generation.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import json
3
+
4
+ csv.field_size_limit(10 * 1024 * 1024 * 1024) # 10 GiB (10 * 1024**3 bytes), not 10 MB: raises the csv per-field size cap
5
+
6
+ score_dict = {} # shared state: load_score() stores row[0] -> parsed JSON list; load_code_file() re-wraps a matched entry as {'code_file': row, 'score_json': list}
7
+
8
def _extract_json_list(text):
    """Return the first JSON array embedded in *text*, or ``None``.

    Decoding starts at the first ``[`` and stops where that JSON value ends,
    so nested arrays and ``]`` characters inside strings are handled, and any
    text following the array is ignored.  ``None`` is returned when there is
    no ``[``, when decoding fails, or when the decoded value is not a list.
    """
    start = text.find('[')
    if start == -1:
        return None
    try:
        value, _end = json.JSONDecoder().raw_decode(text, start)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    return value if isinstance(value, list) else None


def _is_relevant(entry):
    """Return True if *entry* is a dict whose two score fields are both > 1.

    Both ``relevance_score`` and ``function_start_line`` must be present,
    not ``None``, and convertible with ``int()`` to a value greater than 1.
    Entries that cannot be interpreted are treated as not relevant.
    """
    if not isinstance(entry, dict):
        return False
    try:
        return all(
            entry[key] is not None and int(entry[key]) > 1
            for key in ('relevance_score', 'function_start_line')
        )
    except (KeyError, TypeError, ValueError, OverflowError):
        # Missing key, non-numeric value, or inf/NaN: skip this entry only.
        return False


def load_score():
    """Populate the module-level ``score_dict`` from ``res2.csv``.

    Each CSV row is expected to carry a key in column 0 and, in column 1,
    text that contains a JSON array of dicts (presumably model output;
    confirm against the producer of ``res2.csv``).  Rows with fewer than two
    columns or without a decodable JSON array are skipped.

    For every array entry that passes ``_is_relevant`` the whole array is
    stored under ``score_dict[row[0]]`` and a counter is incremented, so the
    printed number is the count of qualifying entries, not of rows.

    Returns:
        None.  The result is the mutation of ``score_dict`` and the printed
        count.
    """
    amount = 0
    # newline='' lets the csv module handle embedded line breaks itself.
    with open('res2.csv', 'r', newline='') as f:
        for row in csv.reader(f):
            if len(row) < 2:
                continue
            row_json = _extract_json_list(row[1])
            if row_json is None:
                continue
            for entry in row_json:
                if _is_relevant(entry):
                    amount += 1
                    score_dict[row[0]] = row_json
    print(amount)
30
+
31
+
32
def load_code_file(path='/home/weifengsun/tangou1/domain_code/src/datasets/data_merged/dataset_all.csv'):
    """Attach the first matching dataset row to its entry in ``score_dict``.

    The CSV at *path* is scanned row by row.  For the first row whose column 0
    is a key of the module-level ``score_dict``, that entry is replaced with
    ``{'code_file': row, 'score_json': <previous value>}``, the new entry is
    printed, and scanning stops.

    NOTE(review): stopping after the first match mirrors the original
    ``break``; the source formatting does not show whether it was nested
    under the match. Confirm whether all matching rows should be merged.

    Args:
        path: Location of the merged dataset CSV.  Defaults to the path the
            script was originally written against.

    Returns:
        None.  The result is the mutation of ``score_dict`` and the print.
    """
    # newline='' lets the csv module handle embedded line breaks itself.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields [] for blank lines; row[0] would raise.
                continue
            key = row[0]
            if key in score_dict:
                score_dict[key] = {'code_file': row, 'score_json': score_dict[key]}
                print(score_dict[key])
                break
40
+
41
+
42
+
43
+
44
+
45
if __name__ == '__main__':
    # Order matters: load_score() fills score_dict, which load_code_file()
    # then uses to look up dataset rows.
    load_score()
    load_code_file()