David319193 committed on
Commit
3040a19
·
verified ·
1 Parent(s): 7fd48f9

Update extract.py

Browse files
Files changed (1) hide show
  1. extract.py +85 -85
extract.py CHANGED
@@ -1,86 +1,86 @@
1
- import csv, json
2
- metadata_list = ['fullname', 'mediator profile on mediate.com', 'mediator Biography', 'mediator state']
3
-
4
- def extract_practice():
5
- csvfile = "updated.csv"
6
-
7
- header_to_extract = "mediator areas of practice"
8
-
9
- values = []
10
- with open(csvfile, 'r') as file:
11
- csv_reader = csv.DictReader(file)
12
- for row in csv_reader:
13
- if header_to_extract in row:
14
- text = row[header_to_extract]
15
- practice_list = text.split('|')
16
-
17
- for practice in practice_list:
18
- new_practice = practice.strip()
19
-
20
- if not new_practice in values and not new_practice.isdigit():
21
- values.append(new_practice)
22
-
23
- # jsonfile_path = "practice.json"
24
-
25
- # with open(jsonfile_path, 'w') as file:
26
- # json.dump(values, file, indent=4)
27
-
28
- return values
29
-
30
- def extract_state():
31
- csvfile = "updated.csv"
32
-
33
- header_to_extract = "mediator state"
34
-
35
- values = []
36
- with open(csvfile, 'r') as file:
37
- csv_reader = csv.DictReader(file)
38
- for row in csv_reader:
39
- if header_to_extract in row:
40
- text = row[header_to_extract]
41
-
42
- if not text in values:
43
- values.append(text)
44
-
45
- return values
46
-
47
- def extract_city():
48
- csvfile = "updated.csv"
49
-
50
- header_to_extract = "mediator city"
51
- header_state = "mediator state"
52
- values = {}
53
- with open(csvfile, 'r') as file:
54
- csv_reader = csv.DictReader(file)
55
- for row in csv_reader:
56
- if header_to_extract in row:
57
- text = row[header_to_extract]
58
- if not text in values:
59
- values[text] = row[header_state]
60
-
61
- return values
62
-
63
- def search_mediator(filter: dict, practice: str):
64
- print("filter =>", filter)
65
- csvfile = "updated.csv"
66
- mediator_data = []
67
- with open(csvfile, 'r') as file:
68
- csv_reader = csv.DictReader(file)
69
-
70
- for row in csv_reader:
71
- isMatch = True
72
- for key, value in filter.items():
73
- if row[key] != value:
74
- isMatch = False
75
-
76
- if not practice in row['mediator areas of practice']:
77
- isMatch = False
78
-
79
- if isMatch:
80
- data = {}
81
- for medadata in metadata_list:
82
- data[medadata] = row[medadata]
83
-
84
- mediator_data.append(data)
85
-
86
  return mediator_data
 
1
+ import csv, json
2
# Column names that search_mediator copies from each matching CSV row into its result dicts.
+ metadata_list = ['fullname', 'mediator profile on mediate.com', 'mediator Biography', 'mediator state']
3
+
4
def extract_practice():
    """Return the distinct practice areas found in ``updated.csv``.

    Each row's "mediator areas of practice" cell is split on ``|``. Every
    piece is stripped of surrounding whitespace, and pieces consisting only
    of digits are discarded. The result keeps first-seen order.

    Rows that have no value for the column (a short row, or a file without
    that column) are skipped instead of raising.

    Returns:
        list: unique practice-area strings.
    """
    csvfile = "updated.csv"
    header_to_extract = "mediator areas of practice"

    values = []
    seen = set()  # O(1) duplicate check; ``values`` preserves the order
    # newline='' is the opening mode the csv module documents for readers.
    with open(csvfile, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            # DictReader fills fields missing from a short row with None,
            # and .get() also covers a file that lacks the column entirely.
            text = row.get(header_to_extract)
            if text is None:
                continue

            for practice in text.split('|'):
                new_practice = practice.strip()
                if new_practice in seen or new_practice.isdigit():
                    continue
                seen.add(new_practice)
                values.append(new_practice)

    return values
29
+
30
def extract_state():
    """Return the distinct "mediator state" values found in ``updated.csv``.

    Values are returned exactly as they appear in the file (no stripping),
    in first-seen order. Rows that have no value for the column are skipped.

    Returns:
        list: unique state strings.
    """
    csvfile = "updated.csv"
    header_to_extract = "mediator state"

    values = []
    seen = set()  # O(1) duplicate check; ``values`` preserves the order
    # Decode as UTF-8 explicitly, matching extract_practice, so the result
    # does not depend on the platform's default encoding.
    with open(csvfile, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            # None means the row was too short or the column is absent.
            text = row.get(header_to_extract)
            if text is None or text in seen:
                continue
            seen.add(text)
            values.append(text)

    return values
46
+
47
def extract_city():
    """Map each "mediator city" in ``updated.csv`` to a "mediator state".

    The first row seen for a city decides its state; later rows with the
    same city text are ignored, even if their state differs. Rows that have
    no value for the city column are skipped.

    Returns:
        dict: city string -> state value from the first row with that city.
    """
    csvfile = "updated.csv"
    header_to_extract = "mediator city"
    header_state = "mediator state"

    values = {}
    # Decode as UTF-8 explicitly, matching extract_practice, so the result
    # does not depend on the platform's default encoding.
    with open(csvfile, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            # None means the row was too short or the column is absent.
            text = row.get(header_to_extract)
            if text is None or text in values:
                continue
            values[text] = row[header_state]

    return values
62
+
63
def search_mediator(filter: dict, practice: str):
    """Return the rows of ``updated.csv`` that satisfy a filter and a practice.

    A row matches when every ``filter`` column equals the requested value
    and ``practice`` occurs in the row's "mediator areas of practice" cell.

    Args:
        filter: column name -> exact value the row must have. A key that is
            not a column of the file matches no row.
        practice: text looked up by substring containment in the practice
            cell, so an empty string matches every row that has the cell.

    Returns:
        list: one dict per matching row, holding the columns named in the
        module-level ``metadata_list``.
    """
    print("filter =>", filter)
    csvfile = "updated.csv"
    practice_header = 'mediator areas of practice'

    mediator_data = []
    # Decode as UTF-8 explicitly, matching extract_practice, so the result
    # does not depend on the platform's default encoding.
    with open(csvfile, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            # .get() yields None for an unknown column, so such a filter
            # simply fails to match; any() stops at the first mismatch.
            if any(row.get(key) != value for key, value in filter.items()):
                continue

            # A short row has None here; treat it as "no practice listed".
            areas = row.get(practice_header)
            if areas is None or practice not in areas:
                continue

            mediator_data.append({field: row[field] for field in metadata_list})

    return mediator_data