Corin1998 committed on
Commit
c198598
·
verified ·
1 Parent(s): fb38314

Update pipelines/merge.py

Browse files
Files changed (1) hide show
  1. pipelines/merge.py +7 -1
pipelines/merge.py CHANGED
@@ -7,7 +7,9 @@ def _period_key(item_text: str) -> str:
7
 
8
  def merge_normalized_records(records: List[Dict]) -> Dict:
9
  merged = {"work_experience": [], "education": [], "certifications": [], "skills": [], "raw_sections": {}}
10
- seen_we, seen_edu, seen_cert, skill_set = set(), set(), set(), set()
 
 
11
 
12
  for r in records:
13
  for w in r.get("work_experience", []):
@@ -15,19 +17,23 @@ def merge_normalized_records(records: List[Dict]) -> Dict:
15
  if key not in seen_we:
16
  seen_we.add(key)
17
  merged["work_experience"].append(w)
 
18
  for e in r.get("education", []):
19
  k = e.get("text", "")
20
  if k and k not in seen_edu:
21
  seen_edu.add(k)
22
  merged["education"].append(e)
 
23
  for c in r.get("certifications", []):
24
  k = c.get("text", "")
25
  if k and k not in seen_cert:
26
  seen_cert.add(k)
27
  merged["certifications"].append(c)
 
28
  for s in r.get("skills", []):
29
  if s:
30
  skill_set.add(s)
 
31
  for k, v in r.get("raw_sections", {}).items():
32
  merged["raw_sections"][k] = (merged["raw_sections"].get(k, "") + "\n" + v).strip()
33
 
 
7
 
8
  def merge_normalized_records(records: List[Dict]) -> Dict:
9
  merged = {"work_experience": [], "education": [], "certifications": [], "skills": [], "raw_sections": {}}
10
+
11
+ seen_we, seen_edu, seen_cert = set(), set(), set()
12
+ skill_set = set()
13
 
14
  for r in records:
15
  for w in r.get("work_experience", []):
 
17
  if key not in seen_we:
18
  seen_we.add(key)
19
  merged["work_experience"].append(w)
20
+
21
  for e in r.get("education", []):
22
  k = e.get("text", "")
23
  if k and k not in seen_edu:
24
  seen_edu.add(k)
25
  merged["education"].append(e)
26
+
27
  for c in r.get("certifications", []):
28
  k = c.get("text", "")
29
  if k and k not in seen_cert:
30
  seen_cert.add(k)
31
  merged["certifications"].append(c)
32
+
33
  for s in r.get("skills", []):
34
  if s:
35
  skill_set.add(s)
36
+
37
  for k, v in r.get("raw_sections", {}).items():
38
  merged["raw_sections"][k] = (merged["raw_sections"].get(k, "") + "\n" + v).strip()
39