Niketjain2002 committed on
Commit
9baa8d5
·
verified ·
1 Parent(s): 1fe8464

Fix LinkedIn title parser: strip both | LinkedIn and - LinkedIn suffixes

Browse files
Files changed (1) hide show
  1. src/web_search.py +8 -3
src/web_search.py CHANGED
@@ -130,13 +130,18 @@ def _parse_linkedin_title(title: str) -> dict:
130
 
131
  Returns dict with name, title, company (all strings, may be empty).
132
  """
133
- # Strip the "| LinkedIn" suffix
134
- cleaned = re.sub(r"\s*\|\s*LinkedIn\s*$", "", title, flags=re.IGNORECASE).strip()
135
 
136
  parts = [p.strip() for p in cleaned.split(" - ")]
137
 
138
  if len(parts) >= 3:
139
- return {"name": parts[0], "title": parts[1], "company": parts[2]}
 
 
 
 
 
140
  elif len(parts) == 2:
141
  return {"name": parts[0], "title": parts[1], "company": ""}
142
  else:
 
130
 
131
  Returns dict with name, title, company (all strings, may be empty).
132
  """
133
+ # Strip "| LinkedIn" or "- LinkedIn" suffix (both patterns appear in Google results)
134
+ cleaned = re.sub(r"\s*[-|]\s*LinkedIn\s*$", "", title, flags=re.IGNORECASE).strip()
135
 
136
  parts = [p.strip() for p in cleaned.split(" - ")]
137
 
138
  if len(parts) >= 3:
139
+ # Use the third segment as company (any segments after it are ignored)
140
+ company = parts[2]
141
+ # Guard: if company is still "LinkedIn" somehow, clear it
142
+ if company.lower() == "linkedin":
143
+ company = ""
144
+ return {"name": parts[0], "title": parts[1], "company": company}
145
  elif len(parts) == 2:
146
  return {"name": parts[0], "title": parts[1], "company": ""}
147
  else: