Alpha108 committed on
Commit
b0185e1
·
verified ·
1 Parent(s): 2be266e

Update backend/agents/normalizer.py

Browse files
Files changed (1) hide show
  1. backend/agents/normalizer.py +7 -7
backend/agents/normalizer.py CHANGED
@@ -1,6 +1,4 @@
1
  from datetime import datetime
2
- from .normalizer import normalize_job_data
3
-
4
  import re
5
 
6
  def clean_html(raw_html):
@@ -25,12 +23,11 @@ def normalize_job_data(job_data, source):
25
  if source == "RemoteOK":
26
  date_posted_str = 'N/A'
27
  try:
28
- # FIX: Safely convert timestamp, handling potential strings or errors.
29
  timestamp = int(job_data.get('date', 0))
30
  if timestamp > 0:
31
  date_posted_str = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d')
32
  except (ValueError, TypeError, OSError):
33
- # If the date is not a valid number or causes an error, it will default to 'N/A'.
34
  pass
35
 
36
  return {
@@ -40,9 +37,10 @@ def normalize_job_data(job_data, source):
40
  'location': job_data.get('location', 'Remote'),
41
  'description': clean_html(job_data.get('description', '')),
42
  'url': job_data.get('url', ''),
43
- 'date_posted': date_posted_str, # Use the safely handled string
44
  'source': source
45
  }
 
46
  elif source == "LinkedIn (Stub)":
47
  return {
48
  'id': f"linkedin_{job_data.get('title', '').replace(' ', '_')}",
@@ -54,17 +52,19 @@ def normalize_job_data(job_data, source):
54
  'date_posted': job_data.get('posted_at', 'N/A'),
55
  'source': source
56
  }
 
57
  elif source == "Upwork (Stub)":
58
  return {
59
  'id': f"upwork_{job_data.get('title', '').replace(' ', '_')}",
60
  'title': job_data.get('title', 'N/A'),
61
- 'company': 'Upwork Client', # Upwork is client-based
62
  'location': job_data.get('client', {}).get('country', 'Remote'),
63
  'description': clean_html(job_data.get('snippet', '')),
64
  'url': job_data.get('url', ''),
65
  'date_posted': job_data.get('published_on', 'N/A').split('T')[0],
66
  'source': source
67
  }
 
68
  elif source == "Freelancer (Stub)":
69
  return {
70
  'id': f"freelancer_{job_data.get('title', '').replace(' ', '_')}",
@@ -76,6 +76,7 @@ def normalize_job_data(job_data, source):
76
  'date_posted': datetime.fromtimestamp(job_data.get('submitdate', 0)).strftime('%Y-%m-%d'),
77
  'source': source
78
  }
 
79
  else:
80
  # Generic fallback
81
  return {
@@ -88,4 +89,3 @@ def normalize_job_data(job_data, source):
88
  'date_posted': 'N/A',
89
  'source': source
90
  }
91
-
 
1
  from datetime import datetime
 
 
2
  import re
3
 
4
  def clean_html(raw_html):
 
23
  if source == "RemoteOK":
24
  date_posted_str = 'N/A'
25
  try:
26
+ # Safely convert timestamp
27
  timestamp = int(job_data.get('date', 0))
28
  if timestamp > 0:
29
  date_posted_str = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d')
30
  except (ValueError, TypeError, OSError):
 
31
  pass
32
 
33
  return {
 
37
  'location': job_data.get('location', 'Remote'),
38
  'description': clean_html(job_data.get('description', '')),
39
  'url': job_data.get('url', ''),
40
+ 'date_posted': date_posted_str,
41
  'source': source
42
  }
43
+
44
  elif source == "LinkedIn (Stub)":
45
  return {
46
  'id': f"linkedin_{job_data.get('title', '').replace(' ', '_')}",
 
52
  'date_posted': job_data.get('posted_at', 'N/A'),
53
  'source': source
54
  }
55
+
56
  elif source == "Upwork (Stub)":
57
  return {
58
  'id': f"upwork_{job_data.get('title', '').replace(' ', '_')}",
59
  'title': job_data.get('title', 'N/A'),
60
+ 'company': 'Upwork Client',
61
  'location': job_data.get('client', {}).get('country', 'Remote'),
62
  'description': clean_html(job_data.get('snippet', '')),
63
  'url': job_data.get('url', ''),
64
  'date_posted': job_data.get('published_on', 'N/A').split('T')[0],
65
  'source': source
66
  }
67
+
68
  elif source == "Freelancer (Stub)":
69
  return {
70
  'id': f"freelancer_{job_data.get('title', '').replace(' ', '_')}",
 
76
  'date_posted': datetime.fromtimestamp(job_data.get('submitdate', 0)).strftime('%Y-%m-%d'),
77
  'source': source
78
  }
79
+
80
  else:
81
  # Generic fallback
82
  return {
 
89
  'date_posted': 'N/A',
90
  'source': source
91
  }