File size: 4,297 Bytes
f190f3b
 
 
 
a77e42f
f190f3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1579508
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f190f3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""Tests for BaseScraper static helpers — no HTTP calls needed."""

from datetime import datetime
import pytest
from agent.search.scrapers.base import BaseScraper


class TestParseDate:
    """Checks for ``BaseScraper._parse_date``.

    Covers unparseable input, absolute date formats, label prefixes in
    front of a date, and relative "... ago" phrases.
    """

    def _p(self, text):
        """Run ``text`` through the parser under test and return its result."""
        parsed = BaseScraper._parse_date(text)
        return parsed

    def _age(self, text):
        """Parse a relative phrase and return how far before "now" it lies.

        Asserts that the phrase was parsed at all, then returns the
        ``timedelta`` between the current time and the parsed value.
        """
        parsed = self._p(text)
        assert parsed is not None
        return datetime.now() - parsed

    # -- unparseable input ---------------------------------------------------

    def test_none_returns_none(self):
        parsed = self._p(None)
        assert parsed is None

    def test_empty_returns_none(self):
        parsed = self._p("")
        assert parsed is None

    def test_garbage_returns_none(self):
        parsed = self._p("no date here")
        assert parsed is None

    # -- absolute dates ------------------------------------------------------

    def test_iso_date(self):
        expected = datetime(2025, 3, 15)
        assert self._p("2025-03-15") == expected

    def test_iso_datetime(self):
        # The expected value carries no time-of-day component.
        expected = datetime(2025, 11, 1)
        assert self._p("2025-11-01T10:30:00") == expected

    def test_dd_month_yyyy_full(self):
        expected = datetime(2025, 3, 15)
        assert self._p("15 March 2025") == expected

    def test_dd_month_yyyy_abbrev(self):
        expected = datetime(2026, 4, 3)
        assert self._p("3 Apr 2026") == expected

    def test_month_dd_yyyy(self):
        expected = datetime(2025, 3, 15)
        assert self._p("March 15, 2025") == expected

    def test_month_dd_yyyy_no_comma(self):
        expected = datetime(2026, 1, 5)
        assert self._p("January 5 2026") == expected

    def test_dd_mm_yyyy_slash(self):
        # Day comes first: 15/03 is the 15th of March.
        expected = datetime(2025, 3, 15)
        assert self._p("15/03/2025") == expected

    # -- label prefixes in front of the date ---------------------------------

    def test_strips_posted_on_prefix(self):
        expected = datetime(2025, 3, 15)
        assert self._p("Posted on: 15 March 2025") == expected

    def test_strips_closes_prefix(self):
        expected = datetime(2026, 4, 3)
        assert self._p("Closes 3 Apr 2026") == expected

    def test_strips_deadline_prefix(self):
        expected = datetime(2026, 6, 1)
        assert self._p("Deadline: 01/06/2026") == expected

    def test_case_insensitive_prefix(self):
        expected = datetime(2026, 1, 10)
        assert self._p("POSTED ON: 10 January 2026") == expected

    # -- relative dates (scholarshipdb / nature.com format) ------------------

    def test_relative_hours_ago(self):
        seconds_old = self._age("about 3 hours ago").total_seconds()
        assert 0 <= seconds_old < 4 * 3600  # within 4 h of now

    def test_relative_days_ago(self):
        days_old = self._age("5 days ago").days
        assert 4 <= days_old <= 6

    def test_relative_weeks_ago(self):
        days_old = self._age("2 weeks ago").days
        assert 13 <= days_old <= 15

    def test_relative_months_ago(self):
        days_old = self._age("about 1 month ago").days
        assert 28 <= days_old <= 32

    def test_relative_years_ago(self):
        days_old = self._age("1 year ago").days
        assert 363 <= days_old <= 367

    def test_relative_minutes_ago(self):
        seconds_old = self._age("10 minutes ago").total_seconds()
        assert 0 <= seconds_old < 11 * 60


class TestDetectType:
    """Checks for ``BaseScraper._detect_type``.

    Each test passes a title (and sometimes a description) and compares
    the returned label against the expected string.
    """

    def _d(self, title, desc=""):
        """Classify a posting; the description defaults to an empty string."""
        label = BaseScraper._detect_type(title, desc)
        return label

    def test_phd_in_title(self):
        label = self._d("PhD Position in Machine Learning")
        assert label == "phd"

    def test_postdoc_in_title(self):
        label = self._d("Postdoctoral Researcher in NLP")
        assert label == "postdoc"

    def test_fellowship_in_desc(self):
        # Only the description mentions a fellowship here.
        title = "Research Position"
        description = "Marie Curie fellowship available"
        assert self._d(title, description) == "fellowship"

    def test_predoctoral_in_title(self):
        label = self._d("Predoctoral Researcher")
        assert label == "predoctoral"

    def test_research_staff(self):
        label = self._d("Research Scientist at DeepMind")
        assert label == "research_staff"

    def test_unknown_returns_other(self):
        title = "Open Position"
        description = "Some vague description"
        assert self._d(title, description) == "other"


class TestExtractEmail:
    """Checks for ``BaseScraper._extract_email`` on free-form text."""

    def _e(self, text):
        """Run ``text`` through the extractor under test and return its result."""
        found = BaseScraper._extract_email(text)
        return found

    def test_extracts_simple_email(self):
        found = self._e("Contact us at jobs@mit.edu for details")
        assert found == "jobs@mit.edu"

    def test_no_email_returns_none(self):
        found = self._e("No contact information provided")
        assert found is None

    def test_extracts_first_of_multiple(self):
        # With two addresses in the text, the earlier one is expected back.
        found = self._e("Email a@x.com or b@y.org")
        assert found == "a@x.com"