Kaito117 commited on
Commit
cc564ae
·
1 Parent(s): 2b74a46

Add tests; modify files to pass the tests

Browse files
app/api/routes.py CHANGED
@@ -1,21 +1,31 @@
1
- from fastapi import APIRouter, Body
 
2
  from app.core.job_parser import JobParserAgent
3
  from app.models.schemas import JobProcessingRequest, JobProcessingResponse
4
  from app.services.agent import LinkedInSourcingAgent
5
 
6
  router = APIRouter()
7
 
 
 
 
 
 
 
 
8
  @router.post("/jobs", response_model=JobProcessingResponse)
9
- async def process_job(raw_text: str = Body(..., media_type="text/plain")):
10
  parser = JobParserAgent()
11
- job_desc = await parser.parse(raw_text)
12
 
13
  request = JobProcessingRequest(
14
  job_description=job_desc,
15
- max_candidates=50,
16
- include_github=False,
17
- confidence_threshold=0.3,
18
  )
19
 
20
  agent = LinkedInSourcingAgent()
21
- return await agent.process_job(request)
 
 
 
1
from fastapi import APIRouter
from pydantic import BaseModel

from app.core.job_parser import JobParserAgent
from app.models.schemas import JobProcessingRequest, JobProcessingResponse
from app.services.agent import LinkedInSourcingAgent

router = APIRouter()


class HTTPJobRequest(BaseModel):
    """Payload accepted by POST /jobs."""

    job_id: str
    search_query: str
    max_candidates: int = 50
    include_github: bool = False
    confidence_threshold: float = 0.3


@router.post("/jobs", response_model=JobProcessingResponse)
async def process_job(req: HTTPJobRequest):
    """Parse the free-text search query into a structured job description,
    run the sourcing agent, and return its response tagged with the
    caller-supplied job_id."""
    job_desc = await JobParserAgent().parse(req.search_query)

    sourcing_request = JobProcessingRequest(
        job_description=job_desc,
        max_candidates=req.max_candidates,
        include_github=req.include_github,
        confidence_threshold=req.confidence_threshold,
    )

    response = await LinkedInSourcingAgent().process_job(sourcing_request)
    # Echo the client's job_id back so callers can correlate responses.
    response.job_id = req.job_id
    return response
app/models/schemas.py CHANGED
@@ -119,7 +119,7 @@ class JobProcessingRequest(BaseModel):
119
  class JobProcessingResponse(BaseModel):
120
  job_id: str
121
  candidates_found: int
122
- top_candidates: list[ScoredCandidate]
123
  processing_time: float
124
  search_queries_used: list[str]
125
  status: str = "completed"
 
119
  class JobProcessingResponse(BaseModel):
120
  job_id: str
121
  candidates_found: int
122
+ top_candidates: list[ScoredCandidate] = Field(..., alias="candidates")
123
  processing_time: float
124
  search_queries_used: list[str]
125
  status: str = "completed"
app/services/__init__.py ADDED
File without changes
app/services/agent.py CHANGED
@@ -48,7 +48,7 @@ class LinkedInSourcingAgent:
48
  return JobProcessingResponse(
49
  job_id=job_id,
50
  candidates_found=0,
51
- top_candidates=[],
52
  processing_time=time.time() - start_time,
53
  search_queries_used=[],
54
  status="no_candidates_found",
@@ -71,7 +71,7 @@ class LinkedInSourcingAgent:
71
  return JobProcessingResponse(
72
  job_id=job_id,
73
  candidates_found=len(candidate_data),
74
- top_candidates=[],
75
  processing_time=time.time() - start_time,
76
  search_queries_used=[],
77
  status="extraction_failed",
@@ -111,7 +111,7 @@ class LinkedInSourcingAgent:
111
  return JobProcessingResponse(
112
  job_id=job_id,
113
  candidates_found=len(candidate_data),
114
- top_candidates=top_candidates,
115
  processing_time=processing_time,
116
  search_queries_used=[],
117
  status="completed",
@@ -122,7 +122,7 @@ class LinkedInSourcingAgent:
122
  return JobProcessingResponse(
123
  job_id=job_id,
124
  candidates_found=0,
125
- top_candidates=[],
126
  processing_time=time.time() - start_time,
127
  search_queries_used=[],
128
  status="failed",
@@ -148,7 +148,7 @@ class LinkedInSourcingAgent:
148
  error_response = JobProcessingResponse(
149
  job_id=f"error_{i}",
150
  candidates_found=0,
151
- top_candidates=[],
152
  processing_time=0,
153
  search_queries_used=[],
154
  status="error",
 
48
  return JobProcessingResponse(
49
  job_id=job_id,
50
  candidates_found=0,
51
+ candidates=[],
52
  processing_time=time.time() - start_time,
53
  search_queries_used=[],
54
  status="no_candidates_found",
 
71
  return JobProcessingResponse(
72
  job_id=job_id,
73
  candidates_found=len(candidate_data),
74
+ candidates=[],
75
  processing_time=time.time() - start_time,
76
  search_queries_used=[],
77
  status="extraction_failed",
 
111
  return JobProcessingResponse(
112
  job_id=job_id,
113
  candidates_found=len(candidate_data),
114
+ candidates=top_candidates,
115
  processing_time=processing_time,
116
  search_queries_used=[],
117
  status="completed",
 
122
  return JobProcessingResponse(
123
  job_id=job_id,
124
  candidates_found=0,
125
+ candidates=[],
126
  processing_time=time.time() - start_time,
127
  search_queries_used=[],
128
  status="failed",
 
148
  error_response = JobProcessingResponse(
149
  job_id=f"error_{i}",
150
  candidates_found=0,
151
+ candidates=[],
152
  processing_time=0,
153
  search_queries_used=[],
154
  status="error",
app/services/search_service.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import list, dict, Any
2
 
3
  from app.core.query_gen import SearchQueryGenerator
4
  from app.services.api_clients import SerpAPIClient, URLExtractor
 
1
+ from typing import Any
2
 
3
  from app.core.query_gen import SearchQueryGenerator
4
  from app.services.api_clients import SerpAPIClient, URLExtractor
pyproject.toml CHANGED
@@ -13,6 +13,7 @@ dependencies = [
13
  "groq>=0.29.0",
14
  "httpx>=0.28.1",
15
  "lxml>=6.0.0",
 
16
  "pydantic-settings>=2.10.1",
17
  "pymongo>=4.13.2",
18
  "redis>=6.2.0",
 
13
  "groq>=0.29.0",
14
  "httpx>=0.28.1",
15
  "lxml>=6.0.0",
16
+ "pydantic>=2.11.7",
17
  "pydantic-settings>=2.10.1",
18
  "pymongo>=4.13.2",
19
  "redis>=6.2.0",
pytest.ini ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [pytest]
2
+ testpaths = test
3
+ python_files = test_*.py *_test.py
4
+ asyncio_mode = auto
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bloom-filter>=1.3.3
2
+ bs4>=0.0.2
3
+ celery>=5.5.3
4
+ fake-useragent>=2.2.0
5
+ fastapi>=0.115.14
6
+ groq>=0.29.0
7
+ httpx>=0.28.1
8
+ lxml>=6.0.0
9
+ pydantic>=2.11.7
+ pydantic-settings>=2.10.1
10
+ pymongo>=4.13.2
11
+ redis>=6.2.0
12
+ structlog>=25.4.0
13
+ tenacity>=9.1.2
14
+ uvicorn>=0.35.0
test/basic_test.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
from pathlib import Path

import pytest
import respx
from httpx import Response
from fastapi.testclient import TestClient

from app.main import app


@pytest.fixture(autouse=True)
def mock_http(respx_mock: respx.MockRouter):
    """Stub every outbound HTTP call with canned fixtures from test/data/.

    Autouse so no test in this module ever reaches SerpAPI, GitHub, or
    RapidAPI over the network.
    """
    # 1) Mock SerpAPI (Google) search
    gh_search = json.loads(Path("test/data/github_search.json").read_text())
    respx_mock.get("https://serpapi.com/search.json").mock(
        return_value=Response(200, json=gh_search)
    )

    # 2) Mock GitHub profile HTML fetch.
    # BUG FIX: respx.MockRouter._compile is a private attribute that is not
    # part of respx's public API (and does not exist in released versions);
    # the supported way to match URLs by regex is the `url__regex` lookup.
    gh_html = Path("test/data/github_profile.html").read_text()
    respx_mock.get(url__regex=r"https://github\.com/.*").mock(
        return_value=Response(200, text=gh_html)
    )

    # 3) Mock RapidAPI LinkedIn profile fetch
    ln_json = json.loads(Path("test/data/mock_linkedin.json").read_text())
    respx_mock.get(
        url__regex=r"https://fresh-linkedin-profile-data\.p\.rapidapi\.com/.*"
    ).mock(return_value=Response(200, json=ln_json))

    yield


@pytest.fixture
def client() -> TestClient:
    """TestClient bound to the FastAPI app; one fresh instance per test."""
    return TestClient(app)
test/conftest.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from fastapi.testclient import TestClient
from app.main import app


@pytest.fixture
def client():
    """Fresh TestClient for the FastAPI app."""
    return TestClient(app)


@pytest.fixture(autouse=True)
def stub_external_services(monkeypatch):
    """Replace every network-facing collaborator with an in-process stub.

    Applied automatically to every test so no SerpAPI, LinkedIn, or GitHub
    traffic ever leaves the test run.
    """
    from datetime import datetime

    from app.models.schemas import (
        JobDescription,
        ProfileData,
        ScoredCandidate,
        ScoringBreakdown,
    )

    # --- job parser: echo the raw text back inside a canned JobDescription ---
    async def fake_parse(self, raw_text):
        return JobDescription(
            title="StubRole",
            company="StubCo",
            requirements=[],
            location="StubLoc",
            description=raw_text,
        )

    monkeypatch.setattr(
        "app.core.job_parser.JobParserAgent.parse", fake_parse, raising=True
    )

    # --- search service: async context manager yielding one dummy URL ---
    class DummySearchService:
        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            pass

        async def search_candidates(self, job, max_c):
            return [{"linkedin_url": "http://dummy"}]

    monkeypatch.setattr(
        "app.services.agent.LinkedInSearchService",
        lambda *args, **kwargs: DummySearchService(),
        raising=True,
    )

    # --- profile client: every URL "succeeds" with an empty payload ---
    class DummyProfileClient:
        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            pass

        async def batch_fetch_profiles(self, urls):
            return {u: {"status": "success", "html": {}} for u in urls}

    monkeypatch.setattr(
        "app.services.api_clients.LinkedInProfileClient",
        lambda *args, **kwargs: DummyProfileClient(),
        raising=True,
    )

    # --- extractor: always yield a single minimal ProfileData ---
    async def fake_extract(self, data):
        stub_profile = ProfileData(
            name="Stub",
            linkedin_url="http://dummy",
            headline=None,
            location=None,
            current_position=None,
            current_company=None,
            experience=[],
            education=[],
            skills=[],
            summary=None,
            connections=None,
        )
        return [stub_profile]

    monkeypatch.setattr(
        "app.core.data_extractor.ProfileExtractorService.extract_linkedin_profiles",
        fake_extract,
        raising=True,
    )

    # --- scorer: wrap the first profile in a fixed, fully-confident score ---
    async def fake_score(self, profiles, job):
        breakdown = ScoringBreakdown()
        return [
            ScoredCandidate(
                profile=profiles[0],
                fit_score=0.0,
                confidence=1.0,
                adjusted_score=1.0,
                score_breakdown=breakdown,
                outreach_message="",
                processed_at=datetime.now(),
            )
        ]

    monkeypatch.setattr(
        "app.core.scoring.CandidateScorer.batch_score_candidates",
        fake_score,
        raising=True,
    )
test/end_to_end_test.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from typing import Any
from app.services.agent import LinkedInSourcingAgent
from app.models.schemas import JobProcessingRequest, JobDescription
from fastapi.testclient import TestClient
from app.main import app


@pytest.fixture
def client():
    """HTTP client fixture (not used by the direct-agent test below)."""
    return TestClient(app)


@pytest.mark.asyncio
async def test_agent_process_job(monkeypatch):
    """
    Directly test the LinkedInSourcingAgent without HTTP at all.
    Monkeypatch its internal calls to return our fixtures.
    """
    # patch SerpAPIClient.search
    # BUG FIX: a plain function assigned onto a class is looked up as a
    # method, so the instance is passed as the first positional argument.
    # Without `self`, `query` would be bound to the client instance and the
    # real call would raise TypeError for the extra argument.  The lambdas
    # below already take `self`; these two async stubs must as well.
    async def fake_search(self, query: str) -> dict[str, Any]:
        return {"organic_results": [{"link": "dummy"}]}
    monkeypatch.setattr("app.services.api_clients.SerpAPIClient.search", fake_search)

    # patch URLExtractor.extract_linkedin_urls / extract_github_urls
    monkeypatch.setattr("app.services.api_clients.URLExtractor.extract_linkedin_urls", lambda self, x: ["https://dummy"])
    monkeypatch.setattr("app.services.api_clients.URLExtractor.extract_github_urls", lambda self, x: ["https://dummy"])

    # patch LinkedInProfileClient.fetch_profile & GitHubClient.fetch_github_profile_html
    # NOTE(review): if the real methods are coroutines, the agent will await
    # these returned dict/str values and fail — confirm the agent calls them
    # synchronously, otherwise these stubs need to be async too.
    monkeypatch.setattr("app.services.api_clients.LinkedInProfileClient.fetch_profile", lambda self, url: {"status": "success", "html": {}})
    monkeypatch.setattr("app.services.api_clients.GitHubClient.fetch_github_profile_html", lambda self, url: "<html></html>")

    # patch extractors
    monkeypatch.setattr("app.core.data_extractor.LinkedInProfileExtractor.extract_profile", lambda self, html, url: None)
    monkeypatch.setattr("app.core.data_extractor.GitHubProfileExtractor.extract_github_profile", lambda self, html, url: {"username": "u", "followers": 10})

    # patch scorer
    # BUG FIX: same method-binding rule as fake_search — `self` is required.
    async def fake_score(self, profiles, job):
        class C:
            def __init__(self): self.confidence = 0.6
        return [C()]
    monkeypatch.setattr(
        "app.core.scoring.CandidateScorer.batch_score_candidates",
        fake_score,
        raising=True,
    )

    agent = LinkedInSourcingAgent()
    # create a dummy JobDescription for the request
    job = JobDescription(
        title="TestRole",
        company="TestCo",
        requirements=[],
        location="Remote",
        description="Test job"
    )
    req = JobProcessingRequest(
        job_description=job,
        max_candidates=1,
        include_github=False,
        confidence_threshold=0.5
    )
    resp = await agent.process_job(req)

    # verify the new response attributes
    assert hasattr(resp, "candidates_found")
    assert resp.candidates_found in (0, 1)
    assert all(c.confidence >= 0.5 for c in resp.top_candidates)
test/integration_test.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest

from fastapi import status


# 4) Integration test of the orchestrator & HTTP layer
def test_full_pipeline_http(client):
    """
    End-to-end check through the /jobs endpoint.

    Exercises URLExtractor, SerpAPIClient, GitHubClient,
    LinkedInProfileClient, ProfileExtractorService, the scorer, etc.;
    every external HTTP call is stubbed out in conftest.py.
    """
    payload = {
        "job_id": "test-job-1",
        "search_query": "pytorch transformers nlp followers:>10 repos:>5",
        "confidence_threshold": 0.5
    }
    resp = client.post("/jobs", json=payload)
    assert resp.status_code == status.HTTP_200_OK

    body = resp.json()
    # expecting a top-10 list of scored candidates
    assert "candidates" in body
    candidates = body["candidates"]
    assert isinstance(candidates, list)
    assert len(candidates) <= 10
    min_confidence = payload["confidence_threshold"]
    for cand in candidates:
        assert cand["confidence"] >= min_confidence
test/test_pipeline.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
Simple test script to verify the complete LinkedIn sourcing pipeline
"""
import asyncio
from app.models.schemas import JobDescription, JobProcessingRequest, ExperienceLevel
from app.services.agent import LinkedInSourcingAgent


async def test_pipeline():
    """Test the complete pipeline with a simple job"""

    # Create a sample job description
    job = JobDescription(
        title="Senior Python Developer",
        company="TechCorp",
        requirements=["Python", "Django", "PostgreSQL", "AWS"],
        location="San Francisco, CA",
        experience_level=ExperienceLevel.SENIOR,
        description="Looking for a senior Python developer with strong backend skills"
    )

    # Create job processing request
    request = JobProcessingRequest(
        job_description=job,
        max_candidates=5,  # Keep small for testing
        include_github=False,
        confidence_threshold=0.3
    )

    print("🚀 Starting LinkedIn sourcing pipeline test...")
    print(f"Job: {job.title} at {job.company}")
    print(f"Requirements: {', '.join(job.requirements)}")
    print(f"Max candidates: {request.max_candidates}")
    print("-" * 50)

    # Initialize and run the agent
    agent = LinkedInSourcingAgent()

    try:
        response = await agent.process_job(request)

        print("✅ Pipeline completed!")
        print(f"Job ID: {response.job_id}")
        print(f"Status: {response.status}")
        print(f"Candidates found: {response.candidates_found}")
        print(f"Top candidates: {len(response.top_candidates)}")
        print(f"Processing time: {response.processing_time:.2f}s")

        if response.top_candidates:
            print("\n📋 Top Candidates:")
            for i, candidate in enumerate(response.top_candidates[:3], 1):
                # BUG FIX: top_candidates holds ScoredCandidate objects whose
                # identity fields live on the nested ProfileData (the scorer
                # builds ScoredCandidate(profile=..., fit_score=..., ...)),
                # so `candidate.name` / `candidate.linkedin_url` would raise
                # AttributeError — access them via `candidate.profile`.
                print(f"\n{i}. {candidate.profile.name}")
                print(f"   Score: {candidate.fit_score:.1f}/10")
                print(f"   LinkedIn: {candidate.profile.linkedin_url}")
                if hasattr(candidate, 'score_breakdown'):
                    print(f"   Education: {candidate.score_breakdown.education:.1f}")
                    print(f"   Experience: {candidate.score_breakdown.experience_match:.1f}")
                    print(f"   Companies: {candidate.score_breakdown.company_relevance:.1f}")

        return response

    except Exception as e:
        print(f"❌ Pipeline failed: {str(e)}")
        raise


if __name__ == "__main__":
    asyncio.run(test_pipeline())
uv.lock CHANGED
@@ -456,6 +456,7 @@ dependencies = [
456
  { name = "groq" },
457
  { name = "httpx" },
458
  { name = "lxml" },
 
459
  { name = "pydantic-settings" },
460
  { name = "pymongo" },
461
  { name = "redis" },
@@ -474,6 +475,7 @@ requires-dist = [
474
  { name = "groq", specifier = ">=0.29.0" },
475
  { name = "httpx", specifier = ">=0.28.1" },
476
  { name = "lxml", specifier = ">=6.0.0" },
 
477
  { name = "pydantic-settings", specifier = ">=2.10.1" },
478
  { name = "pymongo", specifier = ">=4.13.2" },
479
  { name = "redis", specifier = ">=6.2.0" },
 
456
  { name = "groq" },
457
  { name = "httpx" },
458
  { name = "lxml" },
459
+ { name = "pydantic" },
460
  { name = "pydantic-settings" },
461
  { name = "pymongo" },
462
  { name = "redis" },
 
475
  { name = "groq", specifier = ">=0.29.0" },
476
  { name = "httpx", specifier = ">=0.28.1" },
477
  { name = "lxml", specifier = ">=6.0.0" },
478
+ { name = "pydantic", specifier = ">=2.11.7" },
479
  { name = "pydantic-settings", specifier = ">=2.10.1" },
480
  { name = "pymongo", specifier = ">=4.13.2" },
481
  { name = "redis", specifier = ">=6.2.0" },