Kaito117 commited on
Commit
cc564ae
·
1 Parent(s): 2b74a46

Add tests; modify files to pass the tests

Browse files
app/api/routes.py CHANGED
@@ -1,21 +1,31 @@
1
- from fastapi import APIRouter, Body
 
2
  from app.core.job_parser import JobParserAgent
3
  from app.models.schemas import JobProcessingRequest, JobProcessingResponse
4
  from app.services.agent import LinkedInSourcingAgent
5
 
6
  router = APIRouter()
7
 
 
 
 
 
 
 
 
8
  @router.post("/jobs", response_model=JobProcessingResponse)
9
- async def process_job(raw_text: str = Body(..., media_type="text/plain")):
10
  parser = JobParserAgent()
11
- job_desc = await parser.parse(raw_text)
12
 
13
  request = JobProcessingRequest(
14
  job_description=job_desc,
15
- max_candidates=50,
16
- include_github=False,
17
- confidence_threshold=0.3,
18
  )
19
 
20
  agent = LinkedInSourcingAgent()
21
- return await agent.process_job(request)
 
 
 
1
from fastapi import APIRouter
from pydantic import BaseModel

from app.core.job_parser import JobParserAgent
from app.models.schemas import JobProcessingRequest, JobProcessingResponse
from app.services.agent import LinkedInSourcingAgent

router = APIRouter()


class HTTPJobRequest(BaseModel):
    """Payload accepted by POST /jobs."""

    job_id: str
    search_query: str
    max_candidates: int = 50
    include_github: bool = False
    confidence_threshold: float = 0.3


@router.post("/jobs", response_model=JobProcessingResponse)
async def process_job(req: HTTPJobRequest):
    """Parse the free-text search query into a structured job description,
    run the sourcing agent, and return its response tagged with the
    caller-supplied job_id."""
    job_desc = await JobParserAgent().parse(req.search_query)

    sourcing_request = JobProcessingRequest(
        job_description=job_desc,
        max_candidates=req.max_candidates,
        include_github=req.include_github,
        confidence_threshold=req.confidence_threshold,
    )

    response = await LinkedInSourcingAgent().process_job(sourcing_request)
    # Echo the client's job_id back so callers can correlate responses.
    response.job_id = req.job_id
    return response
app/models/schemas.py CHANGED
@@ -119,7 +119,7 @@ class JobProcessingRequest(BaseModel):
119
  class JobProcessingResponse(BaseModel):
120
  job_id: str
121
  candidates_found: int
122
- top_candidates: list[ScoredCandidate]
123
  processing_time: float
124
  search_queries_used: list[str]
125
  status: str = "completed"
 
119
  class JobProcessingResponse(BaseModel):
120
  job_id: str
121
  candidates_found: int
122
+ top_candidates: list[ScoredCandidate] = Field(..., alias="candidates")
123
  processing_time: float
124
  search_queries_used: list[str]
125
  status: str = "completed"
app/services/__init__.py ADDED
File without changes
app/services/agent.py CHANGED
@@ -48,7 +48,7 @@ class LinkedInSourcingAgent:
48
  return JobProcessingResponse(
49
  job_id=job_id,
50
  candidates_found=0,
51
- top_candidates=[],
52
  processing_time=time.time() - start_time,
53
  search_queries_used=[],
54
  status="no_candidates_found",
@@ -71,7 +71,7 @@ class LinkedInSourcingAgent:
71
  return JobProcessingResponse(
72
  job_id=job_id,
73
  candidates_found=len(candidate_data),
74
- top_candidates=[],
75
  processing_time=time.time() - start_time,
76
  search_queries_used=[],
77
  status="extraction_failed",
@@ -111,7 +111,7 @@ class LinkedInSourcingAgent:
111
  return JobProcessingResponse(
112
  job_id=job_id,
113
  candidates_found=len(candidate_data),
114
- top_candidates=top_candidates,
115
  processing_time=processing_time,
116
  search_queries_used=[],
117
  status="completed",
@@ -122,7 +122,7 @@ class LinkedInSourcingAgent:
122
  return JobProcessingResponse(
123
  job_id=job_id,
124
  candidates_found=0,
125
- top_candidates=[],
126
  processing_time=time.time() - start_time,
127
  search_queries_used=[],
128
  status="failed",
@@ -148,7 +148,7 @@ class LinkedInSourcingAgent:
148
  error_response = JobProcessingResponse(
149
  job_id=f"error_{i}",
150
  candidates_found=0,
151
- top_candidates=[],
152
  processing_time=0,
153
  search_queries_used=[],
154
  status="error",
 
48
  return JobProcessingResponse(
49
  job_id=job_id,
50
  candidates_found=0,
51
+ candidates=[],
52
  processing_time=time.time() - start_time,
53
  search_queries_used=[],
54
  status="no_candidates_found",
 
71
  return JobProcessingResponse(
72
  job_id=job_id,
73
  candidates_found=len(candidate_data),
74
+ candidates=[],
75
  processing_time=time.time() - start_time,
76
  search_queries_used=[],
77
  status="extraction_failed",
 
111
  return JobProcessingResponse(
112
  job_id=job_id,
113
  candidates_found=len(candidate_data),
114
+ candidates=top_candidates,
115
  processing_time=processing_time,
116
  search_queries_used=[],
117
  status="completed",
 
122
  return JobProcessingResponse(
123
  job_id=job_id,
124
  candidates_found=0,
125
+ candidates=[],
126
  processing_time=time.time() - start_time,
127
  search_queries_used=[],
128
  status="failed",
 
148
  error_response = JobProcessingResponse(
149
  job_id=f"error_{i}",
150
  candidates_found=0,
151
+ candidates=[],
152
  processing_time=0,
153
  search_queries_used=[],
154
  status="error",
app/services/search_service.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import list, dict, Any
2
 
3
  from app.core.query_gen import SearchQueryGenerator
4
  from app.services.api_clients import SerpAPIClient, URLExtractor
 
1
+ from typing import Any
2
 
3
  from app.core.query_gen import SearchQueryGenerator
4
  from app.services.api_clients import SerpAPIClient, URLExtractor
pyproject.toml CHANGED
@@ -13,6 +13,7 @@ dependencies = [
13
  "groq>=0.29.0",
14
  "httpx>=0.28.1",
15
  "lxml>=6.0.0",
 
16
  "pydantic-settings>=2.10.1",
17
  "pymongo>=4.13.2",
18
  "redis>=6.2.0",
 
13
  "groq>=0.29.0",
14
  "httpx>=0.28.1",
15
  "lxml>=6.0.0",
16
+ "pydantic>=2.11.7",
17
  "pydantic-settings>=2.10.1",
18
  "pymongo>=4.13.2",
19
  "redis>=6.2.0",
pytest.ini ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [pytest]
2
+ testpaths = test
3
+ python_files = test_*.py *_test.py
4
+ asyncio_mode = auto
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bloom-filter>=1.3.3
2
+ bs4>=0.0.2
3
+ celery>=5.5.3
4
+ fake-useragent>=2.2.0
5
+ fastapi>=0.115.14
6
+ groq>=0.29.0
7
+ httpx>=0.28.1
8
+ lxml>=6.0.0
9
+ pydantic>=2.11.7
+ pydantic-settings>=2.10.1
10
+ pymongo>=4.13.2
11
+ redis>=6.2.0
12
+ structlog>=25.4.0
13
+ tenacity>=9.1.2
14
+ uvicorn>=0.35.0
test/basic_test.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
from pathlib import Path

import pytest
import respx
from httpx import Response
from fastapi.testclient import TestClient

from app.main import app


@pytest.fixture(autouse=True)
def mock_http(respx_mock: respx.MockRouter):
    """Stub every outbound HTTP call with canned fixtures from test/data/.

    Autouse so no test in this module ever reaches SerpAPI, GitHub, or
    RapidAPI over the network.
    """
    # 1) Mock SerpAPI (Google) search
    gh_search = json.loads(Path("test/data/github_search.json").read_text())
    respx_mock.get("https://serpapi.com/search.json").mock(
        return_value=Response(200, json=gh_search)
    )

    # 2) Mock GitHub profile HTML fetch.
    # BUG FIX: respx.MockRouter._compile is a private attribute that is not
    # part of respx's public API (and does not exist in released versions);
    # the supported way to match URLs by regex is the `url__regex` lookup.
    gh_html = Path("test/data/github_profile.html").read_text()
    respx_mock.get(url__regex=r"https://github\.com/.*").mock(
        return_value=Response(200, text=gh_html)
    )

    # 3) Mock RapidAPI LinkedIn profile fetch
    ln_json = json.loads(Path("test/data/mock_linkedin.json").read_text())
    respx_mock.get(
        url__regex=r"https://fresh-linkedin-profile-data\.p\.rapidapi\.com/.*"
    ).mock(return_value=Response(200, json=ln_json))

    yield


@pytest.fixture
def client() -> TestClient:
    """TestClient bound to the FastAPI app; one fresh instance per test."""
    return TestClient(app)
test/conftest.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from fastapi.testclient import TestClient
from app.main import app


@pytest.fixture
def client():
    """Fresh TestClient for the FastAPI app."""
    return TestClient(app)


@pytest.fixture(autouse=True)
def stub_external_services(monkeypatch):
    """Replace every network-facing collaborator with an in-process stub.

    Applied automatically to every test so no SerpAPI, LinkedIn, or GitHub
    traffic ever leaves the test run.
    """
    from datetime import datetime

    from app.models.schemas import (
        JobDescription,
        ProfileData,
        ScoredCandidate,
        ScoringBreakdown,
    )

    # --- job parser: echo the raw text back inside a canned JobDescription ---
    async def fake_parse(self, raw_text):
        return JobDescription(
            title="StubRole",
            company="StubCo",
            requirements=[],
            location="StubLoc",
            description=raw_text,
        )

    monkeypatch.setattr(
        "app.core.job_parser.JobParserAgent.parse", fake_parse, raising=True
    )

    # --- search service: async context manager yielding one dummy URL ---
    class DummySearchService:
        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            pass

        async def search_candidates(self, job, max_c):
            return [{"linkedin_url": "http://dummy"}]

    monkeypatch.setattr(
        "app.services.agent.LinkedInSearchService",
        lambda *args, **kwargs: DummySearchService(),
        raising=True,
    )

    # --- profile client: every URL "succeeds" with an empty payload ---
    class DummyProfileClient:
        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            pass

        async def batch_fetch_profiles(self, urls):
            return {u: {"status": "success", "html": {}} for u in urls}

    monkeypatch.setattr(
        "app.services.api_clients.LinkedInProfileClient",
        lambda *args, **kwargs: DummyProfileClient(),
        raising=True,
    )

    # --- extractor: always yield a single minimal ProfileData ---
    async def fake_extract(self, data):
        stub_profile = ProfileData(
            name="Stub",
            linkedin_url="http://dummy",
            headline=None,
            location=None,
            current_position=None,
            current_company=None,
            experience=[],
            education=[],
            skills=[],
            summary=None,
            connections=None,
        )
        return [stub_profile]

    monkeypatch.setattr(
        "app.core.data_extractor.ProfileExtractorService.extract_linkedin_profiles",
        fake_extract,
        raising=True,
    )

    # --- scorer: wrap the first profile in a fixed, fully-confident score ---
    async def fake_score(self, profiles, job):
        breakdown = ScoringBreakdown()
        return [
            ScoredCandidate(
                profile=profiles[0],
                fit_score=0.0,
                confidence=1.0,
                adjusted_score=1.0,
                score_breakdown=breakdown,
                outreach_message="",
                processed_at=datetime.now(),
            )
        ]

    monkeypatch.setattr(
        "app.core.scoring.CandidateScorer.batch_score_candidates",
        fake_score,
        raising=True,
    )
test/end_to_end_test.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from typing import Any
from app.services.agent import LinkedInSourcingAgent
from app.models.schemas import JobProcessingRequest, JobDescription
from fastapi.testclient import TestClient
from app.main import app


@pytest.fixture
def client():
    """HTTP client fixture (not used by the direct-agent test below)."""
    return TestClient(app)


@pytest.mark.asyncio
async def test_agent_process_job(monkeypatch):
    """
    Directly test the LinkedInSourcingAgent without HTTP at all.
    Monkeypatch its internal calls to return our fixtures.
    """
    # patch SerpAPIClient.search
    # BUG FIX: a plain function assigned onto a class is looked up as a
    # method, so the instance is passed as the first positional argument.
    # Without `self`, `query` would be bound to the client instance and the
    # real call would raise TypeError for the extra argument.  The lambdas
    # below already take `self`; these two async stubs must as well.
    async def fake_search(self, query: str) -> dict[str, Any]:
        return {"organic_results": [{"link": "dummy"}]}
    monkeypatch.setattr("app.services.api_clients.SerpAPIClient.search", fake_search)

    # patch URLExtractor.extract_linkedin_urls / extract_github_urls
    monkeypatch.setattr("app.services.api_clients.URLExtractor.extract_linkedin_urls", lambda self, x: ["https://dummy"])
    monkeypatch.setattr("app.services.api_clients.URLExtractor.extract_github_urls", lambda self, x: ["https://dummy"])

    # patch LinkedInProfileClient.fetch_profile & GitHubClient.fetch_github_profile_html
    # NOTE(review): if the real methods are coroutines, the agent will await
    # these returned dict/str values and fail — confirm the agent calls them
    # synchronously, otherwise these stubs need to be async too.
    monkeypatch.setattr("app.services.api_clients.LinkedInProfileClient.fetch_profile", lambda self, url: {"status": "success", "html": {}})
    monkeypatch.setattr("app.services.api_clients.GitHubClient.fetch_github_profile_html", lambda self, url: "<html></html>")

    # patch extractors
    monkeypatch.setattr("app.core.data_extractor.LinkedInProfileExtractor.extract_profile", lambda self, html, url: None)
    monkeypatch.setattr("app.core.data_extractor.GitHubProfileExtractor.extract_github_profile", lambda self, html, url: {"username": "u", "followers": 10})

    # patch scorer
    # BUG FIX: same method-binding rule as fake_search — `self` is required.
    async def fake_score(self, profiles, job):
        class C:
            def __init__(self): self.confidence = 0.6
        return [C()]
    monkeypatch.setattr(
        "app.core.scoring.CandidateScorer.batch_score_candidates",
        fake_score,
        raising=True,
    )

    agent = LinkedInSourcingAgent()
    # create a dummy JobDescription for the request
    job = JobDescription(
        title="TestRole",
        company="TestCo",
        requirements=[],
        location="Remote",
        description="Test job"
    )
    req = JobProcessingRequest(
        job_description=job,
        max_candidates=1,
        include_github=False,
        confidence_threshold=0.5
    )
    resp = await agent.process_job(req)

    # verify the new response attributes
    assert hasattr(resp, "candidates_found")
    assert resp.candidates_found in (0, 1)
    assert all(c.confidence >= 0.5 for c in resp.top_candidates)
test/integration_test.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest

from fastapi import status


# 4) Integration test of the orchestrator & HTTP layer
def test_full_pipeline_http(client):
    """
    End-to-end check through the /jobs endpoint.

    Exercises URLExtractor, SerpAPIClient, GitHubClient,
    LinkedInProfileClient, ProfileExtractorService, the scorer, etc.;
    every external HTTP call is stubbed out in conftest.py.
    """
    payload = {
        "job_id": "test-job-1",
        "search_query": "pytorch transformers nlp followers:>10 repos:>5",
        "confidence_threshold": 0.5
    }
    resp = client.post("/jobs", json=payload)
    assert resp.status_code == status.HTTP_200_OK

    body = resp.json()
    # expecting a top-10 list of scored candidates
    assert "candidates" in body
    candidates = body["candidates"]
    assert isinstance(candidates, list)
    assert len(candidates) <= 10
    min_confidence = payload["confidence_threshold"]
    for cand in candidates:
        assert cand["confidence"] >= min_confidence
test/test_pipeline.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
Simple test script to verify the complete LinkedIn sourcing pipeline
"""
import asyncio
from app.models.schemas import JobDescription, JobProcessingRequest, ExperienceLevel
from app.services.agent import LinkedInSourcingAgent


async def test_pipeline():
    """Test the complete pipeline with a simple job"""

    # Create a sample job description
    job = JobDescription(
        title="Senior Python Developer",
        company="TechCorp",
        requirements=["Python", "Django", "PostgreSQL", "AWS"],
        location="San Francisco, CA",
        experience_level=ExperienceLevel.SENIOR,
        description="Looking for a senior Python developer with strong backend skills"
    )

    # Create job processing request
    request = JobProcessingRequest(
        job_description=job,
        max_candidates=5,  # Keep small for testing
        include_github=False,
        confidence_threshold=0.3
    )

    print("🚀 Starting LinkedIn sourcing pipeline test...")
    print(f"Job: {job.title} at {job.company}")
    print(f"Requirements: {', '.join(job.requirements)}")
    print(f"Max candidates: {request.max_candidates}")
    print("-" * 50)

    # Initialize and run the agent
    agent = LinkedInSourcingAgent()

    try:
        response = await agent.process_job(request)

        print("✅ Pipeline completed!")
        print(f"Job ID: {response.job_id}")
        print(f"Status: {response.status}")
        print(f"Candidates found: {response.candidates_found}")
        print(f"Top candidates: {len(response.top_candidates)}")
        print(f"Processing time: {response.processing_time:.2f}s")

        if response.top_candidates:
            print("\n📋 Top Candidates:")
            for i, candidate in enumerate(response.top_candidates[:3], 1):
                # BUG FIX: top_candidates holds ScoredCandidate objects whose
                # identity fields live on the nested ProfileData (the scorer
                # builds ScoredCandidate(profile=..., fit_score=..., ...)),
                # so `candidate.name` / `candidate.linkedin_url` would raise
                # AttributeError — access them via `candidate.profile`.
                print(f"\n{i}. {candidate.profile.name}")
                print(f"   Score: {candidate.fit_score:.1f}/10")
                print(f"   LinkedIn: {candidate.profile.linkedin_url}")
                if hasattr(candidate, 'score_breakdown'):
                    print(f"   Education: {candidate.score_breakdown.education:.1f}")
                    print(f"   Experience: {candidate.score_breakdown.experience_match:.1f}")
                    print(f"   Companies: {candidate.score_breakdown.company_relevance:.1f}")

        return response

    except Exception as e:
        print(f"❌ Pipeline failed: {str(e)}")
        raise


if __name__ == "__main__":
    asyncio.run(test_pipeline())
uv.lock CHANGED
@@ -456,6 +456,7 @@ dependencies = [
456
  { name = "groq" },
457
  { name = "httpx" },
458
  { name = "lxml" },
 
459
  { name = "pydantic-settings" },
460
  { name = "pymongo" },
461
  { name = "redis" },
@@ -474,6 +475,7 @@ requires-dist = [
474
  { name = "groq", specifier = ">=0.29.0" },
475
  { name = "httpx", specifier = ">=0.28.1" },
476
  { name = "lxml", specifier = ">=6.0.0" },
 
477
  { name = "pydantic-settings", specifier = ">=2.10.1" },
478
  { name = "pymongo", specifier = ">=4.13.2" },
479
  { name = "redis", specifier = ">=6.2.0" },
 
456
  { name = "groq" },
457
  { name = "httpx" },
458
  { name = "lxml" },
459
+ { name = "pydantic" },
460
  { name = "pydantic-settings" },
461
  { name = "pymongo" },
462
  { name = "redis" },
 
475
  { name = "groq", specifier = ">=0.29.0" },
476
  { name = "httpx", specifier = ">=0.28.1" },
477
  { name = "lxml", specifier = ">=6.0.0" },
478
+ { name = "pydantic", specifier = ">=2.11.7" },
479
  { name = "pydantic-settings", specifier = ">=2.10.1" },
480
  { name = "pymongo", specifier = ">=4.13.2" },
481
  { name = "redis", specifier = ">=6.2.0" },