Kaito117 committed on
Commit
4ea17aa
·
1 Parent(s): c556cb0

hf spaces setup files

Browse files
Files changed (5) hide show
  1. Dockerfile_spaces +34 -0
  2. README_API.md +108 -0
  3. README_spaces.md +8 -0
  4. api_main.py +255 -0
  5. demo_api.py +113 -0
Dockerfile_spaces ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies. curl is required by the HEALTHCHECK below;
# --no-install-recommends keeps optional packages out of the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Copy dependency manifests first so the install layer stays cached until
# they change, then install everything in a single resolver run.
COPY requirements.txt .
COPY pyproject.toml .
RUN pip install --no-cache-dir -r requirements.txt fastapi uvicorn

# Copy application code
COPY . .

# Create necessary directories and hand the tree to an unprivileged user.
# Hugging Face Docker Spaces run the container as UID 1000, so anything the
# app writes at runtime (e.g. /app/logs) must be owned by that UID.
RUN useradd --create-home --uid 1000 appuser \
    && mkdir -p /app/logs \
    && chown -R appuser:appuser /app

# Set environment variables
ENV PYTHONPATH=/app
ENV PORT=7860

# Expose port
EXPOSE 7860

# Drop root privileges for everything that runs from here on.
USER appuser

# Health check
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Run the application
CMD ["python", "api_main.py"]
README_API.md ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LinkedIn Sourcing Agent API 🎯
2
+
3
+ An AI-powered candidate sourcing and scoring system that automatically finds, analyzes, and ranks LinkedIn candidates for job openings.
4
+
5
+ ## 🚀 Features
6
+
7
+ - **Intelligent Search**: Generates optimized search queries for LinkedIn candidate discovery
8
+ - **Profile Analysis**: Extracts and structures candidate data using advanced parsing
9
+ - **AI Scoring**: Multi-dimensional scoring algorithm evaluating education, experience, skills, and cultural fit
10
+ - **Personalized Outreach**: Generates tailored outreach messages highlighting candidate strengths
11
+ - **RESTful API**: Easy integration with existing HR systems and workflows
12
+
13
+ ## 📡 API Usage
14
+
15
+ ### POST `/source-candidates`
16
+
17
+ Submit a job description and get ranked candidates with personalized outreach messages.
18
+
19
+ **Request:**
20
+ ```json
21
+ {
22
+ "title": "Software Engineer, ML Research",
23
+ "company": "Windsurf",
24
+ "location": "Mountain View, CA",
25
+ "requirements": [
26
+ "Experience with large language models (LLMs)",
27
+ "Strong background in machine learning and AI",
28
+ "PhD or Master's in Computer Science or related field"
29
+ ],
30
+ "description": "We are looking for a talented ML Research Engineer...",
31
+ "max_candidates": 10,
32
+ "confidence_threshold": 0.3
33
+ }
34
+ ```
35
+
36
+ **Response:**
37
+ ```json
38
+ {
39
+ "job_id": "abc123",
40
+ "job_title": "Software Engineer, ML Research",
41
+ "company": "Windsurf",
42
+ "candidates_found": 5,
43
+ "candidates_scored": 5,
44
+ "top_candidates": [
45
+ {
46
+ "name": "John Doe",
47
+ "linkedin_url": "https://linkedin.com/in/johndoe",
48
+ "fit_score": 8.5,
49
+ "confidence": 0.9,
50
+ "adjusted_score": 7.65,
51
+ "key_highlights": [
52
+ "PhD in Computer Science from Stanford",
53
+ "Current: Senior ML Engineer at Google",
54
+ "Skills: LLM, PyTorch, TensorFlow"
55
+ ],
56
+ "outreach_message": "Hi John, I noticed your impressive work with LLMs at Google and think you'd be perfect for our ML Research role at Windsurf...",
57
+ "profile_summary": {
58
+ "name": "John Doe",
59
+ "headline": "Senior ML Engineer | LLM Specialist",
60
+ "current_company": "Google",
61
+ "score_breakdown": {
62
+ "education": 9.5,
63
+ "career_trajectory": 8.0,
64
+ "company_relevance": 9.0,
65
+ "experience_match": 8.5
66
+ }
67
+ }
68
+ }
69
+ ],
70
+ "processing_time": 12.5,
71
+ "status": "completed",
72
+ "timestamp": "2025-07-01T02:30:00Z"
73
+ }
74
+ ```
75
+
76
+ ## 🔧 Endpoints
77
+
78
+ - `GET /` - API information
79
+ - `GET /health` - Health check
80
+ - `POST /source-candidates` - Main sourcing endpoint
81
+ - `GET /example` - Example request format
82
+ - `GET /docs` - Interactive API documentation
83
+
84
+ ## 🎯 Scoring Algorithm
85
+
86
+ The system evaluates candidates across multiple dimensions:
87
+
88
+ - **Education** (25%): University prestige, degree relevance, field of study
89
+ - **Experience Match** (30%): Role similarity, industry relevance, skill alignment
90
+ - **Career Trajectory** (20%): Progression, tenure, company quality
91
+ - **Company Relevance** (15%): Similar company experience, industry fit
92
+ - **Location Match** (10%): Geographic compatibility
93
+
94
+ ## 🚀 Quick Start
95
+
96
+ 1. Visit the API documentation at `/docs`
97
+ 2. Try the `/example` endpoint to see request format
98
+ 3. Submit a job via `/source-candidates`
99
+ 4. Get ranked candidates with personalized messages
100
+
101
+ ## πŸ” Note
102
+
103
+ This demo uses mock data for educational purposes. In production, you would need:
104
+ - Valid LinkedIn API access
105
+ - SerpAPI key for search
106
+ - Groq API key for LLM processing
107
+
108
+ Built with FastAPI, Pydantic, and modern async Python.
README_spaces.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ title: LinkedIn Sourcing Agent API
2
+ emoji: 🎯
3
+ colorFrom: blue
4
+ colorTo: purple
5
+ sdk: docker
6
+ pinned: false
7
+ license: mit
8
+ app_port: 7860
api_main.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ FastAPI application for LinkedIn Candidate Sourcing Agent
4
+ Deployable to HuggingFace Spaces
5
+ """
6
+ from fastapi import FastAPI, HTTPException
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from pydantic import BaseModel, Field
9
+ from typing import List, Optional
10
+ import asyncio
11
+ import logging
12
+ from datetime import datetime
13
+
14
+ # Import your existing components
15
+ from app.models.schemas import JobProcessingRequest, JobDescription
16
+ from app.services.agent import LinkedInSourcingAgent
17
+
18
# Configure logging: INFO and above for the whole process, with a
# module-level logger for this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI app. Swagger UI is served at /docs and ReDoc at /redoc.
app = FastAPI(
    title="LinkedIn Sourcing Agent API",
    description="AI-powered candidate sourcing and scoring system",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# Add CORS middleware. The API is open to every origin; credentials stay
# disabled because a wildcard origin must not be combined with
# credentialed (cookie / Authorization) requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the agent once at import time; the request handlers share it.
agent = LinkedInSourcingAgent()
42
+
43
+ # API Models
44
# Request body accepted by POST /source-candidates.
# (Documented with comments rather than a docstring so the generated
# schema description is left untouched.)
class JobInput(BaseModel):
    # Required: the role being hired for.
    title: str = Field(
        default=...,
        description="Job title",
        example="Software Engineer, ML Research",
    )
    # Required: the hiring company.
    company: str = Field(
        default=...,
        description="Company name",
        example="Windsurf",
    )
    # Optional: where the job is based.
    location: Optional[str] = Field(
        default=None,
        description="Job location",
        example="Mountain View, CA",
    )
    # Optional: free-text requirement bullets; defaults to an empty list.
    requirements: List[str] = Field(
        default_factory=list,
        description="List of job requirements",
        example=[
            "Experience with large language models (LLMs)",
            "Strong background in machine learning and AI",
            "PhD or Master's in Computer Science or related field",
        ],
    )
    # Optional: long-form description of the role.
    description: Optional[str] = Field(
        default=None,
        description="Detailed job description",
        example="We are looking for a talented ML Research Engineer to join our team working on cutting-edge AI technologies.",
    )
    # Validated to the inclusive range 1..50.
    max_candidates: int = Field(
        default=10,
        ge=1,
        le=50,
        description="Maximum number of candidates to find",
    )
    # Validated to the inclusive range 0..1.
    confidence_threshold: float = Field(
        default=0.3,
        ge=0,
        le=1,
        description="Minimum confidence threshold",
    )
64
+
65
# One candidate as returned to API clients; instances are built by
# convert_scored_candidate().
class CandidateOutput(BaseModel):
    # Identity
    name: str
    linkedin_url: str

    # Scoring values copied from the internal scored candidate
    fit_score: float
    confidence: float
    adjusted_score: float

    # Human-readable summary strings derived from the profile
    key_highlights: List[str]
    outreach_message: str

    # Free-form dict holding profile fields, counts and the score breakdown
    profile_summary: dict
74
+
75
# Response body of POST /source-candidates.
class SourcingResponse(BaseModel):
    # Job identification: job_id comes from the agent result, title and
    # company are echoed from the request.
    job_id: str
    job_title: str
    company: str

    # Result counters and the converted candidate list
    candidates_found: int
    candidates_scored: int
    top_candidates: List[CandidateOutput]

    # Run metadata
    processing_time: float
    status: str
    timestamp: datetime
85
+
86
+ # Helper function to convert ScoredCandidate to API format
87
def convert_scored_candidate(candidate) -> CandidateOutput:
    """Convert an internal scored candidate to the API response format.

    Args:
        candidate: Scored candidate from the agent. The attributes read
            here are ``profile``, ``score_breakdown``, ``fit_score``,
            ``confidence``, ``adjusted_score`` and ``outreach_message``.

    Returns:
        A ``CandidateOutput`` with up to two education highlights, up to
        two experience highlights, one skills line, one location line and
        a ``profile_summary`` dict.
    """
    profile = candidate.profile

    # Normalise missing collections to empty lists. The highlight logic
    # already tolerated falsy values, but the counts below used a bare
    # len() and raised TypeError on None, which made the caller drop the
    # whole candidate.
    education = profile.education or []
    experience = profile.experience or []
    skills = profile.skills or []

    # Education highlights: first two entries that have both fields set.
    key_highlights = [
        f"{edu.degree} from {edu.institution}"
        for edu in education[:2]
        if edu.institution and edu.degree
    ]

    # Experience highlights: entry 0 is labelled "Current" and entry 1
    # "Previous" (presumably the list is ordered most-recent first -
    # verify against the profile parser).
    for label, exp in zip(("Current", "Previous"), experience):
        key_highlights.append(f"{label}: {exp.title} at {exp.company}")

    # Skills highlight: top 5 skills on one line.
    if skills:
        key_highlights.append(f"Skills: {', '.join(skills[:5])}")

    # Location, if available.
    if profile.location:
        key_highlights.append(f"Location: {profile.location}")

    breakdown = candidate.score_breakdown
    profile_summary = {
        "name": profile.name,
        "headline": profile.headline,
        "current_company": profile.current_company,
        "current_position": profile.current_position,
        "location": profile.location,
        "education_count": len(education),
        "experience_count": len(experience),
        "skills_count": len(skills),
        "score_breakdown": {
            "education": breakdown.education,
            "career_trajectory": breakdown.career_trajectory,
            "company_relevance": breakdown.company_relevance,
            "experience_match": breakdown.experience_match,
        },
    }

    return CandidateOutput(
        name=profile.name,
        linkedin_url=profile.linkedin_url,
        fit_score=candidate.fit_score,
        confidence=candidate.confidence,
        adjusted_score=candidate.adjusted_score,
        key_highlights=key_highlights,
        outreach_message=candidate.outreach_message,
        profile_summary=profile_summary,
    )
146
+
147
# NOTE: the docstring is published as this operation's description in the
# generated API docs. It previously read "Health check endpoint", which
# describes /health rather than this route.
@app.get("/")
async def root():
    """API information"""
    return {
        "message": "LinkedIn Sourcing Agent API",
        "status": "active",
        "version": "1.0.0",
        "docs": "/docs",
    }
156
+
157
@app.get("/health")
async def health_check():
    """Detailed health check"""
    # Liveness payload: fixed status and service name plus the server's
    # current local time (naive datetime) as an ISO-8601 string.
    checked_at = datetime.now().isoformat()
    return dict(
        status="healthy",
        timestamp=checked_at,
        service="linkedin-sourcing-agent",
    )
165
+
166
# Contract (kept in comments so the published endpoint description below
# stays unchanged):
#   - job_input: validated JobInput request body.
#   - returns:   SourcingResponse with at most job_input.max_candidates
#                entries in top_candidates.
#   - raises:    HTTPException(500) for any failure while processing.
@app.post("/source-candidates", response_model=SourcingResponse)
async def source_candidates(job_input: JobInput):
    """
    Source and score candidates for a given job description

    This endpoint:
    1. Searches for LinkedIn candidates based on job requirements
    2. Extracts and analyzes candidate profiles
    3. Scores candidates using AI-powered algorithms
    4. Generates personalized outreach messages
    5. Returns top candidates ranked by fit score
    """
    try:
        logger.info(
            "Processing job request: %s at %s",
            job_input.title,
            job_input.company,
        )

        # Convert API input to internal format; a missing description is
        # replaced by a generic one-liner built from company and title.
        job_desc = JobDescription(
            title=job_input.title,
            company=job_input.company,
            location=job_input.location,
            requirements=job_input.requirements,
            description=job_input.description
            or f"Join {job_input.company} as a {job_input.title}",
        )

        # Create processing request
        request = JobProcessingRequest(
            job_description=job_desc,
            max_candidates=job_input.max_candidates,
            confidence_threshold=job_input.confidence_threshold,
        )

        # Process the job
        result = await agent.process_job(request)

        # Convert candidates to API format. The cap follows the caller's
        # max_candidates (validated to 1..50) instead of a fixed 10, so a
        # request for more than ten candidates is honoured.
        api_candidates = []
        for candidate in result.candidates[:job_input.max_candidates]:
            try:
                api_candidates.append(convert_scored_candidate(candidate))
            except Exception as e:
                # Best effort: one malformed candidate must not fail the
                # whole response.
                logger.warning("Failed to convert candidate: %s", e)

        response = SourcingResponse(
            job_id=result.job_id,
            job_title=job_input.title,
            company=job_input.company,
            candidates_found=result.candidates_found,
            candidates_scored=len(result.candidates),
            top_candidates=api_candidates,
            processing_time=result.processing_time,
            status=result.status,
            timestamp=datetime.now(),
        )

        logger.info(
            "Successfully processed job. Found %d candidates",
            len(api_candidates),
        )
        return response

    except Exception as e:
        # logger.exception records the traceback; chaining keeps the
        # original cause attached to the HTTP error.
        logger.exception("Error processing job request: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to process job request: {str(e)}",
        ) from e
231
+
232
@app.get("/example")
async def get_example():
    """Get an example job input for testing"""
    # Static sample payload; its keys mirror the fields of the
    # /source-candidates request body.
    sample_requirements = [
        "Experience with large language models (LLMs)",
        "Strong background in machine learning and AI",
        "PhD or Master's in Computer Science or related field",
        "Experience with search and ranking systems",
        "Python and deep learning frameworks",
    ]
    sample_job = {
        "title": "Software Engineer, ML Research",
        "company": "Windsurf",
        "location": "Mountain View, CA",
        "requirements": sample_requirements,
        "description": "We are looking for a talented ML Research Engineer to join our team working on cutting-edge AI technologies. You will be responsible for developing and improving large language models, search algorithms, and AI-powered features.",
        "max_candidates": 5,
        "confidence_threshold": 0.3,
    }
    return {"example_input": sample_job}
252
+
253
if __name__ == "__main__":
    import os

    import uvicorn

    # Port 7860 is standard for HuggingFace Spaces. Dockerfile_spaces
    # exports PORT, so honour it when set and fall back to 7860 otherwise.
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "7860")))
demo_api.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Demo script to test the LinkedIn Sourcing Agent API
4
+ """
5
+ import requests
6
+ import json
7
+ import time
8
+
9
+ # API base URL (adjust for your deployment)
10
+ BASE_URL = "http://localhost:7860" # Local testing
11
+ # BASE_URL = "https://your-huggingface-space.hf.space" # HuggingFace deployment
12
+
13
def test_api():
    """Test the API with a sample job.

    Runs three steps against ``BASE_URL`` and prints progress to stdout:
    a health check, a fetch of the example payload, and a candidate
    sourcing request. On success the full sourcing response is written to
    ``demo_results.json`` in the current directory. The function returns
    ``None`` and stops early if either of the first two steps fails.
    """
    print("🎯 LinkedIn Sourcing Agent API Demo")
    print("=" * 50)

    # Test health check. A timeout keeps the demo from hanging forever
    # when the server accepts the connection but never answers.
    print("1. Health Check...")
    try:
        response = requests.get(f"{BASE_URL}/health", timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"❌ Cannot connect to API: {e}")
        return
    if response.status_code != 200:
        print("❌ API health check failed")
        return
    print("✅ API is healthy")

    # Get example request format. The body is parsed only to confirm the
    # endpoint returns valid JSON; the request below uses its own payload.
    print("\n2. Getting example format...")
    try:
        response = requests.get(f"{BASE_URL}/example", timeout=10)
        response.raise_for_status()  # an error status must not count as success
        response.json()
        print("✅ Example format retrieved")
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"❌ Failed to get example: {e}")
        return

    # Test job sourcing
    print("\n3. Testing candidate sourcing...")
    job_data = {
        "title": "Software Engineer, ML Research",
        "company": "Windsurf",
        "location": "Mountain View, CA",
        "requirements": [
            "Experience with large language models (LLMs)",
            "Strong background in machine learning and AI",
            "PhD or Master's in Computer Science or related field",
            "Experience with search and ranking systems",
            "Python and deep learning frameworks",
        ],
        "description": "We are looking for a talented ML Research Engineer to join our team working on cutting-edge AI technologies. You will be responsible for developing and improving large language models, search algorithms, and AI-powered features.",
        "max_candidates": 5,
        "confidence_threshold": 0.3,
    }

    print(f"📋 Job: {job_data['title']} at {job_data['company']}")
    print("🔍 Searching for candidates...")

    start_time = time.time()

    try:
        response = requests.post(
            f"{BASE_URL}/source-candidates",
            json=job_data,
            timeout=60,  # 60 second timeout
        )

        if response.status_code == 200:
            result = response.json()
            processing_time = time.time() - start_time

            print(f"✅ Search completed in {processing_time:.1f}s")
            print("📊 Results:")
            print(f" Job ID: {result['job_id']}")
            print(f" Candidates Found: {result['candidates_found']}")
            print(f" Candidates Scored: {result['candidates_scored']}")
            print(f" Top Candidates: {len(result['top_candidates'])}")
            print(f" Status: {result['status']}")

            # Show top candidates (first three only)
            print("\n🎯 Top Candidates:")
            for i, candidate in enumerate(result['top_candidates'][:3], 1):
                print(f"\n {i}. {candidate['name']}")
                print(f" Fit Score: {candidate['fit_score']}/10")
                print(f" Confidence: {candidate['confidence']}")
                print(f" Adjusted Score: {candidate['adjusted_score']}")
                print(f" Company: {candidate['profile_summary']['current_company']}")
                print(f" LinkedIn: {candidate['linkedin_url']}")
                print(" Key Highlights:")
                for highlight in candidate['key_highlights'][:3]:
                    print(f" • {highlight}")
                print(f" Outreach: {candidate['outreach_message'][:100]}...")

            # Save full results
            with open('demo_results.json', 'w', encoding='utf-8') as f:
                json.dump(result, f, indent=2, default=str)
            print("\n💾 Full results saved to demo_results.json")

        else:
            print(f"❌ API request failed: {response.status_code}")
            print(f" Error: {response.text}")

    except requests.exceptions.Timeout:
        print("⏰ Request timeout - this is normal for complex searches")
    except Exception as e:
        # Demo boundary: report any other failure (malformed response,
        # file write error, ...) instead of crashing with a traceback.
        print(f"❌ Request failed: {e}")


if __name__ == "__main__":
    test_api()