next
Browse files- .claude/settings.local.json +2 -1
- CLAUDE.md +30 -18
- CoverLetterGenerator.py +6 -18
- EmailFinderUsingClaude.py +83 -121
- HandshakeDMAutomation.py +137 -337
- HandshakeJobApply.py +95 -189
- ResumeGenerator.py +6 -18
- browser_utils.py +148 -0
- llm_client.py +93 -0
- pdf_utils.py +52 -0
- requirements.txt +5 -5
- test_playwright.py +98 -0
.claude/settings.local.json
CHANGED
|
@@ -26,7 +26,8 @@
|
|
| 26 |
"Bash(pdflatex:*)",
|
| 27 |
"Bash(initexmf:*)",
|
| 28 |
"Bash(del /Q \"C:\\Users\\sumedh\\OneDrive - Georgia Institute of Technology\\Python\\Anthropic Hackathon - AI Apply\\generated_resumes\\*.*\")",
|
| 29 |
-
"Bash(git checkout:*)"
|
|
|
|
| 30 |
],
|
| 31 |
"deny": [],
|
| 32 |
"ask": []
|
|
|
|
| 26 |
"Bash(pdflatex:*)",
|
| 27 |
"Bash(initexmf:*)",
|
| 28 |
"Bash(del /Q \"C:\\Users\\sumedh\\OneDrive - Georgia Institute of Technology\\Python\\Anthropic Hackathon - AI Apply\\generated_resumes\\*.*\")",
|
| 29 |
+
"Bash(git checkout:*)",
|
| 30 |
+
"WebSearch"
|
| 31 |
],
|
| 32 |
"deny": [],
|
| 33 |
"ask": []
|
CLAUDE.md
CHANGED
|
@@ -5,8 +5,8 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
|
| 5 |
## Project Overview
|
| 6 |
|
| 7 |
Flask web application for automated job outreach combining two workflows:
|
| 8 |
-
1. **Email Campaigns**:
|
| 9 |
-
2. **Handshake DM Automation**:
|
| 10 |
|
| 11 |
Key features: User authentication, SQLite persistence, real-time SSE progress updates, contact deduplication.
|
| 12 |
|
|
@@ -15,10 +15,13 @@ Key features: User authentication, SQLite persistence, real-time SSE progress up
|
|
| 15 |
```bash
|
| 16 |
# Install and start
|
| 17 |
pip install -r requirements.txt
|
|
|
|
| 18 |
python app.py # Access at http://localhost:5000
|
| 19 |
|
| 20 |
-
# Test
|
| 21 |
-
python
|
|
|
|
|
|
|
| 22 |
|
| 23 |
# Run database migrations when schema changes
|
| 24 |
python migrate_db.py
|
|
@@ -38,18 +41,18 @@ Three-stage **in-memory** pipeline (no intermediate files):
|
|
| 38 |
- Checks both user history (DB) and global `workflow_company_log.json`
|
| 39 |
|
| 40 |
3. **Email Generation & Sending**:
|
| 41 |
-
- `createEmailsUsingClaude(contacts, resume_path, custom_message)` → adds `email_body` field (resume as
|
| 42 |
- `SendEmailWorkFlowV2.main()` β sends via `SimpleEmailer` with rate limiting
|
| 43 |
|
| 44 |
### Handshake DM Workflow
|
| 45 |
Browser automation with manual login (`HandshakeDMAutomation.py`):
|
| 46 |
|
| 47 |
-
1.
|
| 48 |
2. User logs in manually → clicks "I'm Logged In" UI button
|
| 49 |
-
3.
|
| 50 |
4. `sendAllDMs()` iterates employer pages:
|
| 51 |
- Checks `handshake_dm_log.json` for duplicates
|
| 52 |
-
- Finds recruiter profiles → generates personalized message (
|
| 53 |
- Automates Message button → enters text → sends
|
| 54 |
- Saves company to log after success
|
| 55 |
|
|
@@ -82,9 +85,10 @@ Methods: `get_contacted_domains()`, `add_sent_emails()`, `add_contact_history()`
|
|
| 82 |
## Key Implementation Details
|
| 83 |
|
| 84 |
### API Configuration
|
| 85 |
-
- **
|
|
|
|
| 86 |
- **SMTP credentials**: Per-user in DB (`User.sender_email`, `User.sender_password`)
|
| 87 |
-
- **Current model**: `
|
| 88 |
|
| 89 |
### Deduplication Strategy
|
| 90 |
**Email**: Check `User.emails_sent_history` + `User.get_contacted_domains()` + `workflow_company_log.json`
|
|
@@ -92,12 +96,12 @@ Methods: `get_contacted_domains()`, `add_sent_emails()`, `add_contact_history()`
|
|
| 92 |
|
| 93 |
Both systems prevent re-contacting same company/domain.
|
| 94 |
|
| 95 |
-
###
|
| 96 |
1. **Contact Discovery**: Returns JSON `[{company_name, contact_name, email_address}]` with `contact_name: null` for generic emails
|
| 97 |
-
2. **Email Generation**: Resume
|
| 98 |
3. **Handshake Industry**: Maps user input to 100+ Handshake categories (cleantech forced to "Utilities & Renewable Energy")
|
| 99 |
4. **Handshake Location**: Converts "City, State" to lat/long (requires comma in input)
|
| 100 |
-
5. **Handshake DM**: 3-4 sentence limit, resume
|
| 101 |
|
| 102 |
### SimpleEmailer (`SimpleEmailer.py`)
|
| 103 |
- Auto-detects SMTP server from email domain (Gmail, Office365, Yahoo, etc.)
|
|
@@ -105,13 +109,14 @@ Both systems prevent re-contacting same company/domain.
|
|
| 105 |
- Logs to `email_log_YYYYMMDD.log`
|
| 106 |
- Subject line hardcoded in `SendEmailWorkFlowV2.py:18`
|
| 107 |
|
| 108 |
-
### Handshake Automation
|
| 109 |
- Browser visible by default (users see automation)
|
| 110 |
- Manual login required (no credential storage)
|
| 111 |
- 30-second wait before closing browser
|
| 112 |
-
- Multiple XPath selectors tried
|
| 113 |
- URL encoding: `[]` → `%5B%5D` for filter URLs
|
| 114 |
- Requires `Industry Codes Handshake.xlsx` for industry code lookup
|
|
|
|
| 115 |
|
| 116 |
## Common Development Tasks
|
| 117 |
|
|
@@ -138,6 +143,7 @@ if progress_callback:
|
|
| 138 |
|
| 139 |
### Test Individual Modules
|
| 140 |
```bash
|
|
|
|
| 141 |
python EmailFinderUsingClaude.py # Update credentials in __main__ block
|
| 142 |
python HandshakeDMAutomation.py
|
| 143 |
python SimpleEmailer.py
|
|
@@ -151,9 +157,15 @@ python SimpleEmailer.py
|
|
| 151 |
**Email logs**: `email_log_YYYYMMDD.log`
|
| 152 |
**Temp uploads**: `uploads/` (cleaned after processing)
|
| 153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
## Known Limitations
|
| 155 |
|
| 156 |
-
- **Security**: API keys in `
|
| 157 |
- **Scalability**: SQLite (single-threaded writes), in-memory SSE queues, no Celery
|
| 158 |
-
- **Error Handling**: No retry logic for
|
| 159 |
-
- **Platform**: Windows paths,
|
|
|
|
|
|
| 5 |
## Project Overview
|
| 6 |
|
| 7 |
Flask web application for automated job outreach combining two workflows:
|
| 8 |
+
1. **Email Campaigns**: OpenRouter LLM finds contacts → generates personalized emails → sends via SMTP
|
| 9 |
+
2. **Handshake DM Automation**: Playwright automates direct messages to recruiters on Handshake
|
| 10 |
|
| 11 |
Key features: User authentication, SQLite persistence, real-time SSE progress updates, contact deduplication.
|
| 12 |
|
|
|
|
| 15 |
```bash
|
| 16 |
# Install and start
|
| 17 |
pip install -r requirements.txt
|
| 18 |
+
playwright install chromium # Install Playwright browser
|
| 19 |
python app.py # Access at http://localhost:5000
|
| 20 |
|
| 21 |
+
# Test Playwright and LLM client
|
| 22 |
+
python test_playwright.py # Test browser automation
|
| 23 |
+
python test_playwright.py --llm # Test OpenRouter LLM client
|
| 24 |
+
python test_playwright.py --all # Test both
|
| 25 |
|
| 26 |
# Run database migrations when schema changes
|
| 27 |
python migrate_db.py
|
|
|
|
| 41 |
- Checks both user history (DB) and global `workflow_company_log.json`
|
| 42 |
|
| 43 |
3. **Email Generation & Sending**:
|
| 44 |
+
- `createEmailsUsingClaude(contacts, resume_path, custom_message)` → adds `email_body` field (resume extracted as text)
|
| 45 |
- `SendEmailWorkFlowV2.main()` → sends via `SimpleEmailer` with rate limiting
|
| 46 |
|
| 47 |
### Handshake DM Workflow
|
| 48 |
Browser automation with manual login (`HandshakeDMAutomation.py`):
|
| 49 |
|
| 50 |
+
1. Playwright opens Chromium browser (visible, not headless)
|
| 51 |
2. User logs in manually → clicks "I'm Logged In" UI button
|
| 52 |
+
3. OpenRouter LLM maps user's industry to Handshake taxonomy + geocodes city
|
| 53 |
4. `sendAllDMs()` iterates employer pages:
|
| 54 |
- Checks `handshake_dm_log.json` for duplicates
|
| 55 |
+
- Finds recruiter profiles → generates personalized message (LLM + resume text)
|
| 56 |
- Automates Message button → enters text → sends
|
| 57 |
- Saves company to log after success
|
| 58 |
|
|
|
|
| 85 |
## Key Implementation Details
|
| 86 |
|
| 87 |
### API Configuration
|
| 88 |
+
- **OpenRouter API key**: Environment variable `OPENROUTER_API_KEY` in `.env` file
|
| 89 |
+
- **LLM Client**: `llm_client.py` provides singleton wrapper using OpenAI SDK with OpenRouter base URL
|
| 90 |
- **SMTP credentials**: Per-user in DB (`User.sender_email`, `User.sender_password`)
|
| 91 |
+
- **Current model**: `xiaomi/mimo-v2-flash:free` (configured in `llm_client.py`)
|
| 92 |
|
| 93 |
### Deduplication Strategy
|
| 94 |
**Email**: Check `User.emails_sent_history` + `User.get_contacted_domains()` + `workflow_company_log.json`
|
|
|
|
| 96 |
|
| 97 |
Both systems prevent re-contacting same company/domain.
|
| 98 |
|
| 99 |
+
### LLM Prompt Patterns
|
| 100 |
1. **Contact Discovery**: Returns JSON `[{company_name, contact_name, email_address}]` with `contact_name: null` for generic emails
|
| 101 |
+
2. **Email Generation**: Resume text extracted via `pdf_utils.py` → JSON with `email_body` field
|
| 102 |
3. **Handshake Industry**: Maps user input to 100+ Handshake categories (cleantech forced to "Utilities & Renewable Energy")
|
| 103 |
4. **Handshake Location**: Converts "City, State" to lat/long (requires comma in input)
|
| 104 |
+
5. **Handshake DM**: 3-4 sentence limit, resume text included in prompt, handles "Dr. Name\nTitle" format
|
| 105 |
|
| 106 |
### SimpleEmailer (`SimpleEmailer.py`)
|
| 107 |
- Auto-detects SMTP server from email domain (Gmail, Office365, Yahoo, etc.)
|
|
|
|
| 109 |
- Logs to `email_log_YYYYMMDD.log`
|
| 110 |
- Subject line hardcoded in `SendEmailWorkFlowV2.py:18`
|
| 111 |
|
| 112 |
+
### Handshake Automation (Playwright)
|
| 113 |
- Browser visible by default (users see automation)
|
| 114 |
- Manual login required (no credential storage)
|
| 115 |
- 30-second wait before closing browser
|
| 116 |
+
- Multiple CSS/XPath selectors tried via `find_element_with_fallback()` (Handshake DOM changes)
|
| 117 |
- URL encoding: `[]` → `%5B%5D` for filter URLs
|
| 118 |
- Requires `Industry Codes Handshake.xlsx` for industry code lookup
|
| 119 |
+
- Anti-detection: `playwright-stealth` + custom init scripts to hide automation
|
| 120 |
|
| 121 |
## Common Development Tasks
|
| 122 |
|
|
|
|
| 143 |
|
| 144 |
### Test Individual Modules
|
| 145 |
```bash
|
| 146 |
+
python test_playwright.py # Test browser + LLM setup
|
| 147 |
python EmailFinderUsingClaude.py # Update credentials in __main__ block
|
| 148 |
python HandshakeDMAutomation.py
|
| 149 |
python SimpleEmailer.py
|
|
|
|
| 157 |
**Email logs**: `email_log_YYYYMMDD.log`
|
| 158 |
**Temp uploads**: `uploads/` (cleaned after processing)
|
| 159 |
|
| 160 |
+
### Utility Modules
|
| 161 |
+
- **`llm_client.py`**: OpenRouter API wrapper using OpenAI SDK, singleton pattern via `get_client()`
|
| 162 |
+
- **`pdf_utils.py`**: PDF text extraction using PyPDF2
|
| 163 |
+
- **`browser_utils.py`**: Playwright browser manager with anti-detection, helper functions `find_element_with_fallback()`, `scroll_to_bottom()`
|
| 164 |
+
|
| 165 |
## Known Limitations
|
| 166 |
|
| 167 |
+
- **Security**: API keys in `.env` file, unencrypted SMTP passwords in DB, placeholder Flask secret key
|
| 168 |
- **Scalability**: SQLite (single-threaded writes), in-memory SSE queues, no Celery
|
| 169 |
+
- **Error Handling**: No retry logic for OpenRouter API/SMTP failures, brittle selectors for Handshake DOM
|
| 170 |
+
- **Platform**: Windows paths, Chromium required (auto-installed via `playwright install chromium`)
|
| 171 |
+
- **LLM**: MiMo v2 Flash doesn't support PDF attachments; PDFs are converted to text first
|
CoverLetterGenerator.py
CHANGED
|
@@ -1,20 +1,19 @@
|
|
| 1 |
"""
|
| 2 |
Cover Letter Generation Module for ATS Optimization
|
| 3 |
|
| 4 |
-
This module uses
|
| 5 |
-
tailored cover letters as professional LaTeX PDFs.
|
| 6 |
"""
|
| 7 |
|
| 8 |
import os
|
| 9 |
import re
|
| 10 |
import json
|
| 11 |
import shutil
|
| 12 |
-
import anthropic
|
| 13 |
-
import setup
|
| 14 |
import subprocess
|
| 15 |
from datetime import datetime
|
| 16 |
from pathlib import Path
|
| 17 |
from PyPDF2 import PdfReader
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def check_latex_installation():
|
|
@@ -59,12 +58,7 @@ class ATSCoverLetterGenerator:
|
|
| 59 |
self.resume_text = resume_text
|
| 60 |
self.candidate_name = candidate_name
|
| 61 |
self.candidate_email = candidate_email
|
| 62 |
-
self.
|
| 63 |
-
|
| 64 |
-
if not self.claude_api_key or not self.claude_api_key.startswith('sk-ant-'):
|
| 65 |
-
raise ValueError("Invalid API key in setup.py")
|
| 66 |
-
|
| 67 |
-
self.claude_client = anthropic.Anthropic(api_key=self.claude_api_key)
|
| 68 |
|
| 69 |
# Create directories for generated cover letters
|
| 70 |
self.generated_letters_dir = os.path.join(os.path.dirname(__file__), "generated_cover_letters")
|
|
@@ -124,14 +118,8 @@ Return ONLY a JSON object with the following structure (no markdown, no code blo
|
|
| 124 |
IMPORTANT: Each paragraph should be a complete, grammatically correct paragraph. Do not use placeholder text."""
|
| 125 |
|
| 126 |
try:
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
max_tokens=2000,
|
| 130 |
-
messages=[{"role": "user", "content": prompt}]
|
| 131 |
-
)
|
| 132 |
-
|
| 133 |
-
# Parse response
|
| 134 |
-
response_text = response.content[0].text.strip()
|
| 135 |
|
| 136 |
# Remove markdown code blocks if present
|
| 137 |
if response_text.startswith("```"):
|
|
|
|
| 1 |
"""
|
| 2 |
Cover Letter Generation Module for ATS Optimization
|
| 3 |
|
| 4 |
+
This module uses OpenRouter API (MiMo v2 Flash) to analyze job descriptions
|
| 5 |
+
and generate tailored cover letters as professional LaTeX PDFs.
|
| 6 |
"""
|
| 7 |
|
| 8 |
import os
|
| 9 |
import re
|
| 10 |
import json
|
| 11 |
import shutil
|
|
|
|
|
|
|
| 12 |
import subprocess
|
| 13 |
from datetime import datetime
|
| 14 |
from pathlib import Path
|
| 15 |
from PyPDF2 import PdfReader
|
| 16 |
+
from llm_client import get_client
|
| 17 |
|
| 18 |
|
| 19 |
def check_latex_installation():
|
|
|
|
| 58 |
self.resume_text = resume_text
|
| 59 |
self.candidate_name = candidate_name
|
| 60 |
self.candidate_email = candidate_email
|
| 61 |
+
self.llm_client = get_client()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# Create directories for generated cover letters
|
| 64 |
self.generated_letters_dir = os.path.join(os.path.dirname(__file__), "generated_cover_letters")
|
|
|
|
| 118 |
IMPORTANT: Each paragraph should be a complete, grammatically correct paragraph. Do not use placeholder text."""
|
| 119 |
|
| 120 |
try:
|
| 121 |
+
response_text = self.llm_client.create_message(prompt, max_tokens=2000)
|
| 122 |
+
response_text = response_text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
# Remove markdown code blocks if present
|
| 125 |
if response_text.startswith("```"):
|
EmailFinderUsingClaude.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
import json
|
| 2 |
import requests
|
| 3 |
import os
|
| 4 |
-
from anthropic import Anthropic
|
| 5 |
import FindEmailWorkFlowV2
|
| 6 |
import SendEmailWorkFlowV2
|
| 7 |
import setup
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# Hunter.io API Key - Replace with your actual API key
|
| 10 |
HUNTER_API_KEY = setup.HUNTER_API_KEY
|
|
@@ -57,13 +58,13 @@ def load_legacy_excel_emails(excel_path="Workflow Company Log.xlsx"):
|
|
| 57 |
|
| 58 |
return emails, domains
|
| 59 |
|
| 60 |
-
def askClaudeToFindCompanies(api_key, location="Atlanta", industry="Clean Tech", num_companies=5):
|
| 61 |
"""
|
| 62 |
-
Uses
|
| 63 |
Returns only company names and domains (no emails or contacts).
|
| 64 |
|
| 65 |
Args:
|
| 66 |
-
api_key:
|
| 67 |
location: City or region to search for companies (default: "Atlanta")
|
| 68 |
industry: Industry type to target (default: "Clean Tech")
|
| 69 |
num_companies: Number of companies to find (default: 5)
|
|
@@ -71,7 +72,7 @@ def askClaudeToFindCompanies(api_key, location="Atlanta", industry="Clean Tech",
|
|
| 71 |
Returns:
|
| 72 |
list: List of dicts with keys: company_name, domain
|
| 73 |
"""
|
| 74 |
-
client =
|
| 75 |
|
| 76 |
# Build industry-specific guidance
|
| 77 |
industry_examples = ""
|
|
@@ -88,49 +89,40 @@ def askClaudeToFindCompanies(api_key, location="Atlanta", industry="Clean Tech",
|
|
| 88 |
else:
|
| 89 |
industry_examples = f"({industry} related technologies and services)"
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
"- Each entry should represent a real company you can verify exists\n\n"
|
| 123 |
-
|
| 124 |
-
"OUTPUT FORMAT:\n"
|
| 125 |
-
"- Output only valid JSON with no markdown, explanations, or commentary\n"
|
| 126 |
-
"- Example: [{\"company_name\": \"Acme Solar\", \"domain\": \"acmesolar.com\"}]\n"
|
| 127 |
-
"- Example: [{\"company_name\": \"Green Energy Solutions\", \"domain\": \"greenenergysolutions.com\"}]"
|
| 128 |
-
),
|
| 129 |
-
}
|
| 130 |
-
],
|
| 131 |
)
|
| 132 |
|
| 133 |
-
response_text =
|
| 134 |
|
| 135 |
|
| 136 |
# Clean response text - remove markdown code blocks if present
|
|
@@ -284,85 +276,63 @@ def enrichCompaniesWithHunter(companies):
|
|
| 284 |
return contacts
|
| 285 |
|
| 286 |
|
| 287 |
-
def createEmailsUsingClaude(contacts, resume_path, api_key, industry="Clean Tech", custom_message=""):
|
| 288 |
"""
|
| 289 |
-
Uses
|
| 290 |
|
| 291 |
Args:
|
| 292 |
contacts: List of contact dicts from askClaudeToFindContacts
|
| 293 |
resume_path: Path to PDF resume file
|
| 294 |
-
api_key:
|
| 295 |
industry: Industry type to tailor email content (default: "Clean Tech")
|
| 296 |
custom_message: Optional custom message to incorporate into emails (default: "")
|
| 297 |
|
| 298 |
Returns:
|
| 299 |
list: List of dicts with company_name, contact_name, email_address, email_body
|
| 300 |
"""
|
| 301 |
-
client =
|
| 302 |
|
| 303 |
-
#
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
encoded_file = base64.standard_b64encode(file_data).decode("utf-8")
|
| 308 |
|
| 309 |
-
# Create contact list text
|
| 310 |
contact_text = json.dumps(contacts, indent=2)
|
| 311 |
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
f"- Research each company and reference their actual work in {industry}\n"
|
| 343 |
-
f"- Extract the applicant's name, university, and major from the resume\n"
|
| 344 |
-
f"- Match skills from the resume to each company's focus area\n"
|
| 345 |
-
f"- Make each email unique - no copy-paste language between companies\n"
|
| 346 |
-
f"- Keep emails concise (150-200 words)\n\n"
|
| 347 |
-
f"Company contacts:\n{contact_text}\n\n"
|
| 348 |
-
f"Return a JSON array with the same contacts but add an 'email_body' field containing the tailored email body. "
|
| 349 |
-
f"Do not include subject line or attachment information. Return only valid JSON with no additional text."
|
| 350 |
-
),
|
| 351 |
-
},
|
| 352 |
-
{
|
| 353 |
-
"type": "document",
|
| 354 |
-
"source": {
|
| 355 |
-
"type": "base64",
|
| 356 |
-
"media_type": "application/pdf",
|
| 357 |
-
"data": encoded_file,
|
| 358 |
-
},
|
| 359 |
-
},
|
| 360 |
-
],
|
| 361 |
-
}
|
| 362 |
-
],
|
| 363 |
)
|
| 364 |
|
| 365 |
-
response_text =
|
| 366 |
|
| 367 |
|
| 368 |
# Clean response text - remove markdown code blocks if present
|
|
@@ -437,15 +407,7 @@ def main(
|
|
| 437 |
Returns:
|
| 438 |
dict: Email sending results with success/failure counts and emails_sent list
|
| 439 |
"""
|
| 440 |
-
#
|
| 441 |
-
import os
|
| 442 |
-
api_key = setup.API_KEY
|
| 443 |
-
|
| 444 |
-
if not api_key:
|
| 445 |
-
raise ValueError(
|
| 446 |
-
"ANTHROPIC_API_KEY environment variable not set. "
|
| 447 |
-
"Please set it with your Claude API key from https://console.anthropic.com/"
|
| 448 |
-
)
|
| 449 |
|
| 450 |
# Initialize user history if not provided
|
| 451 |
if user_emails_sent is None:
|
|
@@ -486,9 +448,9 @@ def main(
|
|
| 486 |
attempt += 1
|
| 487 |
progress(f"Search attempt {attempt}/{max_attempts} (found {len(all_unique_contacts)}/{num_emails} unique contacts so far)...", 'in-progress')
|
| 488 |
|
| 489 |
-
# Step 1: Find companies using
|
| 490 |
progress(f"Searching for {batch_size} {industry} companies...", 'in-progress')
|
| 491 |
-
companies = askClaudeToFindCompanies(
|
| 492 |
progress(f"Found {len(companies)} companies", 'success')
|
| 493 |
|
| 494 |
if len(companies) == 0:
|
|
@@ -551,8 +513,8 @@ def main(
|
|
| 551 |
final_contacts = all_unique_contacts[:num_emails]
|
| 552 |
|
| 553 |
# Step 4: Generate personalized emails
|
| 554 |
-
progress(f"Generating {len(final_contacts)} personalized emails using
|
| 555 |
-
emails_with_bodies = createEmailsUsingClaude(final_contacts, resume_path,
|
| 556 |
progress(f"Created {len(emails_with_bodies)} personalized emails", 'success')
|
| 557 |
|
| 558 |
# Step 5: Send emails
|
|
|
|
| 1 |
import json
|
| 2 |
import requests
|
| 3 |
import os
|
|
|
|
| 4 |
import FindEmailWorkFlowV2
|
| 5 |
import SendEmailWorkFlowV2
|
| 6 |
import setup
|
| 7 |
+
from llm_client import get_client
|
| 8 |
+
from pdf_utils import extract_text_from_pdf
|
| 9 |
|
| 10 |
# Hunter.io API Key - Replace with your actual API key
|
| 11 |
HUNTER_API_KEY = setup.HUNTER_API_KEY
|
|
|
|
| 58 |
|
| 59 |
return emails, domains
|
| 60 |
|
| 61 |
+
def askClaudeToFindCompanies(api_key=None, location="Atlanta", industry="Clean Tech", num_companies=5):
|
| 62 |
"""
|
| 63 |
+
Uses OpenRouter API to find startup companies based on location and industry.
|
| 64 |
Returns only company names and domains (no emails or contacts).
|
| 65 |
|
| 66 |
Args:
|
| 67 |
+
api_key: Deprecated, kept for backwards compatibility
|
| 68 |
location: City or region to search for companies (default: "Atlanta")
|
| 69 |
industry: Industry type to target (default: "Clean Tech")
|
| 70 |
num_companies: Number of companies to find (default: 5)
|
|
|
|
| 72 |
Returns:
|
| 73 |
list: List of dicts with keys: company_name, domain
|
| 74 |
"""
|
| 75 |
+
client = get_client()
|
| 76 |
|
| 77 |
# Build industry-specific guidance
|
| 78 |
industry_examples = ""
|
|
|
|
| 89 |
else:
|
| 90 |
industry_examples = f"({industry} related technologies and services)"
|
| 91 |
|
| 92 |
+
prompt = (
|
| 93 |
+
"Return only a valid JSON array of objects with exactly two fields: "
|
| 94 |
+
"company_name, domain.\n\n"
|
| 95 |
+
f"Find {num_companies} real, actively operating {industry} companies based in the {location} area. "
|
| 96 |
+
"These should be companies you have high confidence actually exist.\n\n"
|
| 97 |
+
|
| 98 |
+
"DOMAIN REQUIREMENTS:\n"
|
| 99 |
+
"- Provide the company's primary website domain (e.g., 'acmesolar.com', NOT 'www.acmesolar.com' or 'https://acmesolar.com')\n"
|
| 100 |
+
"- The domain should be the company's actual corporate domain\n"
|
| 101 |
+
"- Do NOT include protocol (http/https) or subdomains (www)\n"
|
| 102 |
+
"- ONLY include domains you are highly confident are correct\n"
|
| 103 |
+
"- If you cannot find the correct domain for a company, SKIP IT entirely\n\n"
|
| 104 |
+
|
| 105 |
+
"COMPANY REQUIREMENTS:\n"
|
| 106 |
+
f"- Only include companies working in {industry} {industry_examples}\n"
|
| 107 |
+
f"- Companies must be based in or have significant presence in {location}\n"
|
| 108 |
+
"- **CRITICAL: ONLY include startups and early-stage companies (NOT established enterprises)**\n"
|
| 109 |
+
"- Startups typically have more open internship opportunities and are more responsive\n"
|
| 110 |
+
"- Focus on companies with 10-200 employees (smaller is better)\n"
|
| 111 |
+
"- Prefer recently founded companies (last 10 years) that are actively growing\n"
|
| 112 |
+
"- Only include companies you have high confidence are real and currently operating\n\n"
|
| 113 |
+
|
| 114 |
+
"QUALITY OVER QUANTITY:\n"
|
| 115 |
+
f"- It is better to return fewer than {num_companies} companies with REAL domains\n"
|
| 116 |
+
f"- than to return {num_companies} companies with guessed or uncertain domains\n"
|
| 117 |
+
"- Each entry should represent a real company you can verify exists\n\n"
|
| 118 |
+
|
| 119 |
+
"OUTPUT FORMAT:\n"
|
| 120 |
+
"- Output only valid JSON with no markdown, explanations, or commentary\n"
|
| 121 |
+
"- Example: [{\"company_name\": \"Acme Solar\", \"domain\": \"acmesolar.com\"}]\n"
|
| 122 |
+
"- Example: [{\"company_name\": \"Green Energy Solutions\", \"domain\": \"greenenergysolutions.com\"}]"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
)
|
| 124 |
|
| 125 |
+
response_text = client.create_message(prompt, max_tokens=4096)
|
| 126 |
|
| 127 |
|
| 128 |
# Clean response text - remove markdown code blocks if present
|
|
|
|
| 276 |
return contacts
|
| 277 |
|
| 278 |
|
| 279 |
+
def createEmailsUsingClaude(contacts, resume_path, api_key=None, industry="Clean Tech", custom_message=""):
|
| 280 |
"""
|
| 281 |
+
Uses OpenRouter API to generate personalized emails for each contact.
|
| 282 |
|
| 283 |
Args:
|
| 284 |
contacts: List of contact dicts from askClaudeToFindContacts
|
| 285 |
resume_path: Path to PDF resume file
|
| 286 |
+
api_key: Deprecated, kept for backwards compatibility
|
| 287 |
industry: Industry type to tailor email content (default: "Clean Tech")
|
| 288 |
custom_message: Optional custom message to incorporate into emails (default: "")
|
| 289 |
|
| 290 |
Returns:
|
| 291 |
list: List of dicts with company_name, contact_name, email_address, email_body
|
| 292 |
"""
|
| 293 |
+
client = get_client()
|
| 294 |
|
| 295 |
+
# Extract text from resume PDF
|
| 296 |
+
resume_text = extract_text_from_pdf(resume_path)
|
| 297 |
+
if not resume_text:
|
| 298 |
+
raise ValueError(f"Could not extract text from resume at {resume_path}")
|
|
|
|
| 299 |
|
| 300 |
+
# Create contact list text
|
| 301 |
contact_text = json.dumps(contacts, indent=2)
|
| 302 |
|
| 303 |
+
prompt = (
|
| 304 |
+
f"You are helping draft personalized internship outreach emails for companies in the {industry} industry. "
|
| 305 |
+
f"For each company listed below, create a tailored email that:\n\n"
|
| 306 |
+
f"1. References specific work or projects the company is doing in {industry}\n"
|
| 307 |
+
f"2. Connects the applicant's background (found in the resume) to the company's mission\n"
|
| 308 |
+
f"3. Sounds authentic, human, and genuinely interested (NOT AI-generated)\n"
|
| 309 |
+
f"4. Is professional but warm and conversational\n"
|
| 310 |
+
f"5. Asks for internship opportunities without being pushy\n\n"
|
| 311 |
+
f"6. Keeps the email concise (150-200 words)\n\n"
|
| 312 |
+
f"7. Does not fabricate any information about the company or the applicant\n\n"
|
| 313 |
+
f"{f'8. Incorporates this specific message/requirement: {custom_message}' if custom_message else ''}\n\n"
|
| 314 |
+
f"RESUME CONTENT:\n{resume_text}\n\n"
|
| 315 |
+
f"Example email structure (adapt this based on the resume and each company):\n\n"
|
| 316 |
+
f"Hi [Company Name Team],\n\n"
|
| 317 |
+
f"I hope you're well. My name is [Name from resume], and I'm a [major/background from resume] student at [university from resume]. "
|
| 318 |
+
f"I recently came across [Company Name]'s work on [specific project/technology in {industry}] and was fascinated by [specific technical aspect]. "
|
| 319 |
+
f"I've spent time working on [relevant experience from resume], and I'd love to see how these skills might apply in a real-world, high-impact setting like yours. "
|
| 320 |
+
f"My interest is to learn from experienced teams and contribute in any way I can, however small. "
|
| 321 |
+
f"If there is a way for me to get involved with the technical side at [Company Name], I'd be grateful for the chance to discuss.\n\n"
|
| 322 |
+
f"I've attached my resume for reference. Thank you very much for considering this note, and I appreciate any time or advice you can offer.\n\n"
|
| 323 |
+
f"Best,\n[Name from resume]\n\n"
|
| 324 |
+
f"IMPORTANT:\n"
|
| 325 |
+
f"- Research each company and reference their actual work in {industry}\n"
|
| 326 |
+
f"- Extract the applicant's name, university, and major from the resume\n"
|
| 327 |
+
f"- Match skills from the resume to each company's focus area\n"
|
| 328 |
+
f"- Make each email unique - no copy-paste language between companies\n"
|
| 329 |
+
f"- Keep emails concise (150-200 words)\n\n"
|
| 330 |
+
f"Company contacts:\n{contact_text}\n\n"
|
| 331 |
+
f"Return a JSON array with the same contacts but add an 'email_body' field containing the tailored email body. "
|
| 332 |
+
f"Do not include subject line or attachment information. Return only valid JSON with no additional text."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
)
|
| 334 |
|
| 335 |
+
response_text = client.create_message(prompt, max_tokens=8000)
|
| 336 |
|
| 337 |
|
| 338 |
# Clean response text - remove markdown code blocks if present
|
|
|
|
| 407 |
Returns:
|
| 408 |
dict: Email sending results with success/failure counts and emails_sent list
|
| 409 |
"""
|
| 410 |
+
# OpenRouter API key is loaded automatically by llm_client
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
|
| 412 |
# Initialize user history if not provided
|
| 413 |
if user_emails_sent is None:
|
|
|
|
| 448 |
attempt += 1
|
| 449 |
progress(f"Search attempt {attempt}/{max_attempts} (found {len(all_unique_contacts)}/{num_emails} unique contacts so far)...", 'in-progress')
|
| 450 |
|
| 451 |
+
# Step 1: Find companies using LLM (only company names and domains)
|
| 452 |
progress(f"Searching for {batch_size} {industry} companies...", 'in-progress')
|
| 453 |
+
companies = askClaudeToFindCompanies(location=location, industry=industry, num_companies=batch_size)
|
| 454 |
progress(f"Found {len(companies)} companies", 'success')
|
| 455 |
|
| 456 |
if len(companies) == 0:
|
|
|
|
| 513 |
final_contacts = all_unique_contacts[:num_emails]
|
| 514 |
|
| 515 |
# Step 4: Generate personalized emails
|
| 516 |
+
progress(f"Generating {len(final_contacts)} personalized emails using AI...", 'in-progress')
|
| 517 |
+
emails_with_bodies = createEmailsUsingClaude(final_contacts, resume_path, industry=industry, custom_message=custom_message)
|
| 518 |
progress(f"Created {len(emails_with_bodies)} personalized emails", 'success')
|
| 519 |
|
| 520 |
# Step 5: Send emails
|
HandshakeDMAutomation.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
Handshake Direct Message Automation Module
|
| 3 |
|
| 4 |
This module automates sending direct messages to hiring managers on Handshake.
|
| 5 |
-
It uses
|
| 6 |
1. Log into Handshake with user credentials
|
| 7 |
2. Navigate directly to employer pages matching desired city and industry
|
| 8 |
3. Filter by location and industry
|
|
@@ -18,21 +18,11 @@ import re
|
|
| 18 |
import json
|
| 19 |
import urllib
|
| 20 |
import requests
|
| 21 |
-
import anthropic
|
| 22 |
import pandas as pd
|
| 23 |
-
import setup
|
| 24 |
from datetime import datetime
|
| 25 |
-
from
|
| 26 |
-
from
|
| 27 |
-
from
|
| 28 |
-
from selenium.webdriver.support.ui import WebDriverWait
|
| 29 |
-
from selenium.webdriver.support import expected_conditions as EC
|
| 30 |
-
from selenium.webdriver.chrome.options import Options
|
| 31 |
-
from selenium.webdriver.chrome.service import Service
|
| 32 |
-
from selenium.common.exceptions import TimeoutException, NoSuchElementException, StaleElementReferenceException
|
| 33 |
-
from webdriver_manager.chrome import ChromeDriverManager
|
| 34 |
-
|
| 35 |
-
api_key = setup.API_KEY
|
| 36 |
|
| 37 |
# Official Handshake Industry Categories (from Handshake Help Center)
|
| 38 |
HANDSHAKE_INDUSTRIES = {
|
|
@@ -126,85 +116,28 @@ class HandshakeAutomator:
|
|
| 126 |
headless: Run browser in headless mode (default: False for debugging)
|
| 127 |
"""
|
| 128 |
self.headless = headless
|
| 129 |
-
self.
|
| 130 |
-
self.
|
| 131 |
|
| 132 |
-
#
|
| 133 |
-
self.
|
| 134 |
-
if not self.claude_api_key:
|
| 135 |
-
raise ValueError(
|
| 136 |
-
"API_KEY not set in setup.py. "
|
| 137 |
-
"Please set it with your Claude API key from https://console.anthropic.com/"
|
| 138 |
-
)
|
| 139 |
-
|
| 140 |
-
# Validate API key format
|
| 141 |
-
if not self.claude_api_key.startswith('sk-ant-'):
|
| 142 |
-
raise ValueError(
|
| 143 |
-
f"Invalid API key format. API keys should start with 'sk-ant-'. "
|
| 144 |
-
f"Please check your API key in setup.py"
|
| 145 |
-
)
|
| 146 |
-
|
| 147 |
-
try:
|
| 148 |
-
self.claude_client = anthropic.Anthropic(api_key=self.claude_api_key)
|
| 149 |
-
except Exception as e:
|
| 150 |
-
raise ValueError(
|
| 151 |
-
f"Failed to initialize Claude API client: {str(e)}. "
|
| 152 |
-
f"Please check your API key in setup.py"
|
| 153 |
-
)
|
| 154 |
|
| 155 |
# Company DM tracking log file
|
| 156 |
self.dm_log_file = os.path.join(os.path.dirname(__file__), "handshake_dm_log.json")
|
| 157 |
|
| 158 |
def setup_driver(self):
|
| 159 |
-
"""Set up
|
| 160 |
-
chrome_options = Options()
|
| 161 |
-
|
| 162 |
-
if self.headless:
|
| 163 |
-
chrome_options.add_argument('--headless=new')
|
| 164 |
-
|
| 165 |
-
# Stability and compatibility options
|
| 166 |
-
chrome_options.add_argument('--no-sandbox')
|
| 167 |
-
chrome_options.add_argument('--disable-dev-shm-usage')
|
| 168 |
-
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
|
| 169 |
-
chrome_options.add_argument('--disable-gpu')
|
| 170 |
-
chrome_options.add_argument('--disable-software-rasterizer')
|
| 171 |
-
chrome_options.add_argument('--window-size=1920,1080')
|
| 172 |
-
chrome_options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36')
|
| 173 |
-
|
| 174 |
-
# Disable automation flags
|
| 175 |
-
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
|
| 176 |
-
chrome_options.add_experimental_option('useAutomationExtension', False)
|
| 177 |
-
|
| 178 |
-
# Add error logging
|
| 179 |
-
chrome_options.add_argument('--enable-logging')
|
| 180 |
-
chrome_options.add_argument('--v=1')
|
| 181 |
-
|
| 182 |
try:
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
self.driver = webdriver.Chrome(service=Service(driver_path), options=chrome_options)
|
| 187 |
-
|
| 188 |
-
capabilities = self.driver.capabilities
|
| 189 |
-
print(f"Chrome version: {capabilities.get('browserVersion', 'Unknown')}")
|
| 190 |
-
print(f"ChromeDriver version: {capabilities.get('chrome', {}).get('chromedriverVersion', 'Unknown')}")
|
| 191 |
-
|
| 192 |
-
self.driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
|
| 193 |
-
'source': '''
|
| 194 |
-
Object.defineProperty(navigator, 'webdriver', {
|
| 195 |
-
get: () => undefined
|
| 196 |
-
})
|
| 197 |
-
'''
|
| 198 |
-
})
|
| 199 |
-
|
| 200 |
-
self.wait = WebDriverWait(self.driver, 20)
|
| 201 |
|
| 202 |
except Exception as e:
|
| 203 |
-
print(f"Error setting up
|
| 204 |
print("Troubleshooting tips:")
|
| 205 |
-
print("1.
|
| 206 |
-
print("2.
|
| 207 |
-
print("3. Close any existing
|
| 208 |
raise
|
| 209 |
|
| 210 |
def load_contacted_companies(self):
|
|
@@ -255,7 +188,7 @@ class HandshakeAutomator:
|
|
| 255 |
|
| 256 |
def login_to_handshake(self, progress_callback=None, login_confirmed_callback=None):
|
| 257 |
"""
|
| 258 |
-
Log into Handshake using
|
| 259 |
Handles case where user is already logged in from previous session.
|
| 260 |
|
| 261 |
Args:
|
|
@@ -269,8 +202,8 @@ class HandshakeAutomator:
|
|
| 269 |
if progress_callback:
|
| 270 |
progress_callback("Navigating to Handshake login page...", "in-progress")
|
| 271 |
|
| 272 |
-
self.
|
| 273 |
-
|
| 274 |
|
| 275 |
if progress_callback:
|
| 276 |
progress_callback("Please log into Handshake in the browser window, then click 'I'm Logged In' button below.", "login-wait")
|
|
@@ -298,23 +231,20 @@ class HandshakeAutomator:
|
|
| 298 |
progress_callback("Login timeout - please try again and click the button after logging in", "error")
|
| 299 |
return False
|
| 300 |
|
| 301 |
-
# Verify login by checking for
|
| 302 |
try:
|
| 303 |
-
self.
|
| 304 |
-
|
| 305 |
-
time.sleep(3)
|
| 306 |
|
| 307 |
-
# Check if we're on the
|
| 308 |
-
self.
|
| 309 |
-
EC.presence_of_element_located((By.XPATH, "./*"))
|
| 310 |
-
)
|
| 311 |
|
| 312 |
if progress_callback:
|
| 313 |
progress_callback("Successfully logged into Handshake!", "success")
|
| 314 |
-
|
| 315 |
return True
|
| 316 |
|
| 317 |
-
except
|
| 318 |
if progress_callback:
|
| 319 |
progress_callback("Login verification failed. Please ensure you're logged in.", "error")
|
| 320 |
return False
|
|
@@ -404,24 +334,13 @@ Return your answer as a JSON array of industry names EXACTLY as they appear in t
|
|
| 404 |
Return ONLY the JSON array with no markdown formatting, nothing else. You must include at least 1 industry."""
|
| 405 |
|
| 406 |
try:
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
max_tokens=300,
|
| 410 |
-
messages=[{"role": "user", "content": prompt}]
|
| 411 |
-
)
|
| 412 |
-
except anthropic.AuthenticationError as auth_error:
|
| 413 |
-
print(f"β Claude API authentication failed: {str(auth_error)}")
|
| 414 |
-
print(f"Your API key in setup.py may be invalid or expired.")
|
| 415 |
-
print(f"Falling back to keyword matching...")
|
| 416 |
-
raise Exception(f"API authentication error: {str(auth_error)}")
|
| 417 |
except Exception as api_error:
|
| 418 |
-
print(f"β
|
| 419 |
print(f"Falling back to keyword matching...")
|
| 420 |
raise Exception(f"API error: {str(api_error)}")
|
| 421 |
|
| 422 |
-
# Parse the response
|
| 423 |
-
response_text = response.content[0].text.strip()
|
| 424 |
-
|
| 425 |
# Remove markdown code blocks if present
|
| 426 |
if response_text.startswith("```"):
|
| 427 |
response_text = response_text.split("```")[1]
|
|
@@ -565,26 +484,14 @@ For example:
|
|
| 565 |
Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown, just the string "lat,long"."""
|
| 566 |
|
| 567 |
try:
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
max_tokens=100,
|
| 571 |
-
messages=[{"role": "user", "content": prompt}]
|
| 572 |
-
)
|
| 573 |
-
except anthropic.AuthenticationError as auth_error:
|
| 574 |
-
raise ValueError(
|
| 575 |
-
f"Claude API authentication failed: {str(auth_error)}\n"
|
| 576 |
-
f"Your API key in setup.py may be invalid or expired.\n"
|
| 577 |
-
f"Please get a new API key from https://console.anthropic.com/"
|
| 578 |
-
)
|
| 579 |
except Exception as api_error:
|
| 580 |
raise ValueError(
|
| 581 |
-
f"
|
| 582 |
f"Please check your API key and internet connection."
|
| 583 |
)
|
| 584 |
|
| 585 |
-
# Parse the response - should be just the coordinates string
|
| 586 |
-
coordinates = response.content[0].text.strip()
|
| 587 |
-
|
| 588 |
# Remove quotes if AI added them
|
| 589 |
coordinates = coordinates.replace('"', '').replace("'", "")
|
| 590 |
|
|
@@ -618,51 +525,29 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 618 |
|
| 619 |
|
| 620 |
|
| 621 |
-
def extract_employer_urls(self,progress_callback=None):
|
| 622 |
-
|
| 623 |
|
| 624 |
# Scroll through the page to load all employer cards
|
| 625 |
if progress_callback:
|
| 626 |
progress_callback("Scrolling through page to load all employers...", "in-progress")
|
| 627 |
|
| 628 |
-
|
| 629 |
-
scroll_attempts = 0
|
| 630 |
-
max_scroll_attempts = 10 # Prevent infinite scrolling
|
| 631 |
-
|
| 632 |
-
while scroll_attempts < max_scroll_attempts:
|
| 633 |
-
# Scroll down to bottom
|
| 634 |
-
self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
| 635 |
-
time.sleep(2) # Wait for page to load
|
| 636 |
-
|
| 637 |
-
# Calculate new scroll height and compare with last scroll height
|
| 638 |
-
new_height = self.driver.execute_script("return document.body.scrollHeight")
|
| 639 |
-
if new_height == last_height:
|
| 640 |
-
# If heights are the same, we've reached the bottom
|
| 641 |
-
break
|
| 642 |
-
last_height = new_height
|
| 643 |
-
scroll_attempts += 1
|
| 644 |
-
|
| 645 |
-
if progress_callback:
|
| 646 |
-
progress_callback(f"Loading more employers... (scroll {scroll_attempts}/{max_scroll_attempts})", "in-progress")
|
| 647 |
-
|
| 648 |
-
# Scroll back to top to ensure all elements are accessible
|
| 649 |
-
self.driver.execute_script("window.scrollTo(0, 0);")
|
| 650 |
-
time.sleep(1)
|
| 651 |
|
| 652 |
# Now extract all employer links
|
| 653 |
-
all_links=self.
|
| 654 |
-
employer_urls=[]
|
| 655 |
-
employer_names=[]
|
| 656 |
for link in all_links:
|
| 657 |
-
href=link.get_attribute('href')
|
| 658 |
if href and '/e/' in href:
|
| 659 |
employer_urls.append(href)
|
| 660 |
-
employer_names.append(link.
|
| 661 |
|
| 662 |
if progress_callback:
|
| 663 |
progress_callback(f"Extracted {len(employer_urls)} employer URLs", "success")
|
| 664 |
|
| 665 |
-
return employer_urls,employer_names
|
| 666 |
|
| 667 |
def clean_company_name(self, raw_company_name):
|
| 668 |
"""
|
|
@@ -687,21 +572,21 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 687 |
# Return cleaned name or fallback
|
| 688 |
return clean_name if clean_name else "Unknown Company"
|
| 689 |
|
| 690 |
-
def find_recruiter_name(self,progress_callback=None):
|
| 691 |
"""
|
| 692 |
Extract recruiter's name from their Handshake profile page.
|
| 693 |
|
| 694 |
Returns:
|
| 695 |
str: Recruiter's name, or None if not found
|
| 696 |
"""
|
| 697 |
-
all_names=self.
|
| 698 |
-
person_name=[]
|
| 699 |
for name in all_names:
|
| 700 |
-
val=name.
|
|
|
|
| 701 |
if "Message" in val:
|
| 702 |
# Extract name after "Message" text
|
| 703 |
# Example: "Message Dr. Alice Wonderland" -> "Dr. Alice Wonderland"
|
| 704 |
-
recruiter_name = val.split("Message",1)[1].strip()
|
| 705 |
|
| 706 |
# If the name has newlines (e.g., "Dr. Alice Wonderland\nDoctor of Research"),
|
| 707 |
# take only the first line (the actual name)
|
|
@@ -723,39 +608,37 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 723 |
"""
|
| 724 |
try:
|
| 725 |
# Strategy 1: Look for h2 elements that might contain job title
|
| 726 |
-
all_h2 = self.
|
| 727 |
for h2 in all_h2:
|
| 728 |
-
text = h2.
|
| 729 |
# Filter out common non-title headers
|
| 730 |
if text and text not in ['Message', 'About', 'Education', 'Experience', 'Skills']:
|
| 731 |
# This might be the job title
|
| 732 |
if len(text) < 100: # Reasonable length for a job title
|
| 733 |
return text
|
| 734 |
|
| 735 |
-
# Strategy 2: Look for elements with specific classes
|
| 736 |
-
# Try common job title selectors
|
| 737 |
job_title_selectors = [
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
]
|
| 743 |
|
| 744 |
-
for
|
| 745 |
try:
|
| 746 |
-
elements = self.
|
| 747 |
for elem in elements:
|
| 748 |
-
text = elem.
|
| 749 |
if text and len(text) < 100 and '\n' not in text:
|
| 750 |
return text
|
| 751 |
except:
|
| 752 |
continue
|
| 753 |
|
| 754 |
# Strategy 3: Extract from recruiter name element if it contains title
|
| 755 |
-
|
| 756 |
-
all_names = self.driver.find_elements(By.TAG_NAME, 'h1')
|
| 757 |
for name in all_names:
|
| 758 |
-
val = name.
|
| 759 |
if "Message" in val:
|
| 760 |
# Remove "Message" prefix
|
| 761 |
remaining_text = val.split("Message", 1)[1].strip()
|
|
@@ -763,7 +646,6 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 763 |
if '\n' in remaining_text:
|
| 764 |
lines = remaining_text.split('\n')
|
| 765 |
if len(lines) > 1:
|
| 766 |
-
# Second line might be "Doctor of Research, Research Labs"
|
| 767 |
potential_title = lines[1].strip()
|
| 768 |
if potential_title:
|
| 769 |
return potential_title
|
|
@@ -778,48 +660,24 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 778 |
|
| 779 |
def find_recruiter_url(self):
|
| 780 |
print('reached find recruiter url')
|
| 781 |
-
|
| 782 |
|
| 783 |
# Scroll through the page to ensure all recruiter profiles are loaded
|
| 784 |
-
|
| 785 |
-
scroll_attempts = 0
|
| 786 |
-
max_scroll_attempts = 5 # Employer pages are usually shorter
|
| 787 |
-
|
| 788 |
-
while scroll_attempts < max_scroll_attempts:
|
| 789 |
-
# Scroll down to bottom
|
| 790 |
-
self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
| 791 |
-
time.sleep(1.5) # Wait for content to load
|
| 792 |
-
|
| 793 |
-
# Calculate new scroll height and compare with last scroll height
|
| 794 |
-
new_height = self.driver.execute_script("return document.body.scrollHeight")
|
| 795 |
-
if new_height == last_height:
|
| 796 |
-
# If heights are the same, we've reached the bottom
|
| 797 |
-
break
|
| 798 |
-
last_height = new_height
|
| 799 |
-
scroll_attempts += 1
|
| 800 |
-
|
| 801 |
-
# Scroll back to top to ensure all elements are accessible
|
| 802 |
-
self.driver.execute_script("window.scrollTo(0, 0);")
|
| 803 |
-
time.sleep(1)
|
| 804 |
|
| 805 |
# Now extract all recruiter profile links
|
| 806 |
-
all_links=self.
|
| 807 |
-
person_links=[]
|
| 808 |
-
person_name=[]
|
| 809 |
-
|
| 810 |
for link in all_links:
|
| 811 |
-
|
| 812 |
-
href=link.get_attribute('href')
|
| 813 |
if href and '/profiles/' in href:
|
| 814 |
person_links.append(href)
|
| 815 |
-
person_name.append(link.
|
| 816 |
-
if len(person_name)>=2
|
| 817 |
-
|
| 818 |
-
#print(person_links[1])
|
| 819 |
-
#print('reached end of find recruiter url: returned tuple')
|
| 820 |
-
return person_links[1],person_name[1]
|
| 821 |
else:
|
| 822 |
-
#print('reached end of find recruiter url: returned nothing')
|
| 823 |
return False
|
| 824 |
|
| 825 |
|
|
@@ -855,13 +713,13 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 855 |
progress_callback(f"Skipped '{company_name}' (already contacted)", "info")
|
| 856 |
continue
|
| 857 |
|
| 858 |
-
self.
|
| 859 |
-
|
| 860 |
if(self.find_recruiter_url()):
|
| 861 |
-
recruiter_url,recruiter_name=self.find_recruiter_url()
|
| 862 |
if recruiter_url:
|
| 863 |
-
self.
|
| 864 |
-
|
| 865 |
|
| 866 |
# Extract recruiter name
|
| 867 |
nombre=self.find_recruiter_name(progress_callback)
|
|
@@ -928,29 +786,19 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 928 |
if progress_callback:
|
| 929 |
progress_callback("Opening message composer...", "in-progress")
|
| 930 |
|
| 931 |
-
|
| 932 |
|
| 933 |
-
# Find and click the Message button
|
| 934 |
message_button_selectors = [
|
| 935 |
-
"
|
| 936 |
-
"
|
| 937 |
-
"
|
| 938 |
-
"
|
| 939 |
-
"
|
| 940 |
-
"
|
| 941 |
]
|
| 942 |
|
| 943 |
-
message_button =
|
| 944 |
-
for selector in message_button_selectors:
|
| 945 |
-
try:
|
| 946 |
-
message_button = WebDriverWait(self.driver, 5).until(
|
| 947 |
-
EC.element_to_be_clickable((By.XPATH, selector))
|
| 948 |
-
)
|
| 949 |
-
if message_button:
|
| 950 |
-
print(f"β Found message button using selector: {selector}")
|
| 951 |
-
break
|
| 952 |
-
except:
|
| 953 |
-
continue
|
| 954 |
|
| 955 |
if not message_button:
|
| 956 |
if progress_callback:
|
|
@@ -960,89 +808,62 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 960 |
# Click the message button
|
| 961 |
message_button.click()
|
| 962 |
print("✓ Clicked message button")
|
| 963 |
-
|
| 964 |
|
| 965 |
# Wait for the message composer to be fully loaded
|
| 966 |
-
# Check if we're now in a messaging interface (modal or separate page)
|
| 967 |
if progress_callback:
|
| 968 |
progress_callback("Waiting for message composer to load...", "in-progress")
|
| 969 |
|
| 970 |
# Try multiple strategies to find the message input
|
| 971 |
-
message_box = None
|
| 972 |
message_box_selectors = [
|
| 973 |
-
|
| 974 |
-
|
| 975 |
-
|
| 976 |
-
|
| 977 |
-
|
| 978 |
-
|
| 979 |
-
(
|
| 980 |
]
|
| 981 |
|
| 982 |
-
|
| 983 |
-
try:
|
| 984 |
-
message_box = WebDriverWait(self.driver, 10).until(
|
| 985 |
-
EC.presence_of_element_located((by_method, selector))
|
| 986 |
-
)
|
| 987 |
-
if message_box:
|
| 988 |
-
# Verify it's visible and interactable
|
| 989 |
-
if message_box.is_displayed():
|
| 990 |
-
print(f"β Found message input using: {by_method} - {selector}")
|
| 991 |
-
break
|
| 992 |
-
else:
|
| 993 |
-
message_box = None
|
| 994 |
-
except:
|
| 995 |
-
continue
|
| 996 |
|
| 997 |
if not message_box:
|
| 998 |
if progress_callback:
|
| 999 |
progress_callback("Message input box not found in messaging interface.", "error")
|
| 1000 |
return False
|
| 1001 |
|
|
|
|
|
|
|
| 1002 |
# Clear any existing text and enter the message
|
| 1003 |
if progress_callback:
|
| 1004 |
progress_callback("Composing message...", "in-progress")
|
| 1005 |
|
| 1006 |
-
try:
|
| 1007 |
-
message_box.clear()
|
| 1008 |
-
except:
|
| 1009 |
-
# Some elements don't support clear(), try selecting all and deleting
|
| 1010 |
-
message_box.send_keys(Keys.CONTROL + "a")
|
| 1011 |
-
message_box.send_keys(Keys.DELETE)
|
| 1012 |
-
|
| 1013 |
message_box.click() # Ensure it's focused
|
| 1014 |
-
|
| 1015 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1016 |
print(f"✓ Entered message text ({len(message_text)} characters)")
|
| 1017 |
-
|
| 1018 |
|
| 1019 |
# Find and click the Send button
|
| 1020 |
if progress_callback:
|
| 1021 |
progress_callback("Sending message...", "in-progress")
|
| 1022 |
|
| 1023 |
send_button_selectors = [
|
| 1024 |
-
"
|
| 1025 |
-
"
|
| 1026 |
-
"
|
| 1027 |
-
"
|
| 1028 |
-
"
|
| 1029 |
-
"
|
| 1030 |
-
"//button[@type='submit']" # Generic submit button
|
| 1031 |
]
|
| 1032 |
|
| 1033 |
-
send_button =
|
| 1034 |
-
for selector in send_button_selectors:
|
| 1035 |
-
try:
|
| 1036 |
-
send_button = WebDriverWait(self.driver, 5).until(
|
| 1037 |
-
EC.element_to_be_clickable((By.XPATH, selector))
|
| 1038 |
-
)
|
| 1039 |
-
if send_button and send_button.is_displayed() and send_button.is_enabled():
|
| 1040 |
-
print(f"β Found send button using selector: {selector}")
|
| 1041 |
-
break
|
| 1042 |
-
else:
|
| 1043 |
-
send_button = None
|
| 1044 |
-
except:
|
| 1045 |
-
continue
|
| 1046 |
|
| 1047 |
if not send_button:
|
| 1048 |
if progress_callback:
|
|
@@ -1052,16 +873,17 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 1052 |
# Click the send button
|
| 1053 |
send_button.click()
|
| 1054 |
print("✓ Clicked send button")
|
| 1055 |
-
|
| 1056 |
|
| 1057 |
-
# Verify the message was sent
|
| 1058 |
-
# 1. The textarea is cleared/empty
|
| 1059 |
-
# 2. The send button is disabled or no longer visible
|
| 1060 |
-
# 3. No error messages appeared
|
| 1061 |
try:
|
| 1062 |
-
|
| 1063 |
-
|
| 1064 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1065 |
if len(current_text.strip()) == 0:
|
| 1066 |
print("✓ Message box cleared - message sent successfully")
|
| 1067 |
if progress_callback:
|
|
@@ -1069,23 +891,15 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 1069 |
return True
|
| 1070 |
else:
|
| 1071 |
print(f"β Message box still contains text: {current_text[:50]}...")
|
| 1072 |
-
# Don't fail immediately - message might still have been sent
|
| 1073 |
if progress_callback:
|
| 1074 |
progress_callback("Message sent (verification unclear)", "success")
|
| 1075 |
return True
|
| 1076 |
except:
|
| 1077 |
-
# If we can't verify, assume success since no error was thrown
|
| 1078 |
print("β Message sent (could not verify, but no errors)")
|
| 1079 |
if progress_callback:
|
| 1080 |
progress_callback("Direct message sent successfully!", "success")
|
| 1081 |
return True
|
| 1082 |
|
| 1083 |
-
except TimeoutException as e:
|
| 1084 |
-
error_msg = f"Timeout while sending DM: {str(e)}"
|
| 1085 |
-
print(f"β {error_msg}")
|
| 1086 |
-
if progress_callback:
|
| 1087 |
-
progress_callback(error_msg, "error")
|
| 1088 |
-
return False
|
| 1089 |
except Exception as e:
|
| 1090 |
error_msg = f"Error sending DM: {str(e)}"
|
| 1091 |
print(f"β {error_msg}")
|
|
@@ -1125,11 +939,20 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 1125 |
try:
|
| 1126 |
greeting = f"Hi {recruiter_name}" if recruiter_name else "Hello"
|
| 1127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1128 |
prompt = f"""You are helping a student write a personalized, professional direct message to a hiring manager on Handshake.
|
| 1129 |
|
| 1130 |
Company: {company_name}
|
| 1131 |
Hiring Manager: {recruiter_name or 'Unknown'}. Only use their full name or title (such as Dr.). So for example if the entry was 'Dr. Alice Wonderland\nDoctor of Research, Research Labs' you should only use 'Dr. Alice Wonderland' or 'Dr. Wonderland'.
|
| 1132 |
|
|
|
|
|
|
|
|
|
|
| 1133 |
Write a short, professional direct message (3-4 sentences max) that:
|
| 1134 |
1. Expresses genuine interest in opportunities at {company_name}
|
| 1135 |
2. Highlights 1-2 relevant skills or experiences from the resume that align with the company's industry
|
|
@@ -1141,43 +964,20 @@ Write a short, professional direct message (3-4 sentences max) that:
|
|
| 1141 |
Return ONLY the message body (no subject line, greeting, or signature). Start directly with the content.
|
| 1142 |
Do not include placeholders like [Your Name] - the message should be ready to send as-is."""
|
| 1143 |
|
| 1144 |
-
content = [{"type": "text", "text": prompt}]
|
| 1145 |
-
|
| 1146 |
-
# Load and attach resume (now mandatory)
|
| 1147 |
-
with open(user_resume_path, 'rb') as f:
|
| 1148 |
-
resume_data = f.read()
|
| 1149 |
-
import base64
|
| 1150 |
-
resume_base64 = base64.b64encode(resume_data).decode('utf-8')
|
| 1151 |
-
|
| 1152 |
-
content.append({
|
| 1153 |
-
"type": "document",
|
| 1154 |
-
"source": {
|
| 1155 |
-
"type": "base64",
|
| 1156 |
-
"media_type": "application/pdf",
|
| 1157 |
-
"data": resume_base64
|
| 1158 |
-
}
|
| 1159 |
-
})
|
| 1160 |
-
|
| 1161 |
try:
|
| 1162 |
-
|
| 1163 |
-
|
| 1164 |
-
max_tokens=500,
|
| 1165 |
-
messages=[{"role": "user", "content": content}]
|
| 1166 |
-
)
|
| 1167 |
-
|
| 1168 |
-
message_body = response.content[0].text.strip()
|
| 1169 |
full_message = f"{greeting},\n\n{message_body}\n\nBest regards"
|
| 1170 |
|
| 1171 |
return full_message
|
| 1172 |
|
| 1173 |
-
except
|
| 1174 |
-
print(f"β
|
| 1175 |
-
print(f"Your API key in setup.py may be invalid or expired.")
|
| 1176 |
print(f"Using fallback message template...")
|
| 1177 |
-
raise Exception(f"API
|
| 1178 |
|
| 1179 |
except Exception as e:
|
| 1180 |
-
print(f"
|
| 1181 |
# Fallback to simple template
|
| 1182 |
return f"""{greeting},
|
| 1183 |
|
|
@@ -1298,10 +1098,10 @@ Best regards"""
|
|
| 1298 |
if progress_callback:
|
| 1299 |
progress_callback(f"Navigating to employer search page with filters...", "in-progress")
|
| 1300 |
|
| 1301 |
-
self.
|
| 1302 |
|
| 1303 |
# Wait for the page to load completely
|
| 1304 |
-
|
| 1305 |
|
| 1306 |
if progress_callback:
|
| 1307 |
progress_callback("Employer search page loaded. Extracting employer information...", "in-progress")
|
|
@@ -1325,11 +1125,11 @@ Best regards"""
|
|
| 1325 |
progress_callback(error_msg, "error")
|
| 1326 |
|
| 1327 |
finally:
|
| 1328 |
-
if self.
|
| 1329 |
try:
|
| 1330 |
print("\nClosing browser in 10 seconds...")
|
| 1331 |
time.sleep(10)
|
| 1332 |
-
self.
|
| 1333 |
print("Browser closed successfully.")
|
| 1334 |
except Exception as e:
|
| 1335 |
print(f"Warning: Error closing browser: {str(e)}")
|
|
|
|
| 2 |
Handshake Direct Message Automation Module
|
| 3 |
|
| 4 |
This module automates sending direct messages to hiring managers on Handshake.
|
| 5 |
+
It uses Playwright for browser automation to:
|
| 6 |
1. Log into Handshake with user credentials
|
| 7 |
2. Navigate directly to employer pages matching desired city and industry
|
| 8 |
3. Filter by location and industry
|
|
|
|
| 18 |
import json
|
| 19 |
import urllib
|
| 20 |
import requests
|
|
|
|
| 21 |
import pandas as pd
|
|
|
|
| 22 |
from datetime import datetime
|
| 23 |
+
from browser_utils import BrowserManager, find_element_with_fallback, scroll_to_bottom
|
| 24 |
+
from llm_client import get_client
|
| 25 |
+
from pdf_utils import extract_text_from_pdf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# Official Handshake Industry Categories (from Handshake Help Center)
|
| 28 |
HANDSHAKE_INDUSTRIES = {
|
|
|
|
| 116 |
headless: Run browser in headless mode (default: False for debugging)
|
| 117 |
"""
|
| 118 |
self.headless = headless
|
| 119 |
+
self.browser_manager = None
|
| 120 |
+
self.page = None
|
| 121 |
|
| 122 |
+
# LLM client configuration (OpenRouter)
|
| 123 |
+
self.llm_client = get_client()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
# Company DM tracking log file
|
| 126 |
self.dm_log_file = os.path.join(os.path.dirname(__file__), "handshake_dm_log.json")
|
| 127 |
|
| 128 |
def setup_driver(self):
|
| 129 |
+
"""Set up Playwright browser with appropriate options."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
try:
|
| 131 |
+
self.browser_manager = BrowserManager(headless=self.headless)
|
| 132 |
+
self.page = self.browser_manager.setup()
|
| 133 |
+
print(f"Playwright browser initialized successfully")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
except Exception as e:
|
| 136 |
+
print(f"Error setting up Playwright browser: {str(e)}")
|
| 137 |
print("Troubleshooting tips:")
|
| 138 |
+
print("1. Run: pip install playwright")
|
| 139 |
+
print("2. Run: playwright install chromium")
|
| 140 |
+
print("3. Close any existing browser instances")
|
| 141 |
raise
|
| 142 |
|
| 143 |
def load_contacted_companies(self):
|
|
|
|
| 188 |
|
| 189 |
def login_to_handshake(self, progress_callback=None, login_confirmed_callback=None):
|
| 190 |
"""
|
| 191 |
+
Log into Handshake using manual login.
|
| 192 |
Handles case where user is already logged in from previous session.
|
| 193 |
|
| 194 |
Args:
|
|
|
|
| 202 |
if progress_callback:
|
| 203 |
progress_callback("Navigating to Handshake login page...", "in-progress")
|
| 204 |
|
| 205 |
+
self.page.goto("https://app.joinhandshake.com/login")
|
| 206 |
+
self.page.wait_for_timeout(3000)
|
| 207 |
|
| 208 |
if progress_callback:
|
| 209 |
progress_callback("Please log into Handshake in the browser window, then click 'I'm Logged In' button below.", "login-wait")
|
|
|
|
| 231 |
progress_callback("Login timeout - please try again and click the button after logging in", "error")
|
| 232 |
return False
|
| 233 |
|
| 234 |
+
# Verify login by checking for employers page
|
| 235 |
try:
|
| 236 |
+
self.page.goto("https://app.joinhandshake.com/employers")
|
| 237 |
+
self.page.wait_for_timeout(3000)
|
|
|
|
| 238 |
|
| 239 |
+
# Check if we're on the employers page
|
| 240 |
+
self.page.wait_for_selector("body", timeout=20000)
|
|
|
|
|
|
|
| 241 |
|
| 242 |
if progress_callback:
|
| 243 |
progress_callback("Successfully logged into Handshake!", "success")
|
| 244 |
+
|
| 245 |
return True
|
| 246 |
|
| 247 |
+
except Exception:
|
| 248 |
if progress_callback:
|
| 249 |
progress_callback("Login verification failed. Please ensure you're logged in.", "error")
|
| 250 |
return False
|
|
|
|
| 334 |
Return ONLY the JSON array with no markdown formatting, nothing else. You must include at least 1 industry."""
|
| 335 |
|
| 336 |
try:
|
| 337 |
+
response_text = self.llm_client.create_message(prompt, max_tokens=300)
|
| 338 |
+
response_text = response_text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
except Exception as api_error:
|
| 340 |
+
print(f"β LLM API error: {str(api_error)}")
|
| 341 |
print(f"Falling back to keyword matching...")
|
| 342 |
raise Exception(f"API error: {str(api_error)}")
|
| 343 |
|
|
|
|
|
|
|
|
|
|
| 344 |
# Remove markdown code blocks if present
|
| 345 |
if response_text.startswith("```"):
|
| 346 |
response_text = response_text.split("```")[1]
|
|
|
|
| 484 |
Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown, just the string "lat,long"."""
|
| 485 |
|
| 486 |
try:
|
| 487 |
+
coordinates = self.llm_client.create_message(prompt, max_tokens=100)
|
| 488 |
+
coordinates = coordinates.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
except Exception as api_error:
|
| 490 |
raise ValueError(
|
| 491 |
+
f"LLM API error: {str(api_error)}\n"
|
| 492 |
f"Please check your API key and internet connection."
|
| 493 |
)
|
| 494 |
|
|
|
|
|
|
|
|
|
|
| 495 |
# Remove quotes if AI added them
|
| 496 |
coordinates = coordinates.replace('"', '').replace("'", "")
|
| 497 |
|
|
|
|
| 525 |
|
| 526 |
|
| 527 |
|
| 528 |
+
def extract_employer_urls(self, progress_callback=None):
|
| 529 |
+
self.page.wait_for_timeout(5000)
|
| 530 |
|
| 531 |
# Scroll through the page to load all employer cards
|
| 532 |
if progress_callback:
|
| 533 |
progress_callback("Scrolling through page to load all employers...", "in-progress")
|
| 534 |
|
| 535 |
+
scroll_to_bottom(self.page, max_scrolls=10, wait_time=2000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 536 |
|
| 537 |
# Now extract all employer links
|
| 538 |
+
all_links = self.page.locator('a').all()
|
| 539 |
+
employer_urls = []
|
| 540 |
+
employer_names = []
|
| 541 |
for link in all_links:
|
| 542 |
+
href = link.get_attribute('href')
|
| 543 |
if href and '/e/' in href:
|
| 544 |
employer_urls.append(href)
|
| 545 |
+
employer_names.append(link.text_content() or "")
|
| 546 |
|
| 547 |
if progress_callback:
|
| 548 |
progress_callback(f"Extracted {len(employer_urls)} employer URLs", "success")
|
| 549 |
|
| 550 |
+
return employer_urls, employer_names
|
| 551 |
|
| 552 |
def clean_company_name(self, raw_company_name):
|
| 553 |
"""
|
|
|
|
| 572 |
# Return cleaned name or fallback
|
| 573 |
return clean_name if clean_name else "Unknown Company"
|
| 574 |
|
| 575 |
+
def find_recruiter_name(self, progress_callback=None):
|
| 576 |
"""
|
| 577 |
Extract recruiter's name from their Handshake profile page.
|
| 578 |
|
| 579 |
Returns:
|
| 580 |
str: Recruiter's name, or None if not found
|
| 581 |
"""
|
| 582 |
+
all_names = self.page.locator('h1').all()
|
|
|
|
| 583 |
for name in all_names:
|
| 584 |
+
val = name.text_content() or ""
|
| 585 |
+
val = val.strip()
|
| 586 |
if "Message" in val:
|
| 587 |
# Extract name after "Message" text
|
| 588 |
# Example: "Message Dr. Alice Wonderland" -> "Dr. Alice Wonderland"
|
| 589 |
+
recruiter_name = val.split("Message", 1)[1].strip()
|
| 590 |
|
| 591 |
# If the name has newlines (e.g., "Dr. Alice Wonderland\nDoctor of Research"),
|
| 592 |
# take only the first line (the actual name)
|
|
|
|
| 608 |
"""
|
| 609 |
try:
|
| 610 |
# Strategy 1: Look for h2 elements that might contain job title
|
| 611 |
+
all_h2 = self.page.locator('h2').all()
|
| 612 |
for h2 in all_h2:
|
| 613 |
+
text = (h2.text_content() or "").strip()
|
| 614 |
# Filter out common non-title headers
|
| 615 |
if text and text not in ['Message', 'About', 'Education', 'Experience', 'Skills']:
|
| 616 |
# This might be the job title
|
| 617 |
if len(text) < 100: # Reasonable length for a job title
|
| 618 |
return text
|
| 619 |
|
| 620 |
+
# Strategy 2: Look for elements with specific classes
|
|
|
|
| 621 |
job_title_selectors = [
|
| 622 |
+
'[class*="job-title"]',
|
| 623 |
+
'[class*="title"]',
|
| 624 |
+
'[class*="position"]',
|
| 625 |
+
'div[class*="profile"] p:first-child',
|
| 626 |
]
|
| 627 |
|
| 628 |
+
for selector in job_title_selectors:
|
| 629 |
try:
|
| 630 |
+
elements = self.page.locator(selector).all()
|
| 631 |
for elem in elements:
|
| 632 |
+
text = (elem.text_content() or "").strip()
|
| 633 |
if text and len(text) < 100 and '\n' not in text:
|
| 634 |
return text
|
| 635 |
except:
|
| 636 |
continue
|
| 637 |
|
| 638 |
# Strategy 3: Extract from recruiter name element if it contains title
|
| 639 |
+
all_names = self.page.locator('h1').all()
|
|
|
|
| 640 |
for name in all_names:
|
| 641 |
+
val = (name.text_content() or "").strip()
|
| 642 |
if "Message" in val:
|
| 643 |
# Remove "Message" prefix
|
| 644 |
remaining_text = val.split("Message", 1)[1].strip()
|
|
|
|
| 646 |
if '\n' in remaining_text:
|
| 647 |
lines = remaining_text.split('\n')
|
| 648 |
if len(lines) > 1:
|
|
|
|
| 649 |
potential_title = lines[1].strip()
|
| 650 |
if potential_title:
|
| 651 |
return potential_title
|
|
|
|
| 660 |
|
| 661 |
def find_recruiter_url(self):
|
| 662 |
print('reached find recruiter url')
|
| 663 |
+
self.page.wait_for_timeout(5000)
|
| 664 |
|
| 665 |
# Scroll through the page to ensure all recruiter profiles are loaded
|
| 666 |
+
scroll_to_bottom(self.page, max_scrolls=5, wait_time=1500)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
|
| 668 |
# Now extract all recruiter profile links
|
| 669 |
+
all_links = self.page.locator('a').all()
|
| 670 |
+
person_links = []
|
| 671 |
+
person_name = []
|
| 672 |
+
self.page.wait_for_timeout(2000)
|
| 673 |
for link in all_links:
|
| 674 |
+
href = link.get_attribute('href')
|
|
|
|
| 675 |
if href and '/profiles/' in href:
|
| 676 |
person_links.append(href)
|
| 677 |
+
person_name.append((link.text_content() or "").strip())
|
| 678 |
+
if len(person_name) >= 2 and len(person_links) >= 2:
|
| 679 |
+
return person_links[1], person_name[1]
|
|
|
|
|
|
|
|
|
|
| 680 |
else:
|
|
|
|
| 681 |
return False
|
| 682 |
|
| 683 |
|
|
|
|
| 713 |
progress_callback(f"Skipped '{company_name}' (already contacted)", "info")
|
| 714 |
continue
|
| 715 |
|
| 716 |
+
self.page.goto(employer_urls[i])
|
| 717 |
+
self.page.wait_for_timeout(3000)
|
| 718 |
if(self.find_recruiter_url()):
|
| 719 |
+
recruiter_url, recruiter_name = self.find_recruiter_url()
|
| 720 |
if recruiter_url:
|
| 721 |
+
self.page.goto(recruiter_url)
|
| 722 |
+
self.page.wait_for_timeout(3000)
|
| 723 |
|
| 724 |
# Extract recruiter name
|
| 725 |
nombre=self.find_recruiter_name(progress_callback)
|
|
|
|
| 786 |
if progress_callback:
|
| 787 |
progress_callback("Opening message composer...", "in-progress")
|
| 788 |
|
| 789 |
+
self.page.wait_for_timeout(3000)
|
| 790 |
|
| 791 |
+
# Find and click the Message button using Playwright selectors
|
| 792 |
message_button_selectors = [
|
| 793 |
+
"button:has-text('Message')",
|
| 794 |
+
"button[aria-label='Message']",
|
| 795 |
+
"a:has-text('Message')",
|
| 796 |
+
"button[class*='message']",
|
| 797 |
+
"xpath=//button[contains(text(), 'Message')]",
|
| 798 |
+
"xpath=//*[contains(text(), 'Message') and (self::button or self::a)]"
|
| 799 |
]
|
| 800 |
|
| 801 |
+
message_button = find_element_with_fallback(self.page, message_button_selectors, timeout=5000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 802 |
|
| 803 |
if not message_button:
|
| 804 |
if progress_callback:
|
|
|
|
| 808 |
# Click the message button
|
| 809 |
message_button.click()
|
| 810 |
print("β Clicked message button")
|
| 811 |
+
self.page.wait_for_timeout(3000) # Give time for messaging interface to load
|
| 812 |
|
| 813 |
# Wait for the message composer to be fully loaded
|
|
|
|
| 814 |
if progress_callback:
|
| 815 |
progress_callback("Waiting for message composer to load...", "in-progress")
|
| 816 |
|
| 817 |
# Try multiple strategies to find the message input
|
|
|
|
| 818 |
message_box_selectors = [
|
| 819 |
+
"textarea",
|
| 820 |
+
"textarea[placeholder*='message' i]",
|
| 821 |
+
"textarea[placeholder*='Message' i]",
|
| 822 |
+
"textarea[aria-label*='message' i]",
|
| 823 |
+
"div[contenteditable='true']",
|
| 824 |
+
"div[role='textbox']",
|
| 825 |
+
"xpath=//textarea[contains(@placeholder, 'Type')]"
|
| 826 |
]
|
| 827 |
|
| 828 |
+
message_box = find_element_with_fallback(self.page, message_box_selectors, timeout=10000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 829 |
|
| 830 |
if not message_box:
|
| 831 |
if progress_callback:
|
| 832 |
progress_callback("Message input box not found in messaging interface.", "error")
|
| 833 |
return False
|
| 834 |
|
| 835 |
+
print(f"β Found message input")
|
| 836 |
+
|
| 837 |
# Clear any existing text and enter the message
|
| 838 |
if progress_callback:
|
| 839 |
progress_callback("Composing message...", "in-progress")
|
| 840 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 841 |
message_box.click() # Ensure it's focused
|
| 842 |
+
self.page.wait_for_timeout(500)
|
| 843 |
+
|
| 844 |
+
# Clear existing text
|
| 845 |
+
message_box.press("Control+a")
|
| 846 |
+
message_box.press("Delete")
|
| 847 |
+
|
| 848 |
+
# Type the message
|
| 849 |
+
message_box.fill(message_text)
|
| 850 |
print(f"β Entered message text ({len(message_text)} characters)")
|
| 851 |
+
self.page.wait_for_timeout(1500) # Wait for text to fully populate
|
| 852 |
|
| 853 |
# Find and click the Send button
|
| 854 |
if progress_callback:
|
| 855 |
progress_callback("Sending message...", "in-progress")
|
| 856 |
|
| 857 |
send_button_selectors = [
|
| 858 |
+
"button:has-text('Send')",
|
| 859 |
+
"button[type='submit']:has-text('Send')",
|
| 860 |
+
"button[aria-label*='Send']",
|
| 861 |
+
"button[aria-label*='send']",
|
| 862 |
+
"button[type='submit']",
|
| 863 |
+
"xpath=//button[contains(text(), 'Send')]"
|
|
|
|
| 864 |
]
|
| 865 |
|
| 866 |
+
send_button = find_element_with_fallback(self.page, send_button_selectors, timeout=5000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 867 |
|
| 868 |
if not send_button:
|
| 869 |
if progress_callback:
|
|
|
|
| 873 |
# Click the send button
|
| 874 |
send_button.click()
|
| 875 |
print("β Clicked send button")
|
| 876 |
+
self.page.wait_for_timeout(2000)
|
| 877 |
|
| 878 |
+
# Verify the message was sent
|
|
|
|
|
|
|
|
|
|
| 879 |
try:
|
| 880 |
+
self.page.wait_for_timeout(1000)
|
| 881 |
+
# Try to get current text in message box
|
| 882 |
+
try:
|
| 883 |
+
current_text = message_box.input_value() if message_box.is_visible() else ""
|
| 884 |
+
except:
|
| 885 |
+
current_text = ""
|
| 886 |
+
|
| 887 |
if len(current_text.strip()) == 0:
|
| 888 |
print("β Message box cleared - message sent successfully")
|
| 889 |
if progress_callback:
|
|
|
|
| 891 |
return True
|
| 892 |
else:
|
| 893 |
print(f"β Message box still contains text: {current_text[:50]}...")
|
|
|
|
| 894 |
if progress_callback:
|
| 895 |
progress_callback("Message sent (verification unclear)", "success")
|
| 896 |
return True
|
| 897 |
except:
|
|
|
|
| 898 |
print("β Message sent (could not verify, but no errors)")
|
| 899 |
if progress_callback:
|
| 900 |
progress_callback("Direct message sent successfully!", "success")
|
| 901 |
return True
|
| 902 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 903 |
except Exception as e:
|
| 904 |
error_msg = f"Error sending DM: {str(e)}"
|
| 905 |
print(f"β {error_msg}")
|
|
|
|
| 939 |
try:
|
| 940 |
greeting = f"Hi {recruiter_name}" if recruiter_name else "Hello"
|
| 941 |
|
| 942 |
+
# Extract text from resume PDF
|
| 943 |
+
resume_text = extract_text_from_pdf(user_resume_path)
|
| 944 |
+
if not resume_text:
|
| 945 |
+
print(f"⚠️ Could not extract text from resume, using fallback message")
|
| 946 |
+
raise ValueError("Could not extract resume text")
|
| 947 |
+
|
| 948 |
prompt = f"""You are helping a student write a personalized, professional direct message to a hiring manager on Handshake.
|
| 949 |
|
| 950 |
Company: {company_name}
|
| 951 |
Hiring Manager: {recruiter_name or 'Unknown'}. Only use their full name or title (such as Dr.). So for example if the entry was 'Dr. Alice Wonderland\nDoctor of Research, Research Labs' you should only use 'Dr. Alice Wonderland' or 'Dr. Wonderland'.
|
| 952 |
|
| 953 |
+
RESUME CONTENT:
|
| 954 |
+
{resume_text}
|
| 955 |
+
|
| 956 |
Write a short, professional direct message (3-4 sentences max) that:
|
| 957 |
1. Expresses genuine interest in opportunities at {company_name}
|
| 958 |
2. Highlights 1-2 relevant skills or experiences from the resume that align with the company's industry
|
|
|
|
| 964 |
Return ONLY the message body (no subject line, greeting, or signature). Start directly with the content.
|
| 965 |
Do not include placeholders like [Your Name] - the message should be ready to send as-is."""
|
| 966 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 967 |
try:
|
| 968 |
+
message_body = self.llm_client.create_message(prompt, max_tokens=500)
|
| 969 |
+
message_body = message_body.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 970 |
full_message = f"{greeting},\n\n{message_body}\n\nBest regards"
|
| 971 |
|
| 972 |
return full_message
|
| 973 |
|
| 974 |
+
except Exception as api_error:
|
| 975 |
+
print(f"β LLM API error: {str(api_error)}")
|
|
|
|
| 976 |
print(f"Using fallback message template...")
|
| 977 |
+
raise Exception(f"API error: {str(api_error)}")
|
| 978 |
|
| 979 |
except Exception as e:
|
| 980 |
+
print(f"LLM API error: {str(e)}")
|
| 981 |
# Fallback to simple template
|
| 982 |
return f"""{greeting},
|
| 983 |
|
|
|
|
| 1098 |
if progress_callback:
|
| 1099 |
progress_callback(f"Navigating to employer search page with filters...", "in-progress")
|
| 1100 |
|
| 1101 |
+
self.page.goto(filter_url)
|
| 1102 |
|
| 1103 |
# Wait for the page to load completely
|
| 1104 |
+
self.page.wait_for_timeout(5000)
|
| 1105 |
|
| 1106 |
if progress_callback:
|
| 1107 |
progress_callback("Employer search page loaded. Extracting employer information...", "in-progress")
|
|
|
|
| 1125 |
progress_callback(error_msg, "error")
|
| 1126 |
|
| 1127 |
finally:
|
| 1128 |
+
if self.browser_manager is not None:
|
| 1129 |
try:
|
| 1130 |
print("\nClosing browser in 10 seconds...")
|
| 1131 |
time.sleep(10)
|
| 1132 |
+
self.browser_manager.close()
|
| 1133 |
print("Browser closed successfully.")
|
| 1134 |
except Exception as e:
|
| 1135 |
print(f"Warning: Error closing browser: {str(e)}")
|
HandshakeJobApply.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
Handshake Job Application Automation Module
|
| 3 |
|
| 4 |
This module automates applying to jobs on Handshake.
|
| 5 |
-
It uses
|
| 6 |
1. Log into Handshake with user credentials
|
| 7 |
2. Navigate to job listings matching desired criteria
|
| 8 |
3. Apply to relevant positions
|
|
@@ -14,24 +14,13 @@ import time
|
|
| 14 |
import json
|
| 15 |
import urllib
|
| 16 |
import pandas as pd
|
| 17 |
-
import anthropic
|
| 18 |
-
import setup
|
| 19 |
import ResumeGenerator
|
| 20 |
import CoverLetterGenerator
|
| 21 |
from PyPDF2 import PdfReader
|
| 22 |
from datetime import datetime
|
| 23 |
-
from
|
| 24 |
-
from
|
| 25 |
-
from
|
| 26 |
-
from selenium.webdriver.support.ui import WebDriverWait
|
| 27 |
-
from selenium.webdriver.support import expected_conditions as EC
|
| 28 |
-
from selenium.webdriver.chrome.options import Options
|
| 29 |
-
from selenium.webdriver.chrome.service import Service
|
| 30 |
-
from selenium.webdriver.common.action_chains import ActionChains
|
| 31 |
-
from selenium.common.exceptions import TimeoutException, NoSuchElementException
|
| 32 |
-
from webdriver_manager.chrome import ChromeDriverManager
|
| 33 |
-
|
| 34 |
-
api_key = setup.API_KEY
|
| 35 |
|
| 36 |
|
| 37 |
class HandshakeJobApplicator:
|
|
@@ -49,87 +38,30 @@ class HandshakeJobApplicator:
|
|
| 49 |
user_id: User ID for database tracking
|
| 50 |
"""
|
| 51 |
self.headless = headless
|
| 52 |
-
self.
|
| 53 |
-
self.
|
| 54 |
self.resume_path = resume_path
|
| 55 |
self.user_id = user_id
|
| 56 |
|
| 57 |
-
#
|
| 58 |
-
self.
|
| 59 |
-
if not self.claude_api_key:
|
| 60 |
-
raise ValueError(
|
| 61 |
-
"API_KEY not set in setup.py. "
|
| 62 |
-
"Please set it with your Claude API key from https://console.anthropic.com/"
|
| 63 |
-
)
|
| 64 |
-
|
| 65 |
-
# Validate API key format
|
| 66 |
-
if not self.claude_api_key.startswith('sk-ant-'):
|
| 67 |
-
raise ValueError(
|
| 68 |
-
f"Invalid API key format. API keys should start with 'sk-ant-'. "
|
| 69 |
-
f"Please check your API key in setup.py"
|
| 70 |
-
)
|
| 71 |
-
|
| 72 |
-
try:
|
| 73 |
-
self.claude_client = anthropic.Anthropic(api_key=self.claude_api_key)
|
| 74 |
-
except Exception as e:
|
| 75 |
-
raise ValueError(
|
| 76 |
-
f"Failed to initialize Claude API client: {str(e)}. "
|
| 77 |
-
f"Please check your API key in setup.py"
|
| 78 |
-
)
|
| 79 |
|
| 80 |
# Job application tracking log file
|
| 81 |
self.application_log_file = os.path.join(os.path.dirname(__file__), "handshake_applications_log.json")
|
| 82 |
|
| 83 |
def setup_driver(self):
|
| 84 |
-
"""Set up
|
| 85 |
-
chrome_options = Options()
|
| 86 |
-
|
| 87 |
-
if self.headless:
|
| 88 |
-
chrome_options.add_argument('--headless=new')
|
| 89 |
-
|
| 90 |
-
# Stability and compatibility options
|
| 91 |
-
chrome_options.add_argument('--no-sandbox')
|
| 92 |
-
chrome_options.add_argument('--disable-dev-shm-usage')
|
| 93 |
-
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
|
| 94 |
-
chrome_options.add_argument('--disable-gpu')
|
| 95 |
-
chrome_options.add_argument('--disable-software-rasterizer')
|
| 96 |
-
chrome_options.add_argument('--window-size=1920,1080')
|
| 97 |
-
chrome_options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36')
|
| 98 |
-
|
| 99 |
-
# Disable automation flags
|
| 100 |
-
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
|
| 101 |
-
chrome_options.add_experimental_option('useAutomationExtension', False)
|
| 102 |
-
|
| 103 |
-
# Add error logging
|
| 104 |
-
chrome_options.add_argument('--enable-logging')
|
| 105 |
-
chrome_options.add_argument('--v=1')
|
| 106 |
-
|
| 107 |
try:
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
self.driver = webdriver.Chrome(service=Service(driver_path), options=chrome_options)
|
| 112 |
-
|
| 113 |
-
capabilities = self.driver.capabilities
|
| 114 |
-
print(f"Chrome version: {capabilities.get('browserVersion', 'Unknown')}")
|
| 115 |
-
print(f"ChromeDriver version: {capabilities.get('chrome', {}).get('chromedriverVersion', 'Unknown')}")
|
| 116 |
-
|
| 117 |
-
self.driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
|
| 118 |
-
'source': '''
|
| 119 |
-
Object.defineProperty(navigator, 'webdriver', {
|
| 120 |
-
get: () => undefined
|
| 121 |
-
})
|
| 122 |
-
'''
|
| 123 |
-
})
|
| 124 |
-
|
| 125 |
-
self.wait = WebDriverWait(self.driver, 20)
|
| 126 |
|
| 127 |
except Exception as e:
|
| 128 |
-
print(f"Error setting up
|
| 129 |
print("Troubleshooting tips:")
|
| 130 |
-
print("1.
|
| 131 |
-
print("2.
|
| 132 |
-
print("3. Close any existing
|
| 133 |
raise
|
| 134 |
|
| 135 |
def load_applied_jobs(self):
|
|
@@ -205,9 +137,8 @@ class HandshakeJobApplicator:
|
|
| 205 |
if progress_callback:
|
| 206 |
progress_callback("Navigating to Handshake login page...", "in-progress")
|
| 207 |
|
| 208 |
-
self.
|
| 209 |
-
self.
|
| 210 |
-
time.sleep(3)
|
| 211 |
|
| 212 |
if progress_callback:
|
| 213 |
progress_callback("Please log into Handshake in the browser window, then click 'I'm Logged In' button below.", "login-wait")
|
|
@@ -237,21 +168,18 @@ class HandshakeJobApplicator:
|
|
| 237 |
|
| 238 |
# Verify login by checking for jobs page
|
| 239 |
try:
|
| 240 |
-
self.
|
| 241 |
-
self.
|
| 242 |
-
time.sleep(3)
|
| 243 |
|
| 244 |
# Check if we're on the jobs page
|
| 245 |
-
self.
|
| 246 |
-
EC.presence_of_element_located((By.XPATH, "./*"))
|
| 247 |
-
)
|
| 248 |
|
| 249 |
if progress_callback:
|
| 250 |
progress_callback("Successfully logged into Handshake!", "success")
|
| 251 |
|
| 252 |
return True
|
| 253 |
|
| 254 |
-
except
|
| 255 |
if progress_callback:
|
| 256 |
progress_callback("Login verification failed. Please ensure you're logged in.", "error")
|
| 257 |
return False
|
|
@@ -327,21 +255,12 @@ Return your answer as a JSON array of industry names EXACTLY as they appear in t
|
|
| 327 |
Return ONLY the JSON array with no markdown formatting, nothing else. You must include at least 1 industry."""
|
| 328 |
|
| 329 |
try:
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
max_tokens=300,
|
| 333 |
-
messages=[{"role": "user", "content": prompt}]
|
| 334 |
-
)
|
| 335 |
-
except anthropic.AuthenticationError as auth_error:
|
| 336 |
-
print(f"β Claude API authentication failed: {str(auth_error)}")
|
| 337 |
-
raise Exception(f"API authentication error: {str(auth_error)}")
|
| 338 |
except Exception as api_error:
|
| 339 |
-
print(f"β
|
| 340 |
raise Exception(f"API error: {str(api_error)}")
|
| 341 |
|
| 342 |
-
# Parse the response
|
| 343 |
-
response_text = response.content[0].text.strip()
|
| 344 |
-
|
| 345 |
# Remove markdown code blocks if present
|
| 346 |
if response_text.startswith("```"):
|
| 347 |
response_text = response_text.split("```")[1]
|
|
@@ -417,25 +336,14 @@ For example:
|
|
| 417 |
Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown, just the string "lat,long"."""
|
| 418 |
|
| 419 |
try:
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
max_tokens=100,
|
| 423 |
-
messages=[{"role": "user", "content": prompt}]
|
| 424 |
-
)
|
| 425 |
-
except anthropic.AuthenticationError as auth_error:
|
| 426 |
-
raise ValueError(
|
| 427 |
-
f"Claude API authentication failed: {str(auth_error)}\n"
|
| 428 |
-
f"Your API key in setup.py may be invalid or expired."
|
| 429 |
-
)
|
| 430 |
except Exception as api_error:
|
| 431 |
raise ValueError(
|
| 432 |
-
f"
|
| 433 |
f"Please check your API key and internet connection."
|
| 434 |
)
|
| 435 |
|
| 436 |
-
# Parse the response - should be just the coordinates string
|
| 437 |
-
coordinates = response.content[0].text.strip()
|
| 438 |
-
|
| 439 |
# Remove quotes if AI added them
|
| 440 |
coordinates = coordinates.replace('"', '').replace("'", "")
|
| 441 |
|
|
@@ -557,24 +465,22 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 557 |
progress_callback(f"Navigating to filtered jobs (Industry: {industry}, Location: {location}" +
|
| 558 |
(f", Role: {role}" if role else "") + ")...", "in-progress")
|
| 559 |
|
| 560 |
-
self.
|
| 561 |
-
self.
|
| 562 |
-
|
| 563 |
-
currUrl=self.driver.current_url
|
| 564 |
if industry:
|
| 565 |
for code in industry_codes:
|
| 566 |
-
currUrl+= f'&industries={code}'
|
| 567 |
|
| 568 |
-
currUrl=currUrl + '&jobType=3'
|
| 569 |
-
self.
|
| 570 |
-
self.
|
| 571 |
-
time.sleep(3)
|
| 572 |
if(role):
|
| 573 |
-
jobTypeField = self.
|
| 574 |
-
jobTypeField.
|
| 575 |
-
jobTypeField.
|
| 576 |
-
jobTypeField.
|
| 577 |
-
|
| 578 |
|
| 579 |
|
| 580 |
results["message"] = f"Successfully navigated to filtered jobs page. Filters applied - Industry: {industry}, Location: {location}" + (f", Role: {role}" if role else "")
|
|
@@ -588,41 +494,42 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 588 |
progress_callback("Login successful! Ready for job applications (functionality coming soon).", "success")
|
| 589 |
|
| 590 |
|
| 591 |
-
print('reached applying to selected jobs')
|
| 592 |
-
|
| 593 |
-
jobsHook = self.
|
| 594 |
-
jobsHookElements=jobsHook.
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 619 |
continue
|
| 620 |
-
else:
|
| 621 |
-
results["applications_submitted"] += 1
|
| 622 |
-
except Exception as e:
|
| 623 |
-
if progress_callback:
|
| 624 |
-
progress_callback(f"Could not click element {index + 1}: {e}")
|
| 625 |
-
continue
|
| 626 |
except Exception as e:
|
| 627 |
error_msg = f"Session error: {str(e)}"
|
| 628 |
print(error_msg)
|
|
@@ -631,11 +538,11 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 631 |
progress_callback(error_msg, "error")
|
| 632 |
|
| 633 |
finally:
|
| 634 |
-
if self.
|
| 635 |
try:
|
| 636 |
print("\nClosing browser in 30 seconds...")
|
| 637 |
time.sleep(30)
|
| 638 |
-
self.
|
| 639 |
print("Browser closed successfully.")
|
| 640 |
except Exception as e:
|
| 641 |
print(f"Warning: Error closing browser: {str(e)}")
|
|
@@ -663,46 +570,45 @@ Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown
|
|
| 663 |
"""
|
| 664 |
try:
|
| 665 |
# ADD FUNCTIONALITY TO SEE IF APPLY BUTTON IS THE RIGHT ONE. RETURN FALSE IF NOT.
|
| 666 |
-
applyButton=self.
|
| 667 |
-
if applyButton.
|
| 668 |
print("Correct Apply Button Found")
|
| 669 |
-
|
| 670 |
# Expand job description to get full details
|
| 671 |
print('π Extracting job details...')
|
| 672 |
if progress_callback:
|
| 673 |
progress_callback("Extracting job details...", "in-progress")
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
expandJobDescription = self.driver.find_elements(By.CSS_SELECTOR, "button[class^='sc-kAuIVs']")[1]
|
| 679 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 680 |
|
| 681 |
-
expandJobDescription.click()
|
| 682 |
-
time.sleep(2)
|
| 683 |
-
print('✅ Job description expanded')
|
| 684 |
-
|
| 685 |
try:
|
| 686 |
-
job_title_element = self.
|
| 687 |
-
job_title = job_title_element.
|
| 688 |
except:
|
| 689 |
job_title = "Unknown Position"
|
| 690 |
|
| 691 |
-
#FIX THIS PART
|
| 692 |
-
company_name=job_name
|
| 693 |
|
| 694 |
# Extract job description
|
| 695 |
try:
|
| 696 |
-
job_description = self.
|
| 697 |
except:
|
| 698 |
# Fallback: try to get any visible job description
|
| 699 |
try:
|
| 700 |
-
job_description = self.
|
| 701 |
except:
|
| 702 |
job_description = "No job description available"
|
| 703 |
|
| 704 |
# Extract job ID from URL
|
| 705 |
-
current_url = self.
|
| 706 |
job_id = current_url.split('/')[-1].split('?')[0] if '/' in current_url else f"{company_name}_{job_title}_{int(time.time())}"
|
| 707 |
|
| 708 |
print(f'\n✅ Job Details Extracted:')
|
|
|
|
| 2 |
Handshake Job Application Automation Module
|
| 3 |
|
| 4 |
This module automates applying to jobs on Handshake.
|
| 5 |
+
It uses Playwright for browser automation to:
|
| 6 |
1. Log into Handshake with user credentials
|
| 7 |
2. Navigate to job listings matching desired criteria
|
| 8 |
3. Apply to relevant positions
|
|
|
|
| 14 |
import json
|
| 15 |
import urllib
|
| 16 |
import pandas as pd
|
|
|
|
|
|
|
| 17 |
import ResumeGenerator
|
| 18 |
import CoverLetterGenerator
|
| 19 |
from PyPDF2 import PdfReader
|
| 20 |
from datetime import datetime
|
| 21 |
+
from browser_utils import BrowserManager, find_element_with_fallback, scroll_to_bottom
|
| 22 |
+
from llm_client import get_client
|
| 23 |
+
from pdf_utils import extract_text_from_pdf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
class HandshakeJobApplicator:
|
|
|
|
| 38 |
user_id: User ID for database tracking
|
| 39 |
"""
|
| 40 |
self.headless = headless
|
| 41 |
+
self.browser_manager = None
|
| 42 |
+
self.page = None
|
| 43 |
self.resume_path = resume_path
|
| 44 |
self.user_id = user_id
|
| 45 |
|
| 46 |
+
# LLM client configuration (OpenRouter)
|
| 47 |
+
self.llm_client = get_client()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
# Job application tracking log file
|
| 50 |
self.application_log_file = os.path.join(os.path.dirname(__file__), "handshake_applications_log.json")
|
| 51 |
|
| 52 |
def setup_driver(self):
|
| 53 |
+
"""Set up Playwright browser with appropriate options."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
try:
|
| 55 |
+
self.browser_manager = BrowserManager(headless=self.headless)
|
| 56 |
+
self.page = self.browser_manager.setup()
|
| 57 |
+
print(f"Playwright browser initialized successfully")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
except Exception as e:
|
| 60 |
+
print(f"Error setting up Playwright browser: {str(e)}")
|
| 61 |
print("Troubleshooting tips:")
|
| 62 |
+
print("1. Run: pip install playwright")
|
| 63 |
+
print("2. Run: playwright install chromium")
|
| 64 |
+
print("3. Close any existing browser instances")
|
| 65 |
raise
|
| 66 |
|
| 67 |
def load_applied_jobs(self):
|
|
|
|
| 137 |
if progress_callback:
|
| 138 |
progress_callback("Navigating to Handshake login page...", "in-progress")
|
| 139 |
|
| 140 |
+
self.page.goto("https://app.joinhandshake.com/login")
|
| 141 |
+
self.page.wait_for_timeout(3000)
|
|
|
|
| 142 |
|
| 143 |
if progress_callback:
|
| 144 |
progress_callback("Please log into Handshake in the browser window, then click 'I'm Logged In' button below.", "login-wait")
|
|
|
|
| 168 |
|
| 169 |
# Verify login by checking for jobs page
|
| 170 |
try:
|
| 171 |
+
self.page.goto("https://app.joinhandshake.com/stu/postings")
|
| 172 |
+
self.page.wait_for_timeout(3000)
|
|
|
|
| 173 |
|
| 174 |
# Check if we're on the jobs page
|
| 175 |
+
self.page.wait_for_selector("body", timeout=20000)
|
|
|
|
|
|
|
| 176 |
|
| 177 |
if progress_callback:
|
| 178 |
progress_callback("Successfully logged into Handshake!", "success")
|
| 179 |
|
| 180 |
return True
|
| 181 |
|
| 182 |
+
except Exception:
|
| 183 |
if progress_callback:
|
| 184 |
progress_callback("Login verification failed. Please ensure you're logged in.", "error")
|
| 185 |
return False
|
|
|
|
| 255 |
Return ONLY the JSON array with no markdown formatting, nothing else. You must include at least 1 industry."""
|
| 256 |
|
| 257 |
try:
|
| 258 |
+
response_text = self.llm_client.create_message(prompt, max_tokens=300)
|
| 259 |
+
response_text = response_text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
except Exception as api_error:
|
| 261 |
+
print(f"β LLM API error: {str(api_error)}")
|
| 262 |
raise Exception(f"API error: {str(api_error)}")
|
| 263 |
|
|
|
|
|
|
|
|
|
|
| 264 |
# Remove markdown code blocks if present
|
| 265 |
if response_text.startswith("```"):
|
| 266 |
response_text = response_text.split("```")[1]
|
|
|
|
| 336 |
Return ONLY the coordinates string in quotes, nothing else. No JSON, no markdown, just the string "lat,long"."""
|
| 337 |
|
| 338 |
try:
|
| 339 |
+
coordinates = self.llm_client.create_message(prompt, max_tokens=100)
|
| 340 |
+
coordinates = coordinates.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
except Exception as api_error:
|
| 342 |
raise ValueError(
|
| 343 |
+
f"LLM API error: {str(api_error)}\n"
|
| 344 |
f"Please check your API key and internet connection."
|
| 345 |
)
|
| 346 |
|
|
|
|
|
|
|
|
|
|
| 347 |
# Remove quotes if AI added them
|
| 348 |
coordinates = coordinates.replace('"', '').replace("'", "")
|
| 349 |
|
|
|
|
| 465 |
progress_callback(f"Navigating to filtered jobs (Industry: {industry}, Location: {location}" +
|
| 466 |
(f", Role: {role}" if role else "") + ")...", "in-progress")
|
| 467 |
|
| 468 |
+
self.page.goto(filter_url)
|
| 469 |
+
self.page.wait_for_timeout(5000)
|
| 470 |
+
currUrl = self.page.url
|
|
|
|
| 471 |
if industry:
|
| 472 |
for code in industry_codes:
|
| 473 |
+
currUrl += f'&industries={code}'
|
| 474 |
|
| 475 |
+
currUrl = currUrl + '&jobType=3'
|
| 476 |
+
self.page.goto(currUrl)
|
| 477 |
+
self.page.wait_for_timeout(3000)
|
|
|
|
| 478 |
if(role):
|
| 479 |
+
jobTypeField = self.page.locator("input[placeholder='Search jobs']")
|
| 480 |
+
jobTypeField.fill("")
|
| 481 |
+
jobTypeField.fill(role)
|
| 482 |
+
jobTypeField.press("Enter")
|
| 483 |
+
self.page.wait_for_timeout(4000)
|
| 484 |
|
| 485 |
|
| 486 |
results["message"] = f"Successfully navigated to filtered jobs page. Filters applied - Industry: {industry}, Location: {location}" + (f", Role: {role}" if role else "")
|
|
|
|
| 494 |
progress_callback("Login successful! Ready for job applications (functionality coming soon).", "success")
|
| 495 |
|
| 496 |
|
| 497 |
+
print('reached applying to selected jobs')
|
| 498 |
+
self.page.wait_for_timeout(3000)
|
| 499 |
+
jobsHook = self.page.locator("[aria-label='Jobs List']")
|
| 500 |
+
jobsHookElements = jobsHook.locator("> *").all()
|
| 501 |
+
if len(jobsHookElements) > 2:
|
| 502 |
+
clickableJobLinks = jobsHookElements[2]
|
| 503 |
+
iterativeJobLinks = clickableJobLinks.locator("> *").all()
|
| 504 |
+
|
| 505 |
+
for index, element in enumerate(iterativeJobLinks):
|
| 506 |
+
try:
|
| 507 |
+
print(f"Index Value: {index}")
|
| 508 |
+
# Re-fetch elements to avoid stale references
|
| 509 |
+
jobsHook = self.page.locator("[aria-label='Jobs List']")
|
| 510 |
+
jobsHookElements = jobsHook.locator("> *").all()
|
| 511 |
+
clickableJobLinks = jobsHookElements[2]
|
| 512 |
+
iterativeJobLinks = clickableJobLinks.locator("> *").all()
|
| 513 |
+
currentJob = iterativeJobLinks[index]
|
| 514 |
+
|
| 515 |
+
currentJob.scroll_into_view_if_needed()
|
| 516 |
+
print(f"Clicking element {index + 1}...")
|
| 517 |
+
jobText = currentJob.text_content() or ""
|
| 518 |
+
print(jobText)
|
| 519 |
+
currentJob.click()
|
| 520 |
+
|
| 521 |
+
jobName = jobText.split('\n')[0]
|
| 522 |
+
|
| 523 |
+
self.page.wait_for_timeout(1000)
|
| 524 |
+
value = self.applyToSelectedJob(jobName, progress_callback)
|
| 525 |
+
if not value:
|
| 526 |
+
continue
|
| 527 |
+
else:
|
| 528 |
+
results["applications_submitted"] += 1
|
| 529 |
+
except Exception as e:
|
| 530 |
+
if progress_callback:
|
| 531 |
+
progress_callback(f"Could not click element {index + 1}: {e}")
|
| 532 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 533 |
except Exception as e:
|
| 534 |
error_msg = f"Session error: {str(e)}"
|
| 535 |
print(error_msg)
|
|
|
|
| 538 |
progress_callback(error_msg, "error")
|
| 539 |
|
| 540 |
finally:
|
| 541 |
+
if self.browser_manager is not None:
|
| 542 |
try:
|
| 543 |
print("\nClosing browser in 30 seconds...")
|
| 544 |
time.sleep(30)
|
| 545 |
+
self.browser_manager.close()
|
| 546 |
print("Browser closed successfully.")
|
| 547 |
except Exception as e:
|
| 548 |
print(f"Warning: Error closing browser: {str(e)}")
|
|
|
|
| 570 |
"""
|
| 571 |
try:
|
| 572 |
# ADD FUNCTIONALITY TO SEE IF APPLY BUTTON IS THE RIGHT ONE. RETURN FALSE IF NOT.
|
| 573 |
+
applyButton = self.page.locator("button[class^='sc-hhOBVt']").first
|
| 574 |
+
if applyButton.text_content() == "Apply":
|
| 575 |
print("Correct Apply Button Found")
|
| 576 |
+
|
| 577 |
# Expand job description to get full details
|
| 578 |
print('π Extracting job details...')
|
| 579 |
if progress_callback:
|
| 580 |
progress_callback("Extracting job details...", "in-progress")
|
| 581 |
+
expandJobDescriptionElements = self.page.locator("button[class^='sc-kAuIVs']").all()
|
| 582 |
+
if len(expandJobDescriptionElements) > 1:
|
| 583 |
+
expandJobDescription = expandJobDescriptionElements[1]
|
| 584 |
+
print(expandJobDescription.text_content())
|
|
|
|
| 585 |
|
| 586 |
+
expandJobDescription.scroll_into_view_if_needed()
|
| 587 |
+
expandJobDescription.click()
|
| 588 |
+
self.page.wait_for_timeout(2000)
|
| 589 |
+
print('✅ Job description expanded')
|
| 590 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 591 |
try:
|
| 592 |
+
job_title_element = self.page.locator("h1[class^='sc-']").first
|
| 593 |
+
job_title = (job_title_element.text_content() or "").strip()
|
| 594 |
except:
|
| 595 |
job_title = "Unknown Position"
|
| 596 |
|
| 597 |
+
# FIX THIS PART
|
| 598 |
+
company_name = job_name
|
| 599 |
|
| 600 |
# Extract job description
|
| 601 |
try:
|
| 602 |
+
job_description = self.page.locator("xpath=//*[text()='At a glance']/ancestor::div[3]/div[5]/div[1]").text_content() or ""
|
| 603 |
except:
|
| 604 |
# Fallback: try to get any visible job description
|
| 605 |
try:
|
| 606 |
+
job_description = self.page.locator("[class*='description']").first.text_content() or ""
|
| 607 |
except:
|
| 608 |
job_description = "No job description available"
|
| 609 |
|
| 610 |
# Extract job ID from URL
|
| 611 |
+
current_url = self.page.url
|
| 612 |
job_id = current_url.split('/')[-1].split('?')[0] if '/' in current_url else f"{company_name}_{job_title}_{int(time.time())}"
|
| 613 |
|
| 614 |
print(f'\n✅ Job Details Extracted:')
|
ResumeGenerator.py
CHANGED
|
@@ -1,22 +1,21 @@
|
|
| 1 |
"""
|
| 2 |
Resume Generation Module for ATS Optimization
|
| 3 |
|
| 4 |
-
This module uses
|
| 5 |
-
to be ATS-optimized. It generates professional LaTeX resumes
|
| 6 |
"""
|
| 7 |
|
| 8 |
import os
|
| 9 |
import re
|
| 10 |
import json
|
| 11 |
import shutil
|
| 12 |
-
import anthropic
|
| 13 |
-
import setup
|
| 14 |
import subprocess
|
| 15 |
from datetime import datetime
|
| 16 |
from pathlib import Path
|
| 17 |
from pylatex import Document, Section, Subsection, Command, Package
|
| 18 |
from pylatex.utils import NoEscape, bold, italic
|
| 19 |
from PyPDF2 import PdfReader
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
def check_latex_installation():
|
|
@@ -57,12 +56,7 @@ class ATSResumeGenerator:
|
|
| 57 |
warn_latex: Whether to warn about missing LaTeX installation
|
| 58 |
"""
|
| 59 |
self.original_resume_path = original_resume_path
|
| 60 |
-
self.
|
| 61 |
-
|
| 62 |
-
if not self.claude_api_key or not self.claude_api_key.startswith('sk-ant-'):
|
| 63 |
-
raise ValueError("Invalid API key in setup.py")
|
| 64 |
-
|
| 65 |
-
self.claude_client = anthropic.Anthropic(api_key=self.claude_api_key)
|
| 66 |
|
| 67 |
# Create directories for generated resumes
|
| 68 |
self.generated_resumes_dir = os.path.join(os.path.dirname(__file__), "generated_resumes")
|
|
@@ -199,14 +193,8 @@ Return ONLY a JSON object with the following structure (no markdown, no code blo
|
|
| 199 |
IMPORTANT: Wrap items to be bolded with **double asterisks** in the bullet points. Include all relevant sections that exist in the original resume. If a section doesn't exist or isn't relevant, include it as an empty array or omit it. Focus on making this resume highly tailored to the {job_title} position at {company_name}."""
|
| 200 |
|
| 201 |
try:
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
max_tokens=4000,
|
| 205 |
-
messages=[{"role": "user", "content": prompt}]
|
| 206 |
-
)
|
| 207 |
-
|
| 208 |
-
# Parse response
|
| 209 |
-
response_text = response.content[0].text.strip()
|
| 210 |
|
| 211 |
# Remove markdown code blocks if present
|
| 212 |
if response_text.startswith("```"):
|
|
|
|
| 1 |
"""
|
| 2 |
Resume Generation Module for ATS Optimization
|
| 3 |
|
| 4 |
+
This module uses OpenRouter API (MiMo v2 Flash) to analyze job descriptions
|
| 5 |
+
and tailor resumes to be ATS-optimized. It generates professional LaTeX resumes.
|
| 6 |
"""
|
| 7 |
|
| 8 |
import os
|
| 9 |
import re
|
| 10 |
import json
|
| 11 |
import shutil
|
|
|
|
|
|
|
| 12 |
import subprocess
|
| 13 |
from datetime import datetime
|
| 14 |
from pathlib import Path
|
| 15 |
from pylatex import Document, Section, Subsection, Command, Package
|
| 16 |
from pylatex.utils import NoEscape, bold, italic
|
| 17 |
from PyPDF2 import PdfReader
|
| 18 |
+
from llm_client import get_client
|
| 19 |
|
| 20 |
|
| 21 |
def check_latex_installation():
|
|
|
|
| 56 |
warn_latex: Whether to warn about missing LaTeX installation
|
| 57 |
"""
|
| 58 |
self.original_resume_path = original_resume_path
|
| 59 |
+
self.llm_client = get_client()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
# Create directories for generated resumes
|
| 62 |
self.generated_resumes_dir = os.path.join(os.path.dirname(__file__), "generated_resumes")
|
|
|
|
| 193 |
IMPORTANT: Wrap items to be bolded with **double asterisks** in the bullet points. Include all relevant sections that exist in the original resume. If a section doesn't exist or isn't relevant, include it as an empty array or omit it. Focus on making this resume highly tailored to the {job_title} position at {company_name}."""
|
| 194 |
|
| 195 |
try:
|
| 196 |
+
response_text = self.llm_client.create_message(prompt, max_tokens=4000)
|
| 197 |
+
response_text = response_text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
# Remove markdown code blocks if present
|
| 200 |
if response_text.startswith("```"):
|
browser_utils.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Browser automation utilities using Playwright.
|
| 3 |
+
Includes anti-detection measures for web automation.
|
| 4 |
+
"""
|
| 5 |
+
from playwright.sync_api import sync_playwright, Page, Browser, Playwright
|
| 6 |
+
|
| 7 |
+
# Try to import stealth plugin if available
|
| 8 |
+
try:
|
| 9 |
+
from playwright_stealth import stealth_sync
|
| 10 |
+
HAS_STEALTH = True
|
| 11 |
+
except ImportError:
|
| 12 |
+
HAS_STEALTH = False
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class BrowserManager:
    """
    Manages Playwright browser instance with anti-detection measures.

    ``setup()`` creates four resources (Playwright driver, Chromium browser,
    browser context, page) and ``close()`` releases them. The class can also
    be used as a context manager, in which case ``setup()`` / ``close()`` are
    called on entry / exit.
    """

    def __init__(self, headless=False):
        """
        Args:
            headless: Forwarded to ``chromium.launch``; False shows the window.
        """
        self.headless = headless
        # Every handle is None until setup() runs and is reset to None by close().
        self.playwright: Playwright = None
        self.browser: Browser = None
        self.context = None
        self.page: Page = None

    def setup(self):
        """
        Initialize browser with anti-detection measures.

        Returns:
            The newly created page.

        Raises:
            Whatever Playwright raises. Resources created before the failure
            are released first so a failed setup does not leak a browser.
        """
        try:
            self.playwright = sync_playwright().start()

            # Launch browser with stealth options
            self.browser = self.playwright.chromium.launch(
                headless=self.headless,
                args=[
                    '--no-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-blink-features=AutomationControlled',
                    '--disable-gpu',
                    '--disable-software-rasterizer',
                    '--window-size=1920,1080',
                ]
            )

            # Create context with custom user agent
            self.context = self.browser.new_context(
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
                viewport={'width': 1920, 'height': 1080},
                java_script_enabled=True,
            )

            self.page = self.context.new_page()

            # Apply stealth if available
            if HAS_STEALTH:
                stealth_sync(self.page)

            # Remove webdriver property
            self.page.add_init_script("""
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                });
            """)

            # Set default timeout (equivalent to WebDriverWait 20 seconds)
            self.page.set_default_timeout(20000)
        except BaseException:
            # __exit__ is never invoked when __enter__ fails, so clean up here.
            # Cleanup errors are dropped so the original failure is the one raised.
            self._release_all()
            raise

        return self.page

    def close(self):
        """
        Clean up browser resources.

        Every held resource is released even if an earlier one fails to close.
        The first error encountered is re-raised afterwards. Calling this
        again after a successful or failed close is a no-op.
        """
        first_error = self._release_all()
        if first_error is not None:
            raise first_error

    def _release_all(self):
        """Release every held resource; return the first error raised, or None."""
        first_error = None
        # Same order as before: page, context, browser, then the driver.
        teardown_steps = (
            ("page", "close"),
            ("context", "close"),
            ("browser", "close"),
            ("playwright", "stop"),
        )
        for attr_name, method_name in teardown_steps:
            resource = getattr(self, attr_name)
            # Forget the handle before touching it so a retry never reuses it.
            setattr(self, attr_name, None)
            if resource is None:
                continue
            try:
                getattr(resource, method_name)()
            except Exception as exc:
                if first_error is None:
                    first_error = exc
        return first_error

    def __enter__(self):
        """Context manager entry."""
        self.setup()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.close()
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def find_element_with_fallback(page: Page, selectors: list, timeout: int = 5000):
    """
    Try multiple selectors until one succeeds.

    Args:
        page: Playwright page object
        selectors: List of CSS/XPath selectors
        timeout: Timeout per selector attempt in milliseconds

    Returns:
        Locator for the first selector that has a visible match, None if no
        selector matched within its timeout. The returned locator is the
        unrestricted one, so it may resolve to several elements.
    """
    for selector in selectors:
        try:
            locator = page.locator(selector)
            # Wait on the first match only. wait_for() on a locator that
            # resolves to several elements raises a strict-mode error, which
            # would be swallowed below and wrongly reported as "not found".
            locator.first.wait_for(timeout=timeout, state='visible')
        except Exception:
            # Timeout or invalid selector: fall through to the next candidate.
            continue
        return locator
    return None
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def scroll_to_bottom(page: Page, max_scrolls: int = 10, wait_time: int = 2000):
    """
    Scroll down repeatedly so dynamically loaded content appears, then return to the top.

    Scrolling stops early as soon as the document height no longer changes
    between two consecutive scrolls.

    Args:
        page: Playwright page object
        max_scrolls: Upper bound on the number of scroll attempts
        wait_time: Pause after each scroll, in milliseconds
    """
    height_script = "document.body.scrollHeight"
    previous_height = page.evaluate(height_script)

    attempts = 0
    while attempts < max_scrolls:
        attempts += 1
        page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
        page.wait_for_timeout(wait_time)

        current_height = page.evaluate(height_script)
        if current_height == previous_height:
            # Nothing new was loaded by the last scroll.
            break
        previous_height = current_height

    # Leave the page scrolled back to the top.
    page.evaluate("window.scrollTo(0, 0)")
    page.wait_for_timeout(1000)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def create_browser(headless=False):
    """
    Build a BrowserManager without starting the browser.

    Args:
        headless: Run in headless mode (default: False for debugging)

    Returns:
        A new BrowserManager instance configured with the given headless flag
    """
    manager = BrowserManager(headless=headless)
    return manager
|
llm_client.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LLM Client Wrapper for OpenRouter API
|
| 3 |
+
Provides a unified interface for OpenRouter models (MiMo v2 Flash).
|
| 4 |
+
"""
|
| 5 |
+
import os
|
| 6 |
+
from openai import OpenAI
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
|
| 9 |
+
load_dotenv()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class LLMClient:
    """
    Wrapper class for OpenRouter API using OpenAI SDK format.
    """

    # Model used when the caller does not choose one.
    DEFAULT_MODEL = "xiaomi/mimo-v2-flash:free"

    def __init__(self, api_key=None, model=None):
        """
        Args:
            api_key: OpenRouter API key; falls back to the OPENROUTER_API_KEY
                environment variable when omitted or empty.
            model: OpenRouter model identifier; defaults to DEFAULT_MODEL.

        Raises:
            ValueError: If no API key is supplied and none is in the environment.
        """
        self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not self.api_key:
            raise ValueError(
                "OPENROUTER_API_KEY not set. "
                "Please set it in your .env file."
            )

        self.client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=self.api_key
        )
        self.model = model or self.DEFAULT_MODEL

    def create_message(self, prompt, max_tokens=4096, system_prompt=None):
        """
        Create a message using OpenRouter API.

        Args:
            prompt: User prompt (string or list of content blocks)
            max_tokens: Maximum tokens to generate
            system_prompt: Optional system prompt

        Returns:
            str: The model's response text; an empty string when the API
            returns a message without text content.

        Raises:
            TypeError: If prompt is neither a string nor a list.
        """
        # Handle string prompt or content list
        if isinstance(prompt, str):
            user_content = prompt
        elif isinstance(prompt, list):
            # Convert content blocks to text-only format
            user_content = self._convert_content_blocks(prompt)
        else:
            # Previously such a prompt was dropped silently and a request
            # without any user message was sent.
            raise TypeError(
                f"prompt must be a str or a list of content blocks, got {type(prompt).__name__}"
            )

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": user_content})

        response = self.client.chat.completions.create(
            model=self.model,
            max_tokens=max_tokens,
            messages=messages
        )

        # content can be None; callers treat the result as a string.
        content = response.choices[0].message.content
        return content if content is not None else ""

    def _convert_content_blocks(self, content_blocks):
        """
        Convert content blocks to text-only format.
        OpenRouter/MiMo doesn't support document attachments.

        Dict blocks whose "type" is "text" contribute their "text" value and
        plain strings are kept as-is; every other block is dropped. The
        resulting pieces are joined with newlines.
        """
        text_parts = []
        for block in content_blocks:
            if isinstance(block, dict) and block.get("type") == "text":
                text_parts.append(block.get("text", ""))
            elif isinstance(block, str):
                text_parts.append(block)
        return "\n".join(text_parts)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# Singleton instance for global use
|
| 79 |
+
_client = None
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def get_client(api_key=None):
    """
    Get or create the LLM client singleton.

    Args:
        api_key: Optional API key. When given and different from the key of
            the cached client, the cached client is replaced so an explicit
            key is never silently ignored. When omitted, the cached client
            (if any) is returned unchanged.

    Returns:
        The shared LLMClient instance.
    """
    global _client
    cached_key_differs = (
        _client is not None
        and bool(api_key)
        and getattr(_client, "api_key", None) != api_key
    )
    if _client is None or cached_key_differs:
        _client = LLMClient(api_key)
    return _client
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def reset_client():
    """
    Discard the cached singleton client (useful for testing).

    Sets the module-level ``_client`` back to None.
    """
    global _client
    _client = None
|
pdf_utils.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PDF Utility functions for text extraction.
|
| 3 |
+
Used to convert PDF resumes to text for LLM processing.
|
| 4 |
+
"""
|
| 5 |
+
from PyPDF2 import PdfReader
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def extract_text_from_pdf(pdf_path):
    """
    Extract text content from a PDF file.

    Args:
        pdf_path: Path to the PDF file

    Returns:
        str: Extracted text content, or an empty string if the file cannot
        be read or parsed (the error is printed, not raised).
    """
    try:
        return _collect_page_text(PdfReader(pdf_path))
    except Exception as e:
        # Best-effort by design: callers receive "" instead of an exception.
        print(f"Error extracting PDF text: {str(e)}")
        return ""


def extract_text_from_pdf_bytes(pdf_bytes):
    """
    Extract text content from PDF bytes (for in-memory PDFs).

    Args:
        pdf_bytes: PDF file content as bytes

    Returns:
        str: Extracted text content, or an empty string if the bytes cannot
        be parsed (the error is printed, not raised).
    """
    import io
    try:
        return _collect_page_text(PdfReader(io.BytesIO(pdf_bytes)))
    except Exception as e:
        # Best-effort by design: callers receive "" instead of an exception.
        print(f"Error extracting PDF text from bytes: {str(e)}")
        return ""


def _collect_page_text(reader):
    """Return the text of all pages of *reader*, one newline after each page, stripped."""
    # Pages whose extract_text() result is None or empty are skipped.
    page_texts = [
        page_text
        for page_text in (page.extract_text() for page in reader.pages)
        if page_text
    ]
    return "".join(f"{page_text}\n" for page_text in page_texts).strip()
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
# Core AI and API
|
| 2 |
-
|
| 3 |
requests>=2.31.0
|
| 4 |
|
| 5 |
# Web Framework and Authentication
|
|
@@ -8,9 +8,9 @@ werkzeug>=3.0.0
|
|
| 8 |
flask-login>=0.6.3
|
| 9 |
flask-sqlalchemy>=3.1.1
|
| 10 |
|
| 11 |
-
# Browser Automation (
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
# Excel File Processing (for legacy Workflow Company Log)
|
| 16 |
pandas>=2.0.0
|
|
|
|
| 1 |
+
# Core AI and API (OpenRouter with OpenAI SDK)
|
| 2 |
+
openai>=1.0.0
|
| 3 |
requests>=2.31.0
|
| 4 |
|
| 5 |
# Web Framework and Authentication
|
|
|
|
| 8 |
flask-login>=0.6.3
|
| 9 |
flask-sqlalchemy>=3.1.1
|
| 10 |
|
| 11 |
+
# Browser Automation (Playwright for Vercel compatibility)
|
| 12 |
+
playwright>=1.40.0
|
| 13 |
+
playwright-stealth>=1.0.0
|
| 14 |
|
| 15 |
# Excel File Processing (for legacy Workflow Company Log)
|
| 16 |
pandas>=2.0.0
|
test_playwright.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Test script for Playwright browser automation.
|
| 3 |
+
Verifies that Playwright is properly installed and configured.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from browser_utils import BrowserManager, create_browser
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def test_playwright():
    """
    Test Playwright browser initialization and basic navigation.

    Launches a visible browser through create_browser(), opens Google, waits
    for the search box, prints the browser version, and always closes the
    browser at the end. Any failure is reported with troubleshooting hints
    and then re-raised.
    """
    print("=" * 60)
    print("Playwright Browser Test")
    print("=" * 60)

    manager = None
    try:
        print("\n1. Initializing Playwright browser...")
        manager = create_browser(headless=False)
        page = manager.setup()
        print(" β Browser initialized successfully")

        print("\n2. Navigating to Google...")
        page.goto("https://www.google.com")
        print(f" β Successfully navigated to: {page.url}")
        print(f" β Page title: {page.title()}")

        print("\n3. Testing page interaction...")
        # Wait for search input. is_visible() returns immediately, so a page
        # without the search box used to be reported as a pass; wait_for()
        # raises after the default timeout instead.
        search_input = page.locator("textarea[name='q'], input[name='q']").first
        search_input.wait_for(state="visible")
        print(" β Found search input")

        print("\n4. Browser info:")
        # Get browser version from context
        browser_version = page.context.browser.version
        print(f" β Browser version: {browser_version}")

        print("\n" + "=" * 60)
        print("All tests passed! Playwright is working correctly.")
        print("=" * 60)

        print("\nClosing browser in 5 seconds...")
        page.wait_for_timeout(5000)

    except Exception as e:
        print(f"\nβ Test failed with error: {str(e)}")
        print("\nTroubleshooting steps:")
        print("1. Run: pip install playwright")
        print("2. Run: playwright install chromium")
        print("3. Make sure no other browser instances are blocking")
        raise

    finally:
        # manager stays None when create_browser() itself failed.
        if manager:
            manager.close()
            print("Browser closed successfully.")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def test_llm_client():
    """
    Smoke-test the OpenRouter LLM client.

    Obtains the shared client, prints its model name, sends one short prompt
    and prints the reply. Failures are reported with troubleshooting hints
    and then re-raised.
    """
    banner = "=" * 60

    print(f"\n{banner}")
    print("OpenRouter LLM Client Test")
    print(banner)

    try:
        # Imported here so an import failure is reported by the handler below.
        from llm_client import get_client

        print("\n1. Initializing LLM client...")
        llm = get_client()
        print(" β Client initialized successfully")
        print(f" β Model: {llm.model}")

        print("\n2. Testing API call...")
        reply = llm.create_message(
            "Say 'Hello, World!' and nothing else.",
            max_tokens=50,
        )
        print(f" β Response: {reply}")

        print(f"\n{banner}")
        print("LLM client test passed!")
        print(banner)

    except Exception as e:
        print(f"\nβ LLM test failed with error: {str(e)}")
        print("\nTroubleshooting steps:")
        print("1. Check your .env file has OPENROUTER_API_KEY set")
        print("2. Verify your API key is valid at https://openrouter.ai")
        raise
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
if __name__ == "__main__":
    import sys

    # The first CLI argument selects what to run:
    #   --llm  -> LLM client test only
    #   --all  -> browser test, then LLM client test
    #   other / none -> browser test only
    mode = sys.argv[1] if len(sys.argv) > 1 else None

    if mode == "--llm":
        test_llm_client()
    else:
        test_playwright()
        if mode == "--all":
            test_llm_client()
|