Commit
·
046508a
1
Parent(s):
6e2bf85
Code Refactoring and Central Logging
Browse files- .dockerignore +38 -0
- .gitignore +2 -0
- .vscode/settings.json +8 -1
- DEPLOYMENT_GUIDE.md +303 -0
- DOCKERFILE_EXPLANATION.md +147 -0
- Dockerfile +41 -0
- docker-compose.override.example.yml +21 -0
- docker-compose.yml +59 -0
- langgraph.json +5 -3
- pyproject.toml +17 -15
- pyrightconfig.json +18 -0
- src/job_writing_agent/agents/nodes.py +220 -53
- src/job_writing_agent/agents/output_schema.py +48 -11
- src/job_writing_agent/classes/__init__.py +2 -2
- src/job_writing_agent/classes/classes.py +60 -7
- src/job_writing_agent/logs/job_writer.log +0 -0
- src/job_writing_agent/nodes/initializing.py +408 -205
- src/job_writing_agent/nodes/job_description_loader.py +192 -0
- src/job_writing_agent/nodes/research_workflow.py +260 -53
- src/job_writing_agent/nodes/resume_loader.py +140 -0
- src/job_writing_agent/nodes/selfconsistency.py +28 -20
- src/job_writing_agent/nodes/variations.py +10 -8
- src/job_writing_agent/prompts/templates.py +22 -11
- src/job_writing_agent/prompts/test_prompts.py +38 -0
- src/job_writing_agent/tools/SearchTool.py +203 -79
- src/job_writing_agent/tools/__init__.py +2 -2
- src/job_writing_agent/utils/application_cli_interface.py +2 -2
- src/job_writing_agent/utils/document_processing.py +129 -87
- src/job_writing_agent/utils/llm_client.py +143 -127
- src/job_writing_agent/utils/llm_provider_factory.py +3 -0
- src/job_writing_agent/utils/logging/logging_config.py +132 -0
- src/job_writing_agent/utils/logging/logging_decorators.py +103 -0
- src/job_writing_agent/workflow.py +221 -42
- uv.lock +0 -0
.dockerignore
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
app_env/
|
| 8 |
+
venv/
|
| 9 |
+
env/
|
| 10 |
+
ENV/
|
| 11 |
+
|
| 12 |
+
# IDE
|
| 13 |
+
.vscode/
|
| 14 |
+
.idea/
|
| 15 |
+
*.swp
|
| 16 |
+
*.swo
|
| 17 |
+
|
| 18 |
+
# Logs
|
| 19 |
+
*.log
|
| 20 |
+
logs/
|
| 21 |
+
|
| 22 |
+
# OS
|
| 23 |
+
.DS_Store
|
| 24 |
+
Thumbs.db
|
| 25 |
+
|
| 26 |
+
# Project specific
|
| 27 |
+
*.pdf
|
| 28 |
+
cover_letter_*.txt
|
| 29 |
+
uv.lock
|
| 30 |
+
|
| 31 |
+
# Git
|
| 32 |
+
.git/
|
| 33 |
+
.gitignore
|
| 34 |
+
|
| 35 |
+
# Documentation
|
| 36 |
+
*.md
|
| 37 |
+
!README.md
|
| 38 |
+
|
.gitignore
CHANGED
|
@@ -4,6 +4,8 @@
|
|
| 4 |
# Environment / secret files
|
| 5 |
job_writing_agent/.env
|
| 6 |
job_writing_agent/.env.*
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# Jupyter notebooks
|
| 9 |
job_writing_agent/*.ipynb
|
|
|
|
| 4 |
# Environment / secret files
|
| 5 |
job_writing_agent/.env
|
| 6 |
job_writing_agent/.env.*
|
| 7 |
+
src/job_writing_agent/.env
|
| 8 |
+
src/job_writing_agent/.env.*
|
| 9 |
|
| 10 |
# Jupyter notebooks
|
| 11 |
job_writing_agent/*.ipynb
|
.vscode/settings.json
CHANGED
|
@@ -1,3 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"python.defaultInterpreterPath": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"python.defaultInterpreterPath": "C:\\Users\\risha\\python-dir\\job_application_agent\\job_writer\\app_env\\Scripts\\python.exe",
|
| 3 |
+
"python.formatting.provider": "black",
|
| 4 |
+
"editor.formatOnSave": true,
|
| 5 |
+
"python.formatting.blackArgs": ["--line-length", "88"],
|
| 6 |
+
"python.linting.enabled": true,
|
| 7 |
+
"python.linting.pylintEnabled": true,
|
| 8 |
+
"python.linting.lintOnSave": true,
|
| 9 |
+
"python.linting.mypyEnabled": true
|
| 10 |
}
|
DEPLOYMENT_GUIDE.md
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Deployment Guide for Job Application Agent
|
| 2 |
+
|
| 3 |
+
## Option 1: LangGraph Cloud (Easiest & Recommended)
|
| 4 |
+
|
| 5 |
+
### Prerequisites
|
| 6 |
+
- LangGraph CLI installed (`langgraph-cli` in requirements.txt)
|
| 7 |
+
- `langgraph.json` already configured ✅
|
| 8 |
+
|
| 9 |
+
### Steps
|
| 10 |
+
|
| 11 |
+
1. **Install LangGraph CLI** (if not already):
|
| 12 |
+
```powershell
|
| 13 |
+
pip install langgraph-cli
|
| 14 |
+
```
|
| 15 |
+
|
| 16 |
+
2. **Login to LangGraph Cloud**:
|
| 17 |
+
```powershell
|
| 18 |
+
langgraph login
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
3. **Deploy your agent**:
|
| 22 |
+
```powershell
|
| 23 |
+
langgraph deploy
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
4. **Get your API endpoint** - LangGraph Cloud provides a REST API automatically
|
| 27 |
+
|
| 28 |
+
### Cost
|
| 29 |
+
- **Free tier**: Limited requests/month
|
| 30 |
+
- **Paid**: Pay-per-use pricing
|
| 31 |
+
|
| 32 |
+
### Pros
|
| 33 |
+
- ✅ Zero infrastructure management
|
| 34 |
+
- ✅ Built-in state persistence
|
| 35 |
+
- ✅ Automatic API generation
|
| 36 |
+
- ✅ LangSmith integration
|
| 37 |
+
- ✅ Perfect for LangGraph apps
|
| 38 |
+
|
| 39 |
+
### Cons
|
| 40 |
+
- ⚠️ Vendor lock-in
|
| 41 |
+
- ⚠️ Limited customization
|
| 42 |
+
|
| 43 |
+
---
|
| 44 |
+
|
| 45 |
+
## Option 2: Railway.app (Simple & Cheap)
|
| 46 |
+
|
| 47 |
+
### Steps
|
| 48 |
+
|
| 49 |
+
1. **Create a FastAPI wrapper** (create `api.py`):
|
| 50 |
+
```python
|
| 51 |
+
from fastapi import FastAPI, File, UploadFile
|
| 52 |
+
from job_writing_agent.workflow import JobWorkflow
|
| 53 |
+
import tempfile
|
| 54 |
+
import os
|
| 55 |
+
|
| 56 |
+
app = FastAPI()
|
| 57 |
+
|
| 58 |
+
@app.post("/generate")
|
| 59 |
+
async def generate_application(
|
| 60 |
+
resume: UploadFile = File(...),
|
| 61 |
+
job_description: str,
|
| 62 |
+
content_type: str = "cover_letter"
|
| 63 |
+
):
|
| 64 |
+
# Save resume temporarily
|
| 65 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
|
| 66 |
+
tmp.write(await resume.read())
|
| 67 |
+
resume_path = tmp.name
|
| 68 |
+
|
| 69 |
+
try:
|
| 70 |
+
workflow = JobWorkflow(
|
| 71 |
+
resume=resume_path,
|
| 72 |
+
job_description_source=job_description,
|
| 73 |
+
content=content_type
|
| 74 |
+
)
|
| 75 |
+
result = await workflow.run()
|
| 76 |
+
return {"result": result}
|
| 77 |
+
finally:
|
| 78 |
+
os.unlink(resume_path)
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
2. **Create `Procfile`**:
|
| 82 |
+
```
|
| 83 |
+
web: uvicorn api:app --host 0.0.0.0 --port $PORT
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
3. **Deploy to Railway**:
|
| 87 |
+
- Sign up at [railway.app](https://railway.app)
|
| 88 |
+
- Connect GitHub repo
|
| 89 |
+
- Railway auto-detects Python and runs `Procfile`
|
| 90 |
+
|
| 91 |
+
### Cost
|
| 92 |
+
- **Free tier**: $5 credit/month
|
| 93 |
+
- **Hobby**: $5/month for 512MB RAM
|
| 94 |
+
- **Pro**: $20/month for 2GB RAM
|
| 95 |
+
|
| 96 |
+
### Pros
|
| 97 |
+
- ✅ Very simple deployment
|
| 98 |
+
- ✅ Auto-scaling
|
| 99 |
+
- ✅ Free tier available
|
| 100 |
+
- ✅ Automatic HTTPS
|
| 101 |
+
|
| 102 |
+
### Cons
|
| 103 |
+
- ⚠️ Need to add FastAPI wrapper
|
| 104 |
+
- ⚠️ State management needs Redis/Postgres
|
| 105 |
+
|
| 106 |
+
---
|
| 107 |
+
|
| 108 |
+
## Option 3: Render.com (Similar to Railway)
|
| 109 |
+
|
| 110 |
+
### Steps
|
| 111 |
+
|
| 112 |
+
1. **Create `render.yaml`**:
|
| 113 |
+
```yaml
|
| 114 |
+
services:
|
| 115 |
+
- type: web
|
| 116 |
+
name: job-writer-api
|
| 117 |
+
env: python
|
| 118 |
+
buildCommand: pip install -r requirements.txt
|
| 119 |
+
startCommand: uvicorn api:app --host 0.0.0.0 --port $PORT
|
| 120 |
+
envVars:
|
| 121 |
+
- key: OPENROUTER_API_KEY
|
| 122 |
+
sync: false
|
| 123 |
+
- key: TAVILY_API_KEY
|
| 124 |
+
sync: false
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
2. **Deploy**:
|
| 128 |
+
- Connect GitHub repo to Render
|
| 129 |
+
- Render auto-detects `render.yaml`
|
| 130 |
+
|
| 131 |
+
### Cost
|
| 132 |
+
- **Free tier**: 750 hours/month (sleeps after 15min inactivity)
|
| 133 |
+
- **Starter**: $7/month (always on)
|
| 134 |
+
|
| 135 |
+
### Pros
|
| 136 |
+
- ✅ Free tier for testing
|
| 137 |
+
- ✅ Simple YAML config
|
| 138 |
+
- ✅ Auto-deploy from Git
|
| 139 |
+
|
| 140 |
+
### Cons
|
| 141 |
+
- ⚠️ Free tier sleeps (cold starts)
|
| 142 |
+
- ⚠️ Need FastAPI wrapper
|
| 143 |
+
|
| 144 |
+
---
|
| 145 |
+
|
| 146 |
+
## Option 4: Fly.io (Good Free Tier)
|
| 147 |
+
|
| 148 |
+
### Steps
|
| 149 |
+
|
| 150 |
+
1. **Install Fly CLI**:
|
| 151 |
+
```powershell
|
| 152 |
+
iwr https://fly.io/install.ps1 -useb | iex
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
2. **Create `Dockerfile`**:
|
| 156 |
+
```dockerfile
|
| 157 |
+
FROM python:3.12-slim
|
| 158 |
+
|
| 159 |
+
WORKDIR /app
|
| 160 |
+
COPY requirements.txt .
|
| 161 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 162 |
+
|
| 163 |
+
COPY . .
|
| 164 |
+
|
| 165 |
+
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8080"]
|
| 166 |
+
```
|
| 167 |
+
|
| 168 |
+
3. **Deploy**:
|
| 169 |
+
```powershell
|
| 170 |
+
fly launch
|
| 171 |
+
fly deploy
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
### Cost
|
| 175 |
+
- **Free tier**: 3 shared-cpu VMs, 3GB storage
|
| 176 |
+
- **Paid**: $1.94/month per VM
|
| 177 |
+
|
| 178 |
+
### Pros
|
| 179 |
+
- ✅ Generous free tier
|
| 180 |
+
- ✅ Global edge deployment
|
| 181 |
+
- ✅ Docker-based (flexible)
|
| 182 |
+
|
| 183 |
+
### Cons
|
| 184 |
+
- ⚠️ Need Docker knowledge
|
| 185 |
+
- ⚠️ Need FastAPI wrapper
|
| 186 |
+
|
| 187 |
+
---
|
| 188 |
+
|
| 189 |
+
## Option 5: AWS Lambda (Serverless - Pay Per Use)
|
| 190 |
+
|
| 191 |
+
### Steps
|
| 192 |
+
|
| 193 |
+
1. **Create Lambda handler** (`lambda_handler.py`):
|
| 194 |
+
```python
|
| 195 |
+
import json
|
| 196 |
+
from job_writing_agent.workflow import JobWorkflow
|
| 197 |
+
|
| 198 |
+
def lambda_handler(event, context):
|
| 199 |
+
# Parse event
|
| 200 |
+
body = json.loads(event['body'])
|
| 201 |
+
|
| 202 |
+
workflow = JobWorkflow(
|
| 203 |
+
resume=body['resume_path'],
|
| 204 |
+
job_description_source=body['job_description'],
|
| 205 |
+
content=body.get('content_type', 'cover_letter')
|
| 206 |
+
)
|
| 207 |
+
|
| 208 |
+
result = workflow.run()
|
| 209 |
+
|
| 210 |
+
return {
|
| 211 |
+
'statusCode': 200,
|
| 212 |
+
'body': json.dumps({'result': result})
|
| 213 |
+
}
|
| 214 |
+
```
|
| 215 |
+
|
| 216 |
+
2. **Package and deploy** using AWS SAM or Serverless Framework
|
| 217 |
+
|
| 218 |
+
### Cost
|
| 219 |
+
- **Free tier**: 1M requests/month
|
| 220 |
+
- **Paid**: $0.20 per 1M requests + compute time
|
| 221 |
+
|
| 222 |
+
### Pros
|
| 223 |
+
- ✅ Pay only for usage
|
| 224 |
+
- ✅ Auto-scaling
|
| 225 |
+
- ✅ Very cheap for low traffic
|
| 226 |
+
|
| 227 |
+
### Cons
|
| 228 |
+
- ⚠️ 15min timeout limit
|
| 229 |
+
- ⚠️ Cold starts
|
| 230 |
+
- ⚠️ Complex setup
|
| 231 |
+
- ⚠️ Need to handle state externally
|
| 232 |
+
|
| 233 |
+
---
|
| 234 |
+
|
| 235 |
+
## Recommendation
|
| 236 |
+
|
| 237 |
+
**For your use case, I recommend:**
|
| 238 |
+
|
| 239 |
+
1. **Start with LangGraph Cloud** - Easiest, built for your stack
|
| 240 |
+
2. **If you need more control → Railway** - Simple, good free tier
|
| 241 |
+
3. **If you need serverless → AWS Lambda** - Cheapest for low traffic
|
| 242 |
+
|
| 243 |
+
---
|
| 244 |
+
|
| 245 |
+
## Quick Start: FastAPI Wrapper (for Railway/Render/Fly.io)
|
| 246 |
+
|
| 247 |
+
Create `api.py` in your project root:
|
| 248 |
+
|
| 249 |
+
```python
|
| 250 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException
|
| 251 |
+
from fastapi.responses import JSONResponse
|
| 252 |
+
from job_writing_agent.workflow import JobWorkflow
|
| 253 |
+
import tempfile
|
| 254 |
+
import os
|
| 255 |
+
import asyncio
|
| 256 |
+
|
| 257 |
+
app = FastAPI(title="Job Application Writer API")
|
| 258 |
+
|
| 259 |
+
@app.get("/")
|
| 260 |
+
def health():
|
| 261 |
+
return {"status": "ok"}
|
| 262 |
+
|
| 263 |
+
@app.post("/generate")
|
| 264 |
+
async def generate_application(
|
| 265 |
+
resume: UploadFile = File(...),
|
| 266 |
+
job_description: str,
|
| 267 |
+
content_type: str = "cover_letter"
|
| 268 |
+
):
|
| 269 |
+
"""Generate job application material."""
|
| 270 |
+
# Save resume temporarily
|
| 271 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
|
| 272 |
+
content = await resume.read()
|
| 273 |
+
tmp.write(content)
|
| 274 |
+
resume_path = tmp.name
|
| 275 |
+
|
| 276 |
+
try:
|
| 277 |
+
workflow = JobWorkflow(
|
| 278 |
+
resume=resume_path,
|
| 279 |
+
job_description_source=job_description,
|
| 280 |
+
content=content_type
|
| 281 |
+
)
|
| 282 |
+
|
| 283 |
+
# Run workflow (assuming it's async or can be wrapped)
|
| 284 |
+
result = await asyncio.to_thread(workflow.run)
|
| 285 |
+
|
| 286 |
+
return JSONResponse({
|
| 287 |
+
"status": "success",
|
| 288 |
+
"result": result
|
| 289 |
+
})
|
| 290 |
+
except Exception as e:
|
| 291 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 292 |
+
finally:
|
| 293 |
+
# Cleanup
|
| 294 |
+
if os.path.exists(resume_path):
|
| 295 |
+
os.unlink(resume_path)
|
| 296 |
+
|
| 297 |
+
if __name__ == "__main__":
|
| 298 |
+
import uvicorn
|
| 299 |
+
uvicorn.run(app, host="0.0.0.0", port=8000)
|
| 300 |
+
```
|
| 301 |
+
|
| 302 |
+
Then update `requirements.txt` to ensure FastAPI and uvicorn are included (they already are ✅).
|
| 303 |
+
|
DOCKERFILE_EXPLANATION.md
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dockerfile Explanation
|
| 2 |
+
|
| 3 |
+
This Dockerfile is specifically designed for **LangGraph Cloud/LangServe deployment**. It uses the official LangGraph API base image and configures your agent graphs to be served as REST APIs.
|
| 4 |
+
|
| 5 |
+
## Line-by-Line Breakdown
|
| 6 |
+
|
| 7 |
+
### 1. Base Image (Line 1)
|
| 8 |
+
```dockerfile
|
| 9 |
+
FROM langchain/langgraph-api:3.12
|
| 10 |
+
```
|
| 11 |
+
- **Purpose**: Uses the official LangGraph API base image with Python 3.12
|
| 12 |
+
- **What it includes**: Pre-configured LangGraph runtime, LangServe server, and all LangGraph dependencies
|
| 13 |
+
- **Why**: This image already has everything needed to serve LangGraph workflows as REST APIs
|
| 14 |
+
|
| 15 |
+
---
|
| 16 |
+
|
| 17 |
+
### 2. Install Node Dependencies (Line 9)
|
| 18 |
+
```dockerfile
|
| 19 |
+
RUN PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir -c /api/constraints.txt nodes
|
| 20 |
+
```
|
| 21 |
+
- **Purpose**: Installs the `nodes` package (likely a dependency from your `langgraph.json`)
|
| 22 |
+
- **`PYTHONDONTWRITEBYTECODE=1`**: Prevents creating `.pyc` files (smaller image)
|
| 23 |
+
- **`uv pip`**: Uses `uv` (fast Python package installer) instead of regular `pip`
|
| 24 |
+
- **`--system`**: Installs to system Python (not virtual env)
|
| 25 |
+
- **`--no-cache-dir`**: Doesn't cache pip downloads (smaller image)
|
| 26 |
+
- **`-c /api/constraints.txt`**: Uses constraint file from base image (ensures compatible versions)
|
| 27 |
+
|
| 28 |
+
---
|
| 29 |
+
|
| 30 |
+
### 3. Copy Your Code (Line 14)
|
| 31 |
+
```dockerfile
|
| 32 |
+
ADD . /deps/job_writer
|
| 33 |
+
```
|
| 34 |
+
- **Purpose**: Copies your entire project into `/deps/job_writer` in the container
|
| 35 |
+
- **Why `/deps/`**: LangGraph API expects dependencies in this directory
|
| 36 |
+
- **What gets copied**: All your source code, `pyproject.toml`, `requirements.txt`, etc.
|
| 37 |
+
|
| 38 |
+
---
|
| 39 |
+
|
| 40 |
+
### 4. Install Your Package (Lines 19-21)
|
| 41 |
+
```dockerfile
|
| 42 |
+
RUN for dep in /deps/*; do
|
| 43 |
+
echo "Installing $dep";
|
| 44 |
+
if [ -d "$dep" ]; then
|
| 45 |
+
echo "Installing $dep";
|
| 46 |
+
(cd "$dep" && PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir -c /api/constraints.txt -e .);
|
| 47 |
+
fi;
|
| 48 |
+
done
|
| 49 |
+
```
|
| 50 |
+
- **Purpose**: Installs your `job_writer` package in editable mode (`-e`)
|
| 51 |
+
- **How it works**:
|
| 52 |
+
- Loops through all directories in `/deps/`
|
| 53 |
+
- For each directory, changes into it and runs `pip install -e .`
|
| 54 |
+
- The `-e` flag installs in "editable" mode (changes to code are reflected)
|
| 55 |
+
- **Why**: Makes your package importable as `job_writing_agent` inside the container
|
| 56 |
+
|
| 57 |
+
---
|
| 58 |
+
|
| 59 |
+
### 5. Register Your Graphs (Line 25)
|
| 60 |
+
```dockerfile
|
| 61 |
+
ENV LANGSERVE_GRAPHS='{"job_app_graph": "/deps/job_writer/src/job_writing_agent/workflow.py:job_app_graph", ...}'
|
| 62 |
+
```
|
| 63 |
+
- **Purpose**: Tells LangServe which graphs to expose as REST APIs
|
| 64 |
+
- **Format**: JSON mapping of `graph_name` → `module_path:attribute_name`
|
| 65 |
+
- **What it does**:
|
| 66 |
+
- `job_app_graph` → Exposes `JobWorkflow.job_app_graph` property as an API endpoint
|
| 67 |
+
- `research_workflow` → Exposes the research subgraph
|
| 68 |
+
- `data_loading_workflow` → Exposes the data loading subgraph
|
| 69 |
+
- **Result**: Each graph becomes a REST API endpoint like `/invoke/job_app_graph`
|
| 70 |
+
|
| 71 |
+
---
|
| 72 |
+
|
| 73 |
+
### 6. Protect LangGraph API (Lines 33-35)
|
| 74 |
+
```dockerfile
|
| 75 |
+
RUN mkdir -p /api/langgraph_api /api/langgraph_runtime /api/langgraph_license && \
|
| 76 |
+
touch /api/langgraph_api/__init__.py /api/langgraph_runtime/__init__.py /api/langgraph_license/__init__.py
|
| 77 |
+
RUN PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir --no-deps -e /api
|
| 78 |
+
```
|
| 79 |
+
- **Purpose**: Prevents your dependencies from accidentally overwriting LangGraph API packages
|
| 80 |
+
- **How**:
|
| 81 |
+
1. Creates placeholder `__init__.py` files for LangGraph packages
|
| 82 |
+
2. Reinstalls LangGraph API (without dependencies) to ensure it's not overwritten
|
| 83 |
+
- **Why**: If your `requirements.txt` has conflicting versions, this ensures LangGraph API stays intact
|
| 84 |
+
|
| 85 |
+
---
|
| 86 |
+
|
| 87 |
+
### 7. Cleanup Build Tools (Lines 37-41)
|
| 88 |
+
```dockerfile
|
| 89 |
+
RUN pip uninstall -y pip setuptools wheel
|
| 90 |
+
RUN rm -rf /usr/local/lib/python*/site-packages/pip* ...
|
| 91 |
+
RUN uv pip uninstall --system pip setuptools wheel && rm /usr/bin/uv /usr/bin/uvx
|
| 92 |
+
```
|
| 93 |
+
- **Purpose**: Removes all build tools to make the image smaller and more secure
|
| 94 |
+
- **What gets removed**:
|
| 95 |
+
- `pip`, `setuptools`, `wheel` (Python build tools)
|
| 96 |
+
- `uv` and `uvx` (package installers)
|
| 97 |
+
- **Why**: These tools aren't needed at runtime, only during build
|
| 98 |
+
- **Security**: Smaller attack surface (can't install malicious packages at runtime)
|
| 99 |
+
|
| 100 |
+
---
|
| 101 |
+
|
| 102 |
+
### 8. Set Working Directory (Line 45)
|
| 103 |
+
```dockerfile
|
| 104 |
+
WORKDIR /deps/job_writer
|
| 105 |
+
```
|
| 106 |
+
- **Purpose**: Sets the default directory when the container starts
|
| 107 |
+
- **Why**: Makes it easier to reference files relative to your project root
|
| 108 |
+
|
| 109 |
+
---
|
| 110 |
+
|
| 111 |
+
## How It Works at Runtime
|
| 112 |
+
|
| 113 |
+
When this container runs:
|
| 114 |
+
|
| 115 |
+
1. **LangServe starts automatically** (from base image)
|
| 116 |
+
2. **Reads `LANGSERVE_GRAPHS`** environment variable
|
| 117 |
+
3. **Imports your graphs** from the specified paths
|
| 118 |
+
4. **Exposes REST API endpoints**:
|
| 119 |
+
- `POST /invoke/job_app_graph` - Main workflow
|
| 120 |
+
- `POST /invoke/research_workflow` - Research subgraph
|
| 121 |
+
- `POST /invoke/data_loading_workflow` - Data loading subgraph
|
| 122 |
+
5. **Handles state management** automatically (checkpointing, persistence)
|
| 123 |
+
|
| 124 |
+
## Example API Usage
|
| 125 |
+
|
| 126 |
+
Once deployed, you can call your agent like this:
|
| 127 |
+
|
| 128 |
+
```bash
|
| 129 |
+
curl -X POST http://your-deployment/invoke/job_app_graph \
|
| 130 |
+
-H "Content-Type: application/json" \
|
| 131 |
+
-d '{
|
| 132 |
+
"resume_path": "...",
|
| 133 |
+
"job_description_source": "...",
|
| 134 |
+
"content": "cover_letter"
|
| 135 |
+
}'
|
| 136 |
+
```
|
| 137 |
+
|
| 138 |
+
## Key Points
|
| 139 |
+
|
| 140 |
+
✅ **Optimized for LangGraph Cloud** - Uses official base image
|
| 141 |
+
✅ **Automatic API generation** - No need to write FastAPI code
|
| 142 |
+
✅ **State management** - Built-in checkpointing and persistence
|
| 143 |
+
✅ **Security** - Removes build tools from final image
|
| 144 |
+
✅ **Small image** - No-cache installs, no bytecode files
|
| 145 |
+
|
| 146 |
+
This is the **easiest deployment option** for LangGraph apps - just build and push this Docker image!
|
| 147 |
+
|
Dockerfile
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM langchain/langgraph-api:3.12
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# -- Adding local package . --
|
| 10 |
+
ADD . /deps/job_writer
|
| 11 |
+
# -- End of local package . --
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# -- Installing all local dependencies --
|
| 16 |
+
|
| 17 |
+
RUN for dep in /deps/*; do echo "Installing $dep"; if [ -d "$dep" ]; then echo "Installing $dep"; (cd "$dep" && PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir -c /api/constraints.txt -e .); fi; done
|
| 18 |
+
|
| 19 |
+
# -- End of local dependencies install --
|
| 20 |
+
|
| 21 |
+
ENV LANGSERVE_GRAPHS='{"job_app_graph": "/deps/job_writer/src/job_writing_agent/workflow.py:job_app_graph", "research_workflow": "/deps/job_writer/src/job_writing_agent/nodes/research_workflow.py:research_workflow", "data_loading_workflow": "/deps/job_writer/src/job_writing_agent/nodes/initializing.py:data_loading_workflow"}'
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# -- Ensure user deps didn't inadvertently overwrite langgraph-api
|
| 30 |
+
RUN mkdir -p /api/langgraph_api /api/langgraph_runtime /api/langgraph_license && touch /api/langgraph_api/__init__.py /api/langgraph_runtime/__init__.py /api/langgraph_license/__init__.py
|
| 31 |
+
RUN PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir --no-deps -e /api
|
| 32 |
+
# -- End of ensuring user deps didn't inadvertently overwrite langgraph-api --
|
| 33 |
+
# -- Removing build deps from the final image ~<:===~~~ --
|
| 34 |
+
RUN pip uninstall -y pip setuptools wheel
|
| 35 |
+
RUN rm -rf /usr/local/lib/python*/site-packages/pip* /usr/local/lib/python*/site-packages/setuptools* /usr/local/lib/python*/site-packages/wheel* && find /usr/local/bin -name "pip*" -delete || true
|
| 36 |
+
RUN rm -rf /usr/lib/python*/site-packages/pip* /usr/lib/python*/site-packages/setuptools* /usr/lib/python*/site-packages/wheel* && find /usr/bin -name "pip*" -delete || true
|
| 37 |
+
RUN uv pip uninstall --system pip setuptools wheel && rm /usr/bin/uv /usr/bin/uvx
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
WORKDIR /deps/job_writer
|
docker-compose.override.example.yml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Example override file for local development
|
| 2 |
+
# Copy this to docker-compose.override.yml to customize settings
|
| 3 |
+
# docker-compose automatically loads override files
|
| 4 |
+
|
| 5 |
+
version: "3.9"
|
| 6 |
+
services:
|
| 7 |
+
redis:
|
| 8 |
+
# Override Redis port for local development
|
| 9 |
+
ports:
|
| 10 |
+
- "6380:6379" # Use different port if 6379 is already in use
|
| 11 |
+
|
| 12 |
+
postgres:
|
| 13 |
+
# Override Postgres port for local development
|
| 14 |
+
ports:
|
| 15 |
+
- "5433:5432" # Use different port if 5432 is already in use
|
| 16 |
+
environment:
|
| 17 |
+
# Override credentials for local dev
|
| 18 |
+
- POSTGRES_USER=dev_user
|
| 19 |
+
- POSTGRES_PASSWORD=dev_password
|
| 20 |
+
- POSTGRES_DB=job_app_dev
|
| 21 |
+
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
services:
|
| 2 |
+
redis:
|
| 3 |
+
image: redis:6-alpine
|
| 4 |
+
container_name: job-app-redis
|
| 5 |
+
ports:
|
| 6 |
+
- "6379:6379"
|
| 7 |
+
healthcheck:
|
| 8 |
+
test: ["CMD", "redis-cli", "ping"]
|
| 9 |
+
interval: 5s
|
| 10 |
+
timeout: 3s
|
| 11 |
+
retries: 5
|
| 12 |
+
networks:
|
| 13 |
+
- job-app-network
|
| 14 |
+
|
| 15 |
+
postgres:
|
| 16 |
+
image: postgres:16-alpine
|
| 17 |
+
container_name: job-app-postgres
|
| 18 |
+
environment:
|
| 19 |
+
- POSTGRES_USER=postgres
|
| 20 |
+
- POSTGRES_PASSWORD=postgres
|
| 21 |
+
- POSTGRES_DB=postgres
|
| 22 |
+
ports:
|
| 23 |
+
- "5432:5432"
|
| 24 |
+
healthcheck:
|
| 25 |
+
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
| 26 |
+
interval: 5s
|
| 27 |
+
timeout: 5s
|
| 28 |
+
retries: 5
|
| 29 |
+
volumes:
|
| 30 |
+
- pg_data_local:/var/lib/postgresql/data
|
| 31 |
+
networks:
|
| 32 |
+
- job-app-network
|
| 33 |
+
|
| 34 |
+
# Optional: Uncomment to run your agent container alongside Redis/Postgres
|
| 35 |
+
agent:
|
| 36 |
+
build:
|
| 37 |
+
context: .
|
| 38 |
+
dockerfile: Dockerfile
|
| 39 |
+
image: job-app-workflow:latest
|
| 40 |
+
container_name: job-app-agent
|
| 41 |
+
ports:
|
| 42 |
+
- "8000:8000"
|
| 43 |
+
environment:
|
| 44 |
+
- REDIS_URL=redis://redis:6379
|
| 45 |
+
- POSTGRES_URL=postgresql://postgres:postgres@postgres:5432/postgres
|
| 46 |
+
depends_on:
|
| 47 |
+
redis:
|
| 48 |
+
condition: service_healthy
|
| 49 |
+
postgres:
|
| 50 |
+
condition: service_healthy
|
| 51 |
+
networks:
|
| 52 |
+
- job-app-network
|
| 53 |
+
|
| 54 |
+
networks:
|
| 55 |
+
job-app-network:
|
| 56 |
+
driver: bridge
|
| 57 |
+
|
| 58 |
+
volumes:
|
| 59 |
+
pg_data_local:
|
langgraph.json
CHANGED
|
@@ -1,10 +1,12 @@
|
|
| 1 |
{
|
| 2 |
"dependencies": [
|
| 3 |
-
"
|
| 4 |
],
|
| 5 |
"graphs": {
|
| 6 |
-
"
|
|
|
|
|
|
|
| 7 |
},
|
| 8 |
-
"env": "./
|
| 9 |
"python_version": "3.12"
|
| 10 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"dependencies": [
|
| 3 |
+
"."
|
| 4 |
],
|
| 5 |
"graphs": {
|
| 6 |
+
"job_app_graph": "./src/job_writing_agent/workflow.py:job_app_graph",
|
| 7 |
+
"research_workflow": "./src/job_writing_agent/nodes/research_workflow.py:research_workflow",
|
| 8 |
+
"data_loading_workflow": "./src/job_writing_agent/nodes/initializing.py:data_loading_workflow"
|
| 9 |
},
|
| 10 |
+
"env": "./app_env",
|
| 11 |
"python_version": "3.12"
|
| 12 |
}
|
pyproject.toml
CHANGED
|
@@ -23,6 +23,7 @@ dependencies = [
|
|
| 23 |
"babel==2.17.0",
|
| 24 |
"backoff==2.2.1",
|
| 25 |
"beautifulsoup4==4.14.2",
|
|
|
|
| 26 |
"blinker==1.9.0",
|
| 27 |
"blockbuster==1.5.25",
|
| 28 |
"bs4==0.0.2",
|
|
@@ -103,20 +104,19 @@ dependencies = [
|
|
| 103 |
"jsonschema-specifications==2025.9.1",
|
| 104 |
"justext==3.0.2",
|
| 105 |
"kiwisolver==1.4.9",
|
| 106 |
-
"langchain
|
| 107 |
-
"langchain-cerebras
|
| 108 |
-
"langchain-community
|
| 109 |
-
"langchain-core
|
| 110 |
-
"langchain-ollama
|
| 111 |
-
"langchain-openai
|
| 112 |
-
"langchain-tavily
|
| 113 |
-
"langchain-text-splitters
|
| 114 |
"langfuse==3.6.1",
|
| 115 |
-
"langgraph
|
| 116 |
-
"langgraph-api
|
| 117 |
-
"langgraph-
|
| 118 |
-
"langgraph-
|
| 119 |
-
"langgraph-prebuilt==0.6.4",
|
| 120 |
"langgraph-runtime-inmem==0.14.1",
|
| 121 |
"langgraph-sdk==0.2.9",
|
| 122 |
"langsmith==0.4.32",
|
|
@@ -168,8 +168,8 @@ dependencies = [
|
|
| 168 |
"opentelemetry-sdk==1.37.0",
|
| 169 |
"opentelemetry-semantic-conventions==0.58b0",
|
| 170 |
"optuna==4.5.0",
|
| 171 |
-
"orjson
|
| 172 |
-
"ormsgpack
|
| 173 |
"packaging==25.0",
|
| 174 |
"pandas==2.3.3",
|
| 175 |
"parse==1.20.2",
|
|
@@ -212,6 +212,7 @@ dependencies = [
|
|
| 212 |
"rich-rst==1.3.1",
|
| 213 |
"rpds-py==0.27.1",
|
| 214 |
"rsa==4.9.1",
|
|
|
|
| 215 |
"scikit-learn==1.7.2",
|
| 216 |
"scipy==1.16.2",
|
| 217 |
"setuptools==80.9.0",
|
|
@@ -258,3 +259,4 @@ dependencies = [
|
|
| 258 |
|
| 259 |
[tool.setuptools.packages.find]
|
| 260 |
where = ["src"]
|
|
|
|
|
|
| 23 |
"babel==2.17.0",
|
| 24 |
"backoff==2.2.1",
|
| 25 |
"beautifulsoup4==4.14.2",
|
| 26 |
+
"black>=25.12.0",
|
| 27 |
"blinker==1.9.0",
|
| 28 |
"blockbuster==1.5.25",
|
| 29 |
"bs4==0.0.2",
|
|
|
|
| 104 |
"jsonschema-specifications==2025.9.1",
|
| 105 |
"justext==3.0.2",
|
| 106 |
"kiwisolver==1.4.9",
|
| 107 |
+
"langchain",
|
| 108 |
+
"langchain-cerebras",
|
| 109 |
+
"langchain-community",
|
| 110 |
+
"langchain-core>=1.0.0",
|
| 111 |
+
"langchain-ollama",
|
| 112 |
+
"langchain-openai",
|
| 113 |
+
"langchain-tavily",
|
| 114 |
+
"langchain-text-splitters",
|
| 115 |
"langfuse==3.6.1",
|
| 116 |
+
"langgraph",
|
| 117 |
+
"langgraph-api",
|
| 118 |
+
"langgraph-cli",
|
| 119 |
+
"langgraph-prebuilt",
|
|
|
|
| 120 |
"langgraph-runtime-inmem==0.14.1",
|
| 121 |
"langgraph-sdk==0.2.9",
|
| 122 |
"langsmith==0.4.32",
|
|
|
|
| 168 |
"opentelemetry-sdk==1.37.0",
|
| 169 |
"opentelemetry-semantic-conventions==0.58b0",
|
| 170 |
"optuna==4.5.0",
|
| 171 |
+
"orjson>=3.9.7,<3.10.17",
|
| 172 |
+
"ormsgpack>=1.12.0",
|
| 173 |
"packaging==25.0",
|
| 174 |
"pandas==2.3.3",
|
| 175 |
"parse==1.20.2",
|
|
|
|
| 212 |
"rich-rst==1.3.1",
|
| 213 |
"rpds-py==0.27.1",
|
| 214 |
"rsa==4.9.1",
|
| 215 |
+
"ruff>=0.14.10",
|
| 216 |
"scikit-learn==1.7.2",
|
| 217 |
"scipy==1.16.2",
|
| 218 |
"setuptools==80.9.0",
|
|
|
|
| 259 |
|
| 260 |
[tool.setuptools.packages.find]
|
| 261 |
where = ["src"]
|
| 262 |
+
|
pyrightconfig.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"include": [
|
| 3 |
+
"src"
|
| 4 |
+
],
|
| 5 |
+
"exclude": [
|
| 6 |
+
"**/__pycache__",
|
| 7 |
+
"**/.*",
|
| 8 |
+
"app_env",
|
| 9 |
+
"node_modules"
|
| 10 |
+
],
|
| 11 |
+
"extraPaths": [
|
| 12 |
+
"src"
|
| 13 |
+
],
|
| 14 |
+
"pythonVersion": "3.12",
|
| 15 |
+
"typeCheckingMode": "basic",
|
| 16 |
+
"reportMissingImports": true,
|
| 17 |
+
"reportMissingTypeStubs": false
|
| 18 |
+
}
|
src/job_writing_agent/agents/nodes.py
CHANGED
|
@@ -10,8 +10,9 @@ from datetime import datetime
|
|
| 10 |
|
| 11 |
from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
|
| 12 |
from langchain_core.output_parsers import StrOutputParser
|
|
|
|
| 13 |
|
| 14 |
-
from ..classes.classes import AppState, ResearchState
|
| 15 |
from ..prompts.templates import (
|
| 16 |
CRITIQUE_PROMPT,
|
| 17 |
PERSONA_DEVELOPMENT_PROMPT,
|
|
@@ -26,30 +27,38 @@ logger = logging.getLogger(__name__)
|
|
| 26 |
# Constants
|
| 27 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 28 |
|
| 29 |
-
llm_provider = LLMFactory()
|
| 30 |
|
| 31 |
-
|
| 32 |
-
"qwen/qwen3-4b:free", provider="openrouter", temperature=0.3
|
| 33 |
-
)
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
def create_draft(state: ResearchState) -> AppState:
|
| 37 |
"""Create initial draft of the application material."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
# Determine which type of content we're creating
|
| 39 |
-
|
| 40 |
|
| 41 |
content_category = state.get("content_category", "cover_letter")
|
| 42 |
|
|
|
|
|
|
|
|
|
|
| 43 |
try:
|
|
|
|
| 44 |
if state.get("vector_store"):
|
| 45 |
vector_store = state.get("vector_store")
|
| 46 |
|
| 47 |
# Extract key requirements from job description
|
| 48 |
prompt = PERSONA_DEVELOPMENT_PROMPT | llm | StrOutputParser()
|
| 49 |
|
| 50 |
-
if
|
| 51 |
key_requirements = prompt.invoke(
|
| 52 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
)
|
| 54 |
else:
|
| 55 |
return key_requirements
|
|
@@ -68,13 +77,16 @@ def create_draft(state: ResearchState) -> AppState:
|
|
| 68 |
highly_relevant_resume = "\n".join(
|
| 69 |
[doc.page_content for doc in relevant_docs]
|
| 70 |
)
|
|
|
|
| 71 |
resume_text = f"""
|
| 72 |
# Most Relevant Experience
|
| 73 |
{highly_relevant_resume}
|
| 74 |
|
| 75 |
# Full Resume
|
| 76 |
-
{
|
| 77 |
"""
|
|
|
|
|
|
|
| 78 |
except Exception as e:
|
| 79 |
logger.warning(f"Could not use vector search for relevant resume parts: {e}")
|
| 80 |
# Continue with regular resume text
|
|
@@ -91,31 +103,42 @@ def create_draft(state: ResearchState) -> AppState:
|
|
| 91 |
# Create the draft using the selected prompt template
|
| 92 |
CurrentSessionContextMessage = HumanMessagePromptTemplate.from_template(
|
| 93 |
"""
|
| 94 |
-
Below is the Job Description
|
| 95 |
|
| 96 |
-
Job Description
|
| 97 |
|
| 98 |
-
|
| 99 |
{current_job_role}
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
""",
|
| 108 |
-
input_variables=[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
)
|
| 110 |
|
| 111 |
FirstDraftGenerationPromptTemplate.append(CurrentSessionContextMessage)
|
| 112 |
|
| 113 |
# Invoke the chain with the appropriate inputs
|
| 114 |
-
|
| 115 |
(
|
| 116 |
{
|
| 117 |
"current_job_role": lambda x: x["current_job_role"],
|
| 118 |
"company_research_data": lambda x: x["company_research_data"],
|
|
|
|
| 119 |
}
|
| 120 |
)
|
| 121 |
| FirstDraftGenerationPromptTemplate
|
|
@@ -123,59 +146,203 @@ def create_draft(state: ResearchState) -> AppState:
|
|
| 123 |
)
|
| 124 |
|
| 125 |
# Prepare the inputs
|
| 126 |
-
|
| 127 |
-
"current_job_role":
|
| 128 |
-
"company_research_data":
|
|
|
|
|
|
|
|
|
|
| 129 |
}
|
| 130 |
|
| 131 |
-
response =
|
| 132 |
-
logger.info(f"Draft has been created: {response}")
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
|
| 137 |
-
def critique_draft(state:
|
| 138 |
-
"""
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
)
|
| 143 |
-
)
|
| 144 |
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
|
|
|
| 149 |
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
"""Human-in-the-loop checkpoint for feedback on the draft."""
|
| 152 |
# This is a placeholder function that would be replaced by actual UI interaction
|
| 153 |
print("\n" + "=" * 80)
|
| 154 |
print("DRAFT FOR REVIEW:")
|
| 155 |
print(state["draft"])
|
| 156 |
print("\nAUTOMATIC CRITIQUE:")
|
| 157 |
-
print(state.get("
|
| 158 |
print("=" * 80)
|
| 159 |
print("\nPlease provide your feedback (press Enter to continue with no changes):")
|
| 160 |
|
| 161 |
# In a real implementation, this would be handled by the UI
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
|
| 167 |
-
def finalize_document(state:
|
| 168 |
"""Incorporate feedback and finalize the document."""
|
| 169 |
-
if not state["feedback"].strip():
|
| 170 |
-
state["final"] = state["draft"]
|
| 171 |
-
return state
|
| 172 |
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
)
|
| 176 |
|
| 177 |
-
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
|
| 181 |
"""
|
|
|
|
| 10 |
|
| 11 |
from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
|
| 12 |
from langchain_core.output_parsers import StrOutputParser
|
| 13 |
+
from langchain_core.messages import SystemMessage
|
| 14 |
|
| 15 |
+
from ..classes.classes import AppState, ResearchState, ResultState, DataLoadState
|
| 16 |
from ..prompts.templates import (
|
| 17 |
CRITIQUE_PROMPT,
|
| 18 |
PERSONA_DEVELOPMENT_PROMPT,
|
|
|
|
| 27 |
# Constants
|
| 28 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 29 |
|
|
|
|
| 30 |
|
| 31 |
+
def create_draft(state: ResearchState) -> ResultState:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
"""Create initial draft of the application material."""
|
| 33 |
+
# Create LLM inside function (lazy initialization)
|
| 34 |
+
llm_provider = LLMFactory()
|
| 35 |
+
llm = llm_provider.create_langchain(
|
| 36 |
+
"mistralai/mistral-7b-instruct:free", provider="openrouter", temperature=0.3
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
# Determine which type of content we're creating
|
| 40 |
+
company_background_information = state.get("company_research_data", {})
|
| 41 |
|
| 42 |
content_category = state.get("content_category", "cover_letter")
|
| 43 |
|
| 44 |
+
# Get the original resume text from state (used later if vector search is available)
|
| 45 |
+
original_resume_text = company_background_information.get("resume", "")
|
| 46 |
+
|
| 47 |
try:
|
| 48 |
+
# Not yet implemented
|
| 49 |
if state.get("vector_store"):
|
| 50 |
vector_store = state.get("vector_store")
|
| 51 |
|
| 52 |
# Extract key requirements from job description
|
| 53 |
prompt = PERSONA_DEVELOPMENT_PROMPT | llm | StrOutputParser()
|
| 54 |
|
| 55 |
+
if company_background_information:
|
| 56 |
key_requirements = prompt.invoke(
|
| 57 |
+
{
|
| 58 |
+
"job_description": company_background_information[
|
| 59 |
+
"job_description"
|
| 60 |
+
]
|
| 61 |
+
}
|
| 62 |
)
|
| 63 |
else:
|
| 64 |
return key_requirements
|
|
|
|
| 77 |
highly_relevant_resume = "\n".join(
|
| 78 |
[doc.page_content for doc in relevant_docs]
|
| 79 |
)
|
| 80 |
+
# Combine highly relevant parts with full resume text
|
| 81 |
resume_text = f"""
|
| 82 |
# Most Relevant Experience
|
| 83 |
{highly_relevant_resume}
|
| 84 |
|
| 85 |
# Full Resume
|
| 86 |
+
{original_resume_text}
|
| 87 |
"""
|
| 88 |
+
# Update the company_background_information with the enhanced resume
|
| 89 |
+
company_background_information["resume"] = resume_text
|
| 90 |
except Exception as e:
|
| 91 |
logger.warning(f"Could not use vector search for relevant resume parts: {e}")
|
| 92 |
# Continue with regular resume text
|
|
|
|
| 103 |
# Create the draft using the selected prompt template
|
| 104 |
CurrentSessionContextMessage = HumanMessagePromptTemplate.from_template(
|
| 105 |
"""
|
| 106 |
+
Below is the Job Description, Candidate Resume, and Company Research Data enclosed in triple backticks.
|
| 107 |
|
| 108 |
+
**Job Description:**
|
| 109 |
|
| 110 |
+
'''
|
| 111 |
{current_job_role}
|
| 112 |
+
'''
|
| 113 |
+
|
| 114 |
+
**Candidate Resume:**
|
| 115 |
+
|
| 116 |
+
'''
|
| 117 |
+
{candidate_resume}
|
| 118 |
+
'''
|
| 119 |
+
|
| 120 |
+
**Company Research Data:**
|
| 121 |
+
|
| 122 |
+
'''
|
| 123 |
+
{company_research_data}
|
| 124 |
+
'''
|
| 125 |
""",
|
| 126 |
+
input_variables=[
|
| 127 |
+
"current_job_role",
|
| 128 |
+
"company_research_data",
|
| 129 |
+
"candidate_resume",
|
| 130 |
+
],
|
| 131 |
)
|
| 132 |
|
| 133 |
FirstDraftGenerationPromptTemplate.append(CurrentSessionContextMessage)
|
| 134 |
|
| 135 |
# Invoke the chain with the appropriate inputs
|
| 136 |
+
draft_generation_chain = (
|
| 137 |
(
|
| 138 |
{
|
| 139 |
"current_job_role": lambda x: x["current_job_role"],
|
| 140 |
"company_research_data": lambda x: x["company_research_data"],
|
| 141 |
+
"candidate_resume": lambda x: x["candidate_resume"],
|
| 142 |
}
|
| 143 |
)
|
| 144 |
| FirstDraftGenerationPromptTemplate
|
|
|
|
| 146 |
)
|
| 147 |
|
| 148 |
# Prepare the inputs
|
| 149 |
+
application_background_data = {
|
| 150 |
+
"current_job_role": company_background_information["job_description"],
|
| 151 |
+
"company_research_data": company_background_information[
|
| 152 |
+
"company_research_data_summary"
|
| 153 |
+
],
|
| 154 |
+
"candidate_resume": company_background_information["resume"],
|
| 155 |
}
|
| 156 |
|
| 157 |
+
response = draft_generation_chain.invoke(application_background_data)
|
| 158 |
+
logger.info(f"Draft has been created: {response.content}")
|
| 159 |
+
app_state = ResultState(
|
| 160 |
+
draft=response.content,
|
| 161 |
+
feedback="",
|
| 162 |
+
critique_feedback="",
|
| 163 |
+
current_node="create_draft",
|
| 164 |
+
company_research_data=company_background_information,
|
| 165 |
+
output_data={},
|
| 166 |
+
)
|
| 167 |
+
|
| 168 |
+
return app_state
|
| 169 |
|
| 170 |
|
| 171 |
+
def critique_draft(state: ResultState) -> ResultState:
|
| 172 |
+
"""
|
| 173 |
+
Critique the draft for improvements.
|
| 174 |
+
Provides external evaluation focusing on job requirements, tone, clarity, and style.
|
| 175 |
+
"""
|
| 176 |
+
try:
|
| 177 |
+
logger.info("Critiquing draft...")
|
| 178 |
+
|
| 179 |
+
# Create LLM inside function (lazy initialization)
|
| 180 |
+
llm_provider = LLMFactory()
|
| 181 |
+
llm = llm_provider.create_langchain(
|
| 182 |
+
"mistralai/mistral-7b-instruct:free", provider="openrouter", temperature=0.3
|
| 183 |
)
|
|
|
|
| 184 |
|
| 185 |
+
job_description = str(state["company_research_data"].get("job_description", ""))
|
| 186 |
+
draft = str(state.get("draft", ""))
|
| 187 |
+
|
| 188 |
+
# Debug logging to verify values
|
| 189 |
+
logger.debug(f"Job description length: {len(job_description)}")
|
| 190 |
+
logger.debug(f"Draft length: {len(draft)}")
|
| 191 |
+
|
| 192 |
+
if not job_description or not draft:
|
| 193 |
+
logger.warning("Missing job_description or draft in state")
|
| 194 |
+
# Return state with empty feedback
|
| 195 |
+
return ResultState(
|
| 196 |
+
draft=draft,
|
| 197 |
+
feedback="",
|
| 198 |
+
critique_feedback="",
|
| 199 |
+
current_node="critique",
|
| 200 |
+
company_research_data=state["company_research_data"],
|
| 201 |
+
output_data=state["output_data"],
|
| 202 |
+
)
|
| 203 |
+
|
| 204 |
+
# Use the same pattern as create_draft:
|
| 205 |
+
# 1. Create ChatPromptTemplate from SystemMessage
|
| 206 |
+
# 2. Append HumanMessagePromptTemplate with variables
|
| 207 |
+
# 3. Create chain and invoke
|
| 208 |
+
|
| 209 |
+
# Extract SystemMessage from CRITIQUE_PROMPT
|
| 210 |
+
|
| 211 |
+
critique_system_message = SystemMessage(
|
| 212 |
+
content="You are a professional editor who specializes in job applications. Provide constructive feedback."
|
| 213 |
+
)
|
| 214 |
+
|
| 215 |
+
# Create ChatPromptTemplate from SystemMessage (like line 90-94 in create_draft)
|
| 216 |
+
CritiquePromptTemplate = ChatPromptTemplate([critique_system_message])
|
| 217 |
+
|
| 218 |
+
# Append HumanMessagePromptTemplate with variables (like line 97-124 in create_draft)
|
| 219 |
+
CritiqueContextMessage = HumanMessagePromptTemplate.from_template(
|
| 220 |
+
"""
|
| 221 |
+
# Job Description
|
| 222 |
+
{job_description}
|
| 223 |
+
|
| 224 |
+
# Current Draft
|
| 225 |
+
{draft}
|
| 226 |
+
|
| 227 |
+
Critique this draft and suggest specific improvements. Focus on:
|
| 228 |
+
1. How well it targets the job requirements
|
| 229 |
+
2. Professional tone and language
|
| 230 |
+
3. Clarity and impact
|
| 231 |
+
4. Grammar and style
|
| 232 |
+
|
| 233 |
+
Return your critique in a constructive, actionable format.
|
| 234 |
+
""",
|
| 235 |
+
input_variables=["job_description", "draft"],
|
| 236 |
+
)
|
| 237 |
|
| 238 |
+
CritiquePromptTemplate.append(CritiqueContextMessage)
|
| 239 |
|
| 240 |
+
# Create chain (like line 129-139 in create_draft)
|
| 241 |
+
critique_chain = (
|
| 242 |
+
{
|
| 243 |
+
"job_description": lambda x: x["job_description"],
|
| 244 |
+
"draft": lambda x: x["draft"],
|
| 245 |
+
}
|
| 246 |
+
| CritiquePromptTemplate
|
| 247 |
+
| llm
|
| 248 |
+
)
|
| 249 |
+
|
| 250 |
+
# Invoke with input variables (like line 150 in create_draft)
|
| 251 |
+
critique = critique_chain.invoke(
|
| 252 |
+
{
|
| 253 |
+
"job_description": job_description,
|
| 254 |
+
"draft": draft,
|
| 255 |
+
}
|
| 256 |
+
)
|
| 257 |
+
|
| 258 |
+
critique_content = (
|
| 259 |
+
critique.content if hasattr(critique, "content") else str(critique)
|
| 260 |
+
)
|
| 261 |
+
logger.info("Draft critique completed")
|
| 262 |
+
|
| 263 |
+
# Store the critique for reference during revision
|
| 264 |
+
app_state = ResultState(
|
| 265 |
+
draft=state["draft"],
|
| 266 |
+
feedback=state["feedback"],
|
| 267 |
+
critique_feedback=critique_content,
|
| 268 |
+
current_node="critique",
|
| 269 |
+
company_research_data=state["company_research_data"],
|
| 270 |
+
output_data=state["output_data"],
|
| 271 |
+
)
|
| 272 |
+
return app_state
|
| 273 |
+
|
| 274 |
+
except Exception as e:
|
| 275 |
+
logger.error(f"Error in critique_draft: {e}", exc_info=True)
|
| 276 |
+
# Return state unchanged on error
|
| 277 |
+
return state
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
def human_approval(state: ResultState) -> ResultState:
|
| 281 |
"""Human-in-the-loop checkpoint for feedback on the draft."""
|
| 282 |
# This is a placeholder function that would be replaced by actual UI interaction
|
| 283 |
print("\n" + "=" * 80)
|
| 284 |
print("DRAFT FOR REVIEW:")
|
| 285 |
print(state["draft"])
|
| 286 |
print("\nAUTOMATIC CRITIQUE:")
|
| 287 |
+
print(state.get("critique_feedback", "No critique available"))
|
| 288 |
print("=" * 80)
|
| 289 |
print("\nPlease provide your feedback (press Enter to continue with no changes):")
|
| 290 |
|
| 291 |
# In a real implementation, this would be handled by the UI
|
| 292 |
+
human_feedback = input()
|
| 293 |
+
result_state = ResultState(
|
| 294 |
+
draft=state["draft"],
|
| 295 |
+
feedback=human_feedback,
|
| 296 |
+
critique_feedback=state["critique_feedback"],
|
| 297 |
+
current_node="human_approval",
|
| 298 |
+
company_research_data=state["company_research_data"],
|
| 299 |
+
output_data=state["output_data"],
|
| 300 |
+
)
|
| 301 |
+
return result_state
|
| 302 |
|
| 303 |
|
| 304 |
+
def finalize_document(state: ResultState) -> DataLoadState:
|
| 305 |
"""Incorporate feedback and finalize the document."""
|
|
|
|
|
|
|
|
|
|
| 306 |
|
| 307 |
+
# Create LLM inside function (lazy initialization)
|
| 308 |
+
llm_provider = LLMFactory()
|
| 309 |
+
llm = llm_provider.create_langchain(
|
| 310 |
+
"mistralai/mistral-7b-instruct:free", provider="openrouter", temperature=0.3
|
| 311 |
+
)
|
| 312 |
+
|
| 313 |
+
# Create chain like in critique_draft (line 229-236)
|
| 314 |
+
revision_chain = (
|
| 315 |
+
{
|
| 316 |
+
"draft": lambda x: x["draft"],
|
| 317 |
+
"feedback": lambda x: x["feedback"],
|
| 318 |
+
"critique_feedback": lambda x: x["critique_feedback"],
|
| 319 |
+
}
|
| 320 |
+
| REVISION_PROMPT
|
| 321 |
+
| llm
|
| 322 |
+
)
|
| 323 |
+
|
| 324 |
+
print(f"revision_chain: {revision_chain}")
|
| 325 |
+
|
| 326 |
+
# Invoke with input variables (like line 239 in critique_draft)
|
| 327 |
+
final_content = revision_chain.invoke(
|
| 328 |
+
{
|
| 329 |
+
"draft": state["draft"],
|
| 330 |
+
"feedback": state["feedback"],
|
| 331 |
+
"critique_feedback": state["critique_feedback"],
|
| 332 |
+
}
|
| 333 |
)
|
| 334 |
|
| 335 |
+
app_state = DataLoadState(
|
| 336 |
+
draft=state["draft"],
|
| 337 |
+
feedback=state["feedback"],
|
| 338 |
+
critique_feedback=state["critique_feedback"],
|
| 339 |
+
company_research_data=state["company_research_data"],
|
| 340 |
+
current_node="finalize",
|
| 341 |
+
output_data=final_content.content
|
| 342 |
+
if hasattr(final_content, "content")
|
| 343 |
+
else str(final_content),
|
| 344 |
+
)
|
| 345 |
+
return app_state
|
| 346 |
|
| 347 |
|
| 348 |
"""
|
src/job_writing_agent/agents/output_schema.py
CHANGED
|
@@ -2,12 +2,24 @@ from pydantic import BaseModel, Field, field_validator
|
|
| 2 |
from typing import List, Optional
|
| 3 |
import dspy
|
| 4 |
|
|
|
|
| 5 |
class TavilyQuerySet(BaseModel):
|
| 6 |
-
query1: Optional[List[str]] = Field(
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
@field_validator("query1", "query2", "query3", "query4", "query5", mode="after")
|
| 13 |
@classmethod
|
|
@@ -16,13 +28,38 @@ class TavilyQuerySet(BaseModel):
|
|
| 16 |
if v is not None: # Only validate if the list is actually provided
|
| 17 |
if len(v) != 1:
|
| 18 |
# Updated error message for clarity
|
| 19 |
-
raise ValueError(
|
|
|
|
|
|
|
| 20 |
return v
|
| 21 |
|
|
|
|
| 22 |
class TavilySearchQueries(dspy.Signature):
|
| 23 |
-
"""Use the job description and company name
|
| 24 |
to create exactly 5 search queries for the tavily search tool in JSON Format"""
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from typing import List, Optional
|
| 3 |
import dspy
|
| 4 |
|
| 5 |
+
|
| 6 |
class TavilyQuerySet(BaseModel):
|
| 7 |
+
query1: Optional[List[str]] = Field(
|
| 8 |
+
default=None,
|
| 9 |
+
description="First search query and its rationale, e.g., ['query text']",
|
| 10 |
+
)
|
| 11 |
+
query2: Optional[List[str]] = Field(
|
| 12 |
+
default=None, description="Second search query and its rationale"
|
| 13 |
+
)
|
| 14 |
+
query3: Optional[List[str]] = Field(
|
| 15 |
+
default=None, description="Third search query and its rationale"
|
| 16 |
+
)
|
| 17 |
+
query4: Optional[List[str]] = Field(
|
| 18 |
+
default=None, description="Fourth search query and its rationale"
|
| 19 |
+
)
|
| 20 |
+
query5: Optional[List[str]] = Field(
|
| 21 |
+
default=None, description="Fifth search query and its rationale"
|
| 22 |
+
)
|
| 23 |
|
| 24 |
@field_validator("query1", "query2", "query3", "query4", "query5", mode="after")
|
| 25 |
@classmethod
|
|
|
|
| 28 |
if v is not None: # Only validate if the list is actually provided
|
| 29 |
if len(v) != 1:
|
| 30 |
# Updated error message for clarity
|
| 31 |
+
raise ValueError(
|
| 32 |
+
"Each query list, when provided, must contain exactly one string: the query text."
|
| 33 |
+
)
|
| 34 |
return v
|
| 35 |
|
| 36 |
+
|
| 37 |
class TavilySearchQueries(dspy.Signature):
|
| 38 |
+
"""Use the job description and company name
|
| 39 |
to create exactly 5 search queries for the tavily search tool in JSON Format"""
|
| 40 |
+
|
| 41 |
+
job_description = dspy.InputField(
|
| 42 |
+
desc="Job description of the role that candidate is applying for."
|
| 43 |
+
)
|
| 44 |
+
company_name = dspy.InputField(
|
| 45 |
+
desc="Name of the company the candidate is applying for."
|
| 46 |
+
)
|
| 47 |
+
search_queries = dspy.OutputField(
|
| 48 |
+
desc="Dictionary of tavily search queries which will gather understanding of the company and it's culture",
|
| 49 |
+
json=True,
|
| 50 |
+
)
|
| 51 |
+
search_query_relevance = dspy.OutputField(
|
| 52 |
+
desc="Dictionary of relevance for each tavily search query that is generated",
|
| 53 |
+
json=True,
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class CompanyResearchDataSummarizationSchema(dspy.Signature):
|
| 58 |
+
"""This schema is used to summarize the company research data into a concise summary to provide a clear understanding of the company."""
|
| 59 |
+
|
| 60 |
+
company_research_data = dspy.InputField(
|
| 61 |
+
desc="These are the results of the tavily search queries that were generated. They have been filtered for relevance and are now ready to be summarized."
|
| 62 |
+
)
|
| 63 |
+
company_research_data_summary = dspy.OutputField(
|
| 64 |
+
desc="This is summary of the company research data that will be used by a job application writer to assist the candidate in writing content supporting the job application. The summary should be relevant to the job application and the company.",
|
| 65 |
+
)
|
src/job_writing_agent/classes/__init__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
-
from .classes import AppState, ResearchState, DataLoadState
|
| 2 |
|
| 3 |
-
__all__ = ["AppState", "ResearchState", "DataLoadState"]
|
|
|
|
| 1 |
+
from .classes import AppState, ResearchState, DataLoadState, ResultState
|
| 2 |
|
| 3 |
+
__all__ = ["AppState", "ResearchState", "DataLoadState", "ResultState"]
|
src/job_writing_agent/classes/classes.py
CHANGED
|
@@ -2,11 +2,36 @@
|
|
| 2 |
State definitions for the Job Writer LangGraph Workflow.
|
| 3 |
"""
|
| 4 |
|
| 5 |
-
from
|
| 6 |
from typing_extensions import List, Dict, Any
|
| 7 |
from langgraph.graph import MessagesState
|
| 8 |
from dataclasses import dataclass
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
@dataclass
|
| 11 |
class AppState(MessagesState):
|
| 12 |
"""
|
|
@@ -23,33 +48,45 @@ class AppState(MessagesState):
|
|
| 23 |
final: Final version of the application material
|
| 24 |
content: Type of application material to generate
|
| 25 |
"""
|
|
|
|
| 26 |
resume_path: str
|
| 27 |
job_description_source: str
|
| 28 |
-
company_research_data: Dict[str, Any]
|
| 29 |
-
draft: str
|
| 30 |
-
feedback: str
|
| 31 |
-
final_version: str
|
| 32 |
content: str # "cover_letter", "bullets", "linkedin_note"
|
| 33 |
current_node: str
|
| 34 |
|
| 35 |
|
| 36 |
-
class DataLoadState(MessagesState):
|
| 37 |
"""
|
| 38 |
State container for the job application writer workflow.
|
|
|
|
| 39 |
|
| 40 |
Attributes:
|
| 41 |
resume: List of text chunks from the candidate's resume
|
| 42 |
job_description: List of text chunks from the job description
|
| 43 |
persona: The writing persona to use ("recruiter" or "hiring_manager")
|
| 44 |
content: Type of application material to generate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
"""
|
|
|
|
| 46 |
resume_path: str
|
| 47 |
job_description_source: str
|
|
|
|
| 48 |
resume: str
|
| 49 |
job_description: str
|
| 50 |
company_name: str
|
| 51 |
current_node: str
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
|
| 55 |
class ResearchState(MessagesState):
|
|
@@ -60,6 +97,22 @@ class ResearchState(MessagesState):
|
|
| 60 |
attempted_search_queries: List of queries used extracted from the job description
|
| 61 |
compiled_knowledge: Compiled knowledge from the research
|
| 62 |
"""
|
|
|
|
| 63 |
company_research_data: Dict[str, Any]
|
| 64 |
attempted_search_queries: List[str]
|
| 65 |
current_node: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
State definitions for the Job Writer LangGraph Workflow.
|
| 3 |
"""
|
| 4 |
|
| 5 |
+
from typing import Annotated
|
| 6 |
from typing_extensions import List, Dict, Any
|
| 7 |
from langgraph.graph import MessagesState
|
| 8 |
from dataclasses import dataclass
|
| 9 |
|
| 10 |
+
|
| 11 |
+
def merge_dict_reducer(
|
| 12 |
+
x: Dict[str, Any] | None, y: Dict[str, Any] | None
|
| 13 |
+
) -> Dict[str, Any]:
|
| 14 |
+
"""
|
| 15 |
+
Reducer function to merge two dictionaries.
|
| 16 |
+
Used for company_research_data to allow parallel nodes to update it.
|
| 17 |
+
|
| 18 |
+
Args:
|
| 19 |
+
x: First dictionary (existing state or None)
|
| 20 |
+
y: Second dictionary (new update or None)
|
| 21 |
+
|
| 22 |
+
Returns:
|
| 23 |
+
Merged dictionary with y taking precedence for overlapping keys
|
| 24 |
+
"""
|
| 25 |
+
# Handle None cases - treat as empty dict
|
| 26 |
+
if x is None:
|
| 27 |
+
x = {}
|
| 28 |
+
if y is None:
|
| 29 |
+
y = {}
|
| 30 |
+
|
| 31 |
+
# Merge dictionaries, with y taking precedence for overlapping keys
|
| 32 |
+
return {**x, **y}
|
| 33 |
+
|
| 34 |
+
|
| 35 |
@dataclass
|
| 36 |
class AppState(MessagesState):
|
| 37 |
"""
|
|
|
|
| 48 |
final: Final version of the application material
|
| 49 |
content: Type of application material to generate
|
| 50 |
"""
|
| 51 |
+
|
| 52 |
resume_path: str
|
| 53 |
job_description_source: str
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
content: str # "cover_letter", "bullets", "linkedin_note"
|
| 55 |
current_node: str
|
| 56 |
|
| 57 |
|
| 58 |
+
class DataLoadState(MessagesState, total=False):
|
| 59 |
"""
|
| 60 |
State container for the job application writer workflow.
|
| 61 |
+
Includes all fields needed throughout the entire workflow.
|
| 62 |
|
| 63 |
Attributes:
|
| 64 |
resume: List of text chunks from the candidate's resume
|
| 65 |
job_description: List of text chunks from the job description
|
| 66 |
persona: The writing persona to use ("recruiter" or "hiring_manager")
|
| 67 |
content: Type of application material to generate
|
| 68 |
+
draft: Current draft of the application material
|
| 69 |
+
feedback: Human feedback on the draft
|
| 70 |
+
critique_feedback: Automated critique feedback
|
| 71 |
+
output_data: Final output data
|
| 72 |
+
next_node: Next node to route to after data loading subgraph
|
| 73 |
"""
|
| 74 |
+
|
| 75 |
resume_path: str
|
| 76 |
job_description_source: str
|
| 77 |
+
content: str # "cover_letter", "bullets", "linkedin_note"
|
| 78 |
resume: str
|
| 79 |
job_description: str
|
| 80 |
company_name: str
|
| 81 |
current_node: str
|
| 82 |
+
next_node: str # For routing after data loading subgraph
|
| 83 |
+
# Use Annotated with reducer to allow parallel nodes to merge dictionary updates
|
| 84 |
+
company_research_data: Annotated[Dict[str, Any], merge_dict_reducer]
|
| 85 |
+
# Result fields (added for final output - optional, populated later)
|
| 86 |
+
draft: str
|
| 87 |
+
feedback: str
|
| 88 |
+
critique_feedback: str
|
| 89 |
+
output_data: str
|
| 90 |
|
| 91 |
|
| 92 |
class ResearchState(MessagesState):
|
|
|
|
| 97 |
attempted_search_queries: List of queries used extracted from the job description
|
| 98 |
compiled_knowledge: Compiled knowledge from the research
|
| 99 |
"""
|
| 100 |
+
|
| 101 |
company_research_data: Dict[str, Any]
|
| 102 |
attempted_search_queries: List[str]
|
| 103 |
current_node: str
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
class ResultState(MessagesState):
|
| 107 |
+
"""
|
| 108 |
+
State container for the job application writer workflow.
|
| 109 |
+
Attributes:
|
| 110 |
+
final_result: The final generated application material
|
| 111 |
+
"""
|
| 112 |
+
|
| 113 |
+
draft: str
|
| 114 |
+
feedback: str
|
| 115 |
+
critique_feedback: str
|
| 116 |
+
current_node: str
|
| 117 |
+
company_research_data: Dict[str, Any]
|
| 118 |
+
output_data: str
|
src/job_writing_agent/logs/job_writer.log
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/job_writing_agent/nodes/initializing.py
CHANGED
|
@@ -8,74 +8,80 @@ job descriptions, managing missing inputs, and populating application state.
|
|
| 8 |
|
| 9 |
The module includes utilities for:
|
| 10 |
- Parsing resume files and extracting text content
|
| 11 |
-
- Parsing job descriptions and extracting company information
|
| 12 |
- Orchestrating input loading with validation
|
| 13 |
- Providing user prompts for missing information during verification
|
| 14 |
"""
|
| 15 |
|
| 16 |
import logging
|
| 17 |
-
from typing import Tuple
|
| 18 |
-
from typing_extensions import Literal
|
| 19 |
|
| 20 |
from langchain_core.documents import Document
|
| 21 |
from langchain_core.messages import SystemMessage
|
|
|
|
| 22 |
|
| 23 |
-
from job_writing_agent.classes import
|
| 24 |
-
from job_writing_agent.utils.document_processing import
|
|
|
|
|
|
|
|
|
|
| 25 |
from job_writing_agent.prompts.templates import agent_system_prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
logger = logging.getLogger(__name__)
|
| 28 |
|
| 29 |
|
| 30 |
-
#
|
| 31 |
-
# Helper decorator to log exceptions for async methods
|
| 32 |
-
# ---------------------------------------------------------------------------
|
| 33 |
-
def log_exceptions(func):
|
| 34 |
-
"""Decorator to log exceptions in async functions."""
|
| 35 |
-
async def wrapper(*args, **kwargs):
|
| 36 |
-
try:
|
| 37 |
-
return await func(*args, **kwargs)
|
| 38 |
-
except Exception as exc:
|
| 39 |
-
logger.error(
|
| 40 |
-
"Exception in %s: %s", func.__name__, exc, exc_info=True
|
| 41 |
-
)
|
| 42 |
-
raise
|
| 43 |
-
|
| 44 |
-
return wrapper
|
| 45 |
|
| 46 |
|
| 47 |
class Dataloading:
|
| 48 |
"""
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
Methods
|
| 52 |
-------
|
| 53 |
-
set_agent_system_message(state:
|
| 54 |
Adds the system prompt to the conversation state.
|
| 55 |
get_resume(resume_source) -> str
|
| 56 |
Parses a resume file and returns its plain‑text content.
|
| 57 |
parse_job_description(job_description_source) -> Tuple[str, str]
|
| 58 |
Parses a job description and returns its text and company name.
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
"""
|
|
|
|
| 69 |
def __init__(self):
|
|
|
|
| 70 |
pass
|
| 71 |
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
|
|
|
|
| 74 |
"""Add the system prompt to the conversation state.
|
| 75 |
|
| 76 |
Parameters
|
| 77 |
----------
|
| 78 |
-
state:
|
| 79 |
Current workflow state.
|
| 80 |
|
| 81 |
Returns
|
|
@@ -83,9 +89,7 @@ class Dataloading:
|
|
| 83 |
DataLoadState
|
| 84 |
Updated state with the system message and the next node identifier.
|
| 85 |
"""
|
| 86 |
-
agent_initialization_system_message = SystemMessage(
|
| 87 |
-
content=agent_system_prompt
|
| 88 |
-
)
|
| 89 |
messages = state.get("messages", [])
|
| 90 |
messages.append(agent_initialization_system_message)
|
| 91 |
return {
|
|
@@ -94,217 +98,416 @@ class Dataloading:
|
|
| 94 |
"current_node": "initialize_system",
|
| 95 |
}
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
async def get_resume(self, resume_source):
|
| 98 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
Parameters
|
| 101 |
----------
|
| 102 |
resume_source: Any
|
| 103 |
Path or file‑like object accepted by ``parse_resume``.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
"""
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
"Skipping empty or invalid chunk in resume: %s", chunk
|
| 118 |
-
)
|
| 119 |
-
return resume_text
|
| 120 |
-
except Exception as e:
|
| 121 |
-
logger.error("Error parsing resume: %s", e)
|
| 122 |
-
raise
|
| 123 |
|
|
|
|
|
|
|
| 124 |
async def parse_job_description(self, job_description_source):
|
| 125 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
Parameters
|
| 128 |
----------
|
| 129 |
job_description_source: Any
|
| 130 |
-
Source accepted by ``get_job_description
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
"""
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
)
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
)
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
else:
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
logger.error(
|
| 176 |
-
"Error parsing job description from source '%s': %s",
|
| 177 |
-
job_description_source,
|
| 178 |
-
e,
|
| 179 |
-
exc_info=True,
|
| 180 |
-
)
|
| 181 |
-
raise
|
| 182 |
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
# -----------------------------------------------------------------------
|
| 187 |
-
@log_exceptions
|
| 188 |
async def _load_resume(self, resume_source) -> str:
|
| 189 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
if not resume_source:
|
| 191 |
raise ValueError("resume_source is required")
|
| 192 |
return await self.get_resume(resume_source)
|
| 193 |
|
| 194 |
-
|
| 195 |
-
@
|
| 196 |
async def _load_job_description(self, jd_source) -> Tuple[str, str]:
|
| 197 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
if not jd_source:
|
| 199 |
raise ValueError("job_description_source is required")
|
| 200 |
return await self.parse_job_description(jd_source)
|
| 201 |
|
| 202 |
-
|
| 203 |
-
@
|
| 204 |
async def _prompt_user(self, prompt_msg: str) -> str:
|
| 205 |
-
"""
|
| 206 |
-
|
| 207 |
-
return input(prompt_msg)
|
| 208 |
|
|
|
|
|
|
|
| 209 |
|
| 210 |
-
|
| 211 |
-
|
|
|
|
|
|
|
| 212 |
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
|
|
|
| 216 |
"""
|
| 217 |
-
|
| 218 |
-
|
| 219 |
|
| 220 |
-
# -------------------------------------------------------------------
|
| 221 |
-
# Load job description (or prompt if missing during verification)
|
| 222 |
-
# -------------------------------------------------------------------
|
| 223 |
-
job_text = ""
|
| 224 |
-
company_name = ""
|
| 225 |
-
if jd_src:
|
| 226 |
-
job_text, company_name = await self._load_job_description(jd_src)
|
| 227 |
-
elif state.get("current_node") == "verify":
|
| 228 |
-
job_text = await self._prompt_user(
|
| 229 |
-
"Please paste the job posting in text format: "
|
| 230 |
-
)
|
| 231 |
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
resume_text = ""
|
| 236 |
-
if resume_src:
|
| 237 |
-
resume_text = await self._load_resume(resume_src)
|
| 238 |
-
elif state.get("current_node") == "verify":
|
| 239 |
-
raw = await self._prompt_user(
|
| 240 |
-
"Please paste the resume in text format: "
|
| 241 |
-
)
|
| 242 |
-
resume_text = raw
|
| 243 |
|
| 244 |
-
# Populate state
|
| 245 |
-
state["company_research_data"] = {
|
| 246 |
-
"resume": resume_text,
|
| 247 |
-
"job_description": job_text,
|
| 248 |
-
"company_name": company_name,
|
| 249 |
-
}
|
| 250 |
-
state["current_node"] = "load_inputs"
|
| 251 |
-
return state
|
| 252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
|
| 264 |
-
def verify_inputs(self, state: AppState) -> Literal["load", "research"]:
|
| 265 |
-
"""Validate inputs and decide the next workflow node.
|
| 266 |
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
"""
|
| 272 |
-
print("Verifying Inputs")
|
| 273 |
-
state["current_node"] = "verify"
|
| 274 |
-
logger.info("Verifying loaded inputs!")
|
| 275 |
-
assert state["company_research_data"].get(
|
| 276 |
-
"resume"
|
| 277 |
-
), "Resume is missing in company_research_data"
|
| 278 |
-
assert state["company_research_data"].get(
|
| 279 |
-
"job_description"
|
| 280 |
-
), "Job description is missing"
|
| 281 |
-
if not state.get("company_research_data"):
|
| 282 |
-
missing_items = []
|
| 283 |
-
if not state["company_research_data"].get("resume", ""):
|
| 284 |
-
missing_items.append("resume")
|
| 285 |
-
if not state["company_research_data"].get("job_description", ""):
|
| 286 |
-
missing_items.append("job description")
|
| 287 |
-
logger.error("Missing required data: %s", ", ".join(missing_items))
|
| 288 |
-
return "load"
|
| 289 |
-
# Normalise values to strings
|
| 290 |
-
for key in ["resume", "job_description"]:
|
| 291 |
-
try:
|
| 292 |
-
value = state["company_research_data"][key]
|
| 293 |
-
if isinstance(value, (list, tuple)):
|
| 294 |
-
state["company_research_data"][key] = " ".join(
|
| 295 |
-
str(x) for x in value
|
| 296 |
-
)
|
| 297 |
-
elif isinstance(value, dict):
|
| 298 |
-
state["company_research_data"][key] = str(value)
|
| 299 |
-
else:
|
| 300 |
-
state["company_research_data"][key] = str(value)
|
| 301 |
-
except Exception as e:
|
| 302 |
-
logger.warning("Error converting %s to string: %s", key, e)
|
| 303 |
-
raise
|
| 304 |
-
return "research"
|
| 305 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
The module includes utilities for:
|
| 10 |
- Parsing resume files and extracting text content
|
| 11 |
+
- Parsing job descriptions and extracting company information
|
| 12 |
- Orchestrating input loading with validation
|
| 13 |
- Providing user prompts for missing information during verification
|
| 14 |
"""
|
| 15 |
|
| 16 |
import logging
|
| 17 |
+
from typing import Tuple, Optional
|
|
|
|
| 18 |
|
| 19 |
from langchain_core.documents import Document
|
| 20 |
from langchain_core.messages import SystemMessage
|
| 21 |
+
from langgraph.graph import StateGraph, END, START
|
| 22 |
|
| 23 |
+
from job_writing_agent.classes import DataLoadState
|
| 24 |
+
from job_writing_agent.utils.document_processing import (
|
| 25 |
+
parse_resume,
|
| 26 |
+
get_job_description,
|
| 27 |
+
)
|
| 28 |
from job_writing_agent.prompts.templates import agent_system_prompt
|
| 29 |
+
from job_writing_agent.utils.logging.logging_decorators import (
|
| 30 |
+
log_async,
|
| 31 |
+
log_execution,
|
| 32 |
+
log_errors,
|
| 33 |
+
)
|
| 34 |
|
| 35 |
logger = logging.getLogger(__name__)
|
| 36 |
|
| 37 |
|
| 38 |
+
# Note: Using centralized logging decorators from utils.logging.logging_decorators
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
|
| 41 |
class Dataloading:
|
| 42 |
"""
|
| 43 |
+
Helper class providing utility methods for loading and parsing data.
|
| 44 |
+
|
| 45 |
+
This class provides helper methods used by the data loading subgraph nodes.
|
| 46 |
+
The actual workflow orchestration is handled by the data_loading_workflow subgraph.
|
| 47 |
|
| 48 |
Methods
|
| 49 |
-------
|
| 50 |
+
set_agent_system_message(state: DataLoadState) -> DataLoadState
|
| 51 |
Adds the system prompt to the conversation state.
|
| 52 |
get_resume(resume_source) -> str
|
| 53 |
Parses a resume file and returns its plain‑text content.
|
| 54 |
parse_job_description(job_description_source) -> Tuple[str, str]
|
| 55 |
Parses a job description and returns its text and company name.
|
| 56 |
+
verify_inputs(state: DataLoadState) -> DataLoadState
|
| 57 |
+
Validates inputs and sets next_node for routing.
|
| 58 |
+
|
| 59 |
+
Private Methods (used by subgraph nodes)
|
| 60 |
+
-----------------------------------------
|
| 61 |
+
_load_resume(resume_source) -> str
|
| 62 |
+
Load resume content, raising if the source is missing.
|
| 63 |
+
_load_job_description(jd_source) -> Tuple[str, str]
|
| 64 |
+
Load job description text and company name, raising if missing.
|
| 65 |
+
_prompt_user(prompt_msg: str) -> str
|
| 66 |
+
Prompt the user for input (synchronous input wrapped for async use).
|
| 67 |
|
| 68 |
"""
|
| 69 |
+
|
| 70 |
def __init__(self):
|
| 71 |
+
"""Initialize Dataloading helper class."""
|
| 72 |
pass
|
| 73 |
|
| 74 |
+
# =======================================================================
|
| 75 |
+
# System/Initialization Methods
|
| 76 |
+
# =======================================================================
|
| 77 |
|
| 78 |
+
@log_async
|
| 79 |
+
async def set_agent_system_message(self, state: DataLoadState) -> DataLoadState:
|
| 80 |
"""Add the system prompt to the conversation state.
|
| 81 |
|
| 82 |
Parameters
|
| 83 |
----------
|
| 84 |
+
state: DataLoadState
|
| 85 |
Current workflow state.
|
| 86 |
|
| 87 |
Returns
|
|
|
|
| 89 |
DataLoadState
|
| 90 |
Updated state with the system message and the next node identifier.
|
| 91 |
"""
|
| 92 |
+
agent_initialization_system_message = SystemMessage(content=agent_system_prompt)
|
|
|
|
|
|
|
| 93 |
messages = state.get("messages", [])
|
| 94 |
messages.append(agent_initialization_system_message)
|
| 95 |
return {
|
|
|
|
| 98 |
"current_node": "initialize_system",
|
| 99 |
}
|
| 100 |
|
| 101 |
+
# =======================================================================
|
| 102 |
+
# Public Parsing Methods
|
| 103 |
+
# =======================================================================
|
| 104 |
+
|
| 105 |
+
@log_async
|
| 106 |
+
@log_errors
|
| 107 |
async def get_resume(self, resume_source):
|
| 108 |
+
"""
|
| 109 |
+
Parse a resume file and return its plain‑text content.
|
| 110 |
+
|
| 111 |
+
This method extracts text from resume chunks, handling both Document
|
| 112 |
+
objects and plain strings. Empty or invalid chunks are skipped.
|
| 113 |
|
| 114 |
Parameters
|
| 115 |
----------
|
| 116 |
resume_source: Any
|
| 117 |
Path or file‑like object accepted by ``parse_resume``.
|
| 118 |
+
|
| 119 |
+
Returns
|
| 120 |
+
-------
|
| 121 |
+
str
|
| 122 |
+
Plain text content of the resume.
|
| 123 |
+
|
| 124 |
+
Raises
|
| 125 |
+
------
|
| 126 |
+
AssertionError
|
| 127 |
+
If resume_source is None.
|
| 128 |
+
Exception
|
| 129 |
+
If parsing fails.
|
| 130 |
"""
|
| 131 |
+
logger.info("Parsing resume...")
|
| 132 |
+
resume_text = ""
|
| 133 |
+
assert resume_source is not None
|
| 134 |
+
resume_chunks = parse_resume(resume_source)
|
| 135 |
+
for chunk in resume_chunks:
|
| 136 |
+
if hasattr(chunk, "page_content") and chunk.page_content:
|
| 137 |
+
resume_text += chunk.page_content
|
| 138 |
+
elif isinstance(chunk, str) and chunk:
|
| 139 |
+
resume_text += chunk
|
| 140 |
+
else:
|
| 141 |
+
logger.debug("Skipping empty or invalid chunk in resume: %s", chunk)
|
| 142 |
+
return resume_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
+
@log_async
|
| 145 |
+
@log_errors
|
| 146 |
async def parse_job_description(self, job_description_source):
|
| 147 |
+
"""
|
| 148 |
+
Parse a job description and return its text and company name.
|
| 149 |
+
|
| 150 |
+
Extracts both the job posting text and company name from the document.
|
| 151 |
+
Company name is extracted from document metadata if available.
|
| 152 |
|
| 153 |
Parameters
|
| 154 |
----------
|
| 155 |
job_description_source: Any
|
| 156 |
+
Source accepted by ``get_job_description`` (URL, file path, etc.).
|
| 157 |
+
|
| 158 |
+
Returns
|
| 159 |
+
-------
|
| 160 |
+
Tuple[str, str]
|
| 161 |
+
A tuple of (job_posting_text, company_name).
|
| 162 |
+
|
| 163 |
+
Raises
|
| 164 |
+
------
|
| 165 |
+
AssertionError
|
| 166 |
+
If job_description_source is None.
|
| 167 |
+
Exception
|
| 168 |
+
If parsing fails.
|
| 169 |
"""
|
| 170 |
+
company_name = ""
|
| 171 |
+
job_posting_text = ""
|
| 172 |
+
|
| 173 |
+
logger.info("Parsing job description from: %s", job_description_source)
|
| 174 |
+
assert job_description_source is not None, (
|
| 175 |
+
"Job description source cannot be None"
|
| 176 |
+
)
|
| 177 |
+
|
| 178 |
+
job_description_document: Optional[Document] = await get_job_description(
|
| 179 |
+
job_description_source
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
+
# Extract company name from metadata
|
| 183 |
+
if hasattr(job_description_document, "metadata") and isinstance(
|
| 184 |
+
job_description_document.metadata, dict
|
| 185 |
+
):
|
| 186 |
+
company_name = job_description_document.metadata.get("company_name", "")
|
| 187 |
+
if not company_name:
|
| 188 |
+
logger.warning("Company name not found in job description metadata.")
|
| 189 |
+
else:
|
| 190 |
+
logger.warning(
|
| 191 |
+
"Metadata attribute missing or not a dict in job description document."
|
| 192 |
)
|
| 193 |
+
|
| 194 |
+
# Extract job posting text
|
| 195 |
+
if hasattr(job_description_document, "page_content"):
|
| 196 |
+
job_posting_text = job_description_document.page_content or ""
|
| 197 |
+
if not job_posting_text:
|
| 198 |
+
logger.info("Parsed job posting text is empty.")
|
| 199 |
+
else:
|
| 200 |
+
logger.warning(
|
| 201 |
+
"page_content attribute missing in job description document."
|
| 202 |
)
|
| 203 |
+
|
| 204 |
+
return job_posting_text, company_name
|
| 205 |
+
|
| 206 |
+
@log_async
|
| 207 |
+
async def get_application_form_details(self, job_description_source):
|
| 208 |
+
"""
|
| 209 |
+
Placeholder for future method to get application form details.
|
| 210 |
+
|
| 211 |
+
This method will be implemented to extract form fields and requirements
|
| 212 |
+
from job application forms.
|
| 213 |
+
|
| 214 |
+
Parameters
|
| 215 |
+
----------
|
| 216 |
+
job_description_source: Any
|
| 217 |
+
Source of the job description or application form.
|
| 218 |
+
"""
|
| 219 |
+
# TODO: Implement form field extraction
|
| 220 |
+
pass
|
| 221 |
+
|
| 222 |
+
# =======================================================================
|
| 223 |
+
# Validation Methods
|
| 224 |
+
# =======================================================================
|
| 225 |
+
|
| 226 |
+
@log_execution
|
| 227 |
+
@log_errors
|
| 228 |
+
def verify_inputs(self, state: DataLoadState) -> DataLoadState:
|
| 229 |
+
"""
|
| 230 |
+
Validate inputs and set next_node for routing.
|
| 231 |
+
|
| 232 |
+
This method validates that both resume and job description are present
|
| 233 |
+
in the state, normalizes their values to strings, and sets the next_node
|
| 234 |
+
field for conditional routing in the main workflow.
|
| 235 |
+
|
| 236 |
+
Parameters
|
| 237 |
+
----------
|
| 238 |
+
state: DataLoadState
|
| 239 |
+
Current workflow state containing company_research_data.
|
| 240 |
+
|
| 241 |
+
Returns
|
| 242 |
+
-------
|
| 243 |
+
DataLoadState
|
| 244 |
+
Updated state with next_node set to "load" (if validation fails)
|
| 245 |
+
or "research" (if validation passes).
|
| 246 |
+
|
| 247 |
+
Raises
|
| 248 |
+
------
|
| 249 |
+
Exception
|
| 250 |
+
If normalization fails for any field.
|
| 251 |
+
"""
|
| 252 |
+
logger.info("Verifying loaded inputs!")
|
| 253 |
+
state["current_node"] = "verify"
|
| 254 |
+
|
| 255 |
+
# Validate required fields
|
| 256 |
+
company_research_data = state.get("company_research_data", {})
|
| 257 |
+
|
| 258 |
+
if not company_research_data.get("resume"):
|
| 259 |
+
logger.error("Resume is missing in company_research_data")
|
| 260 |
+
state["next_node"] = "load" # Loop back to load subgraph
|
| 261 |
+
return state
|
| 262 |
+
|
| 263 |
+
if not company_research_data.get("job_description"):
|
| 264 |
+
logger.error("Job description is missing in company_research_data")
|
| 265 |
+
state["next_node"] = "load" # Loop back to load subgraph
|
| 266 |
+
return state
|
| 267 |
+
|
| 268 |
+
# Normalize values to strings
|
| 269 |
+
for key in ["resume", "job_description"]:
|
| 270 |
+
try:
|
| 271 |
+
value = company_research_data[key]
|
| 272 |
+
if isinstance(value, (list, tuple)):
|
| 273 |
+
company_research_data[key] = " ".join(str(x) for x in value)
|
| 274 |
+
elif isinstance(value, dict):
|
| 275 |
+
company_research_data[key] = str(value)
|
| 276 |
else:
|
| 277 |
+
company_research_data[key] = str(value)
|
| 278 |
+
except Exception as e:
|
| 279 |
+
logger.warning("Error converting %s to string: %s", key, e)
|
| 280 |
+
state["next_node"] = "load"
|
| 281 |
+
return state
|
| 282 |
+
|
| 283 |
+
# All validations passed
|
| 284 |
+
state["next_node"] = "research"
|
| 285 |
+
logger.info("Inputs verified successfully, proceeding to research")
|
| 286 |
+
return state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
|
| 288 |
+
# =======================================================================
|
| 289 |
+
# Private Helper Methods (used by subgraph nodes)
|
| 290 |
+
# =======================================================================
|
| 291 |
|
| 292 |
+
@log_async
|
| 293 |
+
@log_errors
|
|
|
|
|
|
|
| 294 |
async def _load_resume(self, resume_source) -> str:
|
| 295 |
+
"""
|
| 296 |
+
Load resume content, raising if the source is missing.
|
| 297 |
+
|
| 298 |
+
This is a wrapper around get_resume() that validates the source first.
|
| 299 |
+
Used by subgraph nodes for consistent error handling.
|
| 300 |
+
|
| 301 |
+
Parameters
|
| 302 |
+
----------
|
| 303 |
+
resume_source: Any
|
| 304 |
+
Path or file-like object for the resume.
|
| 305 |
+
|
| 306 |
+
Returns
|
| 307 |
+
-------
|
| 308 |
+
str
|
| 309 |
+
Plain text content of the resume.
|
| 310 |
+
|
| 311 |
+
Raises
|
| 312 |
+
------
|
| 313 |
+
ValueError
|
| 314 |
+
If resume_source is None or empty.
|
| 315 |
+
"""
|
| 316 |
if not resume_source:
|
| 317 |
raise ValueError("resume_source is required")
|
| 318 |
return await self.get_resume(resume_source)
|
| 319 |
|
| 320 |
+
@log_async
|
| 321 |
+
@log_errors
|
| 322 |
async def _load_job_description(self, jd_source) -> Tuple[str, str]:
|
| 323 |
+
"""
|
| 324 |
+
Load job description text and company name, raising if missing.
|
| 325 |
+
|
| 326 |
+
This is a wrapper around parse_job_description() that validates the source first.
|
| 327 |
+
Used by subgraph nodes for consistent error handling.
|
| 328 |
+
|
| 329 |
+
Parameters
|
| 330 |
+
----------
|
| 331 |
+
jd_source: Any
|
| 332 |
+
Source for the job description (URL, file path, etc.).
|
| 333 |
+
|
| 334 |
+
Returns
|
| 335 |
+
-------
|
| 336 |
+
Tuple[str, str]
|
| 337 |
+
A tuple of (job_posting_text, company_name).
|
| 338 |
+
|
| 339 |
+
Raises
|
| 340 |
+
------
|
| 341 |
+
ValueError
|
| 342 |
+
If jd_source is None or empty.
|
| 343 |
+
"""
|
| 344 |
if not jd_source:
|
| 345 |
raise ValueError("job_description_source is required")
|
| 346 |
return await self.parse_job_description(jd_source)
|
| 347 |
|
| 348 |
+
@log_async
|
| 349 |
+
@log_errors
|
| 350 |
async def _prompt_user(self, prompt_msg: str) -> str:
|
| 351 |
+
"""
|
| 352 |
+
Prompt the user for input (synchronous input wrapped for async use).
|
|
|
|
| 353 |
|
| 354 |
+
This method wraps the synchronous input() function to be used in async contexts.
|
| 355 |
+
In a production async UI, this would be replaced with an async input mechanism.
|
| 356 |
|
| 357 |
+
Parameters
|
| 358 |
+
----------
|
| 359 |
+
prompt_msg: str
|
| 360 |
+
Message to display to the user.
|
| 361 |
|
| 362 |
+
Returns
|
| 363 |
+
-------
|
| 364 |
+
str
|
| 365 |
+
User input string.
|
| 366 |
"""
|
| 367 |
+
# In a real async UI replace input with an async call.
|
| 368 |
+
return input(prompt_msg)
|
| 369 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
|
| 371 |
+
# ============================================================================
|
| 372 |
+
# Data Loading Subgraph Nodes
|
| 373 |
+
# ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
|
| 376 |
+
@log_async
|
| 377 |
+
async def parse_resume_node(state: DataLoadState) -> DataLoadState:
|
| 378 |
+
"""
|
| 379 |
+
Node to parse resume in parallel with job description parsing.
|
| 380 |
|
| 381 |
+
Extracts resume parsing logic from load_inputs for parallel execution.
|
| 382 |
+
Returns only the resume data - reducer will merge with job description data.
|
| 383 |
+
"""
|
| 384 |
+
dataloading = Dataloading()
|
| 385 |
+
resume_src = state.get("resume_path")
|
| 386 |
+
|
| 387 |
+
resume_text = ""
|
| 388 |
+
if resume_src:
|
| 389 |
+
resume_text = await dataloading._load_resume(resume_src)
|
| 390 |
+
elif state.get("current_node") == "verify":
|
| 391 |
+
resume_text = await dataloading._prompt_user(
|
| 392 |
+
"Please paste the resume in text format: "
|
| 393 |
+
)
|
| 394 |
|
| 395 |
+
# Return only the resume data - reducer will merge this with job description data
|
| 396 |
+
logger.info(f"Resume parsed: {len(resume_text)} characters")
|
| 397 |
+
# Return partial state update - LangGraph will merge this with other parallel updates
|
| 398 |
+
return {
|
| 399 |
+
"company_research_data": {"resume": resume_text},
|
| 400 |
+
}
|
| 401 |
|
|
|
|
|
|
|
| 402 |
|
| 403 |
+
@log_async
|
| 404 |
+
async def parse_job_description_node(state: DataLoadState) -> DataLoadState:
|
| 405 |
+
"""
|
| 406 |
+
Node to parse job description in parallel with resume parsing.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
|
| 408 |
+
Extracts job description parsing logic from load_inputs for parallel execution.
|
| 409 |
+
Returns only the job description data - reducer will merge with resume data.
|
| 410 |
+
"""
|
| 411 |
+
dataloading = Dataloading()
|
| 412 |
+
jd_src = state.get("job_description_source")
|
| 413 |
+
|
| 414 |
+
job_text = ""
|
| 415 |
+
company_name = ""
|
| 416 |
+
if jd_src:
|
| 417 |
+
job_text, company_name = await dataloading._load_job_description(jd_src)
|
| 418 |
+
elif state.get("current_node") == "verify":
|
| 419 |
+
job_text = await dataloading._prompt_user(
|
| 420 |
+
"Please paste the job posting in text format: "
|
| 421 |
+
)
|
| 422 |
|
| 423 |
+
# Return only the job description data - reducer will merge this with resume data
|
| 424 |
+
logger.info(
|
| 425 |
+
f"Job description parsed: {len(job_text)} characters, company: {company_name}"
|
| 426 |
+
)
|
| 427 |
+
# Return partial state update - LangGraph will merge this with other parallel updates
|
| 428 |
+
return {
|
| 429 |
+
"company_research_data": {
|
| 430 |
+
"job_description": job_text,
|
| 431 |
+
"company_name": company_name,
|
| 432 |
+
},
|
| 433 |
+
}
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
@log_execution
|
| 437 |
+
def aggregate_data_loading_results(state: DataLoadState) -> DataLoadState:
|
| 438 |
+
"""
|
| 439 |
+
Aggregate results from parallel resume and job description parsing nodes.
|
| 440 |
+
|
| 441 |
+
This node runs after both parse_resume_node and parse_job_description_node
|
| 442 |
+
complete. It ensures both results are present and normalizes the state.
|
| 443 |
+
"""
|
| 444 |
+
# Ensure company_research_data exists
|
| 445 |
+
if "company_research_data" not in state:
|
| 446 |
+
state["company_research_data"] = {}
|
| 447 |
+
|
| 448 |
+
# Get results from parallel nodes
|
| 449 |
+
resume_text = state["company_research_data"].get("resume", "")
|
| 450 |
+
job_text = state["company_research_data"].get("job_description", "")
|
| 451 |
+
company_name = state["company_research_data"].get("company_name", "")
|
| 452 |
+
|
| 453 |
+
# Validate both are present
|
| 454 |
+
if not resume_text:
|
| 455 |
+
logger.warning("Resume text is empty after parsing")
|
| 456 |
+
if not job_text:
|
| 457 |
+
logger.warning("Job description text is empty after parsing")
|
| 458 |
+
|
| 459 |
+
# Ensure final structure is correct
|
| 460 |
+
state["company_research_data"] = {
|
| 461 |
+
"resume": resume_text,
|
| 462 |
+
"job_description": job_text,
|
| 463 |
+
"company_name": company_name,
|
| 464 |
+
}
|
| 465 |
+
state["current_node"] = "aggregate_results"
|
| 466 |
+
|
| 467 |
+
logger.info("Data loading results aggregated successfully")
|
| 468 |
+
return state
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
@log_execution
|
| 472 |
+
def verify_inputs_node(state: DataLoadState) -> DataLoadState:
|
| 473 |
+
"""
|
| 474 |
+
Verify that required inputs are present and set next_node for routing.
|
| 475 |
+
|
| 476 |
+
Modified from verify_inputs to return state with next_node instead of string.
|
| 477 |
+
"""
|
| 478 |
+
dataloading = Dataloading()
|
| 479 |
+
return dataloading.verify_inputs(state)
|
| 480 |
+
|
| 481 |
+
|
| 482 |
+
# ============================================================================
|
| 483 |
+
# Data Loading Subgraph
|
| 484 |
+
# ============================================================================
|
| 485 |
+
|
| 486 |
+
# Create data loading subgraph
|
| 487 |
+
data_loading_subgraph = StateGraph(DataLoadState)
|
| 488 |
+
|
| 489 |
+
# Add subgraph nodes
|
| 490 |
+
dataloading_instance = Dataloading()
|
| 491 |
+
data_loading_subgraph.add_node(
|
| 492 |
+
"set_agent_system_message", dataloading_instance.set_agent_system_message
|
| 493 |
+
)
|
| 494 |
+
data_loading_subgraph.add_node("parse_resume", parse_resume_node)
|
| 495 |
+
data_loading_subgraph.add_node("parse_job_description", parse_job_description_node)
|
| 496 |
+
data_loading_subgraph.add_node("aggregate_results", aggregate_data_loading_results)
|
| 497 |
+
data_loading_subgraph.add_node("verify_inputs", verify_inputs_node)
|
| 498 |
+
|
| 499 |
+
# Add subgraph edges
|
| 500 |
+
data_loading_subgraph.add_edge(START, "set_agent_system_message")
|
| 501 |
+
# Parallel execution: both nodes start after set_agent_system_message
|
| 502 |
+
data_loading_subgraph.add_edge("set_agent_system_message", "parse_resume")
|
| 503 |
+
data_loading_subgraph.add_edge("set_agent_system_message", "parse_job_description")
|
| 504 |
+
# Both parallel nodes feed into aggregate (LangGraph waits for both)
|
| 505 |
+
data_loading_subgraph.add_edge("parse_resume", "aggregate_results")
|
| 506 |
+
data_loading_subgraph.add_edge("parse_job_description", "aggregate_results")
|
| 507 |
+
# Aggregate feeds into verification
|
| 508 |
+
data_loading_subgraph.add_edge("aggregate_results", "verify_inputs")
|
| 509 |
+
# Verification ends the subgraph
|
| 510 |
+
data_loading_subgraph.add_edge("verify_inputs", END)
|
| 511 |
+
|
| 512 |
+
# Compile data loading subgraph
|
| 513 |
+
data_loading_workflow = data_loading_subgraph.compile()
|
src/job_writing_agent/nodes/job_description_loader.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Job Description Loader Module
|
| 4 |
+
|
| 5 |
+
This module provides the JobDescriptionLoader class responsible for loading and parsing
|
| 6 |
+
job description files and URLs, extracting both the job posting text and company name.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import logging
|
| 10 |
+
from typing import Callable, Any, Optional, Tuple, Awaitable
|
| 11 |
+
|
| 12 |
+
from langchain_core.documents import Document
|
| 13 |
+
|
| 14 |
+
from job_writing_agent.utils.document_processing import get_job_description
|
| 15 |
+
from job_writing_agent.utils.logging.logging_decorators import (
|
| 16 |
+
log_async,
|
| 17 |
+
log_errors,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
logger = logging.getLogger(__name__)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class JobDescriptionLoader:
    """
    Responsible for loading and parsing job description documents.

    This class follows SOLID principles:
    - Single Responsibility: Only handles job description parsing
    - Dependency Inversion: Parser is injected for testability
    - Open/Closed: Can extend with different parsers without modification
    - Interface Segregation: Focused interface (only job description methods)

    Example:
        >>> loader = JobDescriptionLoader()
        >>> job_text, company = await loader.parse_job_description("https://example.com/job")
        >>>
        >>> # With custom parser for testing
        >>> async def mock_parser(source):
        ...     return Document(page_content="test", metadata={"company_name": "TestCo"})
        >>> loader = JobDescriptionLoader(parser=mock_parser)
    """

    def __init__(self, parser: Optional[Callable[[Any], Awaitable[Document]]] = None):
        """
        Initialize JobDescriptionLoader with optional parser dependency injection.

        Parameters
        ----------
        parser: Optional[Callable[[Any], Awaitable[Document]]]
            Async function to parse job description documents. Defaults to
            `get_job_description` from document_processing. Can be injected
            for testing or custom parsing.

            The parser should:
            - Take one argument (source: str) - URL or file path
            - Return an awaitable that resolves to a Document object
            - Document should have page_content (str) and metadata (dict)
        """
        self._parser = parser or get_job_description

    @log_async
    @log_errors
    async def parse_job_description(
        self, job_description_source: Any
    ) -> Tuple[str, str]:
        """
        Parse a job description and return its text and company name.

        Extracts both the job posting text and company name from the document.
        Company name is extracted from document metadata if available.

        Parameters
        ----------
        job_description_source: Any
            Source accepted by the parser function (URL, file path, etc.).
            Can be a URL starting with http:// or https://, or a local file path.

        Returns
        -------
        Tuple[str, str]
            A tuple of (job_posting_text, company_name).
            If company name is not found in metadata, returns empty string.

        Raises
        ------
        ValueError
            If job_description_source is None.
        Exception
            If parsing fails.
        """
        company_name = ""
        job_posting_text = ""

        logger.info("Parsing job description from: %s", job_description_source)
        # Raise (not assert) so the validation survives `python -O`; this also
        # matches _load_job_description, which raises ValueError for the same case.
        if job_description_source is None:
            raise ValueError("Job description source cannot be None")

        job_description_document: Document = await self._parser(job_description_source)

        # Extract company name from metadata
        if hasattr(job_description_document, "metadata") and isinstance(
            job_description_document.metadata, dict
        ):
            company_name = job_description_document.metadata.get("company_name", "")
            if not company_name:
                logger.warning("Company name not found in job description metadata.")
        else:
            logger.warning(
                "Metadata attribute missing or not a dict in job description document."
            )

        # Extract job posting text
        if hasattr(job_description_document, "page_content"):
            job_posting_text = job_description_document.page_content or ""
            if not job_posting_text:
                logger.info("Parsed job posting text is empty.")
        else:
            logger.warning(
                "page_content attribute missing in job description document."
            )

        return job_posting_text, company_name

    @log_async
    @log_errors
    async def _load_job_description(self, jd_source: Any) -> Tuple[str, str]:
        """
        Load job description text and company name, raising if missing.

        This is a wrapper around parse_job_description() that validates the
        source first. Used by subgraph nodes for consistent error handling.

        Parameters
        ----------
        jd_source: Any
            Source for the job description (URL, file path, etc.).

        Returns
        -------
        Tuple[str, str]
            A tuple of (job_posting_text, company_name).

        Raises
        ------
        ValueError
            If jd_source is None or empty.
        """
        if not jd_source:
            raise ValueError("job_description_source is required")
        return await self.parse_job_description(jd_source)

    @log_async
    async def get_application_form_details(self, job_description_source: Any):
        """
        Placeholder for future method to get application form details.

        This method will be implemented to extract form fields and requirements
        from job application forms.

        Parameters
        ----------
        job_description_source: Any
            Source of the job description or application form.
        """
        # TODO: Implement form field extraction
        pass

    async def _prompt_user(self) -> str:
        """
        Prompt the user for input (synchronous input wrapped for async use).

        This method wraps the synchronous input() function to be used in async
        contexts. In a production async UI, this would be replaced with an
        async input mechanism (the blocking input() call stalls the event loop).

        Note: This is a shared utility method. In a future refactoring, this
        could be extracted to a separate UserInputHelper class following the
        Interface Segregation Principle.

        Returns
        -------
        str
            User input string.
        """
        # In a real async UI replace input with an async call.
        return input("Please paste the job description in text format: ")
|
src/job_writing_agent/nodes/research_workflow.py
CHANGED
|
@@ -1,97 +1,304 @@
|
|
| 1 |
-
#
|
| 2 |
-
"""
|
| 3 |
-
This module performs the research phase of the job application writing process.
|
| 4 |
-
One of the stages is Tavily Search which will be use to search for the company
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
import logging
|
| 8 |
import json
|
| 9 |
-
|
|
|
|
| 10 |
|
|
|
|
|
|
|
| 11 |
from job_writing_agent.tools.SearchTool import TavilyResearchTool
|
| 12 |
from job_writing_agent.classes.classes import ResearchState
|
| 13 |
-
from job_writing_agent.tools.SearchTool import
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
-
|
| 24 |
-
"""
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
try:
|
| 28 |
-
#
|
| 29 |
-
|
| 30 |
-
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
tavily_search = TavilyResearchTool(
|
| 42 |
-
job_description=job_description, company_name=company_name
|
| 43 |
-
)
|
| 44 |
|
| 45 |
-
|
|
|
|
|
|
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
)
|
| 50 |
|
| 51 |
-
logger.info(list(tavily_search_queries_json.values()))
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
| 59 |
)
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
)
|
| 63 |
-
assert len(tavily_search_queries_json) > 0, "No search queries were attempted"
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
)
|
| 68 |
|
| 69 |
-
|
| 70 |
-
state["attempted_search_queries"] = list(tavily_search_queries_json.values())
|
| 71 |
-
state["company_research_data"]["tavily_search"] = tavily_search_results
|
| 72 |
|
| 73 |
except Exception as e:
|
| 74 |
-
logger.error(f"Error in
|
| 75 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
state["company_research_data"]["tavily_search"] = []
|
| 77 |
state["attempted_search_queries"] = []
|
| 78 |
-
finally:
|
| 79 |
return state
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
-
print("\n\n\nInitializing research workflow...\n\n\n")
|
| 83 |
# Create research subgraph
|
| 84 |
research_subgraph = StateGraph(ResearchState)
|
| 85 |
|
| 86 |
# Add research subgraph nodes
|
| 87 |
research_subgraph.add_node("research_company", research_company)
|
| 88 |
-
research_subgraph.add_node("relevance_filter",
|
| 89 |
-
|
|
|
|
|
|
|
| 90 |
|
| 91 |
# Add research subgraph edges
|
| 92 |
research_subgraph.add_edge(START, "research_company")
|
| 93 |
research_subgraph.add_edge("research_company", "relevance_filter")
|
| 94 |
-
research_subgraph.add_edge("relevance_filter",
|
|
|
|
| 95 |
|
| 96 |
# Compile research subgraph
|
| 97 |
research_workflow = research_subgraph.compile()
|
|
|
|
| 1 |
+
# research_workflow.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import logging
|
| 3 |
import json
|
| 4 |
+
import asyncio
|
| 5 |
+
from typing import Dict, Any, cast
|
| 6 |
|
| 7 |
+
from langgraph.graph import StateGraph, END, START
|
| 8 |
+
import dspy
|
| 9 |
from job_writing_agent.tools.SearchTool import TavilyResearchTool
|
| 10 |
from job_writing_agent.classes.classes import ResearchState
|
| 11 |
+
from job_writing_agent.tools.SearchTool import filter_research_results_by_relevance
|
| 12 |
+
from job_writing_agent.agents.output_schema import (
|
| 13 |
+
CompanyResearchDataSummarizationSchema,
|
| 14 |
+
)
|
| 15 |
+
from job_writing_agent.utils.llm_provider_factory import LLMFactory
|
| 16 |
|
| 17 |
logger = logging.getLogger(__name__)
|
| 18 |
|
| 19 |
+
# Configuration
|
| 20 |
+
MAX_RETRIES = 3
|
| 21 |
+
RETRY_DELAY = 2 # seconds
|
| 22 |
+
QUERY_TIMEOUT = 30 # seconds
|
| 23 |
+
EVAL_TIMEOUT = 15 # seconds per evaluation
|
| 24 |
|
| 25 |
|
| 26 |
+
def validate_research_inputs(state: ResearchState) -> tuple[bool, str, str]:
    """
    Check that the research phase has everything it needs.

    Returns
    -------
    tuple[bool, str, str]
        (is_valid, company_name, job_description); on any failure the
        name/description slots are empty strings.
    """
    try:
        research_data = state["company_research_data"]
        fields = {
            "Company name": research_data.get("company_name", ""),
            "Job description": research_data.get("job_description", ""),
        }

        # Reject if either required field is absent or whitespace-only.
        for label, value in fields.items():
            if not value or not value.strip():
                logger.error(f"{label} is missing or empty")
                return False, "", ""

        company, description = fields.values()
        return True, company.strip(), description.strip()

    except (KeyError, TypeError, AttributeError) as exc:
        logger.error(f"Invalid state structure: {exc}")
        return False, "", ""
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def parse_dspy_queries_with_fallback(
    raw_queries: Dict[str, Any], company_name: str
) -> Dict[str, str]:
    """
    Turn raw DSPy output into a mapping of query_id -> query_string.

    Falls back to generic company queries whenever the raw payload is
    missing, malformed JSON, or contains no usable query strings.
    """
    try:
        payload = (
            raw_queries.get("search_queries")
            if isinstance(raw_queries, dict)
            else None
        )

        # The model sometimes returns the queries serialized as JSON text.
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except json.JSONDecodeError as exc:
                logger.warning(f"JSON decode failed: {exc}. Using fallback queries.")
                return get_fallback_queries(company_name)

        if isinstance(payload, dict):
            # Keep string values as-is; for non-empty lists take the first item.
            extracted = {
                key: (value if isinstance(value, str) else str(value[0]))
                for key, value in payload.items()
                if isinstance(value, str)
                or (isinstance(value, list) and len(value) > 0)
            }
            if extracted:
                return extracted

        # Nothing usable was recovered from the payload.
        logger.warning("Could not parse DSPy queries. Using fallback.")
        return get_fallback_queries(company_name)

    except Exception as exc:
        logger.error(f"Error parsing DSPy queries: {exc}. Using fallback.")
        return get_fallback_queries(company_name)
|
| 89 |
|
|
|
|
| 90 |
|
| 91 |
+
def get_fallback_queries(company_name: str) -> Dict[str, str]:
    """Build three generic company-research queries used when DSPy fails."""
    topics = (
        "company culture and values",
        "recent news and achievements",
        "mission statement and goals",
    )
    return {
        f"query{index}": f"{company_name} {topic}"
        for index, topic in enumerate(topics, start=1)
    }
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def company_research_data_summary(state: ResearchState) -> ResearchState:
    """
    Summarize the filtered research data into a concise summary.

    Runs a DSPy ChainOfThought over ``state["company_research_data"]`` and
    stores the model's output (a JSON string) under
    ``company_research_data["company_research_data_summary"]``. The raw
    ``tavily_search`` results are left in place. Summarization is
    best-effort: on any error the state is returned unchanged.

    Parameters
    ----------
    state: ResearchState
        Workflow state; expected to contain ``company_research_data``.

    Returns
    -------
    ResearchState
        The same state object, possibly with the summary field added.
    """
    try:
        state["current_node"] = "company_research_data_summary"

        # Extract the current research data
        company_research_data = state.get("company_research_data", {})
        tavily_search_data = company_research_data.get("tavily_search", [])

        # If no research data, skip summarization
        if not tavily_search_data or len(tavily_search_data) == 0:
            logger.warning("No research data to summarize. Skipping summarization.")
            return state

        logger.info(f"Summarizing {len(tavily_search_data)} research result sets...")

        # Create DSPy summarization chain
        company_research_data_summarization = dspy.ChainOfThought(
            CompanyResearchDataSummarizationSchema
        )

        # Initialize LLM provider (low temperature for stable summaries)

        llm_provider = LLMFactory()
        llm = llm_provider.create_dspy(
            model="mistralai/mistral-7b-instruct:free",
            provider="openrouter",
            temperature=0.3,
        )

        # Generate summary using DSPy
        with dspy.context(lm=llm, adapter=dspy.JSONAdapter()):
            response = company_research_data_summarization(
                company_research_data=company_research_data
            )
        # Extract the summary from the response
        # The response should have a 'company_research_data_summary' field (JSON string)
        if hasattr(response, "company_research_data_summary"):
            summary_json_str = response.company_research_data_summary
        elif isinstance(response, dict) and "company_research_data_summary" in response:
            summary_json_str = response["company_research_data_summary"]
        else:
            logger.error(
                f"Unexpected response format from summarization: {type(response)}"
            )
            return state

        # Store the summary as-is (still a JSON string; consumers parse it later)
        state["company_research_data"]["company_research_data_summary"] = (
            summary_json_str
        )

        return state

    except Exception as e:
        logger.error(f"Error in company_research_data_summary: {e}", exc_info=True)
        # Return state unchanged on error
        return state
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
async def research_company_with_retry(state: ResearchState) -> ResearchState:
    """
    Research company with retry logic and timeouts.

    Generates Tavily search queries via the research tool, runs the searches,
    and stores the results in ``state``. Each attempt is bounded by
    ``QUERY_TIMEOUT`` (searches scale with the number of queries); up to
    ``MAX_RETRIES`` attempts are made with increasing backoff. On total
    failure the result fields are set to empty lists rather than raising.

    Parameters
    ----------
    state: ResearchState
        Workflow state; must carry company_name / job_description inside
        ``company_research_data``.

    Returns
    -------
    ResearchState
        The updated state (``attempted_search_queries`` and
        ``company_research_data["tavily_search"]`` populated or emptied).
    """
    state["current_node"] = "research_company"

    # Validate inputs
    is_valid, company_name, job_description = validate_research_inputs(state)

    if not is_valid:
        logger.error("Invalid inputs for research. Skipping research phase.")
        state["company_research_data"]["tavily_search"] = []
        state["attempted_search_queries"] = []
        return state

    logger.info(f"Researching company: {company_name}")

    # Try with retries
    for attempt in range(MAX_RETRIES):
        try:
            # Create tool instance
            tavily_search = TavilyResearchTool(
                job_description=job_description, company_name=company_name
            )

            # Generate queries with timeout; the sync tool call is pushed to a
            # worker thread so the event loop stays responsive.
            queries_task = asyncio.create_task(
                asyncio.to_thread(tavily_search.create_tavily_queries)
            )

            try:
                raw_queries = await asyncio.wait_for(
                    queries_task, timeout=QUERY_TIMEOUT
                )
            except asyncio.TimeoutError:
                logger.warning(
                    f"Query generation timed out (attempt {attempt + 1}/{MAX_RETRIES})"
                )
                if attempt < MAX_RETRIES - 1:
                    await asyncio.sleep(RETRY_DELAY)
                    continue
                else:
                    # Last attempt: re-raise so the outer handler below runs
                    # its exhausted-retries branch.
                    raise

            # Parse queries with fallback
            # Convert DSPy Prediction to dict if needed
            if hasattr(raw_queries, "dict"):
                raw_queries_dict = cast(Dict[str, Any], raw_queries.dict())
            elif hasattr(raw_queries, "__dict__"):
                raw_queries_dict = cast(Dict[str, Any], raw_queries.__dict__)
            elif isinstance(raw_queries, dict):
                raw_queries_dict = cast(Dict[str, Any], raw_queries)
            else:
                raw_queries_dict = cast(Dict[str, Any], dict(raw_queries))

            queries = parse_dspy_queries_with_fallback(raw_queries_dict, company_name)

            if not queries:
                logger.warning("No valid queries generated")
                queries = get_fallback_queries(company_name)

            logger.info(
                f"Generated {len(queries)} search queries: {list(queries.keys())}"
            )

            # Perform searches with timeout (budget scales with query count)
            search_task = asyncio.create_task(
                asyncio.to_thread(tavily_search.tavily_search_company, queries)
            )

            try:
                search_results = await asyncio.wait_for(
                    search_task, timeout=QUERY_TIMEOUT * len(queries)
                )
            except asyncio.TimeoutError:
                logger.warning(
                    f"Search timed out (attempt {attempt + 1}/{MAX_RETRIES})"
                )
                if attempt < MAX_RETRIES - 1:
                    await asyncio.sleep(RETRY_DELAY)
                    continue
                else:
                    raise

            # Validate results
            if not isinstance(search_results, list):
                logger.warning(f"Invalid search results type: {type(search_results)}")
                search_results = []

            if len(search_results) == 0:
                logger.warning("No search results returned")

            # Store results
            state["attempted_search_queries"] = list(queries.values())
            state["company_research_data"]["tavily_search"] = search_results

            logger.info(
                f"Research completed successfully with {len(search_results)} result sets"
            )
            return state

        except Exception as e:
            logger.error(
                f"Error in research_company (attempt {attempt + 1}/{MAX_RETRIES}): {e}",
                exc_info=True,
            )

            if attempt < MAX_RETRIES - 1:
                await asyncio.sleep(RETRY_DELAY * (attempt + 1))  # Exponential backoff
            else:
                logger.error("All retry attempts exhausted. Using empty results.")
                state["company_research_data"]["tavily_search"] = []
                state["attempted_search_queries"] = []

    return state
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
async def research_company(state: ResearchState) -> ResearchState:
    """Graph node entry point; delegates to the retrying implementation."""
    updated_state = await research_company_with_retry(state)
    return updated_state
|
| 285 |
+
|
| 286 |
|
|
|
|
| 287 |
# Create research subgraph over the ResearchState schema
research_subgraph = StateGraph(ResearchState)

# Add research subgraph nodes
research_subgraph.add_node("research_company", research_company)
research_subgraph.add_node("relevance_filter", filter_research_results_by_relevance)
research_subgraph.add_node(
    "company_research_data_summary", company_research_data_summary
)

# Add research subgraph edges — a strictly linear pipeline:
# START -> research_company -> relevance_filter -> summary -> END
research_subgraph.add_edge(START, "research_company")
research_subgraph.add_edge("research_company", "relevance_filter")
research_subgraph.add_edge("relevance_filter", "company_research_data_summary")
research_subgraph.add_edge("company_research_data_summary", END)

# Compile research subgraph (exported for use by the parent workflow)
research_workflow = research_subgraph.compile()
|
src/job_writing_agent/nodes/resume_loader.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Resume Loader Module
|
| 4 |
+
|
| 5 |
+
This module provides the ResumeLoader class responsible for loading and parsing
|
| 6 |
+
the resume file and returning the resume in the required format.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import logging
|
| 10 |
+
from typing import Callable, Any, Optional
|
| 11 |
+
|
| 12 |
+
from job_writing_agent.utils.document_processing import parse_resume
|
| 13 |
+
from job_writing_agent.utils.logging.logging_decorators import (
|
| 14 |
+
log_async,
|
| 15 |
+
log_errors,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class ResumeLoader:
    """
    Responsible for loading and parsing resume documents.

    Example:
        >>> loader = ResumeLoader()
        >>> resume_text = await loader.get_resume("path/to/resume.pdf")
        >>>
        >>> # With custom parser for testing
        >>> mock_parser = lambda x: [Document(page_content="test")]
        >>> loader = ResumeLoader(parser=mock_parser)
    """

    def __init__(self, parser: Optional[Callable[[Any], Any]] = None):
        """
        Initialize ResumeLoader with optional parser dependency injection.

        Parameters
        ----------
        parser: Optional[Callable[[Any], Any]]
            Function to parse resume documents. Defaults to `parse_resume` from
            document_processing. Can be injected for testing or custom parsing.
        """
        self._parser = parser or parse_resume

    @log_async
    @log_errors
    async def get_resume(self, resume_source: Any) -> str:
        """
        Parse a resume file and return its plain-text content.

        This method extracts text from resume chunks, handling both Document
        objects and plain strings. Empty or invalid chunks are skipped.

        Parameters
        ----------
        resume_source: Any
            Path or file-like object accepted by the parser function.
            Can be a file path, URL, or file-like object.

        Returns
        -------
        str
            Plain text content of the resume.

        Raises
        ------
        ValueError
            If resume_source is None.
        Exception
            If parsing fails.
        """
        logger.info("Parsing resume...")
        # Raise (not assert) so the validation survives `python -O`; matches
        # _load_resume, which raises ValueError for the same condition.
        if resume_source is None:
            raise ValueError("resume_source cannot be None")

        resume_chunks = self._parser(resume_source)

        # Collect text pieces and join once at the end — avoids the quadratic
        # cost of repeated `str +=` on large resumes.
        pieces = []
        for chunk in resume_chunks:
            if hasattr(chunk, "page_content") and chunk.page_content:
                pieces.append(chunk.page_content)
            elif isinstance(chunk, str) and chunk:
                pieces.append(chunk)
            else:
                logger.debug("Skipping empty or invalid chunk in resume: %s", chunk)

        return "".join(pieces)

    @log_async
    @log_errors
    async def _load_resume(self, resume_source: Any) -> str:
        """
        Load resume content, raising if the source is missing.

        This is a wrapper around get_resume() that validates the source first.
        Used by subgraph nodes for consistent error handling.

        Parameters
        ----------
        resume_source: Any
            Path or file-like object for the resume.

        Returns
        -------
        str
            Plain text content of the resume.

        Raises
        ------
        ValueError
            If resume_source is None or empty.
        """
        if not resume_source:
            raise ValueError("resume_source is required")
        return await self.get_resume(resume_source)

    async def _prompt_user_for_resume(self) -> str:
        """
        Prompt the user for input (synchronous input wrapped for async use).

        This method wraps the synchronous input() function to be used in async
        contexts. In a production async UI, this would be replaced with an
        async input mechanism (the blocking input() call stalls the event loop).

        Note: This is a shared utility method. In a future refactoring, this
        could be extracted to a separate UserInputHelper class following the
        Interface Segregation Principle.

        Returns
        -------
        str
            User input string.
        """
        # In a real async UI replace input with an async call.
        return input("Please paste the resume in text format: ")
|
src/job_writing_agent/nodes/selfconsistency.py
CHANGED
|
@@ -4,23 +4,23 @@ import json
|
|
| 4 |
import re
|
| 5 |
|
| 6 |
from ..classes.classes import AppState
|
| 7 |
-
from ..prompts.templates import
|
| 8 |
-
DRAFT_RATING_PROMPT,
|
| 9 |
-
BEST_DRAFT_SELECTION_PROMPT
|
| 10 |
-
)
|
| 11 |
from ..utils.llm_provider_factory import LLMFactory
|
| 12 |
|
| 13 |
|
| 14 |
-
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
# Constants
|
| 17 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 18 |
|
| 19 |
-
llm_factory = LLMFactory()
|
| 20 |
-
llm_precise = llm_factory.create_langchain(model="qwen/qwen3-4b:free", provider="openrouter", temperature=0.1)
|
| 21 |
|
| 22 |
def self_consistency_vote(state: AppState) -> AppState:
|
| 23 |
"""Choose the best draft from multiple variations."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
variations = state.get("variations", {"variations": []})
|
| 25 |
|
| 26 |
all_drafts = [state["draft"]] + variations["variations"]
|
|
@@ -31,7 +31,7 @@ def self_consistency_vote(state: AppState) -> AppState:
|
|
| 31 |
# Get resume and job summaries, handling different formats
|
| 32 |
try:
|
| 33 |
if isinstance(state["resume_path"], list) and len(state["resume_path"]) > 0:
|
| 34 |
-
if hasattr(state["resume_path"][0],
|
| 35 |
resume_summary = state["resume_path"][0].page_content
|
| 36 |
else:
|
| 37 |
resume_summary = state["resume_path"][0]
|
|
@@ -42,7 +42,10 @@ def self_consistency_vote(state: AppState) -> AppState:
|
|
| 42 |
resume_summary = str(state["resume_path"])
|
| 43 |
|
| 44 |
try:
|
| 45 |
-
if
|
|
|
|
|
|
|
|
|
|
| 46 |
job_summary = state["job_description_source"][0]
|
| 47 |
else:
|
| 48 |
job_summary = str(state["job_description_source"])
|
|
@@ -51,33 +54,38 @@ def self_consistency_vote(state: AppState) -> AppState:
|
|
| 51 |
job_summary = str(state["job_description_source"])
|
| 52 |
|
| 53 |
for i, draft in enumerate(all_drafts):
|
| 54 |
-
rating = llm_precise.invoke(
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
| 60 |
ratings.append(rating)
|
| 61 |
|
| 62 |
# Create a clearer, more structured prompt for draft selection
|
| 63 |
selection_prompt = BEST_DRAFT_SELECTION_PROMPT.format(
|
| 64 |
-
ratings_json=json.dumps(ratings, indent=2),
|
| 65 |
-
num_drafts=len(all_drafts)
|
| 66 |
)
|
| 67 |
|
| 68 |
# Get the selected draft index with error handling
|
| 69 |
try:
|
| 70 |
selection = llm_precise.invoke(selection_prompt).strip()
|
| 71 |
# Extract just the first number found in the response
|
| 72 |
-
number_match = re.search(r
|
| 73 |
if not number_match:
|
| 74 |
-
print(
|
|
|
|
|
|
|
| 75 |
best_draft_idx = 0
|
| 76 |
else:
|
| 77 |
best_draft_idx = int(number_match.group()) - 1
|
| 78 |
# Validate the index is in range
|
| 79 |
if best_draft_idx < 0 or best_draft_idx >= len(all_drafts):
|
| 80 |
-
print(
|
|
|
|
|
|
|
| 81 |
best_draft_idx = 0
|
| 82 |
except (ValueError, TypeError) as e:
|
| 83 |
print(f"Warning: Error selecting best draft: {e}. Using original draft.")
|
|
|
|
| 4 |
import re
|
| 5 |
|
| 6 |
from ..classes.classes import AppState
|
| 7 |
+
from ..prompts.templates import DRAFT_RATING_PROMPT, BEST_DRAFT_SELECTION_PROMPT
|
|
|
|
|
|
|
|
|
|
| 8 |
from ..utils.llm_provider_factory import LLMFactory
|
| 9 |
|
| 10 |
|
|
|
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
# Constants
|
| 13 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 14 |
|
|
|
|
|
|
|
| 15 |
|
| 16 |
def self_consistency_vote(state: AppState) -> AppState:
|
| 17 |
"""Choose the best draft from multiple variations."""
|
| 18 |
+
# Create LLM inside function (lazy initialization)
|
| 19 |
+
llm_factory = LLMFactory()
|
| 20 |
+
llm_precise = llm_factory.create_langchain(
|
| 21 |
+
model="google/gemma-3-12b-it:free", provider="openrouter", temperature=0.1
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
variations = state.get("variations", {"variations": []})
|
| 25 |
|
| 26 |
all_drafts = [state["draft"]] + variations["variations"]
|
|
|
|
| 31 |
# Get resume and job summaries, handling different formats
|
| 32 |
try:
|
| 33 |
if isinstance(state["resume_path"], list) and len(state["resume_path"]) > 0:
|
| 34 |
+
if hasattr(state["resume_path"][0], "page_content"):
|
| 35 |
resume_summary = state["resume_path"][0].page_content
|
| 36 |
else:
|
| 37 |
resume_summary = state["resume_path"][0]
|
|
|
|
| 42 |
resume_summary = str(state["resume_path"])
|
| 43 |
|
| 44 |
try:
|
| 45 |
+
if (
|
| 46 |
+
isinstance(state["job_description_source"], list)
|
| 47 |
+
and len(state["job_description_source"]) > 0
|
| 48 |
+
):
|
| 49 |
job_summary = state["job_description_source"][0]
|
| 50 |
else:
|
| 51 |
job_summary = str(state["job_description_source"])
|
|
|
|
| 54 |
job_summary = str(state["job_description_source"])
|
| 55 |
|
| 56 |
for i, draft in enumerate(all_drafts):
|
| 57 |
+
rating = llm_precise.invoke(
|
| 58 |
+
DRAFT_RATING_PROMPT.format(
|
| 59 |
+
resume_summary=resume_summary,
|
| 60 |
+
job_summary=job_summary,
|
| 61 |
+
draft=draft,
|
| 62 |
+
draft_number=i + 1,
|
| 63 |
+
)
|
| 64 |
+
)
|
| 65 |
ratings.append(rating)
|
| 66 |
|
| 67 |
# Create a clearer, more structured prompt for draft selection
|
| 68 |
selection_prompt = BEST_DRAFT_SELECTION_PROMPT.format(
|
| 69 |
+
ratings_json=json.dumps(ratings, indent=2), num_drafts=len(all_drafts)
|
|
|
|
| 70 |
)
|
| 71 |
|
| 72 |
# Get the selected draft index with error handling
|
| 73 |
try:
|
| 74 |
selection = llm_precise.invoke(selection_prompt).strip()
|
| 75 |
# Extract just the first number found in the response
|
| 76 |
+
number_match = re.search(r"\d+", selection)
|
| 77 |
if not number_match:
|
| 78 |
+
print(
|
| 79 |
+
"Warning: Could not extract draft number from LLM response. Using original draft."
|
| 80 |
+
)
|
| 81 |
best_draft_idx = 0
|
| 82 |
else:
|
| 83 |
best_draft_idx = int(number_match.group()) - 1
|
| 84 |
# Validate the index is in range
|
| 85 |
if best_draft_idx < 0 or best_draft_idx >= len(all_drafts):
|
| 86 |
+
print(
|
| 87 |
+
f"Warning: Selected draft index {best_draft_idx + 1} out of range. Using original draft."
|
| 88 |
+
)
|
| 89 |
best_draft_idx = 0
|
| 90 |
except (ValueError, TypeError) as e:
|
| 91 |
print(f"Warning: Error selecting best draft: {e}. Using original draft.")
|
src/job_writing_agent/nodes/variations.py
CHANGED
|
@@ -5,7 +5,7 @@ from typing_extensions import Dict, List
|
|
| 5 |
from langchain_core.documents import Document
|
| 6 |
|
| 7 |
|
| 8 |
-
from ..classes.classes import
|
| 9 |
from ..utils.llm_provider_factory import LLMFactory
|
| 10 |
from ..prompts.templates import VARIATION_PROMPT
|
| 11 |
|
|
@@ -14,15 +14,15 @@ logger = logging.getLogger(__name__)
|
|
| 14 |
# Constants
|
| 15 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 16 |
|
| 17 |
-
llm_provider = LLMFactory()
|
| 18 |
|
| 19 |
-
|
| 20 |
-
"qwen/qwen3-4b:free", provider="openrouter", temperature=0.3
|
| 21 |
-
)
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
def generate_variations(state: AppState) -> Dict[str, List[str]]:
|
| 25 |
"""Generate multiple variations of the draft for self-consistency voting."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
variations = []
|
| 27 |
|
| 28 |
# Get resume and job text, handling both string and Document types
|
|
@@ -70,6 +70,8 @@ def generate_variations(state: AppState) -> Dict[str, List[str]]:
|
|
| 70 |
|
| 71 |
response = configured_llm.invoke(variation)
|
| 72 |
|
|
|
|
|
|
|
| 73 |
if response and response.strip(): # Only add non-empty variations
|
| 74 |
variations.append(response)
|
| 75 |
except Exception as e:
|
|
|
|
| 5 |
from langchain_core.documents import Document
|
| 6 |
|
| 7 |
|
| 8 |
+
from ..classes.classes import ResultState
|
| 9 |
from ..utils.llm_provider_factory import LLMFactory
|
| 10 |
from ..prompts.templates import VARIATION_PROMPT
|
| 11 |
|
|
|
|
| 14 |
# Constants
|
| 15 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 16 |
|
|
|
|
| 17 |
|
| 18 |
+
def generate_variations(state: ResultState) -> Dict[str, List[str]]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
"""Generate multiple variations of the draft for self-consistency voting."""
|
| 20 |
+
# Create LLM inside function (lazy initialization)
|
| 21 |
+
llm_provider = LLMFactory()
|
| 22 |
+
llm = llm_provider.create_langchain(
|
| 23 |
+
"google/gemma-3-27b-it:free", provider="openrouter", temperature=0.3
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
variations = []
|
| 27 |
|
| 28 |
# Get resume and job text, handling both string and Document types
|
|
|
|
| 70 |
|
| 71 |
response = configured_llm.invoke(variation)
|
| 72 |
|
| 73 |
+
print(f"Response for setting: {variation} has a response: {response}")
|
| 74 |
+
|
| 75 |
if response and response.strip(): # Only add non-empty variations
|
| 76 |
variations.append(response)
|
| 77 |
except Exception as e:
|
src/job_writing_agent/prompts/templates.py
CHANGED
|
@@ -5,7 +5,11 @@ This module contains all prompt templates used throughout the job application
|
|
| 5 |
generation process, organized by task.
|
| 6 |
"""
|
| 7 |
|
| 8 |
-
from langchain_core.prompts import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
from langchain_core.messages import SystemMessage, HumanMessage
|
| 10 |
|
| 11 |
# Persona selection prompts
|
|
@@ -201,19 +205,26 @@ Example: If draft #2 is best, return ONLY '2'.
|
|
| 201 |
|
| 202 |
REVISION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 203 |
[
|
| 204 |
-
|
| 205 |
-
|
| 206 |
),
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
{draft}
|
| 211 |
-
|
| 212 |
-
|
|
|
|
| 213 |
{feedback}
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
"""
|
| 218 |
),
|
| 219 |
]
|
|
|
|
| 5 |
generation process, organized by task.
|
| 6 |
"""
|
| 7 |
|
| 8 |
+
from langchain_core.prompts import (
|
| 9 |
+
ChatPromptTemplate,
|
| 10 |
+
SystemMessagePromptTemplate,
|
| 11 |
+
HumanMessagePromptTemplate,
|
| 12 |
+
)
|
| 13 |
from langchain_core.messages import SystemMessage, HumanMessage
|
| 14 |
|
| 15 |
# Persona selection prompts
|
|
|
|
| 205 |
|
| 206 |
REVISION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 207 |
[
|
| 208 |
+
SystemMessagePromptTemplate.from_template(
|
| 209 |
+
"You are an expert job application writer. Revise the draft based on BOTH the self-evaluation and external feedback provided."
|
| 210 |
),
|
| 211 |
+
HumanMessagePromptTemplate.from_template(
|
| 212 |
+
"""
|
| 213 |
+
--------------------------------Original Draft--------------------------------
|
| 214 |
{draft}
|
| 215 |
+
----------------------------------------------------------------------------------------
|
| 216 |
+
|
| 217 |
+
--------------------------------Candidate Feedback--------------------------------
|
| 218 |
{feedback}
|
| 219 |
+
----------------------------------------------------------------------------------------
|
| 220 |
+
|
| 221 |
+
--------------------------------Critique Feedback--------------------------------
|
| 222 |
+
{critique_feedback}
|
| 223 |
+
----------------------------------------------------------------------------------------
|
| 224 |
+
|
| 225 |
+
Based on the self evaluation in the Original Draft, Critique Feedback and the Candidates' Feedback, revise the content taking essence of the self evaluation, Critique Feedback and the Candidates' Feedback into account. Do not repeat the same content from the Original Draft, Critique Feedback and the Candidates' Feedback.
|
| 226 |
+
|
| 227 |
+
Return the content of the revised draft. Make sure the output is only the content that is the revised content and nothing else.
|
| 228 |
"""
|
| 229 |
),
|
| 230 |
]
|
src/job_writing_agent/prompts/test_prompts.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_core.prompts import (
|
| 2 |
+
ChatPromptTemplate,
|
| 3 |
+
HumanMessagePromptTemplate,
|
| 4 |
+
SystemMessagePromptTemplate,
|
| 5 |
+
)
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
REVISION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 9 |
+
[
|
| 10 |
+
SystemMessagePromptTemplate.from_template(
|
| 11 |
+
"You are an expert job application writer. Revise the draft based on BOTH the self-evaluation and external feedback provided."
|
| 12 |
+
),
|
| 13 |
+
HumanMessagePromptTemplate.from_template(
|
| 14 |
+
"""
|
| 15 |
+
# Original Draft Content with Evaluation Section at the end
|
| 16 |
+
{draft}
|
| 17 |
+
|
| 18 |
+
# Candidates' Feedback (Human Feedback)
|
| 19 |
+
{feedback}
|
| 20 |
+
|
| 21 |
+
# Critique Feedback (AI Feedback)
|
| 22 |
+
{critique_feedback}
|
| 23 |
+
|
| 24 |
+
Based on the self evaluation in the Original Draft, Critique Feedback and the Candidates' Feedback, revise the content taking essence of the self evaluation, Critique Feedback and the Candidates' Feedback into account. Do not repeat the same content from the Original Draft, Critique Feedback and the Candidates' Feedback.
|
| 25 |
+
|
| 26 |
+
Return the content of the revised draft. Make sure the output is only the content that is the revised content and nothing else.
|
| 27 |
+
"""
|
| 28 |
+
),
|
| 29 |
+
]
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
print(
|
| 33 |
+
REVISION_PROMPT.format_messages(
|
| 34 |
+
draft="Hello, how are you?",
|
| 35 |
+
feedback="I like your draft.",
|
| 36 |
+
critique_feedback="Your draft is good.",
|
| 37 |
+
)
|
| 38 |
+
)
|
src/job_writing_agent/tools/SearchTool.py
CHANGED
|
@@ -6,37 +6,40 @@ from pathlib import Path
|
|
| 6 |
|
| 7 |
from langchain_tavily import TavilySearch
|
| 8 |
from openevals.llm import create_async_llm_as_judge
|
| 9 |
-
from openevals.prompts import
|
| 10 |
-
RAG_RETRIEVAL_RELEVANCE_PROMPT,
|
| 11 |
-
RAG_HELPFULNESS_PROMPT
|
| 12 |
-
)
|
| 13 |
import dspy
|
| 14 |
|
| 15 |
from ..agents.output_schema import TavilySearchQueries
|
| 16 |
from ..classes.classes import ResearchState
|
| 17 |
from ..utils.llm_provider_factory import LLMFactory
|
| 18 |
|
|
|
|
| 19 |
logger = logging.getLogger(__name__)
|
| 20 |
|
| 21 |
|
| 22 |
-
env_path = Path(__file__).parent /
|
| 23 |
load_dotenv(dotenv_path=env_path, override=True)
|
| 24 |
|
| 25 |
|
| 26 |
openrouter_api_key = os.environ["OPENROUTER_API_KEY"]
|
| 27 |
|
| 28 |
-
llm_provider = LLMFactory()
|
| 29 |
-
|
| 30 |
|
| 31 |
class TavilyResearchTool:
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
self.job_description = job_description
|
| 38 |
self.company_name = company_name
|
| 39 |
-
self.tavily_searchtool
|
| 40 |
|
| 41 |
def create_tavily_queries(self):
|
| 42 |
"""
|
|
@@ -46,101 +49,222 @@ class TavilyResearchTool:
|
|
| 46 |
"""
|
| 47 |
tavily_query_generator = dspy.ChainOfThought(TavilySearchQueries)
|
| 48 |
with dspy.context(lm=self.dspy_llm, adapter=dspy.JSONAdapter()):
|
| 49 |
-
response = tavily_query_generator(
|
|
|
|
|
|
|
| 50 |
return response
|
| 51 |
|
| 52 |
-
|
| 53 |
def tavily_search_company(self, queries):
|
| 54 |
-
|
| 55 |
query_results: list[list[str]] = []
|
| 56 |
for query in queries:
|
| 57 |
try:
|
| 58 |
-
search_query_response = self.tavily_searchtool.invoke(
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
# print(f"Tavily Search Tool Response for query '{search_query_response['query']}': {query_results_map[search_query_response['query']]}")
|
| 61 |
except Exception as e:
|
| 62 |
-
logger.error(
|
|
|
|
|
|
|
| 63 |
continue
|
| 64 |
|
| 65 |
return query_results
|
| 66 |
|
| 67 |
-
llm_structured = llm_provider.create_langchain("llama3.1-8b",
|
| 68 |
-
provider="cerebras",
|
| 69 |
-
temperature=0.3)
|
| 70 |
|
| 71 |
def get_relevance_evaluator():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
return create_async_llm_as_judge(
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
|
| 78 |
|
| 79 |
def get_helpfulness_evaluator():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
return create_async_llm_as_judge(
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
async def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
try:
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
-
|
| 106 |
-
semaphore = asyncio.Semaphore(2)
|
| 107 |
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
relevance_evaluator = get_relevance_evaluator()
|
| 112 |
-
eval_result = await relevance_evaluator(
|
| 113 |
-
inputs=input_query, context=query_result_item # context is the whole result block for the query
|
| 114 |
-
)
|
| 115 |
-
return query_result_item, eval_result
|
| 116 |
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
# Process tasks as they complete
|
| 123 |
-
for completed_task in asyncio.as_completed(tasks):
|
| 124 |
-
query_result_item, eval_result = await completed_task
|
| 125 |
-
# logger.info(f"Evaluated query result for '{query_result_item}': {eval_result}")
|
| 126 |
-
if eval_result.get("score"): # Safely check for score
|
| 127 |
-
if isinstance(query_result_item, list):
|
| 128 |
-
filtered_search_results.extend(query_result_item)
|
| 129 |
-
else:
|
| 130 |
-
# Handle cases where "results" might not be a list or is missing
|
| 131 |
-
logger.warning("Expected a list in query_result_item, got: %s", type(query_result_item))
|
| 132 |
|
| 133 |
-
#
|
| 134 |
-
|
|
|
|
|
|
|
| 135 |
|
| 136 |
-
logger.info(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
return state
|
| 139 |
|
| 140 |
except Exception as e:
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
traceback.print_exc()
|
| 144 |
-
logger.error(f"Error in relevance_filter: {str(e)}")
|
| 145 |
-
# Return original state to avoid breaking the flow
|
| 146 |
return state
|
|
|
|
| 6 |
|
| 7 |
from langchain_tavily import TavilySearch
|
| 8 |
from openevals.llm import create_async_llm_as_judge
|
| 9 |
+
from openevals.prompts import RAG_RETRIEVAL_RELEVANCE_PROMPT, RAG_HELPFULNESS_PROMPT
|
|
|
|
|
|
|
|
|
|
| 10 |
import dspy
|
| 11 |
|
| 12 |
from ..agents.output_schema import TavilySearchQueries
|
| 13 |
from ..classes.classes import ResearchState
|
| 14 |
from ..utils.llm_provider_factory import LLMFactory
|
| 15 |
|
| 16 |
+
|
| 17 |
logger = logging.getLogger(__name__)
|
| 18 |
|
| 19 |
|
| 20 |
+
env_path = Path(__file__).parent / ".env"
|
| 21 |
load_dotenv(dotenv_path=env_path, override=True)
|
| 22 |
|
| 23 |
|
| 24 |
openrouter_api_key = os.environ["OPENROUTER_API_KEY"]
|
| 25 |
|
|
|
|
|
|
|
| 26 |
|
| 27 |
class TavilyResearchTool:
|
| 28 |
+
def __init__(
|
| 29 |
+
self,
|
| 30 |
+
job_description,
|
| 31 |
+
company_name,
|
| 32 |
+
max_results=5,
|
| 33 |
+
model_name="mistralai/mistral-7b-instruct:free",
|
| 34 |
+
):
|
| 35 |
+
# Create LLM inside __init__ (lazy initialization)
|
| 36 |
+
llm_provider = LLMFactory()
|
| 37 |
+
self.dspy_llm = llm_provider.create_dspy(
|
| 38 |
+
model=model_name, provider="openrouter", temperature=0.3
|
| 39 |
+
)
|
| 40 |
self.job_description = job_description
|
| 41 |
self.company_name = company_name
|
| 42 |
+
self.tavily_searchtool = TavilySearch(max_results=max_results)
|
| 43 |
|
| 44 |
def create_tavily_queries(self):
|
| 45 |
"""
|
|
|
|
| 49 |
"""
|
| 50 |
tavily_query_generator = dspy.ChainOfThought(TavilySearchQueries)
|
| 51 |
with dspy.context(lm=self.dspy_llm, adapter=dspy.JSONAdapter()):
|
| 52 |
+
response = tavily_query_generator(
|
| 53 |
+
job_description=self.job_description, company_name=self.company_name
|
| 54 |
+
)
|
| 55 |
return response
|
| 56 |
|
|
|
|
| 57 |
def tavily_search_company(self, queries):
|
|
|
|
| 58 |
query_results: list[list[str]] = []
|
| 59 |
for query in queries:
|
| 60 |
try:
|
| 61 |
+
search_query_response = self.tavily_searchtool.invoke(
|
| 62 |
+
{"query": queries[query]}
|
| 63 |
+
)
|
| 64 |
+
query_results.append(
|
| 65 |
+
[res["content"] for res in search_query_response["results"]]
|
| 66 |
+
)
|
| 67 |
# print(f"Tavily Search Tool Response for query '{search_query_response['query']}': {query_results_map[search_query_response['query']]}")
|
| 68 |
except Exception as e:
|
| 69 |
+
logger.error(
|
| 70 |
+
f"Failed to perform company research using TavilySearchTool. Error : {e}"
|
| 71 |
+
)
|
| 72 |
continue
|
| 73 |
|
| 74 |
return query_results
|
| 75 |
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
def get_relevance_evaluator():
|
| 78 |
+
"""
|
| 79 |
+
Create an LLM-as-judge evaluator for relevance filtering.
|
| 80 |
+
|
| 81 |
+
Creates the LLM on-demand (lazy initialization) to avoid startup delays.
|
| 82 |
+
"""
|
| 83 |
+
# Create LLM inside function (lazy initialization)
|
| 84 |
+
llm_provider = LLMFactory()
|
| 85 |
+
llm_structured = llm_provider.create_langchain(
|
| 86 |
+
"llama3.1-8b", provider="cerebras", temperature=0.3
|
| 87 |
+
)
|
| 88 |
return create_async_llm_as_judge(
|
| 89 |
+
judge=llm_structured,
|
| 90 |
+
prompt=RAG_RETRIEVAL_RELEVANCE_PROMPT,
|
| 91 |
+
feedback_key="retrieval_relevance",
|
| 92 |
+
)
|
| 93 |
|
| 94 |
|
| 95 |
def get_helpfulness_evaluator():
|
| 96 |
+
"""
|
| 97 |
+
Create an LLM-as-judge evaluator for helpfulness filtering.
|
| 98 |
+
|
| 99 |
+
Creates the LLM on-demand (lazy initialization) to avoid startup delays.
|
| 100 |
+
"""
|
| 101 |
+
# Create LLM inside function (lazy initialization)
|
| 102 |
+
llm_provider = LLMFactory()
|
| 103 |
+
llm_structured = llm_provider.create_langchain(
|
| 104 |
+
"llama3.1-8b", provider="cerebras", temperature=0.3
|
| 105 |
+
)
|
| 106 |
return create_async_llm_as_judge(
|
| 107 |
+
judge=llm_structured,
|
| 108 |
+
prompt=RAG_HELPFULNESS_PROMPT
|
| 109 |
+
+ '\nReturn "true" if the answer is helpful, and "false" otherwise.',
|
| 110 |
+
feedback_key="helpfulness",
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
async def filter_research_results_by_relevance(state: ResearchState) -> ResearchState:
|
| 115 |
+
"""
|
| 116 |
+
Filter search results to keep only relevant company information.
|
| 117 |
+
Uses LLM-as-judge to evaluate if each result set is relevant to its query.
|
| 118 |
+
Irrelevant results are REMOVED from the final output.
|
| 119 |
+
"""
|
| 120 |
try:
|
| 121 |
+
state["current_node"] = "filter_research_results_by_relevance"
|
| 122 |
+
|
| 123 |
+
# Extract search data from state
|
| 124 |
+
raw_search_results = state.get("company_research_data", {}).get(
|
| 125 |
+
"tavily_search", []
|
| 126 |
+
)
|
| 127 |
+
search_queries_used = state.get("attempted_search_queries", [])
|
| 128 |
+
|
| 129 |
+
# Validate data types
|
| 130 |
+
if not isinstance(raw_search_results, list):
|
| 131 |
+
logger.warning(f"Invalid search results type: {type(raw_search_results)}")
|
| 132 |
+
return state
|
| 133 |
+
|
| 134 |
+
if not isinstance(search_queries_used, list):
|
| 135 |
+
logger.warning(f"Invalid queries type: {type(search_queries_used)}")
|
| 136 |
+
search_queries_used = []
|
| 137 |
+
|
| 138 |
+
# Early exit if no results
|
| 139 |
+
if len(raw_search_results) == 0:
|
| 140 |
+
logger.info("No search results to filter.")
|
| 141 |
+
state["company_research_data"]["tavily_search"] = []
|
| 142 |
+
return state
|
| 143 |
+
|
| 144 |
+
logger.info(
|
| 145 |
+
f"Starting relevance filtering for {len(raw_search_results)} result sets..."
|
| 146 |
+
)
|
| 147 |
+
|
| 148 |
+
# Track filtering statistics
|
| 149 |
+
results_kept = []
|
| 150 |
+
results_removed_count = 0
|
| 151 |
+
evaluation_errors_count = 0
|
| 152 |
+
|
| 153 |
+
# Limit concurrent evaluations to prevent rate limiting
|
| 154 |
+
concurrency_limiter = asyncio.Semaphore(2)
|
| 155 |
+
|
| 156 |
+
async def evaluate_result_set_relevance(
|
| 157 |
+
search_result_content, original_query: str
|
| 158 |
+
):
|
| 159 |
+
"""
|
| 160 |
+
Evaluate if a search result set is relevant to its query.
|
| 161 |
+
|
| 162 |
+
Returns:
|
| 163 |
+
tuple: (search_result_content, is_relevant: bool, error: str|None)
|
| 164 |
+
"""
|
| 165 |
+
async with concurrency_limiter:
|
| 166 |
+
try:
|
| 167 |
+
# Skip empty result sets
|
| 168 |
+
if not search_result_content:
|
| 169 |
+
logger.debug(
|
| 170 |
+
f"Skipping empty result set for query: {original_query[:50]}..."
|
| 171 |
+
)
|
| 172 |
+
return (None, False, "empty")
|
| 173 |
+
|
| 174 |
+
# Create relevance evaluator
|
| 175 |
+
llm_relevance_judge = get_relevance_evaluator()
|
| 176 |
+
|
| 177 |
+
# Evaluate with timeout protection
|
| 178 |
+
evaluation_task = llm_relevance_judge(
|
| 179 |
+
inputs=original_query, context=search_result_content
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
+
evaluation_result = await asyncio.wait_for(
|
| 183 |
+
evaluation_task, timeout=15
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
# Extract relevance score (True = relevant, False = not relevant)
|
| 187 |
+
is_result_relevant = bool(evaluation_result.get("score", False))
|
| 188 |
+
|
| 189 |
+
if is_result_relevant:
|
| 190 |
+
logger.debug(
|
| 191 |
+
f"KEPT: Result relevant for query: {original_query[:60]}..."
|
| 192 |
+
)
|
| 193 |
+
return (search_result_content, True, None)
|
| 194 |
+
else:
|
| 195 |
+
logger.debug(
|
| 196 |
+
f"REMOVED: Result not relevant for query: {original_query[:60]}..."
|
| 197 |
+
)
|
| 198 |
+
return (None, False, None)
|
| 199 |
+
|
| 200 |
+
except asyncio.TimeoutError:
|
| 201 |
+
logger.warning(
|
| 202 |
+
f"Evaluation timed out for query: {original_query[:60]}... (KEEPING result)"
|
| 203 |
+
)
|
| 204 |
+
return (search_result_content, True, "timeout")
|
| 205 |
+
|
| 206 |
+
except Exception as e:
|
| 207 |
+
logger.error(
|
| 208 |
+
f"Evaluation failed for query: {original_query[:60]}... - {e} (KEEPING result)"
|
| 209 |
+
)
|
| 210 |
+
return (search_result_content, True, f"error:{str(e)}")
|
| 211 |
+
|
| 212 |
+
# Create evaluation tasks for all result sets
|
| 213 |
+
evaluation_tasks = []
|
| 214 |
+
for result_set, query in zip(raw_search_results, search_queries_used):
|
| 215 |
+
task = evaluate_result_set_relevance(result_set, query)
|
| 216 |
+
evaluation_tasks.append(task)
|
| 217 |
+
|
| 218 |
+
# Execute all evaluations concurrently
|
| 219 |
+
all_evaluation_results = await asyncio.gather(
|
| 220 |
+
*evaluation_tasks, return_exceptions=True
|
| 221 |
+
)
|
| 222 |
+
|
| 223 |
+
# Process evaluation results and separate kept vs removed
|
| 224 |
+
for eval_result in all_evaluation_results:
|
| 225 |
+
# Handle exceptions from gather
|
| 226 |
+
if isinstance(eval_result, Exception):
|
| 227 |
+
logger.error(f"Evaluation task failed with exception: {eval_result}")
|
| 228 |
+
evaluation_errors_count += 1
|
| 229 |
+
continue
|
| 230 |
|
| 231 |
+
# Type guard: eval_result is now guaranteed to be a tuple
|
| 232 |
+
if not isinstance(eval_result, tuple) or len(eval_result) != 3:
|
| 233 |
+
logger.error(
|
| 234 |
+
f"Unexpected evaluation result format: {type(eval_result)}"
|
| 235 |
+
)
|
| 236 |
+
evaluation_errors_count += 1
|
| 237 |
+
continue
|
| 238 |
|
| 239 |
+
result_content, is_relevant, error = eval_result
|
|
|
|
| 240 |
|
| 241 |
+
# Track errors
|
| 242 |
+
if error:
|
| 243 |
+
evaluation_errors_count += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
+
# Keep relevant results, discard irrelevant ones
|
| 246 |
+
if result_content is not None and is_relevant:
|
| 247 |
+
results_kept.append(result_content)
|
| 248 |
+
else:
|
| 249 |
+
results_removed_count += 1
|
| 250 |
|
| 251 |
+
# Update state with ONLY the relevant results
|
| 252 |
+
state["company_research_data"]["tavily_search"] = results_kept
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
|
| 254 |
+
# Log filtering summary
|
| 255 |
+
total_evaluated = len(raw_search_results)
|
| 256 |
+
kept_count = len(results_kept)
|
| 257 |
+
removed_count = results_removed_count
|
| 258 |
|
| 259 |
+
logger.info(
|
| 260 |
+
f"Relevance filtering complete: "
|
| 261 |
+
f"KEPT {kept_count} | REMOVED {removed_count} | TOTAL {total_evaluated} "
|
| 262 |
+
f"({evaluation_errors_count} evaluation errors)"
|
| 263 |
+
)
|
| 264 |
|
| 265 |
return state
|
| 266 |
|
| 267 |
except Exception as e:
|
| 268 |
+
logger.error(f"Critical error in relevance filtering: {e}", exc_info=True)
|
| 269 |
+
# On critical error, return original state unchanged
|
|
|
|
|
|
|
|
|
|
| 270 |
return state
|
src/job_writing_agent/tools/__init__.py
CHANGED
|
@@ -4,6 +4,6 @@ Created on Mon Oct 23 16:49:52 2023
|
|
| 4 |
@author: rishabhaggarwal
|
| 5 |
"""
|
| 6 |
|
| 7 |
-
from .SearchTool import
|
| 8 |
|
| 9 |
-
__all__ = ["
|
|
|
|
| 4 |
@author: rishabhaggarwal
|
| 5 |
"""
|
| 6 |
|
| 7 |
+
from .SearchTool import filter_research_results_by_relevance
|
| 8 |
|
| 9 |
+
__all__ = ["filter_research_results_by_relevance"]
|
src/job_writing_agent/utils/application_cli_interface.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
import argparse
|
| 2 |
import os
|
| 3 |
-
from typing import
|
| 4 |
|
| 5 |
import requests
|
| 6 |
from requests.exceptions import RequestException
|
| 7 |
|
| 8 |
|
| 9 |
-
DEFAULT_MODEL = "
|
| 10 |
DEFAULT_CONTENT_TYPE = "cover_letter"
|
| 11 |
|
| 12 |
|
|
|
|
| 1 |
import argparse
|
| 2 |
import os
|
| 3 |
+
from typing import Iterable
|
| 4 |
|
| 5 |
import requests
|
| 6 |
from requests.exceptions import RequestException
|
| 7 |
|
| 8 |
|
| 9 |
+
DEFAULT_MODEL = "mistralai/mistral-7b-instruct:free"
|
| 10 |
DEFAULT_CONTENT_TYPE = "cover_letter"
|
| 11 |
|
| 12 |
|
src/job_writing_agent/utils/document_processing.py
CHANGED
|
@@ -13,54 +13,66 @@ from typing_extensions import Dict, List, Any
|
|
| 13 |
import dspy
|
| 14 |
from langchain_community.document_loaders import PyPDFLoader, AsyncChromiumLoader
|
| 15 |
from langchain_community.document_transformers import Html2TextTransformer
|
| 16 |
-
from langchain_text_splitters import
|
| 17 |
-
|
|
|
|
|
|
|
| 18 |
from langchain_core.documents import Document
|
| 19 |
from langfuse import observe
|
| 20 |
from pydantic import BaseModel, Field
|
| 21 |
|
| 22 |
# Local imports - using relative imports
|
| 23 |
from .errors import URLExtractionError, LLMProcessingError, JobDescriptionParsingError
|
| 24 |
-
from .llm_provider_factory import LLMFactory
|
| 25 |
|
| 26 |
# Set up logging
|
| 27 |
logger = logging.getLogger(__name__)
|
| 28 |
logging.basicConfig(level=logging.INFO)
|
| 29 |
|
| 30 |
-
llm_provider = LLMFactory()
|
| 31 |
-
|
| 32 |
-
llm = llm_provider.create_langchain("qwen-3-32b",
|
| 33 |
-
provider="cerebras",
|
| 34 |
-
temperature=0.3,
|
| 35 |
-
)
|
| 36 |
-
|
| 37 |
# Default paths
|
| 38 |
DEFAULT_RESUME_PATH: str = os.getenv("DEFAULT_RESUME_PATH", "")
|
| 39 |
|
| 40 |
|
| 41 |
# Most Occurring Resume Section Headers
|
| 42 |
RESUME_SECTIONS: list[str] = [
|
| 43 |
-
"EDUCATION",
|
| 44 |
-
"
|
| 45 |
-
"
|
| 46 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
]
|
| 48 |
|
| 49 |
|
| 50 |
class ResumeSection(BaseModel):
|
| 51 |
"""Model for a structured resume section."""
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
| 53 |
content: str = Field(description="The full content of this section")
|
| 54 |
|
| 55 |
|
| 56 |
class StructuredResume(BaseModel):
|
| 57 |
"""Model for a structured resume with sections."""
|
|
|
|
| 58 |
sections: List[ResumeSection] = Field(description="List of resume sections")
|
| 59 |
-
contact_info: Dict[str, str] = Field(
|
|
|
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
class JobDescriptionComponents(BaseModel):
|
| 63 |
"""Model for job description components."""
|
|
|
|
| 64 |
company_name: str = Field(description="The company name")
|
| 65 |
job_description: str = Field(description="The job description")
|
| 66 |
reasoning: str = Field(description="The reasoning for the extracted information")
|
|
@@ -72,8 +84,13 @@ class ExtractJobDescription(dspy.Signature):
|
|
| 72 |
Role Introduction,Qualifications and Requirements, Prefrred Qualifications, Salary, Location.
|
| 73 |
Do not alter the content of the job description.
|
| 74 |
"""
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
job_role = dspy.OutputField(desc="The job role in the posting.")
|
| 78 |
company_name = dspy.OutputField(desc="Company Name of the Job listing.")
|
| 79 |
location = dspy.OutputField(desc="The location for the provided job posting.")
|
|
@@ -90,19 +107,20 @@ def clean_resume_text(text: str) -> str:
|
|
| 90 |
Cleaned text
|
| 91 |
"""
|
| 92 |
# Remove excessive whitespace
|
| 93 |
-
text = re.sub(r
|
| 94 |
|
| 95 |
# Fix common PDF extraction issues
|
| 96 |
-
text = re.sub(r
|
| 97 |
|
| 98 |
# Remove header/footer page numbers
|
| 99 |
-
text = re.sub(r
|
| 100 |
|
| 101 |
# Replace bullet variations with standard markdown bullets
|
| 102 |
-
text = re.sub(r
|
| 103 |
|
| 104 |
return text.strip()
|
| 105 |
|
|
|
|
| 106 |
@observe()
|
| 107 |
def extract_contact_info(text: str) -> Dict[str, str]:
|
| 108 |
"""Extract contact information from resume text.
|
|
@@ -116,28 +134,33 @@ def extract_contact_info(text: str) -> Dict[str, str]:
|
|
| 116 |
contact_info = {}
|
| 117 |
|
| 118 |
# Extract email
|
| 119 |
-
email_match = re.search(
|
|
|
|
|
|
|
| 120 |
if email_match:
|
| 121 |
-
contact_info[
|
| 122 |
|
| 123 |
# Extract phone (various formats)
|
| 124 |
-
phone_match = re.search(
|
|
|
|
|
|
|
| 125 |
if phone_match:
|
| 126 |
-
contact_info[
|
| 127 |
|
| 128 |
# Extract LinkedIn URL
|
| 129 |
-
linkedin_match = re.search(r
|
| 130 |
if linkedin_match:
|
| 131 |
-
contact_info[
|
| 132 |
|
| 133 |
# Try to extract name (this is approximate and might need LLM for better accuracy)
|
| 134 |
# Typically name appears at the top of the resume
|
| 135 |
-
first_line = text.strip().split(
|
| 136 |
if len(first_line) < 40 and not any(char.isdigit() for char in first_line):
|
| 137 |
-
contact_info[
|
| 138 |
|
| 139 |
return contact_info
|
| 140 |
|
|
|
|
| 141 |
@observe()
|
| 142 |
def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
|
| 143 |
"""Identify sections in a resume text.
|
|
@@ -174,15 +197,21 @@ def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
|
|
| 174 |
|
| 175 |
# Regex-based section identification
|
| 176 |
# Create a pattern that matches common section headers
|
| 177 |
-
section_pattern =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
matches = list(re.finditer(section_pattern, text, re.IGNORECASE))
|
| 179 |
|
| 180 |
if not matches:
|
| 181 |
# If no sections found, treat the whole resume as one section
|
| 182 |
-
sections.append(
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
|
|
|
|
|
|
| 186 |
return sections
|
| 187 |
|
| 188 |
# Process each section
|
|
@@ -191,15 +220,12 @@ def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
|
|
| 191 |
start_pos = match.start()
|
| 192 |
|
| 193 |
# Find the end position (start of next section or end of text)
|
| 194 |
-
end_pos = matches[i+1].start() if i < len(matches) - 1 else len(text)
|
| 195 |
|
| 196 |
# Extract section content (excluding the header)
|
| 197 |
section_content = text[start_pos:end_pos].strip()
|
| 198 |
|
| 199 |
-
sections.append({
|
| 200 |
-
"title": section_title.lower(),
|
| 201 |
-
"content": section_content
|
| 202 |
-
})
|
| 203 |
|
| 204 |
return sections
|
| 205 |
|
|
@@ -211,11 +237,8 @@ def _collapse_ws(text: str) -> str:
|
|
| 211 |
|
| 212 |
|
| 213 |
def _is_heading(line: str) -> bool:
|
| 214 |
-
return (
|
| 215 |
-
|
| 216 |
-
and len(line.split()) <= 5
|
| 217 |
-
and not re.search(r"\d", line)
|
| 218 |
-
)
|
| 219 |
|
| 220 |
def parse_resume(file_path: str | Path) -> List[Document]:
|
| 221 |
"""
|
|
@@ -225,11 +248,13 @@ def parse_resume(file_path: str | Path) -> List[Document]:
|
|
| 225 |
file_extension = Path(file_path).suffix.lower()
|
| 226 |
|
| 227 |
# Handle different file types
|
| 228 |
-
if file_extension ==
|
| 229 |
-
text =
|
| 230 |
-
|
|
|
|
|
|
|
| 231 |
try:
|
| 232 |
-
with open(file_path,
|
| 233 |
text = f.read()
|
| 234 |
if not text.strip():
|
| 235 |
raise ValueError("File is empty")
|
|
@@ -237,27 +262,26 @@ def parse_resume(file_path: str | Path) -> List[Document]:
|
|
| 237 |
logger.error(f"Error reading text file: {str(e)}")
|
| 238 |
raise ValueError(f"Could not read text file: {file_path}. Error: {str(e)}")
|
| 239 |
else:
|
| 240 |
-
raise ValueError(
|
|
|
|
|
|
|
| 241 |
|
| 242 |
text = _collapse_ws(text)
|
| 243 |
|
| 244 |
# Tag headings with "###" so Markdown splitter can see them
|
| 245 |
-
tagged_lines = [
|
| 246 |
-
f"### {ln}" if _is_heading(ln) else ln
|
| 247 |
-
for ln in text.splitlines()]
|
| 248 |
|
| 249 |
md_text = "\n".join(tagged_lines)
|
| 250 |
|
| 251 |
if "###" in md_text:
|
| 252 |
-
splitter = MarkdownHeaderTextSplitter(
|
| 253 |
-
headers_to_split_on=[("###", "section")]
|
| 254 |
-
)
|
| 255 |
chunks = splitter.split_text(md_text) # already returns Documents
|
| 256 |
else:
|
| 257 |
-
splitter = RecursiveCharacterTextSplitter(
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
|
|
|
| 261 |
for doc in chunks:
|
| 262 |
doc.metadata.setdefault("source", str(file_path))
|
| 263 |
# section already present if header‑splitter was used
|
|
@@ -274,26 +298,32 @@ async def get_job_description(file_path_or_url: str) -> Document:
|
|
| 274 |
Document containing the job description
|
| 275 |
"""
|
| 276 |
# Check if the input is a URL
|
| 277 |
-
if file_path_or_url.startswith((
|
| 278 |
return await parse_job_description_from_url(file_path_or_url)
|
| 279 |
|
| 280 |
# Handle local files based on extension
|
| 281 |
file_extension = Path(file_path_or_url).suffix.lower()
|
| 282 |
|
| 283 |
# Handle txt files
|
| 284 |
-
if file_extension ==
|
| 285 |
try:
|
| 286 |
-
with open(file_path_or_url,
|
| 287 |
content = f.read()
|
| 288 |
if not content.strip():
|
| 289 |
raise ValueError(f"File is empty: {file_path_or_url}")
|
| 290 |
-
return Document(
|
|
|
|
|
|
|
| 291 |
except Exception as e:
|
| 292 |
logger.error(f"Error reading text file: {str(e)}")
|
| 293 |
-
raise ValueError(
|
|
|
|
|
|
|
| 294 |
|
| 295 |
# For other file types
|
| 296 |
-
raise ValueError(
|
|
|
|
|
|
|
| 297 |
|
| 298 |
|
| 299 |
async def scrape_job_description_from_web(urls: List[str]):
|
|
@@ -304,7 +334,9 @@ async def scrape_job_description_from_web(urls: List[str]):
|
|
| 304 |
scraped_data_documents = await loader.aload()
|
| 305 |
|
| 306 |
html2text = Html2TextTransformer()
|
| 307 |
-
markdown_scraped_data_documents = html2text.transform_documents(
|
|
|
|
|
|
|
| 308 |
|
| 309 |
# Grab the first 1000 tokens of the site
|
| 310 |
splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
|
@@ -312,14 +344,14 @@ async def scrape_job_description_from_web(urls: List[str]):
|
|
| 312 |
)
|
| 313 |
|
| 314 |
extracted_content = splitter.split_documents(markdown_scraped_data_documents)
|
| 315 |
-
|
| 316 |
return ".".join(doc.page_content for doc in extracted_content)
|
| 317 |
|
| 318 |
|
| 319 |
async def parse_job_description_from_url(url: str) -> Document:
|
| 320 |
"""Extracts and structures a job description from a URL using an LLM.
|
| 321 |
|
| 322 |
-
This function fetches content from a URL, uses a DSPy
|
| 323 |
and returns a structured LangChain Document. If the LLM processing fails, it falls
|
| 324 |
back to returning the raw extracted text.
|
| 325 |
|
|
@@ -334,8 +366,8 @@ async def parse_job_description_from_url(url: str) -> Document:
|
|
| 334 |
JobDescriptionParsingError: For any unexpected errors during the process.
|
| 335 |
"""
|
| 336 |
logger.info("Starting job description extraction from URL: %s", url)
|
| 337 |
-
|
| 338 |
-
# 1. Validate URL
|
| 339 |
parsed_url = urlparse(url)
|
| 340 |
if not all([parsed_url.scheme, parsed_url.netloc]):
|
| 341 |
logger.error("Invalid URL format: %s", url)
|
|
@@ -348,27 +380,33 @@ async def parse_job_description_from_url(url: str) -> Document:
|
|
| 348 |
logger.info("Fetching content from URL...")
|
| 349 |
raw_content = await scrape_job_description_from_web([url])
|
| 350 |
if not raw_content or not raw_content.strip():
|
| 351 |
-
raise URLExtractionError(
|
|
|
|
|
|
|
| 352 |
logger.info("Successfully fetched raw content from URL.")
|
| 353 |
except Exception as e:
|
| 354 |
# Wrap any fetching error into our custom exception
|
| 355 |
-
raise URLExtractionError(
|
|
|
|
|
|
|
| 356 |
|
| 357 |
# 3. Process content with the LLM
|
| 358 |
try:
|
| 359 |
logger.info("Processing content with DSPy LLM...")
|
| 360 |
# Configure DSPy LM (it's good practice to do this here if it can change)
|
| 361 |
-
dspy.configure(
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
|
|
|
|
|
|
| 368 |
job_extract_fn = dspy.Predict(ExtractJobDescription)
|
| 369 |
result = job_extract_fn(job_description_html_content=raw_content)
|
| 370 |
logger.info("Successfully processed job description with LLM.")
|
| 371 |
-
|
| 372 |
# 4. Create the final Document with structured data
|
| 373 |
job_doc = Document(
|
| 374 |
page_content=result.job_description,
|
|
@@ -376,8 +414,8 @@ async def parse_job_description_from_url(url: str) -> Document:
|
|
| 376 |
"company_name": result.company_name,
|
| 377 |
"source": url,
|
| 378 |
"job_role": result.job_role,
|
| 379 |
-
"location": result.location
|
| 380 |
-
}
|
| 381 |
)
|
| 382 |
return job_doc
|
| 383 |
|
|
@@ -392,11 +430,13 @@ async def parse_job_description_from_url(url: str) -> Document:
|
|
| 392 |
if raw_content:
|
| 393 |
return Document(
|
| 394 |
page_content=raw_content,
|
| 395 |
-
metadata={"company_name": "Unknown", "source": url, "error": str(e)}
|
| 396 |
)
|
| 397 |
# If raw_content is also None, then the failure was catastrophic.
|
| 398 |
-
raise LLMProcessingError(
|
| 399 |
-
|
|
|
|
|
|
|
| 400 |
except URLExtractionError as e:
|
| 401 |
logger.error(f"Could not extract content from URL: {e}")
|
| 402 |
raise URLExtractionError("Failed to extract content from the URL.") from e
|
|
@@ -404,4 +444,6 @@ async def parse_job_description_from_url(url: str) -> Document:
|
|
| 404 |
# 6. Catch any other unexpected errors
|
| 405 |
except Exception as e:
|
| 406 |
logger.error(f"An unexpected error occurred: {e}", exc_info=True)
|
| 407 |
-
raise JobDescriptionParsingError(
|
|
|
|
|
|
|
|
|
| 13 |
import dspy
|
| 14 |
from langchain_community.document_loaders import PyPDFLoader, AsyncChromiumLoader
|
| 15 |
from langchain_community.document_transformers import Html2TextTransformer
|
| 16 |
+
from langchain_text_splitters import (
|
| 17 |
+
RecursiveCharacterTextSplitter,
|
| 18 |
+
MarkdownHeaderTextSplitter,
|
| 19 |
+
)
|
| 20 |
from langchain_core.documents import Document
|
| 21 |
from langfuse import observe
|
| 22 |
from pydantic import BaseModel, Field
|
| 23 |
|
| 24 |
# Local imports - using relative imports
|
| 25 |
from .errors import URLExtractionError, LLMProcessingError, JobDescriptionParsingError
|
|
|
|
| 26 |
|
| 27 |
# Set up logging
|
| 28 |
logger = logging.getLogger(__name__)
|
| 29 |
logging.basicConfig(level=logging.INFO)
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
# Default paths
|
| 32 |
DEFAULT_RESUME_PATH: str = os.getenv("DEFAULT_RESUME_PATH", "")
|
| 33 |
|
| 34 |
|
| 35 |
# Most Occurring Resume Section Headers
|
| 36 |
RESUME_SECTIONS: list[str] = [
|
| 37 |
+
"EDUCATION",
|
| 38 |
+
"EXPERIENCE",
|
| 39 |
+
"SKILLS",
|
| 40 |
+
"WORK EXPERIENCE",
|
| 41 |
+
"PROFESSIONAL EXPERIENCE",
|
| 42 |
+
"PROJECTS",
|
| 43 |
+
"CERTIFICATIONS",
|
| 44 |
+
"SUMMARY",
|
| 45 |
+
"OBJECTIVE",
|
| 46 |
+
"CONTACT",
|
| 47 |
+
"PUBLICATIONS",
|
| 48 |
+
"AWARDS",
|
| 49 |
+
"LANGUAGES",
|
| 50 |
+
"INTERESTS",
|
| 51 |
+
"REFERENCES",
|
| 52 |
]
|
| 53 |
|
| 54 |
|
| 55 |
class ResumeSection(BaseModel):
|
| 56 |
"""Model for a structured resume section."""
|
| 57 |
+
|
| 58 |
+
title: str = Field(
|
| 59 |
+
description="The section title (e.g., 'Experience', 'Education')"
|
| 60 |
+
)
|
| 61 |
content: str = Field(description="The full content of this section")
|
| 62 |
|
| 63 |
|
| 64 |
class StructuredResume(BaseModel):
|
| 65 |
"""Model for a structured resume with sections."""
|
| 66 |
+
|
| 67 |
sections: List[ResumeSection] = Field(description="List of resume sections")
|
| 68 |
+
contact_info: Dict[str, str] = Field(
|
| 69 |
+
description="Contact information extracted from the resume"
|
| 70 |
+
)
|
| 71 |
|
| 72 |
|
| 73 |
class JobDescriptionComponents(BaseModel):
|
| 74 |
"""Model for job description components."""
|
| 75 |
+
|
| 76 |
company_name: str = Field(description="The company name")
|
| 77 |
job_description: str = Field(description="The job description")
|
| 78 |
reasoning: str = Field(description="The reasoning for the extracted information")
|
|
|
|
| 84 |
Role Introduction,Qualifications and Requirements, Prefrred Qualifications, Salary, Location.
|
| 85 |
Do not alter the content of the job description.
|
| 86 |
"""
|
| 87 |
+
|
| 88 |
+
job_description_html_content = dspy.InputField(
|
| 89 |
+
desc="HTML content of the job posting."
|
| 90 |
+
)
|
| 91 |
+
job_description = dspy.OutputField(
|
| 92 |
+
desc="Clean job description which is free of HTML tags and irrelevant information."
|
| 93 |
+
)
|
| 94 |
job_role = dspy.OutputField(desc="The job role in the posting.")
|
| 95 |
company_name = dspy.OutputField(desc="Company Name of the Job listing.")
|
| 96 |
location = dspy.OutputField(desc="The location for the provided job posting.")
|
|
|
|
| 107 |
Cleaned text
|
| 108 |
"""
|
| 109 |
# Remove excessive whitespace
|
| 110 |
+
text = re.sub(r"\s+", " ", text)
|
| 111 |
|
| 112 |
# Fix common PDF extraction issues
|
| 113 |
+
text = re.sub(r"([a-z])- ([a-z])", r"\1\2", text) # Fix hyphenated words
|
| 114 |
|
| 115 |
# Remove header/footer page numbers
|
| 116 |
+
text = re.sub(r"\n\s*\d+\s*\n", "\n", text)
|
| 117 |
|
| 118 |
# Replace bullet variations with standard markdown bullets
|
| 119 |
+
text = re.sub(r"[•●○◘◙♦♣♠★]", "* ", text)
|
| 120 |
|
| 121 |
return text.strip()
|
| 122 |
|
| 123 |
+
|
| 124 |
@observe()
|
| 125 |
def extract_contact_info(text: str) -> Dict[str, str]:
|
| 126 |
"""Extract contact information from resume text.
|
|
|
|
| 134 |
contact_info = {}
|
| 135 |
|
| 136 |
# Extract email
|
| 137 |
+
email_match = re.search(
|
| 138 |
+
r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", text
|
| 139 |
+
)
|
| 140 |
if email_match:
|
| 141 |
+
contact_info["email"] = email_match.group(0)
|
| 142 |
|
| 143 |
# Extract phone (various formats)
|
| 144 |
+
phone_match = re.search(
|
| 145 |
+
r"(\+\d{1,3}[-.\s]?)?(\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}", text
|
| 146 |
+
)
|
| 147 |
if phone_match:
|
| 148 |
+
contact_info["phone"] = phone_match.group(0)
|
| 149 |
|
| 150 |
# Extract LinkedIn URL
|
| 151 |
+
linkedin_match = re.search(r"linkedin\.com/in/[a-zA-Z0-9_-]+/?", text)
|
| 152 |
if linkedin_match:
|
| 153 |
+
contact_info["linkedin"] = "https://www." + linkedin_match.group(0)
|
| 154 |
|
| 155 |
# Try to extract name (this is approximate and might need LLM for better accuracy)
|
| 156 |
# Typically name appears at the top of the resume
|
| 157 |
+
first_line = text.strip().split("\n")[0].strip()
|
| 158 |
if len(first_line) < 40 and not any(char.isdigit() for char in first_line):
|
| 159 |
+
contact_info["name"] = first_line
|
| 160 |
|
| 161 |
return contact_info
|
| 162 |
|
| 163 |
+
|
| 164 |
@observe()
|
| 165 |
def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
|
| 166 |
"""Identify sections in a resume text.
|
|
|
|
| 197 |
|
| 198 |
# Regex-based section identification
|
| 199 |
# Create a pattern that matches common section headers
|
| 200 |
+
section_pattern = (
|
| 201 |
+
r"(?:^|\n)(?:[^a-zA-Z\d\s]|\s)*("
|
| 202 |
+
+ "|".join(RESUME_SECTIONS)
|
| 203 |
+
+ r")(?:[^a-zA-Z\d\s]|\s)*(?:$|\n)"
|
| 204 |
+
)
|
| 205 |
matches = list(re.finditer(section_pattern, text, re.IGNORECASE))
|
| 206 |
|
| 207 |
if not matches:
|
| 208 |
# If no sections found, treat the whole resume as one section
|
| 209 |
+
sections.append(
|
| 210 |
+
{
|
| 211 |
+
"title": "resume",
|
| 212 |
+
"content": text,
|
| 213 |
+
}
|
| 214 |
+
)
|
| 215 |
return sections
|
| 216 |
|
| 217 |
# Process each section
|
|
|
|
| 220 |
start_pos = match.start()
|
| 221 |
|
| 222 |
# Find the end position (start of next section or end of text)
|
| 223 |
+
end_pos = matches[i + 1].start() if i < len(matches) - 1 else len(text)
|
| 224 |
|
| 225 |
# Extract section content (excluding the header)
|
| 226 |
section_content = text[start_pos:end_pos].strip()
|
| 227 |
|
| 228 |
+
sections.append({"title": section_title.lower(), "content": section_content})
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
return sections
|
| 231 |
|
|
|
|
| 237 |
|
| 238 |
|
| 239 |
def _is_heading(line: str) -> bool:
|
| 240 |
+
return line.isupper() and len(line.split()) <= 5 and not re.search(r"\d", line)
|
| 241 |
+
|
|
|
|
|
|
|
|
|
|
| 242 |
|
| 243 |
def parse_resume(file_path: str | Path) -> List[Document]:
|
| 244 |
"""
|
|
|
|
| 248 |
file_extension = Path(file_path).suffix.lower()
|
| 249 |
|
| 250 |
# Handle different file types
|
| 251 |
+
if file_extension == ".pdf":
|
| 252 |
+
text = (
|
| 253 |
+
PyPDFLoader(str(file_path), extraction_mode="layout").load()[0].page_content
|
| 254 |
+
)
|
| 255 |
+
elif file_extension == ".txt":
|
| 256 |
try:
|
| 257 |
+
with open(file_path, "r", encoding="utf-8") as f:
|
| 258 |
text = f.read()
|
| 259 |
if not text.strip():
|
| 260 |
raise ValueError("File is empty")
|
|
|
|
| 262 |
logger.error(f"Error reading text file: {str(e)}")
|
| 263 |
raise ValueError(f"Could not read text file: {file_path}. Error: {str(e)}")
|
| 264 |
else:
|
| 265 |
+
raise ValueError(
|
| 266 |
+
f"Unsupported resume file type: {file_path}. Supported types: .pdf, .txt"
|
| 267 |
+
)
|
| 268 |
|
| 269 |
text = _collapse_ws(text)
|
| 270 |
|
| 271 |
# Tag headings with "###" so Markdown splitter can see them
|
| 272 |
+
tagged_lines = [f"### {ln}" if _is_heading(ln) else ln for ln in text.splitlines()]
|
|
|
|
|
|
|
| 273 |
|
| 274 |
md_text = "\n".join(tagged_lines)
|
| 275 |
|
| 276 |
if "###" in md_text:
|
| 277 |
+
splitter = MarkdownHeaderTextSplitter(headers_to_split_on=[("###", "section")])
|
|
|
|
|
|
|
| 278 |
chunks = splitter.split_text(md_text) # already returns Documents
|
| 279 |
else:
|
| 280 |
+
splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)
|
| 281 |
+
chunks: list[Document] = [
|
| 282 |
+
Document(page_content=chunk, metadata={})
|
| 283 |
+
for chunk in splitter.split_text(md_text)
|
| 284 |
+
] # Attach metadata
|
| 285 |
for doc in chunks:
|
| 286 |
doc.metadata.setdefault("source", str(file_path))
|
| 287 |
# section already present if header‑splitter was used
|
|
|
|
| 298 |
Document containing the job description
|
| 299 |
"""
|
| 300 |
# Check if the input is a URL
|
| 301 |
+
if file_path_or_url.startswith(("http://", "https://")):
|
| 302 |
return await parse_job_description_from_url(file_path_or_url)
|
| 303 |
|
| 304 |
# Handle local files based on extension
|
| 305 |
file_extension = Path(file_path_or_url).suffix.lower()
|
| 306 |
|
| 307 |
# Handle txt files
|
| 308 |
+
if file_extension == ".txt":
|
| 309 |
try:
|
| 310 |
+
with open(file_path_or_url, "r", encoding="utf-8") as f:
|
| 311 |
content = f.read()
|
| 312 |
if not content.strip():
|
| 313 |
raise ValueError(f"File is empty: {file_path_or_url}")
|
| 314 |
+
return Document(
|
| 315 |
+
page_content=content, metadata={"source": file_path_or_url}
|
| 316 |
+
)
|
| 317 |
except Exception as e:
|
| 318 |
logger.error(f"Error reading text file: {str(e)}")
|
| 319 |
+
raise ValueError(
|
| 320 |
+
f"Could not read text file: {file_path_or_url}. Error: {str(e)}"
|
| 321 |
+
)
|
| 322 |
|
| 323 |
# For other file types
|
| 324 |
+
raise ValueError(
|
| 325 |
+
f"Unsupported file type: {file_path_or_url}. Supported types: .pdf, .docx, .txt, .md"
|
| 326 |
+
)
|
| 327 |
|
| 328 |
|
| 329 |
async def scrape_job_description_from_web(urls: List[str]):
|
|
|
|
| 334 |
scraped_data_documents = await loader.aload()
|
| 335 |
|
| 336 |
html2text = Html2TextTransformer()
|
| 337 |
+
markdown_scraped_data_documents = html2text.transform_documents(
|
| 338 |
+
scraped_data_documents
|
| 339 |
+
)
|
| 340 |
|
| 341 |
# Grab the first 1000 tokens of the site
|
| 342 |
splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
|
|
|
| 344 |
)
|
| 345 |
|
| 346 |
extracted_content = splitter.split_documents(markdown_scraped_data_documents)
|
| 347 |
+
|
| 348 |
return ".".join(doc.page_content for doc in extracted_content)
|
| 349 |
|
| 350 |
|
| 351 |
async def parse_job_description_from_url(url: str) -> Document:
|
| 352 |
"""Extracts and structures a job description from a URL using an LLM.
|
| 353 |
|
| 354 |
+
This function fetches content from a URL, uses a DSPy to extract key details,
|
| 355 |
and returns a structured LangChain Document. If the LLM processing fails, it falls
|
| 356 |
back to returning the raw extracted text.
|
| 357 |
|
|
|
|
| 366 |
JobDescriptionParsingError: For any unexpected errors during the process.
|
| 367 |
"""
|
| 368 |
logger.info("Starting job description extraction from URL: %s", url)
|
| 369 |
+
|
| 370 |
+
# 1. Validate URL
|
| 371 |
parsed_url = urlparse(url)
|
| 372 |
if not all([parsed_url.scheme, parsed_url.netloc]):
|
| 373 |
logger.error("Invalid URL format: %s", url)
|
|
|
|
| 380 |
logger.info("Fetching content from URL...")
|
| 381 |
raw_content = await scrape_job_description_from_web([url])
|
| 382 |
if not raw_content or not raw_content.strip():
|
| 383 |
+
raise URLExtractionError(
|
| 384 |
+
"Failed to extract any meaningful content from the URL."
|
| 385 |
+
)
|
| 386 |
logger.info("Successfully fetched raw content from URL.")
|
| 387 |
except Exception as e:
|
| 388 |
# Wrap any fetching error into our custom exception
|
| 389 |
+
raise URLExtractionError(
|
| 390 |
+
f"Failed to download or read content from {url}: {e}"
|
| 391 |
+
) from e
|
| 392 |
|
| 393 |
# 3. Process content with the LLM
|
| 394 |
try:
|
| 395 |
logger.info("Processing content with DSPy LLM...")
|
| 396 |
# Configure DSPy LM (it's good practice to do this here if it can change)
|
| 397 |
+
dspy.configure(
|
| 398 |
+
lm=dspy.LM(
|
| 399 |
+
"cerebras/qwen-3-32b",
|
| 400 |
+
api_key=os.environ.get("CEREBRAS_API_KEY"),
|
| 401 |
+
temperature=0.1,
|
| 402 |
+
max_tokens=60000, # Note: This max_tokens is unusually high
|
| 403 |
+
)
|
| 404 |
+
)
|
| 405 |
+
|
| 406 |
job_extract_fn = dspy.Predict(ExtractJobDescription)
|
| 407 |
result = job_extract_fn(job_description_html_content=raw_content)
|
| 408 |
logger.info("Successfully processed job description with LLM.")
|
| 409 |
+
|
| 410 |
# 4. Create the final Document with structured data
|
| 411 |
job_doc = Document(
|
| 412 |
page_content=result.job_description,
|
|
|
|
| 414 |
"company_name": result.company_name,
|
| 415 |
"source": url,
|
| 416 |
"job_role": result.job_role,
|
| 417 |
+
"location": result.location,
|
| 418 |
+
},
|
| 419 |
)
|
| 420 |
return job_doc
|
| 421 |
|
|
|
|
| 430 |
if raw_content:
|
| 431 |
return Document(
|
| 432 |
page_content=raw_content,
|
| 433 |
+
metadata={"company_name": "Unknown", "source": url, "error": str(e)},
|
| 434 |
)
|
| 435 |
# If raw_content is also None, then the failure was catastrophic.
|
| 436 |
+
raise LLMProcessingError(
|
| 437 |
+
"LLM processing failed and no raw content was available for fallback."
|
| 438 |
+
) from e
|
| 439 |
+
|
| 440 |
except URLExtractionError as e:
|
| 441 |
logger.error(f"Could not extract content from URL: {e}")
|
| 442 |
raise URLExtractionError("Failed to extract content from the URL.") from e
|
|
|
|
| 444 |
# 6. Catch any other unexpected errors
|
| 445 |
except Exception as e:
|
| 446 |
logger.error(f"An unexpected error occurred: {e}", exc_info=True)
|
| 447 |
+
raise JobDescriptionParsingError(
|
| 448 |
+
f"An unexpected error occurred while parsing the job description: {e}"
|
| 449 |
+
) from e
|
src/job_writing_agent/utils/llm_client.py
CHANGED
|
@@ -14,85 +14,88 @@ import dspy
|
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
| 16 |
__all__ = [
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
]
|
| 21 |
|
|
|
|
| 22 |
class LLMProvider(ABC):
|
| 23 |
"""Base class for LLM provider strategies."""
|
| 24 |
-
|
| 25 |
@abstractmethod
|
| 26 |
def get_default_config(self) -> Dict[str, Any]:
|
| 27 |
pass
|
| 28 |
-
|
| 29 |
@abstractmethod
|
| 30 |
def get_langchain_params(self) -> set[str]:
|
| 31 |
pass
|
| 32 |
-
|
| 33 |
@abstractmethod
|
| 34 |
def get_dspy_params(self) -> set[str]:
|
| 35 |
pass
|
| 36 |
-
|
| 37 |
@abstractmethod
|
| 38 |
def format_model_name_for_provider(self, model: str) -> str:
|
| 39 |
"""Convert model name to DSPy format.
|
| 40 |
-
|
| 41 |
Different providers require different prefixes in DSPy.
|
| 42 |
-
|
| 43 |
Args:
|
| 44 |
model: Model name as used in LangChain
|
| 45 |
-
|
| 46 |
Returns:
|
| 47 |
Model name formatted for DSPy
|
| 48 |
"""
|
| 49 |
pass
|
| 50 |
-
|
| 51 |
@abstractmethod
|
| 52 |
def validate_config(self, **config) -> Dict[str, Any]:
|
| 53 |
pass
|
| 54 |
-
|
| 55 |
def create_llm_instance(
|
| 56 |
-
self,
|
| 57 |
-
model: str,
|
| 58 |
-
framework: Literal[
|
| 59 |
-
**config
|
| 60 |
) -> BaseChatModel | dspy.LM:
|
| 61 |
"""Create LLM instance for specified framework."""
|
| 62 |
defaults = self.get_default_config()
|
| 63 |
-
|
| 64 |
# Get framework-specific supported params
|
| 65 |
-
if framework ==
|
| 66 |
supported = self.get_langchain_params()
|
| 67 |
else:
|
| 68 |
supported = self.get_dspy_params()
|
| 69 |
-
|
| 70 |
# Filter unsupported params
|
| 71 |
filtered_config = {k: v for k, v in config.items() if k in supported}
|
| 72 |
-
|
| 73 |
# Warn about ignored params
|
| 74 |
ignored = set(config.keys()) - supported
|
| 75 |
if ignored:
|
| 76 |
-
logger.warning(
|
| 77 |
-
|
|
|
|
|
|
|
| 78 |
# Merge configs
|
| 79 |
merged_config = {**defaults, **filtered_config}
|
| 80 |
-
|
| 81 |
# Validate
|
| 82 |
validated_config = self.validate_config(**merged_config)
|
| 83 |
-
|
| 84 |
# Create instance based on framework
|
| 85 |
-
if framework ==
|
| 86 |
return self._create_langchain_instance(model, **validated_config)
|
| 87 |
-
elif framework ==
|
| 88 |
return self._create_dspy_instance(model, **validated_config)
|
| 89 |
else:
|
| 90 |
raise ValueError(f"Unsupported framework: {framework}")
|
| 91 |
-
|
| 92 |
@abstractmethod
|
| 93 |
def _create_langchain_instance(self, model: str, **config) -> BaseChatModel:
|
| 94 |
pass
|
| 95 |
-
|
| 96 |
@abstractmethod
|
| 97 |
def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
|
| 98 |
pass
|
|
@@ -100,224 +103,237 @@ class LLMProvider(ABC):
|
|
| 100 |
|
| 101 |
class OpenRouterChatProvider(LLMProvider):
|
| 102 |
"""Provider for OpenRouter.
|
| 103 |
-
|
| 104 |
Model format:
|
| 105 |
- LangChain: "openai/gpt-4", "anthropic/claude-3-opus"
|
| 106 |
- DSPy: Same - "openai/gpt-4", "anthropic/claude-3-opus"
|
| 107 |
-
|
| 108 |
Docs: https://openrouter.ai/docs
|
| 109 |
"""
|
| 110 |
-
|
| 111 |
OPENROUTER_API_URL = "https://openrouter.ai/api/v1"
|
| 112 |
-
|
| 113 |
def get_default_config(self) -> Dict[str, Any]:
|
| 114 |
-
return {
|
| 115 |
-
|
| 116 |
def get_langchain_params(self) -> set[str]:
|
| 117 |
return {
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
}
|
| 122 |
-
|
| 123 |
def get_dspy_params(self) -> set[str]:
|
| 124 |
-
return {
|
| 125 |
-
|
| 126 |
def format_model_name_for_provider(self, model: str) -> str:
|
| 127 |
"""OpenRouter models are used as-is in DSPy.
|
| 128 |
-
|
| 129 |
Examples:
|
| 130 |
"openai/gpt-4" -> "openai/gpt-4"
|
| 131 |
"anthropic/claude-3-opus" -> "anthropic/claude-3-opus"
|
| 132 |
"""
|
| 133 |
return f"{model}" # ✅ Use as-is - already has provider/model format
|
| 134 |
-
|
| 135 |
def validate_config(self, **config) -> Dict[str, Any]:
|
| 136 |
-
if
|
| 137 |
-
temp = config[
|
| 138 |
if not 0 <= temp <= 2:
|
| 139 |
logger.warning(f"Temperature must be 0-2, got {temp}")
|
| 140 |
-
|
| 141 |
-
if
|
| 142 |
-
api_key = os.getenv(
|
| 143 |
if not api_key:
|
| 144 |
raise ValueError("OPENROUTER_API_KEY not set")
|
| 145 |
-
config[
|
| 146 |
-
|
| 147 |
return config
|
| 148 |
-
|
| 149 |
def _create_langchain_instance(self, model: str, **config) -> ChatOpenAI:
|
| 150 |
"""Create LangChain instance.
|
| 151 |
-
|
| 152 |
Example model: "openai/gpt-4"
|
| 153 |
"""
|
| 154 |
-
api_key = config.pop(
|
| 155 |
-
|
| 156 |
return ChatOpenAI(
|
| 157 |
-
model=self.format_model_name_for_provider(
|
|
|
|
|
|
|
| 158 |
api_key=SecretStr(api_key),
|
| 159 |
base_url=self.OPENROUTER_API_URL,
|
| 160 |
-
**config
|
| 161 |
)
|
| 162 |
-
|
| 163 |
def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
|
| 164 |
"""Create DSPy instance.
|
| 165 |
-
|
| 166 |
Example model: "openai/gpt-4"
|
| 167 |
"""
|
| 168 |
-
api_key = config.pop(
|
| 169 |
-
|
| 170 |
return dspy.LM(
|
| 171 |
-
model=self.format_model_name_for_provider(model), # ✅ Use as-is: "openai/gpt-4"
|
| 172 |
api_key=api_key,
|
| 173 |
api_base=self.OPENROUTER_API_URL,
|
| 174 |
-
**config
|
| 175 |
)
|
| 176 |
|
| 177 |
|
| 178 |
class CerebrasChatProvider(LLMProvider):
|
| 179 |
"""Provider for Cerebras.
|
| 180 |
-
|
| 181 |
Model format:
|
| 182 |
- LangChain: "llama3.1-8b", "llama3.1-70b" (direct names)
|
| 183 |
- DSPy: "openai/llama3.1-8b" (needs openai/ prefix for compatibility)
|
| 184 |
-
|
| 185 |
Docs: https://inference-docs.cerebras.ai/
|
| 186 |
"""
|
| 187 |
-
|
| 188 |
CEREBRAS_API_URL = "https://api.cerebras.ai/v1"
|
| 189 |
-
|
| 190 |
def get_default_config(self) -> Dict[str, Any]:
|
| 191 |
-
return {
|
| 192 |
-
|
| 193 |
def get_langchain_params(self) -> set[str]:
|
| 194 |
-
return {
|
| 195 |
-
|
| 196 |
-
'stop', 'stream', 'seed'
|
| 197 |
-
}
|
| 198 |
-
|
| 199 |
def get_dspy_params(self) -> set[str]:
|
| 200 |
-
return {
|
| 201 |
-
|
| 202 |
def format_model_name_for_provider(self, model: str) -> str:
|
| 203 |
"""Cerebras models need 'cerebras/' prefix.
|
| 204 |
-
|
| 205 |
Examples:
|
| 206 |
"llama3.1-8b" -> "cerebras/llama3.1-8b"
|
| 207 |
"llama3.1-70b" -> "cerebras/llama3.1-70b"
|
| 208 |
"""
|
| 209 |
return f"cerebras/{model}" # ✅ Add openai/ prefix for OpenAI-compatible API
|
| 210 |
-
|
| 211 |
def validate_config(self, **config) -> Dict[str, Any]:
|
| 212 |
-
if
|
| 213 |
-
temp = config[
|
| 214 |
if not 0 <= temp <= 1.5:
|
| 215 |
raise ValueError(f"Temperature must be 0-1.5, got {temp}")
|
| 216 |
-
|
| 217 |
-
if
|
| 218 |
-
api_key = os.getenv(
|
| 219 |
if not api_key:
|
| 220 |
raise ValueError("CEREBRAS_API_KEY not set")
|
| 221 |
-
config[
|
| 222 |
-
|
| 223 |
return config
|
| 224 |
-
|
| 225 |
def _create_langchain_instance(self, model: str, **config) -> ChatCerebras:
|
| 226 |
"""Create LangChain instance.
|
| 227 |
-
|
| 228 |
Example model: "llama3.1-8b"
|
| 229 |
"""
|
| 230 |
|
| 231 |
return ChatCerebras(
|
| 232 |
model=model, # Direct name: "llama3.1-8b"
|
| 233 |
-
**config
|
| 234 |
)
|
| 235 |
|
| 236 |
-
|
| 237 |
@DeprecationWarning
|
| 238 |
-
def _create_langchain_instance_openaiclient(
|
|
|
|
|
|
|
| 239 |
"""
|
| 240 |
Create LangChain instance
|
| 241 |
Example model: "llama3.1-8b"
|
| 242 |
"""
|
| 243 |
-
|
| 244 |
-
api_key = config.pop(
|
| 245 |
-
|
| 246 |
return ChatOpenAI(
|
| 247 |
-
model=self.format_model_name_for_provider(
|
|
|
|
|
|
|
| 248 |
api_key=SecretStr(api_key),
|
| 249 |
base_url=self.CEREBRAS_API_URL,
|
| 250 |
-
**config
|
| 251 |
)
|
| 252 |
-
|
| 253 |
def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
|
| 254 |
"""Create DSPy instance.
|
| 255 |
-
|
| 256 |
Example model input: "llama3.1-8b"
|
| 257 |
DSPy format: "openai/llama3.1-8b"
|
| 258 |
"""
|
| 259 |
-
api_key = config.pop(
|
| 260 |
-
|
| 261 |
return dspy.LM(
|
| 262 |
-
model=self.format_model_name_for_provider(
|
|
|
|
|
|
|
| 263 |
api_key=api_key,
|
| 264 |
api_base=self.CEREBRAS_API_URL,
|
| 265 |
-
**config
|
| 266 |
)
|
| 267 |
|
| 268 |
|
| 269 |
class OllamaChatProvider(LLMProvider):
|
| 270 |
"""Provider for Ollama.
|
| 271 |
-
|
| 272 |
Model format:
|
| 273 |
- LangChain: "llama3.2", "llama3.2:latest" (direct names with optional tags)
|
| 274 |
- DSPy: "ollama_chat/llama3.2" (needs ollama_chat/ prefix)
|
| 275 |
-
|
| 276 |
Docs: https://ollama.com/
|
| 277 |
"""
|
| 278 |
-
|
| 279 |
def get_default_config(self) -> Dict[str, Any]:
|
| 280 |
-
return {
|
| 281 |
-
|
| 282 |
def get_langchain_params(self) -> set[str]:
|
| 283 |
return {
|
| 284 |
-
|
| 285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
}
|
| 287 |
-
|
| 288 |
def get_dspy_params(self) -> set[str]:
|
| 289 |
-
return {
|
| 290 |
-
|
| 291 |
def format_model_name_for_provider(self, model: str) -> str:
|
| 292 |
"""Ollama models need 'ollama_chat/' prefix for DSPy.
|
| 293 |
-
|
| 294 |
Examples:
|
| 295 |
"llama3.2" -> "ollama_chat/llama3.2"
|
| 296 |
"llama3.2:latest" -> "ollama_chat/llama3.2:latest"
|
| 297 |
"""
|
| 298 |
return f"ollama_chat/{model}" # ✅ Add ollama_chat/ prefix
|
| 299 |
-
|
| 300 |
def validate_config(self, **config) -> Dict[str, Any]:
|
| 301 |
-
if
|
| 302 |
-
temp = config[
|
| 303 |
if not 0 <= temp <= 2:
|
| 304 |
raise ValueError(f"Temperature must be 0-2, got {temp}")
|
| 305 |
-
|
| 306 |
-
if
|
| 307 |
-
if not isinstance(config[
|
| 308 |
raise ValueError("top_k must be positive integer")
|
| 309 |
-
|
| 310 |
return config
|
| 311 |
-
|
| 312 |
def _create_langchain_instance(self, model: str, **config) -> ChatOllama:
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
model=self.format_model_name_for_provider(model),
|
| 316 |
-
**config)
|
| 317 |
-
|
| 318 |
def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
|
| 319 |
-
|
| 320 |
return dspy.LM(
|
| 321 |
-
model=self.format_model_name_for_provider(
|
| 322 |
-
|
| 323 |
-
)
|
|
|
|
|
|
|
|
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
| 16 |
__all__ = [
|
| 17 |
+
"OllamaChatProvider",
|
| 18 |
+
"CerebrasChatProvider",
|
| 19 |
+
"OpenRouterChatProvider",
|
| 20 |
]
|
| 21 |
|
| 22 |
+
|
| 23 |
class LLMProvider(ABC):
|
| 24 |
"""Base class for LLM provider strategies."""
|
| 25 |
+
|
| 26 |
@abstractmethod
|
| 27 |
def get_default_config(self) -> Dict[str, Any]:
|
| 28 |
pass
|
| 29 |
+
|
| 30 |
@abstractmethod
|
| 31 |
def get_langchain_params(self) -> set[str]:
|
| 32 |
pass
|
| 33 |
+
|
| 34 |
@abstractmethod
|
| 35 |
def get_dspy_params(self) -> set[str]:
|
| 36 |
pass
|
| 37 |
+
|
| 38 |
@abstractmethod
|
| 39 |
def format_model_name_for_provider(self, model: str) -> str:
|
| 40 |
"""Convert model name to DSPy format.
|
| 41 |
+
|
| 42 |
Different providers require different prefixes in DSPy.
|
| 43 |
+
|
| 44 |
Args:
|
| 45 |
model: Model name as used in LangChain
|
| 46 |
+
|
| 47 |
Returns:
|
| 48 |
Model name formatted for DSPy
|
| 49 |
"""
|
| 50 |
pass
|
| 51 |
+
|
| 52 |
@abstractmethod
|
| 53 |
def validate_config(self, **config) -> Dict[str, Any]:
|
| 54 |
pass
|
| 55 |
+
|
| 56 |
def create_llm_instance(
|
| 57 |
+
self,
|
| 58 |
+
model: str,
|
| 59 |
+
framework: Literal["langchain", "dspy"] = "langchain",
|
| 60 |
+
**config,
|
| 61 |
) -> BaseChatModel | dspy.LM:
|
| 62 |
"""Create LLM instance for specified framework."""
|
| 63 |
defaults = self.get_default_config()
|
| 64 |
+
|
| 65 |
# Get framework-specific supported params
|
| 66 |
+
if framework == "langchain":
|
| 67 |
supported = self.get_langchain_params()
|
| 68 |
else:
|
| 69 |
supported = self.get_dspy_params()
|
| 70 |
+
|
| 71 |
# Filter unsupported params
|
| 72 |
filtered_config = {k: v for k, v in config.items() if k in supported}
|
| 73 |
+
|
| 74 |
# Warn about ignored params
|
| 75 |
ignored = set(config.keys()) - supported
|
| 76 |
if ignored:
|
| 77 |
+
logger.warning(
|
| 78 |
+
f"Ignoring unsupported parameters for {framework}: {ignored}"
|
| 79 |
+
)
|
| 80 |
+
|
| 81 |
# Merge configs
|
| 82 |
merged_config = {**defaults, **filtered_config}
|
| 83 |
+
|
| 84 |
# Validate
|
| 85 |
validated_config = self.validate_config(**merged_config)
|
| 86 |
+
|
| 87 |
# Create instance based on framework
|
| 88 |
+
if framework == "langchain":
|
| 89 |
return self._create_langchain_instance(model, **validated_config)
|
| 90 |
+
elif framework == "dspy":
|
| 91 |
return self._create_dspy_instance(model, **validated_config)
|
| 92 |
else:
|
| 93 |
raise ValueError(f"Unsupported framework: {framework}")
|
| 94 |
+
|
| 95 |
@abstractmethod
|
| 96 |
def _create_langchain_instance(self, model: str, **config) -> BaseChatModel:
|
| 97 |
pass
|
| 98 |
+
|
| 99 |
@abstractmethod
|
| 100 |
def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
|
| 101 |
pass
|
|
|
|
| 103 |
|
| 104 |
class OpenRouterChatProvider(LLMProvider):
|
| 105 |
"""Provider for OpenRouter.
|
| 106 |
+
|
| 107 |
Model format:
|
| 108 |
- LangChain: "openai/gpt-4", "anthropic/claude-3-opus"
|
| 109 |
- DSPy: Same - "openai/gpt-4", "anthropic/claude-3-opus"
|
| 110 |
+
|
| 111 |
Docs: https://openrouter.ai/docs
|
| 112 |
"""
|
| 113 |
+
|
| 114 |
OPENROUTER_API_URL = "https://openrouter.ai/api/v1"
|
| 115 |
+
|
| 116 |
def get_default_config(self) -> Dict[str, Any]:
|
| 117 |
+
return {"temperature": 0.2}
|
| 118 |
+
|
| 119 |
def get_langchain_params(self) -> set[str]:
|
| 120 |
return {
|
| 121 |
+
"temperature",
|
| 122 |
+
"max_tokens",
|
| 123 |
+
"top_p",
|
| 124 |
+
"frequency_penalty",
|
| 125 |
+
"presence_penalty",
|
| 126 |
+
"stop",
|
| 127 |
+
"n",
|
| 128 |
+
"stream",
|
| 129 |
}
|
| 130 |
+
|
| 131 |
def get_dspy_params(self) -> set[str]:
|
| 132 |
+
return {"temperature", "max_tokens", "top_p", "stop", "n"}
|
| 133 |
+
|
| 134 |
def format_model_name_for_provider(self, model: str) -> str:
|
| 135 |
"""OpenRouter models are used as-is in DSPy.
|
| 136 |
+
|
| 137 |
Examples:
|
| 138 |
"openai/gpt-4" -> "openai/gpt-4"
|
| 139 |
"anthropic/claude-3-opus" -> "anthropic/claude-3-opus"
|
| 140 |
"""
|
| 141 |
return f"{model}" # ✅ Use as-is - already has provider/model format
|
| 142 |
+
|
| 143 |
def validate_config(self, **config) -> Dict[str, Any]:
|
| 144 |
+
if "temperature" in config:
|
| 145 |
+
temp = config["temperature"]
|
| 146 |
if not 0 <= temp <= 2:
|
| 147 |
logger.warning(f"Temperature must be 0-2, got {temp}")
|
| 148 |
+
|
| 149 |
+
if "api_key" not in config:
|
| 150 |
+
api_key = os.getenv("OPENROUTER_API_KEY")
|
| 151 |
if not api_key:
|
| 152 |
raise ValueError("OPENROUTER_API_KEY not set")
|
| 153 |
+
config["api_key"] = api_key
|
| 154 |
+
|
| 155 |
return config
|
| 156 |
+
|
| 157 |
def _create_langchain_instance(self, model: str, **config) -> ChatOpenAI:
|
| 158 |
"""Create LangChain instance.
|
| 159 |
+
|
| 160 |
Example model: "openai/gpt-4"
|
| 161 |
"""
|
| 162 |
+
api_key = config.pop("api_key")
|
| 163 |
+
|
| 164 |
return ChatOpenAI(
|
| 165 |
+
model=self.format_model_name_for_provider(
|
| 166 |
+
model
|
| 167 |
+
), # ✅ Use model as-is: "openai/gpt-4"
|
| 168 |
api_key=SecretStr(api_key),
|
| 169 |
base_url=self.OPENROUTER_API_URL,
|
| 170 |
+
**config,
|
| 171 |
)
|
| 172 |
+
|
| 173 |
def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
|
| 174 |
"""Create DSPy instance.
|
| 175 |
+
|
| 176 |
Example model: "openai/gpt-4"
|
| 177 |
"""
|
| 178 |
+
api_key = config.pop("api_key")
|
| 179 |
+
|
| 180 |
return dspy.LM(
|
| 181 |
+
model=f"openrouter/{self.format_model_name_for_provider(model)}", # ✅ Use as-is: "openai/gpt-4"
|
| 182 |
api_key=api_key,
|
| 183 |
api_base=self.OPENROUTER_API_URL,
|
| 184 |
+
**config,
|
| 185 |
)
|
| 186 |
|
| 187 |
|
| 188 |
class CerebrasChatProvider(LLMProvider):
|
| 189 |
"""Provider for Cerebras.
|
| 190 |
+
|
| 191 |
Model format:
|
| 192 |
- LangChain: "llama3.1-8b", "llama3.1-70b" (direct names)
|
| 193 |
- DSPy: "cerebras/llama3.1-8b" (needs cerebras/ prefix for LiteLLM routing)
|
| 194 |
+
|
| 195 |
Docs: https://inference-docs.cerebras.ai/
|
| 196 |
"""
|
| 197 |
+
|
| 198 |
CEREBRAS_API_URL = "https://api.cerebras.ai/v1"
|
| 199 |
+
|
| 200 |
def get_default_config(self) -> Dict[str, Any]:
|
| 201 |
+
return {"temperature": 0.2, "max_tokens": 1024}
|
| 202 |
+
|
| 203 |
def get_langchain_params(self) -> set[str]:
|
| 204 |
+
return {"temperature", "max_tokens", "top_p", "stop", "stream", "seed"}
|
| 205 |
+
|
|
|
|
|
|
|
|
|
|
| 206 |
def get_dspy_params(self) -> set[str]:
|
| 207 |
+
return {"temperature", "max_tokens", "top_p", "stop"}
|
| 208 |
+
|
| 209 |
def format_model_name_for_provider(self, model: str) -> str:
|
| 210 |
"""Cerebras models need 'cerebras/' prefix.
|
| 211 |
+
|
| 212 |
Examples:
|
| 213 |
"llama3.1-8b" -> "cerebras/llama3.1-8b"
|
| 214 |
"llama3.1-70b" -> "cerebras/llama3.1-70b"
|
| 215 |
"""
|
| 216 |
return f"cerebras/{model}"  # ✅ Add cerebras/ prefix for LiteLLM/DSPy routing
|
| 217 |
+
|
| 218 |
def validate_config(self, **config) -> Dict[str, Any]:
|
| 219 |
+
if "temperature" in config:
|
| 220 |
+
temp = config["temperature"]
|
| 221 |
if not 0 <= temp <= 1.5:
|
| 222 |
raise ValueError(f"Temperature must be 0-1.5, got {temp}")
|
| 223 |
+
|
| 224 |
+
if "api_key" not in config:
|
| 225 |
+
api_key = os.getenv("CEREBRAS_API_KEY")
|
| 226 |
if not api_key:
|
| 227 |
raise ValueError("CEREBRAS_API_KEY not set")
|
| 228 |
+
config["api_key"] = api_key
|
| 229 |
+
|
| 230 |
return config
|
| 231 |
+
|
| 232 |
def _create_langchain_instance(self, model: str, **config) -> ChatCerebras:
|
| 233 |
"""Create LangChain instance.
|
| 234 |
+
|
| 235 |
Example model: "llama3.1-8b"
|
| 236 |
"""
|
| 237 |
|
| 238 |
return ChatCerebras(
|
| 239 |
model=model, # Direct name: "llama3.1-8b"
|
| 240 |
+
**config,
|
| 241 |
)
|
| 242 |
|
|
|
|
| 243 |
@DeprecationWarning
|
| 244 |
+
def _create_langchain_instance_openaiclient(
|
| 245 |
+
self, model: str, **config
|
| 246 |
+
) -> ChatOpenAI:
|
| 247 |
"""
|
| 248 |
Create LangChain instance
|
| 249 |
Example model: "llama3.1-8b"
|
| 250 |
"""
|
| 251 |
+
|
| 252 |
+
api_key = config.pop("api_key")
|
| 253 |
+
|
| 254 |
return ChatOpenAI(
|
| 255 |
+
model=self.format_model_name_for_provider(
|
| 256 |
+
model
|
| 257 |
+
), # Direct name: "llama3.1-8b"
|
| 258 |
api_key=SecretStr(api_key),
|
| 259 |
base_url=self.CEREBRAS_API_URL,
|
| 260 |
+
**config,
|
| 261 |
)
|
| 262 |
+
|
| 263 |
def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
|
| 264 |
"""Create DSPy instance.
|
| 265 |
+
|
| 266 |
Example model input: "llama3.1-8b"
|
| 267 |
DSPy format: "cerebras/llama3.1-8b"
|
| 268 |
"""
|
| 269 |
+
api_key = config.pop("api_key")
|
| 270 |
+
|
| 271 |
return dspy.LM(
|
| 272 |
+
model=self.format_model_name_for_provider(
|
| 273 |
+
model
|
| 274 |
+
),  # With prefix: "cerebras/llama3.1-8b"
|
| 275 |
api_key=api_key,
|
| 276 |
api_base=self.CEREBRAS_API_URL,
|
| 277 |
+
**config,
|
| 278 |
)
|
| 279 |
|
| 280 |
|
| 281 |
class OllamaChatProvider(LLMProvider):
|
| 282 |
"""Provider for Ollama.
|
| 283 |
+
|
| 284 |
Model format:
|
| 285 |
- LangChain: "llama3.2", "llama3.2:latest" (direct names with optional tags)
|
| 286 |
- DSPy: "ollama_chat/llama3.2" (needs ollama_chat/ prefix)
|
| 287 |
+
|
| 288 |
Docs: https://ollama.com/
|
| 289 |
"""
|
| 290 |
+
|
| 291 |
def get_default_config(self) -> Dict[str, Any]:
|
| 292 |
+
return {"temperature": 0.2, "top_k": 40, "top_p": 0.9}
|
| 293 |
+
|
| 294 |
def get_langchain_params(self) -> set[str]:
|
| 295 |
return {
|
| 296 |
+
"temperature",
|
| 297 |
+
"top_k",
|
| 298 |
+
"top_p",
|
| 299 |
+
"repeat_penalty",
|
| 300 |
+
"num_ctx",
|
| 301 |
+
"num_predict",
|
| 302 |
+
"format",
|
| 303 |
+
"seed",
|
| 304 |
}
|
| 305 |
+
|
| 306 |
def get_dspy_params(self) -> set[str]:
|
| 307 |
+
return {"temperature", "top_p", "num_ctx", "seed"}
|
| 308 |
+
|
| 309 |
def format_model_name_for_provider(self, model: str) -> str:
|
| 310 |
"""Ollama models need 'ollama_chat/' prefix for DSPy.
|
| 311 |
+
|
| 312 |
Examples:
|
| 313 |
"llama3.2" -> "ollama_chat/llama3.2"
|
| 314 |
"llama3.2:latest" -> "ollama_chat/llama3.2:latest"
|
| 315 |
"""
|
| 316 |
return f"ollama_chat/{model}" # ✅ Add ollama_chat/ prefix
|
| 317 |
+
|
| 318 |
def validate_config(self, **config) -> Dict[str, Any]:
|
| 319 |
+
if "temperature" in config:
|
| 320 |
+
temp = config["temperature"]
|
| 321 |
if not 0 <= temp <= 2:
|
| 322 |
raise ValueError(f"Temperature must be 0-2, got {temp}")
|
| 323 |
+
|
| 324 |
+
if "top_k" in config:
|
| 325 |
+
if not isinstance(config["top_k"], int) or config["top_k"] < 1:
|
| 326 |
raise ValueError("top_k must be positive integer")
|
| 327 |
+
|
| 328 |
return config
|
| 329 |
+
|
| 330 |
def _create_langchain_instance(self, model: str, **config) -> ChatOllama:
|
| 331 |
+
return ChatOllama(model=self.format_model_name_for_provider(model), **config)
|
| 332 |
+
|
|
|
|
|
|
|
|
|
|
| 333 |
def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
|
|
|
|
| 334 |
return dspy.LM(
|
| 335 |
+
model=self.format_model_name_for_provider(
|
| 336 |
+
model
|
| 337 |
+
), # ✅ With prefix: "ollama_chat/llama3.2"
|
| 338 |
+
**config,
|
| 339 |
+
)
|
src/job_writing_agent/utils/llm_provider_factory.py
CHANGED
|
@@ -10,6 +10,7 @@ from .llm_client import (
|
|
| 10 |
OllamaChatProvider,
|
| 11 |
OpenRouterChatProvider,
|
| 12 |
)
|
|
|
|
| 13 |
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
|
@@ -32,6 +33,7 @@ class LLMFactory:
|
|
| 32 |
>>> dspy.configure(lm=lm)
|
| 33 |
"""
|
| 34 |
|
|
|
|
| 35 |
def __init__(self, default_provider: str = "openrouter"):
|
| 36 |
"""Initialize factory with available providers.
|
| 37 |
|
|
@@ -50,6 +52,7 @@ class LLMFactory:
|
|
| 50 |
f"default: {default_provider}"
|
| 51 |
)
|
| 52 |
|
|
|
|
| 53 |
def create(
|
| 54 |
self,
|
| 55 |
model: str,
|
|
|
|
| 10 |
OllamaChatProvider,
|
| 11 |
OpenRouterChatProvider,
|
| 12 |
)
|
| 13 |
+
from .logging.logging_decorators import log_execution
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
|
|
|
|
| 33 |
>>> dspy.configure(lm=lm)
|
| 34 |
"""
|
| 35 |
|
| 36 |
+
@log_execution
|
| 37 |
def __init__(self, default_provider: str = "openrouter"):
|
| 38 |
"""Initialize factory with available providers.
|
| 39 |
|
|
|
|
| 52 |
f"default: {default_provider}"
|
| 53 |
)
|
| 54 |
|
| 55 |
+
@log_execution
|
| 56 |
def create(
|
| 57 |
self,
|
| 58 |
model: str,
|
src/job_writing_agent/utils/logging/logging_config.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Logging configuration for the application
|
| 3 |
+
|
| 4 |
+
This module provides a centralized logging manager that configures
|
| 5 |
+
logging once at application startup, ensuring consistent log format
|
| 6 |
+
and behavior across all modules.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import logging
|
| 10 |
+
import sys
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from typing_extensions import Optional
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class LoggingManager:
|
| 16 |
+
"""
|
| 17 |
+
Centralized logging configuration manager.
|
| 18 |
+
|
| 19 |
+
Uses Singleton pattern to ensure logging is configured only once.
|
| 20 |
+
|
| 21 |
+
Example:
|
| 22 |
+
>>> manager = LoggingManager()
|
| 23 |
+
>>> manager.configure_logging(log_level=logging.INFO)
|
| 24 |
+
>>> logger = logging.getLogger(__name__)
|
| 25 |
+
>>> logger.info("This will be logged consistently")
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
_instance: Optional["LoggingManager"] = None
|
| 29 |
+
_configured: bool = False
|
| 30 |
+
|
| 31 |
+
def __new__(cls):
|
| 32 |
+
if cls._instance is None:
|
| 33 |
+
cls._instance = super().__new__(cls)
|
| 34 |
+
cls._configured = False
|
| 35 |
+
return cls._instance
|
| 36 |
+
|
| 37 |
+
def configure_logging(
|
| 38 |
+
self,
|
| 39 |
+
log_level: int = logging.INFO,
|
| 40 |
+
log_file: Optional[Path] = None,
|
| 41 |
+
log_format: Optional[str] = None,
|
| 42 |
+
date_format: Optional[str] = None,
|
| 43 |
+
) -> None:
|
| 44 |
+
"""
|
| 45 |
+
Configure logging for the entire application.
|
| 46 |
+
|
| 47 |
+
This should be called once at application startup (e.g., in main()).
|
| 48 |
+
Subsequent calls are ignored if already configured.
|
| 49 |
+
|
| 50 |
+
Args:
|
| 51 |
+
log_level: Logging level (logging.DEBUG, INFO, WARNING, ERROR)
|
| 52 |
+
log_file: Optional path to log file. If None, logs only to console.
|
| 53 |
+
log_format: Optional custom format string. Default includes timestamp, level, module, message.
|
| 54 |
+
date_format: Optional date format string. Default: "%Y-%m-%d %H:%M:%S"
|
| 55 |
+
|
| 56 |
+
Example:
|
| 57 |
+
>>> manager = LoggingManager()
|
| 58 |
+
>>> manager.configure_logging(
|
| 59 |
+
... log_level=logging.INFO,
|
| 60 |
+
... log_file=Path("logs/app.log")
|
| 61 |
+
... )
|
| 62 |
+
"""
|
| 63 |
+
if self._configured:
|
| 64 |
+
# Already configured - don't reconfigure
|
| 65 |
+
return
|
| 66 |
+
|
| 67 |
+
# Default format: [2025-01-15 10:30:45] INFO module_name: message
|
| 68 |
+
if log_format is None:
|
| 69 |
+
log_format = "[%(asctime)s] %(levelname)-8s %(name)s: %(message)s"
|
| 70 |
+
|
| 71 |
+
if date_format is None:
|
| 72 |
+
date_format = "%Y-%m-%d %H:%M:%S"
|
| 73 |
+
|
| 74 |
+
# Create formatter
|
| 75 |
+
formatter = logging.Formatter(log_format, datefmt=date_format)
|
| 76 |
+
|
| 77 |
+
# Configure root logger
|
| 78 |
+
root_logger = logging.getLogger()
|
| 79 |
+
root_logger.setLevel(log_level)
|
| 80 |
+
|
| 81 |
+
# Remove existing handlers to avoid duplicates
|
| 82 |
+
root_logger.handlers.clear()
|
| 83 |
+
|
| 84 |
+
# Console handler (always add)
|
| 85 |
+
console_handler = logging.StreamHandler(sys.stdout)
|
| 86 |
+
console_handler.setLevel(log_level)
|
| 87 |
+
console_handler.setFormatter(formatter)
|
| 88 |
+
root_logger.addHandler(console_handler)
|
| 89 |
+
|
| 90 |
+
# File handler (if log_file specified)
|
| 91 |
+
if log_file:
|
| 92 |
+
# Create log directory if it doesn't exist
|
| 93 |
+
log_file.parent.mkdir(parents=True, exist_ok=True)
|
| 94 |
+
|
| 95 |
+
file_handler = logging.FileHandler(log_file, mode="a", encoding="utf-8")
|
| 96 |
+
file_handler.setLevel(log_level)
|
| 97 |
+
file_handler.setFormatter(formatter)
|
| 98 |
+
root_logger.addHandler(file_handler)
|
| 99 |
+
|
| 100 |
+
self._configured = True
|
| 101 |
+
|
| 102 |
+
# Log that logging is configured
|
| 103 |
+
logger = logging.getLogger(__name__)
|
| 104 |
+
logger.info(
|
| 105 |
+
f"Logging configured: level={logging.getLevelName(log_level)}, "
|
| 106 |
+
f"file={'enabled' if log_file else 'disabled'}"
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
+
def is_configured(self) -> bool:
|
| 110 |
+
"""Check if logging has been configured."""
|
| 111 |
+
return self._configured
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
# Convenience function for easy access
|
| 115 |
+
def get_logger(name: str) -> logging.Logger:
|
| 116 |
+
"""
|
| 117 |
+
Get a logger instance for a module.
|
| 118 |
+
|
| 119 |
+
This is a convenience function that ensures consistent logger creation.
|
| 120 |
+
Use this instead of logging.getLogger(__name__) for consistency.
|
| 121 |
+
|
| 122 |
+
Args:
|
| 123 |
+
name: Logger name (typically __name__)
|
| 124 |
+
|
| 125 |
+
Returns:
|
| 126 |
+
Logger instance
|
| 127 |
+
|
| 128 |
+
Example:
|
| 129 |
+
>>> logger = get_logger(__name__)
|
| 130 |
+
>>> logger.info("Application started")
|
| 131 |
+
"""
|
| 132 |
+
return logging.getLogger(name)
|
src/job_writing_agent/utils/logging/logging_decorators.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Simple decorators for logging.
|
| 3 |
+
|
| 4 |
+
These decorators add logging behavior without cluttering your function code.
|
| 5 |
+
Keep it simple - just the essentials.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import functools
|
| 9 |
+
import logging
|
| 10 |
+
import time
|
| 11 |
+
from typing import Callable, TypeVar
|
| 12 |
+
|
| 13 |
+
# Type variable for function signatures
|
| 14 |
+
F = TypeVar("F", bound=Callable)
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def log_execution(func: F) -> F:
|
| 20 |
+
"""
|
| 21 |
+
Simple decorator to log when a function starts and finishes.
|
| 22 |
+
|
| 23 |
+
Logs entry, exit, and how long it took.
|
| 24 |
+
|
| 25 |
+
Example:
|
| 26 |
+
>>> @log_execution
|
| 27 |
+
>>> def process_data(data: str) -> str:
|
| 28 |
+
... return data.upper()
|
| 29 |
+
>>> process_data("hello")
|
| 30 |
+
# Logs: "Entering process_data" ... "Exiting process_data (took 0.001s)"
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
@functools.wraps(func)
|
| 34 |
+
def log_execution_wrapper(*args, **kwargs):
|
| 35 |
+
func_name = func.__name__
|
| 36 |
+
logger.info(f"Entering {func_name}")
|
| 37 |
+
|
| 38 |
+
start_time = time.time()
|
| 39 |
+
try:
|
| 40 |
+
result = func(*args, **kwargs)
|
| 41 |
+
elapsed = time.time() - start_time
|
| 42 |
+
logger.info(f"Exiting {func_name} (took {elapsed:.3f}s)")
|
| 43 |
+
return result
|
| 44 |
+
except Exception as e:
|
| 45 |
+
elapsed = time.time() - start_time
|
| 46 |
+
logger.error(f"{func_name} failed after {elapsed:.3f}s: {e}", exc_info=True)
|
| 47 |
+
raise
|
| 48 |
+
|
| 49 |
+
return log_execution_wrapper
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def log_async(func: F) -> F:
|
| 53 |
+
"""
|
| 54 |
+
Simple decorator for async functions - logs entry, exit, and timing.
|
| 55 |
+
|
| 56 |
+
Example:
|
| 57 |
+
>>> @log_async
|
| 58 |
+
>>> async def fetch_data(url: str) -> dict:
|
| 59 |
+
... return await http.get(url)
|
| 60 |
+
"""
|
| 61 |
+
|
| 62 |
+
@functools.wraps(func)
|
| 63 |
+
async def log_async_wrapper(*args, **kwargs):
|
| 64 |
+
func_name = func.__name__
|
| 65 |
+
logger.info(f"Entering async {func_name}")
|
| 66 |
+
|
| 67 |
+
start_time = time.time()
|
| 68 |
+
try:
|
| 69 |
+
result = await func(*args, **kwargs)
|
| 70 |
+
elapsed = time.time() - start_time
|
| 71 |
+
logger.info(f"Exiting async {func_name} (took {elapsed:.3f}s)")
|
| 72 |
+
return result
|
| 73 |
+
except Exception as e:
|
| 74 |
+
elapsed = time.time() - start_time
|
| 75 |
+
logger.error(f"{func_name} failed after {elapsed:.3f}s: {e}", exc_info=True)
|
| 76 |
+
raise
|
| 77 |
+
|
| 78 |
+
return log_async_wrapper
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def log_errors(func: F) -> F:
|
| 82 |
+
"""
|
| 83 |
+
Simple decorator to catch and log exceptions.
|
| 84 |
+
|
| 85 |
+
Logs the error, then re-raises it so your code still fails normally.
|
| 86 |
+
|
| 87 |
+
Example:
|
| 88 |
+
>>> @log_errors
|
| 89 |
+
>>> def risky_operation():
|
| 90 |
+
... raise ValueError("Something went wrong")
|
| 91 |
+
>>> risky_operation()
|
| 92 |
+
# Logs the error, then raises it
|
| 93 |
+
"""
|
| 94 |
+
|
| 95 |
+
@functools.wraps(func)
|
| 96 |
+
def log_errors_wrapper(*args, **kwargs):
|
| 97 |
+
try:
|
| 98 |
+
return func(*args, **kwargs)
|
| 99 |
+
except Exception as e:
|
| 100 |
+
logger.error(f"Error in {func.__name__}: {e}", exc_info=True)
|
| 101 |
+
raise
|
| 102 |
+
|
| 103 |
+
return log_errors_wrapper
|
src/job_writing_agent/workflow.py
CHANGED
|
@@ -6,13 +6,13 @@ This module provides the JobWorkflow class and CLI runner.
|
|
| 6 |
import asyncio
|
| 7 |
import logging
|
| 8 |
import sys
|
|
|
|
| 9 |
from datetime import datetime
|
| 10 |
from functools import cached_property
|
| 11 |
from typing import Optional, Dict, Any
|
| 12 |
|
| 13 |
-
from langchain_core.tracers import ConsoleCallbackHandler
|
| 14 |
from langgraph.graph import StateGraph
|
| 15 |
-
from langfuse import Langfuse
|
| 16 |
from langgraph.graph.state import CompiledStateGraph
|
| 17 |
|
| 18 |
from job_writing_agent.agents.nodes import (
|
|
@@ -21,96 +21,274 @@ from job_writing_agent.agents.nodes import (
|
|
| 21 |
finalize_document,
|
| 22 |
human_approval,
|
| 23 |
)
|
| 24 |
-
from job_writing_agent.classes import
|
| 25 |
-
from job_writing_agent.nodes import
|
| 26 |
from job_writing_agent.nodes.research_workflow import research_workflow
|
| 27 |
from job_writing_agent.utils.application_cli_interface import handle_cli
|
| 28 |
from job_writing_agent.utils.result_utils import print_result, save_result
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
logger = logging.getLogger(__name__)
|
| 32 |
|
| 33 |
|
| 34 |
class JobWorkflow:
|
| 35 |
"""
|
| 36 |
-
Workflow
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"""
|
| 38 |
|
| 39 |
def __init__(self, resume: str, job_description_source: str, content: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
self.resume = resume
|
| 41 |
self.job_description_source = job_description_source
|
| 42 |
self.content = content
|
| 43 |
-
self.dataloading = Dataloading()
|
| 44 |
-
self.langfuse = Langfuse()
|
| 45 |
|
| 46 |
@cached_property
|
| 47 |
-
def app_state(self) ->
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
content
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
-
@cached_property
|
| 60 |
def job_app_graph(self) -> StateGraph:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
graph = StateGraph(DataLoadState)
|
| 62 |
-
|
| 63 |
-
|
|
|
|
| 64 |
graph.add_node("research", research_workflow)
|
| 65 |
graph.add_node("create_draft", create_draft)
|
| 66 |
-
graph.add_node("variations", generate_variations)
|
| 67 |
-
graph.add_node("self_consistency", self_consistency_vote)
|
| 68 |
graph.add_node("critique", critique_draft)
|
| 69 |
graph.add_node("human_approval", human_approval)
|
| 70 |
graph.add_node("finalize", finalize_document)
|
| 71 |
|
| 72 |
-
|
|
|
|
| 73 |
graph.set_finish_point("finalize")
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
graph.add_edge("research", "create_draft")
|
| 77 |
-
graph.add_edge("create_draft", "
|
| 78 |
-
graph.add_edge("variations", "self_consistency")
|
| 79 |
-
graph.add_edge("self_consistency", "critique")
|
| 80 |
graph.add_edge("critique", "human_approval")
|
| 81 |
graph.add_edge("human_approval", "finalize")
|
|
|
|
| 82 |
return graph
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
async def run(self) -> Optional[Dict[str, Any]]:
|
| 85 |
"""
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
"""
|
| 88 |
try:
|
| 89 |
compiled_graph = self.compile()
|
| 90 |
except Exception as e:
|
| 91 |
-
logger.error("Error compiling graph: %s", e)
|
| 92 |
return None
|
| 93 |
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
config = {
|
| 96 |
"configurable": {
|
| 97 |
-
"thread_id":
|
| 98 |
-
"callbacks":
|
| 99 |
"run_name": run_name,
|
| 100 |
-
"
|
|
|
|
| 101 |
},
|
| 102 |
"recursion_limit": 10,
|
| 103 |
}
|
|
|
|
| 104 |
try:
|
| 105 |
-
self.app_state["current_node"] = "
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
graph_output = await compiled_graph.ainvoke(self.app_state, config=config)
|
|
|
|
|
|
|
| 107 |
except Exception as e:
|
| 108 |
-
logger.error("Error running graph: %s", e)
|
| 109 |
return None
|
| 110 |
-
return graph_output
|
| 111 |
|
|
|
|
|
|
|
| 112 |
def compile(self) -> CompiledStateGraph:
|
| 113 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
return self.job_app_graph.compile()
|
| 115 |
|
| 116 |
|
|
@@ -122,9 +300,10 @@ def main():
|
|
| 122 |
content=args.content_type,
|
| 123 |
)
|
| 124 |
result = asyncio.run(workflow.run())
|
|
|
|
| 125 |
if result:
|
| 126 |
-
print_result(args.content_type, result["
|
| 127 |
-
save_result(args.content_type, result["
|
| 128 |
print("Workflow completed successfully.")
|
| 129 |
else:
|
| 130 |
print("Error running workflow.")
|
|
|
|
| 6 |
import asyncio
|
| 7 |
import logging
|
| 8 |
import sys
|
| 9 |
+
import os
|
| 10 |
from datetime import datetime
|
| 11 |
from functools import cached_property
|
| 12 |
from typing import Optional, Dict, Any
|
| 13 |
|
| 14 |
+
from langchain_core.tracers import ConsoleCallbackHandler, LangChainTracer
|
| 15 |
from langgraph.graph import StateGraph
|
|
|
|
| 16 |
from langgraph.graph.state import CompiledStateGraph
|
| 17 |
|
| 18 |
from job_writing_agent.agents.nodes import (
|
|
|
|
| 21 |
finalize_document,
|
| 22 |
human_approval,
|
| 23 |
)
|
| 24 |
+
from job_writing_agent.classes import DataLoadState
|
| 25 |
+
from job_writing_agent.nodes.initializing import data_loading_workflow
|
| 26 |
from job_writing_agent.nodes.research_workflow import research_workflow
|
| 27 |
from job_writing_agent.utils.application_cli_interface import handle_cli
|
| 28 |
from job_writing_agent.utils.result_utils import print_result, save_result
|
| 29 |
+
from job_writing_agent.utils.logging.logging_decorators import (
|
| 30 |
+
log_execution,
|
| 31 |
+
log_errors,
|
| 32 |
+
)
|
| 33 |
|
| 34 |
logger = logging.getLogger(__name__)
|
| 35 |
|
| 36 |
|
| 37 |
class JobWorkflow:
|
| 38 |
"""
|
| 39 |
+
Workflow orchestrator for the job application writer.
|
| 40 |
+
|
| 41 |
+
This class coordinates the execution of the job application writing workflow,
|
| 42 |
+
managing the LangGraph state machine and LangSmith tracing. It follows the
|
| 43 |
+
orchestrator pattern, coordinating multiple subgraphs and nodes without
|
| 44 |
+
implementing business logic itself.
|
| 45 |
+
|
| 46 |
+
The workflow consists of:
|
| 47 |
+
1. Data Loading: Parse resume and job description (parallel subgraph)
|
| 48 |
+
2. Research: Company research and relevance filtering (subgraph)
|
| 49 |
+
3. Draft Creation: Generate initial application material
|
| 50 |
+
4. Critique: AI-powered feedback on the draft
|
| 51 |
+
5. Human Approval: User feedback collection
|
| 52 |
+
6. Finalization: Incorporate feedback and produce final output
|
| 53 |
"""
|
| 54 |
|
| 55 |
def __init__(self, resume: str, job_description_source: str, content: str):
|
| 56 |
+
"""
|
| 57 |
+
Initialize the JobWorkflow orchestrator.
|
| 58 |
+
|
| 59 |
+
Parameters
|
| 60 |
+
----------
|
| 61 |
+
resume: str
|
| 62 |
+
Path to the resume file or resume text.
|
| 63 |
+
job_description_source: str
|
| 64 |
+
URL, file path, or text content of the job description.
|
| 65 |
+
content: str
|
| 66 |
+
Type of application material to generate ("cover_letter", "bullets", "linkedin_note").
|
| 67 |
+
"""
|
| 68 |
self.resume = resume
|
| 69 |
self.job_description_source = job_description_source
|
| 70 |
self.content = content
|
|
|
|
|
|
|
| 71 |
|
| 72 |
@cached_property
|
| 73 |
+
def app_state(self) -> DataLoadState:
|
| 74 |
+
"""
|
| 75 |
+
Get the initial application state for the workflow.
|
| 76 |
+
|
| 77 |
+
Returns
|
| 78 |
+
-------
|
| 79 |
+
DataLoadState
|
| 80 |
+
Initialized state dictionary with resume path, job description source,
|
| 81 |
+
content type, and empty messages list.
|
| 82 |
+
"""
|
| 83 |
+
|
| 84 |
+
return {
|
| 85 |
+
"resume_path": self.resume,
|
| 86 |
+
"job_description_source": self.job_description_source,
|
| 87 |
+
"content": self.content,
|
| 88 |
+
"current_node": "",
|
| 89 |
+
"messages": [],
|
| 90 |
+
"company_research_data": {},
|
| 91 |
+
}
|
| 92 |
|
|
|
|
| 93 |
def job_app_graph(self) -> StateGraph:
|
| 94 |
+
"""
|
| 95 |
+
Build and configure the job application workflow graph.
|
| 96 |
+
|
| 97 |
+
This method constructs the LangGraph state machine with all nodes and edges.
|
| 98 |
+
The graph is cached as a property to avoid rebuilding on each access.
|
| 99 |
+
|
| 100 |
+
Workflow Structure:
|
| 101 |
+
- Entry: Data loading subgraph (parallel resume + job description parsing)
|
| 102 |
+
- Research: Company research subgraph
|
| 103 |
+
- Draft Creation: Generate initial application material
|
| 104 |
+
- Critique: AI feedback on draft
|
| 105 |
+
- Human Approval: User feedback collection
|
| 106 |
+
- Finalization: Produce final output
|
| 107 |
+
- Exit: Finalize node
|
| 108 |
+
|
| 109 |
+
Returns
|
| 110 |
+
-------
|
| 111 |
+
StateGraph
|
| 112 |
+
Configured LangGraph state machine ready for compilation.
|
| 113 |
+
"""
|
| 114 |
graph = StateGraph(DataLoadState)
|
| 115 |
+
|
| 116 |
+
# Add workflow nodes (subgraphs and individual nodes)
|
| 117 |
+
graph.add_node("load", data_loading_workflow)
|
| 118 |
graph.add_node("research", research_workflow)
|
| 119 |
graph.add_node("create_draft", create_draft)
|
|
|
|
|
|
|
| 120 |
graph.add_node("critique", critique_draft)
|
| 121 |
graph.add_node("human_approval", human_approval)
|
| 122 |
graph.add_node("finalize", finalize_document)
|
| 123 |
|
| 124 |
+
# Set entry and exit points
|
| 125 |
+
graph.set_entry_point("load")
|
| 126 |
graph.set_finish_point("finalize")
|
| 127 |
+
|
| 128 |
+
# Conditional routing after data loading
|
| 129 |
+
def route_after_load(state: DataLoadState) -> str:
|
| 130 |
+
"""
|
| 131 |
+
Route based on next_node set by data loading subgraph.
|
| 132 |
+
|
| 133 |
+
The data loading subgraph sets next_node to either "load" (if validation
|
| 134 |
+
fails) or "research" (if validation passes).
|
| 135 |
+
|
| 136 |
+
Parameters
|
| 137 |
+
----------
|
| 138 |
+
state: DataLoadState
|
| 139 |
+
Current workflow state.
|
| 140 |
+
|
| 141 |
+
Returns
|
| 142 |
+
-------
|
| 143 |
+
str
|
| 144 |
+
Next node name: "load" or "research".
|
| 145 |
+
"""
|
| 146 |
+
next_node = state.get("next_node", "research") # Default to research
|
| 147 |
+
logger.info(f"Routing after load: {next_node}")
|
| 148 |
+
return next_node
|
| 149 |
+
|
| 150 |
+
graph.add_conditional_edges(
|
| 151 |
+
"load",
|
| 152 |
+
route_after_load,
|
| 153 |
+
{
|
| 154 |
+
"load": "load", # Loop back to load subgraph if validation fails
|
| 155 |
+
"research": "research", # Proceed to research if validation passes
|
| 156 |
+
},
|
| 157 |
+
)
|
| 158 |
+
|
| 159 |
+
# Sequential edges for main workflow
|
| 160 |
graph.add_edge("research", "create_draft")
|
| 161 |
+
graph.add_edge("create_draft", "critique")
|
|
|
|
|
|
|
| 162 |
graph.add_edge("critique", "human_approval")
|
| 163 |
graph.add_edge("human_approval", "finalize")
|
| 164 |
+
|
| 165 |
return graph
|
| 166 |
|
| 167 |
+
def _get_callbacks(self) -> list:
|
| 168 |
+
"""
|
| 169 |
+
Get list of callbacks including LangSmith tracer with enhanced metadata.
|
| 170 |
+
|
| 171 |
+
This method creates callback handlers for LangGraph execution, including
|
| 172 |
+
LangSmith tracing with workflow-level metadata and tags for better
|
| 173 |
+
observability and filtering in the LangSmith UI.
|
| 174 |
+
|
| 175 |
+
Returns
|
| 176 |
+
-------
|
| 177 |
+
list
|
| 178 |
+
List of callback handlers for LangGraph execution, including:
|
| 179 |
+
- ConsoleCallbackHandler: Console output
|
| 180 |
+
- LangChainTracer: LangSmith tracing (if enabled)
|
| 181 |
+
"""
|
| 182 |
+
callbacks = [ConsoleCallbackHandler()]
|
| 183 |
+
|
| 184 |
+
# Add LangSmith tracer if tracing is enabled via environment variable
|
| 185 |
+
if os.getenv("LANGSMITH_TRACING", "").lower() == "true":
|
| 186 |
+
try:
|
| 187 |
+
# LangChainTracer automatically reads from environment variables:
|
| 188 |
+
# - LANGSMITH_API_KEY
|
| 189 |
+
# - LANGSMITH_PROJECT (optional, defaults to "default")
|
| 190 |
+
# - LANGSMITH_ENDPOINT (optional, defaults to https://api.smith.langchain.com)
|
| 191 |
+
langsmith_tracer = LangChainTracer(
|
| 192 |
+
project_name=os.getenv(
|
| 193 |
+
"LANGSMITH_PROJECT", "job_application_writer"
|
| 194 |
+
)
|
| 195 |
+
)
|
| 196 |
+
callbacks.append(langsmith_tracer)
|
| 197 |
+
logger.info("LangSmith tracing enabled with metadata")
|
| 198 |
+
except Exception as e:
|
| 199 |
+
logger.warning(
|
| 200 |
+
f"Failed to initialize LangSmith tracer: {e}. Continuing without tracing."
|
| 201 |
+
)
|
| 202 |
+
else:
|
| 203 |
+
logger.debug(
|
| 204 |
+
"LangSmith tracing is not enabled (LANGSMITH_TRACING != 'true')"
|
| 205 |
+
)
|
| 206 |
+
|
| 207 |
+
return callbacks
|
| 208 |
+
|
| 209 |
+
@log_execution
|
| 210 |
+
@log_errors
|
| 211 |
async def run(self) -> Optional[Dict[str, Any]]:
|
| 212 |
"""
|
| 213 |
+
Execute the complete job application writer workflow.
|
| 214 |
+
|
| 215 |
+
This method compiles the graph, configures LangSmith tracing with
|
| 216 |
+
enhanced metadata, and executes the workflow. It handles errors
|
| 217 |
+
gracefully and returns the final state or None if execution fails.
|
| 218 |
+
|
| 219 |
+
Returns
|
| 220 |
+
-------
|
| 221 |
+
Optional[Dict[str, Any]]
|
| 222 |
+
Final workflow state containing the generated application material
|
| 223 |
+
in the "output_data" field, or None if execution fails.
|
| 224 |
"""
|
| 225 |
try:
|
| 226 |
compiled_graph = self.compile()
|
| 227 |
except Exception as e:
|
| 228 |
+
logger.error("Error compiling graph: %s", e, exc_info=True)
|
| 229 |
return None
|
| 230 |
|
| 231 |
+
# Prepare enhanced LangSmith metadata and tags
|
| 232 |
+
content = self.app_state.get("content", "cover_letter")
|
| 233 |
+
thread_id = f"job_app_session_{datetime.now():%Y%m%d%H%M%S}"
|
| 234 |
+
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
|
| 235 |
+
|
| 236 |
+
# Enhanced metadata for better trace filtering and analysis
|
| 237 |
+
metadata = {
|
| 238 |
+
"workflow": "job_application_writer",
|
| 239 |
+
"content_type": content,
|
| 240 |
+
"session_id": thread_id,
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
# Enhanced tags for trace organization
|
| 244 |
+
tags = [
|
| 245 |
+
"job-application",
|
| 246 |
+
content,
|
| 247 |
+
]
|
| 248 |
+
|
| 249 |
+
# Descriptive run name for LangSmith UI
|
| 250 |
+
run_name = f"JobAppWriter.{content}.{timestamp}"
|
| 251 |
+
|
| 252 |
config = {
|
| 253 |
"configurable": {
|
| 254 |
+
"thread_id": thread_id,
|
| 255 |
+
"callbacks": self._get_callbacks(),
|
| 256 |
"run_name": run_name,
|
| 257 |
+
"metadata": metadata,
|
| 258 |
+
"tags": tags,
|
| 259 |
},
|
| 260 |
"recursion_limit": 10,
|
| 261 |
}
|
| 262 |
+
|
| 263 |
try:
|
| 264 |
+
self.app_state["current_node"] = "load"
|
| 265 |
+
logger.info(
|
| 266 |
+
f"Starting workflow execution: {run_name} "
|
| 267 |
+
f"(content_type={content}, session_id={thread_id})"
|
| 268 |
+
)
|
| 269 |
graph_output = await compiled_graph.ainvoke(self.app_state, config=config)
|
| 270 |
+
logger.info("Workflow execution completed successfully")
|
| 271 |
+
return graph_output
|
| 272 |
except Exception as e:
|
| 273 |
+
logger.error("Error running graph: %s", e, exc_info=True)
|
| 274 |
return None
|
|
|
|
| 275 |
|
| 276 |
+
@log_execution
|
| 277 |
+
@log_errors
|
| 278 |
def compile(self) -> CompiledStateGraph:
|
| 279 |
+
"""
|
| 280 |
+
Compile the workflow graph into an executable state machine.
|
| 281 |
+
|
| 282 |
+
Returns
|
| 283 |
+
-------
|
| 284 |
+
CompiledStateGraph
|
| 285 |
+
Compiled LangGraph state machine ready for execution.
|
| 286 |
+
|
| 287 |
+
Raises
|
| 288 |
+
------
|
| 289 |
+
Exception
|
| 290 |
+
If graph compilation fails (e.g., invalid edges, missing nodes).
|
| 291 |
+
"""
|
| 292 |
return self.job_app_graph.compile()
|
| 293 |
|
| 294 |
|
|
|
|
| 300 |
content=args.content_type,
|
| 301 |
)
|
| 302 |
result = asyncio.run(workflow.run())
|
| 303 |
+
# print(f"result: {result}")
|
| 304 |
if result:
|
| 305 |
+
print_result(args.content_type, result["output_data"])
|
| 306 |
+
save_result(args.content_type, result["output_data"])
|
| 307 |
print("Workflow completed successfully.")
|
| 308 |
else:
|
| 309 |
print("Error running workflow.")
|
uv.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|