Merge branch 'main' of https://github.com/rishabh1024/job_writer
Browse files- .dockerignore +74 -0
- .github/agents/PythonMentor.agent.md +0 -0
- .vscode/settings.json +7 -3
- DEPLOYMENT_GUIDE.md +0 -303
- DOCKERFILE_EXPLANATION.md +0 -147
- api-1.json +0 -0
- demo_candidate_store.py +273 -0
- docker-compose.override.example.yml +0 -21
- src/job_writing_agent/agent_memory/__init__.py +0 -0
- src/job_writing_agent/agent_memory/agent_shopping_example.py +249 -0
- src/job_writing_agent/agent_memory/candidate_profile_store.py +345 -0
- src/job_writing_agent/agent_memory/mongodb_logterm_memory.py +0 -0
- uv.lock +0 -0
.dockerignore
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Virtual environments
|
| 2 |
+
app_env/
|
| 3 |
+
venv/
|
| 4 |
+
env/
|
| 5 |
+
.venv/
|
| 6 |
+
ENV/
|
| 7 |
+
|
| 8 |
+
# Python cache
|
| 9 |
+
__pycache__/
|
| 10 |
+
*.py[cod]
|
| 11 |
+
*$py.class
|
| 12 |
+
*.so
|
| 13 |
+
.Python
|
| 14 |
+
*.egg-info/
|
| 15 |
+
dist/
|
| 16 |
+
build/
|
| 17 |
+
|
| 18 |
+
# IDE
|
| 19 |
+
.vscode/
|
| 20 |
+
.cursor/
|
| 21 |
+
.idea/
|
| 22 |
+
*.swp
|
| 23 |
+
*.swo
|
| 24 |
+
*~
|
| 25 |
+
|
| 26 |
+
# Logs
|
| 27 |
+
*.log
|
| 28 |
+
logs/
|
| 29 |
+
src/job_writing_agent/logs/*
|
| 30 |
+
|
| 31 |
+
# Environment files
|
| 32 |
+
.env
|
| 33 |
+
.env.*
|
| 34 |
+
.docker_env
|
| 35 |
+
*.env
|
| 36 |
+
|
| 37 |
+
# Git
|
| 38 |
+
.git/
|
| 39 |
+
.gitignore
|
| 40 |
+
.gitattributes
|
| 41 |
+
|
| 42 |
+
# Documentation
|
| 43 |
+
*.md
|
| 44 |
+
!README.md
|
| 45 |
+
DEPLOYMENT_GUIDE.md
|
| 46 |
+
|
| 47 |
+
# Test files
|
| 48 |
+
test_*.py
|
| 49 |
+
*_test.py
|
| 50 |
+
tests/
|
| 51 |
+
|
| 52 |
+
# Data files (if large)
|
| 53 |
+
*.csv
|
| 54 |
+
*.json
|
| 55 |
+
!langgraph.json
|
| 56 |
+
!pyproject.toml
|
| 57 |
+
|
| 58 |
+
# Docker
|
| 59 |
+
docker-compose.yml
|
| 60 |
+
Dockerfile
|
| 61 |
+
.dockerignore
|
| 62 |
+
.docker_env
|
| 63 |
+
|
| 64 |
+
# LangGraph artifacts
|
| 65 |
+
.langgraph_api/
|
| 66 |
+
*.pkl
|
| 67 |
+
*.pickle
|
| 68 |
+
|
| 69 |
+
# API docs
|
| 70 |
+
api-1.json
|
| 71 |
+
|
| 72 |
+
# OS
|
| 73 |
+
.DS_Store
|
| 74 |
+
Thumbs.db
|
.github/agents/PythonMentor.agent.md
ADDED
|
File without changes
|
.vscode/settings.json
CHANGED
|
@@ -2,9 +2,13 @@
|
|
| 2 |
"python.defaultInterpreterPath": "C:\\Users\\risha\\python-dir\\job_application_agent\\job_writer\\app_env\\Scripts\\python.exe",
|
| 3 |
"python.formatting.provider": "black",
|
| 4 |
"editor.formatOnSave": true,
|
| 5 |
-
"python.formatting.blackArgs": [
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
| 7 |
"python.linting.pylintEnabled": true,
|
| 8 |
"python.linting.lintOnSave": true,
|
| 9 |
-
"python.linting.mypyEnabled": true
|
|
|
|
| 10 |
}
|
|
|
|
| 2 |
"python.defaultInterpreterPath": "C:\\Users\\risha\\python-dir\\job_application_agent\\job_writer\\app_env\\Scripts\\python.exe",
|
| 3 |
"python.formatting.provider": "black",
|
| 4 |
"editor.formatOnSave": true,
|
| 5 |
+
"python.formatting.blackArgs": [
|
| 6 |
+
"--line-length",
|
| 7 |
+
"88"
|
| 8 |
+
],
|
| 9 |
+
"python.linting.enabled": true,
|
| 10 |
"python.linting.pylintEnabled": true,
|
| 11 |
"python.linting.lintOnSave": true,
|
| 12 |
+
"python.linting.mypyEnabled": true,
|
| 13 |
+
"python-envs.pythonProjects": []
|
| 14 |
}
|
DEPLOYMENT_GUIDE.md
DELETED
|
@@ -1,303 +0,0 @@
|
|
| 1 |
-
# Deployment Guide for Job Application Agent
|
| 2 |
-
|
| 3 |
-
## Option 1: LangGraph Cloud (Easiest & Recommended)
|
| 4 |
-
|
| 5 |
-
### Prerequisites
|
| 6 |
-
- LangGraph CLI installed (`langgraph-cli` in requirements.txt)
|
| 7 |
-
- `langgraph.json` already configured ✅
|
| 8 |
-
|
| 9 |
-
### Steps
|
| 10 |
-
|
| 11 |
-
1. **Install LangGraph CLI** (if not already):
|
| 12 |
-
```powershell
|
| 13 |
-
pip install langgraph-cli
|
| 14 |
-
```
|
| 15 |
-
|
| 16 |
-
2. **Login to LangGraph Cloud**:
|
| 17 |
-
```powershell
|
| 18 |
-
langgraph login
|
| 19 |
-
```
|
| 20 |
-
|
| 21 |
-
3. **Deploy your agent**:
|
| 22 |
-
```powershell
|
| 23 |
-
langgraph deploy
|
| 24 |
-
```
|
| 25 |
-
|
| 26 |
-
4. **Get your API endpoint** - LangGraph Cloud provides a REST API automatically
|
| 27 |
-
|
| 28 |
-
### Cost
|
| 29 |
-
- **Free tier**: Limited requests/month
|
| 30 |
-
- **Paid**: Pay-per-use pricing
|
| 31 |
-
|
| 32 |
-
### Pros
|
| 33 |
-
- ✅ Zero infrastructure management
|
| 34 |
-
- ✅ Built-in state persistence
|
| 35 |
-
- ✅ Automatic API generation
|
| 36 |
-
- ✅ LangSmith integration
|
| 37 |
-
- ✅ Perfect for LangGraph apps
|
| 38 |
-
|
| 39 |
-
### Cons
|
| 40 |
-
- ⚠️ Vendor lock-in
|
| 41 |
-
- ⚠️ Limited customization
|
| 42 |
-
|
| 43 |
-
---
|
| 44 |
-
|
| 45 |
-
## Option 2: Railway.app (Simple & Cheap)
|
| 46 |
-
|
| 47 |
-
### Steps
|
| 48 |
-
|
| 49 |
-
1. **Create a FastAPI wrapper** (create `api.py`):
|
| 50 |
-
```python
|
| 51 |
-
from fastapi import FastAPI, File, UploadFile
|
| 52 |
-
from job_writing_agent.workflow import JobWorkflow
|
| 53 |
-
import tempfile
|
| 54 |
-
import os
|
| 55 |
-
|
| 56 |
-
app = FastAPI()
|
| 57 |
-
|
| 58 |
-
@app.post("/generate")
|
| 59 |
-
async def generate_application(
|
| 60 |
-
resume: UploadFile = File(...),
|
| 61 |
-
job_description: str,
|
| 62 |
-
content_type: str = "cover_letter"
|
| 63 |
-
):
|
| 64 |
-
# Save resume temporarily
|
| 65 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
|
| 66 |
-
tmp.write(await resume.read())
|
| 67 |
-
resume_path = tmp.name
|
| 68 |
-
|
| 69 |
-
try:
|
| 70 |
-
workflow = JobWorkflow(
|
| 71 |
-
resume=resume_path,
|
| 72 |
-
job_description_source=job_description,
|
| 73 |
-
content=content_type
|
| 74 |
-
)
|
| 75 |
-
result = await workflow.run()
|
| 76 |
-
return {"result": result}
|
| 77 |
-
finally:
|
| 78 |
-
os.unlink(resume_path)
|
| 79 |
-
```
|
| 80 |
-
|
| 81 |
-
2. **Create `Procfile`**:
|
| 82 |
-
```
|
| 83 |
-
web: uvicorn api:app --host 0.0.0.0 --port $PORT
|
| 84 |
-
```
|
| 85 |
-
|
| 86 |
-
3. **Deploy to Railway**:
|
| 87 |
-
- Sign up at [railway.app](https://railway.app)
|
| 88 |
-
- Connect GitHub repo
|
| 89 |
-
- Railway auto-detects Python and runs `Procfile`
|
| 90 |
-
|
| 91 |
-
### Cost
|
| 92 |
-
- **Free tier**: $5 credit/month
|
| 93 |
-
- **Hobby**: $5/month for 512MB RAM
|
| 94 |
-
- **Pro**: $20/month for 2GB RAM
|
| 95 |
-
|
| 96 |
-
### Pros
|
| 97 |
-
- ✅ Very simple deployment
|
| 98 |
-
- ✅ Auto-scaling
|
| 99 |
-
- ✅ Free tier available
|
| 100 |
-
- ✅ Automatic HTTPS
|
| 101 |
-
|
| 102 |
-
### Cons
|
| 103 |
-
- ⚠️ Need to add FastAPI wrapper
|
| 104 |
-
- ⚠️ State management needs Redis/Postgres
|
| 105 |
-
|
| 106 |
-
---
|
| 107 |
-
|
| 108 |
-
## Option 3: Render.com (Similar to Railway)
|
| 109 |
-
|
| 110 |
-
### Steps
|
| 111 |
-
|
| 112 |
-
1. **Create `render.yaml`**:
|
| 113 |
-
```yaml
|
| 114 |
-
services:
|
| 115 |
-
- type: web
|
| 116 |
-
name: job-writer-api
|
| 117 |
-
env: python
|
| 118 |
-
buildCommand: pip install -r requirements.txt
|
| 119 |
-
startCommand: uvicorn api:app --host 0.0.0.0 --port $PORT
|
| 120 |
-
envVars:
|
| 121 |
-
- key: OPENROUTER_API_KEY
|
| 122 |
-
sync: false
|
| 123 |
-
- key: TAVILY_API_KEY
|
| 124 |
-
sync: false
|
| 125 |
-
```
|
| 126 |
-
|
| 127 |
-
2. **Deploy**:
|
| 128 |
-
- Connect GitHub repo to Render
|
| 129 |
-
- Render auto-detects `render.yaml`
|
| 130 |
-
|
| 131 |
-
### Cost
|
| 132 |
-
- **Free tier**: 750 hours/month (sleeps after 15min inactivity)
|
| 133 |
-
- **Starter**: $7/month (always on)
|
| 134 |
-
|
| 135 |
-
### Pros
|
| 136 |
-
- ✅ Free tier for testing
|
| 137 |
-
- ✅ Simple YAML config
|
| 138 |
-
- ✅ Auto-deploy from Git
|
| 139 |
-
|
| 140 |
-
### Cons
|
| 141 |
-
- ⚠️ Free tier sleeps (cold starts)
|
| 142 |
-
- ⚠️ Need FastAPI wrapper
|
| 143 |
-
|
| 144 |
-
---
|
| 145 |
-
|
| 146 |
-
## Option 4: Fly.io (Good Free Tier)
|
| 147 |
-
|
| 148 |
-
### Steps
|
| 149 |
-
|
| 150 |
-
1. **Install Fly CLI**:
|
| 151 |
-
```powershell
|
| 152 |
-
iwr https://fly.io/install.ps1 -useb | iex
|
| 153 |
-
```
|
| 154 |
-
|
| 155 |
-
2. **Create `Dockerfile`**:
|
| 156 |
-
```dockerfile
|
| 157 |
-
FROM python:3.12-slim
|
| 158 |
-
|
| 159 |
-
WORKDIR /app
|
| 160 |
-
COPY requirements.txt .
|
| 161 |
-
RUN pip install --no-cache-dir -r requirements.txt
|
| 162 |
-
|
| 163 |
-
COPY . .
|
| 164 |
-
|
| 165 |
-
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8080"]
|
| 166 |
-
```
|
| 167 |
-
|
| 168 |
-
3. **Deploy**:
|
| 169 |
-
```powershell
|
| 170 |
-
fly launch
|
| 171 |
-
fly deploy
|
| 172 |
-
```
|
| 173 |
-
|
| 174 |
-
### Cost
|
| 175 |
-
- **Free tier**: 3 shared-cpu VMs, 3GB storage
|
| 176 |
-
- **Paid**: $1.94/month per VM
|
| 177 |
-
|
| 178 |
-
### Pros
|
| 179 |
-
- ✅ Generous free tier
|
| 180 |
-
- ✅ Global edge deployment
|
| 181 |
-
- ✅ Docker-based (flexible)
|
| 182 |
-
|
| 183 |
-
### Cons
|
| 184 |
-
- ⚠️ Need Docker knowledge
|
| 185 |
-
- ⚠️ Need FastAPI wrapper
|
| 186 |
-
|
| 187 |
-
---
|
| 188 |
-
|
| 189 |
-
## Option 5: AWS Lambda (Serverless - Pay Per Use)
|
| 190 |
-
|
| 191 |
-
### Steps
|
| 192 |
-
|
| 193 |
-
1. **Create Lambda handler** (`lambda_handler.py`):
|
| 194 |
-
```python
|
| 195 |
-
import json
|
| 196 |
-
from job_writing_agent.workflow import JobWorkflow
|
| 197 |
-
|
| 198 |
-
def lambda_handler(event, context):
|
| 199 |
-
# Parse event
|
| 200 |
-
body = json.loads(event['body'])
|
| 201 |
-
|
| 202 |
-
workflow = JobWorkflow(
|
| 203 |
-
resume=body['resume_path'],
|
| 204 |
-
job_description_source=body['job_description'],
|
| 205 |
-
content=body.get('content_type', 'cover_letter')
|
| 206 |
-
)
|
| 207 |
-
|
| 208 |
-
result = workflow.run()
|
| 209 |
-
|
| 210 |
-
return {
|
| 211 |
-
'statusCode': 200,
|
| 212 |
-
'body': json.dumps({'result': result})
|
| 213 |
-
}
|
| 214 |
-
```
|
| 215 |
-
|
| 216 |
-
2. **Package and deploy** using AWS SAM or Serverless Framework
|
| 217 |
-
|
| 218 |
-
### Cost
|
| 219 |
-
- **Free tier**: 1M requests/month
|
| 220 |
-
- **Paid**: $0.20 per 1M requests + compute time
|
| 221 |
-
|
| 222 |
-
### Pros
|
| 223 |
-
- ✅ Pay only for usage
|
| 224 |
-
- ✅ Auto-scaling
|
| 225 |
-
- ✅ Very cheap for low traffic
|
| 226 |
-
|
| 227 |
-
### Cons
|
| 228 |
-
- ⚠️ 15min timeout limit
|
| 229 |
-
- ⚠️ Cold starts
|
| 230 |
-
- ⚠️ Complex setup
|
| 231 |
-
- ⚠️ Need to handle state externally
|
| 232 |
-
|
| 233 |
-
---
|
| 234 |
-
|
| 235 |
-
## Recommendation
|
| 236 |
-
|
| 237 |
-
**For your use case, I recommend:**
|
| 238 |
-
|
| 239 |
-
1. **Start with LangGraph Cloud** - Easiest, built for your stack
|
| 240 |
-
2. **If you need more control → Railway** - Simple, good free tier
|
| 241 |
-
3. **If you need serverless → AWS Lambda** - Cheapest for low traffic
|
| 242 |
-
|
| 243 |
-
---
|
| 244 |
-
|
| 245 |
-
## Quick Start: FastAPI Wrapper (for Railway/Render/Fly.io)
|
| 246 |
-
|
| 247 |
-
Create `api.py` in your project root:
|
| 248 |
-
|
| 249 |
-
```python
|
| 250 |
-
from fastapi import FastAPI, File, UploadFile, HTTPException
|
| 251 |
-
from fastapi.responses import JSONResponse
|
| 252 |
-
from job_writing_agent.workflow import JobWorkflow
|
| 253 |
-
import tempfile
|
| 254 |
-
import os
|
| 255 |
-
import asyncio
|
| 256 |
-
|
| 257 |
-
app = FastAPI(title="Job Application Writer API")
|
| 258 |
-
|
| 259 |
-
@app.get("/")
|
| 260 |
-
def health():
|
| 261 |
-
return {"status": "ok"}
|
| 262 |
-
|
| 263 |
-
@app.post("/generate")
|
| 264 |
-
async def generate_application(
|
| 265 |
-
resume: UploadFile = File(...),
|
| 266 |
-
job_description: str,
|
| 267 |
-
content_type: str = "cover_letter"
|
| 268 |
-
):
|
| 269 |
-
"""Generate job application material."""
|
| 270 |
-
# Save resume temporarily
|
| 271 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
|
| 272 |
-
content = await resume.read()
|
| 273 |
-
tmp.write(content)
|
| 274 |
-
resume_path = tmp.name
|
| 275 |
-
|
| 276 |
-
try:
|
| 277 |
-
workflow = JobWorkflow(
|
| 278 |
-
resume=resume_path,
|
| 279 |
-
job_description_source=job_description,
|
| 280 |
-
content=content_type
|
| 281 |
-
)
|
| 282 |
-
|
| 283 |
-
# Run workflow (assuming it's async or can be wrapped)
|
| 284 |
-
result = await asyncio.to_thread(workflow.run)
|
| 285 |
-
|
| 286 |
-
return JSONResponse({
|
| 287 |
-
"status": "success",
|
| 288 |
-
"result": result
|
| 289 |
-
})
|
| 290 |
-
except Exception as e:
|
| 291 |
-
raise HTTPException(status_code=500, detail=str(e))
|
| 292 |
-
finally:
|
| 293 |
-
# Cleanup
|
| 294 |
-
if os.path.exists(resume_path):
|
| 295 |
-
os.unlink(resume_path)
|
| 296 |
-
|
| 297 |
-
if __name__ == "__main__":
|
| 298 |
-
import uvicorn
|
| 299 |
-
uvicorn.run(app, host="0.0.0.0", port=8000)
|
| 300 |
-
```
|
| 301 |
-
|
| 302 |
-
Then update `requirements.txt` to ensure FastAPI and uvicorn are included (they already are ✅).
|
| 303 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
DOCKERFILE_EXPLANATION.md
DELETED
|
@@ -1,147 +0,0 @@
|
|
| 1 |
-
# Dockerfile Explanation
|
| 2 |
-
|
| 3 |
-
This Dockerfile is specifically designed for **LangGraph Cloud/LangServe deployment**. It uses the official LangGraph API base image and configures your agent graphs to be served as REST APIs.
|
| 4 |
-
|
| 5 |
-
## Line-by-Line Breakdown
|
| 6 |
-
|
| 7 |
-
### 1. Base Image (Line 1)
|
| 8 |
-
```dockerfile
|
| 9 |
-
FROM langchain/langgraph-api:3.12
|
| 10 |
-
```
|
| 11 |
-
- **Purpose**: Uses the official LangGraph API base image with Python 3.12
|
| 12 |
-
- **What it includes**: Pre-configured LangGraph runtime, LangServe server, and all LangGraph dependencies
|
| 13 |
-
- **Why**: This image already has everything needed to serve LangGraph workflows as REST APIs
|
| 14 |
-
|
| 15 |
-
---
|
| 16 |
-
|
| 17 |
-
### 2. Install Node Dependencies (Line 9)
|
| 18 |
-
```dockerfile
|
| 19 |
-
RUN PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir -c /api/constraints.txt nodes
|
| 20 |
-
```
|
| 21 |
-
- **Purpose**: Installs the `nodes` package (likely a dependency from your `langgraph.json`)
|
| 22 |
-
- **`PYTHONDONTWRITEBYTECODE=1`**: Prevents creating `.pyc` files (smaller image)
|
| 23 |
-
- **`uv pip`**: Uses `uv` (fast Python package installer) instead of regular `pip`
|
| 24 |
-
- **`--system`**: Installs to system Python (not virtual env)
|
| 25 |
-
- **`--no-cache-dir`**: Doesn't cache pip downloads (smaller image)
|
| 26 |
-
- **`-c /api/constraints.txt`**: Uses constraint file from base image (ensures compatible versions)
|
| 27 |
-
|
| 28 |
-
---
|
| 29 |
-
|
| 30 |
-
### 3. Copy Your Code (Line 14)
|
| 31 |
-
```dockerfile
|
| 32 |
-
ADD . /deps/job_writer
|
| 33 |
-
```
|
| 34 |
-
- **Purpose**: Copies your entire project into `/deps/job_writer` in the container
|
| 35 |
-
- **Why `/deps/`**: LangGraph API expects dependencies in this directory
|
| 36 |
-
- **What gets copied**: All your source code, `pyproject.toml`, `requirements.txt`, etc.
|
| 37 |
-
|
| 38 |
-
---
|
| 39 |
-
|
| 40 |
-
### 4. Install Your Package (Lines 19-21)
|
| 41 |
-
```dockerfile
|
| 42 |
-
RUN for dep in /deps/*; do
|
| 43 |
-
echo "Installing $dep";
|
| 44 |
-
if [ -d "$dep" ]; then
|
| 45 |
-
echo "Installing $dep";
|
| 46 |
-
(cd "$dep" && PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir -c /api/constraints.txt -e .);
|
| 47 |
-
fi;
|
| 48 |
-
done
|
| 49 |
-
```
|
| 50 |
-
- **Purpose**: Installs your `job_writer` package in editable mode (`-e`)
|
| 51 |
-
- **How it works**:
|
| 52 |
-
- Loops through all directories in `/deps/`
|
| 53 |
-
- For each directory, changes into it and runs `pip install -e .`
|
| 54 |
-
- The `-e` flag installs in "editable" mode (changes to code are reflected)
|
| 55 |
-
- **Why**: Makes your package importable as `job_writing_agent` inside the container
|
| 56 |
-
|
| 57 |
-
---
|
| 58 |
-
|
| 59 |
-
### 5. Register Your Graphs (Line 25)
|
| 60 |
-
```dockerfile
|
| 61 |
-
ENV LANGSERVE_GRAPHS='{"job_app_graph": "/deps/job_writer/src/job_writing_agent/workflow.py:job_app_graph", ...}'
|
| 62 |
-
```
|
| 63 |
-
- **Purpose**: Tells LangServe which graphs to expose as REST APIs
|
| 64 |
-
- **Format**: JSON mapping of `graph_name` → `module_path:attribute_name`
|
| 65 |
-
- **What it does**:
|
| 66 |
-
- `job_app_graph` → Exposes `JobWorkflow.job_app_graph` property as an API endpoint
|
| 67 |
-
- `research_workflow` → Exposes the research subgraph
|
| 68 |
-
- `data_loading_workflow` → Exposes the data loading subgraph
|
| 69 |
-
- **Result**: Each graph becomes a REST API endpoint like `/invoke/job_app_graph`
|
| 70 |
-
|
| 71 |
-
---
|
| 72 |
-
|
| 73 |
-
### 6. Protect LangGraph API (Lines 33-35)
|
| 74 |
-
```dockerfile
|
| 75 |
-
RUN mkdir -p /api/langgraph_api /api/langgraph_runtime /api/langgraph_license && \
|
| 76 |
-
touch /api/langgraph_api/__init__.py /api/langgraph_runtime/__init__.py /api/langgraph_license/__init__.py
|
| 77 |
-
RUN PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir --no-deps -e /api
|
| 78 |
-
```
|
| 79 |
-
- **Purpose**: Prevents your dependencies from accidentally overwriting LangGraph API packages
|
| 80 |
-
- **How**:
|
| 81 |
-
1. Creates placeholder `__init__.py` files for LangGraph packages
|
| 82 |
-
2. Reinstalls LangGraph API (without dependencies) to ensure it's not overwritten
|
| 83 |
-
- **Why**: If your `requirements.txt` has conflicting versions, this ensures LangGraph API stays intact
|
| 84 |
-
|
| 85 |
-
---
|
| 86 |
-
|
| 87 |
-
### 7. Cleanup Build Tools (Lines 37-41)
|
| 88 |
-
```dockerfile
|
| 89 |
-
RUN pip uninstall -y pip setuptools wheel
|
| 90 |
-
RUN rm -rf /usr/local/lib/python*/site-packages/pip* ...
|
| 91 |
-
RUN uv pip uninstall --system pip setuptools wheel && rm /usr/bin/uv /usr/bin/uvx
|
| 92 |
-
```
|
| 93 |
-
- **Purpose**: Removes all build tools to make the image smaller and more secure
|
| 94 |
-
- **What gets removed**:
|
| 95 |
-
- `pip`, `setuptools`, `wheel` (Python build tools)
|
| 96 |
-
- `uv` and `uvx` (package installers)
|
| 97 |
-
- **Why**: These tools aren't needed at runtime, only during build
|
| 98 |
-
- **Security**: Smaller attack surface (can't install malicious packages at runtime)
|
| 99 |
-
|
| 100 |
-
---
|
| 101 |
-
|
| 102 |
-
### 8. Set Working Directory (Line 45)
|
| 103 |
-
```dockerfile
|
| 104 |
-
WORKDIR /deps/job_writer
|
| 105 |
-
```
|
| 106 |
-
- **Purpose**: Sets the default directory when the container starts
|
| 107 |
-
- **Why**: Makes it easier to reference files relative to your project root
|
| 108 |
-
|
| 109 |
-
---
|
| 110 |
-
|
| 111 |
-
## How It Works at Runtime
|
| 112 |
-
|
| 113 |
-
When this container runs:
|
| 114 |
-
|
| 115 |
-
1. **LangServe starts automatically** (from base image)
|
| 116 |
-
2. **Reads `LANGSERVE_GRAPHS`** environment variable
|
| 117 |
-
3. **Imports your graphs** from the specified paths
|
| 118 |
-
4. **Exposes REST API endpoints**:
|
| 119 |
-
- `POST /invoke/job_app_graph` - Main workflow
|
| 120 |
-
- `POST /invoke/research_workflow` - Research subgraph
|
| 121 |
-
- `POST /invoke/data_loading_workflow` - Data loading subgraph
|
| 122 |
-
5. **Handles state management** automatically (checkpointing, persistence)
|
| 123 |
-
|
| 124 |
-
## Example API Usage
|
| 125 |
-
|
| 126 |
-
Once deployed, you can call your agent like this:
|
| 127 |
-
|
| 128 |
-
```bash
|
| 129 |
-
curl -X POST http://your-deployment/invoke/job_app_graph \
|
| 130 |
-
-H "Content-Type: application/json" \
|
| 131 |
-
-d '{
|
| 132 |
-
"resume_path": "...",
|
| 133 |
-
"job_description_source": "...",
|
| 134 |
-
"content": "cover_letter"
|
| 135 |
-
}'
|
| 136 |
-
```
|
| 137 |
-
|
| 138 |
-
## Key Points
|
| 139 |
-
|
| 140 |
-
✅ **Optimized for LangGraph Cloud** - Uses official base image
|
| 141 |
-
✅ **Automatic API generation** - No need to write FastAPI code
|
| 142 |
-
✅ **State management** - Built-in checkpointing and persistence
|
| 143 |
-
✅ **Security** - Removes build tools from final image
|
| 144 |
-
✅ **Small image** - No-cache installs, no bytecode files
|
| 145 |
-
|
| 146 |
-
This is the **easiest deployment option** for LangGraph apps - just build and push this Docker image!
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api-1.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
demo_candidate_store.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Demo script for CandidateProfileStore using ChromaDB.
|
| 3 |
+
|
| 4 |
+
Tests basic operations:
|
| 5 |
+
- Adding resumes with different sections
|
| 6 |
+
- Querying with natural language
|
| 7 |
+
- Retrieving sections
|
| 8 |
+
- Listing candidates
|
| 9 |
+
- Deleting data
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import logging
|
| 13 |
+
from src.job_writing_agent.agent_memory.candidate_profile_store import CandidateProfileStore
|
| 14 |
+
|
| 15 |
+
# Setup logging
|
| 16 |
+
logging.basicConfig(
|
| 17 |
+
level=logging.INFO,
|
| 18 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def demo_basic_operations():
|
| 25 |
+
"""Demonstrate basic ChromaDB candidate store operations."""
|
| 26 |
+
|
| 27 |
+
print("=" * 80)
|
| 28 |
+
print("CANDIDATE PROFILE STORE DEMO")
|
| 29 |
+
print("=" * 80)
|
| 30 |
+
|
| 31 |
+
# Initialize store
|
| 32 |
+
print("\n1. Initializing ChromaDB store...")
|
| 33 |
+
store = CandidateProfileStore(persist_directory="./chroma_db")
|
| 34 |
+
|
| 35 |
+
# Sample resume data
|
| 36 |
+
candidate_1 = {
|
| 37 |
+
"Experience": """
|
| 38 |
+
Senior Software Engineer at TechCorp (2020-2024)
|
| 39 |
+
- Led development of microservices architecture using Python and FastAPI
|
| 40 |
+
- Implemented machine learning models for recommendation systems
|
| 41 |
+
- Managed team of 5 engineers, conducted code reviews
|
| 42 |
+
- Built CI/CD pipelines using Docker and Kubernetes
|
| 43 |
+
- Reduced API response time by 60% through optimization
|
| 44 |
+
|
| 45 |
+
Software Engineer at StartupXYZ (2018-2020)
|
| 46 |
+
- Developed RESTful APIs using Django and PostgreSQL
|
| 47 |
+
- Created data processing pipelines with Apache Airflow
|
| 48 |
+
- Implemented automated testing with pytest and coverage tools
|
| 49 |
+
""",
|
| 50 |
+
"Skills": """
|
| 51 |
+
Programming Languages: Python, JavaScript, SQL, Go
|
| 52 |
+
Frameworks: FastAPI, Django, React, Flask
|
| 53 |
+
Databases: PostgreSQL, MongoDB, Redis
|
| 54 |
+
Tools: Docker, Kubernetes, Git, Jenkins, AWS
|
| 55 |
+
Machine Learning: scikit-learn, TensorFlow, pandas, numpy
|
| 56 |
+
Methodologies: Agile, Scrum, Test-Driven Development
|
| 57 |
+
""",
|
| 58 |
+
"Education": """
|
| 59 |
+
Master of Science in Computer Science
|
| 60 |
+
Stanford University (2016-2018)
|
| 61 |
+
- Specialization in Machine Learning and AI
|
| 62 |
+
- GPA: 3.9/4.0
|
| 63 |
+
- Thesis: "Deep Learning Approaches for Natural Language Processing"
|
| 64 |
+
|
| 65 |
+
Bachelor of Science in Computer Engineering
|
| 66 |
+
MIT (2012-2016)
|
| 67 |
+
- Minor in Mathematics
|
| 68 |
+
- Dean's List all semesters
|
| 69 |
+
""",
|
| 70 |
+
"Projects": """
|
| 71 |
+
Open Source Contributions:
|
| 72 |
+
- Contributor to FastAPI framework (30+ merged PRs)
|
| 73 |
+
- Created python-ml-toolkit library (500+ GitHub stars)
|
| 74 |
+
|
| 75 |
+
Personal Projects:
|
| 76 |
+
- Built AI-powered job matching platform
|
| 77 |
+
- Developed automated trading bot using machine learning
|
| 78 |
+
"""
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
candidate_2 = {
|
| 82 |
+
"Experience": """
|
| 83 |
+
Data Scientist at AnalyticsPro (2021-2024)
|
| 84 |
+
- Built predictive models for customer churn analysis
|
| 85 |
+
- Developed NLP pipelines for sentiment analysis
|
| 86 |
+
- Created interactive dashboards using Tableau and Plotly
|
| 87 |
+
- Worked with large datasets (100M+ records) using PySpark
|
| 88 |
+
|
| 89 |
+
Junior Data Analyst at DataCo (2019-2021)
|
| 90 |
+
- Performed statistical analysis on user behavior data
|
| 91 |
+
- Created SQL queries and data visualizations
|
| 92 |
+
- Automated reporting using Python scripts
|
| 93 |
+
""",
|
| 94 |
+
"Skills": """
|
| 95 |
+
Programming: Python, R, SQL
|
| 96 |
+
Data Science: pandas, numpy, scikit-learn, statsmodels
|
| 97 |
+
Machine Learning: TensorFlow, PyTorch, XGBoost
|
| 98 |
+
Visualization: Matplotlib, Seaborn, Plotly, Tableau
|
| 99 |
+
Big Data: PySpark, Hadoop, Hive
|
| 100 |
+
Statistics: A/B Testing, Hypothesis Testing, Regression Analysis
|
| 101 |
+
""",
|
| 102 |
+
"Education": """
|
| 103 |
+
Master of Science in Data Science
|
| 104 |
+
UC Berkeley (2017-2019)
|
| 105 |
+
- Focus on Statistical Learning and Big Data Analytics
|
| 106 |
+
|
| 107 |
+
Bachelor of Science in Mathematics
|
| 108 |
+
UCLA (2013-2017)
|
| 109 |
+
- Minor in Computer Science
|
| 110 |
+
"""
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
# Add resumes
|
| 114 |
+
print("\n2. Adding candidate resumes...")
|
| 115 |
+
result1 = store.add_resume_text(
|
| 116 |
+
candidate_id="candidate_001",
|
| 117 |
+
sections=candidate_1,
|
| 118 |
+
metadata={
|
| 119 |
+
"name": "John Smith",
|
| 120 |
+
"email": "john.smith@email.com",
|
| 121 |
+
"title": "Senior Software Engineer"
|
| 122 |
+
}
|
| 123 |
+
)
|
| 124 |
+
print(f" ✓ Added candidate_001: {result1['chunks_stored']} chunks, sections: {result1['sections']}")
|
| 125 |
+
|
| 126 |
+
result2 = store.add_resume_text(
|
| 127 |
+
candidate_id="candidate_002",
|
| 128 |
+
sections=candidate_2,
|
| 129 |
+
metadata={
|
| 130 |
+
"name": "Jane Doe",
|
| 131 |
+
"email": "jane.doe@email.com",
|
| 132 |
+
"title": "Data Scientist"
|
| 133 |
+
}
|
| 134 |
+
)
|
| 135 |
+
print(f" ✓ Added candidate_002: {result2['chunks_stored']} chunks, sections: {result2['sections']}")
|
| 136 |
+
|
| 137 |
+
# List all candidates
|
| 138 |
+
print("\n3. Listing all candidates...")
|
| 139 |
+
candidates = store.list_candidates()
|
| 140 |
+
print(f" Found {len(candidates)} candidates: {candidates}")
|
| 141 |
+
|
| 142 |
+
# Query tests
|
| 143 |
+
print("\n4. Testing semantic queries...")
|
| 144 |
+
|
| 145 |
+
queries = [
|
| 146 |
+
("candidate_001", "programming languages and frameworks", None),
|
| 147 |
+
("candidate_001", "machine learning experience", "Experience"),
|
| 148 |
+
("candidate_002", "data visualization tools", "Skills"),
|
| 149 |
+
("candidate_001", "education background in AI", "Education"),
|
| 150 |
+
("candidate_002", "worked with big data", None),
|
| 151 |
+
]
|
| 152 |
+
|
| 153 |
+
for candidate_id, query, section in queries:
|
| 154 |
+
section_str = f" (section: {section})" if section else ""
|
| 155 |
+
print(f"\n Query: '{query}'{section_str}")
|
| 156 |
+
print(f" Candidate: {candidate_id}")
|
| 157 |
+
|
| 158 |
+
results = store.query_resume(
|
| 159 |
+
candidate_id=candidate_id,
|
| 160 |
+
query=query,
|
| 161 |
+
section=section,
|
| 162 |
+
n_results=3
|
| 163 |
+
)
|
| 164 |
+
|
| 165 |
+
for i, result in enumerate(results, 1):
|
| 166 |
+
relevance = result['relevance_score']
|
| 167 |
+
doc_preview = result['document'][:150].replace('\n', ' ')
|
| 168 |
+
print(f" {i}. [Score: {relevance:.3f}] {result['metadata']['section']}")
|
| 169 |
+
print(f" {doc_preview}...")
|
| 170 |
+
|
| 171 |
+
# Get all sections for a candidate
|
| 172 |
+
print("\n5. Retrieving all sections for candidate_001...")
|
| 173 |
+
sections = store.get_all_sections("candidate_001")
|
| 174 |
+
for section_name, content in sections.items():
|
| 175 |
+
preview = content[:100].replace('\n', ' ')
|
| 176 |
+
print(f" {section_name}: {preview}...")
|
| 177 |
+
|
| 178 |
+
# Get specific section
|
| 179 |
+
print("\n6. Getting specific section (Skills) for candidate_001...")
|
| 180 |
+
skills_chunks = store.get_candidate_sections(
|
| 181 |
+
candidate_id="candidate_001",
|
| 182 |
+
section="Skills"
|
| 183 |
+
)
|
| 184 |
+
print(f" Found {len(skills_chunks)} chunks:")
|
| 185 |
+
for chunk in skills_chunks:
|
| 186 |
+
preview = chunk['document'][:80].replace('\n', ' ')
|
| 187 |
+
print(f" - {preview}...")
|
| 188 |
+
|
| 189 |
+
# Statistics
|
| 190 |
+
print("\n7. Database statistics...")
|
| 191 |
+
print(f" Total documents in collection: {store.collection.count()}")
|
| 192 |
+
print(f" Total candidates: {len(store.list_candidates())}")
|
| 193 |
+
|
| 194 |
+
# Cleanup option (commented out - uncomment to test deletion)
|
| 195 |
+
# print("\n8. Testing deletion...")
|
| 196 |
+
# delete_result = store.delete_candidate("candidate_002")
|
| 197 |
+
# print(f" Deleted {delete_result['chunks_deleted']} chunks for candidate_002")
|
| 198 |
+
# print(f" Remaining candidates: {store.list_candidates()}")
|
| 199 |
+
|
| 200 |
+
print("\n" + "=" * 80)
|
| 201 |
+
print("DEMO COMPLETED SUCCESSFULLY")
|
| 202 |
+
print("=" * 80)
|
| 203 |
+
print(f"\nData persisted to: {store.persist_directory}")
|
| 204 |
+
print("To reset database, uncomment the cleanup section in the demo script.")
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def demo_job_matching_queries():
    """Demonstrate job-matching use cases."""

    print("\n" + "=" * 80)
    print("JOB MATCHING QUERIES DEMO")
    print("=" * 80)

    store = CandidateProfileStore(persist_directory="./chroma_db")

    # Simulated job requirements: (title, requirement query, candidate id).
    postings = [
        ("Senior Backend Engineer",
         "Python FastAPI microservices Docker Kubernetes experience",
         "candidate_001"),
        ("Machine Learning Engineer",
         "machine learning models deep learning TensorFlow production",
         "candidate_001"),
        ("Data Science Lead",
         "predictive modeling statistics big data PySpark",
         "candidate_002"),
        ("NLP Engineer",
         "natural language processing sentiment analysis text mining",
         "candidate_002"),
    ]

    print("\nMatching candidates to job requirements:\n")

    for title, requirements, candidate_id in postings:
        print(f"Job: {title}")
        print(f"Requirements: {requirements}")
        print(f"Checking: {candidate_id}\n")

        matches = store.query_resume(
            candidate_id=candidate_id,
            query=requirements,
            n_results=2,
        )

        if matches:
            top_match = matches[0]
            print(f"✓ Match Score: {top_match['relevance_score']:.3f}")
            print(f" Relevant experience from {top_match['metadata']['section']}:")
            snippet = top_match['document'][:200].replace('\n', ' ')
            print(f" {snippet}...\n")

        print("-" * 80 + "\n")
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
if __name__ == "__main__":
    try:
        # Walk through the basic store operations, then the job-matching demo.
        demo_basic_operations()
        demo_job_matching_queries()
    except Exception as e:
        logger.error(f"Demo failed: {e}", exc_info=True)
        raise
|
docker-compose.override.example.yml
DELETED
|
@@ -1,21 +0,0 @@
|
|
| 1 |
-
# Example override file for local development
|
| 2 |
-
# Copy this to docker-compose.override.yml to customize settings
|
| 3 |
-
# docker-compose automatically loads override files
|
| 4 |
-
|
| 5 |
-
version: "3.9"
|
| 6 |
-
services:
|
| 7 |
-
redis:
|
| 8 |
-
# Override Redis port for local development
|
| 9 |
-
ports:
|
| 10 |
-
- "6380:6379" # Use different port if 6379 is already in use
|
| 11 |
-
|
| 12 |
-
postgres:
|
| 13 |
-
# Override Postgres port for local development
|
| 14 |
-
ports:
|
| 15 |
-
- "5433:5432" # Use different port if 5432 is already in use
|
| 16 |
-
environment:
|
| 17 |
-
# Override credentials for local dev
|
| 18 |
-
- POSTGRES_USER=dev_user
|
| 19 |
-
- POSTGRES_PASSWORD=dev_password
|
| 20 |
-
- POSTGRES_DB=job_app_dev
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/job_writing_agent/agent_memory/__init__.py
ADDED
|
File without changes
|
src/job_writing_agent/agent_memory/agent_shopping_example.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from langgraph.store.mongodb.base import MongoDBStore, VectorIndexConfig
|
| 3 |
+
from langgraph.checkpoint.mongodb import MongoDBSaver
|
| 4 |
+
from langchain.agents.middleware import dynamic_prompt, ModelRequest
|
| 5 |
+
from langchain.agents import create_agent
|
| 6 |
+
from langchain.tools import tool
|
| 7 |
+
from langmem import create_manage_memory_tool
|
| 8 |
+
from langchain_voyageai import VoyageAIEmbeddings
|
| 9 |
+
from langchain_mongodb import MongoDBAtlasVectorSearch
|
| 10 |
+
from langchain_openai import OpenAIEmbeddings
|
| 11 |
+
from langchain_core.runnables import RunnableConfig
|
| 12 |
+
from pydantic import SecretStr
|
| 13 |
+
from pymongo import MongoClient
|
| 14 |
+
import os
|
| 15 |
+
from src.job_writing_agent.utils.llm_provider_factory import LLMFactory
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# --- Model, logging, and MongoDB-backed memory setup ------------------------

factory = LLMFactory(default_provider="openrouter")

llm = factory.create_langchain(
    model="openai/gpt-oss-20b:free",
    provider="openrouter",
    temperature=0.7,
)

logger = logging.getLogger(__name__)
logger.setLevel(logging.ERROR)

# SECURITY: credentials must come from the environment, never from source.
# A previous revision committed a VoyageAI API key and a MongoDB URI with an
# embedded password -- those credentials should be rotated immediately.
VOYAGE_API_KEY = os.environ["VOYAGE_API_KEY"]
MONGODB_URI = os.environ["MONGODB_URI"]

client = MongoClient(MONGODB_URI)

# Long-term memory store backed by Atlas vector search.
memory_collection = client["memories"]["memory_store"]
store = MongoDBStore(
    collection=memory_collection,
    index_config=VectorIndexConfig(
        fields=None,   # Auto-detect fields for indexing
        filters=[],    # No additional filters
        dims=1024,     # voyage-3.5-lite embedding dimensions
        embed=VoyageAIEmbeddings(
            model="voyage-3.5-lite",
            api_key=SecretStr(VOYAGE_API_KEY),
        ),
    ),
    auto_index_timeout=120,
)

# Conversation-state checkpointing for thread persistence.
checkpointer = MongoDBSaver(
    client,                                   # MongoDB client
    db_name="memories",                       # Database name
    collection_name="thread_checkpoints",     # Collection for conversation state
)

# Product catalogue used by the search_products tool below.
db = client["ai_shop"]
collection = db["products"]
|
| 61 |
+
|
| 62 |
+
@tool
def search_products(query: str) -> str:
    """Searches for products in the database using vector search."""
    # NOTE: the docstring above doubles as the tool description for the LLM.
    vector_index = MongoDBAtlasVectorSearch(
        collection,
        OpenAIEmbeddings(),
        text_key="title",
        embedding_key="embedding",
        index_name="vector_index_2",
    )
    matches = vector_index.similarity_search(query, k=5)
    return "\n".join(str(match.metadata) for match in matches)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
@dynamic_prompt
def dynamic_memories_prompt(request: ModelRequest) -> str:
    """
    A middleware that builds a dynamic system prompt using relevant memories.
    """
    # Pull the latest user message text out of the agent state, if any;
    # non-string content (structured messages) falls back to an empty query.
    messages = request.state.get("messages")
    latest_text = messages[-1].content if messages else ""
    if not isinstance(latest_text, str):
        latest_text = ""

    # Query the long-term memory store (module-level `store`, not state).
    recalled = store.search(("memories",), query=latest_text)

    # Inject the recalled memories into the system prompt.
    return (
        "You are a shopping assistant with persistent memory.\n"
        "## Relevant Memories\n"
        "<memories>\n"
        f"{recalled}\n"
        "</memories>\n"
        "Use these memories to provide personalized responses."
    )
|
| 104 |
+
|
| 105 |
+
# Top-level agent: memory management + product search, with the dynamic
# memory prompt, long-term store, and persistent checkpointing attached.
memory_tool = create_manage_memory_tool(namespace=("memories",))
agent = create_agent(
    model=llm,
    tools=[memory_tool, search_products],
    middleware=[dynamic_memories_prompt],
    store=store,
    checkpointer=checkpointer,
)
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def create_shopping_agent():
    """Build the complete shopping assistant with memory.

    Wires together:
    - a MongoDB-backed vector store for user-preference memories,
    - a MongoDB checkpointer for conversation persistence,
    - a dynamic system prompt enriched with preferences and purchase history.

    Returns:
        The configured agent produced by ``create_agent``.
    """
    # SECURITY: read the embedding API key from the environment; a previous
    # revision hardcoded it here, which leaks the credential via source control.
    voyage_key = os.environ["VOYAGE_API_KEY"]

    # Memory storage setup
    store = MongoDBStore(
        collection=client.memories.user_preferences,
        index_config=VectorIndexConfig(
            dims=1024,  # voyage-3.5-lite embedding dimensions
            embed=VoyageAIEmbeddings(model="voyage-3.5-lite", api_key=SecretStr(voyage_key)),
            fields=["content"],
            filters=["active"],
        )
    )

    # Conversation persistence
    checkpointer = MongoDBSaver(
        client,
        db_name="shopping_assistant",
        collection_name="conversations"
    )

    @dynamic_prompt
    def enhanced_dynamic_prompt(request: ModelRequest) -> str:
        """Compose a system prompt with the user's preferences and purchases."""
        state = request.state

        # Safely extract last user text; normalize structured content to str.
        raw = state.get("messages")[-1].content if state.get("messages") else ""
        if isinstance(raw, dict):
            user_query = raw.get("text", "")
        elif isinstance(raw, str):
            user_query = raw
        else:
            user_query = ""

        user_query = user_query.strip()

        if not user_query:
            # Fallback prompt with no query inputs
            return "You are a shopping assistant."

        memories = store.search(
            ("preferences",),
            query=user_query,
            limit=3,
            filter={"active": True},
        )

        purchase_history = store.search(
            ("purchases",),
            query=user_query,
            limit=2,
        )

        return f"""You are an expert shopping assistant with access to:
- Product search capabilities
- User preference memory
- Purchase history

## User Preferences
{memories}

## Recent Purchase Context
{purchase_history}

Provide personalized, helpful shopping advice."""

    # Create agent with all capabilities
    return create_agent(
        model=llm,
        tools=[
            create_manage_memory_tool(namespace=("preferences",)),
            create_manage_memory_tool(namespace=("purchases",)),
            search_products,
        ],
        middleware=[enhanced_dynamic_prompt],  # Attach the dynamic prompt generator
        store=store,
        checkpointer=checkpointer,
    )
|
| 198 |
+
|
| 199 |
+
# Usage example
agent = create_shopping_agent()


def get_user_config(
    user_id: str,
    thread_id: str = "default_thread",
    *,
    tags: list[str] | None = None,
    metadata: dict | None = None,
) -> RunnableConfig:
    """Build a RunnableConfig for agent runs.

    Parameters
    - user_id: unique identifier for the user
    - thread_id: logical conversation/thread id to group runs
    - tags: optional list of tags to attach to the run
    - metadata: optional metadata dict for observability/auditing

    Returns
    - RunnableConfig dict accepted by LangChain runnables
    """
    config: RunnableConfig = {
        "configurable": {"user_id": user_id, "thread_id": thread_id}
    }
    # Empty lists/dicts are treated the same as None and omitted.
    if tags:
        config["tags"] = tags
    if metadata:
        config["metadata"] = metadata
    return config


def _run_demo_conversations() -> None:
    """Run the two example conversations against the shopping agent."""
    # Conversation 1: Learning preferences
    config_one = get_user_config("user123")
    reply = agent.invoke({
        "messages": [{"role": "user", "content": "I'm vegan and prefer organic products"}]
    }, config=config_one)
    print(reply["messages"][-1].content)

    # Conversation 2: Using learned preferences (different session)
    config_two = get_user_config("user123", "mobile-app")
    reply = agent.invoke({
        "messages": [{"role": "user", "content": "Find me some pasta options"}]
    }, config=config_two)
    # Agent automatically applies vegan + organic filters
    print(reply["messages"][-1].content)


if __name__ == "__main__":
    # Previously these conversations executed at import time, calling the LLM
    # and MongoDB whenever the module was imported; run them only as a script.
    _run_demo_conversations()
|
src/job_writing_agent/agent_memory/candidate_profile_store.py
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Candidate Profile Store using ChromaDB for vector-based resume storage and retrieval.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
import os
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Optional, List, Dict, Any
|
| 10 |
+
import uuid
|
| 11 |
+
|
| 12 |
+
import chromadb
|
| 13 |
+
from chromadb.config import Settings
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class CandidateProfileStore:
    """
    Manages candidate resumes in ChromaDB with vector embeddings.

    Uses ChromaDB's default embedding function (sentence-transformers/all-MiniLM-L6-v2)
    for free, local embeddings without API keys.

    NOTE(review): chunk ids are built as ``{candidate_id}_{section}_{index}``,
    so underscores inside candidate ids or section names could in principle
    produce colliding ids -- keep identifiers simple or confirm upstream.
    """

    def __init__(self, persist_directory: str = "./chroma_db"):
        """
        Initialize ChromaDB client and collection.

        Args:
            persist_directory: Directory to persist ChromaDB data
        """
        self.persist_directory = persist_directory

        # Create directory if it doesn't exist
        Path(persist_directory).mkdir(parents=True, exist_ok=True)

        # Initialize ChromaDB client
        self.client = chromadb.PersistentClient(
            path=persist_directory,
            settings=Settings(
                anonymized_telemetry=False,
                allow_reset=True,
            ),
        )

        # Cosine distance so query distances map onto a simple 1 - d
        # relevance score (see _format_query_results).
        self.collection = self.client.get_or_create_collection(
            name="candidate_resumes",
            metadata={"hnsw:space": "cosine"},
        )

        logger.info(f"Initialized CandidateProfileStore at {persist_directory}")
        logger.info(f"Collection contains {self.collection.count()} documents")

    def add_resume_text(
        self,
        candidate_id: str,
        sections: Dict[str, str],
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Add resume sections directly as text (for demo/testing).

        Each section is split into overlapping chunks, which are embedded and
        stored individually so they can be found by semantic search.

        Args:
            candidate_id: Unique identifier for the candidate
            sections: Dict mapping section names to content
                      e.g., {"Experience": "...", "Skills": "..."}
            metadata: Additional metadata (name, email, etc.) copied to every chunk

        Returns:
            Dict with operation summary (chunks stored, sections, totals)
        """
        ids: List[str] = []
        documents: List[str] = []
        metadatas: List[Dict[str, Any]] = []

        base_metadata = metadata or {}
        timestamp = datetime.now().isoformat()

        for section_name, content in sections.items():
            # Split long sections into chunks
            for i, chunk in enumerate(self._chunk_text(content, chunk_size=400, overlap=50)):
                ids.append(f"{candidate_id}_{section_name}_{i}")
                documents.append(chunk)
                metadatas.append({
                    "candidate_id": candidate_id,
                    "section": section_name,
                    "chunk_index": i,
                    "timestamp": timestamp,
                    **base_metadata,
                })

        # Guard: ChromaDB rejects add() with empty id lists (e.g. when
        # ``sections`` is empty), so only write when there is data.
        if ids:
            # Add to ChromaDB (auto-embeds with default embedding function)
            self.collection.add(
                ids=ids,
                documents=documents,
                metadatas=metadatas,
            )

        result = {
            "candidate_id": candidate_id,
            "chunks_stored": len(ids),
            "sections": list(sections.keys()),
            "total_documents": self.collection.count(),
        }

        logger.info(f"Added {len(ids)} chunks for candidate {candidate_id}")
        return result

    @staticmethod
    def _build_where(candidate_id: str, section: Optional[str]) -> Dict[str, Any]:
        """Build a ChromaDB ``where`` filter; multiple conditions need ``$and``."""
        if section:
            return {
                "$and": [
                    {"candidate_id": candidate_id},
                    {"section": section},
                ]
            }
        return {"candidate_id": candidate_id}

    def query_resume(
        self,
        candidate_id: str,
        query: str,
        section: Optional[str] = None,
        n_results: int = 5,
    ) -> List[Dict[str, Any]]:
        """
        Semantic search candidate's resume using natural language query.

        Args:
            candidate_id: Candidate to search
            query: Natural language query
            section: Optional section filter (e.g., "Experience", "Skills")
            n_results: Number of results to return

        Returns:
            List of matching chunks with metadata and relevance scores;
            empty list if the query fails.
        """
        try:
            results = self.collection.query(
                query_texts=[query],
                n_results=n_results,
                where=self._build_where(candidate_id, section),
                include=["documents", "metadatas", "distances"],
            )
            return self._format_query_results(results)
        except Exception as e:
            logger.error(f"Query failed: {e}")
            return []

    def get_candidate_sections(
        self,
        candidate_id: str,
        section: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List[Dict[str, Any]]:
        """
        Get all stored data for a candidate, optionally filtered by section.

        Args:
            candidate_id: Candidate identifier
            section: Optional section filter
            limit: Maximum number of results (None for all)

        Returns:
            List of documents with metadata; empty list if the lookup fails.
        """
        try:
            results = self.collection.get(
                where=self._build_where(candidate_id, section),
                limit=limit,
                include=["documents", "metadatas"],
            )
            return self._format_get_results(results)
        except Exception as e:
            logger.error(f"Get failed: {e}")
            return []

    def get_all_sections(self, candidate_id: str) -> Dict[str, str]:
        """
        Get all sections for a candidate, reconstructed from chunks.

        Args:
            candidate_id: Candidate identifier

        Returns:
            Dict mapping section names to reconstructed content
        """
        grouped: Dict[str, List[Dict[str, Any]]] = {}
        for item in self.get_candidate_sections(candidate_id):
            grouped.setdefault(item["metadata"]["section"], []).append({
                "chunk_index": item["metadata"]["chunk_index"],
                "content": item["document"],
            })

        # Rejoin each section's chunks in their original order.
        return {
            name: " ".join(
                c["content"] for c in sorted(chunks, key=lambda x: x["chunk_index"])
            )
            for name, chunks in grouped.items()
        }

    def delete_candidate(self, candidate_id: str) -> Dict[str, Any]:
        """
        Remove all data for a candidate.

        Args:
            candidate_id: Candidate to delete

        Returns:
            Operation summary with the number of chunks deleted
        """
        # Count first so the summary can report how much was removed.
        before_count = len(self.get_candidate_sections(candidate_id))

        self.collection.delete(
            where={"candidate_id": candidate_id}
        )

        logger.info(f"Deleted {before_count} chunks for candidate {candidate_id}")

        return {
            "candidate_id": candidate_id,
            "chunks_deleted": before_count,
            "total_documents": self.collection.count(),
        }

    def list_candidates(self) -> List[str]:
        """
        Get list of all candidate IDs in the database.

        Returns:
            Sorted list of unique candidate IDs (empty for an empty collection)
        """
        all_data = self.collection.get(include=["metadatas"])
        # ``metadatas`` can be missing or None on an empty collection; the
        # previous code passed None into the comprehension and crashed.
        metadatas = all_data.get("metadatas") or []
        return sorted({meta["candidate_id"] for meta in metadatas})

    def reset(self) -> None:
        """Reset the database (delete all data). Use with caution!"""
        self.client.delete_collection("candidate_resumes")
        self.collection = self.client.get_or_create_collection(
            name="candidate_resumes",
            metadata={"hnsw:space": "cosine"},
        )
        logger.warning("Database reset - all data deleted")

    def _chunk_text(self, text: str, chunk_size: int = 400, overlap: int = 50) -> List[str]:
        """
        Split text into overlapping chunks, preferring sentence boundaries.

        Args:
            text: Text to chunk
            chunk_size: Maximum chunk size in characters (must exceed overlap)
            overlap: Overlap between chunks

        Returns:
            List of stripped text chunks

        Raises:
            ValueError: If ``overlap >= chunk_size`` (the scan would never
                advance and the previous implementation looped forever).
        """
        if overlap >= chunk_size:
            raise ValueError("overlap must be smaller than chunk_size")

        if len(text) <= chunk_size:
            return [text]

        chunks: List[str] = []
        start = 0

        while start < len(text):
            end = start + chunk_size
            chunk = text[start:end]

            # Try to break at a sentence boundary in the second half of the
            # window so chunks stay reasonably sized.
            if end < len(text):
                last_period = chunk.rfind(". ")
                if last_period > chunk_size // 2:
                    end = start + last_period + 1
                    chunk = text[start:end]

            chunks.append(chunk.strip())
            start = end - overlap

        return chunks

    def _format_query_results(self, results: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Format ChromaDB query results into a cleaner structure."""
        if not results['ids'] or not results['ids'][0]:
            return []

        return [
            {
                "id": id_,
                "document": doc,
                "metadata": meta,
                "distance": dist,
                "relevance_score": 1 - dist,  # Convert cosine distance to similarity
            }
            for id_, doc, meta, dist in zip(
                results['ids'][0],
                results['documents'][0],
                results['metadatas'][0],
                results['distances'][0],
            )
        ]

    def _format_get_results(self, results: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Format ChromaDB get results into a cleaner structure."""
        if not results['ids']:
            return []

        return [
            {"id": id_, "document": doc, "metadata": meta}
            for id_, doc, meta in zip(
                results['ids'],
                results['documents'],
                results['metadatas'],
            )
        ]
|
src/job_writing_agent/agent_memory/mongodb_logterm_memory.py
ADDED
|
File without changes
|
uv.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|