Rishabh2095 committed on
Commit
046508a
·
1 Parent(s): 6e2bf85

Code Refactoring and Central Logging

Browse files
Files changed (34) hide show
  1. .dockerignore +38 -0
  2. .gitignore +2 -0
  3. .vscode/settings.json +8 -1
  4. DEPLOYMENT_GUIDE.md +303 -0
  5. DOCKERFILE_EXPLANATION.md +147 -0
  6. Dockerfile +41 -0
  7. docker-compose.override.example.yml +21 -0
  8. docker-compose.yml +59 -0
  9. langgraph.json +5 -3
  10. pyproject.toml +17 -15
  11. pyrightconfig.json +18 -0
  12. src/job_writing_agent/agents/nodes.py +220 -53
  13. src/job_writing_agent/agents/output_schema.py +48 -11
  14. src/job_writing_agent/classes/__init__.py +2 -2
  15. src/job_writing_agent/classes/classes.py +60 -7
  16. src/job_writing_agent/logs/job_writer.log +0 -0
  17. src/job_writing_agent/nodes/initializing.py +408 -205
  18. src/job_writing_agent/nodes/job_description_loader.py +192 -0
  19. src/job_writing_agent/nodes/research_workflow.py +260 -53
  20. src/job_writing_agent/nodes/resume_loader.py +140 -0
  21. src/job_writing_agent/nodes/selfconsistency.py +28 -20
  22. src/job_writing_agent/nodes/variations.py +10 -8
  23. src/job_writing_agent/prompts/templates.py +22 -11
  24. src/job_writing_agent/prompts/test_prompts.py +38 -0
  25. src/job_writing_agent/tools/SearchTool.py +203 -79
  26. src/job_writing_agent/tools/__init__.py +2 -2
  27. src/job_writing_agent/utils/application_cli_interface.py +2 -2
  28. src/job_writing_agent/utils/document_processing.py +129 -87
  29. src/job_writing_agent/utils/llm_client.py +143 -127
  30. src/job_writing_agent/utils/llm_provider_factory.py +3 -0
  31. src/job_writing_agent/utils/logging/logging_config.py +132 -0
  32. src/job_writing_agent/utils/logging/logging_decorators.py +103 -0
  33. src/job_writing_agent/workflow.py +221 -42
  34. uv.lock +0 -0
.dockerignore ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ app_env/
8
+ venv/
9
+ env/
10
+ ENV/
11
+
12
+ # IDE
13
+ .vscode/
14
+ .idea/
15
+ *.swp
16
+ *.swo
17
+
18
+ # Logs
19
+ *.log
20
+ logs/
21
+
22
+ # OS
23
+ .DS_Store
24
+ Thumbs.db
25
+
26
+ # Project specific
27
+ *.pdf
28
+ cover_letter_*.txt
29
+ uv.lock
30
+
31
+ # Git
32
+ .git/
33
+ .gitignore
34
+
35
+ # Documentation
36
+ *.md
37
+ !README.md
38
+
.gitignore CHANGED
@@ -4,6 +4,8 @@
4
  # Environment / secret files
5
  job_writing_agent/.env
6
  job_writing_agent/.env.*
 
 
7
 
8
  # Jupyter notebooks
9
  job_writing_agent/*.ipynb
 
4
  # Environment / secret files
5
  job_writing_agent/.env
6
  job_writing_agent/.env.*
7
+ src/job_writing_agent/.env
8
+ src/job_writing_agent/.env.*
9
 
10
  # Jupyter notebooks
11
  job_writing_agent/*.ipynb
.vscode/settings.json CHANGED
@@ -1,3 +1,10 @@
1
  {
2
- "python.defaultInterpreterPath": "/home/icangdb/application_writing_agent/.venv/bin/python"
 
 
 
 
 
 
 
3
  }
 
1
  {
2
+ "python.defaultInterpreterPath": "C:\\Users\\risha\\python-dir\\job_application_agent\\job_writer\\app_env\\Scripts\\python.exe",
3
+ "python.formatting.provider": "black",
4
+ "editor.formatOnSave": true,
5
+ "python.formatting.blackArgs": ["--line-length", "88"],
6
+ "python.linting.enabled": true,
7
+ "python.linting.pylintEnabled": true,
8
+ "python.linting.lintOnSave": true,
9
+ "python.linting.mypyEnabled": true
10
  }
DEPLOYMENT_GUIDE.md ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deployment Guide for Job Application Agent
2
+
3
+ ## Option 1: LangGraph Cloud (Easiest & Recommended)
4
+
5
+ ### Prerequisites
6
+ - LangGraph CLI installed (`langgraph-cli` in requirements.txt)
7
+ - `langgraph.json` already configured ✅
8
+
9
+ ### Steps
10
+
11
+ 1. **Install LangGraph CLI** (if not already):
12
+ ```powershell
13
+ pip install langgraph-cli
14
+ ```
15
+
16
+ 2. **Login to LangGraph Cloud**:
17
+ ```powershell
18
+ langgraph login
19
+ ```
20
+
21
+ 3. **Deploy your agent**:
22
+ ```powershell
23
+ langgraph deploy
24
+ ```
25
+
26
+ 4. **Get your API endpoint** - LangGraph Cloud provides a REST API automatically
27
+
28
+ ### Cost
29
+ - **Free tier**: Limited requests/month
30
+ - **Paid**: Pay-per-use pricing
31
+
32
+ ### Pros
33
+ - ✅ Zero infrastructure management
34
+ - ✅ Built-in state persistence
35
+ - ✅ Automatic API generation
36
+ - ✅ LangSmith integration
37
+ - ✅ Perfect for LangGraph apps
38
+
39
+ ### Cons
40
+ - ⚠️ Vendor lock-in
41
+ - ⚠️ Limited customization
42
+
43
+ ---
44
+
45
+ ## Option 2: Railway.app (Simple & Cheap)
46
+
47
+ ### Steps
48
+
49
+ 1. **Create a FastAPI wrapper** (create `api.py`):
50
+ ```python
51
+ from fastapi import FastAPI, File, UploadFile
52
+ from job_writing_agent.workflow import JobWorkflow
53
+ import tempfile
54
+ import os
55
+
56
+ app = FastAPI()
57
+
58
+ @app.post("/generate")
59
+ async def generate_application(
60
+ job_description: str,
61
+ resume: UploadFile = File(...),
62
+ content_type: str = "cover_letter"
63
+ ):
64
+ # Save resume temporarily
65
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
66
+ tmp.write(await resume.read())
67
+ resume_path = tmp.name
68
+
69
+ try:
70
+ workflow = JobWorkflow(
71
+ resume=resume_path,
72
+ job_description_source=job_description,
73
+ content=content_type
74
+ )
75
+ result = await workflow.run()
76
+ return {"result": result}
77
+ finally:
78
+ os.unlink(resume_path)
79
+ ```
80
+
81
+ 2. **Create `Procfile`**:
82
+ ```
83
+ web: uvicorn api:app --host 0.0.0.0 --port $PORT
84
+ ```
85
+
86
+ 3. **Deploy to Railway**:
87
+ - Sign up at [railway.app](https://railway.app)
88
+ - Connect GitHub repo
89
+ - Railway auto-detects Python and runs `Procfile`
90
+
91
+ ### Cost
92
+ - **Free tier**: $5 credit/month
93
+ - **Hobby**: $5/month for 512MB RAM
94
+ - **Pro**: $20/month for 2GB RAM
95
+
96
+ ### Pros
97
+ - ✅ Very simple deployment
98
+ - ✅ Auto-scaling
99
+ - ✅ Free tier available
100
+ - ✅ Automatic HTTPS
101
+
102
+ ### Cons
103
+ - ⚠️ Need to add FastAPI wrapper
104
+ - ⚠️ State management needs Redis/Postgres
105
+
106
+ ---
107
+
108
+ ## Option 3: Render.com (Similar to Railway)
109
+
110
+ ### Steps
111
+
112
+ 1. **Create `render.yaml`**:
113
+ ```yaml
114
+ services:
115
+ - type: web
116
+ name: job-writer-api
117
+ env: python
118
+ buildCommand: pip install -r requirements.txt
119
+ startCommand: uvicorn api:app --host 0.0.0.0 --port $PORT
120
+ envVars:
121
+ - key: OPENROUTER_API_KEY
122
+ sync: false
123
+ - key: TAVILY_API_KEY
124
+ sync: false
125
+ ```
126
+
127
+ 2. **Deploy**:
128
+ - Connect GitHub repo to Render
129
+ - Render auto-detects `render.yaml`
130
+
131
+ ### Cost
132
+ - **Free tier**: 750 hours/month (sleeps after 15min inactivity)
133
+ - **Starter**: $7/month (always on)
134
+
135
+ ### Pros
136
+ - ✅ Free tier for testing
137
+ - ✅ Simple YAML config
138
+ - ✅ Auto-deploy from Git
139
+
140
+ ### Cons
141
+ - ⚠️ Free tier sleeps (cold starts)
142
+ - ⚠️ Need FastAPI wrapper
143
+
144
+ ---
145
+
146
+ ## Option 4: Fly.io (Good Free Tier)
147
+
148
+ ### Steps
149
+
150
+ 1. **Install Fly CLI**:
151
+ ```powershell
152
+ iwr https://fly.io/install.ps1 -useb | iex
153
+ ```
154
+
155
+ 2. **Create `Dockerfile`**:
156
+ ```dockerfile
157
+ FROM python:3.12-slim
158
+
159
+ WORKDIR /app
160
+ COPY requirements.txt .
161
+ RUN pip install --no-cache-dir -r requirements.txt
162
+
163
+ COPY . .
164
+
165
+ CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8080"]
166
+ ```
167
+
168
+ 3. **Deploy**:
169
+ ```powershell
170
+ fly launch
171
+ fly deploy
172
+ ```
173
+
174
+ ### Cost
175
+ - **Free tier**: 3 shared-cpu VMs, 3GB storage
176
+ - **Paid**: $1.94/month per VM
177
+
178
+ ### Pros
179
+ - ✅ Generous free tier
180
+ - ✅ Global edge deployment
181
+ - ✅ Docker-based (flexible)
182
+
183
+ ### Cons
184
+ - ⚠️ Need Docker knowledge
185
+ - ⚠️ Need FastAPI wrapper
186
+
187
+ ---
188
+
189
+ ## Option 5: AWS Lambda (Serverless - Pay Per Use)
190
+
191
+ ### Steps
192
+
193
+ 1. **Create Lambda handler** (`lambda_handler.py`):
194
+ ```python
195
+ import json
196
+ from job_writing_agent.workflow import JobWorkflow
197
+
198
+ def lambda_handler(event, context):
199
+ # Parse event
200
+ body = json.loads(event['body'])
201
+
202
+ workflow = JobWorkflow(
203
+ resume=body['resume_path'],
204
+ job_description_source=body['job_description'],
205
+ content=body.get('content_type', 'cover_letter')
206
+ )
207
+
208
+ result = workflow.run()
209
+
210
+ return {
211
+ 'statusCode': 200,
212
+ 'body': json.dumps({'result': result})
213
+ }
214
+ ```
215
+
216
+ 2. **Package and deploy** using AWS SAM or Serverless Framework
217
+
218
+ ### Cost
219
+ - **Free tier**: 1M requests/month
220
+ - **Paid**: $0.20 per 1M requests + compute time
221
+
222
+ ### Pros
223
+ - ✅ Pay only for usage
224
+ - ✅ Auto-scaling
225
+ - ✅ Very cheap for low traffic
226
+
227
+ ### Cons
228
+ - ⚠️ 15min timeout limit
229
+ - ⚠️ Cold starts
230
+ - ⚠️ Complex setup
231
+ - ⚠️ Need to handle state externally
232
+
233
+ ---
234
+
235
+ ## Recommendation
236
+
237
+ **For your use case, I recommend:**
238
+
239
+ 1. **Start with LangGraph Cloud** - Easiest, built for your stack
240
+ 2. **If you need more control → Railway** - Simple, good free tier
241
+ 3. **If you need serverless → AWS Lambda** - Cheapest for low traffic
242
+
243
+ ---
244
+
245
+ ## Quick Start: FastAPI Wrapper (for Railway/Render/Fly.io)
246
+
247
+ Create `api.py` in your project root:
248
+
249
+ ```python
250
+ from fastapi import FastAPI, File, UploadFile, HTTPException
251
+ from fastapi.responses import JSONResponse
252
+ from job_writing_agent.workflow import JobWorkflow
253
+ import tempfile
254
+ import os
255
+ import asyncio
256
+
257
+ app = FastAPI(title="Job Application Writer API")
258
+
259
+ @app.get("/")
260
+ def health():
261
+ return {"status": "ok"}
262
+
263
+ @app.post("/generate")
264
+ async def generate_application(
265
+ job_description: str,
266
+ resume: UploadFile = File(...),
267
+ content_type: str = "cover_letter"
268
+ ):
269
+ """Generate job application material."""
270
+ # Save resume temporarily
271
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
272
+ content = await resume.read()
273
+ tmp.write(content)
274
+ resume_path = tmp.name
275
+
276
+ try:
277
+ workflow = JobWorkflow(
278
+ resume=resume_path,
279
+ job_description_source=job_description,
280
+ content=content_type
281
+ )
282
+
283
+ # Run workflow (assuming it's async or can be wrapped)
284
+ result = await asyncio.to_thread(workflow.run)
285
+
286
+ return JSONResponse({
287
+ "status": "success",
288
+ "result": result
289
+ })
290
+ except Exception as e:
291
+ raise HTTPException(status_code=500, detail=str(e))
292
+ finally:
293
+ # Cleanup
294
+ if os.path.exists(resume_path):
295
+ os.unlink(resume_path)
296
+
297
+ if __name__ == "__main__":
298
+ import uvicorn
299
+ uvicorn.run(app, host="0.0.0.0", port=8000)
300
+ ```
301
+
302
+ Then update `requirements.txt` to ensure FastAPI and uvicorn are included (they already are ✅).
303
+
DOCKERFILE_EXPLANATION.md ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Dockerfile Explanation
2
+
3
+ This Dockerfile is specifically designed for **LangGraph Cloud/LangServe deployment**. It uses the official LangGraph API base image and configures your agent graphs to be served as REST APIs.
4
+
5
+ ## Line-by-Line Breakdown
6
+
7
+ ### 1. Base Image (Line 1)
8
+ ```dockerfile
9
+ FROM langchain/langgraph-api:3.12
10
+ ```
11
+ - **Purpose**: Uses the official LangGraph API base image with Python 3.12
12
+ - **What it includes**: Pre-configured LangGraph runtime, LangServe server, and all LangGraph dependencies
13
+ - **Why**: This image already has everything needed to serve LangGraph workflows as REST APIs
14
+
15
+ ---
16
+
17
+ ### 2. Install the `nodes` Package Dependency (Line 9)
18
+ ```dockerfile
19
+ RUN PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir -c /api/constraints.txt nodes
20
+ ```
21
+ - **Purpose**: Installs the `nodes` package (likely a dependency from your `langgraph.json`)
22
+ - **`PYTHONDONTWRITEBYTECODE=1`**: Prevents creating `.pyc` files (smaller image)
23
+ - **`uv pip`**: Uses `uv` (fast Python package installer) instead of regular `pip`
24
+ - **`--system`**: Installs to system Python (not virtual env)
25
+ - **`--no-cache-dir`**: Doesn't cache pip downloads (smaller image)
26
+ - **`-c /api/constraints.txt`**: Uses constraint file from base image (ensures compatible versions)
27
+
28
+ ---
29
+
30
+ ### 3. Copy Your Code (Line 14)
31
+ ```dockerfile
32
+ ADD . /deps/job_writer
33
+ ```
34
+ - **Purpose**: Copies your entire project into `/deps/job_writer` in the container
35
+ - **Why `/deps/`**: LangGraph API expects dependencies in this directory
36
+ - **What gets copied**: All your source code, `pyproject.toml`, `requirements.txt`, etc.
37
+
38
+ ---
39
+
40
+ ### 4. Install Your Package (Lines 19-21)
41
+ ```dockerfile
42
+ RUN for dep in /deps/*; do
43
+ echo "Installing $dep";
44
+ if [ -d "$dep" ]; then
45
+ echo "Installing $dep";
46
+ (cd "$dep" && PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir -c /api/constraints.txt -e .);
47
+ fi;
48
+ done
49
+ ```
50
+ - **Purpose**: Installs your `job_writer` package in editable mode (`-e`)
51
+ - **How it works**:
52
+ - Loops through all directories in `/deps/`
53
+ - For each directory, changes into it and runs `pip install -e .`
54
+ - The `-e` flag installs in "editable" mode (changes to code are reflected)
55
+ - **Why**: Makes your package importable as `job_writing_agent` inside the container
56
+
57
+ ---
58
+
59
+ ### 5. Register Your Graphs (Line 25)
60
+ ```dockerfile
61
+ ENV LANGSERVE_GRAPHS='{"job_app_graph": "/deps/job_writer/src/job_writing_agent/workflow.py:job_app_graph", ...}'
62
+ ```
63
+ - **Purpose**: Tells LangServe which graphs to expose as REST APIs
64
+ - **Format**: JSON mapping of `graph_name` → `module_path:attribute_name`
65
+ - **What it does**:
66
+ - `job_app_graph` → Exposes `JobWorkflow.job_app_graph` property as an API endpoint
67
+ - `research_workflow` → Exposes the research subgraph
68
+ - `data_loading_workflow` → Exposes the data loading subgraph
69
+ - **Result**: Each graph becomes a REST API endpoint like `/invoke/job_app_graph`
70
+
71
+ ---
72
+
73
+ ### 6. Protect LangGraph API (Lines 33-35)
74
+ ```dockerfile
75
+ RUN mkdir -p /api/langgraph_api /api/langgraph_runtime /api/langgraph_license && \
76
+ touch /api/langgraph_api/__init__.py /api/langgraph_runtime/__init__.py /api/langgraph_license/__init__.py
77
+ RUN PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir --no-deps -e /api
78
+ ```
79
+ - **Purpose**: Prevents your dependencies from accidentally overwriting LangGraph API packages
80
+ - **How**:
81
+ 1. Creates placeholder `__init__.py` files for LangGraph packages
82
+ 2. Reinstalls LangGraph API (without dependencies) to ensure it's not overwritten
83
+ - **Why**: If your `requirements.txt` has conflicting versions, this ensures LangGraph API stays intact
84
+
85
+ ---
86
+
87
+ ### 7. Cleanup Build Tools (Lines 37-41)
88
+ ```dockerfile
89
+ RUN pip uninstall -y pip setuptools wheel
90
+ RUN rm -rf /usr/local/lib/python*/site-packages/pip* ...
91
+ RUN uv pip uninstall --system pip setuptools wheel && rm /usr/bin/uv /usr/bin/uvx
92
+ ```
93
+ - **Purpose**: Removes all build tools to make the image smaller and more secure
94
+ - **What gets removed**:
95
+ - `pip`, `setuptools`, `wheel` (Python build tools)
96
+ - `uv` and `uvx` (package installers)
97
+ - **Why**: These tools aren't needed at runtime, only during build
98
+ - **Security**: Smaller attack surface (can't install malicious packages at runtime)
99
+
100
+ ---
101
+
102
+ ### 8. Set Working Directory (Line 45)
103
+ ```dockerfile
104
+ WORKDIR /deps/job_writer
105
+ ```
106
+ - **Purpose**: Sets the default directory when the container starts
107
+ - **Why**: Makes it easier to reference files relative to your project root
108
+
109
+ ---
110
+
111
+ ## How It Works at Runtime
112
+
113
+ When this container runs:
114
+
115
+ 1. **LangServe starts automatically** (from base image)
116
+ 2. **Reads `LANGSERVE_GRAPHS`** environment variable
117
+ 3. **Imports your graphs** from the specified paths
118
+ 4. **Exposes REST API endpoints**:
119
+ - `POST /invoke/job_app_graph` - Main workflow
120
+ - `POST /invoke/research_workflow` - Research subgraph
121
+ - `POST /invoke/data_loading_workflow` - Data loading subgraph
122
+ 5. **Handles state management** automatically (checkpointing, persistence)
123
+
124
+ ## Example API Usage
125
+
126
+ Once deployed, you can call your agent like this:
127
+
128
+ ```bash
129
+ curl -X POST http://your-deployment/invoke/job_app_graph \
130
+ -H "Content-Type: application/json" \
131
+ -d '{
132
+ "resume_path": "...",
133
+ "job_description_source": "...",
134
+ "content": "cover_letter"
135
+ }'
136
+ ```
137
+
138
+ ## Key Points
139
+
140
+ ✅ **Optimized for LangGraph Cloud** - Uses official base image
141
+ ✅ **Automatic API generation** - No need to write FastAPI code
142
+ ✅ **State management** - Built-in checkpointing and persistence
143
+ ✅ **Security** - Removes build tools from final image
144
+ ✅ **Small image** - No-cache installs, no bytecode files
145
+
146
+ This is the **easiest deployment option** for LangGraph apps - just build and push this Docker image!
147
+
Dockerfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM langchain/langgraph-api:3.12
2
+
3
+
4
+
5
+
6
+
7
+
8
+
9
+ # -- Adding local package . --
10
+ ADD . /deps/job_writer
11
+ # -- End of local package . --
12
+
13
+
14
+
15
+ # -- Installing all local dependencies --
16
+
17
+ RUN for dep in /deps/*; do echo "Installing $dep"; if [ -d "$dep" ]; then echo "Installing $dep"; (cd "$dep" && PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir -c /api/constraints.txt -e .); fi; done
18
+
19
+ # -- End of local dependencies install --
20
+
21
+ ENV LANGSERVE_GRAPHS='{"job_app_graph": "/deps/job_writer/src/job_writing_agent/workflow.py:job_app_graph", "research_workflow": "/deps/job_writer/src/job_writing_agent/nodes/research_workflow.py:research_workflow", "data_loading_workflow": "/deps/job_writer/src/job_writing_agent/nodes/initializing.py:data_loading_workflow"}'
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+ # -- Ensure user deps didn't inadvertently overwrite langgraph-api
30
+ RUN mkdir -p /api/langgraph_api /api/langgraph_runtime /api/langgraph_license && touch /api/langgraph_api/__init__.py /api/langgraph_runtime/__init__.py /api/langgraph_license/__init__.py
31
+ RUN PYTHONDONTWRITEBYTECODE=1 uv pip install --system --no-cache-dir --no-deps -e /api
32
+ # -- End of ensuring user deps didn't inadvertently overwrite langgraph-api --
33
+ # -- Removing build deps from the final image ~<:===~~~ --
34
+ RUN pip uninstall -y pip setuptools wheel
35
+ RUN rm -rf /usr/local/lib/python*/site-packages/pip* /usr/local/lib/python*/site-packages/setuptools* /usr/local/lib/python*/site-packages/wheel* && find /usr/local/bin -name "pip*" -delete || true
36
+ RUN rm -rf /usr/lib/python*/site-packages/pip* /usr/lib/python*/site-packages/setuptools* /usr/lib/python*/site-packages/wheel* && find /usr/bin -name "pip*" -delete || true
37
+ RUN uv pip uninstall --system pip setuptools wheel && rm /usr/bin/uv /usr/bin/uvx
38
+
39
+
40
+
41
+ WORKDIR /deps/job_writer
docker-compose.override.example.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Example override file for local development
2
+ # Copy this to docker-compose.override.yml to customize settings
3
+ # docker-compose automatically loads override files
4
+
5
+ version: "3.9"
6
+ services:
7
+ redis:
8
+ # Override Redis port for local development
9
+ ports:
10
+ - "6380:6379" # Use different port if 6379 is already in use
11
+
12
+ postgres:
13
+ # Override Postgres port for local development
14
+ ports:
15
+ - "5433:5432" # Use different port if 5432 is already in use
16
+ environment:
17
+ # Override credentials for local dev
18
+ - POSTGRES_USER=dev_user
19
+ - POSTGRES_PASSWORD=dev_password
20
+ - POSTGRES_DB=job_app_dev
21
+
docker-compose.yml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ redis:
3
+ image: redis:6-alpine
4
+ container_name: job-app-redis
5
+ ports:
6
+ - "6379:6379"
7
+ healthcheck:
8
+ test: ["CMD", "redis-cli", "ping"]
9
+ interval: 5s
10
+ timeout: 3s
11
+ retries: 5
12
+ networks:
13
+ - job-app-network
14
+
15
+ postgres:
16
+ image: postgres:16-alpine
17
+ container_name: job-app-postgres
18
+ environment:
19
+ - POSTGRES_USER=postgres
20
+ - POSTGRES_PASSWORD=postgres
21
+ - POSTGRES_DB=postgres
22
+ ports:
23
+ - "5432:5432"
24
+ healthcheck:
25
+ test: ["CMD-SHELL", "pg_isready -U postgres"]
26
+ interval: 5s
27
+ timeout: 5s
28
+ retries: 5
29
+ volumes:
30
+ - pg_data_local:/var/lib/postgresql/data
31
+ networks:
32
+ - job-app-network
33
+
34
+ # Optional: Uncomment to run your agent container alongside Redis/Postgres
35
+ agent:
36
+ build:
37
+ context: .
38
+ dockerfile: Dockerfile
39
+ image: job-app-workflow:latest
40
+ container_name: job-app-agent
41
+ ports:
42
+ - "8000:8000"
43
+ environment:
44
+ - REDIS_URL=redis://redis:6379
45
+ - POSTGRES_URL=postgresql://postgres:postgres@postgres:5432/postgres
46
+ depends_on:
47
+ redis:
48
+ condition: service_healthy
49
+ postgres:
50
+ condition: service_healthy
51
+ networks:
52
+ - job-app-network
53
+
54
+ networks:
55
+ job-app-network:
56
+ driver: bridge
57
+
58
+ volumes:
59
+ pg_data_local:
langgraph.json CHANGED
@@ -1,10 +1,12 @@
1
  {
2
  "dependencies": [
3
- "nodes"
4
  ],
5
  "graphs": {
6
- "research_workflow": "./src/job_writing_agent/nodes/research_workflow.py:research_workflow"
 
 
7
  },
8
- "env": "./job_writer_env",
9
  "python_version": "3.12"
10
  }
 
1
  {
2
  "dependencies": [
3
+ "."
4
  ],
5
  "graphs": {
6
+ "job_app_graph": "./src/job_writing_agent/workflow.py:job_app_graph",
7
+ "research_workflow": "./src/job_writing_agent/nodes/research_workflow.py:research_workflow",
8
+ "data_loading_workflow": "./src/job_writing_agent/nodes/initializing.py:data_loading_workflow"
9
  },
10
+ "env": "./app_env",
11
  "python_version": "3.12"
12
  }
pyproject.toml CHANGED
@@ -23,6 +23,7 @@ dependencies = [
23
  "babel==2.17.0",
24
  "backoff==2.2.1",
25
  "beautifulsoup4==4.14.2",
 
26
  "blinker==1.9.0",
27
  "blockbuster==1.5.25",
28
  "bs4==0.0.2",
@@ -103,20 +104,19 @@ dependencies = [
103
  "jsonschema-specifications==2025.9.1",
104
  "justext==3.0.2",
105
  "kiwisolver==1.4.9",
106
- "langchain==0.3.27",
107
- "langchain-cerebras==0.5.0",
108
- "langchain-community==0.3.30",
109
- "langchain-core==0.3.78",
110
- "langchain-ollama==0.3.10",
111
- "langchain-openai==0.3.34",
112
- "langchain-tavily==0.2.12",
113
- "langchain-text-splitters==0.3.11",
114
  "langfuse==3.6.1",
115
- "langgraph==0.6.8",
116
- "langgraph-api==0.4.46",
117
- "langgraph-checkpoint==2.1.1",
118
- "langgraph-cli==0.4.4",
119
- "langgraph-prebuilt==0.6.4",
120
  "langgraph-runtime-inmem==0.14.1",
121
  "langgraph-sdk==0.2.9",
122
  "langsmith==0.4.32",
@@ -168,8 +168,8 @@ dependencies = [
168
  "opentelemetry-sdk==1.37.0",
169
  "opentelemetry-semantic-conventions==0.58b0",
170
  "optuna==4.5.0",
171
- "orjson==3.11.3",
172
- "ormsgpack==1.10.0",
173
  "packaging==25.0",
174
  "pandas==2.3.3",
175
  "parse==1.20.2",
@@ -212,6 +212,7 @@ dependencies = [
212
  "rich-rst==1.3.1",
213
  "rpds-py==0.27.1",
214
  "rsa==4.9.1",
 
215
  "scikit-learn==1.7.2",
216
  "scipy==1.16.2",
217
  "setuptools==80.9.0",
@@ -258,3 +259,4 @@ dependencies = [
258
 
259
  [tool.setuptools.packages.find]
260
  where = ["src"]
 
 
23
  "babel==2.17.0",
24
  "backoff==2.2.1",
25
  "beautifulsoup4==4.14.2",
26
+ "black>=25.12.0",
27
  "blinker==1.9.0",
28
  "blockbuster==1.5.25",
29
  "bs4==0.0.2",
 
104
  "jsonschema-specifications==2025.9.1",
105
  "justext==3.0.2",
106
  "kiwisolver==1.4.9",
107
+ "langchain",
108
+ "langchain-cerebras",
109
+ "langchain-community",
110
+ "langchain-core>=1.0.0",
111
+ "langchain-ollama",
112
+ "langchain-openai",
113
+ "langchain-tavily",
114
+ "langchain-text-splitters",
115
  "langfuse==3.6.1",
116
+ "langgraph",
117
+ "langgraph-api",
118
+ "langgraph-cli",
119
+ "langgraph-prebuilt",
 
120
  "langgraph-runtime-inmem==0.14.1",
121
  "langgraph-sdk==0.2.9",
122
  "langsmith==0.4.32",
 
168
  "opentelemetry-sdk==1.37.0",
169
  "opentelemetry-semantic-conventions==0.58b0",
170
  "optuna==4.5.0",
171
+ "orjson>=3.9.7,<3.10.17",
172
+ "ormsgpack>=1.12.0",
173
  "packaging==25.0",
174
  "pandas==2.3.3",
175
  "parse==1.20.2",
 
212
  "rich-rst==1.3.1",
213
  "rpds-py==0.27.1",
214
  "rsa==4.9.1",
215
+ "ruff>=0.14.10",
216
  "scikit-learn==1.7.2",
217
  "scipy==1.16.2",
218
  "setuptools==80.9.0",
 
259
 
260
  [tool.setuptools.packages.find]
261
  where = ["src"]
262
+
pyrightconfig.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "include": [
3
+ "src"
4
+ ],
5
+ "exclude": [
6
+ "**/__pycache__",
7
+ "**/.*",
8
+ "app_env",
9
+ "node_modules"
10
+ ],
11
+ "extraPaths": [
12
+ "src"
13
+ ],
14
+ "pythonVersion": "3.12",
15
+ "typeCheckingMode": "basic",
16
+ "reportMissingImports": true,
17
+ "reportMissingTypeStubs": false
18
+ }
src/job_writing_agent/agents/nodes.py CHANGED
@@ -10,8 +10,9 @@ from datetime import datetime
10
 
11
  from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
12
  from langchain_core.output_parsers import StrOutputParser
 
13
 
14
- from ..classes.classes import AppState, ResearchState
15
  from ..prompts.templates import (
16
  CRITIQUE_PROMPT,
17
  PERSONA_DEVELOPMENT_PROMPT,
@@ -26,30 +27,38 @@ logger = logging.getLogger(__name__)
26
  # Constants
27
  CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
28
 
29
- llm_provider = LLMFactory()
30
 
31
- llm = llm_provider.create_langchain(
32
- "qwen/qwen3-4b:free", provider="openrouter", temperature=0.3
33
- )
34
-
35
-
36
- def create_draft(state: ResearchState) -> AppState:
37
  """Create initial draft of the application material."""
 
 
 
 
 
 
38
  # Determine which type of content we're creating
39
- current_application_session = state.get("company_research_data", {})
40
 
41
  content_category = state.get("content_category", "cover_letter")
42
 
 
 
 
43
  try:
 
44
  if state.get("vector_store"):
45
  vector_store = state.get("vector_store")
46
 
47
  # Extract key requirements from job description
48
  prompt = PERSONA_DEVELOPMENT_PROMPT | llm | StrOutputParser()
49
 
50
- if current_application_session:
51
  key_requirements = prompt.invoke(
52
- {"job_description": current_application_session["job_description"]}
 
 
 
 
53
  )
54
  else:
55
  return key_requirements
@@ -68,13 +77,16 @@ def create_draft(state: ResearchState) -> AppState:
68
  highly_relevant_resume = "\n".join(
69
  [doc.page_content for doc in relevant_docs]
70
  )
 
71
  resume_text = f"""
72
  # Most Relevant Experience
73
  {highly_relevant_resume}
74
 
75
  # Full Resume
76
- {resume_text}
77
  """
 
 
78
  except Exception as e:
79
  logger.warning(f"Could not use vector search for relevant resume parts: {e}")
80
  # Continue with regular resume text
@@ -91,31 +103,42 @@ def create_draft(state: ResearchState) -> AppState:
91
  # Create the draft using the selected prompt template
92
  CurrentSessionContextMessage = HumanMessagePromptTemplate.from_template(
93
  """
94
- Below is the Job Description and Resume enclosed in triple backticks.
95
 
96
- Job Description and Resume:
97
 
98
- ```
99
  {current_job_role}
100
-
101
- ```
102
- Use the Company Research Data below in to create a cover letter that highlights the match between my qualifications and the job requirements and aligns with the company's values and culture.
103
- Company Research Data:
104
- #company_research_data
105
-
106
- Create a cover letter that highlights the match between my qualifications and the job requirements.
 
 
 
 
 
 
107
  """,
108
- input_variables=["current_job_role", "company_research_data"],
 
 
 
 
109
  )
110
 
111
  FirstDraftGenerationPromptTemplate.append(CurrentSessionContextMessage)
112
 
113
  # Invoke the chain with the appropriate inputs
114
- chain = (
115
  (
116
  {
117
  "current_job_role": lambda x: x["current_job_role"],
118
  "company_research_data": lambda x: x["company_research_data"],
 
119
  }
120
  )
121
  | FirstDraftGenerationPromptTemplate
@@ -123,59 +146,203 @@ def create_draft(state: ResearchState) -> AppState:
123
  )
124
 
125
  # Prepare the inputs
126
- inputs = {
127
- "current_job_role": current_application_session["job_description"],
128
- "company_research_data": current_application_session["tavily_search"],
 
 
 
129
  }
130
 
131
- response = chain.invoke(inputs)
132
- logger.info(f"Draft has been created: {response}")
133
- state["draft"] = response
134
- return state
 
 
 
 
 
 
 
 
135
 
136
 
137
- def critique_draft(state: AppState) -> AppState:
138
- """Critique the draft for improvements."""
139
- critique = llm.invoke(
140
- CRITIQUE_PROMPT.format(
141
- job_description=state["job_description"][0], draft=state["draft"]
 
 
 
 
 
 
 
142
  )
143
- )
144
 
145
- # Store the critique for reference during human feedback
146
- state["critique"] = critique
147
- return state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
 
149
 
150
- def human_approval(state: AppState) -> AppState:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  """Human-in-the-loop checkpoint for feedback on the draft."""
152
  # This is a placeholder function that would be replaced by actual UI interaction
153
  print("\n" + "=" * 80)
154
  print("DRAFT FOR REVIEW:")
155
  print(state["draft"])
156
  print("\nAUTOMATIC CRITIQUE:")
157
- print(state.get("critique", "No critique available"))
158
  print("=" * 80)
159
  print("\nPlease provide your feedback (press Enter to continue with no changes):")
160
 
161
  # In a real implementation, this would be handled by the UI
162
- feedback = input()
163
- state["feedback"] = feedback
164
- return state
 
 
 
 
 
 
 
165
 
166
 
167
- def finalize_document(state: AppState) -> AppState:
168
  """Incorporate feedback and finalize the document."""
169
- if not state["feedback"].strip():
170
- state["final"] = state["draft"]
171
- return state
172
 
173
- final = llm.invoke(
174
- REVISION_PROMPT.format(draft=state["draft"], feedback=state["feedback"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  )
176
 
177
- state["final"] = final
178
- return state
 
 
 
 
 
 
 
 
 
179
 
180
 
181
  """
 
10
 
11
  from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
12
  from langchain_core.output_parsers import StrOutputParser
13
+ from langchain_core.messages import SystemMessage
14
 
15
+ from ..classes.classes import AppState, ResearchState, ResultState, DataLoadState
16
  from ..prompts.templates import (
17
  CRITIQUE_PROMPT,
18
  PERSONA_DEVELOPMENT_PROMPT,
 
27
  # Constants
28
  CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
29
 
 
30
 
31
+ def create_draft(state: ResearchState) -> ResultState:
 
 
 
 
 
32
  """Create initial draft of the application material."""
33
+ # Create LLM inside function (lazy initialization)
34
+ llm_provider = LLMFactory()
35
+ llm = llm_provider.create_langchain(
36
+ "mistralai/mistral-7b-instruct:free", provider="openrouter", temperature=0.3
37
+ )
38
+
39
  # Determine which type of content we're creating
40
+ company_background_information = state.get("company_research_data", {})
41
 
42
  content_category = state.get("content_category", "cover_letter")
43
 
44
+ # Get the original resume text from state (used later if vector search is available)
45
+ original_resume_text = company_background_information.get("resume", "")
46
+
47
  try:
48
+ # Not yet implemented
49
  if state.get("vector_store"):
50
  vector_store = state.get("vector_store")
51
 
52
  # Extract key requirements from job description
53
  prompt = PERSONA_DEVELOPMENT_PROMPT | llm | StrOutputParser()
54
 
55
+ if company_background_information:
56
  key_requirements = prompt.invoke(
57
+ {
58
+ "job_description": company_background_information[
59
+ "job_description"
60
+ ]
61
+ }
62
  )
63
  else:
64
  return key_requirements
 
77
  highly_relevant_resume = "\n".join(
78
  [doc.page_content for doc in relevant_docs]
79
  )
80
+ # Combine highly relevant parts with full resume text
81
  resume_text = f"""
82
  # Most Relevant Experience
83
  {highly_relevant_resume}
84
 
85
  # Full Resume
86
+ {original_resume_text}
87
  """
88
+ # Update the company_background_information with the enhanced resume
89
+ company_background_information["resume"] = resume_text
90
  except Exception as e:
91
  logger.warning(f"Could not use vector search for relevant resume parts: {e}")
92
  # Continue with regular resume text
 
103
  # Create the draft using the selected prompt template
104
  CurrentSessionContextMessage = HumanMessagePromptTemplate.from_template(
105
  """
106
+ Below is the Job Description, Candidate Resume, and Company Research Data enclosed in triple backticks.
107
 
108
+ **Job Description:**
109
 
110
+ '''
111
  {current_job_role}
112
+ '''
113
+
114
+ **Candidate Resume:**
115
+
116
+ '''
117
+ {candidate_resume}
118
+ '''
119
+
120
+ **Company Research Data:**
121
+
122
+ '''
123
+ {company_research_data}
124
+ '''
125
  """,
126
+ input_variables=[
127
+ "current_job_role",
128
+ "company_research_data",
129
+ "candidate_resume",
130
+ ],
131
  )
132
 
133
  FirstDraftGenerationPromptTemplate.append(CurrentSessionContextMessage)
134
 
135
  # Invoke the chain with the appropriate inputs
136
+ draft_generation_chain = (
137
  (
138
  {
139
  "current_job_role": lambda x: x["current_job_role"],
140
  "company_research_data": lambda x: x["company_research_data"],
141
+ "candidate_resume": lambda x: x["candidate_resume"],
142
  }
143
  )
144
  | FirstDraftGenerationPromptTemplate
 
146
  )
147
 
148
  # Prepare the inputs
149
+ application_background_data = {
150
+ "current_job_role": company_background_information["job_description"],
151
+ "company_research_data": company_background_information[
152
+ "company_research_data_summary"
153
+ ],
154
+ "candidate_resume": company_background_information["resume"],
155
  }
156
 
157
+ response = draft_generation_chain.invoke(application_background_data)
158
+ logger.info(f"Draft has been created: {response.content}")
159
+ app_state = ResultState(
160
+ draft=response.content,
161
+ feedback="",
162
+ critique_feedback="",
163
+ current_node="create_draft",
164
+ company_research_data=company_background_information,
165
+ output_data={},
166
+ )
167
+
168
+ return app_state
169
 
170
 
171
+ def critique_draft(state: ResultState) -> ResultState:
172
+ """
173
+ Critique the draft for improvements.
174
+ Provides external evaluation focusing on job requirements, tone, clarity, and style.
175
+ """
176
+ try:
177
+ logger.info("Critiquing draft...")
178
+
179
+ # Create LLM inside function (lazy initialization)
180
+ llm_provider = LLMFactory()
181
+ llm = llm_provider.create_langchain(
182
+ "mistralai/mistral-7b-instruct:free", provider="openrouter", temperature=0.3
183
  )
 
184
 
185
+ job_description = str(state["company_research_data"].get("job_description", ""))
186
+ draft = str(state.get("draft", ""))
187
+
188
+ # Debug logging to verify values
189
+ logger.debug(f"Job description length: {len(job_description)}")
190
+ logger.debug(f"Draft length: {len(draft)}")
191
+
192
+ if not job_description or not draft:
193
+ logger.warning("Missing job_description or draft in state")
194
+ # Return state with empty feedback
195
+ return ResultState(
196
+ draft=draft,
197
+ feedback="",
198
+ critique_feedback="",
199
+ current_node="critique",
200
+ company_research_data=state["company_research_data"],
201
+ output_data=state["output_data"],
202
+ )
203
+
204
+ # Use the same pattern as create_draft:
205
+ # 1. Create ChatPromptTemplate from SystemMessage
206
+ # 2. Append HumanMessagePromptTemplate with variables
207
+ # 3. Create chain and invoke
208
+
209
+ # Extract SystemMessage from CRITIQUE_PROMPT
210
+
211
+ critique_system_message = SystemMessage(
212
+ content="You are a professional editor who specializes in job applications. Provide constructive feedback."
213
+ )
214
+
215
+ # Create ChatPromptTemplate from SystemMessage (like line 90-94 in create_draft)
216
+ CritiquePromptTemplate = ChatPromptTemplate([critique_system_message])
217
+
218
+ # Append HumanMessagePromptTemplate with variables (like line 97-124 in create_draft)
219
+ CritiqueContextMessage = HumanMessagePromptTemplate.from_template(
220
+ """
221
+ # Job Description
222
+ {job_description}
223
+
224
+ # Current Draft
225
+ {draft}
226
+
227
+ Critique this draft and suggest specific improvements. Focus on:
228
+ 1. How well it targets the job requirements
229
+ 2. Professional tone and language
230
+ 3. Clarity and impact
231
+ 4. Grammar and style
232
+
233
+ Return your critique in a constructive, actionable format.
234
+ """,
235
+ input_variables=["job_description", "draft"],
236
+ )
237
 
238
+ CritiquePromptTemplate.append(CritiqueContextMessage)
239
 
240
+ # Create chain (like line 129-139 in create_draft)
241
+ critique_chain = (
242
+ {
243
+ "job_description": lambda x: x["job_description"],
244
+ "draft": lambda x: x["draft"],
245
+ }
246
+ | CritiquePromptTemplate
247
+ | llm
248
+ )
249
+
250
+ # Invoke with input variables (like line 150 in create_draft)
251
+ critique = critique_chain.invoke(
252
+ {
253
+ "job_description": job_description,
254
+ "draft": draft,
255
+ }
256
+ )
257
+
258
+ critique_content = (
259
+ critique.content if hasattr(critique, "content") else str(critique)
260
+ )
261
+ logger.info("Draft critique completed")
262
+
263
+ # Store the critique for reference during revision
264
+ app_state = ResultState(
265
+ draft=state["draft"],
266
+ feedback=state["feedback"],
267
+ critique_feedback=critique_content,
268
+ current_node="critique",
269
+ company_research_data=state["company_research_data"],
270
+ output_data=state["output_data"],
271
+ )
272
+ return app_state
273
+
274
+ except Exception as e:
275
+ logger.error(f"Error in critique_draft: {e}", exc_info=True)
276
+ # Return state unchanged on error
277
+ return state
278
+
279
+
280
+ def human_approval(state: ResultState) -> ResultState:
281
  """Human-in-the-loop checkpoint for feedback on the draft."""
282
  # This is a placeholder function that would be replaced by actual UI interaction
283
  print("\n" + "=" * 80)
284
  print("DRAFT FOR REVIEW:")
285
  print(state["draft"])
286
  print("\nAUTOMATIC CRITIQUE:")
287
+ print(state.get("critique_feedback", "No critique available"))
288
  print("=" * 80)
289
  print("\nPlease provide your feedback (press Enter to continue with no changes):")
290
 
291
  # In a real implementation, this would be handled by the UI
292
+ human_feedback = input()
293
+ result_state = ResultState(
294
+ draft=state["draft"],
295
+ feedback=human_feedback,
296
+ critique_feedback=state["critique_feedback"],
297
+ current_node="human_approval",
298
+ company_research_data=state["company_research_data"],
299
+ output_data=state["output_data"],
300
+ )
301
+ return result_state
302
 
303
 
304
+ def finalize_document(state: ResultState) -> DataLoadState:
305
  """Incorporate feedback and finalize the document."""
 
 
 
306
 
307
+ # Create LLM inside function (lazy initialization)
308
+ llm_provider = LLMFactory()
309
+ llm = llm_provider.create_langchain(
310
+ "mistralai/mistral-7b-instruct:free", provider="openrouter", temperature=0.3
311
+ )
312
+
313
+ # Create chain like in critique_draft (line 229-236)
314
+ revision_chain = (
315
+ {
316
+ "draft": lambda x: x["draft"],
317
+ "feedback": lambda x: x["feedback"],
318
+ "critique_feedback": lambda x: x["critique_feedback"],
319
+ }
320
+ | REVISION_PROMPT
321
+ | llm
322
+ )
323
+
324
+ print(f"revision_chain: {revision_chain}")
325
+
326
+ # Invoke with input variables (like line 239 in critique_draft)
327
+ final_content = revision_chain.invoke(
328
+ {
329
+ "draft": state["draft"],
330
+ "feedback": state["feedback"],
331
+ "critique_feedback": state["critique_feedback"],
332
+ }
333
  )
334
 
335
+ app_state = DataLoadState(
336
+ draft=state["draft"],
337
+ feedback=state["feedback"],
338
+ critique_feedback=state["critique_feedback"],
339
+ company_research_data=state["company_research_data"],
340
+ current_node="finalize",
341
+ output_data=final_content.content
342
+ if hasattr(final_content, "content")
343
+ else str(final_content),
344
+ )
345
+ return app_state
346
 
347
 
348
  """
src/job_writing_agent/agents/output_schema.py CHANGED
@@ -2,12 +2,24 @@ from pydantic import BaseModel, Field, field_validator
2
  from typing import List, Optional
3
  import dspy
4
 
 
5
  class TavilyQuerySet(BaseModel):
6
- query1: Optional[List[str]] = Field(default=None, description="First search query and its rationale, e.g., ['query text']")
7
- query2: Optional[List[str]] = Field(default=None, description="Second search query and its rationale")
8
- query3: Optional[List[str]] = Field(default=None, description="Third search query and its rationale")
9
- query4: Optional[List[str]] = Field(default=None, description="Fourth search query and its rationale")
10
- query5: Optional[List[str]] = Field(default=None, description="Fifth search query and its rationale")
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  @field_validator("query1", "query2", "query3", "query4", "query5", mode="after")
13
  @classmethod
@@ -16,13 +28,38 @@ class TavilyQuerySet(BaseModel):
16
  if v is not None: # Only validate if the list is actually provided
17
  if len(v) != 1:
18
  # Updated error message for clarity
19
- raise ValueError("Each query list, when provided, must contain exactly one string: the query text.")
 
 
20
  return v
21
 
 
22
  class TavilySearchQueries(dspy.Signature):
23
- """Use the job description and company name
24
  to create exactly 5 search queries for the tavily search tool in JSON Format"""
25
- job_description = dspy.InputField(desc="Job description of the role that candidate is applying for.")
26
- company_name = dspy.InputField(desc="Name of the company the candidate is applying for.")
27
- search_queries = dspy.OutputField(desc="Dictionary of tavily search queries which will gather understanding of the company and it's culture", json=True)
28
- search_query_relevance = dspy.OutputField(desc="Dictionary of relevance for each tavily search query that is generated", json=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from typing import List, Optional
3
  import dspy
4
 
5
+
6
  class TavilyQuerySet(BaseModel):
7
+ query1: Optional[List[str]] = Field(
8
+ default=None,
9
+ description="First search query and its rationale, e.g., ['query text']",
10
+ )
11
+ query2: Optional[List[str]] = Field(
12
+ default=None, description="Second search query and its rationale"
13
+ )
14
+ query3: Optional[List[str]] = Field(
15
+ default=None, description="Third search query and its rationale"
16
+ )
17
+ query4: Optional[List[str]] = Field(
18
+ default=None, description="Fourth search query and its rationale"
19
+ )
20
+ query5: Optional[List[str]] = Field(
21
+ default=None, description="Fifth search query and its rationale"
22
+ )
23
 
24
  @field_validator("query1", "query2", "query3", "query4", "query5", mode="after")
25
  @classmethod
 
28
  if v is not None: # Only validate if the list is actually provided
29
  if len(v) != 1:
30
  # Updated error message for clarity
31
+ raise ValueError(
32
+ "Each query list, when provided, must contain exactly one string: the query text."
33
+ )
34
  return v
35
 
36
+
37
  class TavilySearchQueries(dspy.Signature):
38
+ """Use the job description and company name
39
  to create exactly 5 search queries for the tavily search tool in JSON Format"""
40
+
41
+ job_description = dspy.InputField(
42
+ desc="Job description of the role that candidate is applying for."
43
+ )
44
+ company_name = dspy.InputField(
45
+ desc="Name of the company the candidate is applying for."
46
+ )
47
+ search_queries = dspy.OutputField(
48
+ desc="Dictionary of tavily search queries which will gather understanding of the company and it's culture",
49
+ json=True,
50
+ )
51
+ search_query_relevance = dspy.OutputField(
52
+ desc="Dictionary of relevance for each tavily search query that is generated",
53
+ json=True,
54
+ )
55
+
56
+
57
+ class CompanyResearchDataSummarizationSchema(dspy.Signature):
58
+ """This schema is used to summarize the company research data into a concise summary to provide a clear understanding of the company."""
59
+
60
+ company_research_data = dspy.InputField(
61
+ desc="These are the results of the tavily search queries that were generated. They have been filtered for relevance and are now ready to be summarized."
62
+ )
63
+ company_research_data_summary = dspy.OutputField(
64
+ desc="This is summary of the company research data that will be used by a job application writer to assist the candidate in writing content supporting the job application. The summary should be relevant to the job application and the company.",
65
+ )
src/job_writing_agent/classes/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
- from .classes import AppState, ResearchState, DataLoadState
2
 
3
- __all__ = ["AppState", "ResearchState", "DataLoadState"]
 
1
+ from .classes import AppState, ResearchState, DataLoadState, ResultState
2
 
3
+ __all__ = ["AppState", "ResearchState", "DataLoadState", "ResultState"]
src/job_writing_agent/classes/classes.py CHANGED
@@ -2,11 +2,36 @@
2
  State definitions for the Job Writer LangGraph Workflow.
3
  """
4
 
5
- from langgraph.store.base import Op
6
  from typing_extensions import List, Dict, Any
7
  from langgraph.graph import MessagesState
8
  from dataclasses import dataclass
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  @dataclass
11
  class AppState(MessagesState):
12
  """
@@ -23,33 +48,45 @@ class AppState(MessagesState):
23
  final: Final version of the application material
24
  content: Type of application material to generate
25
  """
 
26
  resume_path: str
27
  job_description_source: str
28
- company_research_data: Dict[str, Any]
29
- draft: str
30
- feedback: str
31
- final_version: str
32
  content: str # "cover_letter", "bullets", "linkedin_note"
33
  current_node: str
34
 
35
 
36
- class DataLoadState(MessagesState):
37
  """
38
  State container for the job application writer workflow.
 
39
 
40
  Attributes:
41
  resume: List of text chunks from the candidate's resume
42
  job_description: List of text chunks from the job description
43
  persona: The writing persona to use ("recruiter" or "hiring_manager")
44
  content: Type of application material to generate
 
 
 
 
 
45
  """
 
46
  resume_path: str
47
  job_description_source: str
 
48
  resume: str
49
  job_description: str
50
  company_name: str
51
  current_node: str
52
- company_research_data: Dict[str, Any]
 
 
 
 
 
 
 
53
 
54
 
55
  class ResearchState(MessagesState):
@@ -60,6 +97,22 @@ class ResearchState(MessagesState):
60
  attempted_search_queries: List of queries used extracted from the job description
61
  compiled_knowledge: Compiled knowledge from the research
62
  """
 
63
  company_research_data: Dict[str, Any]
64
  attempted_search_queries: List[str]
65
  current_node: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  State definitions for the Job Writer LangGraph Workflow.
3
  """
4
 
5
+ from typing import Annotated
6
  from typing_extensions import List, Dict, Any
7
  from langgraph.graph import MessagesState
8
  from dataclasses import dataclass
9
 
10
+
11
+ def merge_dict_reducer(
12
+ x: Dict[str, Any] | None, y: Dict[str, Any] | None
13
+ ) -> Dict[str, Any]:
14
+ """
15
+ Reducer function to merge two dictionaries.
16
+ Used for company_research_data to allow parallel nodes to update it.
17
+
18
+ Args:
19
+ x: First dictionary (existing state or None)
20
+ y: Second dictionary (new update or None)
21
+
22
+ Returns:
23
+ Merged dictionary with y taking precedence for overlapping keys
24
+ """
25
+ # Handle None cases - treat as empty dict
26
+ if x is None:
27
+ x = {}
28
+ if y is None:
29
+ y = {}
30
+
31
+ # Merge dictionaries, with y taking precedence for overlapping keys
32
+ return {**x, **y}
33
+
34
+
35
  @dataclass
36
  class AppState(MessagesState):
37
  """
 
48
  final: Final version of the application material
49
  content: Type of application material to generate
50
  """
51
+
52
  resume_path: str
53
  job_description_source: str
 
 
 
 
54
  content: str # "cover_letter", "bullets", "linkedin_note"
55
  current_node: str
56
 
57
 
58
+ class DataLoadState(MessagesState, total=False):
59
  """
60
  State container for the job application writer workflow.
61
+ Includes all fields needed throughout the entire workflow.
62
 
63
  Attributes:
64
  resume: List of text chunks from the candidate's resume
65
  job_description: List of text chunks from the job description
66
  persona: The writing persona to use ("recruiter" or "hiring_manager")
67
  content: Type of application material to generate
68
+ draft: Current draft of the application material
69
+ feedback: Human feedback on the draft
70
+ critique_feedback: Automated critique feedback
71
+ output_data: Final output data
72
+ next_node: Next node to route to after data loading subgraph
73
  """
74
+
75
  resume_path: str
76
  job_description_source: str
77
+ content: str # "cover_letter", "bullets", "linkedin_note"
78
  resume: str
79
  job_description: str
80
  company_name: str
81
  current_node: str
82
+ next_node: str # For routing after data loading subgraph
83
+ # Use Annotated with reducer to allow parallel nodes to merge dictionary updates
84
+ company_research_data: Annotated[Dict[str, Any], merge_dict_reducer]
85
+ # Result fields (added for final output - optional, populated later)
86
+ draft: str
87
+ feedback: str
88
+ critique_feedback: str
89
+ output_data: str
90
 
91
 
92
  class ResearchState(MessagesState):
 
97
  attempted_search_queries: List of queries used extracted from the job description
98
  compiled_knowledge: Compiled knowledge from the research
99
  """
100
+
101
  company_research_data: Dict[str, Any]
102
  attempted_search_queries: List[str]
103
  current_node: str
104
+
105
+
106
+ class ResultState(MessagesState):
107
+ """
108
+ State container for the job application writer workflow.
109
+ Attributes:
110
+ final_result: The final generated application material
111
+ """
112
+
113
+ draft: str
114
+ feedback: str
115
+ critique_feedback: str
116
+ current_node: str
117
+ company_research_data: Dict[str, Any]
118
+ output_data: str
src/job_writing_agent/logs/job_writer.log CHANGED
The diff for this file is too large to render. See raw diff
 
src/job_writing_agent/nodes/initializing.py CHANGED
@@ -8,74 +8,80 @@ job descriptions, managing missing inputs, and populating application state.
8
 
9
  The module includes utilities for:
10
  - Parsing resume files and extracting text content
11
- - Parsing job descriptions and extracting company information
12
  - Orchestrating input loading with validation
13
  - Providing user prompts for missing information during verification
14
  """
15
 
16
  import logging
17
- from typing import Tuple
18
- from typing_extensions import Literal
19
 
20
  from langchain_core.documents import Document
21
  from langchain_core.messages import SystemMessage
 
22
 
23
- from job_writing_agent.classes import AppState, DataLoadState
24
- from job_writing_agent.utils.document_processing import parse_resume, get_job_description
 
 
 
25
  from job_writing_agent.prompts.templates import agent_system_prompt
 
 
 
 
 
26
 
27
  logger = logging.getLogger(__name__)
28
 
29
 
30
- # ---------------------------------------------------------------------------
31
- # Helper decorator to log exceptions for async methods
32
- # ---------------------------------------------------------------------------
33
- def log_exceptions(func):
34
- """Decorator to log exceptions in async functions."""
35
- async def wrapper(*args, **kwargs):
36
- try:
37
- return await func(*args, **kwargs)
38
- except Exception as exc:
39
- logger.error(
40
- "Exception in %s: %s", func.__name__, exc, exc_info=True
41
- )
42
- raise
43
-
44
- return wrapper
45
 
46
 
47
  class Dataloading:
48
  """
49
- Node for loading and initializing resume and job description data.
 
 
 
50
 
51
  Methods
52
  -------
53
- set_agent_system_message(state: AppState) -> DataLoadState
54
  Adds the system prompt to the conversation state.
55
  get_resume(resume_source) -> str
56
  Parses a resume file and returns its plain‑text content.
57
  parse_job_description(job_description_source) -> Tuple[str, str]
58
  Parses a job description and returns its text and company name.
59
- load_inputs(state: DataLoadState) -> AppState
60
- Orchestrates loading of resume and job description.
61
- validate_data_load_state(state: DataLoadState)
62
- Ensures required fields are present in company_research_data.
63
- verify_inputs(state: AppState) -> Literal["load", "research"]
64
- Validates inputs and decides the next workflow node.
65
- run(state: DataLoadState) -> AppState
66
- Executes the loading step of the workflow.
 
 
 
67
 
68
  """
 
69
  def __init__(self):
 
70
  pass
71
 
 
 
 
72
 
73
- async def set_agent_system_message(self, state: AppState) -> DataLoadState:
 
74
  """Add the system prompt to the conversation state.
75
 
76
  Parameters
77
  ----------
78
- state: AppState
79
  Current workflow state.
80
 
81
  Returns
@@ -83,9 +89,7 @@ class Dataloading:
83
  DataLoadState
84
  Updated state with the system message and the next node identifier.
85
  """
86
- agent_initialization_system_message = SystemMessage(
87
- content=agent_system_prompt
88
- )
89
  messages = state.get("messages", [])
90
  messages.append(agent_initialization_system_message)
91
  return {
@@ -94,217 +98,416 @@ class Dataloading:
94
  "current_node": "initialize_system",
95
  }
96
 
 
 
 
 
 
 
97
  async def get_resume(self, resume_source):
98
- """Parse a resume file and return its plain‑text content.
 
 
 
 
99
 
100
  Parameters
101
  ----------
102
  resume_source: Any
103
  Path or file‑like object accepted by ``parse_resume``.
 
 
 
 
 
 
 
 
 
 
 
 
104
  """
105
- try:
106
- logger.info("Parsing resume...")
107
- resume_text = ""
108
- assert resume_source is not None
109
- resume_chunks = parse_resume(resume_source)
110
- for chunk in resume_chunks:
111
- if hasattr(chunk, "page_content") and chunk.page_content:
112
- resume_text += chunk.page_content
113
- elif isinstance(chunk, str) and chunk:
114
- resume_text += chunk
115
- else:
116
- logger.debug(
117
- "Skipping empty or invalid chunk in resume: %s", chunk
118
- )
119
- return resume_text
120
- except Exception as e:
121
- logger.error("Error parsing resume: %s", e)
122
- raise
123
 
 
 
124
  async def parse_job_description(self, job_description_source):
125
- """Parse a job description and return its text and company name.
 
 
 
 
126
 
127
  Parameters
128
  ----------
129
  job_description_source: Any
130
- Source accepted by ``get_job_description``.
 
 
 
 
 
 
 
 
 
 
 
 
131
  """
132
- try:
133
- logger.info(
134
- "Parsing job description from: %s", job_description_source
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  )
136
- assert (
137
- job_description_source is not None
138
- ), "Job description source cannot be None"
139
- job_description_document: Document = await get_job_description(
140
- job_description_source
 
 
 
 
141
  )
142
- company_name = ""
143
- job_posting_text = ""
144
- if job_description_document:
145
- if hasattr(
146
- job_description_document, "metadata"
147
- ) and isinstance(job_description_document.metadata, dict):
148
- company_name = job_description_document.metadata.get(
149
- "company_name", ""
150
- )
151
- if not company_name:
152
- logger.warning(
153
- "Company name not found in job description metadata."
154
- )
155
- else:
156
- logger.warning(
157
- "Metadata attribute missing or not a dict in job "
158
- "description document."
159
- )
160
- if hasattr(job_description_document, "page_content"):
161
- job_posting_text = job_description_document.page_content or ""
162
- if not job_posting_text:
163
- logger.info("Parsed job posting text is empty.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  else:
165
- logger.warning(
166
- "page_content attribute missing in job description document."
167
- )
168
- else:
169
- logger.warning(
170
- "get_job_description returned None for source: %s",
171
- job_description_source,
172
- )
173
- return job_posting_text, company_name
174
- except Exception as e:
175
- logger.error(
176
- "Error parsing job description from source '%s': %s",
177
- job_description_source,
178
- e,
179
- exc_info=True,
180
- )
181
- raise
182
 
 
 
 
183
 
184
- # -----------------------------------------------------------------------
185
- # Private helper methods used by load_inputs
186
- # -----------------------------------------------------------------------
187
- @log_exceptions
188
  async def _load_resume(self, resume_source) -> str:
189
- """Load resume content, raising if the source is missing."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  if not resume_source:
191
  raise ValueError("resume_source is required")
192
  return await self.get_resume(resume_source)
193
 
194
-
195
- @log_exceptions
196
  async def _load_job_description(self, jd_source) -> Tuple[str, str]:
197
- """Load job description text and company name, raising if missing."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  if not jd_source:
199
  raise ValueError("job_description_source is required")
200
  return await self.parse_job_description(jd_source)
201
 
202
-
203
- @log_exceptions
204
  async def _prompt_user(self, prompt_msg: str) -> str:
205
- """Prompt the user for input (synchronous ``input`` wrapped for async use)."""
206
- # In a real async UI replace ``input`` with an async call.
207
- return input(prompt_msg)
208
 
 
 
209
 
210
- async def load_inputs(self, state: DataLoadState) -> AppState:
211
- """Orchestrate loading of resume and job description.
 
 
212
 
213
- The method populates ``state['company_research_data']`` with the parsed
214
- resume, job description, and company name, then advances the workflow
215
- to the ``load_inputs`` node.
 
216
  """
217
- resume_src = state.get("resume_path")
218
- jd_src = state.get("job_description_source")
219
 
220
- # -------------------------------------------------------------------
221
- # Load job description (or prompt if missing during verification)
222
- # -------------------------------------------------------------------
223
- job_text = ""
224
- company_name = ""
225
- if jd_src:
226
- job_text, company_name = await self._load_job_description(jd_src)
227
- elif state.get("current_node") == "verify":
228
- job_text = await self._prompt_user(
229
- "Please paste the job posting in text format: "
230
- )
231
 
232
- # -------------------------------------------------------------------
233
- # Load resume (or prompt if missing during verification)
234
- # -------------------------------------------------------------------
235
- resume_text = ""
236
- if resume_src:
237
- resume_text = await self._load_resume(resume_src)
238
- elif state.get("current_node") == "verify":
239
- raw = await self._prompt_user(
240
- "Please paste the resume in text format: "
241
- )
242
- resume_text = raw
243
 
244
- # Populate state
245
- state["company_research_data"] = {
246
- "resume": resume_text,
247
- "job_description": job_text,
248
- "company_name": company_name,
249
- }
250
- state["current_node"] = "load_inputs"
251
- return state
252
 
 
 
 
 
253
 
254
- def validate_data_load_state(self, state: DataLoadState):
255
- """Ensure required fields are present in ``company_research_data``."""
256
- assert state.company_research_data.get(
257
- "resume"
258
- ), "Resume is missing in company_research_data"
259
- assert state.company_research_data.get(
260
- "job_description"
261
- ), "Job description is missing"
 
 
 
 
 
262
 
 
 
 
 
 
 
263
 
264
- def verify_inputs(self, state: AppState) -> Literal["load", "research"]:
265
- """Validate inputs and decide the next workflow node.
266
 
267
- Returns
268
- -------
269
- Literal["load", "research"]
270
- ``"load"`` if required data is missing, otherwise ``"research"``.
271
- """
272
- print("Verifying Inputs")
273
- state["current_node"] = "verify"
274
- logger.info("Verifying loaded inputs!")
275
- assert state["company_research_data"].get(
276
- "resume"
277
- ), "Resume is missing in company_research_data"
278
- assert state["company_research_data"].get(
279
- "job_description"
280
- ), "Job description is missing"
281
- if not state.get("company_research_data"):
282
- missing_items = []
283
- if not state["company_research_data"].get("resume", ""):
284
- missing_items.append("resume")
285
- if not state["company_research_data"].get("job_description", ""):
286
- missing_items.append("job description")
287
- logger.error("Missing required data: %s", ", ".join(missing_items))
288
- return "load"
289
- # Normalise values to strings
290
- for key in ["resume", "job_description"]:
291
- try:
292
- value = state["company_research_data"][key]
293
- if isinstance(value, (list, tuple)):
294
- state["company_research_data"][key] = " ".join(
295
- str(x) for x in value
296
- )
297
- elif isinstance(value, dict):
298
- state["company_research_data"][key] = str(value)
299
- else:
300
- state["company_research_data"][key] = str(value)
301
- except Exception as e:
302
- logger.warning("Error converting %s to string: %s", key, e)
303
- raise
304
- return "research"
305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
- async def run(self, state: DataLoadState) -> AppState:
308
- """Execute the loading step of the workflow."""
309
- state = await self.load_inputs(state)
310
- return state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  The module includes utilities for:
10
  - Parsing resume files and extracting text content
11
+ - Parsing job descriptions and extracting company information
12
  - Orchestrating input loading with validation
13
  - Providing user prompts for missing information during verification
14
  """
15
 
16
  import logging
17
+ from typing import Tuple, Optional
 
18
 
19
  from langchain_core.documents import Document
20
  from langchain_core.messages import SystemMessage
21
+ from langgraph.graph import StateGraph, END, START
22
 
23
+ from job_writing_agent.classes import DataLoadState
24
+ from job_writing_agent.utils.document_processing import (
25
+ parse_resume,
26
+ get_job_description,
27
+ )
28
  from job_writing_agent.prompts.templates import agent_system_prompt
29
+ from job_writing_agent.utils.logging.logging_decorators import (
30
+ log_async,
31
+ log_execution,
32
+ log_errors,
33
+ )
34
 
35
  logger = logging.getLogger(__name__)
36
 
37
 
38
+ # Note: Using centralized logging decorators from utils.logging.logging_decorators
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
 
41
  class Dataloading:
42
  """
43
+ Helper class providing utility methods for loading and parsing data.
44
+
45
+ This class provides helper methods used by the data loading subgraph nodes.
46
+ The actual workflow orchestration is handled by the data_loading_workflow subgraph.
47
 
48
  Methods
49
  -------
50
+ set_agent_system_message(state: DataLoadState) -> DataLoadState
51
  Adds the system prompt to the conversation state.
52
  get_resume(resume_source) -> str
53
  Parses a resume file and returns its plain‑text content.
54
  parse_job_description(job_description_source) -> Tuple[str, str]
55
  Parses a job description and returns its text and company name.
56
+ verify_inputs(state: DataLoadState) -> DataLoadState
57
+ Validates inputs and sets next_node for routing.
58
+
59
+ Private Methods (used by subgraph nodes)
60
+ -----------------------------------------
61
+ _load_resume(resume_source) -> str
62
+ Load resume content, raising if the source is missing.
63
+ _load_job_description(jd_source) -> Tuple[str, str]
64
+ Load job description text and company name, raising if missing.
65
+ _prompt_user(prompt_msg: str) -> str
66
+ Prompt the user for input (synchronous input wrapped for async use).
67
 
68
  """
69
+
70
  def __init__(self):
71
+ """Initialize Dataloading helper class."""
72
  pass
73
 
74
+ # =======================================================================
75
+ # System/Initialization Methods
76
+ # =======================================================================
77
 
78
+ @log_async
79
+ async def set_agent_system_message(self, state: DataLoadState) -> DataLoadState:
80
  """Add the system prompt to the conversation state.
81
 
82
  Parameters
83
  ----------
84
+ state: DataLoadState
85
  Current workflow state.
86
 
87
  Returns
 
89
  DataLoadState
90
  Updated state with the system message and the next node identifier.
91
  """
92
+ agent_initialization_system_message = SystemMessage(content=agent_system_prompt)
 
 
93
  messages = state.get("messages", [])
94
  messages.append(agent_initialization_system_message)
95
  return {
 
98
  "current_node": "initialize_system",
99
  }
100
 
101
+ # =======================================================================
102
+ # Public Parsing Methods
103
+ # =======================================================================
104
+
105
+ @log_async
106
+ @log_errors
107
  async def get_resume(self, resume_source):
108
+ """
109
+ Parse a resume file and return its plain‑text content.
110
+
111
+ This method extracts text from resume chunks, handling both Document
112
+ objects and plain strings. Empty or invalid chunks are skipped.
113
 
114
  Parameters
115
  ----------
116
  resume_source: Any
117
  Path or file‑like object accepted by ``parse_resume``.
118
+
119
+ Returns
120
+ -------
121
+ str
122
+ Plain text content of the resume.
123
+
124
+ Raises
125
+ ------
126
+ AssertionError
127
+ If resume_source is None.
128
+ Exception
129
+ If parsing fails.
130
  """
131
+ logger.info("Parsing resume...")
132
+ resume_text = ""
133
+ assert resume_source is not None
134
+ resume_chunks = parse_resume(resume_source)
135
+ for chunk in resume_chunks:
136
+ if hasattr(chunk, "page_content") and chunk.page_content:
137
+ resume_text += chunk.page_content
138
+ elif isinstance(chunk, str) and chunk:
139
+ resume_text += chunk
140
+ else:
141
+ logger.debug("Skipping empty or invalid chunk in resume: %s", chunk)
142
+ return resume_text
 
 
 
 
 
 
143
 
144
+ @log_async
145
+ @log_errors
146
  async def parse_job_description(self, job_description_source):
147
+ """
148
+ Parse a job description and return its text and company name.
149
+
150
+ Extracts both the job posting text and company name from the document.
151
+ Company name is extracted from document metadata if available.
152
 
153
  Parameters
154
  ----------
155
  job_description_source: Any
156
+ Source accepted by ``get_job_description`` (URL, file path, etc.).
157
+
158
+ Returns
159
+ -------
160
+ Tuple[str, str]
161
+ A tuple of (job_posting_text, company_name).
162
+
163
+ Raises
164
+ ------
165
+ AssertionError
166
+ If job_description_source is None.
167
+ Exception
168
+ If parsing fails.
169
  """
170
+ company_name = ""
171
+ job_posting_text = ""
172
+
173
+ logger.info("Parsing job description from: %s", job_description_source)
174
+ assert job_description_source is not None, (
175
+ "Job description source cannot be None"
176
+ )
177
+
178
+ job_description_document: Optional[Document] = await get_job_description(
179
+ job_description_source
180
+ )
181
+
182
+ # Extract company name from metadata
183
+ if hasattr(job_description_document, "metadata") and isinstance(
184
+ job_description_document.metadata, dict
185
+ ):
186
+ company_name = job_description_document.metadata.get("company_name", "")
187
+ if not company_name:
188
+ logger.warning("Company name not found in job description metadata.")
189
+ else:
190
+ logger.warning(
191
+ "Metadata attribute missing or not a dict in job description document."
192
  )
193
+
194
+ # Extract job posting text
195
+ if hasattr(job_description_document, "page_content"):
196
+ job_posting_text = job_description_document.page_content or ""
197
+ if not job_posting_text:
198
+ logger.info("Parsed job posting text is empty.")
199
+ else:
200
+ logger.warning(
201
+ "page_content attribute missing in job description document."
202
  )
203
+
204
+ return job_posting_text, company_name
205
+
206
    @log_async
    async def get_application_form_details(self, job_description_source):
        """
        Placeholder for a future method that extracts application form details.

        Currently a no-op (returns None). Intended to extract form fields
        and requirements from job application forms once implemented.

        Parameters
        ----------
        job_description_source: Any
            Source of the job description or application form.
        """
        # TODO: Implement form field extraction
        pass
221
+
222
+ # =======================================================================
223
+ # Validation Methods
224
+ # =======================================================================
225
+
226
+ @log_execution
227
+ @log_errors
228
+ def verify_inputs(self, state: DataLoadState) -> DataLoadState:
229
+ """
230
+ Validate inputs and set next_node for routing.
231
+
232
+ This method validates that both resume and job description are present
233
+ in the state, normalizes their values to strings, and sets the next_node
234
+ field for conditional routing in the main workflow.
235
+
236
+ Parameters
237
+ ----------
238
+ state: DataLoadState
239
+ Current workflow state containing company_research_data.
240
+
241
+ Returns
242
+ -------
243
+ DataLoadState
244
+ Updated state with next_node set to "load" (if validation fails)
245
+ or "research" (if validation passes).
246
+
247
+ Raises
248
+ ------
249
+ Exception
250
+ If normalization fails for any field.
251
+ """
252
+ logger.info("Verifying loaded inputs!")
253
+ state["current_node"] = "verify"
254
+
255
+ # Validate required fields
256
+ company_research_data = state.get("company_research_data", {})
257
+
258
+ if not company_research_data.get("resume"):
259
+ logger.error("Resume is missing in company_research_data")
260
+ state["next_node"] = "load" # Loop back to load subgraph
261
+ return state
262
+
263
+ if not company_research_data.get("job_description"):
264
+ logger.error("Job description is missing in company_research_data")
265
+ state["next_node"] = "load" # Loop back to load subgraph
266
+ return state
267
+
268
+ # Normalize values to strings
269
+ for key in ["resume", "job_description"]:
270
+ try:
271
+ value = company_research_data[key]
272
+ if isinstance(value, (list, tuple)):
273
+ company_research_data[key] = " ".join(str(x) for x in value)
274
+ elif isinstance(value, dict):
275
+ company_research_data[key] = str(value)
276
  else:
277
+ company_research_data[key] = str(value)
278
+ except Exception as e:
279
+ logger.warning("Error converting %s to string: %s", key, e)
280
+ state["next_node"] = "load"
281
+ return state
282
+
283
+ # All validations passed
284
+ state["next_node"] = "research"
285
+ logger.info("Inputs verified successfully, proceeding to research")
286
+ return state
 
 
 
 
 
 
 
287
 
288
+ # =======================================================================
289
+ # Private Helper Methods (used by subgraph nodes)
290
+ # =======================================================================
291
 
292
+ @log_async
293
+ @log_errors
 
 
294
  async def _load_resume(self, resume_source) -> str:
295
+ """
296
+ Load resume content, raising if the source is missing.
297
+
298
+ This is a wrapper around get_resume() that validates the source first.
299
+ Used by subgraph nodes for consistent error handling.
300
+
301
+ Parameters
302
+ ----------
303
+ resume_source: Any
304
+ Path or file-like object for the resume.
305
+
306
+ Returns
307
+ -------
308
+ str
309
+ Plain text content of the resume.
310
+
311
+ Raises
312
+ ------
313
+ ValueError
314
+ If resume_source is None or empty.
315
+ """
316
  if not resume_source:
317
  raise ValueError("resume_source is required")
318
  return await self.get_resume(resume_source)
319
 
320
+ @log_async
321
+ @log_errors
322
  async def _load_job_description(self, jd_source) -> Tuple[str, str]:
323
+ """
324
+ Load job description text and company name, raising if missing.
325
+
326
+ This is a wrapper around parse_job_description() that validates the source first.
327
+ Used by subgraph nodes for consistent error handling.
328
+
329
+ Parameters
330
+ ----------
331
+ jd_source: Any
332
+ Source for the job description (URL, file path, etc.).
333
+
334
+ Returns
335
+ -------
336
+ Tuple[str, str]
337
+ A tuple of (job_posting_text, company_name).
338
+
339
+ Raises
340
+ ------
341
+ ValueError
342
+ If jd_source is None or empty.
343
+ """
344
  if not jd_source:
345
  raise ValueError("job_description_source is required")
346
  return await self.parse_job_description(jd_source)
347
 
348
+ @log_async
349
+ @log_errors
350
  async def _prompt_user(self, prompt_msg: str) -> str:
351
+ """
352
+ Prompt the user for input (synchronous input wrapped for async use).
 
353
 
354
+ This method wraps the synchronous input() function to be used in async contexts.
355
+ In a production async UI, this would be replaced with an async input mechanism.
356
 
357
+ Parameters
358
+ ----------
359
+ prompt_msg: str
360
+ Message to display to the user.
361
 
362
+ Returns
363
+ -------
364
+ str
365
+ User input string.
366
  """
367
+ # In a real async UI replace input with an async call.
368
+ return input(prompt_msg)
369
 
 
 
 
 
 
 
 
 
 
 
 
370
 
371
+ # ============================================================================
372
+ # Data Loading Subgraph Nodes
373
+ # ============================================================================
 
 
 
 
 
 
 
 
374
 
 
 
 
 
 
 
 
 
375
 
376
@log_async
async def parse_resume_node(state: DataLoadState) -> DataLoadState:
    """
    Node that parses the resume in parallel with job description parsing.

    Extracts the resume-parsing half of the old ``load_inputs`` logic for
    parallel execution. Returns only the resume portion of
    ``company_research_data``; LangGraph merges this partial update with
    the sibling node's output.
    """
    dataloading = Dataloading()
    resume_src = state.get("resume_path")

    resume_text = ""
    if resume_src:
        resume_text = await dataloading._load_resume(resume_src)
    elif state.get("current_node") == "verify":
        # Re-entry after a failed verification: ask the user directly.
        resume_text = await dataloading._prompt_user(
            "Please paste the resume in text format: "
        )

    # Lazy %-args: the message is only built when INFO logging is enabled.
    logger.info("Resume parsed: %d characters", len(resume_text))
    # Partial state update - LangGraph will merge this with other parallel updates
    return {
        "company_research_data": {"resume": resume_text},
    }
401
 
 
 
402
 
403
@log_async
async def parse_job_description_node(state: DataLoadState) -> DataLoadState:
    """
    Node that parses the job description in parallel with resume parsing.

    Extracts the job-description half of the old ``load_inputs`` logic for
    parallel execution. Returns only the job description portion of
    ``company_research_data``; LangGraph merges this partial update with
    the sibling node's output.
    """
    dataloading = Dataloading()
    jd_src = state.get("job_description_source")

    job_text = ""
    company_name = ""
    if jd_src:
        job_text, company_name = await dataloading._load_job_description(jd_src)
    elif state.get("current_node") == "verify":
        # Re-entry after a failed verification: ask the user directly.
        job_text = await dataloading._prompt_user(
            "Please paste the job posting in text format: "
        )

    # Lazy %-args: the message is only built when INFO logging is enabled.
    logger.info(
        "Job description parsed: %d characters, company: %s",
        len(job_text),
        company_name,
    )
    # Partial state update - LangGraph will merge this with other parallel updates
    return {
        "company_research_data": {
            "job_description": job_text,
            "company_name": company_name,
        },
    }
434
+
435
+
436
@log_execution
def aggregate_data_loading_results(state: DataLoadState) -> DataLoadState:
    """
    Aggregate results from the parallel resume and job description nodes.

    Runs once both parse_resume_node and parse_job_description_node have
    completed. Warns about any empty result and rebuilds the
    ``company_research_data`` mapping into its canonical shape.
    """
    # Guarantee the container exists before reading from it.
    research_data = state.setdefault("company_research_data", {})

    resume_text = research_data.get("resume", "")
    job_text = research_data.get("job_description", "")
    company_name = research_data.get("company_name", "")

    # Warn (but do not fail) when either parallel branch produced nothing.
    for text, label in ((resume_text, "Resume"), (job_text, "Job description")):
        if not text:
            logger.warning("%s text is empty after parsing", label)

    # Rebuild the mapping so downstream nodes see exactly these three keys.
    state["company_research_data"] = {
        "resume": resume_text,
        "job_description": job_text,
        "company_name": company_name,
    }
    state["current_node"] = "aggregate_results"

    logger.info("Data loading results aggregated successfully")
    return state
469
+
470
+
471
@log_execution
def verify_inputs_node(state: DataLoadState) -> DataLoadState:
    """
    Verify that required inputs are present and set next_node for routing.

    Thin adapter over ``Dataloading.verify_inputs``: returns the updated
    state (with ``next_node`` set) rather than a routing string.
    """
    return Dataloading().verify_inputs(state)
480
+
481
+
482
# ============================================================================
# Data Loading Subgraph
# ============================================================================

# Create data loading subgraph
data_loading_subgraph = StateGraph(DataLoadState)

# Add subgraph nodes
dataloading_instance = Dataloading()
data_loading_subgraph.add_node(
    "set_agent_system_message", dataloading_instance.set_agent_system_message
)
data_loading_subgraph.add_node("parse_resume", parse_resume_node)
data_loading_subgraph.add_node("parse_job_description", parse_job_description_node)
data_loading_subgraph.add_node("aggregate_results", aggregate_data_loading_results)
data_loading_subgraph.add_node("verify_inputs", verify_inputs_node)

# Add subgraph edges
data_loading_subgraph.add_edge(START, "set_agent_system_message")
# Parallel execution: both nodes start after set_agent_system_message.
# NOTE(review): both parallel nodes return partial "company_research_data"
# updates — this assumes DataLoadState declares a merge reducer for that key;
# without one, LangGraph rejects concurrent writes. Verify in classes.py.
data_loading_subgraph.add_edge("set_agent_system_message", "parse_resume")
data_loading_subgraph.add_edge("set_agent_system_message", "parse_job_description")
# Both parallel nodes feed into aggregate (LangGraph waits for both)
data_loading_subgraph.add_edge("parse_resume", "aggregate_results")
data_loading_subgraph.add_edge("parse_job_description", "aggregate_results")
# Aggregate feeds into verification
data_loading_subgraph.add_edge("aggregate_results", "verify_inputs")
# Verification ends the subgraph
data_loading_subgraph.add_edge("verify_inputs", END)

# Compile data loading subgraph
data_loading_workflow = data_loading_subgraph.compile()
src/job_writing_agent/nodes/job_description_loader.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Job Description Loader Module
4
+
5
+ This module provides the JobDescriptionLoader class responsible for loading and parsing
6
+ job description files and URLs, extracting both the job posting text and company name.
7
+ """
8
+
9
+ import logging
10
+ from typing import Callable, Any, Optional, Tuple, Awaitable
11
+
12
+ from langchain_core.documents import Document
13
+
14
+ from job_writing_agent.utils.document_processing import get_job_description
15
+ from job_writing_agent.utils.logging.logging_decorators import (
16
+ log_async,
17
+ log_errors,
18
+ )
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class JobDescriptionLoader:
    """
    Responsible for loading and parsing job description documents.

    This class follows SOLID principles:
    - Single Responsibility: Only handles job description parsing
    - Dependency Inversion: Parser is injected for testability
    - Open/Closed: Can extend with different parsers without modification
    - Interface Segregation: Focused interface (only job description methods)

    Example:
        >>> loader = JobDescriptionLoader()
        >>> job_text, company = await loader.parse_job_description("https://example.com/job")
        >>>
        >>> # With custom parser for testing
        >>> async def mock_parser(source):
        ...     return Document(page_content="test", metadata={"company_name": "TestCo"})
        >>> loader = JobDescriptionLoader(parser=mock_parser)
    """

    def __init__(self, parser: Optional[Callable[[Any], Awaitable[Document]]] = None):
        """
        Initialize JobDescriptionLoader with optional parser dependency injection.

        Parameters
        ----------
        parser: Optional[Callable[[Any], Awaitable[Document]]]
            Async function to parse job description documents. Defaults to
            ``get_job_description`` from document_processing. Can be injected
            for testing or custom parsing.

            The parser should:
            - Take one argument (source: str) - URL or file path
            - Return an awaitable that resolves to a Document object
            - Document should have page_content (str) and metadata (dict)
        """
        self._parser = parser or get_job_description

    @log_async
    @log_errors
    async def parse_job_description(
        self, job_description_source: Any
    ) -> Tuple[str, str]:
        """
        Parse a job description and return its text and company name.

        Extracts both the job posting text and the company name from the
        parsed document. The company name is read from the document's
        metadata when available; missing pieces are logged, not raised.

        Parameters
        ----------
        job_description_source: Any
            Source accepted by the parser function (URL, file path, etc.).
            Can be a URL starting with http:// or https://, or a local file path.

        Returns
        -------
        Tuple[str, str]
            A tuple of (job_posting_text, company_name).
            If the company name is not found in metadata, it is "".

        Raises
        ------
        ValueError
            If job_description_source is None.
        Exception
            If parsing fails.
        """
        logger.info("Parsing job description from: %s", job_description_source)
        # Explicit check instead of ``assert``: asserts are stripped under
        # ``python -O`` and _load_job_description already raises ValueError.
        if job_description_source is None:
            raise ValueError("Job description source cannot be None")

        job_description_document: Document = await self._parser(job_description_source)

        # Extract company name from metadata
        company_name = ""
        if hasattr(job_description_document, "metadata") and isinstance(
            job_description_document.metadata, dict
        ):
            company_name = job_description_document.metadata.get("company_name", "")
            if not company_name:
                logger.warning("Company name not found in job description metadata.")
        else:
            logger.warning(
                "Metadata attribute missing or not a dict in job description document."
            )

        # Extract job posting text
        job_posting_text = ""
        if hasattr(job_description_document, "page_content"):
            job_posting_text = job_description_document.page_content or ""
            if not job_posting_text:
                logger.info("Parsed job posting text is empty.")
        else:
            logger.warning(
                "page_content attribute missing in job description document."
            )

        return job_posting_text, company_name

    @log_async
    @log_errors
    async def _load_job_description(self, jd_source: Any) -> Tuple[str, str]:
        """
        Load job description text and company name, raising if missing.

        Wrapper around parse_job_description() that validates the source
        first. Used by subgraph nodes for consistent error handling.

        Parameters
        ----------
        jd_source: Any
            Source for the job description (URL, file path, etc.).

        Returns
        -------
        Tuple[str, str]
            A tuple of (job_posting_text, company_name).

        Raises
        ------
        ValueError
            If jd_source is None or empty.
        """
        if not jd_source:
            raise ValueError("job_description_source is required")
        return await self.parse_job_description(jd_source)

    @log_async
    async def get_application_form_details(self, job_description_source: Any):
        """
        Placeholder for a future method that extracts application form details.

        Currently a no-op (returns None). Intended to extract form fields
        and requirements from job application forms once implemented.

        Parameters
        ----------
        job_description_source: Any
            Source of the job description or application form.
        """
        # TODO: Implement form field extraction
        pass

    async def _prompt_user(
        self,
        prompt_msg: str = "Please paste the job description in text format: ",
    ) -> str:
        """
        Prompt the user for input (synchronous input wrapped for async use).

        Wraps the synchronous input() function for use in async contexts.
        In a production async UI, this would be replaced with an async
        input mechanism.

        Note: This is a shared utility method. In a future refactoring, it
        could be extracted to a separate UserInputHelper class following the
        Interface Segregation Principle.

        Parameters
        ----------
        prompt_msg: str
            Message to display to the user. Defaults to the job description
            paste prompt, preserving the previous hardcoded behavior.

        Returns
        -------
        str
            User input string.
        """
        # In a real async UI replace input with an async call.
        return input(prompt_msg)
src/job_writing_agent/nodes/research_workflow.py CHANGED
@@ -1,97 +1,304 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- This module performs the research phase of the job application writing process.
4
- One of the stages is Tavily Search which will be use to search for the company
5
- """
6
-
7
  import logging
8
  import json
9
- from langgraph.graph import StateGraph, START, END
 
10
 
 
 
11
  from job_writing_agent.tools.SearchTool import TavilyResearchTool
12
  from job_writing_agent.classes.classes import ResearchState
13
- from job_writing_agent.tools.SearchTool import relevance_filter
14
-
 
 
 
15
 
16
  logger = logging.getLogger(__name__)
17
 
18
- # Set up logging
19
- logger = logging.getLogger(__name__)
20
- logging.basicConfig(level=logging.INFO)
 
 
21
 
22
 
23
- async def research_company(state: ResearchState) -> ResearchState:
24
- """Research the company if name is available."""
25
- state["current_node"] = "research_company"
 
 
 
 
 
 
 
 
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  try:
28
- # Extract values from state
29
- company_name: str = state["company_research_data"].get("company_name", None)
30
- job_description = state["company_research_data"].get("job_description", None)
31
 
32
- assert company_name is not None, "Company name is required for research_company"
33
- assert job_description is not None, (
34
- "Job description is required for research_company"
35
- )
 
 
 
36
 
37
- logger.info(f"Researching company: {company_name}")
 
 
 
 
 
 
 
38
 
39
- # Call search_company using the invoke method instead of __call__
40
- # The tool expects job_description and company_name and returns a tuple
41
- tavily_search = TavilyResearchTool(
42
- job_description=job_description, company_name=company_name
43
- )
44
 
45
- tavily_search_queries = tavily_search.create_tavily_queries()
 
 
46
 
47
- tavily_search_queries_json: dict = json.loads(
48
- tavily_search_queries["search_queries"]
49
- )
50
 
51
- logger.info(list(tavily_search_queries_json.values()))
52
 
53
- tavily_search_results: list[list[str]] = tavily_search.tavily_search_company(
54
- tavily_search_queries_json
55
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- assert isinstance(tavily_search_results, list), (
58
- "Expected list or tuple from tavily_search_company"
 
 
 
59
  )
60
- assert len(tavily_search_results) > 0, (
61
- "No results returned from tavily_search_company"
 
 
 
 
 
 
62
  )
63
- assert len(tavily_search_queries_json) > 0, "No search queries were attempted"
64
 
65
- logger.info(
66
- f"Search completed with results and {len(tavily_search_queries)} queries"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  )
68
 
69
- # Store results in state - note that results is the first item in the tuple
70
- state["attempted_search_queries"] = list(tavily_search_queries_json.values())
71
- state["company_research_data"]["tavily_search"] = tavily_search_results
72
 
73
  except Exception as e:
74
- logger.error(f"Error in research_company: {str(e)}")
75
- # Provide empty results to avoid breaking the workflow
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  state["company_research_data"]["tavily_search"] = []
77
  state["attempted_search_queries"] = []
78
- finally:
79
  return state
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
- print("\n\n\nInitializing research workflow...\n\n\n")
83
  # Create research subgraph
84
  research_subgraph = StateGraph(ResearchState)
85
 
86
  # Add research subgraph nodes
87
  research_subgraph.add_node("research_company", research_company)
88
- research_subgraph.add_node("relevance_filter", relevance_filter)
89
-
 
 
90
 
91
  # Add research subgraph edges
92
  research_subgraph.add_edge(START, "research_company")
93
  research_subgraph.add_edge("research_company", "relevance_filter")
94
- research_subgraph.add_edge("relevance_filter", END)
 
95
 
96
  # Compile research subgraph
97
  research_workflow = research_subgraph.compile()
 
1
+ # research_workflow.py
 
 
 
 
 
2
  import logging
3
  import json
4
+ import asyncio
5
+ from typing import Dict, Any, cast
6
 
7
+ from langgraph.graph import StateGraph, END, START
8
+ import dspy
9
  from job_writing_agent.tools.SearchTool import TavilyResearchTool
10
  from job_writing_agent.classes.classes import ResearchState
11
+ from job_writing_agent.tools.SearchTool import filter_research_results_by_relevance
12
+ from job_writing_agent.agents.output_schema import (
13
+ CompanyResearchDataSummarizationSchema,
14
+ )
15
+ from job_writing_agent.utils.llm_provider_factory import LLMFactory
16
 
17
  logger = logging.getLogger(__name__)
18
 
19
# Configuration
# NOTE(review): the consumers of these tuning knobs are outside the visible
# portion of this file — confirm where each constant is actually applied.
MAX_RETRIES = 3
RETRY_DELAY = 2  # seconds
QUERY_TIMEOUT = 30  # seconds
EVAL_TIMEOUT = 15  # seconds per evaluation
24
 
25
 
26
def validate_research_inputs(state: ResearchState) -> tuple[bool, str, str]:
    """
    Validate that the research step has the inputs it needs.

    Returns
    -------
    tuple[bool, str, str]
        (is_valid, company_name, job_description); on any failure the
        strings are empty and is_valid is False.
    """
    try:
        research_data = state["company_research_data"]
        company = research_data.get("company_name", "")
        description = research_data.get("job_description", "")

        # Reject missing or whitespace-only company names.
        if not company or not company.strip():
            logger.error("Company name is missing or empty")
            return False, "", ""

        # Reject missing or whitespace-only job descriptions.
        if not description or not description.strip():
            logger.error("Job description is missing or empty")
            return False, "", ""

        return True, company.strip(), description.strip()

    except (KeyError, TypeError, AttributeError) as e:
        # Malformed state (missing keys, wrong types) counts as invalid.
        logger.error(f"Invalid state structure: {e}")
        return False, "", ""
48
+
49
+
50
+ def parse_dspy_queries_with_fallback(
51
+ raw_queries: Dict[str, Any], company_name: str
52
+ ) -> Dict[str, str]:
53
+ """
54
+ Parse DSPy query output with multiple fallback strategies.
55
+ Returns a dict of query_id -> query_string.
56
+ """
57
  try:
58
+ # Try to extract search_queries field
59
+ if isinstance(raw_queries, dict) and "search_queries" in raw_queries:
60
+ queries_data = raw_queries["search_queries"]
61
 
62
+ # If it's a JSON string, parse it
63
+ if isinstance(queries_data, str):
64
+ try:
65
+ queries_data = json.loads(queries_data)
66
+ except json.JSONDecodeError as e:
67
+ logger.warning(f"JSON decode failed: {e}. Using fallback queries.")
68
+ return get_fallback_queries(company_name)
69
 
70
+ # Extract query strings
71
+ if isinstance(queries_data, dict):
72
+ parsed = {}
73
+ for key, value in queries_data.items():
74
+ if isinstance(value, str):
75
+ parsed[key] = value
76
+ elif isinstance(value, list) and len(value) > 0:
77
+ parsed[key] = str(value[0])
78
 
79
+ if parsed:
80
+ return parsed
 
 
 
81
 
82
+ # If we reach here, parsing failed
83
+ logger.warning("Could not parse DSPy queries. Using fallback.")
84
+ return get_fallback_queries(company_name)
85
 
86
+ except Exception as e:
87
+ logger.error(f"Error parsing DSPy queries: {e}. Using fallback.")
88
+ return get_fallback_queries(company_name)
89
 
 
90
 
91
+ def get_fallback_queries(company_name: str) -> Dict[str, str]:
92
+ """
93
+ Generate basic fallback queries when DSPy fails.
94
+ """
95
+ return {
96
+ "query1": f"{company_name} company culture and values",
97
+ "query2": f"{company_name} recent news and achievements",
98
+ "query3": f"{company_name} mission statement and goals",
99
+ }
100
+
101
+
102
+ def company_research_data_summary(state: ResearchState) -> ResearchState:
103
+ """
104
+ Summarize the filtered research data into a concise summary.
105
+ Stores the summary under the 'company_research_data_summary' key; the raw tavily_search results are left in place.
106
+ """
107
+ try:
108
+ state["current_node"] = "company_research_data_summary"
109
+
110
+ # Extract the current research data
111
+ company_research_data = state.get("company_research_data", {})
112
+ tavily_search_data = company_research_data.get("tavily_search", [])
113
+
114
+ # If no research data, skip summarization
115
+ if not tavily_search_data or len(tavily_search_data) == 0:
116
+ logger.warning("No research data to summarize. Skipping summarization.")
117
+ return state
118
 
119
+ logger.info(f"Summarizing {len(tavily_search_data)} research result sets...")
120
+
121
+ # Create DSPy summarization chain
122
+ company_research_data_summarization = dspy.ChainOfThought(
123
+ CompanyResearchDataSummarizationSchema
124
  )
125
+
126
+ # Initialize LLM provider
127
+
128
+ llm_provider = LLMFactory()
129
+ llm = llm_provider.create_dspy(
130
+ model="mistralai/mistral-7b-instruct:free",
131
+ provider="openrouter",
132
+ temperature=0.3,
133
  )
 
134
 
135
+ # Generate summary using DSPy
136
+ with dspy.context(lm=llm, adapter=dspy.JSONAdapter()):
137
+ response = company_research_data_summarization(
138
+ company_research_data=company_research_data
139
+ )
140
+ # Extract the summary from the response
141
+ # The response should have a 'company_research_data_summary' field (JSON string)
142
+ if hasattr(response, "company_research_data_summary"):
143
+ summary_json_str = response.company_research_data_summary
144
+ elif isinstance(response, dict) and "company_research_data_summary" in response:
145
+ summary_json_str = response["company_research_data_summary"]
146
+ else:
147
+ logger.error(
148
+ f"Unexpected response format from summarization: {type(response)}"
149
+ )
150
+ return state
151
+
152
+ # Store the summary JSON string on the state (no parsing is performed here)
153
+ state["company_research_data"]["company_research_data_summary"] = (
154
+ summary_json_str
155
  )
156
 
157
+ return state
 
 
158
 
159
  except Exception as e:
160
+ logger.error(f"Error in company_research_data_summary: {e}", exc_info=True)
161
+ # Return state unchanged on error
162
+ return state
163
+
164
+
165
+ async def research_company_with_retry(state: ResearchState) -> ResearchState:
166
+ """
167
+ Research company with retry logic and timeouts.
168
+ """
169
+ state["current_node"] = "research_company"
170
+
171
+ # Validate inputs
172
+ is_valid, company_name, job_description = validate_research_inputs(state)
173
+
174
+ if not is_valid:
175
+ logger.error("Invalid inputs for research. Skipping research phase.")
176
  state["company_research_data"]["tavily_search"] = []
177
  state["attempted_search_queries"] = []
 
178
  return state
179
 
180
+ logger.info(f"Researching company: {company_name}")
181
+
182
+ # Try with retries
183
+ for attempt in range(MAX_RETRIES):
184
+ try:
185
+ # Create tool instance
186
+ tavily_search = TavilyResearchTool(
187
+ job_description=job_description, company_name=company_name
188
+ )
189
+
190
+ # Generate queries with timeout
191
+ queries_task = asyncio.create_task(
192
+ asyncio.to_thread(tavily_search.create_tavily_queries)
193
+ )
194
+
195
+ try:
196
+ raw_queries = await asyncio.wait_for(
197
+ queries_task, timeout=QUERY_TIMEOUT
198
+ )
199
+ except asyncio.TimeoutError:
200
+ logger.warning(
201
+ f"Query generation timed out (attempt {attempt + 1}/{MAX_RETRIES})"
202
+ )
203
+ if attempt < MAX_RETRIES - 1:
204
+ await asyncio.sleep(RETRY_DELAY)
205
+ continue
206
+ else:
207
+ raise
208
+
209
+ # Parse queries with fallback
210
+ # Convert DSPy Prediction to dict if needed
211
+ if hasattr(raw_queries, "dict"):
212
+ raw_queries_dict = cast(Dict[str, Any], raw_queries.dict())
213
+ elif hasattr(raw_queries, "__dict__"):
214
+ raw_queries_dict = cast(Dict[str, Any], raw_queries.__dict__)
215
+ elif isinstance(raw_queries, dict):
216
+ raw_queries_dict = cast(Dict[str, Any], raw_queries)
217
+ else:
218
+ raw_queries_dict = cast(Dict[str, Any], dict(raw_queries))
219
+
220
+ queries = parse_dspy_queries_with_fallback(raw_queries_dict, company_name)
221
+
222
+ if not queries:
223
+ logger.warning("No valid queries generated")
224
+ queries = get_fallback_queries(company_name)
225
+
226
+ logger.info(
227
+ f"Generated {len(queries)} search queries: {list(queries.keys())}"
228
+ )
229
+
230
+ # Perform searches with timeout
231
+ search_task = asyncio.create_task(
232
+ asyncio.to_thread(tavily_search.tavily_search_company, queries)
233
+ )
234
+
235
+ try:
236
+ search_results = await asyncio.wait_for(
237
+ search_task, timeout=QUERY_TIMEOUT * len(queries)
238
+ )
239
+ except asyncio.TimeoutError:
240
+ logger.warning(
241
+ f"Search timed out (attempt {attempt + 1}/{MAX_RETRIES})"
242
+ )
243
+ if attempt < MAX_RETRIES - 1:
244
+ await asyncio.sleep(RETRY_DELAY)
245
+ continue
246
+ else:
247
+ raise
248
+
249
+ # Validate results
250
+ if not isinstance(search_results, list):
251
+ logger.warning(f"Invalid search results type: {type(search_results)}")
252
+ search_results = []
253
+
254
+ if len(search_results) == 0:
255
+ logger.warning("No search results returned")
256
+
257
+ # Store results
258
+ state["attempted_search_queries"] = list(queries.values())
259
+ state["company_research_data"]["tavily_search"] = search_results
260
+
261
+ logger.info(
262
+ f"Research completed successfully with {len(search_results)} result sets"
263
+ )
264
+ return state
265
+
266
+ except Exception as e:
267
+ logger.error(
268
+ f"Error in research_company (attempt {attempt + 1}/{MAX_RETRIES}): {e}",
269
+ exc_info=True,
270
+ )
271
+
272
+ if attempt < MAX_RETRIES - 1:
273
+ await asyncio.sleep(RETRY_DELAY * (attempt + 1)) # Exponential backoff
274
+ else:
275
+ logger.error("All retry attempts exhausted. Using empty results.")
276
+ state["company_research_data"]["tavily_search"] = []
277
+ state["attempted_search_queries"] = []
278
+
279
+ return state
280
+
281
+
282
+ async def research_company(state: ResearchState) -> ResearchState:
283
+ """Wrapper to call the retry version."""
284
+ return await research_company_with_retry(state)
285
+
286
 
 
287
  # Create research subgraph
288
  research_subgraph = StateGraph(ResearchState)
289
 
290
  # Add research subgraph nodes
291
  research_subgraph.add_node("research_company", research_company)
292
+ research_subgraph.add_node("relevance_filter", filter_research_results_by_relevance)
293
+ research_subgraph.add_node(
294
+ "company_research_data_summary", company_research_data_summary
295
+ )
296
 
297
  # Add research subgraph edges
298
  research_subgraph.add_edge(START, "research_company")
299
  research_subgraph.add_edge("research_company", "relevance_filter")
300
+ research_subgraph.add_edge("relevance_filter", "company_research_data_summary")
301
+ research_subgraph.add_edge("company_research_data_summary", END)
302
 
303
  # Compile research subgraph
304
  research_workflow = research_subgraph.compile()
src/job_writing_agent/nodes/resume_loader.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Resume Loader Module
4
+
5
+ This module provides the ResumeLoader class responsible for loading and parsing
6
+ the resume file and returning the resume in the required format.
7
+ """
8
+
9
+ import logging
10
+ from typing import Callable, Any, Optional
11
+
12
+ from job_writing_agent.utils.document_processing import parse_resume
13
+ from job_writing_agent.utils.logging.logging_decorators import (
14
+ log_async,
15
+ log_errors,
16
+ )
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ class ResumeLoader:
22
+ """
23
+ Responsible for loading and parsing resume documents.
24
+
25
+ Example:
26
+ >>> loader = ResumeLoader()
27
+ >>> resume_text = await loader.get_resume("path/to/resume.pdf")
28
+ >>>
29
+ >>> # With custom parser for testing
30
+ >>> mock_parser = lambda x: [Document(page_content="test")]
31
+ >>> loader = ResumeLoader(parser=mock_parser)
32
+ """
33
+
34
+ def __init__(self, parser: Optional[Callable[[Any], Any]] = None):
35
+ """
36
+ Initialize ResumeLoader with optional parser dependency injection.
37
+
38
+ Parameters
39
+ ----------
40
+ parser: Optional[Callable[[Any], Any]]
41
+ Function to parse resume documents. Defaults to `parse_resume` from
42
+ document_processing. Can be injected for testing or custom parsing.
43
+ """
44
+ self._parser = parser or parse_resume
45
+
46
+ @log_async
47
+ @log_errors
48
+ async def get_resume(self, resume_source: Any) -> str:
49
+ """
50
+ Parse a resume file and return its plain-text content.
51
+
52
+ This method extracts text from resume chunks, handling both Document
53
+ objects and plain strings. Empty or invalid chunks are skipped.
54
+
55
+ Parameters
56
+ ----------
57
+ resume_source: Any
58
+ Path or file-like object accepted by the parser function.
59
+ Can be a file path, URL, or file-like object.
60
+
61
+ Returns
62
+ -------
63
+ str
64
+ Plain text content of the resume.
65
+
66
+ Raises
67
+ ------
68
+ AssertionError
69
+ If resume_source is None.
70
+ Exception
71
+ If parsing fails.
72
+ """
73
+ logger.info("Parsing resume...")
74
+ resume_text = ""
75
+ assert resume_source is not None, "resume_source cannot be None"
76
+
77
+ resume_chunks = self._parser(resume_source)
78
+
79
+ for chunk in resume_chunks:
80
+ if hasattr(chunk, "page_content") and chunk.page_content:
81
+ resume_text += chunk.page_content
82
+ elif isinstance(chunk, str) and chunk:
83
+ resume_text += chunk
84
+ else:
85
+ logger.debug("Skipping empty or invalid chunk in resume: %s", chunk)
86
+
87
+ return resume_text
88
+
89
+ @log_async
90
+ @log_errors
91
+ async def _load_resume(self, resume_source: Any) -> str:
92
+ """
93
+ Load resume content, raising if the source is missing.
94
+
95
+ This is a wrapper around get_resume() that validates the source first.
96
+ Used by subgraph nodes for consistent error handling.
97
+
98
+ Parameters
99
+ ----------
100
+ resume_source: Any
101
+ Path or file-like object for the resume.
102
+
103
+ Returns
104
+ -------
105
+ str
106
+ Plain text content of the resume.
107
+
108
+ Raises
109
+ ------
110
+ ValueError
111
+ If resume_source is None or empty.
112
+ """
113
+ if not resume_source:
114
+ raise ValueError("resume_source is required")
115
+ return await self.get_resume(resume_source)
116
+
117
+ async def _prompt_user_for_resume(self) -> str:
118
+ """
119
+ Prompt the user for input (synchronous input wrapped for async use).
120
+
121
+ This method wraps the synchronous input() function to be used in async
122
+ contexts. In a production async UI, this would be replaced with an
123
+ async input mechanism.
124
+
125
+ Note: This is a shared utility method. In a future refactoring, this
126
+ could be extracted to a separate UserInputHelper class following the
127
+ Interface Segregation Principle.
128
+
129
+ Parameters
130
+ ----------
131
+ None
132
+ This method takes no parameters; the prompt text shown to the user is fixed.
133
+
134
+ Returns
135
+ -------
136
+ str
137
+ User input string.
138
+ """
139
+ # In a real async UI replace input with an async call.
140
+ return input("Please paste the resume in text format: ")
src/job_writing_agent/nodes/selfconsistency.py CHANGED
@@ -4,23 +4,23 @@ import json
4
  import re
5
 
6
  from ..classes.classes import AppState
7
- from ..prompts.templates import (
8
- DRAFT_RATING_PROMPT,
9
- BEST_DRAFT_SELECTION_PROMPT
10
- )
11
  from ..utils.llm_provider_factory import LLMFactory
12
 
13
 
14
-
15
  logger = logging.getLogger(__name__)
16
  # Constants
17
  CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
18
 
19
- llm_factory = LLMFactory()
20
- llm_precise = llm_factory.create_langchain(model="qwen/qwen3-4b:free", provider="openrouter", temperature=0.1)
21
 
22
  def self_consistency_vote(state: AppState) -> AppState:
23
  """Choose the best draft from multiple variations."""
 
 
 
 
 
 
24
  variations = state.get("variations", {"variations": []})
25
 
26
  all_drafts = [state["draft"]] + variations["variations"]
@@ -31,7 +31,7 @@ def self_consistency_vote(state: AppState) -> AppState:
31
  # Get resume and job summaries, handling different formats
32
  try:
33
  if isinstance(state["resume_path"], list) and len(state["resume_path"]) > 0:
34
- if hasattr(state["resume_path"][0], 'page_content'):
35
  resume_summary = state["resume_path"][0].page_content
36
  else:
37
  resume_summary = state["resume_path"][0]
@@ -42,7 +42,10 @@ def self_consistency_vote(state: AppState) -> AppState:
42
  resume_summary = str(state["resume_path"])
43
 
44
  try:
45
- if isinstance(state["job_description_source"], list) and len(state["job_description_source"]) > 0:
 
 
 
46
  job_summary = state["job_description_source"][0]
47
  else:
48
  job_summary = str(state["job_description_source"])
@@ -51,33 +54,38 @@ def self_consistency_vote(state: AppState) -> AppState:
51
  job_summary = str(state["job_description_source"])
52
 
53
  for i, draft in enumerate(all_drafts):
54
- rating = llm_precise.invoke(DRAFT_RATING_PROMPT.format(
55
- resume_summary=resume_summary,
56
- job_summary=job_summary,
57
- draft=draft,
58
- draft_number=i+1
59
- ))
 
 
60
  ratings.append(rating)
61
 
62
  # Create a clearer, more structured prompt for draft selection
63
  selection_prompt = BEST_DRAFT_SELECTION_PROMPT.format(
64
- ratings_json=json.dumps(ratings, indent=2),
65
- num_drafts=len(all_drafts)
66
  )
67
 
68
  # Get the selected draft index with error handling
69
  try:
70
  selection = llm_precise.invoke(selection_prompt).strip()
71
  # Extract just the first number found in the response
72
- number_match = re.search(r'\d+', selection)
73
  if not number_match:
74
- print("Warning: Could not extract draft number from LLM response. Using original draft.")
 
 
75
  best_draft_idx = 0
76
  else:
77
  best_draft_idx = int(number_match.group()) - 1
78
  # Validate the index is in range
79
  if best_draft_idx < 0 or best_draft_idx >= len(all_drafts):
80
- print(f"Warning: Selected draft index {best_draft_idx + 1} out of range. Using original draft.")
 
 
81
  best_draft_idx = 0
82
  except (ValueError, TypeError) as e:
83
  print(f"Warning: Error selecting best draft: {e}. Using original draft.")
 
4
  import re
5
 
6
  from ..classes.classes import AppState
7
+ from ..prompts.templates import DRAFT_RATING_PROMPT, BEST_DRAFT_SELECTION_PROMPT
 
 
 
8
  from ..utils.llm_provider_factory import LLMFactory
9
 
10
 
 
11
  logger = logging.getLogger(__name__)
12
  # Constants
13
  CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
14
 
 
 
15
 
16
  def self_consistency_vote(state: AppState) -> AppState:
17
  """Choose the best draft from multiple variations."""
18
+ # Create LLM inside function (lazy initialization)
19
+ llm_factory = LLMFactory()
20
+ llm_precise = llm_factory.create_langchain(
21
+ model="google/gemma-3-12b-it:free", provider="openrouter", temperature=0.1
22
+ )
23
+
24
  variations = state.get("variations", {"variations": []})
25
 
26
  all_drafts = [state["draft"]] + variations["variations"]
 
31
  # Get resume and job summaries, handling different formats
32
  try:
33
  if isinstance(state["resume_path"], list) and len(state["resume_path"]) > 0:
34
+ if hasattr(state["resume_path"][0], "page_content"):
35
  resume_summary = state["resume_path"][0].page_content
36
  else:
37
  resume_summary = state["resume_path"][0]
 
42
  resume_summary = str(state["resume_path"])
43
 
44
  try:
45
+ if (
46
+ isinstance(state["job_description_source"], list)
47
+ and len(state["job_description_source"]) > 0
48
+ ):
49
  job_summary = state["job_description_source"][0]
50
  else:
51
  job_summary = str(state["job_description_source"])
 
54
  job_summary = str(state["job_description_source"])
55
 
56
  for i, draft in enumerate(all_drafts):
57
+ rating = llm_precise.invoke(
58
+ DRAFT_RATING_PROMPT.format(
59
+ resume_summary=resume_summary,
60
+ job_summary=job_summary,
61
+ draft=draft,
62
+ draft_number=i + 1,
63
+ )
64
+ )
65
  ratings.append(rating)
66
 
67
  # Create a clearer, more structured prompt for draft selection
68
  selection_prompt = BEST_DRAFT_SELECTION_PROMPT.format(
69
+ ratings_json=json.dumps(ratings, indent=2), num_drafts=len(all_drafts)
 
70
  )
71
 
72
  # Get the selected draft index with error handling
73
  try:
74
  selection = llm_precise.invoke(selection_prompt).strip()
75
  # Extract just the first number found in the response
76
+ number_match = re.search(r"\d+", selection)
77
  if not number_match:
78
+ print(
79
+ "Warning: Could not extract draft number from LLM response. Using original draft."
80
+ )
81
  best_draft_idx = 0
82
  else:
83
  best_draft_idx = int(number_match.group()) - 1
84
  # Validate the index is in range
85
  if best_draft_idx < 0 or best_draft_idx >= len(all_drafts):
86
+ print(
87
+ f"Warning: Selected draft index {best_draft_idx + 1} out of range. Using original draft."
88
+ )
89
  best_draft_idx = 0
90
  except (ValueError, TypeError) as e:
91
  print(f"Warning: Error selecting best draft: {e}. Using original draft.")
src/job_writing_agent/nodes/variations.py CHANGED
@@ -5,7 +5,7 @@ from typing_extensions import Dict, List
5
  from langchain_core.documents import Document
6
 
7
 
8
- from ..classes.classes import AppState
9
  from ..utils.llm_provider_factory import LLMFactory
10
  from ..prompts.templates import VARIATION_PROMPT
11
 
@@ -14,15 +14,15 @@ logger = logging.getLogger(__name__)
14
  # Constants
15
  CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
16
 
17
- llm_provider = LLMFactory()
18
 
19
- llm = llm_provider.create_langchain(
20
- "qwen/qwen3-4b:free", provider="openrouter", temperature=0.3
21
- )
22
-
23
-
24
- def generate_variations(state: AppState) -> Dict[str, List[str]]:
25
  """Generate multiple variations of the draft for self-consistency voting."""
 
 
 
 
 
 
26
  variations = []
27
 
28
  # Get resume and job text, handling both string and Document types
@@ -70,6 +70,8 @@ def generate_variations(state: AppState) -> Dict[str, List[str]]:
70
 
71
  response = configured_llm.invoke(variation)
72
 
 
 
73
  if response and response.strip(): # Only add non-empty variations
74
  variations.append(response)
75
  except Exception as e:
 
5
  from langchain_core.documents import Document
6
 
7
 
8
+ from ..classes.classes import ResultState
9
  from ..utils.llm_provider_factory import LLMFactory
10
  from ..prompts.templates import VARIATION_PROMPT
11
 
 
14
  # Constants
15
  CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
16
 
 
17
 
18
+ def generate_variations(state: ResultState) -> Dict[str, List[str]]:
 
 
 
 
 
19
  """Generate multiple variations of the draft for self-consistency voting."""
20
+ # Create LLM inside function (lazy initialization)
21
+ llm_provider = LLMFactory()
22
+ llm = llm_provider.create_langchain(
23
+ "google/gemma-3-27b-it:free", provider="openrouter", temperature=0.3
24
+ )
25
+
26
  variations = []
27
 
28
  # Get resume and job text, handling both string and Document types
 
70
 
71
  response = configured_llm.invoke(variation)
72
 
73
+ print(f"Response for setting: {variation} has a response: {response}")
74
+
75
  if response and response.strip(): # Only add non-empty variations
76
  variations.append(response)
77
  except Exception as e:
src/job_writing_agent/prompts/templates.py CHANGED
@@ -5,7 +5,11 @@ This module contains all prompt templates used throughout the job application
5
  generation process, organized by task.
6
  """
7
 
8
- from langchain_core.prompts import ChatPromptTemplate
 
 
 
 
9
  from langchain_core.messages import SystemMessage, HumanMessage
10
 
11
  # Persona selection prompts
@@ -201,19 +205,26 @@ Example: If draft #2 is best, return ONLY '2'.
201
 
202
  REVISION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
203
  [
204
- SystemMessage(
205
- content="You are an expert job application writer. Revise the draft based on feedback."
206
  ),
207
- HumanMessage(
208
- content="""
209
- # Original Draft
210
  {draft}
211
-
212
- # Feedback
 
213
  {feedback}
214
-
215
- Revise the draft to incorporate this feedback while maintaining professionalism and impact.
216
- Return the complete, final version.
 
 
 
 
 
 
217
  """
218
  ),
219
  ]
 
5
  generation process, organized by task.
6
  """
7
 
8
+ from langchain_core.prompts import (
9
+ ChatPromptTemplate,
10
+ SystemMessagePromptTemplate,
11
+ HumanMessagePromptTemplate,
12
+ )
13
  from langchain_core.messages import SystemMessage, HumanMessage
14
 
15
  # Persona selection prompts
 
205
 
206
  REVISION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
207
  [
208
+ SystemMessagePromptTemplate.from_template(
209
+ "You are an expert job application writer. Revise the draft based on BOTH the self-evaluation and external feedback provided."
210
  ),
211
+ HumanMessagePromptTemplate.from_template(
212
+ """
213
+ --------------------------------Original Draft--------------------------------
214
  {draft}
215
+ ----------------------------------------------------------------------------------------
216
+
217
+ --------------------------------Candidate Feedback--------------------------------
218
  {feedback}
219
+ ----------------------------------------------------------------------------------------
220
+
221
+ --------------------------------Critique Feedback--------------------------------
222
+ {critique_feedback}
223
+ ----------------------------------------------------------------------------------------
224
+
225
+ Based on the self evaluation in the Original Draft, Critique Feedback and the Candidates' Feedback, revise the content taking essence of the self evaluation, Critique Feedback and the Candidates' Feedback into account. Do not repeat the same content from the Original Draft, Critique Feedback and the Candidates' Feedback.
226
+
227
+ Return the content of the revised draft. Make sure the output is only the content that is the revised content and nothing else.
228
  """
229
  ),
230
  ]
src/job_writing_agent/prompts/test_prompts.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.prompts import (
2
+ ChatPromptTemplate,
3
+ HumanMessagePromptTemplate,
4
+ SystemMessagePromptTemplate,
5
+ )
6
+
7
+
8
+ REVISION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
9
+ [
10
+ SystemMessagePromptTemplate.from_template(
11
+ "You are an expert job application writer. Revise the draft based on BOTH the self-evaluation and external feedback provided."
12
+ ),
13
+ HumanMessagePromptTemplate.from_template(
14
+ """
15
+ # Original Draft Content with Evaluation Section at the end
16
+ {draft}
17
+
18
+ # Candidates' Feedback (Human Feedback)
19
+ {feedback}
20
+
21
+ # Critique Feedback (AI Feedback)
22
+ {critique_feedback}
23
+
24
+ Based on the self evaluation in the Original Draft, Critique Feedback and the Candidates' Feedback, revise the content taking essence of the self evaluation, Critique Feedback and the Candidates' Feedback into account. Do not repeat the same content from the Original Draft, Critique Feedback and the Candidates' Feedback.
25
+
26
+ Return the content of the revised draft. Make sure the output is only the content that is the revised content and nothing else.
27
+ """
28
+ ),
29
+ ]
30
+ )
31
+
32
+ print(
33
+ REVISION_PROMPT.format_messages(
34
+ draft="Hello, how are you?",
35
+ feedback="I like your draft.",
36
+ critique_feedback="Your draft is good.",
37
+ )
38
+ )
src/job_writing_agent/tools/SearchTool.py CHANGED
@@ -6,37 +6,40 @@ from pathlib import Path
6
 
7
  from langchain_tavily import TavilySearch
8
  from openevals.llm import create_async_llm_as_judge
9
- from openevals.prompts import (
10
- RAG_RETRIEVAL_RELEVANCE_PROMPT,
11
- RAG_HELPFULNESS_PROMPT
12
- )
13
  import dspy
14
 
15
  from ..agents.output_schema import TavilySearchQueries
16
  from ..classes.classes import ResearchState
17
  from ..utils.llm_provider_factory import LLMFactory
18
 
 
19
  logger = logging.getLogger(__name__)
20
 
21
 
22
- env_path = Path(__file__).parent / '.env'
23
  load_dotenv(dotenv_path=env_path, override=True)
24
 
25
 
26
  openrouter_api_key = os.environ["OPENROUTER_API_KEY"]
27
 
28
- llm_provider = LLMFactory()
29
-
30
 
31
  class TavilyResearchTool:
32
-
33
- def __init__(self, job_description, company_name, max_results=5, model_name="qwen/qwen3-4b:free"):
34
- self.dspy_llm = llm_provider.create_dspy(model=model_name,
35
- provider="openrouter",
36
- temperature=0.3)
 
 
 
 
 
 
 
37
  self.job_description = job_description
38
  self.company_name = company_name
39
- self.tavily_searchtool = TavilySearch(max_results=max_results)
40
 
41
  def create_tavily_queries(self):
42
  """
@@ -46,101 +49,222 @@ class TavilyResearchTool:
46
  """
47
  tavily_query_generator = dspy.ChainOfThought(TavilySearchQueries)
48
  with dspy.context(lm=self.dspy_llm, adapter=dspy.JSONAdapter()):
49
- response = tavily_query_generator(job_description=self.job_description, company_name=self.company_name)
 
 
50
  return response
51
 
52
-
53
  def tavily_search_company(self, queries):
54
-
55
  query_results: list[list[str]] = []
56
  for query in queries:
57
  try:
58
- search_query_response = self.tavily_searchtool.invoke({"query": queries[query]})
59
- query_results.append([res['content'] for res in search_query_response['results']])
 
 
 
 
60
  # print(f"Tavily Search Tool Response for query '{search_query_response['query']}': {query_results_map[search_query_response['query']]}")
61
  except Exception as e:
62
- logger.error(f"Failed to perform company research using TavilySearchTool. Error : {e}")
 
 
63
  continue
64
 
65
  return query_results
66
 
67
- llm_structured = llm_provider.create_langchain("llama3.1-8b",
68
- provider="cerebras",
69
- temperature=0.3)
70
 
71
  def get_relevance_evaluator():
 
 
 
 
 
 
 
 
 
 
72
  return create_async_llm_as_judge(
73
- judge=llm_structured,
74
- prompt=RAG_RETRIEVAL_RELEVANCE_PROMPT,
75
- feedback_key="retrieval_relevance",
76
- )
77
 
78
 
79
  def get_helpfulness_evaluator():
 
 
 
 
 
 
 
 
 
 
80
  return create_async_llm_as_judge(
81
- judge=llm_structured,
82
- prompt=RAG_HELPFULNESS_PROMPT
83
- + '\nReturn "true" if the answer is helpful, and "false" otherwise.',
84
- feedback_key="helpfulness",
85
- )
86
-
87
-
88
- async def relevance_filter(state: ResearchState) -> ResearchState:
 
 
 
 
 
89
  try:
90
- # Set the current node
91
- state["current_node"] = "relevance_filter"
92
-
93
- # Get the all_query_data and attempted_queries_list
94
- tavily_search_results = state["company_research_data"]["tavily_search"]
95
- attempted_tavily_query_list = state["attempted_search_queries"]
96
-
97
- # Check if all_query_data and attempted_queries_list are lists
98
- assert isinstance(tavily_search_results, list), "tavily_search_results is not a list"
99
- assert isinstance(attempted_tavily_query_list, list), "attempted_tavily_query_list is not a list"
100
-
101
- print("Filtering results...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
- filtered_search_results = [] # Stores results deemed relevant in this specific call
 
 
 
 
 
 
104
 
105
- # Create a semaphore to limit concurrent tasks to 2
106
- semaphore = asyncio.Semaphore(2)
107
 
108
- async def evaluate_with_semaphore(query_result_item, input_query: str):
109
- # query_result_item is a dict like {'rationale': '...', 'results': [...]}
110
- async with semaphore:
111
- relevance_evaluator = get_relevance_evaluator()
112
- eval_result = await relevance_evaluator(
113
- inputs=input_query, context=query_result_item # context is the whole result block for the query
114
- )
115
- return query_result_item, eval_result
116
 
117
- # Create tasks for all results
118
- tasks: list = []
 
 
 
119
 
120
- for query_result, attempted_query in zip(tavily_search_results, attempted_tavily_query_list):
121
- tasks.append(evaluate_with_semaphore(query_result, attempted_query))
122
- # Process tasks as they complete
123
- for completed_task in asyncio.as_completed(tasks):
124
- query_result_item, eval_result = await completed_task
125
- # logger.info(f"Evaluated query result for '{query_result_item}': {eval_result}")
126
- if eval_result.get("score"): # Safely check for score
127
- if isinstance(query_result_item, list):
128
- filtered_search_results.extend(query_result_item)
129
- else:
130
- # Handle cases where "results" might not be a list or is missing
131
- logger.warning("Expected a list in query_result_item, got: %s", type(query_result_item))
132
 
133
- # Append the newly filtered results to the main compiled_results list
134
- state["company_research_data"]["tavily_search"] = filtered_search_results
 
 
135
 
136
- logger.info(f"Relevance filtering completed. {len(filtered_search_results)} relevant results found.")
 
 
 
 
137
 
138
  return state
139
 
140
  except Exception as e:
141
- print(f"ERROR in relevance_filter: {e}")
142
- import traceback
143
- traceback.print_exc()
144
- logger.error(f"Error in relevance_filter: {str(e)}")
145
- # Return original state to avoid breaking the flow
146
  return state
 
6
 
7
  from langchain_tavily import TavilySearch
8
  from openevals.llm import create_async_llm_as_judge
9
+ from openevals.prompts import RAG_RETRIEVAL_RELEVANCE_PROMPT, RAG_HELPFULNESS_PROMPT
 
 
 
10
  import dspy
11
 
12
  from ..agents.output_schema import TavilySearchQueries
13
  from ..classes.classes import ResearchState
14
  from ..utils.llm_provider_factory import LLMFactory
15
 
16
+
17
  logger = logging.getLogger(__name__)
18
 
19
 
20
+ env_path = Path(__file__).parent / ".env"
21
  load_dotenv(dotenv_path=env_path, override=True)
22
 
23
 
24
  openrouter_api_key = os.environ["OPENROUTER_API_KEY"]
25
 
 
 
26
 
27
  class TavilyResearchTool:
28
+ def __init__(
29
+ self,
30
+ job_description,
31
+ company_name,
32
+ max_results=5,
33
+ model_name="mistralai/mistral-7b-instruct:free",
34
+ ):
35
+ # Create LLM inside __init__ (lazy initialization)
36
+ llm_provider = LLMFactory()
37
+ self.dspy_llm = llm_provider.create_dspy(
38
+ model=model_name, provider="openrouter", temperature=0.3
39
+ )
40
  self.job_description = job_description
41
  self.company_name = company_name
42
+ self.tavily_searchtool = TavilySearch(max_results=max_results)
43
 
44
  def create_tavily_queries(self):
45
  """
 
49
  """
50
  tavily_query_generator = dspy.ChainOfThought(TavilySearchQueries)
51
  with dspy.context(lm=self.dspy_llm, adapter=dspy.JSONAdapter()):
52
+ response = tavily_query_generator(
53
+ job_description=self.job_description, company_name=self.company_name
54
+ )
55
  return response
56
 
 
57
  def tavily_search_company(self, queries):
 
58
  query_results: list[list[str]] = []
59
  for query in queries:
60
  try:
61
+ search_query_response = self.tavily_searchtool.invoke(
62
+ {"query": queries[query]}
63
+ )
64
+ query_results.append(
65
+ [res["content"] for res in search_query_response["results"]]
66
+ )
67
  # print(f"Tavily Search Tool Response for query '{search_query_response['query']}': {query_results_map[search_query_response['query']]}")
68
  except Exception as e:
69
+ logger.error(
70
+ f"Failed to perform company research using TavilySearchTool. Error : {e}"
71
+ )
72
  continue
73
 
74
  return query_results
75
 
 
 
 
76
 
77
  def get_relevance_evaluator():
78
+ """
79
+ Create an LLM-as-judge evaluator for relevance filtering.
80
+
81
+ Creates the LLM on-demand (lazy initialization) to avoid startup delays.
82
+ """
83
+ # Create LLM inside function (lazy initialization)
84
+ llm_provider = LLMFactory()
85
+ llm_structured = llm_provider.create_langchain(
86
+ "llama3.1-8b", provider="cerebras", temperature=0.3
87
+ )
88
  return create_async_llm_as_judge(
89
+ judge=llm_structured,
90
+ prompt=RAG_RETRIEVAL_RELEVANCE_PROMPT,
91
+ feedback_key="retrieval_relevance",
92
+ )
93
 
94
 
95
  def get_helpfulness_evaluator():
96
+ """
97
+ Create an LLM-as-judge evaluator for helpfulness filtering.
98
+
99
+ Creates the LLM on-demand (lazy initialization) to avoid startup delays.
100
+ """
101
+ # Create LLM inside function (lazy initialization)
102
+ llm_provider = LLMFactory()
103
+ llm_structured = llm_provider.create_langchain(
104
+ "llama3.1-8b", provider="cerebras", temperature=0.3
105
+ )
106
  return create_async_llm_as_judge(
107
+ judge=llm_structured,
108
+ prompt=RAG_HELPFULNESS_PROMPT
109
+ + '\nReturn "true" if the answer is helpful, and "false" otherwise.',
110
+ feedback_key="helpfulness",
111
+ )
112
+
113
+
114
+ async def filter_research_results_by_relevance(state: ResearchState) -> ResearchState:
115
+ """
116
+ Filter search results to keep only relevant company information.
117
+ Uses LLM-as-judge to evaluate if each result set is relevant to its query.
118
+ Irrelevant results are REMOVED from the final output.
119
+ """
120
  try:
121
+ state["current_node"] = "filter_research_results_by_relevance"
122
+
123
+ # Extract search data from state
124
+ raw_search_results = state.get("company_research_data", {}).get(
125
+ "tavily_search", []
126
+ )
127
+ search_queries_used = state.get("attempted_search_queries", [])
128
+
129
+ # Validate data types
130
+ if not isinstance(raw_search_results, list):
131
+ logger.warning(f"Invalid search results type: {type(raw_search_results)}")
132
+ return state
133
+
134
+ if not isinstance(search_queries_used, list):
135
+ logger.warning(f"Invalid queries type: {type(search_queries_used)}")
136
+ search_queries_used = []
137
+
138
+ # Early exit if no results
139
+ if len(raw_search_results) == 0:
140
+ logger.info("No search results to filter.")
141
+ state["company_research_data"]["tavily_search"] = []
142
+ return state
143
+
144
+ logger.info(
145
+ f"Starting relevance filtering for {len(raw_search_results)} result sets..."
146
+ )
147
+
148
+ # Track filtering statistics
149
+ results_kept = []
150
+ results_removed_count = 0
151
+ evaluation_errors_count = 0
152
+
153
+ # Limit concurrent evaluations to prevent rate limiting
154
+ concurrency_limiter = asyncio.Semaphore(2)
155
+
156
+ async def evaluate_result_set_relevance(
157
+ search_result_content, original_query: str
158
+ ):
159
+ """
160
+ Evaluate if a search result set is relevant to its query.
161
+
162
+ Returns:
163
+ tuple: (search_result_content, is_relevant: bool, error: str|None)
164
+ """
165
+ async with concurrency_limiter:
166
+ try:
167
+ # Skip empty result sets
168
+ if not search_result_content:
169
+ logger.debug(
170
+ f"Skipping empty result set for query: {original_query[:50]}..."
171
+ )
172
+ return (None, False, "empty")
173
+
174
+ # Create relevance evaluator
175
+ llm_relevance_judge = get_relevance_evaluator()
176
+
177
+ # Evaluate with timeout protection
178
+ evaluation_task = llm_relevance_judge(
179
+ inputs=original_query, context=search_result_content
180
+ )
181
+
182
+ evaluation_result = await asyncio.wait_for(
183
+ evaluation_task, timeout=15
184
+ )
185
+
186
+ # Extract relevance score (True = relevant, False = not relevant)
187
+ is_result_relevant = bool(evaluation_result.get("score", False))
188
+
189
+ if is_result_relevant:
190
+ logger.debug(
191
+ f"KEPT: Result relevant for query: {original_query[:60]}..."
192
+ )
193
+ return (search_result_content, True, None)
194
+ else:
195
+ logger.debug(
196
+ f"REMOVED: Result not relevant for query: {original_query[:60]}..."
197
+ )
198
+ return (None, False, None)
199
+
200
+ except asyncio.TimeoutError:
201
+ logger.warning(
202
+ f"Evaluation timed out for query: {original_query[:60]}... (KEEPING result)"
203
+ )
204
+ return (search_result_content, True, "timeout")
205
+
206
+ except Exception as e:
207
+ logger.error(
208
+ f"Evaluation failed for query: {original_query[:60]}... - {e} (KEEPING result)"
209
+ )
210
+ return (search_result_content, True, f"error:{str(e)}")
211
+
212
+ # Create evaluation tasks for all result sets
213
+ evaluation_tasks = []
214
+ for result_set, query in zip(raw_search_results, search_queries_used):
215
+ task = evaluate_result_set_relevance(result_set, query)
216
+ evaluation_tasks.append(task)
217
+
218
+ # Execute all evaluations concurrently
219
+ all_evaluation_results = await asyncio.gather(
220
+ *evaluation_tasks, return_exceptions=True
221
+ )
222
+
223
+ # Process evaluation results and separate kept vs removed
224
+ for eval_result in all_evaluation_results:
225
+ # Handle exceptions from gather
226
+ if isinstance(eval_result, Exception):
227
+ logger.error(f"Evaluation task failed with exception: {eval_result}")
228
+ evaluation_errors_count += 1
229
+ continue
230
 
231
+ # Type guard: eval_result is now guaranteed to be a tuple
232
+ if not isinstance(eval_result, tuple) or len(eval_result) != 3:
233
+ logger.error(
234
+ f"Unexpected evaluation result format: {type(eval_result)}"
235
+ )
236
+ evaluation_errors_count += 1
237
+ continue
238
 
239
+ result_content, is_relevant, error = eval_result
 
240
 
241
+ # Track errors
242
+ if error:
243
+ evaluation_errors_count += 1
 
 
 
 
 
244
 
245
+ # Keep relevant results, discard irrelevant ones
246
+ if result_content is not None and is_relevant:
247
+ results_kept.append(result_content)
248
+ else:
249
+ results_removed_count += 1
250
 
251
+ # Update state with ONLY the relevant results
252
+ state["company_research_data"]["tavily_search"] = results_kept
 
 
 
 
 
 
 
 
 
 
253
 
254
+ # Log filtering summary
255
+ total_evaluated = len(raw_search_results)
256
+ kept_count = len(results_kept)
257
+ removed_count = results_removed_count
258
 
259
+ logger.info(
260
+ f"Relevance filtering complete: "
261
+ f"KEPT {kept_count} | REMOVED {removed_count} | TOTAL {total_evaluated} "
262
+ f"({evaluation_errors_count} evaluation errors)"
263
+ )
264
 
265
  return state
266
 
267
  except Exception as e:
268
+ logger.error(f"Critical error in relevance filtering: {e}", exc_info=True)
269
+ # On critical error, return original state unchanged
 
 
 
270
  return state
src/job_writing_agent/tools/__init__.py CHANGED
@@ -4,6 +4,6 @@ Created on Mon Oct 23 16:49:52 2023
4
  @author: rishabhaggarwal
5
  """
6
 
7
- from .SearchTool import relevance_filter
8
 
9
- __all__ = ["relevance_filter"]
 
4
  @author: rishabhaggarwal
5
  """
6
 
7
+ from .SearchTool import filter_research_results_by_relevance
8
 
9
+ __all__ = ["filter_research_results_by_relevance"]
src/job_writing_agent/utils/application_cli_interface.py CHANGED
@@ -1,12 +1,12 @@
1
  import argparse
2
  import os
3
- from typing import Optional, Any, Iterable
4
 
5
  import requests
6
  from requests.exceptions import RequestException
7
 
8
 
9
- DEFAULT_MODEL = "qwen/qwen3-4b:free"
10
  DEFAULT_CONTENT_TYPE = "cover_letter"
11
 
12
 
 
1
  import argparse
2
  import os
3
+ from typing import Iterable
4
 
5
  import requests
6
  from requests.exceptions import RequestException
7
 
8
 
9
+ DEFAULT_MODEL = "mistralai/mistral-7b-instruct:free"
10
  DEFAULT_CONTENT_TYPE = "cover_letter"
11
 
12
 
src/job_writing_agent/utils/document_processing.py CHANGED
@@ -13,54 +13,66 @@ from typing_extensions import Dict, List, Any
13
  import dspy
14
  from langchain_community.document_loaders import PyPDFLoader, AsyncChromiumLoader
15
  from langchain_community.document_transformers import Html2TextTransformer
16
- from langchain_text_splitters import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter
17
- from langchain_text_splitters import RecursiveCharacterTextSplitter
 
 
18
  from langchain_core.documents import Document
19
  from langfuse import observe
20
  from pydantic import BaseModel, Field
21
 
22
  # Local imports - using relative imports
23
  from .errors import URLExtractionError, LLMProcessingError, JobDescriptionParsingError
24
- from .llm_provider_factory import LLMFactory
25
 
26
  # Set up logging
27
  logger = logging.getLogger(__name__)
28
  logging.basicConfig(level=logging.INFO)
29
 
30
- llm_provider = LLMFactory()
31
-
32
- llm = llm_provider.create_langchain("qwen-3-32b",
33
- provider="cerebras",
34
- temperature=0.3,
35
- )
36
-
37
  # Default paths
38
  DEFAULT_RESUME_PATH: str = os.getenv("DEFAULT_RESUME_PATH", "")
39
 
40
 
41
  # Most Occurring Resume Section Headers
42
  RESUME_SECTIONS: list[str] = [
43
- "EDUCATION", "EXPERIENCE", "SKILLS", "WORK EXPERIENCE",
44
- "PROFESSIONAL EXPERIENCE", "PROJECTS", "CERTIFICATIONS",
45
- "SUMMARY", "OBJECTIVE", "CONTACT", "PUBLICATIONS",
46
- "AWARDS", "LANGUAGES", "INTERESTS", "REFERENCES"
 
 
 
 
 
 
 
 
 
 
 
47
  ]
48
 
49
 
50
  class ResumeSection(BaseModel):
51
  """Model for a structured resume section."""
52
- title: str = Field(description="The section title (e.g., 'Experience', 'Education')")
 
 
 
53
  content: str = Field(description="The full content of this section")
54
 
55
 
56
  class StructuredResume(BaseModel):
57
  """Model for a structured resume with sections."""
 
58
  sections: List[ResumeSection] = Field(description="List of resume sections")
59
- contact_info: Dict[str, str] = Field(description="Contact information extracted from the resume")
 
 
60
 
61
 
62
  class JobDescriptionComponents(BaseModel):
63
  """Model for job description components."""
 
64
  company_name: str = Field(description="The company name")
65
  job_description: str = Field(description="The job description")
66
  reasoning: str = Field(description="The reasoning for the extracted information")
@@ -72,8 +84,13 @@ class ExtractJobDescription(dspy.Signature):
72
  Role Introduction,Qualifications and Requirements, Prefrred Qualifications, Salary, Location.
73
  Do not alter the content of the job description.
74
  """
75
- job_description_html_content = dspy.InputField(desc="HTML content of the job posting.")
76
- job_description = dspy.OutputField(desc="Clean job description which is free of HTML tags and irrelevant information.")
 
 
 
 
 
77
  job_role = dspy.OutputField(desc="The job role in the posting.")
78
  company_name = dspy.OutputField(desc="Company Name of the Job listing.")
79
  location = dspy.OutputField(desc="The location for the provided job posting.")
@@ -90,19 +107,20 @@ def clean_resume_text(text: str) -> str:
90
  Cleaned text
91
  """
92
  # Remove excessive whitespace
93
- text = re.sub(r'\s+', ' ', text)
94
 
95
  # Fix common PDF extraction issues
96
- text = re.sub(r'([a-z])- ([a-z])', r'\1\2', text) # Fix hyphenated words
97
 
98
  # Remove header/footer page numbers
99
- text = re.sub(r'\n\s*\d+\s*\n', '\n', text)
100
 
101
  # Replace bullet variations with standard markdown bullets
102
- text = re.sub(r'[•●○◘◙♦♣♠★]', '* ', text)
103
 
104
  return text.strip()
105
 
 
106
  @observe()
107
  def extract_contact_info(text: str) -> Dict[str, str]:
108
  """Extract contact information from resume text.
@@ -116,28 +134,33 @@ def extract_contact_info(text: str) -> Dict[str, str]:
116
  contact_info = {}
117
 
118
  # Extract email
119
- email_match = re.search(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', text)
 
 
120
  if email_match:
121
- contact_info['email'] = email_match.group(0)
122
 
123
  # Extract phone (various formats)
124
- phone_match = re.search(r'(\+\d{1,3}[-.\s]?)?(\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}', text)
 
 
125
  if phone_match:
126
- contact_info['phone'] = phone_match.group(0)
127
 
128
  # Extract LinkedIn URL
129
- linkedin_match = re.search(r'linkedin\.com/in/[a-zA-Z0-9_-]+/?', text)
130
  if linkedin_match:
131
- contact_info['linkedin'] = 'https://www.' + linkedin_match.group(0)
132
 
133
  # Try to extract name (this is approximate and might need LLM for better accuracy)
134
  # Typically name appears at the top of the resume
135
- first_line = text.strip().split('\n')[0].strip()
136
  if len(first_line) < 40 and not any(char.isdigit() for char in first_line):
137
- contact_info['name'] = first_line
138
 
139
  return contact_info
140
 
 
141
  @observe()
142
  def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
143
  """Identify sections in a resume text.
@@ -174,15 +197,21 @@ def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
174
 
175
  # Regex-based section identification
176
  # Create a pattern that matches common section headers
177
- section_pattern = r'(?:^|\n)(?:[^a-zA-Z\d\s]|\s)*(' + '|'.join(RESUME_SECTIONS) + r')(?:[^a-zA-Z\d\s]|\s)*(?:$|\n)'
 
 
 
 
178
  matches = list(re.finditer(section_pattern, text, re.IGNORECASE))
179
 
180
  if not matches:
181
  # If no sections found, treat the whole resume as one section
182
- sections.append({
183
- "title": "resume",
184
- "content": text,
185
- })
 
 
186
  return sections
187
 
188
  # Process each section
@@ -191,15 +220,12 @@ def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
191
  start_pos = match.start()
192
 
193
  # Find the end position (start of next section or end of text)
194
- end_pos = matches[i+1].start() if i < len(matches) - 1 else len(text)
195
 
196
  # Extract section content (excluding the header)
197
  section_content = text[start_pos:end_pos].strip()
198
 
199
- sections.append({
200
- "title": section_title.lower(),
201
- "content": section_content
202
- })
203
 
204
  return sections
205
 
@@ -211,11 +237,8 @@ def _collapse_ws(text: str) -> str:
211
 
212
 
213
  def _is_heading(line: str) -> bool:
214
- return (
215
- line.isupper()
216
- and len(line.split()) <= 5
217
- and not re.search(r"\d", line)
218
- )
219
 
220
  def parse_resume(file_path: str | Path) -> List[Document]:
221
  """
@@ -225,11 +248,13 @@ def parse_resume(file_path: str | Path) -> List[Document]:
225
  file_extension = Path(file_path).suffix.lower()
226
 
227
  # Handle different file types
228
- if file_extension == '.pdf':
229
- text = PyPDFLoader(str(file_path), extraction_mode="layout").load()[0].page_content
230
- elif file_extension == '.txt':
 
 
231
  try:
232
- with open(file_path, 'r', encoding='utf-8') as f:
233
  text = f.read()
234
  if not text.strip():
235
  raise ValueError("File is empty")
@@ -237,27 +262,26 @@ def parse_resume(file_path: str | Path) -> List[Document]:
237
  logger.error(f"Error reading text file: {str(e)}")
238
  raise ValueError(f"Could not read text file: {file_path}. Error: {str(e)}")
239
  else:
240
- raise ValueError(f"Unsupported resume file type: {file_path}. Supported types: .pdf, .txt")
 
 
241
 
242
  text = _collapse_ws(text)
243
 
244
  # Tag headings with "###" so Markdown splitter can see them
245
- tagged_lines = [
246
- f"### {ln}" if _is_heading(ln) else ln
247
- for ln in text.splitlines()]
248
 
249
  md_text = "\n".join(tagged_lines)
250
 
251
  if "###" in md_text:
252
- splitter = MarkdownHeaderTextSplitter(
253
- headers_to_split_on=[("###", "section")]
254
- )
255
  chunks = splitter.split_text(md_text) # already returns Documents
256
  else:
257
- splitter = RecursiveCharacterTextSplitter(
258
- chunk_size=400, chunk_overlap=50
259
- )
260
- chunks: list[Document] = [Document(page_content=chunk, metadata={}) for chunk in splitter.split_text(md_text)] # Attach metadata
 
261
  for doc in chunks:
262
  doc.metadata.setdefault("source", str(file_path))
263
  # section already present if header-splitter was used
@@ -274,26 +298,32 @@ async def get_job_description(file_path_or_url: str) -> Document:
274
  Document containing the job description
275
  """
276
  # Check if the input is a URL
277
- if file_path_or_url.startswith(('http://', 'https://')):
278
  return await parse_job_description_from_url(file_path_or_url)
279
 
280
  # Handle local files based on extension
281
  file_extension = Path(file_path_or_url).suffix.lower()
282
 
283
  # Handle txt files
284
- if file_extension == '.txt':
285
  try:
286
- with open(file_path_or_url, 'r', encoding='utf-8') as f:
287
  content = f.read()
288
  if not content.strip():
289
  raise ValueError(f"File is empty: {file_path_or_url}")
290
- return Document(page_content=content, metadata={"source": file_path_or_url})
 
 
291
  except Exception as e:
292
  logger.error(f"Error reading text file: {str(e)}")
293
- raise ValueError(f"Could not read text file: {file_path_or_url}. Error: {str(e)}")
 
 
294
 
295
  # For other file types
296
- raise ValueError(f"Unsupported file type: {file_path_or_url}. Supported types: .pdf, .docx, .txt, .md")
 
 
297
 
298
 
299
  async def scrape_job_description_from_web(urls: List[str]):
@@ -304,7 +334,9 @@ async def scrape_job_description_from_web(urls: List[str]):
304
  scraped_data_documents = await loader.aload()
305
 
306
  html2text = Html2TextTransformer()
307
- markdown_scraped_data_documents = html2text.transform_documents(scraped_data_documents)
 
 
308
 
309
  # Grab the first 1000 tokens of the site
310
  splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
@@ -312,14 +344,14 @@ async def scrape_job_description_from_web(urls: List[str]):
312
  )
313
 
314
  extracted_content = splitter.split_documents(markdown_scraped_data_documents)
315
-
316
  return ".".join(doc.page_content for doc in extracted_content)
317
 
318
 
319
  async def parse_job_description_from_url(url: str) -> Document:
320
  """Extracts and structures a job description from a URL using an LLM.
321
 
322
- This function fetches content from a URL, uses a DSPy model to extract key details,
323
  and returns a structured LangChain Document. If the LLM processing fails, it falls
324
  back to returning the raw extracted text.
325
 
@@ -334,8 +366,8 @@ async def parse_job_description_from_url(url: str) -> Document:
334
  JobDescriptionParsingError: For any unexpected errors during the process.
335
  """
336
  logger.info("Starting job description extraction from URL: %s", url)
337
-
338
- # 1. Validate URL first (fail fast)
339
  parsed_url = urlparse(url)
340
  if not all([parsed_url.scheme, parsed_url.netloc]):
341
  logger.error("Invalid URL format: %s", url)
@@ -348,27 +380,33 @@ async def parse_job_description_from_url(url: str) -> Document:
348
  logger.info("Fetching content from URL...")
349
  raw_content = await scrape_job_description_from_web([url])
350
  if not raw_content or not raw_content.strip():
351
- raise URLExtractionError("Failed to extract any meaningful content from the URL.")
 
 
352
  logger.info("Successfully fetched raw content from URL.")
353
  except Exception as e:
354
  # Wrap any fetching error into our custom exception
355
- raise URLExtractionError(f"Failed to download or read content from {url}: {e}") from e
 
 
356
 
357
  # 3. Process content with the LLM
358
  try:
359
  logger.info("Processing content with DSPy LLM...")
360
  # Configure DSPy LM (it's good practice to do this here if it can change)
361
- dspy.configure(lm=dspy.LM(
362
- "cerebras/qwen-3-32b",
363
- api_key=os.environ.get("CEREBRAS_API_KEY"),
364
- temperature=0.1,
365
- max_tokens=60000 # Note: This max_tokens is unusually high
366
- ))
367
-
 
 
368
  job_extract_fn = dspy.Predict(ExtractJobDescription)
369
  result = job_extract_fn(job_description_html_content=raw_content)
370
  logger.info("Successfully processed job description with LLM.")
371
-
372
  # 4. Create the final Document with structured data
373
  job_doc = Document(
374
  page_content=result.job_description,
@@ -376,8 +414,8 @@ async def parse_job_description_from_url(url: str) -> Document:
376
  "company_name": result.company_name,
377
  "source": url,
378
  "job_role": result.job_role,
379
- "location": result.location
380
- }
381
  )
382
  return job_doc
383
 
@@ -392,11 +430,13 @@ async def parse_job_description_from_url(url: str) -> Document:
392
  if raw_content:
393
  return Document(
394
  page_content=raw_content,
395
- metadata={"company_name": "Unknown", "source": url, "error": str(e)}
396
  )
397
  # If raw_content is also None, then the failure was catastrophic.
398
- raise LLMProcessingError("LLM processing failed and no raw content was available for fallback.") from e
399
-
 
 
400
  except URLExtractionError as e:
401
  logger.error(f"Could not extract content from URL: {e}")
402
  raise URLExtractionError("Failed to extract content from the URL.") from e
@@ -404,4 +444,6 @@ async def parse_job_description_from_url(url: str) -> Document:
404
  # 6. Catch any other unexpected errors
405
  except Exception as e:
406
  logger.error(f"An unexpected error occurred: {e}", exc_info=True)
407
- raise JobDescriptionParsingError(f"An unexpected error occurred while parsing the job description: {e}") from e
 
 
 
13
  import dspy
14
  from langchain_community.document_loaders import PyPDFLoader, AsyncChromiumLoader
15
  from langchain_community.document_transformers import Html2TextTransformer
16
+ from langchain_text_splitters import (
17
+ RecursiveCharacterTextSplitter,
18
+ MarkdownHeaderTextSplitter,
19
+ )
20
  from langchain_core.documents import Document
21
  from langfuse import observe
22
  from pydantic import BaseModel, Field
23
 
24
  # Local imports - using relative imports
25
  from .errors import URLExtractionError, LLMProcessingError, JobDescriptionParsingError
 
26
 
27
  # Set up logging
28
  logger = logging.getLogger(__name__)
29
  logging.basicConfig(level=logging.INFO)
30
 
 
 
 
 
 
 
 
31
  # Default paths
32
  DEFAULT_RESUME_PATH: str = os.getenv("DEFAULT_RESUME_PATH", "")
33
 
34
 
35
  # Most Occurring Resume Section Headers
36
  RESUME_SECTIONS: list[str] = [
37
+ "EDUCATION",
38
+ "EXPERIENCE",
39
+ "SKILLS",
40
+ "WORK EXPERIENCE",
41
+ "PROFESSIONAL EXPERIENCE",
42
+ "PROJECTS",
43
+ "CERTIFICATIONS",
44
+ "SUMMARY",
45
+ "OBJECTIVE",
46
+ "CONTACT",
47
+ "PUBLICATIONS",
48
+ "AWARDS",
49
+ "LANGUAGES",
50
+ "INTERESTS",
51
+ "REFERENCES",
52
  ]
53
 
54
 
55
  class ResumeSection(BaseModel):
56
  """Model for a structured resume section."""
57
+
58
+ title: str = Field(
59
+ description="The section title (e.g., 'Experience', 'Education')"
60
+ )
61
  content: str = Field(description="The full content of this section")
62
 
63
 
64
  class StructuredResume(BaseModel):
65
  """Model for a structured resume with sections."""
66
+
67
  sections: List[ResumeSection] = Field(description="List of resume sections")
68
+ contact_info: Dict[str, str] = Field(
69
+ description="Contact information extracted from the resume"
70
+ )
71
 
72
 
73
  class JobDescriptionComponents(BaseModel):
74
  """Model for job description components."""
75
+
76
  company_name: str = Field(description="The company name")
77
  job_description: str = Field(description="The job description")
78
  reasoning: str = Field(description="The reasoning for the extracted information")
 
84
  Role Introduction,Qualifications and Requirements, Prefrred Qualifications, Salary, Location.
85
  Do not alter the content of the job description.
86
  """
87
+
88
+ job_description_html_content = dspy.InputField(
89
+ desc="HTML content of the job posting."
90
+ )
91
+ job_description = dspy.OutputField(
92
+ desc="Clean job description which is free of HTML tags and irrelevant information."
93
+ )
94
  job_role = dspy.OutputField(desc="The job role in the posting.")
95
  company_name = dspy.OutputField(desc="Company Name of the Job listing.")
96
  location = dspy.OutputField(desc="The location for the provided job posting.")
 
107
  Cleaned text
108
  """
109
  # Remove excessive whitespace
110
+ text = re.sub(r"\s+", " ", text)
111
 
112
  # Fix common PDF extraction issues
113
+ text = re.sub(r"([a-z])- ([a-z])", r"\1\2", text) # Fix hyphenated words
114
 
115
  # Remove header/footer page numbers
116
+ text = re.sub(r"\n\s*\d+\s*\n", "\n", text)
117
 
118
  # Replace bullet variations with standard markdown bullets
119
+ text = re.sub(r"[•●○◘◙♦♣♠★]", "* ", text)
120
 
121
  return text.strip()
122
 
123
+
124
  @observe()
125
  def extract_contact_info(text: str) -> Dict[str, str]:
126
  """Extract contact information from resume text.
 
134
  contact_info = {}
135
 
136
  # Extract email
137
+ email_match = re.search(
138
+ r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", text
139
+ )
140
  if email_match:
141
+ contact_info["email"] = email_match.group(0)
142
 
143
  # Extract phone (various formats)
144
+ phone_match = re.search(
145
+ r"(\+\d{1,3}[-.\s]?)?(\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}", text
146
+ )
147
  if phone_match:
148
+ contact_info["phone"] = phone_match.group(0)
149
 
150
  # Extract LinkedIn URL
151
+ linkedin_match = re.search(r"linkedin\.com/in/[a-zA-Z0-9_-]+/?", text)
152
  if linkedin_match:
153
+ contact_info["linkedin"] = "https://www." + linkedin_match.group(0)
154
 
155
  # Try to extract name (this is approximate and might need LLM for better accuracy)
156
  # Typically name appears at the top of the resume
157
+ first_line = text.strip().split("\n")[0].strip()
158
  if len(first_line) < 40 and not any(char.isdigit() for char in first_line):
159
+ contact_info["name"] = first_line
160
 
161
  return contact_info
162
 
163
+
164
  @observe()
165
  def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
166
  """Identify sections in a resume text.
 
197
 
198
  # Regex-based section identification
199
  # Create a pattern that matches common section headers
200
+ section_pattern = (
201
+ r"(?:^|\n)(?:[^a-zA-Z\d\s]|\s)*("
202
+ + "|".join(RESUME_SECTIONS)
203
+ + r")(?:[^a-zA-Z\d\s]|\s)*(?:$|\n)"
204
+ )
205
  matches = list(re.finditer(section_pattern, text, re.IGNORECASE))
206
 
207
  if not matches:
208
  # If no sections found, treat the whole resume as one section
209
+ sections.append(
210
+ {
211
+ "title": "resume",
212
+ "content": text,
213
+ }
214
+ )
215
  return sections
216
 
217
  # Process each section
 
220
  start_pos = match.start()
221
 
222
  # Find the end position (start of next section or end of text)
223
+ end_pos = matches[i + 1].start() if i < len(matches) - 1 else len(text)
224
 
225
  # Extract section content (excluding the header)
226
  section_content = text[start_pos:end_pos].strip()
227
 
228
+ sections.append({"title": section_title.lower(), "content": section_content})
 
 
 
229
 
230
  return sections
231
 
 
237
 
238
 
239
  def _is_heading(line: str) -> bool:
240
+ return line.isupper() and len(line.split()) <= 5 and not re.search(r"\d", line)
241
+
 
 
 
242
 
243
  def parse_resume(file_path: str | Path) -> List[Document]:
244
  """
 
248
  file_extension = Path(file_path).suffix.lower()
249
 
250
  # Handle different file types
251
+ if file_extension == ".pdf":
252
+ text = (
253
+ PyPDFLoader(str(file_path), extraction_mode="layout").load()[0].page_content
254
+ )
255
+ elif file_extension == ".txt":
256
  try:
257
+ with open(file_path, "r", encoding="utf-8") as f:
258
  text = f.read()
259
  if not text.strip():
260
  raise ValueError("File is empty")
 
262
  logger.error(f"Error reading text file: {str(e)}")
263
  raise ValueError(f"Could not read text file: {file_path}. Error: {str(e)}")
264
  else:
265
+ raise ValueError(
266
+ f"Unsupported resume file type: {file_path}. Supported types: .pdf, .txt"
267
+ )
268
 
269
  text = _collapse_ws(text)
270
 
271
  # Tag headings with "###" so Markdown splitter can see them
272
+ tagged_lines = [f"### {ln}" if _is_heading(ln) else ln for ln in text.splitlines()]
 
 
273
 
274
  md_text = "\n".join(tagged_lines)
275
 
276
  if "###" in md_text:
277
+ splitter = MarkdownHeaderTextSplitter(headers_to_split_on=[("###", "section")])
 
 
278
  chunks = splitter.split_text(md_text) # already returns Documents
279
  else:
280
+ splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)
281
+ chunks: list[Document] = [
282
+ Document(page_content=chunk, metadata={})
283
+ for chunk in splitter.split_text(md_text)
284
+ ] # Attach metadata
285
  for doc in chunks:
286
  doc.metadata.setdefault("source", str(file_path))
287
  # section already present if header‑splitter was used
 
298
  Document containing the job description
299
  """
300
  # Check if the input is a URL
301
+ if file_path_or_url.startswith(("http://", "https://")):
302
  return await parse_job_description_from_url(file_path_or_url)
303
 
304
  # Handle local files based on extension
305
  file_extension = Path(file_path_or_url).suffix.lower()
306
 
307
  # Handle txt files
308
+ if file_extension == ".txt":
309
  try:
310
+ with open(file_path_or_url, "r", encoding="utf-8") as f:
311
  content = f.read()
312
  if not content.strip():
313
  raise ValueError(f"File is empty: {file_path_or_url}")
314
+ return Document(
315
+ page_content=content, metadata={"source": file_path_or_url}
316
+ )
317
  except Exception as e:
318
  logger.error(f"Error reading text file: {str(e)}")
319
+ raise ValueError(
320
+ f"Could not read text file: {file_path_or_url}. Error: {str(e)}"
321
+ )
322
 
323
  # For other file types
324
+ raise ValueError(
325
+ f"Unsupported file type: {file_path_or_url}. Supported types: .pdf, .docx, .txt, .md"
326
+ )
327
 
328
 
329
  async def scrape_job_description_from_web(urls: List[str]):
 
334
  scraped_data_documents = await loader.aload()
335
 
336
  html2text = Html2TextTransformer()
337
+ markdown_scraped_data_documents = html2text.transform_documents(
338
+ scraped_data_documents
339
+ )
340
 
341
  # Grab the first 1000 tokens of the site
342
  splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
 
344
  )
345
 
346
  extracted_content = splitter.split_documents(markdown_scraped_data_documents)
347
+
348
  return ".".join(doc.page_content for doc in extracted_content)
349
 
350
 
351
  async def parse_job_description_from_url(url: str) -> Document:
352
  """Extracts and structures a job description from a URL using an LLM.
353
 
354
+ This function fetches content from a URL, uses a DSPy to extract key details,
355
  and returns a structured LangChain Document. If the LLM processing fails, it falls
356
  back to returning the raw extracted text.
357
 
 
366
  JobDescriptionParsingError: For any unexpected errors during the process.
367
  """
368
  logger.info("Starting job description extraction from URL: %s", url)
369
+
370
+ # 1. Validate URL
371
  parsed_url = urlparse(url)
372
  if not all([parsed_url.scheme, parsed_url.netloc]):
373
  logger.error("Invalid URL format: %s", url)
 
380
  logger.info("Fetching content from URL...")
381
  raw_content = await scrape_job_description_from_web([url])
382
  if not raw_content or not raw_content.strip():
383
+ raise URLExtractionError(
384
+ "Failed to extract any meaningful content from the URL."
385
+ )
386
  logger.info("Successfully fetched raw content from URL.")
387
  except Exception as e:
388
  # Wrap any fetching error into our custom exception
389
+ raise URLExtractionError(
390
+ f"Failed to download or read content from {url}: {e}"
391
+ ) from e
392
 
393
  # 3. Process content with the LLM
394
  try:
395
  logger.info("Processing content with DSPy LLM...")
396
  # Configure DSPy LM (it's good practice to do this here if it can change)
397
+ dspy.configure(
398
+ lm=dspy.LM(
399
+ "cerebras/qwen-3-32b",
400
+ api_key=os.environ.get("CEREBRAS_API_KEY"),
401
+ temperature=0.1,
402
+ max_tokens=60000, # Note: This max_tokens is unusually high
403
+ )
404
+ )
405
+
406
  job_extract_fn = dspy.Predict(ExtractJobDescription)
407
  result = job_extract_fn(job_description_html_content=raw_content)
408
  logger.info("Successfully processed job description with LLM.")
409
+
410
  # 4. Create the final Document with structured data
411
  job_doc = Document(
412
  page_content=result.job_description,
 
414
  "company_name": result.company_name,
415
  "source": url,
416
  "job_role": result.job_role,
417
+ "location": result.location,
418
+ },
419
  )
420
  return job_doc
421
 
 
430
  if raw_content:
431
  return Document(
432
  page_content=raw_content,
433
+ metadata={"company_name": "Unknown", "source": url, "error": str(e)},
434
  )
435
  # If raw_content is also None, then the failure was catastrophic.
436
+ raise LLMProcessingError(
437
+ "LLM processing failed and no raw content was available for fallback."
438
+ ) from e
439
+
440
  except URLExtractionError as e:
441
  logger.error(f"Could not extract content from URL: {e}")
442
  raise URLExtractionError("Failed to extract content from the URL.") from e
 
444
  # 6. Catch any other unexpected errors
445
  except Exception as e:
446
  logger.error(f"An unexpected error occurred: {e}", exc_info=True)
447
+ raise JobDescriptionParsingError(
448
+ f"An unexpected error occurred while parsing the job description: {e}"
449
+ ) from e
src/job_writing_agent/utils/llm_client.py CHANGED
@@ -14,85 +14,88 @@ import dspy
14
  logger = logging.getLogger(__name__)
15
 
16
  __all__ = [
17
- "OllamaChatProvider",
18
- "CerebrasChatProvider",
19
- "OpenRouterChatProvider",
20
  ]
21
 
 
22
  class LLMProvider(ABC):
23
  """Base class for LLM provider strategies."""
24
-
25
  @abstractmethod
26
  def get_default_config(self) -> Dict[str, Any]:
27
  pass
28
-
29
  @abstractmethod
30
  def get_langchain_params(self) -> set[str]:
31
  pass
32
-
33
  @abstractmethod
34
  def get_dspy_params(self) -> set[str]:
35
  pass
36
-
37
  @abstractmethod
38
  def format_model_name_for_provider(self, model: str) -> str:
39
  """Convert model name to DSPy format.
40
-
41
  Different providers require different prefixes in DSPy.
42
-
43
  Args:
44
  model: Model name as used in LangChain
45
-
46
  Returns:
47
  Model name formatted for DSPy
48
  """
49
  pass
50
-
51
  @abstractmethod
52
  def validate_config(self, **config) -> Dict[str, Any]:
53
  pass
54
-
55
  def create_llm_instance(
56
- self,
57
- model: str,
58
- framework: Literal['langchain', 'dspy'] = 'langchain',
59
- **config
60
  ) -> BaseChatModel | dspy.LM:
61
  """Create LLM instance for specified framework."""
62
  defaults = self.get_default_config()
63
-
64
  # Get framework-specific supported params
65
- if framework == 'langchain':
66
  supported = self.get_langchain_params()
67
  else:
68
  supported = self.get_dspy_params()
69
-
70
  # Filter unsupported params
71
  filtered_config = {k: v for k, v in config.items() if k in supported}
72
-
73
  # Warn about ignored params
74
  ignored = set(config.keys()) - supported
75
  if ignored:
76
- logger.warning(f"Ignoring unsupported parameters for {framework}: {ignored}")
77
-
 
 
78
  # Merge configs
79
  merged_config = {**defaults, **filtered_config}
80
-
81
  # Validate
82
  validated_config = self.validate_config(**merged_config)
83
-
84
  # Create instance based on framework
85
- if framework == 'langchain':
86
  return self._create_langchain_instance(model, **validated_config)
87
- elif framework == 'dspy':
88
  return self._create_dspy_instance(model, **validated_config)
89
  else:
90
  raise ValueError(f"Unsupported framework: {framework}")
91
-
92
  @abstractmethod
93
  def _create_langchain_instance(self, model: str, **config) -> BaseChatModel:
94
  pass
95
-
96
  @abstractmethod
97
  def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
98
  pass
@@ -100,224 +103,237 @@ class LLMProvider(ABC):
100
 
101
  class OpenRouterChatProvider(LLMProvider):
102
  """Provider for OpenRouter.
103
-
104
  Model format:
105
  - LangChain: "openai/gpt-4", "anthropic/claude-3-opus"
106
  - DSPy: Same - "openai/gpt-4", "anthropic/claude-3-opus"
107
-
108
  Docs: https://openrouter.ai/docs
109
  """
110
-
111
  OPENROUTER_API_URL = "https://openrouter.ai/api/v1"
112
-
113
  def get_default_config(self) -> Dict[str, Any]:
114
- return {'temperature': 0.2}
115
-
116
  def get_langchain_params(self) -> set[str]:
117
  return {
118
- 'temperature', 'max_tokens', 'top_p',
119
- 'frequency_penalty', 'presence_penalty',
120
- 'stop', 'n', 'stream'
 
 
 
 
 
121
  }
122
-
123
  def get_dspy_params(self) -> set[str]:
124
- return {'temperature', 'max_tokens', 'top_p', 'stop', 'n'}
125
-
126
  def format_model_name_for_provider(self, model: str) -> str:
127
  """OpenRouter models are used as-is in DSPy.
128
-
129
  Examples:
130
  "openai/gpt-4" -> "openai/gpt-4"
131
  "anthropic/claude-3-opus" -> "anthropic/claude-3-opus"
132
  """
133
  return f"{model}" # ✅ Use as-is - already has provider/model format
134
-
135
  def validate_config(self, **config) -> Dict[str, Any]:
136
- if 'temperature' in config:
137
- temp = config['temperature']
138
  if not 0 <= temp <= 2:
139
  logger.warning(f"Temperature must be 0-2, got {temp}")
140
-
141
- if 'api_key' not in config:
142
- api_key = os.getenv('OPENROUTER_API_KEY')
143
  if not api_key:
144
  raise ValueError("OPENROUTER_API_KEY not set")
145
- config['api_key'] = api_key
146
-
147
  return config
148
-
149
  def _create_langchain_instance(self, model: str, **config) -> ChatOpenAI:
150
  """Create LangChain instance.
151
-
152
  Example model: "openai/gpt-4"
153
  """
154
- api_key = config.pop('api_key')
155
-
156
  return ChatOpenAI(
157
- model=self.format_model_name_for_provider(model), # ✅ Use model as-is: "openai/gpt-4"
 
 
158
  api_key=SecretStr(api_key),
159
  base_url=self.OPENROUTER_API_URL,
160
- **config
161
  )
162
-
163
  def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
164
  """Create DSPy instance.
165
-
166
  Example model: "openai/gpt-4"
167
  """
168
- api_key = config.pop('api_key')
169
-
170
  return dspy.LM(
171
- model=self.format_model_name_for_provider(model), # ✅ Use as-is: "openai/gpt-4"
172
  api_key=api_key,
173
  api_base=self.OPENROUTER_API_URL,
174
- **config
175
  )
176
 
177
 
178
  class CerebrasChatProvider(LLMProvider):
179
  """Provider for Cerebras.
180
-
181
  Model format:
182
  - LangChain: "llama3.1-8b", "llama3.1-70b" (direct names)
183
  - DSPy: "openai/llama3.1-8b" (needs openai/ prefix for compatibility)
184
-
185
  Docs: https://inference-docs.cerebras.ai/
186
  """
187
-
188
  CEREBRAS_API_URL = "https://api.cerebras.ai/v1"
189
-
190
  def get_default_config(self) -> Dict[str, Any]:
191
- return {'temperature': 0.2, 'max_tokens': 1024}
192
-
193
  def get_langchain_params(self) -> set[str]:
194
- return {
195
- 'temperature', 'max_tokens', 'top_p',
196
- 'stop', 'stream', 'seed'
197
- }
198
-
199
  def get_dspy_params(self) -> set[str]:
200
- return {'temperature', 'max_tokens', 'top_p', 'stop'}
201
-
202
  def format_model_name_for_provider(self, model: str) -> str:
203
  """Cerebras models need 'cerebras/' prefix.
204
-
205
  Examples:
206
  "llama3.1-8b" -> "cerebras/llama3.1-8b"
207
  "llama3.1-70b" -> "cerebras/llama3.1-70b"
208
  """
209
  return f"cerebras/{model}" # ✅ Add openai/ prefix for OpenAI-compatible API
210
-
211
  def validate_config(self, **config) -> Dict[str, Any]:
212
- if 'temperature' in config:
213
- temp = config['temperature']
214
  if not 0 <= temp <= 1.5:
215
  raise ValueError(f"Temperature must be 0-1.5, got {temp}")
216
-
217
- if 'api_key' not in config:
218
- api_key = os.getenv('CEREBRAS_API_KEY')
219
  if not api_key:
220
  raise ValueError("CEREBRAS_API_KEY not set")
221
- config['api_key'] = api_key
222
-
223
  return config
224
-
225
  def _create_langchain_instance(self, model: str, **config) -> ChatCerebras:
226
  """Create LangChain instance.
227
-
228
  Example model: "llama3.1-8b"
229
  """
230
 
231
  return ChatCerebras(
232
  model=model, # Direct name: "llama3.1-8b"
233
- **config
234
  )
235
 
236
-
237
  @DeprecationWarning
238
- def _create_langchain_instance_openaiclient(self, model: str, **config) -> ChatOpenAI:
 
 
239
  """
240
  Create LangChain instance
241
  Example model: "llama3.1-8b"
242
  """
243
-
244
- api_key = config.pop('api_key')
245
-
246
  return ChatOpenAI(
247
- model=self.format_model_name_for_provider(model), # Direct name: "llama3.1-8b"
 
 
248
  api_key=SecretStr(api_key),
249
  base_url=self.CEREBRAS_API_URL,
250
- **config
251
  )
252
-
253
  def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
254
  """Create DSPy instance.
255
-
256
  Example model input: "llama3.1-8b"
257
  DSPy format: "openai/llama3.1-8b"
258
  """
259
- api_key = config.pop('api_key')
260
-
261
  return dspy.LM(
262
- model=self.format_model_name_for_provider(model), # With prefix: "openai/llama3.1-8b"
 
 
263
  api_key=api_key,
264
  api_base=self.CEREBRAS_API_URL,
265
- **config
266
  )
267
 
268
 
269
  class OllamaChatProvider(LLMProvider):
270
  """Provider for Ollama.
271
-
272
  Model format:
273
  - LangChain: "llama3.2", "llama3.2:latest" (direct names with optional tags)
274
  - DSPy: "ollama_chat/llama3.2" (needs ollama_chat/ prefix)
275
-
276
  Docs: https://ollama.com/
277
  """
278
-
279
  def get_default_config(self) -> Dict[str, Any]:
280
- return {'temperature': 0.2, 'top_k': 40, 'top_p': 0.9}
281
-
282
  def get_langchain_params(self) -> set[str]:
283
  return {
284
- 'temperature', 'top_k', 'top_p', 'repeat_penalty',
285
- 'num_ctx', 'num_predict', 'format', 'seed'
 
 
 
 
 
 
286
  }
287
-
288
  def get_dspy_params(self) -> set[str]:
289
- return {'temperature', 'top_p', 'num_ctx', 'seed'}
290
-
291
  def format_model_name_for_provider(self, model: str) -> str:
292
  """Ollama models need 'ollama_chat/' prefix for DSPy.
293
-
294
  Examples:
295
  "llama3.2" -> "ollama_chat/llama3.2"
296
  "llama3.2:latest" -> "ollama_chat/llama3.2:latest"
297
  """
298
  return f"ollama_chat/{model}" # ✅ Add ollama_chat/ prefix
299
-
300
  def validate_config(self, **config) -> Dict[str, Any]:
301
- if 'temperature' in config:
302
- temp = config['temperature']
303
  if not 0 <= temp <= 2:
304
  raise ValueError(f"Temperature must be 0-2, got {temp}")
305
-
306
- if 'top_k' in config:
307
- if not isinstance(config['top_k'], int) or config['top_k'] < 1:
308
  raise ValueError("top_k must be positive integer")
309
-
310
  return config
311
-
312
  def _create_langchain_instance(self, model: str, **config) -> ChatOllama:
313
-
314
- return ChatOllama(
315
- model=self.format_model_name_for_provider(model),
316
- **config)
317
-
318
  def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
319
-
320
  return dspy.LM(
321
- model=self.format_model_name_for_provider(model), # ✅ With prefix: "ollama_chat/llama3.2"
322
- **config
323
- )
 
 
 
14
  logger = logging.getLogger(__name__)
15
 
16
  __all__ = [
17
+ "OllamaChatProvider",
18
+ "CerebrasChatProvider",
19
+ "OpenRouterChatProvider",
20
  ]
21
 
22
+
23
  class LLMProvider(ABC):
24
  """Base class for LLM provider strategies."""
25
+
26
  @abstractmethod
27
  def get_default_config(self) -> Dict[str, Any]:
28
  pass
29
+
30
  @abstractmethod
31
  def get_langchain_params(self) -> set[str]:
32
  pass
33
+
34
  @abstractmethod
35
  def get_dspy_params(self) -> set[str]:
36
  pass
37
+
38
  @abstractmethod
39
  def format_model_name_for_provider(self, model: str) -> str:
40
  """Convert model name to DSPy format.
41
+
42
  Different providers require different prefixes in DSPy.
43
+
44
  Args:
45
  model: Model name as used in LangChain
46
+
47
  Returns:
48
  Model name formatted for DSPy
49
  """
50
  pass
51
+
52
  @abstractmethod
53
  def validate_config(self, **config) -> Dict[str, Any]:
54
  pass
55
+
56
  def create_llm_instance(
57
+ self,
58
+ model: str,
59
+ framework: Literal["langchain", "dspy"] = "langchain",
60
+ **config,
61
  ) -> BaseChatModel | dspy.LM:
62
  """Create LLM instance for specified framework."""
63
  defaults = self.get_default_config()
64
+
65
  # Get framework-specific supported params
66
+ if framework == "langchain":
67
  supported = self.get_langchain_params()
68
  else:
69
  supported = self.get_dspy_params()
70
+
71
  # Filter unsupported params
72
  filtered_config = {k: v for k, v in config.items() if k in supported}
73
+
74
  # Warn about ignored params
75
  ignored = set(config.keys()) - supported
76
  if ignored:
77
+ logger.warning(
78
+ f"Ignoring unsupported parameters for {framework}: {ignored}"
79
+ )
80
+
81
  # Merge configs
82
  merged_config = {**defaults, **filtered_config}
83
+
84
  # Validate
85
  validated_config = self.validate_config(**merged_config)
86
+
87
  # Create instance based on framework
88
+ if framework == "langchain":
89
  return self._create_langchain_instance(model, **validated_config)
90
+ elif framework == "dspy":
91
  return self._create_dspy_instance(model, **validated_config)
92
  else:
93
  raise ValueError(f"Unsupported framework: {framework}")
94
+
95
  @abstractmethod
96
  def _create_langchain_instance(self, model: str, **config) -> BaseChatModel:
97
  pass
98
+
99
  @abstractmethod
100
  def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
101
  pass
 
103
 
104
  class OpenRouterChatProvider(LLMProvider):
105
  """Provider for OpenRouter.
106
+
107
  Model format:
108
  - LangChain: "openai/gpt-4", "anthropic/claude-3-opus"
109
  - DSPy: Same - "openai/gpt-4", "anthropic/claude-3-opus"
110
+
111
  Docs: https://openrouter.ai/docs
112
  """
113
+
114
  OPENROUTER_API_URL = "https://openrouter.ai/api/v1"
115
+
116
  def get_default_config(self) -> Dict[str, Any]:
117
+ return {"temperature": 0.2}
118
+
119
  def get_langchain_params(self) -> set[str]:
120
  return {
121
+ "temperature",
122
+ "max_tokens",
123
+ "top_p",
124
+ "frequency_penalty",
125
+ "presence_penalty",
126
+ "stop",
127
+ "n",
128
+ "stream",
129
  }
130
+
131
  def get_dspy_params(self) -> set[str]:
132
+ return {"temperature", "max_tokens", "top_p", "stop", "n"}
133
+
134
  def format_model_name_for_provider(self, model: str) -> str:
135
  """OpenRouter models are used as-is in DSPy.
136
+
137
  Examples:
138
  "openai/gpt-4" -> "openai/gpt-4"
139
  "anthropic/claude-3-opus" -> "anthropic/claude-3-opus"
140
  """
141
  return f"{model}" # ✅ Use as-is - already has provider/model format
142
+
143
  def validate_config(self, **config) -> Dict[str, Any]:
144
+ if "temperature" in config:
145
+ temp = config["temperature"]
146
  if not 0 <= temp <= 2:
147
  logger.warning(f"Temperature must be 0-2, got {temp}")
148
+
149
+ if "api_key" not in config:
150
+ api_key = os.getenv("OPENROUTER_API_KEY")
151
  if not api_key:
152
  raise ValueError("OPENROUTER_API_KEY not set")
153
+ config["api_key"] = api_key
154
+
155
  return config
156
+
157
  def _create_langchain_instance(self, model: str, **config) -> ChatOpenAI:
158
  """Create LangChain instance.
159
+
160
  Example model: "openai/gpt-4"
161
  """
162
+ api_key = config.pop("api_key")
163
+
164
  return ChatOpenAI(
165
+ model=self.format_model_name_for_provider(
166
+ model
167
+ ), # ✅ Use model as-is: "openai/gpt-4"
168
  api_key=SecretStr(api_key),
169
  base_url=self.OPENROUTER_API_URL,
170
+ **config,
171
  )
172
+
173
  def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
174
  """Create DSPy instance.
175
+
176
  Example model: "openai/gpt-4"
177
  """
178
+ api_key = config.pop("api_key")
179
+
180
  return dspy.LM(
181
+ model=f"openrouter/{self.format_model_name_for_provider(model)}", # ✅ Use as-is: "openai/gpt-4"
182
  api_key=api_key,
183
  api_base=self.OPENROUTER_API_URL,
184
+ **config,
185
  )
186
 
187
 
188
  class CerebrasChatProvider(LLMProvider):
189
  """Provider for Cerebras.
190
+
191
  Model format:
192
  - LangChain: "llama3.1-8b", "llama3.1-70b" (direct names)
193
  - DSPy: "openai/llama3.1-8b" (needs openai/ prefix for compatibility)
194
+
195
  Docs: https://inference-docs.cerebras.ai/
196
  """
197
+
198
  CEREBRAS_API_URL = "https://api.cerebras.ai/v1"
199
+
200
  def get_default_config(self) -> Dict[str, Any]:
201
+ return {"temperature": 0.2, "max_tokens": 1024}
202
+
203
  def get_langchain_params(self) -> set[str]:
204
+ return {"temperature", "max_tokens", "top_p", "stop", "stream", "seed"}
205
+
 
 
 
206
  def get_dspy_params(self) -> set[str]:
207
+ return {"temperature", "max_tokens", "top_p", "stop"}
208
+
209
  def format_model_name_for_provider(self, model: str) -> str:
210
  """Cerebras models need 'cerebras/' prefix.
211
+
212
  Examples:
213
  "llama3.1-8b" -> "cerebras/llama3.1-8b"
214
  "llama3.1-70b" -> "cerebras/llama3.1-70b"
215
  """
216
  return f"cerebras/{model}" # ✅ Add openai/ prefix for OpenAI-compatible API
217
+
218
  def validate_config(self, **config) -> Dict[str, Any]:
219
+ if "temperature" in config:
220
+ temp = config["temperature"]
221
  if not 0 <= temp <= 1.5:
222
  raise ValueError(f"Temperature must be 0-1.5, got {temp}")
223
+
224
+ if "api_key" not in config:
225
+ api_key = os.getenv("CEREBRAS_API_KEY")
226
  if not api_key:
227
  raise ValueError("CEREBRAS_API_KEY not set")
228
+ config["api_key"] = api_key
229
+
230
  return config
231
+
232
  def _create_langchain_instance(self, model: str, **config) -> ChatCerebras:
233
  """Create LangChain instance.
234
+
235
  Example model: "llama3.1-8b"
236
  """
237
 
238
  return ChatCerebras(
239
  model=model, # Direct name: "llama3.1-8b"
240
+ **config,
241
  )
242
 
 
243
  @DeprecationWarning
244
+ def _create_langchain_instance_openaiclient(
245
+ self, model: str, **config
246
+ ) -> ChatOpenAI:
247
  """
248
  Create LangChain instance
249
  Example model: "llama3.1-8b"
250
  """
251
+
252
+ api_key = config.pop("api_key")
253
+
254
  return ChatOpenAI(
255
+ model=self.format_model_name_for_provider(
256
+ model
257
+ ), # Direct name: "llama3.1-8b"
258
  api_key=SecretStr(api_key),
259
  base_url=self.CEREBRAS_API_URL,
260
+ **config,
261
  )
262
+
263
  def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
264
  """Create DSPy instance.
265
+
266
  Example model input: "llama3.1-8b"
267
  DSPy format: "openai/llama3.1-8b"
268
  """
269
+ api_key = config.pop("api_key")
270
+
271
  return dspy.LM(
272
+ model=self.format_model_name_for_provider(
273
+ model
274
+ ), # With prefix: "openai/llama3.1-8b"
275
  api_key=api_key,
276
  api_base=self.CEREBRAS_API_URL,
277
+ **config,
278
  )
279
 
280
 
281
  class OllamaChatProvider(LLMProvider):
282
  """Provider for Ollama.
283
+
284
  Model format:
285
  - LangChain: "llama3.2", "llama3.2:latest" (direct names with optional tags)
286
  - DSPy: "ollama_chat/llama3.2" (needs ollama_chat/ prefix)
287
+
288
  Docs: https://ollama.com/
289
  """
290
+
291
  def get_default_config(self) -> Dict[str, Any]:
292
+ return {"temperature": 0.2, "top_k": 40, "top_p": 0.9}
293
+
294
  def get_langchain_params(self) -> set[str]:
295
  return {
296
+ "temperature",
297
+ "top_k",
298
+ "top_p",
299
+ "repeat_penalty",
300
+ "num_ctx",
301
+ "num_predict",
302
+ "format",
303
+ "seed",
304
  }
305
+
306
  def get_dspy_params(self) -> set[str]:
307
+ return {"temperature", "top_p", "num_ctx", "seed"}
308
+
309
  def format_model_name_for_provider(self, model: str) -> str:
310
  """Ollama models need 'ollama_chat/' prefix for DSPy.
311
+
312
  Examples:
313
  "llama3.2" -> "ollama_chat/llama3.2"
314
  "llama3.2:latest" -> "ollama_chat/llama3.2:latest"
315
  """
316
  return f"ollama_chat/{model}" # ✅ Add ollama_chat/ prefix
317
+
318
  def validate_config(self, **config) -> Dict[str, Any]:
319
+ if "temperature" in config:
320
+ temp = config["temperature"]
321
  if not 0 <= temp <= 2:
322
  raise ValueError(f"Temperature must be 0-2, got {temp}")
323
+
324
+ if "top_k" in config:
325
+ if not isinstance(config["top_k"], int) or config["top_k"] < 1:
326
  raise ValueError("top_k must be positive integer")
327
+
328
  return config
329
+
330
  def _create_langchain_instance(self, model: str, **config) -> ChatOllama:
331
+ return ChatOllama(model=self.format_model_name_for_provider(model), **config)
332
+
 
 
 
333
  def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
 
334
  return dspy.LM(
335
+ model=self.format_model_name_for_provider(
336
+ model
337
+ ), # ✅ With prefix: "ollama_chat/llama3.2"
338
+ **config,
339
+ )
src/job_writing_agent/utils/llm_provider_factory.py CHANGED
@@ -10,6 +10,7 @@ from .llm_client import (
10
  OllamaChatProvider,
11
  OpenRouterChatProvider,
12
  )
 
13
 
14
  logger = logging.getLogger(__name__)
15
 
@@ -32,6 +33,7 @@ class LLMFactory:
32
  >>> dspy.configure(lm=lm)
33
  """
34
 
 
35
  def __init__(self, default_provider: str = "openrouter"):
36
  """Initialize factory with available providers.
37
 
@@ -50,6 +52,7 @@ class LLMFactory:
50
  f"default: {default_provider}"
51
  )
52
 
 
53
  def create(
54
  self,
55
  model: str,
 
10
  OllamaChatProvider,
11
  OpenRouterChatProvider,
12
  )
13
+ from .logging.logging_decorators import log_execution
14
 
15
  logger = logging.getLogger(__name__)
16
 
 
33
  >>> dspy.configure(lm=lm)
34
  """
35
 
36
+ @log_execution
37
  def __init__(self, default_provider: str = "openrouter"):
38
  """Initialize factory with available providers.
39
 
 
52
  f"default: {default_provider}"
53
  )
54
 
55
+ @log_execution
56
  def create(
57
  self,
58
  model: str,
src/job_writing_agent/utils/logging/logging_config.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Logging configuration for the application
3
+
4
+ This module provides a centralized logging manager that configures
5
+ logging once at application startup, ensuring consistent log format
6
+ and behavior across all modules.
7
+ """
8
+
9
+ import logging
10
+ import sys
11
+ from pathlib import Path
12
+ from typing_extensions import Optional
13
+
14
+
15
class LoggingManager:
    """
    Application-wide logging setup, guaranteed to run at most once.

    Implemented as a Singleton: every instantiation returns the same object,
    and ``configure_logging`` becomes a no-op after its first call.

    Example:
        >>> manager = LoggingManager()
        >>> manager.configure_logging(log_level=logging.INFO)
        >>> logger = logging.getLogger(__name__)
        >>> logger.info("This will be logged consistently")
    """

    _instance: Optional["LoggingManager"] = None
    _configured: bool = False

    def __new__(cls):
        # Lazily create the single shared instance on first use.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._configured = False
        return cls._instance

    def configure_logging(
        self,
        log_level: int = logging.INFO,
        log_file: Optional[Path] = None,
        log_format: Optional[str] = None,
        date_format: Optional[str] = None,
    ) -> None:
        """
        Configure logging for the entire application.

        Call once at application startup (e.g., in ``main()``); any later
        call is silently ignored.

        Args:
            log_level: Logging level (logging.DEBUG, INFO, WARNING, ERROR).
            log_file: Optional path to a log file; when None, logs go only
                to the console.
            log_format: Optional custom format string; the default includes
                timestamp, level, module, and message.
            date_format: Optional date format string
                (default: "%Y-%m-%d %H:%M:%S").

        Example:
            >>> manager = LoggingManager()
            >>> manager.configure_logging(
            ...     log_level=logging.INFO,
            ...     log_file=Path("logs/app.log")
            ... )
        """
        if self._configured:
            return  # already set up once; do not reconfigure

        # Default: [2025-01-15 10:30:45] INFO module_name: message
        fmt = log_format if log_format is not None else (
            "[%(asctime)s] %(levelname)-8s %(name)s: %(message)s"
        )
        datefmt = date_format if date_format is not None else "%Y-%m-%d %H:%M:%S"
        formatter = logging.Formatter(fmt, datefmt=datefmt)

        root = logging.getLogger()
        root.setLevel(log_level)
        # Drop any pre-existing handlers so output is not duplicated.
        root.handlers.clear()

        # Console handler is always installed; a file handler only when
        # log_file is given (its parent directory is created on demand).
        handlers: list[logging.Handler] = [logging.StreamHandler(sys.stdout)]
        if log_file:
            log_file.parent.mkdir(parents=True, exist_ok=True)
            handlers.append(logging.FileHandler(log_file, mode="a", encoding="utf-8"))

        for handler in handlers:
            handler.setLevel(log_level)
            handler.setFormatter(formatter)
            root.addHandler(handler)

        self._configured = True

        status = "enabled" if log_file else "disabled"
        logging.getLogger(__name__).info(
            f"Logging configured: level={logging.getLevelName(log_level)}, "
            f"file={status}"
        )

    def is_configured(self) -> bool:
        """Check if logging has been configured."""
        return self._configured
112
+
113
+
114
+ # Convenience function for easy access
115
+ def get_logger(name: str) -> logging.Logger:
116
+ """
117
+ Get a logger instance for a module.
118
+
119
+ This is a convenience function that ensures consistent logger creation.
120
+ Use this instead of logging.getLogger(__name__) for consistency.
121
+
122
+ Args:
123
+ name: Logger name (typically __name__)
124
+
125
+ Returns:
126
+ Logger instance
127
+
128
+ Example:
129
+ >>> logger = get_logger(__name__)
130
+ >>> logger.info("Application started")
131
+ """
132
+ return logging.getLogger(name)
src/job_writing_agent/utils/logging/logging_decorators.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simple decorators for logging.
3
+
4
+ These decorators add logging behavior without cluttering your function code.
5
+ Keep it simple - just the essentials.
6
+ """
7
+
8
+ import functools
9
+ import logging
10
+ import time
11
+ from typing import Callable, TypeVar
12
+
13
+ # Type variable for function signatures
14
+ F = TypeVar("F", bound=Callable)
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ def log_execution(func: F) -> F:
20
+ """
21
+ Simple decorator to log when a function starts and finishes.
22
+
23
+ Logs entry, exit, and how long it took.
24
+
25
+ Example:
26
+ >>> @log_execution
27
+ >>> def process_data(data: str) -> str:
28
+ ... return data.upper()
29
+ >>> process_data("hello")
30
+ # Logs: "Entering process_data" ... "Exiting process_data (took 0.001s)"
31
+ """
32
+
33
+ @functools.wraps(func)
34
+ def log_execution_wrapper(*args, **kwargs):
35
+ func_name = func.__name__
36
+ logger.info(f"Entering {func_name}")
37
+
38
+ start_time = time.time()
39
+ try:
40
+ result = func(*args, **kwargs)
41
+ elapsed = time.time() - start_time
42
+ logger.info(f"Exiting {func_name} (took {elapsed:.3f}s)")
43
+ return result
44
+ except Exception as e:
45
+ elapsed = time.time() - start_time
46
+ logger.error(f"{func_name} failed after {elapsed:.3f}s: {e}", exc_info=True)
47
+ raise
48
+
49
+ return log_execution_wrapper
def log_async(func: F) -> F:
    """
    Decorator for async functions — logs entry, exit, and elapsed time.

    Uses ``time.perf_counter()`` (a monotonic clock) rather than
    ``time.time()``, so the reported duration is immune to system clock
    adjustments — especially relevant for long-awaited coroutines.

    Example:
        >>> @log_async
        >>> async def fetch_data(url: str) -> dict:
        ...     return await http.get(url)
    """

    @functools.wraps(func)
    async def log_async_wrapper(*args, **kwargs):
        func_name = func.__name__
        logger.info(f"Entering async {func_name}")

        start_time = time.perf_counter()  # monotonic: safe for measuring elapsed time
        try:
            result = await func(*args, **kwargs)
            elapsed = time.perf_counter() - start_time
            logger.info(f"Exiting async {func_name} (took {elapsed:.3f}s)")
            return result
        except Exception as e:
            elapsed = time.perf_counter() - start_time
            logger.error(f"{func_name} failed after {elapsed:.3f}s: {e}", exc_info=True)
            raise

    return log_async_wrapper
def log_errors(func: F) -> F:
    """
    Decorator that logs any exception escaping *func*, then re-raises it.

    The wrapped function's failure behavior is unchanged — the exception
    still propagates to the caller; it is merely recorded (with traceback)
    first.

    Example:
        >>> @log_errors
        >>> def risky_operation():
        ...     raise ValueError("Something went wrong")
        >>> risky_operation()
        # Logs the error, then raises it
    """

    @functools.wraps(func)
    def log_errors_wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            logger.error(f"Error in {func.__name__}: {e}", exc_info=True)
            raise

    return log_errors_wrapper
src/job_writing_agent/workflow.py CHANGED
@@ -6,13 +6,13 @@ This module provides the JobWorkflow class and CLI runner.
6
  import asyncio
7
  import logging
8
  import sys
 
9
  from datetime import datetime
10
  from functools import cached_property
11
  from typing import Optional, Dict, Any
12
 
13
- from langchain_core.tracers import ConsoleCallbackHandler
14
  from langgraph.graph import StateGraph
15
- from langfuse import Langfuse
16
  from langgraph.graph.state import CompiledStateGraph
17
 
18
  from job_writing_agent.agents.nodes import (
@@ -21,96 +21,274 @@ from job_writing_agent.agents.nodes import (
21
  finalize_document,
22
  human_approval,
23
  )
24
- from job_writing_agent.classes import AppState, DataLoadState
25
- from job_writing_agent.nodes import Dataloading, generate_variations, self_consistency_vote
26
  from job_writing_agent.nodes.research_workflow import research_workflow
27
  from job_writing_agent.utils.application_cli_interface import handle_cli
28
  from job_writing_agent.utils.result_utils import print_result, save_result
29
-
 
 
 
30
 
31
  logger = logging.getLogger(__name__)
32
 
33
 
34
  class JobWorkflow:
35
  """
36
- Workflow runner for the job application writer.
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  """
38
 
39
  def __init__(self, resume: str, job_description_source: str, content: str):
 
 
 
 
 
 
 
 
 
 
 
 
40
  self.resume = resume
41
  self.job_description_source = job_description_source
42
  self.content = content
43
- self.dataloading = Dataloading()
44
- self.langfuse = Langfuse()
45
 
46
  @cached_property
47
- def app_state(self) -> AppState:
48
- return AppState(
49
- resume_path=self.resume,
50
- job_description_source=self.job_description_source,
51
- company_research_data=None,
52
- draft="",
53
- feedback="",
54
- final="",
55
- content=self.content,
56
- current_node="",
57
- )
 
 
 
 
 
 
 
 
58
 
59
- @cached_property
60
  def job_app_graph(self) -> StateGraph:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  graph = StateGraph(DataLoadState)
62
- graph.add_node("initialize_system", self.dataloading.set_agent_system_message)
63
- graph.add_node("load", self.dataloading.run)
 
64
  graph.add_node("research", research_workflow)
65
  graph.add_node("create_draft", create_draft)
66
- graph.add_node("variations", generate_variations)
67
- graph.add_node("self_consistency", self_consistency_vote)
68
  graph.add_node("critique", critique_draft)
69
  graph.add_node("human_approval", human_approval)
70
  graph.add_node("finalize", finalize_document)
71
 
72
- graph.set_entry_point("initialize_system")
 
73
  graph.set_finish_point("finalize")
74
- graph.add_edge("initialize_system", "load")
75
- graph.add_conditional_edges("load", self.dataloading.verify_inputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  graph.add_edge("research", "create_draft")
77
- graph.add_edge("create_draft", "variations")
78
- graph.add_edge("variations", "self_consistency")
79
- graph.add_edge("self_consistency", "critique")
80
  graph.add_edge("critique", "human_approval")
81
  graph.add_edge("human_approval", "finalize")
 
82
  return graph
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  async def run(self) -> Optional[Dict[str, Any]]:
85
  """
86
- Run the job application writer workflow.
 
 
 
 
 
 
 
 
 
 
87
  """
88
  try:
89
  compiled_graph = self.compile()
90
  except Exception as e:
91
- logger.error("Error compiling graph: %s", e)
92
  return None
93
 
94
- run_name = f"Job Application Writer - {self.app_state['content']} - {datetime.now():%Y-%m-%d-%H%M%S}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  config = {
96
  "configurable": {
97
- "thread_id": f"job_app_session_{datetime.now():%Y%m%d%H%M%S}",
98
- "callbacks": [ConsoleCallbackHandler()],
99
  "run_name": run_name,
100
- "tags": ["job-application", self.app_state["content"]],
 
101
  },
102
  "recursion_limit": 10,
103
  }
 
104
  try:
105
- self.app_state["current_node"] = "initialize_system"
 
 
 
 
106
  graph_output = await compiled_graph.ainvoke(self.app_state, config=config)
 
 
107
  except Exception as e:
108
- logger.error("Error running graph: %s", e)
109
  return None
110
- return graph_output
111
 
 
 
112
  def compile(self) -> CompiledStateGraph:
113
- """Compile the workflow graph."""
 
 
 
 
 
 
 
 
 
 
 
 
114
  return self.job_app_graph.compile()
115
 
116
 
@@ -122,9 +300,10 @@ def main():
122
  content=args.content_type,
123
  )
124
  result = asyncio.run(workflow.run())
 
125
  if result:
126
- print_result(args.content_type, result["final"])
127
- save_result(args.content_type, result["final"])
128
  print("Workflow completed successfully.")
129
  else:
130
  print("Error running workflow.")
 
6
  import asyncio
7
  import logging
8
  import sys
9
+ import os
10
  from datetime import datetime
11
  from functools import cached_property
12
  from typing import Optional, Dict, Any
13
 
14
+ from langchain_core.tracers import ConsoleCallbackHandler, LangChainTracer
15
  from langgraph.graph import StateGraph
 
16
  from langgraph.graph.state import CompiledStateGraph
17
 
18
  from job_writing_agent.agents.nodes import (
 
21
  finalize_document,
22
  human_approval,
23
  )
24
+ from job_writing_agent.classes import DataLoadState
25
+ from job_writing_agent.nodes.initializing import data_loading_workflow
26
  from job_writing_agent.nodes.research_workflow import research_workflow
27
  from job_writing_agent.utils.application_cli_interface import handle_cli
28
  from job_writing_agent.utils.result_utils import print_result, save_result
29
+ from job_writing_agent.utils.logging.logging_decorators import (
30
+ log_execution,
31
+ log_errors,
32
+ )
33
 
34
  logger = logging.getLogger(__name__)
35
 
36
 
37
class JobWorkflow:
    """
    Workflow orchestrator for the job application writer.

    Coordinates the LangGraph state machine and LangSmith tracing without
    implementing business logic itself. The pipeline is:

    1. Data Loading: parse resume and job description (subgraph)
    2. Research: company research and relevance filtering (subgraph)
    3. Draft Creation: generate the initial application material
    4. Critique: AI-powered feedback on the draft
    5. Human Approval: user feedback collection
    6. Finalization: incorporate feedback and produce the final output
    """

    def __init__(self, resume: str, job_description_source: str, content: str):
        """
        Initialize the JobWorkflow orchestrator.

        Parameters
        ----------
        resume: str
            Path to the resume file or resume text.
        job_description_source: str
            URL, file path, or text content of the job description.
        content: str
            Type of application material to generate
            ("cover_letter", "bullets", "linkedin_note").
        """
        self.resume = resume
        self.job_description_source = job_description_source
        self.content = content

    @cached_property
    def app_state(self) -> DataLoadState:
        """
        Initial workflow state derived from the constructor arguments.

        Returns
        -------
        DataLoadState
            State dict with resume path, job description source, content
            type, an empty messages list, and empty research data.
        """
        return {
            "resume_path": self.resume,
            "job_description_source": self.job_description_source,
            "content": self.content,
            "current_node": "",
            "messages": [],
            "company_research_data": {},
        }

    @cached_property
    def job_app_graph(self) -> StateGraph:
        """
        Build and cache the job application workflow graph.

        BUGFIX: this must be a ``cached_property``, not a plain method —
        ``compile()`` accesses ``self.job_app_graph`` as an attribute and
        calls ``.compile()`` on the result, which would fail on a bound
        method object.

        Returns
        -------
        StateGraph
            Configured LangGraph state machine ready for compilation.
        """
        graph = StateGraph(DataLoadState)

        # Workflow nodes: two subgraphs plus individual nodes.
        graph.add_node("load", data_loading_workflow)
        graph.add_node("research", research_workflow)
        graph.add_node("create_draft", create_draft)
        graph.add_node("critique", critique_draft)
        graph.add_node("human_approval", human_approval)
        graph.add_node("finalize", finalize_document)

        graph.set_entry_point("load")
        graph.set_finish_point("finalize")

        def route_after_load(state: DataLoadState) -> str:
            """
            Follow ``next_node`` set by the data loading subgraph:
            "load" to retry when validation failed, otherwise "research".
            """
            next_node = state.get("next_node", "research")  # default to research
            logger.info(f"Routing after load: {next_node}")
            return next_node

        graph.add_conditional_edges(
            "load",
            route_after_load,
            {
                "load": "load",  # loop back if validation fails
                "research": "research",  # proceed if validation passes
            },
        )

        # Sequential edges for the main workflow path.
        graph.add_edge("research", "create_draft")
        graph.add_edge("create_draft", "critique")
        graph.add_edge("critique", "human_approval")
        graph.add_edge("human_approval", "finalize")

        return graph

    def _get_callbacks(self) -> list:
        """
        Callback handlers for graph execution.

        Always includes console output; adds a LangSmith tracer when the
        LANGSMITH_TRACING environment variable is "true".

        Returns
        -------
        list
            - ConsoleCallbackHandler: console output
            - LangChainTracer: LangSmith tracing (only when enabled)
        """
        callbacks = [ConsoleCallbackHandler()]

        if os.getenv("LANGSMITH_TRACING", "").lower() == "true":
            try:
                # LangChainTracer reads LANGSMITH_API_KEY / LANGSMITH_ENDPOINT
                # from the environment by itself; only the project name is
                # passed explicitly.
                langsmith_tracer = LangChainTracer(
                    project_name=os.getenv("LANGSMITH_PROJECT", "job_application_writer")
                )
                callbacks.append(langsmith_tracer)
                logger.info("LangSmith tracing enabled with metadata")
            except Exception as e:
                # Tracing is best-effort: never fail the workflow over it.
                logger.warning(
                    f"Failed to initialize LangSmith tracer: {e}. Continuing without tracing."
                )
        else:
            logger.debug(
                "LangSmith tracing is not enabled (LANGSMITH_TRACING != 'true')"
            )

        return callbacks

    @log_execution
    @log_errors
    async def run(self) -> Optional[Dict[str, Any]]:
        """
        Compile and execute the complete workflow.

        Errors during compilation or execution are logged and turned into a
        ``None`` return rather than propagated.

        Returns
        -------
        Optional[Dict[str, Any]]
            Final workflow state (generated material under "output_data"),
            or None if compilation/execution fails.
        """
        try:
            compiled_graph = self.compile()
        except Exception as e:
            logger.error("Error compiling graph: %s", e, exc_info=True)
            return None

        content = self.app_state.get("content", "cover_letter")
        thread_id = f"job_app_session_{datetime.now():%Y%m%d%H%M%S}"
        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")

        # Metadata and tags surface in the LangSmith UI for filtering.
        metadata = {
            "workflow": "job_application_writer",
            "content_type": content,
            "session_id": thread_id,
        }
        tags = [
            "job-application",
            content,
        ]
        run_name = f"JobAppWriter.{content}.{timestamp}"

        config = {
            "configurable": {
                "thread_id": thread_id,
                "callbacks": self._get_callbacks(),
                "run_name": run_name,
                "metadata": metadata,
                "tags": tags,
            },
            "recursion_limit": 10,
        }

        try:
            self.app_state["current_node"] = "load"
            logger.info(
                f"Starting workflow execution: {run_name} "
                f"(content_type={content}, session_id={thread_id})"
            )
            graph_output = await compiled_graph.ainvoke(self.app_state, config=config)
            logger.info("Workflow execution completed successfully")
            return graph_output
        except Exception as e:
            logger.error("Error running graph: %s", e, exc_info=True)
            return None

    @log_execution
    @log_errors
    def compile(self) -> CompiledStateGraph:
        """
        Compile the cached workflow graph into an executable state machine.

        Returns
        -------
        CompiledStateGraph
            Compiled LangGraph state machine ready for execution.

        Raises
        ------
        Exception
            If graph compilation fails (e.g., invalid edges, missing nodes).
        """
        return self.job_app_graph.compile()
293
 
294
 
 
300
  content=args.content_type,
301
  )
302
  result = asyncio.run(workflow.run())
303
+ # print(f"result: {result}")
304
  if result:
305
+ print_result(args.content_type, result["output_data"])
306
+ save_result(args.content_type, result["output_data"])
307
  print("Workflow completed successfully.")
308
  else:
309
  print("Error running workflow.")
uv.lock CHANGED
The diff for this file is too large to render. See raw diff