mrrobot1024
commited on
Commit
Β·
8acadd7
1
Parent(s):
80ed4c7
Changed the directory structure. Made significant code changes including usage of design patterns and improved code readability. Last commit was updating the gitignore file, ignore commit message in last commit.
Browse files- README.md +0 -81
- job_writing_agent.egg-info/PKG-INFO +333 -0
- job_writing_agent.egg-info/SOURCES.txt +40 -0
- job_writing_agent.egg-info/dependency_links.txt +1 -0
- job_writing_agent.egg-info/requires.txt +245 -0
- job_writing_agent.egg-info/top_level.txt +1 -0
- __init__.py β job_writing_agent/__init__.py +76 -2
- {agents β job_writing_agent/agents}/__init__.py +0 -0
- {agents β job_writing_agent/agents}/nodes.py +54 -40
- {agents β job_writing_agent/agents}/output_schema.py +9 -0
- {classes β job_writing_agent/classes}/__init__.py +0 -0
- {classes β job_writing_agent/classes}/classes.py +7 -5
- langgraph_init.py β job_writing_agent/langgraph_init.py +0 -0
- {nodes β job_writing_agent/nodes}/__init__.py +0 -0
- {nodes β job_writing_agent/nodes}/createdraft.py +0 -0
- job_writing_agent/nodes/initializing.py +310 -0
- {nodes β job_writing_agent/nodes}/research_workflow.py +51 -35
- {nodes β job_writing_agent/nodes}/selfconsistency.py +16 -14
- {nodes β job_writing_agent/nodes}/test_workflow.py +0 -0
- {nodes β job_writing_agent/nodes}/variations.py +28 -17
- prompts.md β job_writing_agent/prompts.md +0 -0
- {prompts β job_writing_agent/prompts}/__init__.py +0 -0
- {prompts β job_writing_agent/prompts}/templates.py +183 -82
- job_writing_agent/tools/SearchTool.py +146 -0
- {tools β job_writing_agent/tools}/__init__.py +2 -2
- job_writing_agent/tools/test_llm.py +10 -0
- job_writing_agent/tools/test_tavily.py +70 -0
- {utils β job_writing_agent/utils}/__init__.py +0 -0
- job_writing_agent/utils/application_cli_interface.py +113 -0
- {utils β job_writing_agent/utils}/config.py +0 -0
- job_writing_agent/utils/config_utils.py +25 -0
- {utils β job_writing_agent/utils}/document_processing.py +123 -159
- job_writing_agent/utils/dspy_job_extract.py +91 -0
- {utils β job_writing_agent/utils}/errors.py +0 -0
- {utils β job_writing_agent/utils}/langfuse_handler.py +0 -0
- job_writing_agent/utils/llm_client.py +323 -0
- job_writing_agent/utils/llm_provider_factory.py +346 -0
- job_writing_agent/utils/result_utils.py +39 -0
- {utils β job_writing_agent/utils}/vector_store.py +0 -0
- job_writing_agent/workflow.py +135 -0
- langgraph.json +0 -10
- nodes/initializing.py +0 -225
- setup.py +0 -0
- testing.ipynb +0 -1069
- tools/TavilySearch.py +0 -230
- utils/llm_client.py +0 -141
- workflow.py +0 -210
README.md
DELETED
|
@@ -1,81 +0,0 @@
|
|
| 1 |
-
# Job Writer Module
|
| 2 |
-
|
| 3 |
-
A modular, well-structured package for creating tailored job applications using LangChain and LangGraph with LangSmith observability.
|
| 4 |
-
|
| 5 |
-
## Features
|
| 6 |
-
|
| 7 |
-
- Creates personalized job application materials based on resumes and job descriptions
|
| 8 |
-
- Supports multiple application types: cover letters, bullet points, and LinkedIn messages
|
| 9 |
-
- Uses RAG for personalization and web search for company research
|
| 10 |
-
- Provides human-in-the-loop feedback integration
|
| 11 |
-
- Implements self-consistency voting for quality control
|
| 12 |
-
|
| 13 |
-
## Installation
|
| 14 |
-
|
| 15 |
-
```bash
|
| 16 |
-
# Install the package and its dependencies
|
| 17 |
-
pip install -e .
|
| 18 |
-
|
| 19 |
-
# Install development dependencies (including linting tools)
|
| 20 |
-
pip install -r requirements-dev.txt
|
| 21 |
-
```
|
| 22 |
-
|
| 23 |
-
## Code Standards and Linting
|
| 24 |
-
|
| 25 |
-
This project uses several tools to ensure code quality:
|
| 26 |
-
|
| 27 |
-
1. **Black** - Code formatter that enforces consistent style
|
| 28 |
-
2. **isort** - Sorts imports according to best practices
|
| 29 |
-
3. **Flake8** - Style guide enforcement
|
| 30 |
-
4. **mypy** - Static type checking
|
| 31 |
-
|
| 32 |
-
### Running the Linters
|
| 33 |
-
|
| 34 |
-
```bash
|
| 35 |
-
# Format code with Black
|
| 36 |
-
black job_writer/
|
| 37 |
-
|
| 38 |
-
# Sort imports
|
| 39 |
-
isort job_writer/
|
| 40 |
-
|
| 41 |
-
# Check style with Flake8
|
| 42 |
-
flake8 job_writer/
|
| 43 |
-
|
| 44 |
-
# Type checking with mypy
|
| 45 |
-
mypy job_writer/
|
| 46 |
-
```
|
| 47 |
-
|
| 48 |
-
### Pre-commit Hooks
|
| 49 |
-
|
| 50 |
-
We use pre-commit hooks to automatically run linters before each commit:
|
| 51 |
-
|
| 52 |
-
```bash
|
| 53 |
-
# Install the pre-commit hooks
|
| 54 |
-
pip install pre-commit
|
| 55 |
-
pre-commit install
|
| 56 |
-
|
| 57 |
-
# You can also run the hooks manually
|
| 58 |
-
pre-commit run --all-files
|
| 59 |
-
```
|
| 60 |
-
|
| 61 |
-
## Usage Example
|
| 62 |
-
|
| 63 |
-
```python
|
| 64 |
-
import asyncio
|
| 65 |
-
from job_writer.workflow import run_job_application_writer
|
| 66 |
-
|
| 67 |
-
# Run the job application writer
|
| 68 |
-
result = asyncio.run(run_job_application_writer(
|
| 69 |
-
resume_path="path/to/resume.pdf",
|
| 70 |
-
job_desc_path="https://example.com/job-posting",
|
| 71 |
-
content="cover_letter"
|
| 72 |
-
))
|
| 73 |
-
|
| 74 |
-
print(result["final"])
|
| 75 |
-
```
|
| 76 |
-
|
| 77 |
-
Alternatively, you can use the command-line interface:
|
| 78 |
-
|
| 79 |
-
```bash
|
| 80 |
-
python -m job_writer.workflow --resume path/to/resume.pdf --job https://example.com/job-posting --type cover_letter
|
| 81 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
job_writing_agent.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: job_writing_agent
|
| 3 |
+
Version: 1.0.0
|
| 4 |
+
Summary: This module can run an agent which is capable of running langgraph agent sing tools like tavily search
|
| 5 |
+
Requires-Python: >=3.11
|
| 6 |
+
Description-Content-Type: text/markdown
|
| 7 |
+
Requires-Dist: aiofiles==24.1.0
|
| 8 |
+
Requires-Dist: aiohappyeyeballs==2.6.1
|
| 9 |
+
Requires-Dist: aiohttp==3.12.15
|
| 10 |
+
Requires-Dist: aiosignal==1.4.0
|
| 11 |
+
Requires-Dist: alembic==1.16.5
|
| 12 |
+
Requires-Dist: annotated-types==0.7.0
|
| 13 |
+
Requires-Dist: anyio==4.11.0
|
| 14 |
+
Requires-Dist: asyncer==0.0.8
|
| 15 |
+
Requires-Dist: attrs==25.3.0
|
| 16 |
+
Requires-Dist: authlib==1.6.5
|
| 17 |
+
Requires-Dist: av==15.1.0
|
| 18 |
+
Requires-Dist: babel==2.17.0
|
| 19 |
+
Requires-Dist: backoff==2.2.1
|
| 20 |
+
Requires-Dist: beautifulsoup4==4.14.2
|
| 21 |
+
Requires-Dist: blinker==1.9.0
|
| 22 |
+
Requires-Dist: blockbuster==1.5.25
|
| 23 |
+
Requires-Dist: bs4==0.0.2
|
| 24 |
+
Requires-Dist: cachetools==6.2.0
|
| 25 |
+
Requires-Dist: certifi==2025.10.5
|
| 26 |
+
Requires-Dist: cffi==2.0.0
|
| 27 |
+
Requires-Dist: charset-normalizer==3.4.3
|
| 28 |
+
Requires-Dist: click==8.3.0
|
| 29 |
+
Requires-Dist: click-default-group==1.2.4
|
| 30 |
+
Requires-Dist: cloudpickle==3.1.1
|
| 31 |
+
Requires-Dist: colorama==0.4.6
|
| 32 |
+
Requires-Dist: coloredlogs==15.0.1
|
| 33 |
+
Requires-Dist: colorlog==6.9.0
|
| 34 |
+
Requires-Dist: condense-json==0.1.3
|
| 35 |
+
Requires-Dist: contourpy==1.3.3
|
| 36 |
+
Requires-Dist: courlan==1.3.2
|
| 37 |
+
Requires-Dist: cryptography==44.0.3
|
| 38 |
+
Requires-Dist: cycler==0.12.1
|
| 39 |
+
Requires-Dist: cyclopts==3.24.0
|
| 40 |
+
Requires-Dist: databricks-sdk==0.67.0
|
| 41 |
+
Requires-Dist: dataclasses-json==0.6.7
|
| 42 |
+
Requires-Dist: dateparser==1.2.2
|
| 43 |
+
Requires-Dist: diskcache==5.6.3
|
| 44 |
+
Requires-Dist: distro==1.9.0
|
| 45 |
+
Requires-Dist: dnspython==2.8.0
|
| 46 |
+
Requires-Dist: docker==7.1.0
|
| 47 |
+
Requires-Dist: docstring-parser==0.17.0
|
| 48 |
+
Requires-Dist: docutils==0.22.2
|
| 49 |
+
Requires-Dist: dspy==3.0.3
|
| 50 |
+
Requires-Dist: dspy-ai==3.0.3
|
| 51 |
+
Requires-Dist: email-validator==2.3.0
|
| 52 |
+
Requires-Dist: eval-type-backport==0.2.2
|
| 53 |
+
Requires-Dist: exceptiongroup==1.3.0
|
| 54 |
+
Requires-Dist: fastapi==0.118.0
|
| 55 |
+
Requires-Dist: fastmcp==2.12.4
|
| 56 |
+
Requires-Dist: fastuuid==0.13.5
|
| 57 |
+
Requires-Dist: filelock==3.19.1
|
| 58 |
+
Requires-Dist: flask==3.1.2
|
| 59 |
+
Requires-Dist: flatbuffers==25.9.23
|
| 60 |
+
Requires-Dist: fonttools==4.60.1
|
| 61 |
+
Requires-Dist: forbiddenfruit==0.1.4
|
| 62 |
+
Requires-Dist: frozenlist==1.7.0
|
| 63 |
+
Requires-Dist: fsspec==2025.9.0
|
| 64 |
+
Requires-Dist: gepa==0.0.7
|
| 65 |
+
Requires-Dist: gitdb==4.0.12
|
| 66 |
+
Requires-Dist: gitpython==3.1.45
|
| 67 |
+
Requires-Dist: google-auth==2.41.1
|
| 68 |
+
Requires-Dist: googleapis-common-protos==1.70.0
|
| 69 |
+
Requires-Dist: graphene==3.4.3
|
| 70 |
+
Requires-Dist: graphql-core==3.2.6
|
| 71 |
+
Requires-Dist: graphql-relay==3.2.0
|
| 72 |
+
Requires-Dist: greenlet==3.2.4
|
| 73 |
+
Requires-Dist: grpcio==1.76.0
|
| 74 |
+
Requires-Dist: grpcio-tools==1.76.0
|
| 75 |
+
Requires-Dist: gunicorn==23.0.0
|
| 76 |
+
Requires-Dist: h11==0.16.0
|
| 77 |
+
Requires-Dist: hf-xet==1.1.10
|
| 78 |
+
Requires-Dist: html2text==2025.4.15
|
| 79 |
+
Requires-Dist: htmldate==1.9.3
|
| 80 |
+
Requires-Dist: httpcore==1.0.9
|
| 81 |
+
Requires-Dist: httpx==0.28.1
|
| 82 |
+
Requires-Dist: httpx-sse==0.4.1
|
| 83 |
+
Requires-Dist: huggingface-hub==0.35.3
|
| 84 |
+
Requires-Dist: humanfriendly==10.0
|
| 85 |
+
Requires-Dist: idna==3.10
|
| 86 |
+
Requires-Dist: importlib-metadata==8.7.0
|
| 87 |
+
Requires-Dist: isodate==0.7.2
|
| 88 |
+
Requires-Dist: itsdangerous==2.2.0
|
| 89 |
+
Requires-Dist: jinja2==3.1.6
|
| 90 |
+
Requires-Dist: jiter==0.11.0
|
| 91 |
+
Requires-Dist: joblib==1.5.2
|
| 92 |
+
Requires-Dist: json-repair==0.52.0
|
| 93 |
+
Requires-Dist: jsonpatch==1.33
|
| 94 |
+
Requires-Dist: jsonpointer==3.0.0
|
| 95 |
+
Requires-Dist: jsonschema==4.25.1
|
| 96 |
+
Requires-Dist: jsonschema-path==0.3.4
|
| 97 |
+
Requires-Dist: jsonschema-rs==0.29.1
|
| 98 |
+
Requires-Dist: jsonschema-specifications==2025.9.1
|
| 99 |
+
Requires-Dist: justext==3.0.2
|
| 100 |
+
Requires-Dist: kiwisolver==1.4.9
|
| 101 |
+
Requires-Dist: langchain==0.3.27
|
| 102 |
+
Requires-Dist: langchain-cerebras==0.5.0
|
| 103 |
+
Requires-Dist: langchain-community==0.3.30
|
| 104 |
+
Requires-Dist: langchain-core==0.3.78
|
| 105 |
+
Requires-Dist: langchain-ollama==0.3.10
|
| 106 |
+
Requires-Dist: langchain-openai==0.3.34
|
| 107 |
+
Requires-Dist: langchain-tavily==0.2.12
|
| 108 |
+
Requires-Dist: langchain-text-splitters==0.3.11
|
| 109 |
+
Requires-Dist: langfuse==3.6.1
|
| 110 |
+
Requires-Dist: langgraph==0.6.8
|
| 111 |
+
Requires-Dist: langgraph-api==0.4.46
|
| 112 |
+
Requires-Dist: langgraph-checkpoint==2.1.1
|
| 113 |
+
Requires-Dist: langgraph-cli==0.4.4
|
| 114 |
+
Requires-Dist: langgraph-prebuilt==0.6.4
|
| 115 |
+
Requires-Dist: langgraph-runtime-inmem==0.14.1
|
| 116 |
+
Requires-Dist: langgraph-sdk==0.2.9
|
| 117 |
+
Requires-Dist: langsmith==0.4.32
|
| 118 |
+
Requires-Dist: lazy-object-proxy==1.12.0
|
| 119 |
+
Requires-Dist: litellm==1.77.7
|
| 120 |
+
Requires-Dist: livekit==1.0.13
|
| 121 |
+
Requires-Dist: livekit-agents==1.2.14
|
| 122 |
+
Requires-Dist: livekit-api==1.0.6
|
| 123 |
+
Requires-Dist: livekit-blingfire==1.0.0
|
| 124 |
+
Requires-Dist: livekit-plugins-cartesia==1.2.14
|
| 125 |
+
Requires-Dist: livekit-plugins-openai==1.2.14
|
| 126 |
+
Requires-Dist: livekit-plugins-silero==1.2.14
|
| 127 |
+
Requires-Dist: livekit-protocol==1.0.7
|
| 128 |
+
Requires-Dist: llm==0.27.1
|
| 129 |
+
Requires-Dist: llm-openrouter==0.5
|
| 130 |
+
Requires-Dist: lxml==5.4.0
|
| 131 |
+
Requires-Dist: lxml-html-clean==0.4.3
|
| 132 |
+
Requires-Dist: magicattr==0.1.6
|
| 133 |
+
Requires-Dist: mako==1.3.10
|
| 134 |
+
Requires-Dist: markdown-it-py==4.0.0
|
| 135 |
+
Requires-Dist: markupsafe==3.0.3
|
| 136 |
+
Requires-Dist: marshmallow==3.26.1
|
| 137 |
+
Requires-Dist: matplotlib==3.10.6
|
| 138 |
+
Requires-Dist: mcp==1.16.0
|
| 139 |
+
Requires-Dist: mdurl==0.1.2
|
| 140 |
+
Requires-Dist: mlflow==3.4.0
|
| 141 |
+
Requires-Dist: mlflow-skinny==3.4.0
|
| 142 |
+
Requires-Dist: mlflow-tracing==3.4.0
|
| 143 |
+
Requires-Dist: more-itertools==10.8.0
|
| 144 |
+
Requires-Dist: mpmath==1.3.0
|
| 145 |
+
Requires-Dist: multidict==6.6.4
|
| 146 |
+
Requires-Dist: mypy-extensions==1.1.0
|
| 147 |
+
Requires-Dist: nest-asyncio==1.6.0
|
| 148 |
+
Requires-Dist: numpy==2.3.3
|
| 149 |
+
Requires-Dist: ollama==0.6.0
|
| 150 |
+
Requires-Dist: onnxruntime==1.23.0
|
| 151 |
+
Requires-Dist: openai==2.3.0
|
| 152 |
+
Requires-Dist: openapi-core==0.19.5
|
| 153 |
+
Requires-Dist: openapi-pydantic==0.5.1
|
| 154 |
+
Requires-Dist: openapi-schema-validator==0.6.3
|
| 155 |
+
Requires-Dist: openapi-spec-validator==0.7.2
|
| 156 |
+
Requires-Dist: openevals==0.1.0
|
| 157 |
+
Requires-Dist: opentelemetry-api==1.37.0
|
| 158 |
+
Requires-Dist: opentelemetry-exporter-otlp==1.37.0
|
| 159 |
+
Requires-Dist: opentelemetry-exporter-otlp-proto-common==1.37.0
|
| 160 |
+
Requires-Dist: opentelemetry-exporter-otlp-proto-grpc==1.37.0
|
| 161 |
+
Requires-Dist: opentelemetry-exporter-otlp-proto-http==1.37.0
|
| 162 |
+
Requires-Dist: opentelemetry-proto==1.37.0
|
| 163 |
+
Requires-Dist: opentelemetry-sdk==1.37.0
|
| 164 |
+
Requires-Dist: opentelemetry-semantic-conventions==0.58b0
|
| 165 |
+
Requires-Dist: optuna==4.5.0
|
| 166 |
+
Requires-Dist: orjson==3.11.3
|
| 167 |
+
Requires-Dist: ormsgpack==1.10.0
|
| 168 |
+
Requires-Dist: packaging==25.0
|
| 169 |
+
Requires-Dist: pandas==2.3.3
|
| 170 |
+
Requires-Dist: parse==1.20.2
|
| 171 |
+
Requires-Dist: pathable==0.4.4
|
| 172 |
+
Requires-Dist: pillow==11.3.0
|
| 173 |
+
Requires-Dist: pip==25.2
|
| 174 |
+
Requires-Dist: playwright==1.55.0
|
| 175 |
+
Requires-Dist: pluggy==1.6.0
|
| 176 |
+
Requires-Dist: poetry>=2.2.1
|
| 177 |
+
Requires-Dist: prometheus-client==0.23.1
|
| 178 |
+
Requires-Dist: propcache==0.4.0
|
| 179 |
+
Requires-Dist: protobuf==6.32.1
|
| 180 |
+
Requires-Dist: psutil==7.1.0
|
| 181 |
+
Requires-Dist: puremagic==1.30
|
| 182 |
+
Requires-Dist: pyarrow==21.0.0
|
| 183 |
+
Requires-Dist: pyasn1==0.6.1
|
| 184 |
+
Requires-Dist: pyasn1-modules==0.4.2
|
| 185 |
+
Requires-Dist: pycparser==2.23
|
| 186 |
+
Requires-Dist: pydantic==2.12.0
|
| 187 |
+
Requires-Dist: pydantic-core==2.41.1
|
| 188 |
+
Requires-Dist: pydantic-settings==2.11.0
|
| 189 |
+
Requires-Dist: pyee==13.0.0
|
| 190 |
+
Requires-Dist: pygments==2.19.2
|
| 191 |
+
Requires-Dist: pyjwt==2.10.1
|
| 192 |
+
Requires-Dist: pyparsing==3.2.5
|
| 193 |
+
Requires-Dist: pypdf==6.1.1
|
| 194 |
+
Requires-Dist: pyperclip==1.11.0
|
| 195 |
+
Requires-Dist: python-dateutil==2.9.0.post0
|
| 196 |
+
Requires-Dist: python-dotenv==1.1.1
|
| 197 |
+
Requires-Dist: python-multipart==0.0.20
|
| 198 |
+
Requires-Dist: python-ulid==3.1.0
|
| 199 |
+
Requires-Dist: pytz==2025.2
|
| 200 |
+
Requires-Dist: pyyaml==6.0.3
|
| 201 |
+
Requires-Dist: referencing==0.36.2
|
| 202 |
+
Requires-Dist: regex==2025.9.18
|
| 203 |
+
Requires-Dist: requests==2.32.5
|
| 204 |
+
Requires-Dist: requests-toolbelt==1.0.0
|
| 205 |
+
Requires-Dist: rfc3339-validator==0.1.4
|
| 206 |
+
Requires-Dist: rich==14.1.0
|
| 207 |
+
Requires-Dist: rich-rst==1.3.1
|
| 208 |
+
Requires-Dist: rpds-py==0.27.1
|
| 209 |
+
Requires-Dist: rsa==4.9.1
|
| 210 |
+
Requires-Dist: scikit-learn==1.7.2
|
| 211 |
+
Requires-Dist: scipy==1.16.2
|
| 212 |
+
Requires-Dist: setuptools==80.9.0
|
| 213 |
+
Requires-Dist: six==1.17.0
|
| 214 |
+
Requires-Dist: smmap==5.0.2
|
| 215 |
+
Requires-Dist: sniffio==1.3.1
|
| 216 |
+
Requires-Dist: sounddevice==0.5.2
|
| 217 |
+
Requires-Dist: soupsieve==2.8
|
| 218 |
+
Requires-Dist: sqlalchemy==2.0.43
|
| 219 |
+
Requires-Dist: sqlite-fts4==1.0.3
|
| 220 |
+
Requires-Dist: sqlite-migrate==0.1b0
|
| 221 |
+
Requires-Dist: sqlite-utils==3.38
|
| 222 |
+
Requires-Dist: sqlparse==0.5.3
|
| 223 |
+
Requires-Dist: sse-starlette==2.1.3
|
| 224 |
+
Requires-Dist: starlette==0.48.0
|
| 225 |
+
Requires-Dist: structlog==25.4.0
|
| 226 |
+
Requires-Dist: sympy==1.14.0
|
| 227 |
+
Requires-Dist: tabulate==0.9.0
|
| 228 |
+
Requires-Dist: tenacity==9.1.2
|
| 229 |
+
Requires-Dist: threadpoolctl==3.6.0
|
| 230 |
+
Requires-Dist: tiktoken==0.11.0
|
| 231 |
+
Requires-Dist: tld==0.13.1
|
| 232 |
+
Requires-Dist: tokenizers==0.22.1
|
| 233 |
+
Requires-Dist: tqdm==4.67.1
|
| 234 |
+
Requires-Dist: trafilatura==2.0.0
|
| 235 |
+
Requires-Dist: truststore==0.10.4
|
| 236 |
+
Requires-Dist: types-protobuf==6.32.1.20250918
|
| 237 |
+
Requires-Dist: typing-extensions==4.15.0
|
| 238 |
+
Requires-Dist: typing-inspect==0.9.0
|
| 239 |
+
Requires-Dist: typing-inspection==0.4.2
|
| 240 |
+
Requires-Dist: tzdata==2025.2
|
| 241 |
+
Requires-Dist: tzlocal==5.3.1
|
| 242 |
+
Requires-Dist: urllib3==2.5.0
|
| 243 |
+
Requires-Dist: uvicorn==0.37.0
|
| 244 |
+
Requires-Dist: watchfiles==1.1.0
|
| 245 |
+
Requires-Dist: websockets==15.0.1
|
| 246 |
+
Requires-Dist: werkzeug==3.1.1
|
| 247 |
+
Requires-Dist: wrapt==1.17.3
|
| 248 |
+
Requires-Dist: xxhash==3.6.0
|
| 249 |
+
Requires-Dist: yarl==1.21.0
|
| 250 |
+
Requires-Dist: zipp==3.23.0
|
| 251 |
+
Requires-Dist: zstandard==0.25.0
|
| 252 |
+
|
| 253 |
+
# Job Writer Module
|
| 254 |
+
|
| 255 |
+
A modular, well-structured package for creating tailored job applications using LangChain and LangGraph with LangSmith observability.
|
| 256 |
+
|
| 257 |
+
## Features
|
| 258 |
+
|
| 259 |
+
- Creates personalized job application materials based on resumes and job descriptions
|
| 260 |
+
- Supports multiple application types: cover letters, bullet points, and LinkedIn messages
|
| 261 |
+
- Uses RAG for personalization and web search for company research
|
| 262 |
+
- Provides human-in-the-loop feedback integration
|
| 263 |
+
- Implements self-consistency voting for quality control
|
| 264 |
+
|
| 265 |
+
## Installation
|
| 266 |
+
|
| 267 |
+
```bash
|
| 268 |
+
# Install the package and its dependencies
|
| 269 |
+
pip install -e .
|
| 270 |
+
|
| 271 |
+
# Install development dependencies (including linting tools)
|
| 272 |
+
pip install -r requirements-dev.txt
|
| 273 |
+
```
|
| 274 |
+
|
| 275 |
+
## Code Standards and Linting
|
| 276 |
+
|
| 277 |
+
This project uses several tools to ensure code quality:
|
| 278 |
+
|
| 279 |
+
1. **Black** - Code formatter that enforces consistent style
|
| 280 |
+
2. **isort** - Sorts imports according to best practices
|
| 281 |
+
3. **Flake8** - Style guide enforcement
|
| 282 |
+
4. **mypy** - Static type checking
|
| 283 |
+
|
| 284 |
+
### Running the Linters
|
| 285 |
+
|
| 286 |
+
```bash
|
| 287 |
+
# Format code with Black
|
| 288 |
+
black job_writer/
|
| 289 |
+
|
| 290 |
+
# Sort imports
|
| 291 |
+
isort job_writer/
|
| 292 |
+
|
| 293 |
+
# Check style with Flake8
|
| 294 |
+
flake8 job_writer/
|
| 295 |
+
|
| 296 |
+
# Type checking with mypy
|
| 297 |
+
mypy job_writer/
|
| 298 |
+
```
|
| 299 |
+
|
| 300 |
+
### Pre-commit Hooks
|
| 301 |
+
|
| 302 |
+
We use pre-commit hooks to automatically run linters before each commit:
|
| 303 |
+
|
| 304 |
+
```bash
|
| 305 |
+
# Install the pre-commit hooks
|
| 306 |
+
pip install pre-commit
|
| 307 |
+
pre-commit install
|
| 308 |
+
|
| 309 |
+
# You can also run the hooks manually
|
| 310 |
+
pre-commit run --all-files
|
| 311 |
+
```
|
| 312 |
+
|
| 313 |
+
## Usage Example
|
| 314 |
+
|
| 315 |
+
```python
|
| 316 |
+
import asyncio
|
| 317 |
+
from job_writer.workflow import run_job_application_writer
|
| 318 |
+
|
| 319 |
+
# Run the job application writer
|
| 320 |
+
result = asyncio.run(run_job_application_writer(
|
| 321 |
+
resume_path="path/to/resume.pdf",
|
| 322 |
+
job_desc_path="https://example.com/job-posting",
|
| 323 |
+
content="cover_letter"
|
| 324 |
+
))
|
| 325 |
+
|
| 326 |
+
print(result["final"])
|
| 327 |
+
```
|
| 328 |
+
|
| 329 |
+
Alternatively, you can use the command-line interface:
|
| 330 |
+
|
| 331 |
+
```bash
|
| 332 |
+
python -m job_writer.workflow --resume path/to/resume.pdf --job https://example.com/job-posting --type cover_letter
|
| 333 |
+
```
|
job_writing_agent.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
pyproject.toml
|
| 3 |
+
src/job_writing_agent/__init__.py
|
| 4 |
+
src/job_writing_agent/langgraph_init.py
|
| 5 |
+
src/job_writing_agent/workflow.py
|
| 6 |
+
src/job_writing_agent.egg-info/PKG-INFO
|
| 7 |
+
src/job_writing_agent.egg-info/SOURCES.txt
|
| 8 |
+
src/job_writing_agent.egg-info/dependency_links.txt
|
| 9 |
+
src/job_writing_agent.egg-info/requires.txt
|
| 10 |
+
src/job_writing_agent.egg-info/top_level.txt
|
| 11 |
+
src/job_writing_agent/agents/__init__.py
|
| 12 |
+
src/job_writing_agent/agents/nodes.py
|
| 13 |
+
src/job_writing_agent/agents/output_schema.py
|
| 14 |
+
src/job_writing_agent/classes/__init__.py
|
| 15 |
+
src/job_writing_agent/classes/classes.py
|
| 16 |
+
src/job_writing_agent/nodes/__init__.py
|
| 17 |
+
src/job_writing_agent/nodes/createdraft.py
|
| 18 |
+
src/job_writing_agent/nodes/initializing.py
|
| 19 |
+
src/job_writing_agent/nodes/research_workflow.py
|
| 20 |
+
src/job_writing_agent/nodes/selfconsistency.py
|
| 21 |
+
src/job_writing_agent/nodes/test_workflow.py
|
| 22 |
+
src/job_writing_agent/nodes/variations.py
|
| 23 |
+
src/job_writing_agent/prompts/__init__.py
|
| 24 |
+
src/job_writing_agent/prompts/templates.py
|
| 25 |
+
src/job_writing_agent/tools/SearchTool.py
|
| 26 |
+
src/job_writing_agent/tools/__init__.py
|
| 27 |
+
src/job_writing_agent/tools/test_llm.py
|
| 28 |
+
src/job_writing_agent/tools/test_tavily.py
|
| 29 |
+
src/job_writing_agent/utils/__init__.py
|
| 30 |
+
src/job_writing_agent/utils/application_cli_interface.py
|
| 31 |
+
src/job_writing_agent/utils/config.py
|
| 32 |
+
src/job_writing_agent/utils/config_utils.py
|
| 33 |
+
src/job_writing_agent/utils/document_processing.py
|
| 34 |
+
src/job_writing_agent/utils/dspy_job_extract.py
|
| 35 |
+
src/job_writing_agent/utils/errors.py
|
| 36 |
+
src/job_writing_agent/utils/langfuse_handler.py
|
| 37 |
+
src/job_writing_agent/utils/llm_client.py
|
| 38 |
+
src/job_writing_agent/utils/llm_provider_factory.py
|
| 39 |
+
src/job_writing_agent/utils/result_utils.py
|
| 40 |
+
src/job_writing_agent/utils/vector_store.py
|
job_writing_agent.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
job_writing_agent.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiofiles==24.1.0
|
| 2 |
+
aiohappyeyeballs==2.6.1
|
| 3 |
+
aiohttp==3.12.15
|
| 4 |
+
aiosignal==1.4.0
|
| 5 |
+
alembic==1.16.5
|
| 6 |
+
annotated-types==0.7.0
|
| 7 |
+
anyio==4.11.0
|
| 8 |
+
asyncer==0.0.8
|
| 9 |
+
attrs==25.3.0
|
| 10 |
+
authlib==1.6.5
|
| 11 |
+
av==15.1.0
|
| 12 |
+
babel==2.17.0
|
| 13 |
+
backoff==2.2.1
|
| 14 |
+
beautifulsoup4==4.14.2
|
| 15 |
+
blinker==1.9.0
|
| 16 |
+
blockbuster==1.5.25
|
| 17 |
+
bs4==0.0.2
|
| 18 |
+
cachetools==6.2.0
|
| 19 |
+
certifi==2025.10.5
|
| 20 |
+
cffi==2.0.0
|
| 21 |
+
charset-normalizer==3.4.3
|
| 22 |
+
click==8.3.0
|
| 23 |
+
click-default-group==1.2.4
|
| 24 |
+
cloudpickle==3.1.1
|
| 25 |
+
colorama==0.4.6
|
| 26 |
+
coloredlogs==15.0.1
|
| 27 |
+
colorlog==6.9.0
|
| 28 |
+
condense-json==0.1.3
|
| 29 |
+
contourpy==1.3.3
|
| 30 |
+
courlan==1.3.2
|
| 31 |
+
cryptography==44.0.3
|
| 32 |
+
cycler==0.12.1
|
| 33 |
+
cyclopts==3.24.0
|
| 34 |
+
databricks-sdk==0.67.0
|
| 35 |
+
dataclasses-json==0.6.7
|
| 36 |
+
dateparser==1.2.2
|
| 37 |
+
diskcache==5.6.3
|
| 38 |
+
distro==1.9.0
|
| 39 |
+
dnspython==2.8.0
|
| 40 |
+
docker==7.1.0
|
| 41 |
+
docstring-parser==0.17.0
|
| 42 |
+
docutils==0.22.2
|
| 43 |
+
dspy==3.0.3
|
| 44 |
+
dspy-ai==3.0.3
|
| 45 |
+
email-validator==2.3.0
|
| 46 |
+
eval-type-backport==0.2.2
|
| 47 |
+
exceptiongroup==1.3.0
|
| 48 |
+
fastapi==0.118.0
|
| 49 |
+
fastmcp==2.12.4
|
| 50 |
+
fastuuid==0.13.5
|
| 51 |
+
filelock==3.19.1
|
| 52 |
+
flask==3.1.2
|
| 53 |
+
flatbuffers==25.9.23
|
| 54 |
+
fonttools==4.60.1
|
| 55 |
+
forbiddenfruit==0.1.4
|
| 56 |
+
frozenlist==1.7.0
|
| 57 |
+
fsspec==2025.9.0
|
| 58 |
+
gepa==0.0.7
|
| 59 |
+
gitdb==4.0.12
|
| 60 |
+
gitpython==3.1.45
|
| 61 |
+
google-auth==2.41.1
|
| 62 |
+
googleapis-common-protos==1.70.0
|
| 63 |
+
graphene==3.4.3
|
| 64 |
+
graphql-core==3.2.6
|
| 65 |
+
graphql-relay==3.2.0
|
| 66 |
+
greenlet==3.2.4
|
| 67 |
+
grpcio==1.76.0
|
| 68 |
+
grpcio-tools==1.76.0
|
| 69 |
+
gunicorn==23.0.0
|
| 70 |
+
h11==0.16.0
|
| 71 |
+
hf-xet==1.1.10
|
| 72 |
+
html2text==2025.4.15
|
| 73 |
+
htmldate==1.9.3
|
| 74 |
+
httpcore==1.0.9
|
| 75 |
+
httpx==0.28.1
|
| 76 |
+
httpx-sse==0.4.1
|
| 77 |
+
huggingface-hub==0.35.3
|
| 78 |
+
humanfriendly==10.0
|
| 79 |
+
idna==3.10
|
| 80 |
+
importlib-metadata==8.7.0
|
| 81 |
+
isodate==0.7.2
|
| 82 |
+
itsdangerous==2.2.0
|
| 83 |
+
jinja2==3.1.6
|
| 84 |
+
jiter==0.11.0
|
| 85 |
+
joblib==1.5.2
|
| 86 |
+
json-repair==0.52.0
|
| 87 |
+
jsonpatch==1.33
|
| 88 |
+
jsonpointer==3.0.0
|
| 89 |
+
jsonschema==4.25.1
|
| 90 |
+
jsonschema-path==0.3.4
|
| 91 |
+
jsonschema-rs==0.29.1
|
| 92 |
+
jsonschema-specifications==2025.9.1
|
| 93 |
+
justext==3.0.2
|
| 94 |
+
kiwisolver==1.4.9
|
| 95 |
+
langchain==0.3.27
|
| 96 |
+
langchain-cerebras==0.5.0
|
| 97 |
+
langchain-community==0.3.30
|
| 98 |
+
langchain-core==0.3.78
|
| 99 |
+
langchain-ollama==0.3.10
|
| 100 |
+
langchain-openai==0.3.34
|
| 101 |
+
langchain-tavily==0.2.12
|
| 102 |
+
langchain-text-splitters==0.3.11
|
| 103 |
+
langfuse==3.6.1
|
| 104 |
+
langgraph==0.6.8
|
| 105 |
+
langgraph-api==0.4.46
|
| 106 |
+
langgraph-checkpoint==2.1.1
|
| 107 |
+
langgraph-cli==0.4.4
|
| 108 |
+
langgraph-prebuilt==0.6.4
|
| 109 |
+
langgraph-runtime-inmem==0.14.1
|
| 110 |
+
langgraph-sdk==0.2.9
|
| 111 |
+
langsmith==0.4.32
|
| 112 |
+
lazy-object-proxy==1.12.0
|
| 113 |
+
litellm==1.77.7
|
| 114 |
+
livekit==1.0.13
|
| 115 |
+
livekit-agents==1.2.14
|
| 116 |
+
livekit-api==1.0.6
|
| 117 |
+
livekit-blingfire==1.0.0
|
| 118 |
+
livekit-plugins-cartesia==1.2.14
|
| 119 |
+
livekit-plugins-openai==1.2.14
|
| 120 |
+
livekit-plugins-silero==1.2.14
|
| 121 |
+
livekit-protocol==1.0.7
|
| 122 |
+
llm==0.27.1
|
| 123 |
+
llm-openrouter==0.5
|
| 124 |
+
lxml==5.4.0
|
| 125 |
+
lxml-html-clean==0.4.3
|
| 126 |
+
magicattr==0.1.6
|
| 127 |
+
mako==1.3.10
|
| 128 |
+
markdown-it-py==4.0.0
|
| 129 |
+
markupsafe==3.0.3
|
| 130 |
+
marshmallow==3.26.1
|
| 131 |
+
matplotlib==3.10.6
|
| 132 |
+
mcp==1.16.0
|
| 133 |
+
mdurl==0.1.2
|
| 134 |
+
mlflow==3.4.0
|
| 135 |
+
mlflow-skinny==3.4.0
|
| 136 |
+
mlflow-tracing==3.4.0
|
| 137 |
+
more-itertools==10.8.0
|
| 138 |
+
mpmath==1.3.0
|
| 139 |
+
multidict==6.6.4
|
| 140 |
+
mypy-extensions==1.1.0
|
| 141 |
+
nest-asyncio==1.6.0
|
| 142 |
+
numpy==2.3.3
|
| 143 |
+
ollama==0.6.0
|
| 144 |
+
onnxruntime==1.23.0
|
| 145 |
+
openai==2.3.0
|
| 146 |
+
openapi-core==0.19.5
|
| 147 |
+
openapi-pydantic==0.5.1
|
| 148 |
+
openapi-schema-validator==0.6.3
|
| 149 |
+
openapi-spec-validator==0.7.2
|
| 150 |
+
openevals==0.1.0
|
| 151 |
+
opentelemetry-api==1.37.0
|
| 152 |
+
opentelemetry-exporter-otlp==1.37.0
|
| 153 |
+
opentelemetry-exporter-otlp-proto-common==1.37.0
|
| 154 |
+
opentelemetry-exporter-otlp-proto-grpc==1.37.0
|
| 155 |
+
opentelemetry-exporter-otlp-proto-http==1.37.0
|
| 156 |
+
opentelemetry-proto==1.37.0
|
| 157 |
+
opentelemetry-sdk==1.37.0
|
| 158 |
+
opentelemetry-semantic-conventions==0.58b0
|
| 159 |
+
optuna==4.5.0
|
| 160 |
+
orjson==3.11.3
|
| 161 |
+
ormsgpack==1.10.0
|
| 162 |
+
packaging==25.0
|
| 163 |
+
pandas==2.3.3
|
| 164 |
+
parse==1.20.2
|
| 165 |
+
pathable==0.4.4
|
| 166 |
+
pillow==11.3.0
|
| 167 |
+
pip==25.2
|
| 168 |
+
playwright==1.55.0
|
| 169 |
+
pluggy==1.6.0
|
| 170 |
+
poetry>=2.2.1
|
| 171 |
+
prometheus-client==0.23.1
|
| 172 |
+
propcache==0.4.0
|
| 173 |
+
protobuf==6.32.1
|
| 174 |
+
psutil==7.1.0
|
| 175 |
+
puremagic==1.30
|
| 176 |
+
pyarrow==21.0.0
|
| 177 |
+
pyasn1==0.6.1
|
| 178 |
+
pyasn1-modules==0.4.2
|
| 179 |
+
pycparser==2.23
|
| 180 |
+
pydantic==2.12.0
|
| 181 |
+
pydantic-core==2.41.1
|
| 182 |
+
pydantic-settings==2.11.0
|
| 183 |
+
pyee==13.0.0
|
| 184 |
+
pygments==2.19.2
|
| 185 |
+
pyjwt==2.10.1
|
| 186 |
+
pyparsing==3.2.5
|
| 187 |
+
pypdf==6.1.1
|
| 188 |
+
pyperclip==1.11.0
|
| 189 |
+
python-dateutil==2.9.0.post0
|
| 190 |
+
python-dotenv==1.1.1
|
| 191 |
+
python-multipart==0.0.20
|
| 192 |
+
python-ulid==3.1.0
|
| 193 |
+
pytz==2025.2
|
| 194 |
+
pyyaml==6.0.3
|
| 195 |
+
referencing==0.36.2
|
| 196 |
+
regex==2025.9.18
|
| 197 |
+
requests==2.32.5
|
| 198 |
+
requests-toolbelt==1.0.0
|
| 199 |
+
rfc3339-validator==0.1.4
|
| 200 |
+
rich==14.1.0
|
| 201 |
+
rich-rst==1.3.1
|
| 202 |
+
rpds-py==0.27.1
|
| 203 |
+
rsa==4.9.1
|
| 204 |
+
scikit-learn==1.7.2
|
| 205 |
+
scipy==1.16.2
|
| 206 |
+
setuptools==80.9.0
|
| 207 |
+
six==1.17.0
|
| 208 |
+
smmap==5.0.2
|
| 209 |
+
sniffio==1.3.1
|
| 210 |
+
sounddevice==0.5.2
|
| 211 |
+
soupsieve==2.8
|
| 212 |
+
sqlalchemy==2.0.43
|
| 213 |
+
sqlite-fts4==1.0.3
|
| 214 |
+
sqlite-migrate==0.1b0
|
| 215 |
+
sqlite-utils==3.38
|
| 216 |
+
sqlparse==0.5.3
|
| 217 |
+
sse-starlette==2.1.3
|
| 218 |
+
starlette==0.48.0
|
| 219 |
+
structlog==25.4.0
|
| 220 |
+
sympy==1.14.0
|
| 221 |
+
tabulate==0.9.0
|
| 222 |
+
tenacity==9.1.2
|
| 223 |
+
threadpoolctl==3.6.0
|
| 224 |
+
tiktoken==0.11.0
|
| 225 |
+
tld==0.13.1
|
| 226 |
+
tokenizers==0.22.1
|
| 227 |
+
tqdm==4.67.1
|
| 228 |
+
trafilatura==2.0.0
|
| 229 |
+
truststore==0.10.4
|
| 230 |
+
types-protobuf==6.32.1.20250918
|
| 231 |
+
typing-extensions==4.15.0
|
| 232 |
+
typing-inspect==0.9.0
|
| 233 |
+
typing-inspection==0.4.2
|
| 234 |
+
tzdata==2025.2
|
| 235 |
+
tzlocal==5.3.1
|
| 236 |
+
urllib3==2.5.0
|
| 237 |
+
uvicorn==0.37.0
|
| 238 |
+
watchfiles==1.1.0
|
| 239 |
+
websockets==15.0.1
|
| 240 |
+
werkzeug==3.1.1
|
| 241 |
+
wrapt==1.17.3
|
| 242 |
+
xxhash==3.6.0
|
| 243 |
+
yarl==1.21.0
|
| 244 |
+
zipp==3.23.0
|
| 245 |
+
zstandard==0.25.0
|
job_writing_agent.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
job_writing_agent
|
__init__.py β job_writing_agent/__init__.py
RENAMED
|
@@ -11,7 +11,6 @@ import os, getpass
|
|
| 11 |
import logging
|
| 12 |
from pathlib import Path
|
| 13 |
from dotenv import load_dotenv
|
| 14 |
-
from langfuse import Langfuse
|
| 15 |
|
| 16 |
|
| 17 |
# Set up logging
|
|
@@ -65,5 +64,80 @@ if not os.getenv("LANGFUSE_SECRET_KEY"):
|
|
| 65 |
" Failed to get LANGFUSE_SECRET_KEY at Path %s", env_path)
|
| 66 |
_set_env("LANGFUSE_SECRET_KEY")
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
import logging
|
| 12 |
from pathlib import Path
|
| 13 |
from dotenv import load_dotenv
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
# Set up logging
|
|
|
|
| 64 |
" Failed to get LANGFUSE_SECRET_KEY at Path %s", env_path)
|
| 65 |
_set_env("LANGFUSE_SECRET_KEY")
|
| 66 |
|
| 67 |
+
if not os.getenv("LANGSMITH_API_KEY"):
|
| 68 |
+
logger.warning("LANGSMITH_API_KEY environment variable is not set." \
|
| 69 |
+
" Failed to get LANGSMITH_API_KEY at Path %s", env_path)
|
| 70 |
+
_set_env("LANGSMITH_API_KEY")
|
| 71 |
|
| 72 |
+
if not os.getenv("OPENROUTER_API_KEY"):
|
| 73 |
+
logger.warning("OPENROUTER_API_KEY environment variable is not set." \
|
| 74 |
+
" Failed to get OPENROUTER_API_KEY at Path %s", env_path)
|
| 75 |
+
_set_env("OPENROUTER_API_KEY")
|
| 76 |
+
|
| 77 |
+
if not os.getenv("LANGSMITH_PROJECT"):
|
| 78 |
+
logger.warning("LANGSMITH_PROJECT environment variable is not set." \
|
| 79 |
+
" Failed to get LANGSMITH_PROJECT at Path %s", env_path)
|
| 80 |
+
_set_env("LANGSMITH_PROJECT")
|
| 81 |
+
|
| 82 |
+
if not os.getenv("LANGSMITH_ENDPOINT"):
|
| 83 |
+
logger.warning("LANGSMITH_ENDPOINT environment variable is not set." \
|
| 84 |
+
" Failed to get LANGSMITH_ENDPOINT at Path %s", env_path)
|
| 85 |
+
_set_env("LANGSMITH_ENDPOINT")
|
| 86 |
+
|
| 87 |
+
if not os.getenv("CEREBRAS_API_KEY"):
|
| 88 |
+
logger.warning("CEREBRAS_API_KEY environment variable is not set." \
|
| 89 |
+
" Failed to get CEREBRAS_API_KEY at Path %s", env_path)
|
| 90 |
+
_set_env("CEREBRAS_API_KEY")
|
| 91 |
+
|
| 92 |
+
os.environ["LANGSMITH_TRACING"] = "true"
|
| 93 |
+
|
| 94 |
+
__all__: list[str] = ["job_app_graph", "workflows/research_workflow"]
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
"""
|
| 98 |
+
Job Application Writer Package
|
| 99 |
+
|
| 100 |
+
A modular, well-structured package for creating tailored job applications
|
| 101 |
+
using LangChain and LangGraph with LangSmith observability.
|
| 102 |
+
"""
|
| 103 |
+
|
| 104 |
+
__version__ = "0.1.0"
|
| 105 |
+
|
| 106 |
+
import os
|
| 107 |
+
import getpass
|
| 108 |
+
import logging
|
| 109 |
+
from pathlib import Path
|
| 110 |
+
from dotenv import load_dotenv
|
| 111 |
+
|
| 112 |
+
logger = logging.getLogger(__name__)
|
| 113 |
+
logger.setLevel(logging.INFO)
|
| 114 |
+
log_dir = Path(__file__).parent / 'logs'
|
| 115 |
+
log_dir.mkdir(exist_ok=True)
|
| 116 |
+
logger.addHandler(logging.FileHandler(log_dir / 'job_writer.log', mode='a'))
|
| 117 |
+
logger.info("Logger initialized. Writing to %s", Path(__file__).parent / 'job_writer.log')
|
| 118 |
+
|
| 119 |
+
env_path = Path(__file__).parent / '.env'
|
| 120 |
+
|
| 121 |
+
def _set_env(var: str):
|
| 122 |
+
if not os.environ.get(var):
|
| 123 |
+
os.environ[var] = getpass.getpass(f"{var}: ")
|
| 124 |
+
logger.info(f"{var} set to {os.environ[var]}")
|
| 125 |
+
|
| 126 |
+
def load_environment_variables(key_array):
|
| 127 |
+
for key in key_array:
|
| 128 |
+
if not os.getenv(key):
|
| 129 |
+
logger.warning(f"{key} environment variable is not set. Failed to get {key} at Path {env_path}")
|
| 130 |
+
_set_env(key)
|
| 131 |
+
|
| 132 |
+
if env_path.exists():
|
| 133 |
+
logger.info("Loading environment variables from %s", env_path)
|
| 134 |
+
load_dotenv(dotenv_path=env_path, override=True)
|
| 135 |
+
else:
|
| 136 |
+
logger.warning(".env file not found at %s. Using system environment variables.", env_path)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
environment_key_array = ["TAVILY_API_KEY", "GEMINI_API_KEY", "PINECONE_API_KEY", "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
|
| 140 |
+
# Check for critical environment variables
|
| 141 |
+
load_environment_variables(environment_key_array)
|
| 142 |
+
|
| 143 |
+
__all__ = ["job_app_graph", "workflows/research_workflow"]
|
{agents β job_writing_agent/agents}/__init__.py
RENAMED
|
File without changes
|
{agents β job_writing_agent/agents}/nodes.py
RENAMED
|
@@ -11,7 +11,7 @@ from datetime import datetime
|
|
| 11 |
from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
|
| 12 |
from langchain_core.output_parsers import StrOutputParser
|
| 13 |
|
| 14 |
-
from ..classes.classes import AppState
|
| 15 |
from ..prompts.templates import (
|
| 16 |
CRITIQUE_PROMPT,
|
| 17 |
PERSONA_DEVELOPMENT_PROMPT,
|
|
@@ -20,24 +20,26 @@ from ..prompts.templates import (
|
|
| 20 |
BULLET_POINTS_PROMPT,
|
| 21 |
LINKEDIN_NOTE_PROMPT,
|
| 22 |
)
|
| 23 |
-
from ..utils.
|
| 24 |
|
| 25 |
logger = logging.getLogger(__name__)
|
| 26 |
# Constants
|
| 27 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
|
| 33 |
-
def create_draft(state:
|
| 34 |
"""Create initial draft of the application material."""
|
| 35 |
# Determine which type of content we're creating
|
| 36 |
current_application_session = state.get("company_research_data", {})
|
| 37 |
|
| 38 |
content_category = state.get("content_category", "cover_letter")
|
| 39 |
|
| 40 |
-
|
| 41 |
try:
|
| 42 |
if state.get("vector_store"):
|
| 43 |
vector_store = state.get("vector_store")
|
|
@@ -46,24 +48,26 @@ def create_draft(state: AppState) -> AppState:
|
|
| 46 |
prompt = PERSONA_DEVELOPMENT_PROMPT | llm | StrOutputParser()
|
| 47 |
|
| 48 |
if current_application_session:
|
| 49 |
-
key_requirements = prompt.invoke(
|
|
|
|
|
|
|
| 50 |
else:
|
| 51 |
return key_requirements
|
| 52 |
|
| 53 |
if not key_requirements:
|
| 54 |
print("Warning: No key requirements found in the job description.")
|
| 55 |
return state
|
| 56 |
-
|
| 57 |
# Use the key requirements to query for the most relevant resume parts
|
| 58 |
namespace = f"resume_{state['session_id']}"
|
| 59 |
relevant_docs = vector_store.retrieve_similar(
|
| 60 |
-
query=key_requirements,
|
| 61 |
-
namespace=namespace,
|
| 62 |
-
k=3
|
| 63 |
)
|
| 64 |
|
| 65 |
# Use these relevant sections with higher weight in the draft creation
|
| 66 |
-
highly_relevant_resume = "\n".join(
|
|
|
|
|
|
|
| 67 |
resume_text = f"""
|
| 68 |
# Most Relevant Experience
|
| 69 |
{highly_relevant_resume}
|
|
@@ -72,21 +76,21 @@ def create_draft(state: AppState) -> AppState:
|
|
| 72 |
{resume_text}
|
| 73 |
"""
|
| 74 |
except Exception as e:
|
| 75 |
-
|
| 76 |
# Continue with regular resume text
|
| 77 |
|
| 78 |
# Select the appropriate prompt template based on application type and persona
|
| 79 |
-
|
| 80 |
if content_category == "bullets":
|
| 81 |
FirstDraftGenerationPromptTemplate = ChatPromptTemplate([BULLET_POINTS_PROMPT])
|
| 82 |
elif content_category == "linkedin_connect_request":
|
| 83 |
FirstDraftGenerationPromptTemplate = ChatPromptTemplate([LINKEDIN_NOTE_PROMPT])
|
| 84 |
else:
|
| 85 |
FirstDraftGenerationPromptTemplate = ChatPromptTemplate([COVER_LETTER_PROMPT])
|
| 86 |
-
|
| 87 |
# Create the draft using the selected prompt template
|
| 88 |
CurrentSessionContextMessage = HumanMessagePromptTemplate.from_template(
|
| 89 |
-
|
| 90 |
Below is the Job Description and Resume enclosed in triple backticks.
|
| 91 |
|
| 92 |
Job Description and Resume:
|
|
@@ -101,35 +105,42 @@ def create_draft(state: AppState) -> AppState:
|
|
| 101 |
|
| 102 |
Create a cover letter that highlights the match between my qualifications and the job requirements.
|
| 103 |
""",
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
FirstDraftGenerationPromptTemplate.append(CurrentSessionContextMessage)
|
| 108 |
|
| 109 |
# Invoke the chain with the appropriate inputs
|
| 110 |
chain = (
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
)
|
| 116 |
|
| 117 |
# Prepare the inputs
|
| 118 |
inputs = {
|
| 119 |
-
"current_job_role": current_application_session[
|
| 120 |
-
"company_research_data": current_application_session["tavily_search"]
|
|
|
|
| 121 |
|
| 122 |
response = chain.invoke(inputs)
|
| 123 |
-
|
| 124 |
state["draft"] = response
|
| 125 |
return state
|
| 126 |
|
|
|
|
| 127 |
def critique_draft(state: AppState) -> AppState:
|
| 128 |
"""Critique the draft for improvements."""
|
| 129 |
-
critique = llm.invoke(
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
|
|
|
| 133 |
|
| 134 |
# Store the critique for reference during human feedback
|
| 135 |
state["critique"] = critique
|
|
@@ -139,12 +150,12 @@ def critique_draft(state: AppState) -> AppState:
|
|
| 139 |
def human_approval(state: AppState) -> AppState:
|
| 140 |
"""Human-in-the-loop checkpoint for feedback on the draft."""
|
| 141 |
# This is a placeholder function that would be replaced by actual UI interaction
|
| 142 |
-
print("\n" + "="*80)
|
| 143 |
print("DRAFT FOR REVIEW:")
|
| 144 |
print(state["draft"])
|
| 145 |
print("\nAUTOMATIC CRITIQUE:")
|
| 146 |
print(state.get("critique", "No critique available"))
|
| 147 |
-
print("="*80)
|
| 148 |
print("\nPlease provide your feedback (press Enter to continue with no changes):")
|
| 149 |
|
| 150 |
# In a real implementation, this would be handled by the UI
|
|
@@ -159,19 +170,22 @@ def finalize_document(state: AppState) -> AppState:
|
|
| 159 |
state["final"] = state["draft"]
|
| 160 |
return state
|
| 161 |
|
| 162 |
-
final = llm.invoke(
|
| 163 |
-
draft=state["draft"],
|
| 164 |
-
|
| 165 |
-
))
|
| 166 |
|
| 167 |
state["final"] = final
|
| 168 |
return state
|
| 169 |
|
| 170 |
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
def determine_next_step(state: AppState) -> str:
|
| 173 |
-
"""
|
| 174 |
-
|
| 175 |
if not state["company_name"]:
|
| 176 |
return "draft"
|
| 177 |
-
return "research"
|
|
|
|
| 11 |
from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
|
| 12 |
from langchain_core.output_parsers import StrOutputParser
|
| 13 |
|
| 14 |
+
from ..classes.classes import AppState, ResearchState
|
| 15 |
from ..prompts.templates import (
|
| 16 |
CRITIQUE_PROMPT,
|
| 17 |
PERSONA_DEVELOPMENT_PROMPT,
|
|
|
|
| 20 |
BULLET_POINTS_PROMPT,
|
| 21 |
LINKEDIN_NOTE_PROMPT,
|
| 22 |
)
|
| 23 |
+
from ..utils.llm_provider_factory import LLMFactory
|
| 24 |
|
| 25 |
logger = logging.getLogger(__name__)
|
| 26 |
# Constants
|
| 27 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 28 |
|
| 29 |
+
llm_provider = LLMFactory()
|
| 30 |
+
|
| 31 |
+
llm = llm_provider.create_langchain(
|
| 32 |
+
"qwen/qwen3-4b:free", provider="openrouter", temperature=0.3
|
| 33 |
+
)
|
| 34 |
|
| 35 |
|
| 36 |
+
def create_draft(state: ResearchState) -> AppState:
|
| 37 |
"""Create initial draft of the application material."""
|
| 38 |
# Determine which type of content we're creating
|
| 39 |
current_application_session = state.get("company_research_data", {})
|
| 40 |
|
| 41 |
content_category = state.get("content_category", "cover_letter")
|
| 42 |
|
|
|
|
| 43 |
try:
|
| 44 |
if state.get("vector_store"):
|
| 45 |
vector_store = state.get("vector_store")
|
|
|
|
| 48 |
prompt = PERSONA_DEVELOPMENT_PROMPT | llm | StrOutputParser()
|
| 49 |
|
| 50 |
if current_application_session:
|
| 51 |
+
key_requirements = prompt.invoke(
|
| 52 |
+
{"job_description": current_application_session["job_description"]}
|
| 53 |
+
)
|
| 54 |
else:
|
| 55 |
return key_requirements
|
| 56 |
|
| 57 |
if not key_requirements:
|
| 58 |
print("Warning: No key requirements found in the job description.")
|
| 59 |
return state
|
| 60 |
+
|
| 61 |
# Use the key requirements to query for the most relevant resume parts
|
| 62 |
namespace = f"resume_{state['session_id']}"
|
| 63 |
relevant_docs = vector_store.retrieve_similar(
|
| 64 |
+
query=key_requirements, namespace=namespace, k=3
|
|
|
|
|
|
|
| 65 |
)
|
| 66 |
|
| 67 |
# Use these relevant sections with higher weight in the draft creation
|
| 68 |
+
highly_relevant_resume = "\n".join(
|
| 69 |
+
[doc.page_content for doc in relevant_docs]
|
| 70 |
+
)
|
| 71 |
resume_text = f"""
|
| 72 |
# Most Relevant Experience
|
| 73 |
{highly_relevant_resume}
|
|
|
|
| 76 |
{resume_text}
|
| 77 |
"""
|
| 78 |
except Exception as e:
|
| 79 |
+
logger.warning(f"Could not use vector search for relevant resume parts: {e}")
|
| 80 |
# Continue with regular resume text
|
| 81 |
|
| 82 |
# Select the appropriate prompt template based on application type and persona
|
| 83 |
+
logger.info(f"The candidate wants the Agent to assist with : {content_category}")
|
| 84 |
if content_category == "bullets":
|
| 85 |
FirstDraftGenerationPromptTemplate = ChatPromptTemplate([BULLET_POINTS_PROMPT])
|
| 86 |
elif content_category == "linkedin_connect_request":
|
| 87 |
FirstDraftGenerationPromptTemplate = ChatPromptTemplate([LINKEDIN_NOTE_PROMPT])
|
| 88 |
else:
|
| 89 |
FirstDraftGenerationPromptTemplate = ChatPromptTemplate([COVER_LETTER_PROMPT])
|
| 90 |
+
|
| 91 |
# Create the draft using the selected prompt template
|
| 92 |
CurrentSessionContextMessage = HumanMessagePromptTemplate.from_template(
|
| 93 |
+
"""
|
| 94 |
Below is the Job Description and Resume enclosed in triple backticks.
|
| 95 |
|
| 96 |
Job Description and Resume:
|
|
|
|
| 105 |
|
| 106 |
Create a cover letter that highlights the match between my qualifications and the job requirements.
|
| 107 |
""",
|
| 108 |
+
input_variables=["current_job_role", "company_research_data"],
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
FirstDraftGenerationPromptTemplate.append(CurrentSessionContextMessage)
|
| 112 |
|
| 113 |
# Invoke the chain with the appropriate inputs
|
| 114 |
chain = (
|
| 115 |
+
(
|
| 116 |
+
{
|
| 117 |
+
"current_job_role": lambda x: x["current_job_role"],
|
| 118 |
+
"company_research_data": lambda x: x["company_research_data"],
|
| 119 |
+
}
|
| 120 |
+
)
|
| 121 |
+
| FirstDraftGenerationPromptTemplate
|
| 122 |
+
| llm
|
| 123 |
)
|
| 124 |
|
| 125 |
# Prepare the inputs
|
| 126 |
inputs = {
|
| 127 |
+
"current_job_role": current_application_session["job_description"],
|
| 128 |
+
"company_research_data": current_application_session["tavily_search"],
|
| 129 |
+
}
|
| 130 |
|
| 131 |
response = chain.invoke(inputs)
|
| 132 |
+
logger.info(f"Draft has been created: {response}")
|
| 133 |
state["draft"] = response
|
| 134 |
return state
|
| 135 |
|
| 136 |
+
|
| 137 |
def critique_draft(state: AppState) -> AppState:
|
| 138 |
"""Critique the draft for improvements."""
|
| 139 |
+
critique = llm.invoke(
|
| 140 |
+
CRITIQUE_PROMPT.format(
|
| 141 |
+
job_description=state["job_description"][0], draft=state["draft"]
|
| 142 |
+
)
|
| 143 |
+
)
|
| 144 |
|
| 145 |
# Store the critique for reference during human feedback
|
| 146 |
state["critique"] = critique
|
|
|
|
| 150 |
def human_approval(state: AppState) -> AppState:
|
| 151 |
"""Human-in-the-loop checkpoint for feedback on the draft."""
|
| 152 |
# This is a placeholder function that would be replaced by actual UI interaction
|
| 153 |
+
print("\n" + "=" * 80)
|
| 154 |
print("DRAFT FOR REVIEW:")
|
| 155 |
print(state["draft"])
|
| 156 |
print("\nAUTOMATIC CRITIQUE:")
|
| 157 |
print(state.get("critique", "No critique available"))
|
| 158 |
+
print("=" * 80)
|
| 159 |
print("\nPlease provide your feedback (press Enter to continue with no changes):")
|
| 160 |
|
| 161 |
# In a real implementation, this would be handled by the UI
|
|
|
|
| 170 |
state["final"] = state["draft"]
|
| 171 |
return state
|
| 172 |
|
| 173 |
+
final = llm.invoke(
|
| 174 |
+
REVISION_PROMPT.format(draft=state["draft"], feedback=state["feedback"])
|
| 175 |
+
)
|
|
|
|
| 176 |
|
| 177 |
state["final"] = final
|
| 178 |
return state
|
| 179 |
|
| 180 |
|
| 181 |
+
"""
|
| 182 |
+
Conditional node to determine if next node should be 'draft' node or "research" node
|
| 183 |
+
"""
|
| 184 |
+
|
| 185 |
+
|
| 186 |
def determine_next_step(state: AppState) -> str:
|
| 187 |
+
"""If the company name is missing within the AppState, we can't
|
| 188 |
+
create the content draft and therefore redirected to the research node."""
|
| 189 |
if not state["company_name"]:
|
| 190 |
return "draft"
|
| 191 |
+
return "research"
|
{agents β job_writing_agent/agents}/output_schema.py
RENAMED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from pydantic import BaseModel, Field, field_validator
|
| 2 |
from typing import List, Optional
|
|
|
|
| 3 |
|
| 4 |
class TavilyQuerySet(BaseModel):
|
| 5 |
query1: Optional[List[str]] = Field(default=None, description="First search query and its rationale, e.g., ['query text']")
|
|
@@ -17,3 +18,11 @@ class TavilyQuerySet(BaseModel):
|
|
| 17 |
# Updated error message for clarity
|
| 18 |
raise ValueError("Each query list, when provided, must contain exactly one string: the query text.")
|
| 19 |
return v
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pydantic import BaseModel, Field, field_validator
|
| 2 |
from typing import List, Optional
|
| 3 |
+
import dspy
|
| 4 |
|
| 5 |
class TavilyQuerySet(BaseModel):
|
| 6 |
query1: Optional[List[str]] = Field(default=None, description="First search query and its rationale, e.g., ['query text']")
|
|
|
|
| 18 |
# Updated error message for clarity
|
| 19 |
raise ValueError("Each query list, when provided, must contain exactly one string: the query text.")
|
| 20 |
return v
|
| 21 |
+
|
| 22 |
+
class TavilySearchQueries(dspy.Signature):
|
| 23 |
+
"""Use the job description and company name
|
| 24 |
+
to create exactly 5 search queries for the tavily search tool in JSON Format"""
|
| 25 |
+
job_description = dspy.InputField(desc="Job description of the role that candidate is applying for.")
|
| 26 |
+
company_name = dspy.InputField(desc="Name of the company the candidate is applying for.")
|
| 27 |
+
search_queries = dspy.OutputField(desc="Dictionary of tavily search queries which will gather understanding of the company and it's culture", json=True)
|
| 28 |
+
search_query_relevance = dspy.OutputField(desc="Dictionary of relevance for each tavily search query that is generated", json=True)
|
{classes β job_writing_agent/classes}/__init__.py
RENAMED
|
File without changes
|
{classes β job_writing_agent/classes}/classes.py
RENAMED
|
@@ -2,14 +2,16 @@
|
|
| 2 |
State definitions for the Job Writer LangGraph Workflow.
|
| 3 |
"""
|
| 4 |
|
|
|
|
| 5 |
from typing_extensions import List, Dict, Any
|
| 6 |
from langgraph.graph import MessagesState
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
class AppState(MessagesState):
|
| 10 |
"""
|
| 11 |
State container for the job application writer workflow.
|
| 12 |
-
|
| 13 |
Attributes:
|
| 14 |
resume: List of text chunks from the candidate's resume
|
| 15 |
job_description: List of text chunks from the job description
|
|
@@ -26,7 +28,7 @@ class AppState(MessagesState):
|
|
| 26 |
company_research_data: Dict[str, Any]
|
| 27 |
draft: str
|
| 28 |
feedback: str
|
| 29 |
-
|
| 30 |
content: str # "cover_letter", "bullets", "linkedin_note"
|
| 31 |
current_node: str
|
| 32 |
|
|
@@ -34,7 +36,7 @@ class AppState(MessagesState):
|
|
| 34 |
class DataLoadState(MessagesState):
|
| 35 |
"""
|
| 36 |
State container for the job application writer workflow.
|
| 37 |
-
|
| 38 |
Attributes:
|
| 39 |
resume: List of text chunks from the candidate's resume
|
| 40 |
job_description: List of text chunks from the job description
|
|
@@ -60,4 +62,4 @@ class ResearchState(MessagesState):
|
|
| 60 |
"""
|
| 61 |
company_research_data: Dict[str, Any]
|
| 62 |
attempted_search_queries: List[str]
|
| 63 |
-
current_node: str
|
|
|
|
| 2 |
State definitions for the Job Writer LangGraph Workflow.
|
| 3 |
"""
|
| 4 |
|
| 5 |
+
from langgraph.store.base import Op
|
| 6 |
from typing_extensions import List, Dict, Any
|
| 7 |
from langgraph.graph import MessagesState
|
| 8 |
+
from dataclasses import dataclass
|
| 9 |
|
| 10 |
+
@dataclass
|
| 11 |
class AppState(MessagesState):
|
| 12 |
"""
|
| 13 |
State container for the job application writer workflow.
|
| 14 |
+
|
| 15 |
Attributes:
|
| 16 |
resume: List of text chunks from the candidate's resume
|
| 17 |
job_description: List of text chunks from the job description
|
|
|
|
| 28 |
company_research_data: Dict[str, Any]
|
| 29 |
draft: str
|
| 30 |
feedback: str
|
| 31 |
+
final_version: str
|
| 32 |
content: str # "cover_letter", "bullets", "linkedin_note"
|
| 33 |
current_node: str
|
| 34 |
|
|
|
|
| 36 |
class DataLoadState(MessagesState):
|
| 37 |
"""
|
| 38 |
State container for the job application writer workflow.
|
| 39 |
+
|
| 40 |
Attributes:
|
| 41 |
resume: List of text chunks from the candidate's resume
|
| 42 |
job_description: List of text chunks from the job description
|
|
|
|
| 62 |
"""
|
| 63 |
company_research_data: Dict[str, Any]
|
| 64 |
attempted_search_queries: List[str]
|
| 65 |
+
current_node: str
|
langgraph_init.py β job_writing_agent/langgraph_init.py
RENAMED
|
File without changes
|
{nodes β job_writing_agent/nodes}/__init__.py
RENAMED
|
File without changes
|
{nodes β job_writing_agent/nodes}/createdraft.py
RENAMED
|
File without changes
|
job_writing_agent/nodes/initializing.py
ADDED
|
@@ -0,0 +1,310 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Job Application Writer - Initialization Module
|
| 4 |
+
|
| 5 |
+
This module provides the Dataloading class responsible for loading and validating
|
| 6 |
+
inputs required for the job-application workflow. It handles parsing resumes and
|
| 7 |
+
job descriptions, managing missing inputs, and populating application state.
|
| 8 |
+
|
| 9 |
+
The module includes utilities for:
|
| 10 |
+
- Parsing resume files and extracting text content
|
| 11 |
+
- Parsing job descriptions and extracting company information
|
| 12 |
+
- Orchestrating input loading with validation
|
| 13 |
+
- Providing user prompts for missing information during verification
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import logging
|
| 17 |
+
from typing import Tuple
|
| 18 |
+
from typing_extensions import Literal
|
| 19 |
+
|
| 20 |
+
from langchain_core.documents import Document
|
| 21 |
+
from langchain_core.messages import SystemMessage
|
| 22 |
+
|
| 23 |
+
from job_writing_agent.classes import AppState, DataLoadState
|
| 24 |
+
from job_writing_agent.utils.document_processing import parse_resume, get_job_description
|
| 25 |
+
from job_writing_agent.prompts.templates import agent_system_prompt
|
| 26 |
+
|
| 27 |
+
logger = logging.getLogger(__name__)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# ---------------------------------------------------------------------------
|
| 31 |
+
# Helper decorator to log exceptions for async methods
|
| 32 |
+
# ---------------------------------------------------------------------------
|
| 33 |
+
def log_exceptions(func):
|
| 34 |
+
"""Decorator to log exceptions in async functions."""
|
| 35 |
+
async def wrapper(*args, **kwargs):
|
| 36 |
+
try:
|
| 37 |
+
return await func(*args, **kwargs)
|
| 38 |
+
except Exception as exc:
|
| 39 |
+
logger.error(
|
| 40 |
+
"Exception in %s: %s", func.__name__, exc, exc_info=True
|
| 41 |
+
)
|
| 42 |
+
raise
|
| 43 |
+
|
| 44 |
+
return wrapper
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class Dataloading:
|
| 48 |
+
"""
|
| 49 |
+
Node for loading and initializing resume and job description data.
|
| 50 |
+
|
| 51 |
+
Methods
|
| 52 |
+
-------
|
| 53 |
+
set_agent_system_message(state: AppState) -> DataLoadState
|
| 54 |
+
Adds the system prompt to the conversation state.
|
| 55 |
+
get_resume(resume_source) -> str
|
| 56 |
+
Parses a resume file and returns its plainβtext content.
|
| 57 |
+
parse_job_description(job_description_source) -> Tuple[str, str]
|
| 58 |
+
Parses a job description and returns its text and company name.
|
| 59 |
+
load_inputs(state: DataLoadState) -> AppState
|
| 60 |
+
Orchestrates loading of resume and job description.
|
| 61 |
+
validate_data_load_state(state: DataLoadState)
|
| 62 |
+
Ensures required fields are present in company_research_data.
|
| 63 |
+
verify_inputs(state: AppState) -> Literal["load", "research"]
|
| 64 |
+
Validates inputs and decides the next workflow node.
|
| 65 |
+
run(state: DataLoadState) -> AppState
|
| 66 |
+
Executes the loading step of the workflow.
|
| 67 |
+
|
| 68 |
+
"""
|
| 69 |
+
def __init__(self):
|
| 70 |
+
pass
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
async def set_agent_system_message(self, state: AppState) -> DataLoadState:
|
| 74 |
+
"""Add the system prompt to the conversation state.
|
| 75 |
+
|
| 76 |
+
Parameters
|
| 77 |
+
----------
|
| 78 |
+
state: AppState
|
| 79 |
+
Current workflow state.
|
| 80 |
+
|
| 81 |
+
Returns
|
| 82 |
+
-------
|
| 83 |
+
DataLoadState
|
| 84 |
+
Updated state with the system message and the next node identifier.
|
| 85 |
+
"""
|
| 86 |
+
agent_initialization_system_message = SystemMessage(
|
| 87 |
+
content=agent_system_prompt
|
| 88 |
+
)
|
| 89 |
+
messages = state.get("messages", [])
|
| 90 |
+
messages.append(agent_initialization_system_message)
|
| 91 |
+
return {
|
| 92 |
+
**state,
|
| 93 |
+
"messages": messages,
|
| 94 |
+
"current_node": "initialize_system",
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
async def get_resume(self, resume_source):
|
| 98 |
+
"""Parse a resume file and return its plainβtext content.
|
| 99 |
+
|
| 100 |
+
Parameters
|
| 101 |
+
----------
|
| 102 |
+
resume_source: Any
|
| 103 |
+
Path or fileβlike object accepted by ``parse_resume``.
|
| 104 |
+
"""
|
| 105 |
+
try:
|
| 106 |
+
logger.info("Parsing resume...")
|
| 107 |
+
resume_text = ""
|
| 108 |
+
assert resume_source is not None
|
| 109 |
+
resume_chunks = parse_resume(resume_source)
|
| 110 |
+
for chunk in resume_chunks:
|
| 111 |
+
if hasattr(chunk, "page_content") and chunk.page_content:
|
| 112 |
+
resume_text += chunk.page_content
|
| 113 |
+
elif isinstance(chunk, str) and chunk:
|
| 114 |
+
resume_text += chunk
|
| 115 |
+
else:
|
| 116 |
+
logger.debug(
|
| 117 |
+
"Skipping empty or invalid chunk in resume: %s", chunk
|
| 118 |
+
)
|
| 119 |
+
return resume_text
|
| 120 |
+
except Exception as e:
|
| 121 |
+
logger.error("Error parsing resume: %s", e)
|
| 122 |
+
raise
|
| 123 |
+
|
| 124 |
+
async def parse_job_description(self, job_description_source):
|
| 125 |
+
"""Parse a job description and return its text and company name.
|
| 126 |
+
|
| 127 |
+
Parameters
|
| 128 |
+
----------
|
| 129 |
+
job_description_source: Any
|
| 130 |
+
Source accepted by ``get_job_description``.
|
| 131 |
+
"""
|
| 132 |
+
try:
|
| 133 |
+
logger.info(
|
| 134 |
+
"Parsing job description from: %s", job_description_source
|
| 135 |
+
)
|
| 136 |
+
assert (
|
| 137 |
+
job_description_source is not None
|
| 138 |
+
), "Job description source cannot be None"
|
| 139 |
+
job_description_document: Document = await get_job_description(
|
| 140 |
+
job_description_source
|
| 141 |
+
)
|
| 142 |
+
company_name = ""
|
| 143 |
+
job_posting_text = ""
|
| 144 |
+
if job_description_document:
|
| 145 |
+
if hasattr(
|
| 146 |
+
job_description_document, "metadata"
|
| 147 |
+
) and isinstance(job_description_document.metadata, dict):
|
| 148 |
+
company_name = job_description_document.metadata.get(
|
| 149 |
+
"company_name", ""
|
| 150 |
+
)
|
| 151 |
+
if not company_name:
|
| 152 |
+
logger.warning(
|
| 153 |
+
"Company name not found in job description metadata."
|
| 154 |
+
)
|
| 155 |
+
else:
|
| 156 |
+
logger.warning(
|
| 157 |
+
"Metadata attribute missing or not a dict in job "
|
| 158 |
+
"description document."
|
| 159 |
+
)
|
| 160 |
+
if hasattr(job_description_document, "page_content"):
|
| 161 |
+
job_posting_text = job_description_document.page_content or ""
|
| 162 |
+
if not job_posting_text:
|
| 163 |
+
logger.info("Parsed job posting text is empty.")
|
| 164 |
+
else:
|
| 165 |
+
logger.warning(
|
| 166 |
+
"page_content attribute missing in job description document."
|
| 167 |
+
)
|
| 168 |
+
else:
|
| 169 |
+
logger.warning(
|
| 170 |
+
"get_job_description returned None for source: %s",
|
| 171 |
+
job_description_source,
|
| 172 |
+
)
|
| 173 |
+
return job_posting_text, company_name
|
| 174 |
+
except Exception as e:
|
| 175 |
+
logger.error(
|
| 176 |
+
"Error parsing job description from source '%s': %s",
|
| 177 |
+
job_description_source,
|
| 178 |
+
e,
|
| 179 |
+
exc_info=True,
|
| 180 |
+
)
|
| 181 |
+
raise
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
# -----------------------------------------------------------------------
|
| 185 |
+
# Private helper methods used by load_inputs
|
| 186 |
+
# -----------------------------------------------------------------------
|
| 187 |
+
@log_exceptions
|
| 188 |
+
async def _load_resume(self, resume_source) -> str:
|
| 189 |
+
"""Load resume content, raising if the source is missing."""
|
| 190 |
+
if not resume_source:
|
| 191 |
+
raise ValueError("resume_source is required")
|
| 192 |
+
return await self.get_resume(resume_source)
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
@log_exceptions
|
| 196 |
+
async def _load_job_description(self, jd_source) -> Tuple[str, str]:
|
| 197 |
+
"""Load job description text and company name, raising if missing."""
|
| 198 |
+
if not jd_source:
|
| 199 |
+
raise ValueError("job_description_source is required")
|
| 200 |
+
return await self.parse_job_description(jd_source)
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
@log_exceptions
|
| 204 |
+
async def _prompt_user(self, prompt_msg: str) -> str:
|
| 205 |
+
"""Prompt the user for input (synchronous ``input`` wrapped for async use)."""
|
| 206 |
+
# In a real async UI replace ``input`` with an async call.
|
| 207 |
+
return input(prompt_msg)
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
async def load_inputs(self, state: DataLoadState) -> AppState:
|
| 211 |
+
"""Orchestrate loading of resume and job description.
|
| 212 |
+
|
| 213 |
+
The method populates ``state['company_research_data']`` with the parsed
|
| 214 |
+
resume, job description, and company name, then advances the workflow
|
| 215 |
+
to the ``load_inputs`` node.
|
| 216 |
+
"""
|
| 217 |
+
resume_src = state.get("resume_path")
|
| 218 |
+
jd_src = state.get("job_description_source")
|
| 219 |
+
|
| 220 |
+
# -------------------------------------------------------------------
|
| 221 |
+
# Load job description (or prompt if missing during verification)
|
| 222 |
+
# -------------------------------------------------------------------
|
| 223 |
+
job_text = ""
|
| 224 |
+
company_name = ""
|
| 225 |
+
if jd_src:
|
| 226 |
+
job_text, company_name = await self._load_job_description(jd_src)
|
| 227 |
+
elif state.get("current_node") == "verify":
|
| 228 |
+
job_text = await self._prompt_user(
|
| 229 |
+
"Please paste the job posting in text format: "
|
| 230 |
+
)
|
| 231 |
+
|
| 232 |
+
# -------------------------------------------------------------------
|
| 233 |
+
# Load resume (or prompt if missing during verification)
|
| 234 |
+
# -------------------------------------------------------------------
|
| 235 |
+
resume_text = ""
|
| 236 |
+
if resume_src:
|
| 237 |
+
resume_text = await self._load_resume(resume_src)
|
| 238 |
+
elif state.get("current_node") == "verify":
|
| 239 |
+
raw = await self._prompt_user(
|
| 240 |
+
"Please paste the resume in text format: "
|
| 241 |
+
)
|
| 242 |
+
resume_text = raw
|
| 243 |
+
|
| 244 |
+
# Populate state
|
| 245 |
+
state["company_research_data"] = {
|
| 246 |
+
"resume": resume_text,
|
| 247 |
+
"job_description": job_text,
|
| 248 |
+
"company_name": company_name,
|
| 249 |
+
}
|
| 250 |
+
state["current_node"] = "load_inputs"
|
| 251 |
+
return state
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
def validate_data_load_state(self, state: DataLoadState):
|
| 255 |
+
"""Ensure required fields are present in ``company_research_data``."""
|
| 256 |
+
assert state.company_research_data.get(
|
| 257 |
+
"resume"
|
| 258 |
+
), "Resume is missing in company_research_data"
|
| 259 |
+
assert state.company_research_data.get(
|
| 260 |
+
"job_description"
|
| 261 |
+
), "Job description is missing"
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
def verify_inputs(self, state: AppState) -> Literal["load", "research"]:
|
| 265 |
+
"""Validate inputs and decide the next workflow node.
|
| 266 |
+
|
| 267 |
+
Returns
|
| 268 |
+
-------
|
| 269 |
+
Literal["load", "research"]
|
| 270 |
+
``"load"`` if required data is missing, otherwise ``"research"``.
|
| 271 |
+
"""
|
| 272 |
+
print("Verifying Inputs")
|
| 273 |
+
state["current_node"] = "verify"
|
| 274 |
+
logger.info("Verifying loaded inputs!")
|
| 275 |
+
assert state["company_research_data"].get(
|
| 276 |
+
"resume"
|
| 277 |
+
), "Resume is missing in company_research_data"
|
| 278 |
+
assert state["company_research_data"].get(
|
| 279 |
+
"job_description"
|
| 280 |
+
), "Job description is missing"
|
| 281 |
+
if not state.get("company_research_data"):
|
| 282 |
+
missing_items = []
|
| 283 |
+
if not state["company_research_data"].get("resume", ""):
|
| 284 |
+
missing_items.append("resume")
|
| 285 |
+
if not state["company_research_data"].get("job_description", ""):
|
| 286 |
+
missing_items.append("job description")
|
| 287 |
+
logger.error("Missing required data: %s", ", ".join(missing_items))
|
| 288 |
+
return "load"
|
| 289 |
+
# Normalise values to strings
|
| 290 |
+
for key in ["resume", "job_description"]:
|
| 291 |
+
try:
|
| 292 |
+
value = state["company_research_data"][key]
|
| 293 |
+
if isinstance(value, (list, tuple)):
|
| 294 |
+
state["company_research_data"][key] = " ".join(
|
| 295 |
+
str(x) for x in value
|
| 296 |
+
)
|
| 297 |
+
elif isinstance(value, dict):
|
| 298 |
+
state["company_research_data"][key] = str(value)
|
| 299 |
+
else:
|
| 300 |
+
state["company_research_data"][key] = str(value)
|
| 301 |
+
except Exception as e:
|
| 302 |
+
logger.warning("Error converting %s to string: %s", key, e)
|
| 303 |
+
raise
|
| 304 |
+
return "research"
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
async def run(self, state: DataLoadState) -> AppState:
|
| 308 |
+
"""Execute the loading step of the workflow."""
|
| 309 |
+
state = await self.load_inputs(state)
|
| 310 |
+
return state
|
{nodes β job_writing_agent/nodes}/research_workflow.py
RENAMED
|
@@ -3,11 +3,15 @@
|
|
| 3 |
This module performs the research phase of the job application writing process.
|
| 4 |
One of the stages is Tavily Search which will be use to search for the company
|
| 5 |
"""
|
|
|
|
| 6 |
import logging
|
|
|
|
| 7 |
from langgraph.graph import StateGraph, START, END
|
| 8 |
|
| 9 |
-
from
|
| 10 |
-
from
|
|
|
|
|
|
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
|
@@ -19,40 +23,61 @@ logging.basicConfig(level=logging.INFO)
|
|
| 19 |
async def research_company(state: ResearchState) -> ResearchState:
|
| 20 |
"""Research the company if name is available."""
|
| 21 |
state["current_node"] = "research_company"
|
| 22 |
-
|
| 23 |
try:
|
| 24 |
# Extract values from state
|
| 25 |
-
company_name = state["company_research_data"].get("company_name",
|
| 26 |
-
job_description = state["company_research_data"].get("job_description",
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
logger.info(f"Researching company: {company_name}")
|
|
|
|
| 29 |
# Call search_company using the invoke method instead of __call__
|
| 30 |
# The tool expects job_description and company_name and returns a tuple
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
# Store results in state - note that results is the first item in the tuple
|
| 46 |
-
state["attempted_search_queries"] =
|
| 47 |
-
state["company_research_data"]["tavily_search"] =
|
| 48 |
-
|
| 49 |
except Exception as e:
|
| 50 |
logger.error(f"Error in research_company: {str(e)}")
|
| 51 |
# Provide empty results to avoid breaking the workflow
|
| 52 |
-
state["company_research_data"]["tavily_search"] =
|
| 53 |
state["attempted_search_queries"] = []
|
| 54 |
-
|
| 55 |
-
|
|
|
|
| 56 |
|
| 57 |
print("\n\n\nInitializing research workflow...\n\n\n")
|
| 58 |
# Create research subgraph
|
|
@@ -70,12 +95,3 @@ research_subgraph.add_edge("relevance_filter", END)
|
|
| 70 |
|
| 71 |
# Compile research subgraph
|
| 72 |
research_workflow = research_subgraph.compile()
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
# class ResearchWorkflow:
|
| 76 |
-
|
| 77 |
-
# def __init__(self):
|
| 78 |
-
# self.research_workflow = research_workflow
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
|
|
|
| 3 |
This module performs the research phase of the job application writing process.
|
| 4 |
One of the stages is Tavily Search which will be use to search for the company
|
| 5 |
"""
|
| 6 |
+
|
| 7 |
import logging
|
| 8 |
+
import json
|
| 9 |
from langgraph.graph import StateGraph, START, END
|
| 10 |
|
| 11 |
+
from job_writing_agent.tools.SearchTool import TavilyResearchTool
|
| 12 |
+
from job_writing_agent.classes.classes import ResearchState
|
| 13 |
+
from job_writing_agent.tools.SearchTool import relevance_filter
|
| 14 |
+
|
| 15 |
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
|
|
|
|
| 23 |
async def research_company(state: ResearchState) -> ResearchState:
|
| 24 |
"""Research the company if name is available."""
|
| 25 |
state["current_node"] = "research_company"
|
| 26 |
+
|
| 27 |
try:
|
| 28 |
# Extract values from state
|
| 29 |
+
company_name: str = state["company_research_data"].get("company_name", None)
|
| 30 |
+
job_description = state["company_research_data"].get("job_description", None)
|
| 31 |
+
|
| 32 |
+
assert company_name is not None, "Company name is required for research_company"
|
| 33 |
+
assert job_description is not None, (
|
| 34 |
+
"Job description is required for research_company"
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
logger.info(f"Researching company: {company_name}")
|
| 38 |
+
|
| 39 |
# Call search_company using the invoke method instead of __call__
|
| 40 |
# The tool expects job_description and company_name and returns a tuple
|
| 41 |
+
tavily_search = TavilyResearchTool(
|
| 42 |
+
job_description=job_description, company_name=company_name
|
| 43 |
+
)
|
| 44 |
+
|
| 45 |
+
tavily_search_queries = tavily_search.create_tavily_queries()
|
| 46 |
+
|
| 47 |
+
tavily_search_queries_json: dict = json.loads(
|
| 48 |
+
tavily_search_queries["search_queries"]
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
logger.info(list(tavily_search_queries_json.values()))
|
| 52 |
+
|
| 53 |
+
tavily_search_results: list[list[str]] = tavily_search.tavily_search_company(
|
| 54 |
+
tavily_search_queries_json
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
assert isinstance(tavily_search_results, list), (
|
| 58 |
+
"Expected list or tuple from tavily_search_company"
|
| 59 |
+
)
|
| 60 |
+
assert len(tavily_search_results) > 0, (
|
| 61 |
+
"No results returned from tavily_search_company"
|
| 62 |
+
)
|
| 63 |
+
assert len(tavily_search_queries_json) > 0, "No search queries were attempted"
|
| 64 |
+
|
| 65 |
+
logger.info(
|
| 66 |
+
f"Search completed with results and {len(tavily_search_queries)} queries"
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
# Store results in state - note that results is the first item in the tuple
|
| 70 |
+
state["attempted_search_queries"] = list(tavily_search_queries_json.values())
|
| 71 |
+
state["company_research_data"]["tavily_search"] = tavily_search_results
|
| 72 |
+
|
| 73 |
except Exception as e:
|
| 74 |
logger.error(f"Error in research_company: {str(e)}")
|
| 75 |
# Provide empty results to avoid breaking the workflow
|
| 76 |
+
state["company_research_data"]["tavily_search"] = []
|
| 77 |
state["attempted_search_queries"] = []
|
| 78 |
+
finally:
|
| 79 |
+
return state
|
| 80 |
+
|
| 81 |
|
| 82 |
print("\n\n\nInitializing research workflow...\n\n\n")
|
| 83 |
# Create research subgraph
|
|
|
|
| 95 |
|
| 96 |
# Compile research subgraph
|
| 97 |
research_workflow = research_subgraph.compile()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{nodes β job_writing_agent/nodes}/selfconsistency.py
RENAMED
|
@@ -1,21 +1,23 @@
|
|
| 1 |
import logging
|
| 2 |
from datetime import datetime
|
|
|
|
|
|
|
| 3 |
|
| 4 |
from ..classes.classes import AppState
|
| 5 |
from ..prompts.templates import (
|
| 6 |
DRAFT_RATING_PROMPT,
|
| 7 |
BEST_DRAFT_SELECTION_PROMPT
|
| 8 |
)
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
# Constants
|
| 13 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
# llm_precise = LLMClient().get_llm()
|
| 18 |
-
|
| 19 |
|
| 20 |
def self_consistency_vote(state: AppState) -> AppState:
|
| 21 |
"""Choose the best draft from multiple variations."""
|
|
@@ -28,25 +30,25 @@ def self_consistency_vote(state: AppState) -> AppState:
|
|
| 28 |
|
| 29 |
# Get resume and job summaries, handling different formats
|
| 30 |
try:
|
| 31 |
-
if isinstance(state["
|
| 32 |
-
if hasattr(state["
|
| 33 |
-
resume_summary = state["
|
| 34 |
else:
|
| 35 |
-
resume_summary = state["
|
| 36 |
else:
|
| 37 |
-
resume_summary = str(state["
|
| 38 |
except Exception as e:
|
| 39 |
print(f"Warning: Error getting resume summary: {e}")
|
| 40 |
-
resume_summary = str(state["
|
| 41 |
|
| 42 |
try:
|
| 43 |
-
if isinstance(state["
|
| 44 |
-
job_summary = state["
|
| 45 |
else:
|
| 46 |
-
job_summary = str(state["
|
| 47 |
except Exception as e:
|
| 48 |
print(f"Warning: Error getting job summary: {e}")
|
| 49 |
-
job_summary = str(state["
|
| 50 |
|
| 51 |
for i, draft in enumerate(all_drafts):
|
| 52 |
rating = llm_precise.invoke(DRAFT_RATING_PROMPT.format(
|
|
|
|
| 1 |
import logging
|
| 2 |
from datetime import datetime
|
| 3 |
+
import json
|
| 4 |
+
import re
|
| 5 |
|
| 6 |
from ..classes.classes import AppState
|
| 7 |
from ..prompts.templates import (
|
| 8 |
DRAFT_RATING_PROMPT,
|
| 9 |
BEST_DRAFT_SELECTION_PROMPT
|
| 10 |
)
|
| 11 |
+
from ..utils.llm_provider_factory import LLMFactory
|
| 12 |
+
|
| 13 |
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
# Constants
|
| 17 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 18 |
|
| 19 |
+
llm_factory = LLMFactory()
|
| 20 |
+
llm_precise = llm_factory.create_langchain(model="qwen/qwen3-4b:free", provider="openrouter", temperature=0.1)
|
|
|
|
|
|
|
| 21 |
|
| 22 |
def self_consistency_vote(state: AppState) -> AppState:
|
| 23 |
"""Choose the best draft from multiple variations."""
|
|
|
|
| 30 |
|
| 31 |
# Get resume and job summaries, handling different formats
|
| 32 |
try:
|
| 33 |
+
if isinstance(state["resume_path"], list) and len(state["resume_path"]) > 0:
|
| 34 |
+
if hasattr(state["resume_path"][0], 'page_content'):
|
| 35 |
+
resume_summary = state["resume_path"][0].page_content
|
| 36 |
else:
|
| 37 |
+
resume_summary = state["resume_path"][0]
|
| 38 |
else:
|
| 39 |
+
resume_summary = str(state["resume_path"])
|
| 40 |
except Exception as e:
|
| 41 |
print(f"Warning: Error getting resume summary: {e}")
|
| 42 |
+
resume_summary = str(state["resume_path"])
|
| 43 |
|
| 44 |
try:
|
| 45 |
+
if isinstance(state["job_description_source"], list) and len(state["job_description_source"]) > 0:
|
| 46 |
+
job_summary = state["job_description_source"][0]
|
| 47 |
else:
|
| 48 |
+
job_summary = str(state["job_description_source"])
|
| 49 |
except Exception as e:
|
| 50 |
print(f"Warning: Error getting job summary: {e}")
|
| 51 |
+
job_summary = str(state["job_description_source"])
|
| 52 |
|
| 53 |
for i, draft in enumerate(all_drafts):
|
| 54 |
rating = llm_precise.invoke(DRAFT_RATING_PROMPT.format(
|
{nodes β job_writing_agent/nodes}/test_workflow.py
RENAMED
|
File without changes
|
{nodes β job_writing_agent/nodes}/variations.py
RENAMED
|
@@ -6,18 +6,20 @@ from langchain_core.documents import Document
|
|
| 6 |
|
| 7 |
|
| 8 |
from ..classes.classes import AppState
|
| 9 |
-
from ..utils.
|
| 10 |
-
from ..prompts.templates import
|
| 11 |
-
VARIATION_PROMPT
|
| 12 |
-
)
|
| 13 |
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
# Constants
|
| 17 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
def generate_variations(state: AppState) -> Dict[str, List[str]]:
|
| 23 |
"""Generate multiple variations of the draft for self-consistency voting."""
|
|
@@ -25,11 +27,22 @@ def generate_variations(state: AppState) -> Dict[str, List[str]]:
|
|
| 25 |
|
| 26 |
# Get resume and job text, handling both string and Document types
|
| 27 |
try:
|
| 28 |
-
resume_text = "\n".join(
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
except Exception as e:
|
| 34 |
print(f"Warning: Error processing resume/job text: {e}")
|
| 35 |
# Fallback to simple string handling
|
|
@@ -42,7 +55,7 @@ def generate_variations(state: AppState) -> Dict[str, List[str]]:
|
|
| 42 |
{"temperature": 0.75, "top_p": 0.92}, # Balanced
|
| 43 |
{"temperature": 0.8, "top_p": 0.95}, # More creative
|
| 44 |
{"temperature": 0.7, "top_p": 0.85}, # Alternative conservative
|
| 45 |
-
{"temperature": 0.8, "top_p": 0.98}
|
| 46 |
]
|
| 47 |
|
| 48 |
for settings in temp_variations:
|
|
@@ -52,11 +65,9 @@ def generate_variations(state: AppState) -> Dict[str, List[str]]:
|
|
| 52 |
|
| 53 |
# Use VARIATION_PROMPT directly with the configured LLM
|
| 54 |
variation = VARIATION_PROMPT.format_messages(
|
| 55 |
-
resume_excerpt=resume_text,
|
| 56 |
-
job_excerpt=job_text,
|
| 57 |
-
draft=state["draft"]
|
| 58 |
)
|
| 59 |
-
|
| 60 |
response = configured_llm.invoke(variation)
|
| 61 |
|
| 62 |
if response and response.strip(): # Only add non-empty variations
|
|
@@ -70,4 +81,4 @@ def generate_variations(state: AppState) -> Dict[str, List[str]]:
|
|
| 70 |
# If all variations failed, add the original draft as a fallback
|
| 71 |
variations.append(state["draft"])
|
| 72 |
|
| 73 |
-
return {"variations": variations}
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
from ..classes.classes import AppState
|
| 9 |
+
from ..utils.llm_provider_factory import LLMFactory
|
| 10 |
+
from ..prompts.templates import VARIATION_PROMPT
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
# Constants
|
| 15 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 16 |
|
| 17 |
+
llm_provider = LLMFactory()
|
| 18 |
+
|
| 19 |
+
llm = llm_provider.create_langchain(
|
| 20 |
+
"qwen/qwen3-4b:free", provider="openrouter", temperature=0.3
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
|
| 24 |
def generate_variations(state: AppState) -> Dict[str, List[str]]:
|
| 25 |
"""Generate multiple variations of the draft for self-consistency voting."""
|
|
|
|
| 27 |
|
| 28 |
# Get resume and job text, handling both string and Document types
|
| 29 |
try:
|
| 30 |
+
resume_text = "\n".join(
|
| 31 |
+
doc.page_content if isinstance(doc, Document) else doc
|
| 32 |
+
for doc in (
|
| 33 |
+
state["resume"][:2]
|
| 34 |
+
if isinstance(state["company_research_data"]["resume"], str)
|
| 35 |
+
else [state["resume"]]
|
| 36 |
+
)
|
| 37 |
+
)
|
| 38 |
+
job_text = "\n".join(
|
| 39 |
+
chunk
|
| 40 |
+
for chunk in (
|
| 41 |
+
state["company_research_data"]["job_description"][:2]
|
| 42 |
+
if isinstance(state["company_research_data"]["job_description"], str)
|
| 43 |
+
else [state["company_research_data"]["job_description"]]
|
| 44 |
+
)
|
| 45 |
+
)
|
| 46 |
except Exception as e:
|
| 47 |
print(f"Warning: Error processing resume/job text: {e}")
|
| 48 |
# Fallback to simple string handling
|
|
|
|
| 55 |
{"temperature": 0.75, "top_p": 0.92}, # Balanced
|
| 56 |
{"temperature": 0.8, "top_p": 0.95}, # More creative
|
| 57 |
{"temperature": 0.7, "top_p": 0.85}, # Alternative conservative
|
| 58 |
+
{"temperature": 0.8, "top_p": 0.98}, # Most creative
|
| 59 |
]
|
| 60 |
|
| 61 |
for settings in temp_variations:
|
|
|
|
| 65 |
|
| 66 |
# Use VARIATION_PROMPT directly with the configured LLM
|
| 67 |
variation = VARIATION_PROMPT.format_messages(
|
| 68 |
+
resume_excerpt=resume_text, job_excerpt=job_text, draft=state["draft"]
|
|
|
|
|
|
|
| 69 |
)
|
| 70 |
+
|
| 71 |
response = configured_llm.invoke(variation)
|
| 72 |
|
| 73 |
if response and response.strip(): # Only add non-empty variations
|
|
|
|
| 81 |
# If all variations failed, add the original draft as a fallback
|
| 82 |
variations.append(state["draft"])
|
| 83 |
|
| 84 |
+
return {"variations": variations}
|
prompts.md β job_writing_agent/prompts.md
RENAMED
|
File without changes
|
{prompts β job_writing_agent/prompts}/__init__.py
RENAMED
|
File without changes
|
{prompts β job_writing_agent/prompts}/templates.py
RENAMED
|
@@ -1,35 +1,40 @@
|
|
| 1 |
"""
|
| 2 |
Prompt templates for the job application writer.
|
| 3 |
|
| 4 |
-
This module contains all prompt templates used throughout the job application
|
| 5 |
generation process, organized by task.
|
| 6 |
"""
|
| 7 |
|
| 8 |
from langchain_core.prompts import ChatPromptTemplate
|
| 9 |
from langchain_core.messages import SystemMessage, HumanMessage
|
| 10 |
|
| 11 |
-
|
| 12 |
# Persona selection prompts
|
| 13 |
-
|
| 14 |
-
PERSONA_DEVELOPMENT_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 15 |
-
|
|
|
|
|
|
|
| 16 |
You are my dedicated JobβApplication Writing Assistant.
|
| 17 |
MISSION
|
| 18 |
β’ Draft cover letters, LinkedIn messages, and answer's to questions within the job applications.
|
| 19 |
β’ Sound like me: grounded, confident, clearβnever fluffy or journalistic.
|
| 20 |
β’ You will be provided "STYLE & LANGUAGE RULES" and "SELFβEVALUATION CHECKLIST" to follow.
|
| 21 |
-
"""
|
| 22 |
-
|
|
|
|
|
|
|
| 23 |
or a hiring manager. Return ONLY 'recruiter' or 'hiring_manager':
|
| 24 |
-
|
| 25 |
-
{job_description}"""
|
| 26 |
-
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
# Draft generation prompts
|
| 30 |
|
| 31 |
-
COVER_LETTER_PROMPT: SystemMessage = SystemMessage(
|
| 32 |
-
|
| 33 |
You are CoverLetterGPT, a concise careerβwriting assistant.
|
| 34 |
|
| 35 |
CORE OBJECTIVE
|
|
@@ -57,159 +62,179 @@ COVER_LETTER_PROMPT: SystemMessage = SystemMessage(content=
|
|
| 57 |
ERROR HANDLING
|
| 58 |
If word count, section order, or format rules are violated, regenerate until correct.
|
| 59 |
"""
|
| 60 |
-
|
| 61 |
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
"""You are an expert job application writer who
|
| 66 |
creates personalized application materials.
|
| 67 |
|
| 68 |
{persona_instruction}
|
| 69 |
-
|
| 70 |
Write 5-7 bullet points highlighting the candidate's
|
| 71 |
qualifications for this specific role.
|
| 72 |
Create content that genuinely reflects the candidate's
|
| 73 |
background and is tailored to the specific job.
|
| 74 |
Ensure the tone is professional, confident, and authentic.
|
| 75 |
-
Today is {current_date}."""
|
|
|
|
| 76 |
|
| 77 |
|
| 78 |
-
LINKEDIN_NOTE_PROMPT: SystemMessage = SystemMessage(
|
|
|
|
| 79 |
writer who creates personalized application materials.
|
| 80 |
{persona_instruction}
|
| 81 |
-
|
| 82 |
Write a brief LinkedIn connection note to a hiring manager or recruiter (150 words max).
|
| 83 |
Create content that genuinely reflects the candidate's background and is tailored to the specific job.
|
| 84 |
Ensure the tone is professional, confident, and authentic.
|
| 85 |
-
Today is {current_date}."""
|
|
|
|
| 86 |
|
| 87 |
# Variation generation prompt
|
| 88 |
-
VARIATION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
# Resume Excerpt
|
| 92 |
{resume_excerpt}
|
| 93 |
-
|
| 94 |
# Job Description Excerpt
|
| 95 |
{job_excerpt}
|
| 96 |
-
|
| 97 |
# Original Draft
|
| 98 |
{draft}
|
| 99 |
-
|
| 100 |
Create a variation of this draft with the same key points but different wording or structure.
|
| 101 |
-
"""
|
| 102 |
-
|
|
|
|
|
|
|
| 103 |
|
| 104 |
|
| 105 |
# Critique prompt
|
| 106 |
|
| 107 |
-
CRITIQUE_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
# Job Description
|
| 111 |
{job_description}
|
| 112 |
-
|
| 113 |
# Current Draft
|
| 114 |
{draft}
|
| 115 |
-
|
| 116 |
Critique this draft and suggest specific improvements. Focus on:
|
| 117 |
1. How well it targets the job requirements
|
| 118 |
2. Professional tone and language
|
| 119 |
3. Clarity and impact
|
| 120 |
4. Grammar and style
|
| 121 |
-
|
| 122 |
Return your critique in a constructive, actionable format.
|
| 123 |
-
"""
|
| 124 |
-
|
|
|
|
|
|
|
| 125 |
|
| 126 |
|
| 127 |
# Draft rating prompt
|
| 128 |
|
| 129 |
-
DRAFT_RATING_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 130 |
-
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
# Resume Summary
|
| 133 |
{resume_summary}
|
| 134 |
-
|
| 135 |
# Job Description Summary
|
| 136 |
{job_summary}
|
| 137 |
-
|
| 138 |
# Draft #{draft_number}
|
| 139 |
{draft}
|
| 140 |
-
|
| 141 |
Rate this draft on a scale of 1-10 for:
|
| 142 |
1. Relevance to the job requirements
|
| 143 |
2. Professional tone
|
| 144 |
3. Personalization
|
| 145 |
4. Persuasiveness
|
| 146 |
5. Clarity
|
| 147 |
-
|
| 148 |
Return ONLY a JSON object with these ratings and a brief explanation for each.
|
| 149 |
-
"""
|
| 150 |
-
|
|
|
|
|
|
|
| 151 |
|
| 152 |
|
| 153 |
# Best draft selection prompt
|
| 154 |
|
| 155 |
-
BEST_DRAFT_SELECTION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 156 |
-
|
|
|
|
|
|
|
| 157 |
You MUST return ONLY a single number between 1 and the number of drafts.
|
| 158 |
For example, if draft #2 is best, return ONLY '2'.
|
| 159 |
-
Do NOT include ANY other text, explanations, or characters in your response."""
|
| 160 |
-
|
| 161 |
-
|
|
|
|
|
|
|
| 162 |
{ratings_json}
|
| 163 |
|
| 164 |
Based on these ratings, return ONLY the number of the best draft (1-{num_drafts}).
|
| 165 |
Your entire response must be just one number.
|
| 166 |
Example: If draft #2 is best, return ONLY '2'.
|
| 167 |
-
"""
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
# Original Draft
|
| 175 |
{draft}
|
| 176 |
-
|
| 177 |
# Feedback
|
| 178 |
{feedback}
|
| 179 |
-
|
| 180 |
Revise the draft to incorporate this feedback while maintaining professionalism and impact.
|
| 181 |
Return the complete, final version.
|
| 182 |
-
"""
|
| 183 |
-
|
|
|
|
|
|
|
| 184 |
|
| 185 |
# Tavily query prompt to build knowledge context about the company
|
| 186 |
|
| 187 |
-
TAVILY_QUERY_PROMPT =
|
| 188 |
<Context>
|
| 189 |
-
The user needs targeted search queries (with rationale) for Tavily Search to research company {} and inform a personalized cover letter.
|
| 190 |
</Context>
|
| 191 |
|
| 192 |
<Requirements>
|
| 193 |
- Output a JSON object with five fields:
|
| 194 |
-
- Keys: recent_developments, recent_news, role_info, customers_partners, culture_values
|
| 195 |
-
- Each value: an array of exactly two strings: [search query for Tavily Search, reasoning].
|
| 196 |
-
- Always include the company name in the search query to boost relevance.
|
| 197 |
-
- If any data is missing, supply a sensible fallback query that still references the company.
|
| 198 |
- Do not repeat queries across fields.
|
| 199 |
</Requirements>
|
| 200 |
-
|
| 201 |
-
<OutputFormat>
|
| 202 |
-
```json
|
| 203 |
-
{
|
| 204 |
-
"recent_developments": ["β¦", "β¦"],
|
| 205 |
-
"recent_news": ["β¦", "β¦"],
|
| 206 |
-
"role_info": ["β¦", "β¦"],
|
| 207 |
-
"customers_partners":["β¦", "β¦"],
|
| 208 |
-
"culture_values": ["β¦", "β¦"]
|
| 209 |
-
}
|
| 210 |
-
```
|
| 211 |
-
</OutputFormat>
|
| 212 |
-
'''
|
| 213 |
|
| 214 |
JOB_DESCRIPTION_PROMPT = """You are a JSON extraction specialist. Extract job information from the provided text and return ONLY valid JSON.
|
| 215 |
|
|
@@ -236,4 +261,80 @@ REQUIRED OUTPUT FORMAT:
|
|
| 236 |
"job_title": "exact job title"
|
| 237 |
}}
|
| 238 |
|
| 239 |
-
Return only the JSON object - no other text."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
Prompt templates for the job application writer.
|
| 3 |
|
| 4 |
+
This module contains all prompt templates used throughout the job application
|
| 5 |
generation process, organized by task.
|
| 6 |
"""
|
| 7 |
|
| 8 |
from langchain_core.prompts import ChatPromptTemplate
|
| 9 |
from langchain_core.messages import SystemMessage, HumanMessage
|
| 10 |
|
|
|
|
| 11 |
# Persona selection prompts
|
| 12 |
+
#
|
| 13 |
+
PERSONA_DEVELOPMENT_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 14 |
+
[
|
| 15 |
+
SystemMessage(
|
| 16 |
+
content="""
|
| 17 |
You are my dedicated JobβApplication Writing Assistant.
|
| 18 |
MISSION
|
| 19 |
• Draft cover letters, LinkedIn messages, and answers to questions within the job applications.
|
| 20 |
β’ Sound like me: grounded, confident, clearβnever fluffy or journalistic.
|
| 21 |
β’ You will be provided "STYLE & LANGUAGE RULES" and "SELFβEVALUATION CHECKLIST" to follow.
|
| 22 |
+
"""
|
| 23 |
+
),
|
| 24 |
+
HumanMessage(
|
| 25 |
+
content="""Analyze this job description and determine if it's better to write as if addressing a recruiter
|
| 26 |
or a hiring manager. Return ONLY 'recruiter' or 'hiring_manager':
|
| 27 |
+
|
| 28 |
+
{job_description}"""
|
| 29 |
+
),
|
| 30 |
+
]
|
| 31 |
+
)
|
| 32 |
|
| 33 |
|
| 34 |
# Draft generation prompts
|
| 35 |
|
| 36 |
+
COVER_LETTER_PROMPT: SystemMessage = SystemMessage(
|
| 37 |
+
content="""
|
| 38 |
You are CoverLetterGPT, a concise careerβwriting assistant.
|
| 39 |
|
| 40 |
CORE OBJECTIVE
|
|
|
|
| 62 |
ERROR HANDLING
|
| 63 |
If word count, section order, or format rules are violated, regenerate until correct.
|
| 64 |
"""
|
| 65 |
+
)
|
| 66 |
|
| 67 |
|
| 68 |
+
BULLET_POINTS_PROMPT: SystemMessage = SystemMessage(
|
| 69 |
+
content="""You are an expert job application writer who
|
|
|
|
| 70 |
creates personalized application materials.
|
| 71 |
|
| 72 |
{persona_instruction}
|
| 73 |
+
|
| 74 |
Write 5-7 bullet points highlighting the candidate's
|
| 75 |
qualifications for this specific role.
|
| 76 |
Create content that genuinely reflects the candidate's
|
| 77 |
background and is tailored to the specific job.
|
| 78 |
Ensure the tone is professional, confident, and authentic.
|
| 79 |
+
Today is {current_date}."""
|
| 80 |
+
)
|
| 81 |
|
| 82 |
|
| 83 |
+
LINKEDIN_NOTE_PROMPT: SystemMessage = SystemMessage(
|
| 84 |
+
content="""You are an expert job application
|
| 85 |
writer who creates personalized application materials.
|
| 86 |
{persona_instruction}
|
| 87 |
+
|
| 88 |
Write a brief LinkedIn connection note to a hiring manager or recruiter (150 words max).
|
| 89 |
Create content that genuinely reflects the candidate's background and is tailored to the specific job.
|
| 90 |
Ensure the tone is professional, confident, and authentic.
|
| 91 |
+
Today is {current_date}."""
|
| 92 |
+
)
|
| 93 |
|
| 94 |
# Variation generation prompt
|
| 95 |
+
VARIATION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 96 |
+
[
|
| 97 |
+
SystemMessage(
|
| 98 |
+
content="You are an expert job application writer. Create a variation of the given draft."
|
| 99 |
+
),
|
| 100 |
+
HumanMessage(
|
| 101 |
+
content="""
|
| 102 |
# Resume Excerpt
|
| 103 |
{resume_excerpt}
|
| 104 |
+
|
| 105 |
# Job Description Excerpt
|
| 106 |
{job_excerpt}
|
| 107 |
+
|
| 108 |
# Original Draft
|
| 109 |
{draft}
|
| 110 |
+
|
| 111 |
Create a variation of this draft with the same key points but different wording or structure.
|
| 112 |
+
"""
|
| 113 |
+
),
|
| 114 |
+
]
|
| 115 |
+
)
|
| 116 |
|
| 117 |
|
| 118 |
# Critique prompt
|
| 119 |
|
| 120 |
+
CRITIQUE_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 121 |
+
[
|
| 122 |
+
SystemMessage(
|
| 123 |
+
content="You are a professional editor who specializes in job applications. Provide constructive feedback."
|
| 124 |
+
),
|
| 125 |
+
HumanMessage(
|
| 126 |
+
content="""
|
| 127 |
# Job Description
|
| 128 |
{job_description}
|
| 129 |
+
|
| 130 |
# Current Draft
|
| 131 |
{draft}
|
| 132 |
+
|
| 133 |
Critique this draft and suggest specific improvements. Focus on:
|
| 134 |
1. How well it targets the job requirements
|
| 135 |
2. Professional tone and language
|
| 136 |
3. Clarity and impact
|
| 137 |
4. Grammar and style
|
| 138 |
+
|
| 139 |
Return your critique in a constructive, actionable format.
|
| 140 |
+
"""
|
| 141 |
+
),
|
| 142 |
+
]
|
| 143 |
+
)
|
| 144 |
|
| 145 |
|
| 146 |
# Draft rating prompt
|
| 147 |
|
| 148 |
+
DRAFT_RATING_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 149 |
+
[
|
| 150 |
+
SystemMessage(
|
| 151 |
+
content="You evaluate job application materials for effectiveness, appropriateness, and impact."
|
| 152 |
+
),
|
| 153 |
+
HumanMessage(
|
| 154 |
+
content="""
|
| 155 |
# Resume Summary
|
| 156 |
{resume_summary}
|
| 157 |
+
|
| 158 |
# Job Description Summary
|
| 159 |
{job_summary}
|
| 160 |
+
|
| 161 |
# Draft #{draft_number}
|
| 162 |
{draft}
|
| 163 |
+
|
| 164 |
Rate this draft on a scale of 1-10 for:
|
| 165 |
1. Relevance to the job requirements
|
| 166 |
2. Professional tone
|
| 167 |
3. Personalization
|
| 168 |
4. Persuasiveness
|
| 169 |
5. Clarity
|
| 170 |
+
|
| 171 |
Return ONLY a JSON object with these ratings and a brief explanation for each.
|
| 172 |
+
"""
|
| 173 |
+
),
|
| 174 |
+
]
|
| 175 |
+
)
|
| 176 |
|
| 177 |
|
| 178 |
# Best draft selection prompt
|
| 179 |
|
| 180 |
+
BEST_DRAFT_SELECTION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 181 |
+
[
|
| 182 |
+
SystemMessage(
|
| 183 |
+
content="""You are a job application expert who selects the best draft based on multiple ratings.
|
| 184 |
You MUST return ONLY a single number between 1 and the number of drafts.
|
| 185 |
For example, if draft #2 is best, return ONLY '2'.
|
| 186 |
+
Do NOT include ANY other text, explanations, or characters in your response."""
|
| 187 |
+
),
|
| 188 |
+
HumanMessage(
|
| 189 |
+
content="""Here are the ratings for {num_drafts} different drafts:
|
| 190 |
+
|
| 191 |
{ratings_json}
|
| 192 |
|
| 193 |
Based on these ratings, return ONLY the number of the best draft (1-{num_drafts}).
|
| 194 |
Your entire response must be just one number.
|
| 195 |
Example: If draft #2 is best, return ONLY '2'.
|
| 196 |
+
"""
|
| 197 |
+
),
|
| 198 |
+
]
|
| 199 |
+
)
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
REVISION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
| 203 |
+
[
|
| 204 |
+
SystemMessage(
|
| 205 |
+
content="You are an expert job application writer. Revise the draft based on feedback."
|
| 206 |
+
),
|
| 207 |
+
HumanMessage(
|
| 208 |
+
content="""
|
| 209 |
# Original Draft
|
| 210 |
{draft}
|
| 211 |
+
|
| 212 |
# Feedback
|
| 213 |
{feedback}
|
| 214 |
+
|
| 215 |
Revise the draft to incorporate this feedback while maintaining professionalism and impact.
|
| 216 |
Return the complete, final version.
|
| 217 |
+
"""
|
| 218 |
+
),
|
| 219 |
+
]
|
| 220 |
+
)
|
| 221 |
|
| 222 |
# Tavily query prompt to build knowledge context about the company
|
| 223 |
|
| 224 |
+
TAVILY_QUERY_PROMPT = """
|
| 225 |
<Context>
|
| 226 |
+
The user needs targeted search queries (with rationale) for Tavily Search to research company {company_name} and inform a personalized cover letter.
|
| 227 |
</Context>
|
| 228 |
|
| 229 |
<Requirements>
|
| 230 |
- Output a JSON object with five fields:
|
| 231 |
+
- Keys: recent_developments, recent_news, role_info, customers_partners, culture_values
|
| 232 |
+
- Each value: an array of exactly two strings: [search query for Tavily Search, reasoning].
|
| 233 |
+
- Always include the company name in the search query to boost relevance.
|
| 234 |
+
- If any data is missing, supply a sensible fallback query that still references the company.
|
| 235 |
- Do not repeat queries across fields.
|
| 236 |
</Requirements>
|
| 237 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
JOB_DESCRIPTION_PROMPT = """You are a JSON extraction specialist. Extract job information from the provided text and return ONLY valid JSON.
|
| 240 |
|
|
|
|
| 261 |
"job_title": "exact job title"
|
| 262 |
}}
|
| 263 |
|
| 264 |
+
Return only the JSON object - no other text."""
|
| 265 |
+
|
| 266 |
+
agent_system_prompt = """I act as your personal job-application assistant.
|
| 267 |
+
My function is to help you research, analyze, and write compelling application
|
| 268 |
+
materials β primarily LinkedIn reach-outs, short written responses, and cover
|
| 269 |
+
letters β that reflect your authentic tone and technical depth.
|
| 270 |
+
|
| 271 |
+
Objectives
|
| 272 |
+
Craft clear, grounded, and natural-sounding messages that align with your
|
| 273 |
+
authentic communication style. Demonstrate technical understanding and
|
| 274 |
+
contextual awareness of each companyβs product, values, and challenges.
|
| 275 |
+
|
| 276 |
+
Emphasize learning, reasoning, and problem-solving rather than self-promotion
|
| 277 |
+
or buzzwords. Ensure every message sounds like a thoughtful professional
|
| 278 |
+
reaching out, not a template or AI-generated draft.
|
| 279 |
+
|
| 280 |
+
Build continuity across roles β every message should fit within your professional narrative.
|
| 281 |
+
Tone and Writing Style
|
| 282 |
+
Conversational but precise β direct, human, and free of excess formality.
|
| 283 |
+
|
| 284 |
+
Subtle confidence β competence shown through clarity and insight, not self-congratulation.
|
| 285 |
+
|
| 286 |
+
Technical fluency β use of tools, frameworks, and engineering terms only when they add clarity.
|
| 287 |
+
|
| 288 |
+
Reflective and curious β focus on what you learned, how you think, and how you can contribute.
|
| 289 |
+
|
| 290 |
+
Natural pacing β avoid robotic phrasing, unnecessary enthusiasm, or exaggerated adjectives.
|
| 291 |
+
|
| 292 |
+
Avoid clichΓ©s and filler such as βthrilled,β βsuper excited,β βamazing opportunity,β βpassionate about.β
|
| 293 |
+
|
| 294 |
+
Method of Work
|
| 295 |
+
Research Phase
|
| 296 |
+
|
| 297 |
+
Conduct independent research on the companyβs product, mission, values, funding, and team.
|
| 298 |
+
|
| 299 |
+
Cross-reference with your experiences to find genuine points of alignment.
|
| 300 |
+
|
| 301 |
+
Understanding Phase
|
| 302 |
+
|
| 303 |
+
Discuss the job role and expectations in detail.
|
| 304 |
+
|
| 305 |
+
Identify how your prior projects and technical choices connect to the roleβs demands.
|
| 306 |
+
|
| 307 |
+
Drafting Phase
|
| 308 |
+
|
| 309 |
+
Produce concise, personalized drafts (60β120 words) written in your natural tone.
|
| 310 |
+
|
| 311 |
+
Maintain balance between professional precision and approachability.
|
| 312 |
+
|
| 313 |
+
Iteration Phase
|
| 314 |
+
|
| 315 |
+
Refine drafts collaboratively, focusing on phrasing, rhythm, and alignment with company voice.
|
| 316 |
+
|
| 317 |
+
Remove unnecessary polish and restore your authentic rhythm if it drifts toward generic tone.
|
| 318 |
+
|
| 319 |
+
Reflection Phase
|
| 320 |
+
|
| 321 |
+
Summarize what worked well (tone, structure, balance) for future re-use.
|
| 322 |
+
|
| 323 |
+
Maintain consistency across all application materials.
|
| 324 |
+
|
| 325 |
+
Persistent Preferences
|
| 326 |
+
Avoid βAI-soundingβ or over-polished phrasing.
|
| 327 |
+
|
| 328 |
+
Respect word limits:
|
| 329 |
+
|
| 330 |
+
LinkedIn messages: 60β80 words.
|
| 331 |
+
|
| 332 |
+
Application answers: 80β125 words.
|
| 333 |
+
|
| 334 |
+
Cover letters: 250β300 words.
|
| 335 |
+
|
| 336 |
+
Show understanding of why a companyβs product matters, not just what it does.
|
| 337 |
+
|
| 338 |
+
Favor depth over trendiness β insight and reasoning over surface-level alignment.
|
| 339 |
+
|
| 340 |
+
Reflect ownership, curiosity, and thoughtful engineering perspective."""
|
job_writing_agent/tools/SearchTool.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import os
|
| 3 |
+
import asyncio
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
from langchain_tavily import TavilySearch
|
| 8 |
+
from openevals.llm import create_async_llm_as_judge
|
| 9 |
+
from openevals.prompts import (
|
| 10 |
+
RAG_RETRIEVAL_RELEVANCE_PROMPT,
|
| 11 |
+
RAG_HELPFULNESS_PROMPT
|
| 12 |
+
)
|
| 13 |
+
import dspy
|
| 14 |
+
|
| 15 |
+
from ..agents.output_schema import TavilySearchQueries
|
| 16 |
+
from ..classes.classes import ResearchState
|
| 17 |
+
from ..utils.llm_provider_factory import LLMFactory
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
env_path = Path(__file__).parent / '.env'
|
| 23 |
+
load_dotenv(dotenv_path=env_path, override=True)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
openrouter_api_key = os.environ["OPENROUTER_API_KEY"]
|
| 27 |
+
|
| 28 |
+
llm_provider = LLMFactory()
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class TavilyResearchTool:
    """Research a target company via Tavily web search.

    An LLM (driven through DSPy) first generates targeted search queries
    from the job description and company name; each query is then run
    through TavilySearch and the result snippets are collected.
    """

    def __init__(self, job_description, company_name, max_results=5, model_name="qwen/qwen3-4b:free"):
        """
        Args:
            job_description: Raw text of the job posting.
            company_name: Name of the company being researched.
            max_results: Maximum results returned per Tavily query.
            model_name: OpenRouter model used for query generation.
        """
        self.dspy_llm = llm_provider.create_dspy(model=model_name,
                                                 provider="openrouter",
                                                 temperature=0.3)
        self.job_description = job_description
        self.company_name = company_name
        self.tavily_searchtool = TavilySearch(max_results=max_results)

    def create_tavily_queries(self):
        """Generate search queries for TavilySearch from the job description and company name.

        Returns:
            dspy.Prediction: The DSPy prediction holding the generated search
            queries. Note this is NOT a plain dict; access the generated
            fields as attributes or via mapping-style lookup.
        """
        tavily_query_generator = dspy.ChainOfThought(TavilySearchQueries)
        # JSONAdapter forces structured output from the model.
        with dspy.context(lm=self.dspy_llm, adapter=dspy.JSONAdapter()):
            response = tavily_query_generator(job_description=self.job_description, company_name=self.company_name)
        return response

    def tavily_search_company(self, queries):
        """Run each generated query through Tavily and collect result contents.

        Args:
            queries: Mapping-like object of field name -> search query string
                (iteration yields keys; indexing yields the query text).

        Returns:
            list[list[str]]: One list of result snippets per successful query.
            Failed queries are logged and skipped so a single bad query does
            not abort the whole research pass.
        """
        query_results: list[list[str]] = []
        for query in queries:
            try:
                search_query_response = self.tavily_searchtool.invoke({"query": queries[query]})
                query_results.append([res['content'] for res in search_query_response['results']])
            except Exception as e:
                logger.error(f"Tavily search failed for query field '{query}'. Error: {e}")
                continue

        return query_results
|
| 66 |
+
|
| 67 |
+
llm_structured = llm_provider.create_langchain("llama3.1-8b",
|
| 68 |
+
provider="cerebras",
|
| 69 |
+
temperature=0.3)
|
| 70 |
+
|
| 71 |
+
def get_relevance_evaluator():
    """Build an async LLM-as-judge that scores retrieval relevance.

    The judge is the module-level structured LLM; the prompt is the stock
    RAG retrieval-relevance prompt shipped with openevals.
    """
    judge_config = {
        "judge": llm_structured,
        "prompt": RAG_RETRIEVAL_RELEVANCE_PROMPT,
        "feedback_key": "retrieval_relevance",
    }
    return create_async_llm_as_judge(**judge_config)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def get_helpfulness_evaluator():
    """Build an async LLM-as-judge that returns a boolean helpfulness verdict.

    Appends an instruction to the stock helpfulness prompt so the judge
    answers strictly "true" or "false".
    """
    boolean_prompt = (
        RAG_HELPFULNESS_PROMPT
        + '\nReturn "true" if the answer is helpful, and "false" otherwise.'
    )
    return create_async_llm_as_judge(
        judge=llm_structured,
        prompt=boolean_prompt,
        feedback_key="helpfulness",
    )
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
async def relevance_filter(state: ResearchState) -> ResearchState:
    """Filter the Tavily search results in *state* down to relevant ones.

    Each query's result block is scored by an async LLM judge (retrieval
    relevance), with at most two evaluations in flight at once. Blocks that
    score positively replace ``state["company_research_data"]["tavily_search"]``.
    On any error the original state is returned unchanged so the workflow
    keeps flowing.

    Args:
        state: Research workflow state; must contain
            ``company_research_data.tavily_search`` (list of result blocks)
            and ``attempted_search_queries`` (parallel list of query strings).

    Returns:
        The (possibly updated) state.
    """
    try:
        # Set the current node for workflow tracking.
        state["current_node"] = "relevance_filter"

        tavily_search_results = state["company_research_data"]["tavily_search"]
        attempted_tavily_query_list = state["attempted_search_queries"]

        # Both must be lists; zip() below pairs each result block with the
        # query that produced it. (AssertionError is caught by the outer
        # handler and the state returned unmodified.)
        assert isinstance(tavily_search_results, list), "tavily_search_results is not a list"
        assert isinstance(attempted_tavily_query_list, list), "attempted_tavily_query_list is not a list"

        logger.info("Filtering %d Tavily result blocks for relevance...", len(tavily_search_results))

        filtered_search_results = []  # results deemed relevant in this specific call

        # Cap concurrent judge calls at 2 to stay inside provider rate limits.
        semaphore = asyncio.Semaphore(2)

        async def evaluate_with_semaphore(query_result_item, input_query: str):
            # query_result_item is the full result block for one query.
            async with semaphore:
                relevance_evaluator = get_relevance_evaluator()
                eval_result = await relevance_evaluator(
                    inputs=input_query, context=query_result_item
                )
            return query_result_item, eval_result

        tasks: list = [
            evaluate_with_semaphore(query_result, attempted_query)
            for query_result, attempted_query in zip(tavily_search_results, attempted_tavily_query_list)
        ]

        # Consume evaluations as they finish rather than waiting for all.
        for completed_task in asyncio.as_completed(tasks):
            query_result_item, eval_result = await completed_task
            if eval_result.get("score"):  # judge marked this block relevant
                if isinstance(query_result_item, list):
                    filtered_search_results.extend(query_result_item)
                else:
                    # Handle cases where the result block is not the
                    # expected list of snippets.
                    logger.warning("Expected a list in query_result_item, got: %s", type(query_result_item))

        # Replace the unfiltered results with the relevant subset.
        state["company_research_data"]["tavily_search"] = filtered_search_results

        logger.info("Relevance filtering completed. %d relevant results found.", len(filtered_search_results))

        return state

    except Exception:
        # Log with full traceback, but return the original state so a
        # filtering failure does not break the surrounding graph.
        logger.exception("Error in relevance_filter")
        return state
|
{tools β job_writing_agent/tools}/__init__.py
RENAMED
|
@@ -4,6 +4,6 @@ Created on Mon Oct 23 16:49:52 2023
|
|
| 4 |
@author: rishabhaggarwal
|
| 5 |
"""
|
| 6 |
|
| 7 |
-
from .
|
| 8 |
|
| 9 |
-
__all__ = ["
|
|
|
|
| 4 |
@author: rishabhaggarwal
|
| 5 |
"""
|
| 6 |
|
| 7 |
+
from .SearchTool import relevance_filter
|
| 8 |
|
| 9 |
+
__all__ = ["relevance_filter"]
|
job_writing_agent/tools/test_llm.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Smoke test: verify the Cerebras chat model answers a trivial prompt.

Run directly (``python test_llm.py``); requires Cerebras credentials in the
environment as expected by ``langchain_cerebras``.
"""

from langchain_cerebras import ChatCerebras


def main() -> None:
    """Instantiate the Cerebras LLM and print one round-trip response."""
    llm_cerebras = ChatCerebras(
        model="llama3.1-8b",  # Direct Cerebras model name
        temperature=0.3,
    )
    print(llm_cerebras.invoke("Hey! Can you hear me?"))


if __name__ == "__main__":
    # Guarded so importing this module never triggers a network call.
    main()
|
job_writing_agent/tools/test_tavily.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Smoke test: generate Tavily search queries from a job description via DSPy.

Requires ``OPENROUTER_API_KEY`` in the environment and a local MLflow
tracking server at http://127.0.0.1:5000/.
"""

import os

import dspy
import mlflow

mlflow.dspy.autolog(
    log_compiles=True,
    log_evals=True,
    log_traces_from_compile=True,
)

mlflow.set_tracking_uri("http://127.0.0.1:5000/")
mlflow.set_experiment("job description extract")


# SECURITY: never hard-code API keys in source files — the previous revision
# committed a live OpenRouter key. Read it from the environment instead
# (raises KeyError early if missing).
OPENROUTER_API_KEY = os.environ["OPENROUTER_API_KEY"]

dspy.configure(lm=dspy.LM(
    "openrouter/qwen/qwen3-4b:free",
    api_key=OPENROUTER_API_KEY,
    temperature=0.1,
))


class TavilySearchQueries(dspy.Signature):
    """Use the job description and company name
    to create search queries for the tavily search tool"""
    job_description_ = dspy.InputField(desc="Job description of the role that candidate is applying for.")
    company_name = dspy.InputField(desc="Name of the company the candidate is applying for.")
    search_queries = dspy.OutputField(desc="Tavily Search Query")
    search_query_relevance = dspy.OutputField(desc="Relevance for each tavily search query that is generated")


def main() -> None:
    """Run the query generator once against a sample Cohere job posting."""
    tavily_query_generator = dspy.ChainOfThought(TavilySearchQueries)

    # Sample fixture text copied verbatim from a real posting (including the
    # original's truncation artifact "uns in low-resource environments").
    job_description = """ Who are we?

Our mission is to scale intelligence to serve humanity. We're training and deploying frontier models for developers and enterprises who are building AI systems to power magical experiences like content generation, semantic search, RAG, and agents. We believe that our work is instrumental to the widespread adoption of AI.

We obsess over what we build. Each one of us is responsible for contributing to increasing the capabilities of our models and the value they drive for our customers. We like to work hard and move fast to do what's best for our customers.

Cohere is a team of researchers, engineers, designers, and more, who are passionate about their craft. Each person is one of the best in the world at what they do. We believe that a diverse range of perspectives is a requirement for building great products.

Join us on our mission and shape the future!

About North:

North is Cohere's cutting-edge AI workspace platform, designed to revolutionize the way enterprises utilize AI. It offers a secure and customizable environment, allowing companies to deploy AI while maintaining control over sensitive data. North integrates seamlessly with existing workflows, providing a trusted platform that connects AI agents with workplace tools and applications.

As a Senior/Staff Backend Engineer, you will:
Build and ship features for North, our AI workspace platform
Develop autonomous agents that talk to sensitive enterprise data
uns in low-resource environments, and has highly stringent deployment mechanisms
As security and privacy are paramount, you will sometimes need to re-invent the
wheel, and won't be able to use the most popular libraries or tooling
Collaborate with researchers to productionize state-of-the-art models and techniques
You may be a good fit if:
Have shipped (lots of) Python in production
You have built and deployed extremely performant client-side or server-side RAG/agentic
applications to millions of users You have strong coding abilities and are comfortable working across the stack. You're able to read and understand, and even fix issues outside of the main code base
You've worked in both large enterprises and startups
You excel in fast-paced environments and can execute while priorities and objectives are a moving target
If some of the above doesn't line up perfectly with your experience, we still encourage you to apply!
If you want to work really hard on a glorious mission with teammates that want the same thing, Cohere is the place for you."""

    response = tavily_query_generator(job_description_=job_description, company_name="Cohere")
    print(response)


if __name__ == "__main__":
    main()
|
{utils β job_writing_agent/utils}/__init__.py
RENAMED
|
File without changes
|
job_writing_agent/utils/application_cli_interface.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import os
|
| 3 |
+
from typing import Optional, Any, Iterable
|
| 4 |
+
|
| 5 |
+
import requests
|
| 6 |
+
from requests.exceptions import RequestException
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
DEFAULT_MODEL = "qwen/qwen3-4b:free"
|
| 10 |
+
DEFAULT_CONTENT_TYPE = "cover_letter"
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def readable_file(path: str) -> str:
    """Validate that *path* exists and has a supported extension.

    Intended for use as an argparse ``type=`` callable, so on success it
    returns the path string itself (not the file contents).

    Args:
        path: Candidate file path from the command line.

    Returns:
        The same path string, unchanged.

    Raises:
        argparse.ArgumentTypeError: If the file does not exist or its
            extension is not one of .pdf, .md, .json, .txt.
    """
    if not os.path.isfile(path):
        raise argparse.ArgumentTypeError(f"File not found: {path}")
    # Case-insensitive extension check; tuple form tests all options at once.
    if not path.lower().endswith((".pdf", ".md", ".json", ".txt")):
        raise argparse.ArgumentTypeError(
            "Only text files (.txt, .md, .pdf, .json) are supported."
        )
    return path
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def valid_temp(temp: str) -> float:
    """Parse a CLI temperature string and ensure it lies within [0, 2].

    Raises:
        argparse.ArgumentTypeError: If the parsed value is out of range.
    """
    parsed = float(temp)
    if not (0 <= parsed <= 2):
        raise argparse.ArgumentTypeError("Temperature must be between 0 and 2.")
    return parsed
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def is_valid_url(
    job_posting: str, allowed_statuses: Iterable[int] | None = None
) -> str:
    """Check that the job-posting URL is reachable and return it unchanged.

    Used as an argparse ``type=`` callable, so on success it must hand the
    original string back for argparse to store (it does NOT return a bool,
    despite the name).

    Parameters
    ----------
    job_posting : str
        The URL for the job posting.
    allowed_statuses : Iterable[int] | None, optional
        Specific status codes that are considered "valid".
        If ``None`` (default) any 200-399 status is accepted.

    Returns
    -------
    str
        The same ``job_posting`` string when the URL responded with an
        allowed status.

    Raises
    ------
    RequestException
        If the response status is not in ``allowed_statuses``. Network
        errors raised by ``requests`` itself also propagate.
    """
    if allowed_statuses is None:
        # All 2xx and 3xx responses are considered "valid".
        allowed_statuses = range(200, 400)

    # stream=True avoids downloading the page body; only the status matters.
    with requests.get(
        job_posting, timeout=30, allow_redirects=True, stream=True
    ) as resp:
        if resp.status_code in allowed_statuses:
            return job_posting
        raise RequestException("Job Posting could not be reached")
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def handle_cli() -> argparse.Namespace:
    """Parse and validate CLI arguments for job application generator.

    Returns:
        argparse.Namespace with ``resume``, ``job_posting``, ``content_type``,
        ``model``, and ``temp`` attributes. File and URL validation is
        performed eagerly by the ``type=`` callables, so parsing fails fast
        on a bad path or unreachable posting.
    """
    parser = argparse.ArgumentParser(
        description="""Assist the candidate in writing content for
        job application such as answering to question in application
        process, cover letters and more."""
    )
    parser.add_argument(
        "-r",
        "--resume",
        required=True,
        metavar="resume",
        type=readable_file,
        help="Relative/Absolute path to resume file in pdf, text, markdown format.",
    )
    parser.add_argument(
        "-j",
        "--job_posting",
        required=True,
        metavar="job_posting",
        type=is_valid_url,
        help="URL to job posting or paste raw text of job description text.",
    )
    parser.add_argument(
        "-t",
        "--content_type",
        default=DEFAULT_CONTENT_TYPE,
        choices=["cover_letter", "bullets", "linkedin_note"],
        help="Type of application material to generate (default: cover_letter).",
    )
    parser.add_argument(
        "-m",
        "--model",
        default=DEFAULT_MODEL,
        metavar="MODEL",
        help="Model to use (default: qwen/qwen3-4b:free).",
    )
    parser.add_argument(
        "--temp",
        type=valid_temp,
        default=0.2,
        metavar="FLOAT",
        # Help text must match the actual default (was wrongly "0.7").
        help="Temperature for generation, 0-2 (default: 0.2).",
    )
    parser.add_argument("--version", action="version", version="%(prog)s 1.0")
    return parser.parse_args()
|
{utils β job_writing_agent/utils}/config.py
RENAMED
|
File without changes
|
job_writing_agent/utils/config_utils.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utility functions for creating model configurations.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import argparse
|
| 6 |
+
from typing import Dict, Any
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def create_model_config(args: argparse.Namespace) -> Dict[str, Any]:
    """
    Build a model configuration dictionary from parsed CLI arguments.

    Args:
        args: Parsed command-line arguments (expects ``model`` and ``temp``).

    Returns:
        A dictionary with model configuration parameters; keys are only
        present when the corresponding argument was supplied.
    """
    config: Dict[str, Any] = {}
    if args.model:
        config["model_name"] = args.model
    if args.temp is not None:
        # NOTE(review): temperatures are silently clamped (0.25 / 0.2) even
        # though the CLI accepts 0-2 — confirm this cap is intentional.
        config["temperature"] = min(0.25, args.temp)
        config["precise_temperature"] = min(0.2, args.temp)
    return config
|
{utils β job_writing_agent/utils}/document_processing.py
RENAMED
|
@@ -5,32 +5,34 @@ Document processing utilities for parsing resumes and job descriptions.
|
|
| 5 |
import logging
|
| 6 |
import os
|
| 7 |
import re
|
| 8 |
-
import json
|
| 9 |
-
|
| 10 |
from pathlib import Path
|
| 11 |
from urllib.parse import urlparse
|
| 12 |
from typing_extensions import Dict, List, Any
|
| 13 |
|
| 14 |
|
| 15 |
-
|
| 16 |
-
from langchain_community.document_loaders import PyPDFLoader,
|
|
|
|
| 17 |
from langchain_text_splitters import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter
|
| 18 |
-
from
|
| 19 |
-
from langchain_core.messages import SystemMessage
|
| 20 |
from langchain_core.documents import Document
|
| 21 |
-
from
|
| 22 |
-
from langfuse.decorators import observe, langfuse_context
|
| 23 |
from pydantic import BaseModel, Field
|
| 24 |
|
| 25 |
# Local imports - using relative imports
|
| 26 |
from .errors import URLExtractionError, LLMProcessingError, JobDescriptionParsingError
|
| 27 |
-
from .
|
| 28 |
-
from ..prompts.templates import JOB_DESCRIPTION_PROMPT
|
| 29 |
|
| 30 |
# Set up logging
|
| 31 |
logger = logging.getLogger(__name__)
|
| 32 |
logging.basicConfig(level=logging.INFO)
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
# Default paths
|
| 36 |
DEFAULT_RESUME_PATH: str = os.getenv("DEFAULT_RESUME_PATH", "")
|
|
@@ -44,14 +46,6 @@ RESUME_SECTIONS: list[str] = [
|
|
| 44 |
"AWARDS", "LANGUAGES", "INTERESTS", "REFERENCES"
|
| 45 |
]
|
| 46 |
|
| 47 |
-
# Initialize LLM client
|
| 48 |
-
LLM: LLMClient = LLMClient()
|
| 49 |
-
|
| 50 |
-
llm_client: LLMClient = LLM.get_instance(
|
| 51 |
-
model_name="ejschwar/llama3.2-better-prompts:latest",
|
| 52 |
-
model_provider="ollama_json")
|
| 53 |
-
llm_structured = llm_client.get_llm()
|
| 54 |
-
|
| 55 |
|
| 56 |
class ResumeSection(BaseModel):
|
| 57 |
"""Model for a structured resume section."""
|
|
@@ -64,12 +58,27 @@ class StructuredResume(BaseModel):
|
|
| 64 |
sections: List[ResumeSection] = Field(description="List of resume sections")
|
| 65 |
contact_info: Dict[str, str] = Field(description="Contact information extracted from the resume")
|
| 66 |
|
|
|
|
| 67 |
class JobDescriptionComponents(BaseModel):
|
| 68 |
"""Model for job description components."""
|
| 69 |
company_name: str = Field(description="The company name")
|
| 70 |
job_description: str = Field(description="The job description")
|
| 71 |
reasoning: str = Field(description="The reasoning for the extracted information")
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
@observe()
|
| 74 |
def clean_resume_text(text: str) -> str:
|
| 75 |
"""Clean and normalize resume text by removing extra whitespace, fixing common PDF extraction issues.
|
|
@@ -214,7 +223,7 @@ def parse_resume(file_path: str | Path) -> List[Document]:
|
|
| 214 |
(β400 chars, 50βchar overlap) with {source, section} metadata.
|
| 215 |
"""
|
| 216 |
file_extension = Path(file_path).suffix.lower()
|
| 217 |
-
|
| 218 |
# Handle different file types
|
| 219 |
if file_extension == '.pdf':
|
| 220 |
text = PyPDFLoader(str(file_path), extraction_mode="layout").load()[0].page_content
|
|
@@ -229,14 +238,14 @@ def parse_resume(file_path: str | Path) -> List[Document]:
|
|
| 229 |
raise ValueError(f"Could not read text file: {file_path}. Error: {str(e)}")
|
| 230 |
else:
|
| 231 |
raise ValueError(f"Unsupported resume file type: {file_path}. Supported types: .pdf, .txt")
|
| 232 |
-
|
| 233 |
text = _collapse_ws(text)
|
| 234 |
|
| 235 |
# Tag headings with "###" so Markdown splitter can see them
|
| 236 |
tagged_lines = [
|
| 237 |
f"### {ln}" if _is_heading(ln) else ln
|
| 238 |
for ln in text.splitlines()]
|
| 239 |
-
|
| 240 |
md_text = "\n".join(tagged_lines)
|
| 241 |
|
| 242 |
if "###" in md_text:
|
|
@@ -252,27 +261,25 @@ def parse_resume(file_path: str | Path) -> List[Document]:
|
|
| 252 |
for doc in chunks:
|
| 253 |
doc.metadata.setdefault("source", str(file_path))
|
| 254 |
# section already present if headerβsplitter was used
|
| 255 |
-
|
| 256 |
return chunks
|
| 257 |
|
| 258 |
|
| 259 |
-
def get_job_description(file_path_or_url: str) -> Document:
|
| 260 |
"""Parse a job description from a file or URL into chunks.
|
| 261 |
|
| 262 |
Args:
|
| 263 |
file_path_or_url: Local file path or URL of job posting
|
| 264 |
|
| 265 |
Returns:
|
| 266 |
-
|
| 267 |
Document containing the job description
|
| 268 |
"""
|
| 269 |
# Check if the input is a URL
|
| 270 |
if file_path_or_url.startswith(('http://', 'https://')):
|
| 271 |
-
return
|
| 272 |
|
| 273 |
# Handle local files based on extension
|
| 274 |
file_extension = Path(file_path_or_url).suffix.lower()
|
| 275 |
-
|
| 276 |
# Handle txt files
|
| 277 |
if file_extension == '.txt':
|
| 278 |
try:
|
|
@@ -284,160 +291,117 @@ def get_job_description(file_path_or_url: str) -> Document:
|
|
| 284 |
except Exception as e:
|
| 285 |
logger.error(f"Error reading text file: {str(e)}")
|
| 286 |
raise ValueError(f"Could not read text file: {file_path_or_url}. Error: {str(e)}")
|
| 287 |
-
|
| 288 |
# For other file types
|
| 289 |
raise ValueError(f"Unsupported file type: {file_path_or_url}. Supported types: .pdf, .docx, .txt, .md")
|
| 290 |
|
| 291 |
|
| 292 |
-
def
|
| 293 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
|
| 295 |
Args:
|
| 296 |
-
url: URL of the job posting
|
| 297 |
|
| 298 |
Returns:
|
| 299 |
-
|
| 300 |
|
| 301 |
Raises:
|
| 302 |
-
ValueError: If URL format is invalid
|
| 303 |
-
|
| 304 |
-
LLMProcessingError: If LLM processing fails
|
| 305 |
"""
|
| 306 |
-
|
| 307 |
logger.info("Starting job description extraction from URL: %s", url)
|
| 308 |
-
# langfuse_handler = langfuse_context.get_current_langchain_handler()
|
| 309 |
-
extracted_text = None
|
| 310 |
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
raise ValueError("URL must start with http:// or https://")
|
| 317 |
|
| 318 |
-
|
|
|
|
|
|
|
| 319 |
try:
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
)
|
| 326 |
-
document_splitted = loader.load_and_split(text_splitter=text_splitter)
|
| 327 |
-
|
| 328 |
-
if not document_splitted:
|
| 329 |
-
logger.error("No content could be extracted from URL: %s", url)
|
| 330 |
-
raise URLExtractionError("No content could be extracted from URL")
|
| 331 |
-
|
| 332 |
-
extracted_text = " ".join(doc.page_content for doc in document_splitted)
|
| 333 |
-
logger.info("Successfully extracted %d characters from URL", len(extracted_text))
|
| 334 |
-
|
| 335 |
except Exception as e:
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
# Process with LLM
|
| 339 |
-
if not llm_structured:
|
| 340 |
-
logger.warning("LLM not available, returning raw extracted text")
|
| 341 |
-
return [extracted_text, "Unknown Company"]
|
| 342 |
|
|
|
|
| 343 |
try:
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
job_description_parser_human_message = HumanMessagePromptTemplate.from_template(
|
| 352 |
-
template=human_prompt,
|
| 353 |
-
input_variables=["extracted_text"])
|
| 354 |
-
chat_prompt = ChatPromptTemplate.from_messages([job_description_parser_system_message, job_description_parser_human_message])
|
| 355 |
-
|
| 356 |
-
# print("Chat prompt created successfully")
|
| 357 |
-
chain = chat_prompt | llm_structured | output_parser
|
| 358 |
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
# Direct Pydantic model
|
| 371 |
-
result = result.model_dump()
|
| 372 |
-
if isinstance(result, dict):
|
| 373 |
-
print("LLM returned a dictionary, converting to JobDescriptionComponents model", result)
|
| 374 |
-
else:
|
| 375 |
-
# Unexpected result type
|
| 376 |
-
print(f"Unexpected LLM result type: {type(result)}")
|
| 377 |
-
logger.error("Unexpected LLM result type: %s", type(result))
|
| 378 |
-
raise LLMProcessingError("Invalid LLM response format")
|
| 379 |
-
|
| 380 |
-
# Validate required fields
|
| 381 |
-
if not result.get("job_description") or not result.get("company_name"):
|
| 382 |
-
logger.warning("LLM returned empty required fields")
|
| 383 |
-
raise LLMProcessingError("Missing required fields in LLM response")
|
| 384 |
-
|
| 385 |
-
logger.info("Successfully processed job description with LLM")
|
| 386 |
-
# Create a Document object for the job description
|
| 387 |
-
job_doc = Document(
|
| 388 |
-
page_content=result["job_description"],
|
| 389 |
-
metadata={"company_name": result["company_name"]}
|
| 390 |
-
)
|
| 391 |
-
|
| 392 |
-
# print("Job description Document created successfully. Company name: ", result["company_name"])
|
| 393 |
-
# print("Job description content: ", job_doc.metadata) # Print first 100 chars for debugging
|
| 394 |
-
return job_doc
|
| 395 |
-
|
| 396 |
-
except Exception as e:
|
| 397 |
-
# Handle LLM processing errors first
|
| 398 |
-
if isinstance(e, LLMProcessingError):
|
| 399 |
-
raise
|
| 400 |
-
|
| 401 |
-
# Try to recover from JSON parsing errors
|
| 402 |
-
error_msg = str(e)
|
| 403 |
-
if "Invalid json output" in error_msg:
|
| 404 |
-
logger.warning("Attempting to recover from invalid JSON output")
|
| 405 |
-
|
| 406 |
-
# Extract JSON from error message
|
| 407 |
-
output = error_msg.split("Invalid json output:", 1)[1].strip()
|
| 408 |
-
start = output.find('{')
|
| 409 |
-
end = output.rfind('}') + 1
|
| 410 |
-
|
| 411 |
-
if start >= 0 and end > start:
|
| 412 |
-
try:
|
| 413 |
-
clean_json = output[start:end]
|
| 414 |
-
result = output_parser.parse(clean_json)
|
| 415 |
-
if hasattr(result, "job_description") and hasattr(result, "company_name"):
|
| 416 |
-
return [result.job_description, result.company_name]
|
| 417 |
-
except json.JSONDecodeError as json_e:
|
| 418 |
-
logger.error("Failed to recover from JSON error: %s", json_e)
|
| 419 |
-
|
| 420 |
-
raise LLMProcessingError(f"Failed to process job description with LLM: {str(e)}") from e
|
| 421 |
|
| 422 |
except Exception as e:
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
return [extracted_text, "Unknown Company"]
|
| 428 |
-
raise LLMProcessingError(f"Failed to process job description with LLM: {str(e)}") from e
|
| 429 |
-
|
| 430 |
-
except ValueError as e:
|
| 431 |
-
logger.error("URL validation error: %s", str(e))
|
| 432 |
-
raise
|
| 433 |
-
except URLExtractionError as e:
|
| 434 |
-
logger.error("Content extraction error: %s", str(e))
|
| 435 |
-
raise
|
| 436 |
except LLMProcessingError as e:
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
except Exception as e:
|
| 442 |
-
logger.error("
|
| 443 |
-
raise JobDescriptionParsingError(f"
|
|
|
|
| 5 |
import logging
|
| 6 |
import os
|
| 7 |
import re
|
|
|
|
|
|
|
| 8 |
from pathlib import Path
|
| 9 |
from urllib.parse import urlparse
|
| 10 |
from typing_extensions import Dict, List, Any
|
| 11 |
|
| 12 |
|
| 13 |
+
import dspy
|
| 14 |
+
from langchain_community.document_loaders import PyPDFLoader, AsyncChromiumLoader
|
| 15 |
+
from langchain_community.document_transformers import Html2TextTransformer
|
| 16 |
from langchain_text_splitters import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter
|
| 17 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
|
|
| 18 |
from langchain_core.documents import Document
|
| 19 |
+
from langfuse import observe
|
|
|
|
| 20 |
from pydantic import BaseModel, Field
|
| 21 |
|
| 22 |
# Local imports - using relative imports
|
| 23 |
from .errors import URLExtractionError, LLMProcessingError, JobDescriptionParsingError
|
| 24 |
+
from .llm_provider_factory import LLMFactory
|
|
|
|
| 25 |
|
| 26 |
# Set up logging
|
| 27 |
logger = logging.getLogger(__name__)
|
| 28 |
logging.basicConfig(level=logging.INFO)
|
| 29 |
|
| 30 |
+
llm_provider = LLMFactory()
|
| 31 |
+
|
| 32 |
+
llm = llm_provider.create_langchain("qwen-3-32b",
|
| 33 |
+
provider="cerebras",
|
| 34 |
+
temperature=0.3,
|
| 35 |
+
)
|
| 36 |
|
| 37 |
# Default paths
|
| 38 |
DEFAULT_RESUME_PATH: str = os.getenv("DEFAULT_RESUME_PATH", "")
|
|
|
|
| 46 |
"AWARDS", "LANGUAGES", "INTERESTS", "REFERENCES"
|
| 47 |
]
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
class ResumeSection(BaseModel):
|
| 51 |
"""Model for a structured resume section."""
|
|
|
|
| 58 |
sections: List[ResumeSection] = Field(description="List of resume sections")
|
| 59 |
contact_info: Dict[str, str] = Field(description="Contact information extracted from the resume")
|
| 60 |
|
| 61 |
+
|
| 62 |
class JobDescriptionComponents(BaseModel):
|
| 63 |
"""Model for job description components."""
|
| 64 |
company_name: str = Field(description="The company name")
|
| 65 |
job_description: str = Field(description="The job description")
|
| 66 |
reasoning: str = Field(description="The reasoning for the extracted information")
|
| 67 |
|
| 68 |
+
|
| 69 |
+
class ExtractJobDescription(dspy.Signature):
    """Clean and extract the job description from the provided scraped HTML of the job posting.

    Divide the job description into multiple sections under different headings: Company Overview,
    Role Introduction, Qualifications and Requirements, Preferred Qualifications, Salary, Location.
    Do not alter the content of the job description.
    """

    # NOTE: in DSPy, this class docstring is the instruction prompt sent to the
    # LM, so its wording (typos fixed above: "Preferred", spacing after commas)
    # directly affects extraction quality.
    job_description_html_content = dspy.InputField(desc="HTML content of the job posting.")
    job_description = dspy.OutputField(desc="Clean job description which is free of HTML tags and irrelevant information.")
    job_role = dspy.OutputField(desc="The job role in the posting.")
    company_name = dspy.OutputField(desc="Company Name of the Job listing.")
    location = dspy.OutputField(desc="The location for the provided job posting.")
|
| 80 |
+
|
| 81 |
+
|
| 82 |
@observe()
|
| 83 |
def clean_resume_text(text: str) -> str:
|
| 84 |
"""Clean and normalize resume text by removing extra whitespace, fixing common PDF extraction issues.
|
|
|
|
| 223 |
(β400 chars, 50βchar overlap) with {source, section} metadata.
|
| 224 |
"""
|
| 225 |
file_extension = Path(file_path).suffix.lower()
|
| 226 |
+
|
| 227 |
# Handle different file types
|
| 228 |
if file_extension == '.pdf':
|
| 229 |
text = PyPDFLoader(str(file_path), extraction_mode="layout").load()[0].page_content
|
|
|
|
| 238 |
raise ValueError(f"Could not read text file: {file_path}. Error: {str(e)}")
|
| 239 |
else:
|
| 240 |
raise ValueError(f"Unsupported resume file type: {file_path}. Supported types: .pdf, .txt")
|
| 241 |
+
|
| 242 |
text = _collapse_ws(text)
|
| 243 |
|
| 244 |
# Tag headings with "###" so Markdown splitter can see them
|
| 245 |
tagged_lines = [
|
| 246 |
f"### {ln}" if _is_heading(ln) else ln
|
| 247 |
for ln in text.splitlines()]
|
| 248 |
+
|
| 249 |
md_text = "\n".join(tagged_lines)
|
| 250 |
|
| 251 |
if "###" in md_text:
|
|
|
|
| 261 |
for doc in chunks:
|
| 262 |
doc.metadata.setdefault("source", str(file_path))
|
| 263 |
# section already present if headerβsplitter was used
|
|
|
|
| 264 |
return chunks
|
| 265 |
|
| 266 |
|
| 267 |
+
async def get_job_description(file_path_or_url: str) -> Document:
|
| 268 |
"""Parse a job description from a file or URL into chunks.
|
| 269 |
|
| 270 |
Args:
|
| 271 |
file_path_or_url: Local file path or URL of job posting
|
| 272 |
|
| 273 |
Returns:
|
|
|
|
| 274 |
Document containing the job description
|
| 275 |
"""
|
| 276 |
# Check if the input is a URL
|
| 277 |
if file_path_or_url.startswith(('http://', 'https://')):
|
| 278 |
+
return await parse_job_description_from_url(file_path_or_url)
|
| 279 |
|
| 280 |
# Handle local files based on extension
|
| 281 |
file_extension = Path(file_path_or_url).suffix.lower()
|
| 282 |
+
|
| 283 |
# Handle txt files
|
| 284 |
if file_extension == '.txt':
|
| 285 |
try:
|
|
|
|
| 291 |
except Exception as e:
|
| 292 |
logger.error(f"Error reading text file: {str(e)}")
|
| 293 |
raise ValueError(f"Could not read text file: {file_path_or_url}. Error: {str(e)}")
|
| 294 |
+
|
| 295 |
# For other file types
|
| 296 |
raise ValueError(f"Unsupported file type: {file_path_or_url}. Supported types: .pdf, .docx, .txt, .md")
|
| 297 |
|
| 298 |
|
| 299 |
+
async def scrape_job_description_from_web(urls: List[str]) -> str:
    """Scrape job-listing pages and return their text as a single string.

    Loads each URL in a headless Chromium browser, converts the HTML to
    plain text, splits the result into ~1000-token chunks, and joins ALL
    chunks back together with "." separators.

    Args:
        urls: URLs of the job postings to scrape.

    Returns:
        The concatenated text content of all scraped pages.
    """
    loader = AsyncChromiumLoader(urls, headless=True)
    scraped_data_documents = await loader.aload()

    html2text = Html2TextTransformer()
    markdown_scraped_data_documents = html2text.transform_documents(scraped_data_documents)

    # Split into ~1000-token chunks. Note: every chunk is kept and re-joined
    # below, so this does NOT truncate the page to its first 1000 tokens.
    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1000, chunk_overlap=0
    )

    extracted_content = splitter.split_documents(markdown_scraped_data_documents)

    return ".".join(doc.page_content for doc in extracted_content)
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
async def parse_job_description_from_url(url: str) -> Document:
    """Extract and structure a job description from a URL using an LLM.

    Fetches the page, runs a DSPy extractor over the scraped text, and returns
    a structured LangChain Document. If LLM processing fails but raw page text
    was fetched, falls back to a Document wrapping the raw text.

    Args:
        url: The URL of the job posting.

    Returns:
        A Document whose page_content is the job description and whose
        metadata carries company_name, source, job_role and location.

    Raises:
        ValueError: If the URL format is invalid.
        URLExtractionError: If no content could be fetched from the URL.
        LLMProcessingError: If LLM processing fails and no raw text is available.
        JobDescriptionParsingError: For any other unexpected errors.
    """
    logger.info("Starting job description extraction from URL: %s", url)

    # 1. Validate URL first (fail fast)
    parsed_url = urlparse(url)
    if not all([parsed_url.scheme, parsed_url.netloc]):
        logger.error("Invalid URL format: %s", url)
        raise ValueError("URL must be valid and start with http:// or https://")

    raw_content = None
    try:
        # 2. Fetch content from the URL
        try:
            logger.info("Fetching content from URL...")
            raw_content = await scrape_job_description_from_web([url])
            if not raw_content or not raw_content.strip():
                raise URLExtractionError("Failed to extract any meaningful content from the URL.")
            logger.info("Successfully fetched raw content from URL.")
        except Exception as e:
            # Wrap any fetching error into our custom exception
            raise URLExtractionError(f"Failed to download or read content from {url}: {e}") from e

        # 3. Process content with the LLM
        try:
            logger.info("Processing content with DSPy LLM...")
            # NOTE(review): configuring DSPy here mutates global state on every
            # call, and max_tokens=60000 is unusually high -- confirm the
            # provider's actual limit.
            dspy.configure(lm=dspy.LM(
                "cerebras/qwen-3-32b",
                api_key=os.environ.get("CEREBRAS_API_KEY"),
                temperature=0.1,
                max_tokens=60000
            ))

            job_extract_fn = dspy.Predict(ExtractJobDescription)
            result = job_extract_fn(job_description_html_content=raw_content)
            logger.info("Successfully processed job description with LLM.")

            # 4. Create the final Document with structured data
            return Document(
                page_content=result.job_description,
                metadata={
                    "company_name": result.company_name,
                    "source": url,
                    "job_role": result.job_role,
                    "location": result.location
                }
            )
        except Exception as e:
            # Wrap any LLM error into our custom exception
            raise LLMProcessingError(f"Failed to process content with LLM: {e}") from e

    # 5. Handle specific, known errors
    except LLMProcessingError as e:
        logger.warning("LLM processing failed: %s. Falling back to raw text.", e)
        # Fallback uses the already-fetched `raw_content`.
        if raw_content:
            return Document(
                page_content=raw_content,
                metadata={"company_name": "Unknown", "source": url, "error": str(e)}
            )
        # If raw_content is also None, the failure was catastrophic.
        raise LLMProcessingError("LLM processing failed and no raw content was available for fallback.") from e

    except URLExtractionError:
        # BUG FIX: previously this raised a NEW generic URLExtractionError,
        # discarding the informative original message; re-raise as-is instead.
        logger.error("Could not extract content from URL: %s", url)
        raise

    # 6. Catch any other unexpected errors
    except Exception as e:
        logger.error("An unexpected error occurred: %s", e, exc_info=True)
        raise JobDescriptionParsingError(f"An unexpected error occurred while parsing the job description: {e}") from e
|
job_writing_agent/utils/dspy_job_extract.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pprint
import dspy
import os
import asyncio
import mlflow
from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader, AsyncChromiumLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_transformers import BeautifulSoupTransformer
from langchain_community.document_transformers import Html2TextTransformer


# SECURITY FIX: the Cerebras API key was previously hard-coded here and
# committed to source control. Credentials must come from the environment
# (shell export / .env); fail fast with a clear message when missing.
if not os.environ.get("CEREBRAS_API_KEY"):
    raise RuntimeError("CEREBRAS_API_KEY environment variable is not set")


# Log DSPy compilations/evaluations to the local MLflow tracking server.
mlflow.dspy.autolog(
    log_compiles=True,
    log_evals=True,
    log_traces_from_compile=True
)

mlflow.set_tracking_uri("http://127.0.0.1:5000/")
mlflow.set_experiment("job description extract")
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class ExtractJobDescription(dspy.Signature):
    """Clean and extract the job description from the provided scraped HTML of the job posting.

    Divide the job description into multiple sections under different headings: Company Overview,
    Role Introduction, Qualifications and Requirements, Preferred Qualifications, Salary, Location.
    Do not alter the content of the job description.
    """

    # NOTE: in DSPy, this class docstring is the instruction prompt sent to the
    # LM, so its wording (typos fixed above: "Preferred", spacing after commas)
    # directly affects extraction quality.
    job_description_html_content = dspy.InputField(desc="HTML content of the job posting.")
    job_description = dspy.OutputField(desc="Clean job description which is free of HTML tags and irrelevant information.")
    job_role = dspy.OutputField(desc="The job role in the posting.")
    company_name = dspy.OutputField(desc="Company Name of the Job listing.")
    location = dspy.OutputField(desc="The location for the provided job posting.")
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def get_job_description(url: str) -> str:
    """Download a job posting page and return its text as one string.

    The page is fetched with WebBaseLoader, split into overlapping chunks
    along paragraph/line/sentence boundaries, and the chunks are re-joined
    with spaces.

    Args:
        url: URL of the job posting.

    Returns:
        The extracted page text.
    """
    page_loader = WebBaseLoader(url)
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", ". ", " ", ""]
    )
    chunks = page_loader.load_and_split(text_splitter=splitter)

    return " ".join(chunk.page_content for chunk in chunks)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def extract_jd():
    """Fetch a sample job posting and extract structured fields with DSPy.

    Returns:
        A DSPy prediction carrying job_description, job_role, company_name
        and location fields.
    """
    job_url = "https://job-boards.greenhouse.io/verkada/jobs/4128645007"
    # BUG FIX: the previous version called trafilatura.fetch_url(), but
    # `trafilatura` is never imported in this module (NameError at runtime).
    # Use the local WebBaseLoader-based helper instead.
    job_description = get_job_description(job_url)
    dspy.configure(lm=dspy.LM(
        "cerebras/qwen-3-32b",
        api_key=os.environ.get("CEREBRAS_API_KEY"),
        temperature=0.1,
        max_tokens=60000
    ),
        adapter=dspy.JSONAdapter()
    )

    job_extract_fn = dspy.Predict(ExtractJobDescription)

    return job_extract_fn(job_description_html_content=job_description)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
async def scrape_with_playwright(urls):
    """Scrape pages with headless Chromium and return ~1000-token chunks.

    Args:
        urls: List of URLs to load.

    Returns:
        List of Document chunks containing the HTML-to-text converted pages.
    """
    chromium_loader = AsyncChromiumLoader(urls, headless=True, user_agent="Mozilla/5.0 (compatible)")
    raw_docs = await chromium_loader.aload()

    text_docs = Html2TextTransformer().transform_documents(raw_docs)
    print("Extracting content with LLM")

    # Token-based splitting: ~1000 tokens per chunk, no overlap.
    token_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1000, chunk_overlap=0
    )

    return token_splitter.split_documents(text_docs)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# extract_jd()

if __name__ == "__main__":
    # BUG FIX: previously asyncio.run(...) executed at import time, so merely
    # importing this module launched a headless browser and scraped the web.
    # Guard the demo run behind __main__.
    urls = ["https://jobs.ashbyhq.com/MotherDuck/c11f6d31-64e9-4964-85dd-c5b25eee55bc"]
    asyncio.run(scrape_with_playwright(urls))
    # print(extracted_content)
|
{utils β job_writing_agent/utils}/errors.py
RENAMED
|
File without changes
|
{utils β job_writing_agent/utils}/langfuse_handler.py
RENAMED
|
File without changes
|
job_writing_agent/utils/llm_client.py
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import logging
|
| 3 |
+
|
| 4 |
+
from abc import ABC, abstractmethod
|
| 5 |
+
from typing import Dict, Any, Literal
|
| 6 |
+
|
| 7 |
+
from langchain_core.language_models.chat_models import BaseChatModel
|
| 8 |
+
from langchain_ollama import ChatOllama
|
| 9 |
+
from langchain_openai import ChatOpenAI
|
| 10 |
+
from langchain_cerebras import ChatCerebras
|
| 11 |
+
from pydantic import SecretStr
|
| 12 |
+
import dspy
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
__all__ = [
|
| 17 |
+
"OllamaChatProvider",
|
| 18 |
+
"CerebrasChatProvider",
|
| 19 |
+
"OpenRouterChatProvider",
|
| 20 |
+
]
|
| 21 |
+
|
| 22 |
+
class LLMProvider(ABC):
    """Base class for LLM provider strategies.

    Each concrete provider declares its default configuration, the parameter
    names it supports per framework, and how to build LangChain / DSPy
    model instances.
    """

    @abstractmethod
    def get_default_config(self) -> Dict[str, Any]:
        """Return the provider's default configuration values."""
        pass

    @abstractmethod
    def get_langchain_params(self) -> set[str]:
        """Return the config keys supported when building a LangChain model."""
        pass

    @abstractmethod
    def get_dspy_params(self) -> set[str]:
        """Return the config keys supported when building a DSPy LM."""
        pass

    @abstractmethod
    def format_model_name_for_provider(self, model: str) -> str:
        """Convert model name to DSPy format.

        Different providers require different prefixes in DSPy.

        Args:
            model: Model name as used in LangChain

        Returns:
            Model name formatted for DSPy
        """
        pass

    @abstractmethod
    def validate_config(self, **config) -> Dict[str, Any]:
        """Validate the merged config and fill required values (e.g. API keys)."""
        pass

    def create_llm_instance(
        self,
        model: str,
        framework: Literal['langchain', 'dspy'] = 'langchain',
        **config
    ) -> BaseChatModel | dspy.LM:
        """Create LLM instance for specified framework."""
        defaults = self.get_default_config()

        # Get framework-specific supported params
        # NOTE(review): any framework value other than 'langchain' falls
        # through to the DSPy param set here; an invalid value is only
        # rejected at the bottom of this method.
        if framework == 'langchain':
            supported = self.get_langchain_params()
        else:
            supported = self.get_dspy_params()

        # Filter unsupported params
        filtered_config = {k: v for k, v in config.items() if k in supported}

        # Warn about ignored params
        ignored = set(config.keys()) - supported
        if ignored:
            logger.warning(f"Ignoring unsupported parameters for {framework}: {ignored}")

        # Merge configs (caller-supplied values override provider defaults)
        merged_config = {**defaults, **filtered_config}

        # Validate
        validated_config = self.validate_config(**merged_config)

        # Create instance based on framework
        if framework == 'langchain':
            return self._create_langchain_instance(model, **validated_config)
        elif framework == 'dspy':
            return self._create_dspy_instance(model, **validated_config)
        else:
            raise ValueError(f"Unsupported framework: {framework}")

    @abstractmethod
    def _create_langchain_instance(self, model: str, **config) -> BaseChatModel:
        """Build the provider-specific LangChain chat model instance."""
        pass

    @abstractmethod
    def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
        """Build the provider-specific DSPy LM instance."""
        pass
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class OpenRouterChatProvider(LLMProvider):
    """Provider for OpenRouter.

    Model format (identical for both frameworks — OpenRouter names are
    already in "vendor/model" form):
        - LangChain: "openai/gpt-4", "anthropic/claude-3-opus"
        - DSPy: "openai/gpt-4", "anthropic/claude-3-opus"

    Docs: https://openrouter.ai/docs
    """

    OPENROUTER_API_URL = "https://openrouter.ai/api/v1"

    def get_default_config(self) -> Dict[str, Any]:
        """Return the provider's default generation parameters."""
        return {'temperature': 0.2}

    def get_langchain_params(self) -> set[str]:
        """Parameter names accepted when building the LangChain client."""
        return {
            'temperature', 'max_tokens', 'top_p',
            'frequency_penalty', 'presence_penalty',
            'stop', 'n', 'stream'
        }

    def get_dspy_params(self) -> set[str]:
        """Parameter names accepted when building the DSPy client."""
        return {'temperature', 'max_tokens', 'top_p', 'stop', 'n'}

    def format_model_name_for_provider(self, model: str) -> str:
        """OpenRouter model names are used as-is.

        Examples:
            "openai/gpt-4" -> "openai/gpt-4"
            "anthropic/claude-3-opus" -> "anthropic/claude-3-opus"
        """
        return model  # already in provider/model format; no prefix needed

    def validate_config(self, **config) -> Dict[str, Any]:
        """Validate config values and ensure an API key is present.

        Out-of-range temperature only logs a warning here (unlike the
        other providers in this module, which raise).

        Raises:
            ValueError: If no api_key is given and OPENROUTER_API_KEY
                is not set in the environment.
        """
        if 'temperature' in config:
            temp = config['temperature']
            if not 0 <= temp <= 2:
                logger.warning(f"Temperature must be 0-2, got {temp}")

        if 'api_key' not in config:
            api_key = os.getenv('OPENROUTER_API_KEY')
            if not api_key:
                raise ValueError("OPENROUTER_API_KEY not set")
            config['api_key'] = api_key

        return config

    def _create_langchain_instance(self, model: str, **config) -> ChatOpenAI:
        """Create a LangChain ChatOpenAI client pointed at OpenRouter.

        Example model: "openai/gpt-4"
        """
        api_key = config.pop('api_key')

        return ChatOpenAI(
            model=self.format_model_name_for_provider(model),  # used as-is
            api_key=SecretStr(api_key),
            base_url=self.OPENROUTER_API_URL,
            **config
        )

    def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
        """Create a DSPy LM client pointed at OpenRouter.

        Example model: "openai/gpt-4"
        """
        api_key = config.pop('api_key')

        return dspy.LM(
            model=self.format_model_name_for_provider(model),  # used as-is
            api_key=api_key,
            api_base=self.OPENROUTER_API_URL,
            **config
        )
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
class CerebrasChatProvider(LLMProvider):
    """Provider for Cerebras.

    Model format:
        - LangChain: "llama3.1-8b", "llama3.1-70b" (direct names)
        - DSPy: "cerebras/llama3.1-8b" (LiteLLM routing prefix; see
          format_model_name_for_provider)

    Docs: https://inference-docs.cerebras.ai/
    """

    CEREBRAS_API_URL = "https://api.cerebras.ai/v1"

    def get_default_config(self) -> Dict[str, Any]:
        """Return the provider's default generation parameters."""
        return {'temperature': 0.2, 'max_tokens': 1024}

    def get_langchain_params(self) -> set[str]:
        """Parameter names accepted when building the LangChain client."""
        return {
            'temperature', 'max_tokens', 'top_p',
            'stop', 'stream', 'seed'
        }

    def get_dspy_params(self) -> set[str]:
        """Parameter names accepted when building the DSPy client."""
        return {'temperature', 'max_tokens', 'top_p', 'stop'}

    def format_model_name_for_provider(self, model: str) -> str:
        """Cerebras models need the 'cerebras/' routing prefix for DSPy.

        Examples:
            "llama3.1-8b" -> "cerebras/llama3.1-8b"
            "llama3.1-70b" -> "cerebras/llama3.1-70b"
        """
        return f"cerebras/{model}"  # cerebras/ prefix for LiteLLM routing

    def validate_config(self, **config) -> Dict[str, Any]:
        """Validate config values and ensure an API key is present.

        Raises:
            ValueError: If temperature is outside [0, 1.5], or if no
                api_key is given and CEREBRAS_API_KEY is not set.
        """
        if 'temperature' in config:
            temp = config['temperature']
            if not 0 <= temp <= 1.5:
                raise ValueError(f"Temperature must be 0-1.5, got {temp}")

        if 'api_key' not in config:
            api_key = os.getenv('CEREBRAS_API_KEY')
            if not api_key:
                raise ValueError("CEREBRAS_API_KEY not set")
            config['api_key'] = api_key

        return config

    def _create_langchain_instance(self, model: str, **config) -> ChatCerebras:
        """Create a LangChain ChatCerebras client.

        Example model: "llama3.1-8b" (direct name, no prefix)
        """

        return ChatCerebras(
            model=model,  # Direct name: "llama3.1-8b"
            **config
        )

    def _create_langchain_instance_openaiclient(self, model: str, **config) -> ChatOpenAI:
        """Create a LangChain client via the OpenAI-compatible endpoint.

        Deprecated: use _create_langchain_instance (native ChatCerebras)
        instead. The original code decorated this method with
        ``@DeprecationWarning``, which replaced the function with an
        exception instance and made it uncallable; the decorator has been
        removed and the deprecation is recorded here instead.

        Example model: "llama3.1-8b"
        """
        # NOTE(review): passing the "cerebras/"-prefixed name to the
        # OpenAI-compatible endpoint looks wrong (the endpoint expects the
        # direct model name) — verify before reviving this code path.
        api_key = config.pop('api_key')

        return ChatOpenAI(
            model=self.format_model_name_for_provider(model),
            api_key=SecretStr(api_key),
            base_url=self.CEREBRAS_API_URL,
            **config
        )

    def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
        """Create a DSPy LM client.

        Example model input: "llama3.1-8b"
        DSPy/LiteLLM format: "cerebras/llama3.1-8b"
        """
        api_key = config.pop('api_key')

        return dspy.LM(
            model=self.format_model_name_for_provider(model),  # "cerebras/llama3.1-8b"
            api_key=api_key,
            api_base=self.CEREBRAS_API_URL,
            **config
        )
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
class OllamaChatProvider(LLMProvider):
    """Provider for Ollama.

    Model format:
        - LangChain: "llama3.2", "llama3.2:latest" (direct names, optional tag)
        - DSPy: "ollama_chat/llama3.2" (needs ollama_chat/ prefix)

    Docs: https://ollama.com/
    """

    def get_default_config(self) -> Dict[str, Any]:
        """Return the provider's default generation parameters."""
        return {'temperature': 0.2, 'top_k': 40, 'top_p': 0.9}

    def get_langchain_params(self) -> set[str]:
        """Parameter names accepted when building the LangChain client."""
        return {
            'temperature', 'top_k', 'top_p', 'repeat_penalty',
            'num_ctx', 'num_predict', 'format', 'seed'
        }

    def get_dspy_params(self) -> set[str]:
        """Parameter names accepted when building the DSPy client."""
        return {'temperature', 'top_p', 'num_ctx', 'seed'}

    def format_model_name_for_provider(self, model: str) -> str:
        """Ollama models need the 'ollama_chat/' prefix for DSPy/LiteLLM.

        Examples:
            "llama3.2" -> "ollama_chat/llama3.2"
            "llama3.2:latest" -> "ollama_chat/llama3.2:latest"
        """
        return f"ollama_chat/{model}"  # ollama_chat/ prefix (DSPy/LiteLLM only)

    def validate_config(self, **config) -> Dict[str, Any]:
        """Validate generation parameters.

        Raises:
            ValueError: If temperature is outside [0, 2] or top_k is not
                a positive integer.
        """
        if 'temperature' in config:
            temp = config['temperature']
            if not 0 <= temp <= 2:
                raise ValueError(f"Temperature must be 0-2, got {temp}")

        if 'top_k' in config:
            if not isinstance(config['top_k'], int) or config['top_k'] < 1:
                raise ValueError("top_k must be positive integer")

        return config

    def _create_langchain_instance(self, model: str, **config) -> ChatOllama:
        """Create a LangChain ChatOllama client.

        Fix: ChatOllama expects the direct Ollama model name ("llama3.2");
        the 'ollama_chat/' prefix is a DSPy/LiteLLM routing convention and
        would make Ollama fail to resolve the model.
        """
        return ChatOllama(
            model=model,  # direct name, no prefix
            **config)

    def _create_dspy_instance(self, model: str, **config) -> dspy.LM:
        """Create a DSPy LM client (model gets the ollama_chat/ prefix)."""
        return dspy.LM(
            model=self.format_model_name_for_provider(model),  # "ollama_chat/llama3.2"
            **config
        )
|
job_writing_agent/utils/llm_provider_factory.py
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from typing import Any, Dict, Literal
|
| 3 |
+
|
| 4 |
+
import dspy
|
| 5 |
+
from langchain_core.language_models.chat_models import BaseChatModel
|
| 6 |
+
|
| 7 |
+
from .llm_client import (
|
| 8 |
+
CerebrasChatProvider,
|
| 9 |
+
LLMProvider,
|
| 10 |
+
OllamaChatProvider,
|
| 11 |
+
OpenRouterChatProvider,
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class LLMFactory:
    """Factory for creating LLM instances supporting multiple frameworks and providers.

    Supports both LangChain and DSPy frameworks; each registered provider
    handles its own model-name formatting and config validation.

    Example:
        >>> factory = LLMFactory()
        >>>
        >>> # LangChain usage
        >>> llm = factory.create_langchain("llama3.1-8b", provider="cerebras")
        >>> response = llm.invoke("Hello!")
        >>>
        >>> # DSPy usage
        >>> lm = factory.create_dspy("llama3.1-8b", provider="cerebras")
        >>> dspy.configure(lm=lm)
    """

    def __init__(self, default_provider: str = "openrouter"):
        """Initialize factory with the built-in providers.

        Args:
            default_provider: Provider used when ``create`` is called
                without an explicit (or with an unknown) provider name.
        """
        self._providers: Dict[str, LLMProvider] = {
            "ollama": OllamaChatProvider(),
            "openrouter": OpenRouterChatProvider(),
            "cerebras": CerebrasChatProvider(),
        }
        self._default_provider = default_provider

        logger.info(
            f"LLMFactory initialized with providers: {list(self._providers.keys())}, "
            f"default: {default_provider}"
        )

    def create(
        self,
        model: str,
        provider: str | None = None,
        framework: Literal["langchain", "dspy"] = "langchain",
        **config,
    ) -> BaseChatModel | dspy.LM:
        """Create an LLM instance for the given framework and provider.

        Args:
            model: Model name/identifier (format depends on provider).
            provider: Provider name ('ollama', 'openrouter', 'cerebras').
                Defaults to the factory's default provider.
            framework: 'langchain' or 'dspy' (default: 'langchain').
            **config: Extra generation parameters (temperature, max_tokens,
                ...); unsupported ones are filtered by the provider.

        Returns:
            LangChain BaseChatModel or DSPy LM instance.

        Note:
            An unknown ``provider`` is NOT an error here: a warning is
            logged and the factory falls back to its default provider.
            (Provider-level validation may still raise, e.g. for a
            missing API key.)

        Examples:
            >>> factory = LLMFactory()
            >>> llm = factory.create(
            ...     "llama3.1-8b",
            ...     provider="cerebras",
            ...     framework="langchain",
            ...     temperature=0.7
            ... )
            >>> lm = factory.create(
            ...     "openai/gpt-4",
            ...     provider="openrouter",
            ...     framework="dspy",
            ...     temperature=0.5
            ... )
        """
        provider = provider or self._default_provider

        if provider not in self._providers:
            available = ", ".join(self._providers.keys())
            logger.warning(
                f"Invalid provider '{provider}'. Available providers: {available}. "
                f"Falling back to default: {self._default_provider}"
            )
            provider = self._default_provider

        strategy = self._providers[provider]
        logger.debug(
            f"Creating {framework} LLM: provider={provider}, model={model}, config={config}"
        )

        return strategy.create_llm_instance(model, framework=framework, **config)

    def create_langchain(
        self, model: str, provider: str | None = None, **config
    ) -> BaseChatModel:
        """Convenience wrapper: ``create`` with framework='langchain'.

        Args:
            model: Model name/identifier.
            provider: Provider name (defaults to the factory default).
            **config: Generation parameters.

        Returns:
            LangChain BaseChatModel instance.

        Example:
            >>> factory = LLMFactory()
            >>> llm = factory.create_langchain(
            ...     "llama3.1-8b", provider="cerebras",
            ...     temperature=0.7, max_tokens=2048
            ... )
            >>> print(llm.invoke("Explain quantum computing").content)
        """
        return self.create(model, provider, framework="langchain", **config)

    def create_dspy(self, model: str, provider: str | None = None, **config) -> dspy.LM:
        """Convenience wrapper: ``create`` with framework='dspy'.

        Args:
            model: Model name/identifier.
            provider: Provider name (defaults to the factory default).
            **config: Generation parameters.

        Returns:
            DSPy LM instance.

        Example:
            >>> import dspy
            >>> factory = LLMFactory()
            >>> lm = factory.create_dspy(
            ...     "llama3.1-8b", provider="cerebras", temperature=0.5
            ... )
            >>> dspy.configure(lm=lm)  # set as the default LM for DSPy
        """
        return self.create(model, provider, framework="dspy", **config)

    def register_provider(self, name: str, provider: LLMProvider) -> None:
        """Register a custom provider (overwrites an existing one of the same name).

        Args:
            name: Unique identifier for the provider.
            provider: LLMProvider instance implementing the abstract API
                (get_default_config, get_langchain_params, get_dspy_params,
                format_model_name_for_provider, validate_config,
                _create_langchain_instance, _create_dspy_instance).

        Example:
            >>> factory = LLMFactory()
            >>> factory.register_provider('custom', CustomProvider())
            >>> llm = factory.create_langchain("my-model", provider="custom")
        """
        if name in self._providers:
            logger.warning(f"Overwriting existing provider: {name}")

        self._providers[name] = provider
        logger.info(f"Registered provider: {name}")

    def unregister_provider(self, name: str) -> None:
        """Remove a provider from the factory.

        Args:
            name: Provider name to remove.

        Raises:
            ValueError: If the provider doesn't exist or is the default provider.
        """
        if name not in self._providers:
            raise ValueError(f"Provider '{name}' not registered")

        if name == self._default_provider:
            raise ValueError(f"Cannot unregister default provider '{name}'")

        del self._providers[name]
        logger.info(f"Unregistered provider: {name}")

    def list_providers(self) -> list[str]:
        """Get the list of registered provider names.

        Returns:
            List of registered provider names.

        Example:
            >>> factory = LLMFactory()
            >>> factory.list_providers()
            ['ollama', 'openrouter', 'cerebras']
        """
        return list(self._providers.keys())

    def get_provider_info(self, provider: str) -> Dict[str, Any]:
        """Get detailed configuration information about a provider.

        Args:
            provider: Provider name.

        Returns:
            Dict with keys 'name', 'default_config', 'langchain_params',
            'dspy_params'.

        Raises:
            ValueError: If the provider is unknown.

        Example:
            >>> factory = LLMFactory()
            >>> info = factory.get_provider_info("cerebras")
            >>> info['default_config']
            {'temperature': 0.2, 'max_tokens': 1024}
        """
        if provider not in self._providers:
            available = ", ".join(self._providers.keys())
            raise ValueError(
                f"Unknown provider '{provider}'. Available providers: {available}"
            )

        strategy = self._providers[provider]
        return {
            "name": provider,
            "default_config": strategy.get_default_config(),
            "langchain_params": list(strategy.get_langchain_params()),
            "dspy_params": list(strategy.get_dspy_params()),
        }

    def get_all_providers_info(self) -> Dict[str, Dict[str, Any]]:
        """Get information about all registered providers.

        Returns:
            Dict mapping each provider name to its ``get_provider_info``
            result.

        Example:
            >>> factory = LLMFactory()
            >>> for provider, info in factory.get_all_providers_info().items():
            ...     print(f"{provider}: {info['default_config']}")
        """
        return {name: self.get_provider_info(name) for name in self._providers.keys()}

    def set_default_provider(self, provider: str) -> None:
        """Change the default provider.

        Args:
            provider: Provider name to set as default.

        Raises:
            ValueError: If the provider is unknown.

        Example:
            >>> factory = LLMFactory()
            >>> factory.set_default_provider('cerebras')
            >>> llm = factory.create_langchain("llama3.1-8b")  # uses cerebras
        """
        if provider not in self._providers:
            available = ", ".join(self._providers.keys())
            raise ValueError(
                f"Cannot set unknown provider '{provider}' as default. "
                f"Available: {available}"
            )

        old_default = self._default_provider
        self._default_provider = provider
        logger.info(f"Changed default provider from '{old_default}' to '{provider}'")

    def get_default_provider(self) -> str:
        """Get the current default provider name.

        Returns:
            Name of the default provider.
        """
        return self._default_provider

    def __repr__(self) -> str:
        """Unambiguous representation listing providers and the default."""
        return (
            f"LLMFactory("
            f"providers={list(self._providers.keys())}, "
            f"default='{self._default_provider}'"
            f")"
        )

    def __str__(self) -> str:
        """User-friendly string representation."""
        return (
            f"LLMFactory with {len(self._providers)} providers: "
            f"{', '.join(self._providers.keys())} "
            f"(default: {self._default_provider})"
        )
|
job_writing_agent/utils/result_utils.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utility functions for handling and saving workflow results.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def print_result(content_type: str, final_content: str):
    """
    Display the final generated content on stdout, framed by separators.

    Args:
        content_type: The kind of content shown (e.g., "cover_letter");
            rendered upper-cased in the banner line.
        final_content: The final generated content string.
    """
    banner = "=" * 80
    print(f"\n{banner}")
    print(f"FINAL {content_type.upper()}:\n{final_content}")
    print(banner)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def save_result(content_type: str, final_content: str) -> str:
    """
    Persist the final generated content to a timestamped UTF-8 text file.

    Args:
        content_type: The kind of content being saved; becomes the
            filename prefix.
        final_content: The final generated content string.

    Returns:
        The path of the file that was written.
    """
    timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
    output_file = f"{content_type}_{timestamp}.txt"
    with open(output_file, "w", encoding="utf-8") as handle:
        handle.write(final_content)
    logger.info("Saved to %s", output_file)
    return output_file
|
{utils β job_writing_agent/utils}/vector_store.py
RENAMED
|
File without changes
|
job_writing_agent/workflow.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Workflow runner for the job application writer.
|
| 3 |
+
This module provides the JobWorkflow class and CLI runner.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import asyncio
|
| 7 |
+
import logging
|
| 8 |
+
import sys
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
from functools import cached_property
|
| 11 |
+
from typing import Optional, Dict, Any
|
| 12 |
+
|
| 13 |
+
from langchain_core.tracers import ConsoleCallbackHandler
|
| 14 |
+
from langgraph.graph import StateGraph
|
| 15 |
+
from langfuse import Langfuse
|
| 16 |
+
from langgraph.graph.state import CompiledStateGraph
|
| 17 |
+
|
| 18 |
+
from job_writing_agent.agents.nodes import (
|
| 19 |
+
create_draft,
|
| 20 |
+
critique_draft,
|
| 21 |
+
finalize_document,
|
| 22 |
+
human_approval,
|
| 23 |
+
)
|
| 24 |
+
from job_writing_agent.classes import AppState, DataLoadState
|
| 25 |
+
from job_writing_agent.nodes import Dataloading, generate_variations, self_consistency_vote
|
| 26 |
+
from job_writing_agent.nodes.research_workflow import research_workflow
|
| 27 |
+
from job_writing_agent.utils.application_cli_interface import handle_cli
|
| 28 |
+
from job_writing_agent.utils.result_utils import print_result, save_result
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class JobWorkflow:
    """
    Workflow runner for the job application writer.

    Builds the LangGraph state machine (load -> research -> draft ->
    variations -> self-consistency -> critique -> human approval ->
    finalize) and executes it asynchronously.
    """

    def __init__(self, resume: str, job_description_source: str, content: str):
        """
        Args:
            resume: Path to the resume file.
            job_description_source: Job posting source (URL or file path).
            content: Content type to generate (e.g., "cover_letter").
        """
        self.resume = resume
        self.job_description_source = job_description_source
        self.content = content
        self.dataloading = Dataloading()
        # Instantiated for its side effect of initializing Langfuse tracing.
        self.langfuse = Langfuse()

    @cached_property
    def app_state(self) -> AppState:
        """Initial application state fed into the graph (built once)."""
        return AppState(
            resume_path=self.resume,
            job_description_source=self.job_description_source,
            company_research_data=None,
            draft="",
            feedback="",
            final="",
            content=self.content,
            current_node="",
        )

    @cached_property
    def job_app_graph(self) -> StateGraph:
        """Assemble the (uncompiled) workflow graph."""
        graph = StateGraph(DataLoadState)
        graph.add_node("initialize_system", self.dataloading.set_agent_system_message)
        graph.add_node("load", self.dataloading.run)
        graph.add_node("research", research_workflow)
        graph.add_node("create_draft", create_draft)
        graph.add_node("variations", generate_variations)
        graph.add_node("self_consistency", self_consistency_vote)
        graph.add_node("critique", critique_draft)
        graph.add_node("human_approval", human_approval)
        graph.add_node("finalize", finalize_document)

        graph.set_entry_point("initialize_system")
        graph.set_finish_point("finalize")
        graph.add_edge("initialize_system", "load")
        # "load" routes dynamically depending on input verification.
        graph.add_conditional_edges("load", self.dataloading.verify_inputs)
        graph.add_edge("research", "create_draft")
        graph.add_edge("create_draft", "variations")
        graph.add_edge("variations", "self_consistency")
        graph.add_edge("self_consistency", "critique")
        graph.add_edge("critique", "human_approval")
        graph.add_edge("human_approval", "finalize")
        return graph

    async def run(self) -> Optional[Dict[str, Any]]:
        """
        Run the job application writer workflow.

        Returns:
            The final graph state dict on success, or None if compiling
            or running the graph failed (errors are logged).
        """
        try:
            compiled_graph = self.compile()
        except Exception as e:
            logger.error("Error compiling graph: %s", e)
            return None

        run_name = f"Job Application Writer - {self.app_state['content']} - {datetime.now():%Y-%m-%d-%H%M%S}"
        # Fix: callbacks/run_name/tags are top-level RunnableConfig keys.
        # The previous code nested them under "configurable", where
        # LangChain/LangGraph never read them, so console tracing and
        # tagging silently did nothing.
        config = {
            "configurable": {
                "thread_id": f"job_app_session_{datetime.now():%Y%m%d%H%M%S}",
            },
            "callbacks": [ConsoleCallbackHandler()],
            "run_name": run_name,
            "tags": ["job-application", self.app_state["content"]],
            "recursion_limit": 10,
        }
        try:
            self.app_state["current_node"] = "initialize_system"
            graph_output = await compiled_graph.ainvoke(self.app_state, config=config)
        except Exception as e:
            logger.error("Error running graph: %s", e)
            return None
        return graph_output

    def compile(self) -> CompiledStateGraph:
        """Compile the workflow graph."""
        return self.job_app_graph.compile()
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def main():
    """CLI entry point: parse arguments, run the workflow, report the outcome."""
    args = handle_cli()
    workflow = JobWorkflow(
        resume=args.resume,
        job_description_source=args.job_posting,
        content=args.content_type,
    )
    result = asyncio.run(workflow.run())
    # Guard clause: bail out with a non-zero exit code on failure.
    if not result:
        print("Error running workflow.")
        sys.exit(1)
    print_result(args.content_type, result["final"])
    save_result(args.content_type, result["final"])
    print("Workflow completed successfully.")


if __name__ == "__main__":
    main()
|
langgraph.json
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"dependencies": [
|
| 3 |
-
"."
|
| 4 |
-
],
|
| 5 |
-
"graphs": {
|
| 6 |
-
"job_application": "langgraph_init:job_app_graph"
|
| 7 |
-
},
|
| 8 |
-
"env": "./.env",
|
| 9 |
-
"python_version": "3.11"
|
| 10 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
nodes/initializing.py
DELETED
|
@@ -1,225 +0,0 @@
|
|
| 1 |
-
# -*- coding: utf-8 -*-
|
| 2 |
-
"""
|
| 3 |
-
Created on Mon Oct 23 16:49:52 2023
|
| 4 |
-
@author: rishabhaggarwal
|
| 5 |
-
"""
|
| 6 |
-
import os
|
| 7 |
-
import logging
|
| 8 |
-
from typing_extensions import Literal
|
| 9 |
-
|
| 10 |
-
from langchain_core.documents import Document
|
| 11 |
-
from langchain_core.messages import SystemMessage
|
| 12 |
-
|
| 13 |
-
from job_writer.classes import AppState, DataLoadState
|
| 14 |
-
from job_writer.utils.document_processing import (
|
| 15 |
-
parse_resume,
|
| 16 |
-
get_job_description
|
| 17 |
-
)
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
logger = logging.getLogger(__name__)
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
class Dataloading:
|
| 24 |
-
"""
|
| 25 |
-
Initialize the state for the job application writer workflow.
|
| 26 |
-
"""
|
| 27 |
-
def __init__(self):
|
| 28 |
-
pass
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
async def system_setup(self, state: AppState) -> DataLoadState:
|
| 32 |
-
"""Initialize conversation by setting up a persona through System Prompt."""
|
| 33 |
-
|
| 34 |
-
resume_path = state.get("resume_path")
|
| 35 |
-
|
| 36 |
-
# Verify if the resume file path provided is valid
|
| 37 |
-
if not resume_path:
|
| 38 |
-
logger.error("Resume path is not provided in the state.")
|
| 39 |
-
elif not os.path.exists(resume_path):
|
| 40 |
-
logger.error("Resume file does not exist at path: %s", resume_path)
|
| 41 |
-
# Similar handling as above:
|
| 42 |
-
# raise FileNotFoundError(f"Resume file not found: {resume_path}")
|
| 43 |
-
elif not os.path.isfile(resume_path):
|
| 44 |
-
logger.error("The path provided for the resume is not a file: %s", resume_path)
|
| 45 |
-
# Similar handling:
|
| 46 |
-
# raise ValueError(f"Resume path is not a file: {resume_path}")
|
| 47 |
-
else:
|
| 48 |
-
logger.info("Resume path verified: %s", resume_path)
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
persona_init_message = SystemMessage(
|
| 52 |
-
content="You are my dedicated assistant for writing job application content, "
|
| 53 |
-
"including cover letters, LinkedIn outreach messages, and responses to "
|
| 54 |
-
"job-specfific questions (e.g., experience, culture fit, or motivation)."
|
| 55 |
-
)
|
| 56 |
-
messages = state.get("messages", [])
|
| 57 |
-
messages.append(persona_init_message)
|
| 58 |
-
|
| 59 |
-
return {
|
| 60 |
-
**state,
|
| 61 |
-
"messages": messages,
|
| 62 |
-
"current_node": "initialize_system"
|
| 63 |
-
|
| 64 |
-
}
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
async def get_resume(self, resume_source):
|
| 68 |
-
"""
|
| 69 |
-
Get the resume te
|
| 70 |
-
"""
|
| 71 |
-
try:
|
| 72 |
-
print("Parsing resume....")
|
| 73 |
-
resume_text = ""
|
| 74 |
-
resume_chunks = parse_resume(resume_source)
|
| 75 |
-
for chunk in resume_chunks:
|
| 76 |
-
if hasattr(chunk, 'page_content') and chunk.page_content:
|
| 77 |
-
resume_text += chunk.page_content
|
| 78 |
-
elif isinstance(chunk, str) and chunk: # If parse_resume (util) returns list of strings
|
| 79 |
-
resume_text += chunk
|
| 80 |
-
else:
|
| 81 |
-
logger.debug("Skipping empty or invalid chunk in resume: %s", chunk)
|
| 82 |
-
continue
|
| 83 |
-
return resume_text
|
| 84 |
-
except Exception as e:
|
| 85 |
-
print(f"Error parsing resume: {e}")
|
| 86 |
-
raise e
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
async def parse_job_description(self, job_description_source):
|
| 90 |
-
try:
|
| 91 |
-
logger.info("Parsing job description from: %s", job_description_source)
|
| 92 |
-
document: Document = get_job_description(job_description_source)
|
| 93 |
-
|
| 94 |
-
company_name = ""
|
| 95 |
-
job_posting_text = ""
|
| 96 |
-
|
| 97 |
-
if document:
|
| 98 |
-
# Extract company name from metadata
|
| 99 |
-
if hasattr(document, 'metadata') and isinstance(document.metadata, dict):
|
| 100 |
-
company_name = document.metadata.get("company_name", "")
|
| 101 |
-
if not company_name:
|
| 102 |
-
logger.warning("Company name not found in job description metadata.")
|
| 103 |
-
else:
|
| 104 |
-
logger.warning("Metadata attribute not found or not a dictionary in the Document for job description.")
|
| 105 |
-
|
| 106 |
-
# Extract the job posting text from page_content
|
| 107 |
-
if hasattr(document, 'page_content'):
|
| 108 |
-
job_posting_text = document.page_content
|
| 109 |
-
if not job_posting_text:
|
| 110 |
-
logger.info("Parsed job posting text is empty.")
|
| 111 |
-
else:
|
| 112 |
-
logger.warning("page_content attribute not found in the Document for job description.")
|
| 113 |
-
else:
|
| 114 |
-
logger.warning("get_job_description returned None for source: %s", job_description_source)
|
| 115 |
-
|
| 116 |
-
return job_posting_text, company_name
|
| 117 |
-
|
| 118 |
-
except Exception as e:
|
| 119 |
-
logger.error("Error parsing job description from source '%s': %s", job_description_source, e, exc_info=True)
|
| 120 |
-
raise e
|
| 121 |
-
|
| 122 |
-
async def load_inputs(self, state: DataLoadState) -> AppState:
|
| 123 |
-
"""
|
| 124 |
-
Parse the resume and job description to prepare the data from the context
|
| 125 |
-
which is required for the job application writer for the current state
|
| 126 |
-
"""
|
| 127 |
-
|
| 128 |
-
resume_source = state.get("resume_path", "")
|
| 129 |
-
job_description_source = state.get("job_description_source", None)
|
| 130 |
-
|
| 131 |
-
# Initialize result containers\
|
| 132 |
-
resume_text = ""
|
| 133 |
-
job_posting_text = ""
|
| 134 |
-
company_name = ""
|
| 135 |
-
resume_chunks = [] # Handle job description input
|
| 136 |
-
if job_description_source:
|
| 137 |
-
try:
|
| 138 |
-
job_posting_text, company_name = await self.parse_job_description(job_description_source)
|
| 139 |
-
print(f"Job description parsing complete. Length: {len(job_posting_text) if job_posting_text else 0}")
|
| 140 |
-
|
| 141 |
-
# Ensure job_posting_text is not empty
|
| 142 |
-
if not job_posting_text:
|
| 143 |
-
print("WARNING: Job posting text is empty after parsing.")
|
| 144 |
-
job_posting_text = "No job description available. Please check the URL or provide a different source."
|
| 145 |
-
except Exception as e:
|
| 146 |
-
print(f"Error parsing job description: {e} in file {__file__}")
|
| 147 |
-
# Set a default value to prevent errors
|
| 148 |
-
job_posting_text = "Error parsing job description."
|
| 149 |
-
company_name = "Unknown Company"
|
| 150 |
-
|
| 151 |
-
if resume_source:
|
| 152 |
-
try:
|
| 153 |
-
resume_text = await self.get_resume(resume_source)
|
| 154 |
-
except Exception as e:
|
| 155 |
-
print(f"Error parsing resume: {e} in file {__file__}")
|
| 156 |
-
raise e
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
# If either is missing, prompt the user
|
| 160 |
-
if state["current_node"] == "verify" and not resume_text:
|
| 161 |
-
resume_chunks = input("Please paste the resume in text format: ")
|
| 162 |
-
resume_text = [Document(page_content=resume_chunks, metadata={"source": "resume"})]
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
if state["current_node"] == "verify" and not job_posting_text:
|
| 166 |
-
job_text = input("Please paste the job posting in text format: ")
|
| 167 |
-
job_posting_text = [job_text]
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
# Extract company name
|
| 171 |
-
state["company_research_data"] = {'resume': resume_text, 'job_description': job_posting_text, 'company_name': company_name}
|
| 172 |
-
|
| 173 |
-
state["current_node"] = "load_inputs"
|
| 174 |
-
|
| 175 |
-
return state
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
def validate_data_load_state(self,state: DataLoadState):
|
| 179 |
-
assert state.company_research_data.get("resume"), "Resume is missing in company_research_data"
|
| 180 |
-
assert state.company_research_data.get("job_description"), "Job description is missing"
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
def verify_inputs(self, state: AppState) -> Literal["load", "research"]:
|
| 184 |
-
"""Verify that required inputs are present."""
|
| 185 |
-
|
| 186 |
-
print("Verifying Inputs")
|
| 187 |
-
state["current_node"] = "verify"
|
| 188 |
-
|
| 189 |
-
logger.info("Verifying loaded inputs!")
|
| 190 |
-
|
| 191 |
-
assert state["company_research_data"].get("resume"), "Resume is missing in company_research_data"
|
| 192 |
-
assert state["company_research_data"].get("job_description"), "Job description is missing"
|
| 193 |
-
|
| 194 |
-
if not state.get("company_research_data"):
|
| 195 |
-
missing_items = []
|
| 196 |
-
if not state.get("company_research_data").get("resume", ""):
|
| 197 |
-
missing_items.append("resume")
|
| 198 |
-
if not state.get("company_research_data").get("job_description", ""):
|
| 199 |
-
missing_items.append("job description")
|
| 200 |
-
print(f'Missing required data: {", ".join(missing_items)}')
|
| 201 |
-
|
| 202 |
-
return "load"
|
| 203 |
-
|
| 204 |
-
# Normalize state content to strings
|
| 205 |
-
for key in ["resume", "job_description"]:
|
| 206 |
-
try:
|
| 207 |
-
if isinstance(state["company_research_data"][key], (list, tuple)):
|
| 208 |
-
state["company_research_data"][key] = " ".join(str(x) for x in state["company_research_data"][key])
|
| 209 |
-
elif isinstance(state["company_research_data"][key], dict):
|
| 210 |
-
state["company_research_data"][key] = str(state["company_research_data"][key])
|
| 211 |
-
else:
|
| 212 |
-
state["company_research_data"][key] = str(state["company_research_data"][key])
|
| 213 |
-
except Exception as e:
|
| 214 |
-
logger.warning("Error converting %s to string: %s", key, e)
|
| 215 |
-
raise e
|
| 216 |
-
|
| 217 |
-
return "research"
|
| 218 |
-
|
| 219 |
-
async def run(self, state: DataLoadState) -> AppState:
|
| 220 |
-
"""
|
| 221 |
-
Run the InitializeState class to initialize
|
| 222 |
-
the state for the job application writer workflow.
|
| 223 |
-
"""
|
| 224 |
-
state = await self.load_inputs(state)
|
| 225 |
-
return state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
setup.py
DELETED
|
File without changes
|
testing.ipynb
DELETED
|
@@ -1,1069 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "code",
|
| 5 |
-
"execution_count": 3,
|
| 6 |
-
"id": "d26f6647",
|
| 7 |
-
"metadata": {},
|
| 8 |
-
"outputs": [],
|
| 9 |
-
"source": [
|
| 10 |
-
"from langchain.prompts import ChatPromptTemplate\n",
|
| 11 |
-
"from langchain_core.messages import AIMessage, HumanMessage, SystemMessage"
|
| 12 |
-
]
|
| 13 |
-
},
|
| 14 |
-
{
|
| 15 |
-
"cell_type": "markdown",
|
| 16 |
-
"id": "f337ecb5",
|
| 17 |
-
"metadata": {},
|
| 18 |
-
"source": []
|
| 19 |
-
},
|
| 20 |
-
{
|
| 21 |
-
"cell_type": "code",
|
| 22 |
-
"execution_count": 9,
|
| 23 |
-
"id": "92b12890",
|
| 24 |
-
"metadata": {},
|
| 25 |
-
"outputs": [],
|
| 26 |
-
"source": [
|
| 27 |
-
"messages = ChatPromptTemplate.from_messages([SystemMessage(content=f\"\"\"\n",
|
| 28 |
-
" You are a Tavily Search Query specialist. Follow the JSON schema below exactly:\n",
|
| 29 |
-
"\n",
|
| 30 |
-
" Rules:\n",
|
| 31 |
-
" 1. Generate Tavily DSL only (no natural language outside the JSON).\n",
|
| 32 |
-
" 2. Map the job description into five categories:\n",
|
| 33 |
-
" β’ query1: recent developments\n",
|
| 34 |
-
" β’ query2: recent news\n",
|
| 35 |
-
" β’ query3:company profile\n",
|
| 36 |
-
" β’ query4: key customers & partners\n",
|
| 37 |
-
" β’ query5: culture & values\n",
|
| 38 |
-
" 3. Each value is a twoβelement list:\n",
|
| 39 |
-
" [<query string>, <oneβsentence rationale>]\n",
|
| 40 |
-
" 4. Use filters (source:, date:[now-30d TO now], site:β¦, etc.) where helpful.\n",
|
| 41 |
-
" 5. If information is missing in the JD, fall back sensibly\n",
|
| 42 |
-
" (e.g. search for βemployee testimonialsβ).\n",
|
| 43 |
-
" 6. Return **only** valid JSON.\n",
|
| 44 |
-
" \"\"\"\n",
|
| 45 |
-
" )\n",
|
| 46 |
-
" , HumanMessage(content=\"Hello World\")])"
|
| 47 |
-
]
|
| 48 |
-
},
|
| 49 |
-
{
|
| 50 |
-
"cell_type": "code",
|
| 51 |
-
"execution_count": 6,
|
| 52 |
-
"id": "e38c3632",
|
| 53 |
-
"metadata": {},
|
| 54 |
-
"outputs": [],
|
| 55 |
-
"source": [
|
| 56 |
-
"input_message = ChatPromptTemplate.from_messages([HumanMessage(content=\"Hello World\")])\n"
|
| 57 |
-
]
|
| 58 |
-
},
|
| 59 |
-
{
|
| 60 |
-
"cell_type": "code",
|
| 61 |
-
"execution_count": 11,
|
| 62 |
-
"id": "dac1ec19",
|
| 63 |
-
"metadata": {},
|
| 64 |
-
"outputs": [
|
| 65 |
-
{
|
| 66 |
-
"name": "stdout",
|
| 67 |
-
"output_type": "stream",
|
| 68 |
-
"text": [
|
| 69 |
-
"================================\u001b[1m System Message \u001b[0m================================\n",
|
| 70 |
-
"\n",
|
| 71 |
-
"\n",
|
| 72 |
-
" You are a Tavily Search Query specialist. Follow the JSON schema below exactly:\n",
|
| 73 |
-
"\n",
|
| 74 |
-
" Rules:\n",
|
| 75 |
-
" 1. Generate Tavily DSL only (no natural language outside the JSON).\n",
|
| 76 |
-
" 2. Map the job description into five categories:\n",
|
| 77 |
-
" β’ query1: recent developments\n",
|
| 78 |
-
" β’ query2: recent news\n",
|
| 79 |
-
" β’ query3:company profile\n",
|
| 80 |
-
" β’ query4: key customers & partners\n",
|
| 81 |
-
" β’ query5: culture & values\n",
|
| 82 |
-
" 3. Each value is a twoβelement list:\n",
|
| 83 |
-
" [<query string>, <oneβsentence rationale>]\n",
|
| 84 |
-
" 4. Use filters (source:, date:[now-30d TO now], site:β¦, etc.) where helpful.\n",
|
| 85 |
-
" 5. If information is missing in the JD, fall back sensibly\n",
|
| 86 |
-
" (e.g. search for βemployee testimonialsβ).\n",
|
| 87 |
-
" 6. Return **only** valid JSON.\n",
|
| 88 |
-
" \n",
|
| 89 |
-
"\n",
|
| 90 |
-
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
| 91 |
-
"\n",
|
| 92 |
-
"Hello World\n"
|
| 93 |
-
]
|
| 94 |
-
}
|
| 95 |
-
],
|
| 96 |
-
"source": [
|
| 97 |
-
"messages.pretty_print()"
|
| 98 |
-
]
|
| 99 |
-
},
|
| 100 |
-
{
|
| 101 |
-
"cell_type": "code",
|
| 102 |
-
"execution_count": 14,
|
| 103 |
-
"id": "7ebd0d0d",
|
| 104 |
-
"metadata": {},
|
| 105 |
-
"outputs": [],
|
| 106 |
-
"source": [
|
| 107 |
-
"from langchain.prompts import (\n",
|
| 108 |
-
" ChatPromptTemplate,\n",
|
| 109 |
-
" HumanMessagePromptTemplate,\n",
|
| 110 |
-
" SystemMessagePromptTemplate,\n",
|
| 111 |
-
")\n",
|
| 112 |
-
"\n",
|
| 113 |
-
"input_message = HumanMessagePromptTemplate.from_template(\"Below is the required job description and resume: {background_information}\", input_variables=[\"background_information\"])"
|
| 114 |
-
]
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"cell_type": "code",
|
| 118 |
-
"execution_count": 17,
|
| 119 |
-
"id": "cd6b3cb8",
|
| 120 |
-
"metadata": {},
|
| 121 |
-
"outputs": [
|
| 122 |
-
{
|
| 123 |
-
"data": {
|
| 124 |
-
"text/plain": [
|
| 125 |
-
"HumanMessage(content='Below is the required job description and resume: This is Rishabh', additional_kwargs={}, response_metadata={})"
|
| 126 |
-
]
|
| 127 |
-
},
|
| 128 |
-
"execution_count": 17,
|
| 129 |
-
"metadata": {},
|
| 130 |
-
"output_type": "execute_result"
|
| 131 |
-
}
|
| 132 |
-
],
|
| 133 |
-
"source": [
|
| 134 |
-
"input_message.format(background_information=\"This is Rishabh\")"
|
| 135 |
-
]
|
| 136 |
-
},
|
| 137 |
-
{
|
| 138 |
-
"cell_type": "code",
|
| 139 |
-
"execution_count": 18,
|
| 140 |
-
"id": "c9628bed",
|
| 141 |
-
"metadata": {},
|
| 142 |
-
"outputs": [],
|
| 143 |
-
"source": [
|
| 144 |
-
"import re\n",
|
| 145 |
-
"from pathlib import Path\n",
|
| 146 |
-
"from typing import List\n",
|
| 147 |
-
"\n",
|
| 148 |
-
"from langchain_community.document_loaders import PyPDFLoader\n",
|
| 149 |
-
"from langchain.text_splitter import (\n",
|
| 150 |
-
" MarkdownHeaderTextSplitter,\n",
|
| 151 |
-
" RecursiveCharacterTextSplitter,\n",
|
| 152 |
-
")\n",
|
| 153 |
-
"from langchain.schema import Document"
|
| 154 |
-
]
|
| 155 |
-
},
|
| 156 |
-
{
|
| 157 |
-
"cell_type": "code",
|
| 158 |
-
"execution_count": 29,
|
| 159 |
-
"id": "c352da72",
|
| 160 |
-
"metadata": {},
|
| 161 |
-
"outputs": [],
|
| 162 |
-
"source": [
|
| 163 |
-
"def _collapse_ws(text: str) -> str:\n",
|
| 164 |
-
" \"\"\"Collapse stray whitespace but keep bullet breaks.\"\"\"\n",
|
| 165 |
-
" text = re.sub(r\"\\n\\s*([β’\\-β])\\s*\", r\"\\n\\1 \", text)\n",
|
| 166 |
-
" return re.sub(r\"[ \\t\\r\\f\\v]+\", \" \", text).replace(\" \\n\", \"\\n\").strip()\n",
|
| 167 |
-
"\n",
|
| 168 |
-
"\n",
|
| 169 |
-
"def _is_heading(line: str) -> bool:\n",
|
| 170 |
-
" return (\n",
|
| 171 |
-
" line.isupper()\n",
|
| 172 |
-
" and len(line.split()) <= 5\n",
|
| 173 |
-
" and not re.search(r\"\\d\", line)\n",
|
| 174 |
-
" )\n",
|
| 175 |
-
"\n",
|
| 176 |
-
"\n",
|
| 177 |
-
"def parse_resume(pdf_path: str | Path) -> List[Document]:\n",
|
| 178 |
-
" \"\"\"\n",
|
| 179 |
-
" Load a singleβpage rΓ©sumΓ© PDF β list[Document] chunks\n",
|
| 180 |
-
" (β400 chars, 50βchar overlap) with {source, section} metadata.\n",
|
| 181 |
-
" \"\"\"\n",
|
| 182 |
-
" text = PyPDFLoader(str(pdf_path), extraction_mode=\"layout\").load()[0].page_content\n",
|
| 183 |
-
" print(text)\n",
|
| 184 |
-
" text = _collapse_ws(text)\n",
|
| 185 |
-
"\n",
|
| 186 |
-
" # Tag headings with \"###\" so Markdown splitter can see them\n",
|
| 187 |
-
" tagged_lines = [\n",
|
| 188 |
-
" f\"### {ln}\" if _is_heading(ln) else ln\n",
|
| 189 |
-
" for ln in text.splitlines()\n",
|
| 190 |
-
" ]\n",
|
| 191 |
-
" md_text = \"\\n\".join(tagged_lines)\n",
|
| 192 |
-
"\n",
|
| 193 |
-
" if \"###\" in md_text:\n",
|
| 194 |
-
" splitter = MarkdownHeaderTextSplitter(\n",
|
| 195 |
-
" headers_to_split_on=[(\"###\", \"section\")]\n",
|
| 196 |
-
" )\n",
|
| 197 |
-
" chunks = splitter.split_text(md_text) # already returns Documents\n",
|
| 198 |
-
" else:\n",
|
| 199 |
-
" print(f\"No headings found.\")\n",
|
| 200 |
-
" splitter = RecursiveCharacterTextSplitter(\n",
|
| 201 |
-
" chunk_size=400, chunk_overlap=50\n",
|
| 202 |
-
" )\n",
|
| 203 |
-
" chunks = [\n",
|
| 204 |
-
" Document(page_content=chunk, metadata={})\n",
|
| 205 |
-
" for chunk in splitter.split_text(md_text)\n",
|
| 206 |
-
" ]\n",
|
| 207 |
-
"\n",
|
| 208 |
-
" # Attach metadata\n",
|
| 209 |
-
" for doc in chunks:\n",
|
| 210 |
-
" doc.metadata.setdefault(\"source\", str(pdf_path))\n",
|
| 211 |
-
" # section already present if headerβsplitter was used\n",
|
| 212 |
-
" return chunks\n"
|
| 213 |
-
]
|
| 214 |
-
},
|
| 215 |
-
{
|
| 216 |
-
"cell_type": "code",
|
| 217 |
-
"execution_count": 31,
|
| 218 |
-
"id": "14e062e4",
|
| 219 |
-
"metadata": {},
|
| 220 |
-
"outputs": [
|
| 221 |
-
{
|
| 222 |
-
"name": "stdout",
|
| 223 |
-
"output_type": "stream",
|
| 224 |
-
"text": [
|
| 225 |
-
"Rishabh Aggarwal\n",
|
| 226 |
-
" (602) 580-5734 β’ raggar15@asu.edu β’ LinkedIn β’ Tempe, AZ\n",
|
| 227 |
-
"TECHNICAL SKILLS\n",
|
| 228 |
-
"Programming Languages: Python, Java, JavaScript, Bash, HTML, CSS\n",
|
| 229 |
-
"Databases: SQL (PostgreSQL, MySQL, SQLite), NoSQL (MongoDB, Redis, DynamoDB, Pinecone)\n",
|
| 230 |
-
"Frameworks/Tools: SpringBoot, React, JUnit, Node.js, RESTful APIs, Django, Kafka, Airο¬ow, FastAPI, Pydantic, Tableau\n",
|
| 231 |
-
"DevOps/Cloud: AWS, GCP, GitHub Actions, Docker, Jenkins, Terraform, Kubernetes, MLFlow, GitLab\n",
|
| 232 |
-
"AI Tools/Frameworks: PyTorch, Tensorο¬ow, scikit-learn, LangGraph, LangChain, LangSmith, ChatGPT\n",
|
| 233 |
-
"PROFESSIONAL EXPERIENCE\n",
|
| 234 |
-
"Amazon Inc, Tempe, AZ: Software Development Engineer | Seller Payment Services Dec 2023 - Aug 2024\n",
|
| 235 |
-
"β Established AWS Evidently setup to handle 50K+ daily API requests to new Lambda service using AWS CDK(TypeScript)\n",
|
| 236 |
-
"β Added metrics to monitor traο¬c and enhance service observability of the Lambda service through CloudWatch logs\n",
|
| 237 |
-
"β Developed SNS Event Publishers in Java using Spring Boot to process 10K+ daily events in an event-driven architecture\n",
|
| 238 |
-
"β Led load balancer migration planning for a microservice with a focus on safe rollbacks and minimum downtime\n",
|
| 239 |
-
"β Designed a dashboard for ALB migration to monitor traο¬c with high-severity alarms to enhance observability\n",
|
| 240 |
-
"β Directed weekly meetings with a 7-member agile team to analyze metrics and customer data, guiding decision-making for\n",
|
| 241 |
-
" live campaigns involving over 50K sellers\n",
|
| 242 |
-
"MetaJungle, Ozark, MO: Lead Backend Engineer Jun 2023 - Dec 2023\n",
|
| 243 |
-
"β Architected a scalable AWS cloud infrastructure for a Marketplace using Terraform IaC with ECS and Fargate\n",
|
| 244 |
-
" instances, reduced costs by 40% while maintaining high reliability using Blue/Green deployment strategy\n",
|
| 245 |
-
"β Engineered and managed Jenkins CI/CD pipeline allowing faster iterative development by reducing deployment time by\n",
|
| 246 |
-
" 75% , leveraging Github hooks and Docker Containerization\n",
|
| 247 |
-
"β Migrated over 1.2TB on-premises Microsoft SQL Server database with over 2 million records to AWS RDS, utilizing\n",
|
| 248 |
-
" AWS DMS ensuring eο¬cient indexing and retrieval\n",
|
| 249 |
-
"β Developed 10+ RESTful APIs in Node.js to manage data for over 500 NFT collections and 10,000 listings from MongoDB\n",
|
| 250 |
-
"β Automated extraction and compression of 50,000+ images from Ethereum Blockchain and stored on AWS S3 using\n",
|
| 251 |
-
" Airο¬ow workο¬ows in Python, leading to almost 30% storage cost savings\n",
|
| 252 |
-
"Omnipresent Robot Technologies, Delhi, India: Software Engineer Jun 2018 - Jul 2021\n",
|
| 253 |
-
"β Engineered a distributed, scalable AI surveillance application with edge-device computation using Python, OpenCV,\n",
|
| 254 |
-
" and scikit-learn, ensuring security for 10,000+ daily park visitors\n",
|
| 255 |
-
"β Architected a distributed system for real-time video streaming using Apache Kafka and Python to process 50+ parallel\n",
|
| 256 |
-
" video streams, reducing latency by 60% by rigorous debugging and performance optimization\n",
|
| 257 |
-
"β Led the development of an analytics dashboard using Django, React and Postgres to show breach records, alerts, and\n",
|
| 258 |
-
" intuitive data visualizations using Google Charts, allowing data-driven decision making\n",
|
| 259 |
-
"β Developed a drone compliance platform using Django to automate ο¬ight authorization and authentication process,\n",
|
| 260 |
-
" leading to enhanced productivity of the drone engineering team\n",
|
| 261 |
-
"β Led collaboration of a team of engineers and drone operators to conduct real-world testing of the compliance system\n",
|
| 262 |
-
"β Mentored interns to understand software development best practices, coding standards, and version control systems\n",
|
| 263 |
-
"ADDITIONAL EXPERIENCE\n",
|
| 264 |
-
"ML Software Developer at ASU Jul 2022 - May 2023\n",
|
| 265 |
-
"β Trained deep learning models using PyTorch and Scikit to detect low-resolution objects in 15,000+ satellite images\n",
|
| 266 |
-
"β Executed adversarial attacks and utilized MLFlow for ο¬ne-tuning multi-class classiο¬cation machine learning model,\n",
|
| 267 |
-
" enhancing model robustness and improving accuracy by 20%\n",
|
| 268 |
-
"Mayhem Heroes Cybersecurity Open Source Hackathon Apr 2022\n",
|
| 269 |
-
"Integrated Mayhem into CI/CD pipeline for Open Source repos using GitHub Actions, reducing security risks by over 80%\n",
|
| 270 |
-
" EDUCATION\n",
|
| 271 |
-
"Master of Science in Information Technology\n",
|
| 272 |
-
"Arizona State University, Tempe, Arizona\n"
|
| 273 |
-
]
|
| 274 |
-
}
|
| 275 |
-
],
|
| 276 |
-
"source": [
|
| 277 |
-
"chunks = parse_resume(\"C:\\\\Users\\\\risha\\\\Downloads\\\\Rishabh_SDE_Resume.pdf\")"
|
| 278 |
-
]
|
| 279 |
-
},
|
| 280 |
-
{
|
| 281 |
-
"cell_type": "code",
|
| 282 |
-
"execution_count": 40,
|
| 283 |
-
"id": "0100cc62",
|
| 284 |
-
"metadata": {},
|
| 285 |
-
"outputs": [
|
| 286 |
-
{
|
| 287 |
-
"name": "stdout",
|
| 288 |
-
"output_type": "stream",
|
| 289 |
-
"text": [
|
| 290 |
-
"Resume chunk: Rishabh Aggarwal\n",
|
| 291 |
-
"(602) 580-5734 β’ raggar15@asu.edu β’ LinkedIn β’ Tempe, AZ\n",
|
| 292 |
-
"Resume chunk: Programming Languages: Python, Java, JavaScript, Bash, HTML, CSS\n",
|
| 293 |
-
"Databases: SQL (PostgreSQL, MySQL, SQLite), NoSQL (MongoDB, Redis, DynamoDB, Pinecone)\n",
|
| 294 |
-
"Frameworks/Tools: SpringBoot, React, JUnit, Node.js, RESTful APIs, Django, Kafka, Airο¬ow, FastAPI, Pydantic, Tableau\n",
|
| 295 |
-
"DevOps/Cloud: AWS, GCP, GitHub Actions, Docker, Jenkins, Terraform, Kubernetes, MLFlow, GitLab\n",
|
| 296 |
-
"AI Tools/Frameworks: PyTorch, Tensorο¬ow, scikit-learn, LangGraph, LangChain, LangSmith, ChatGPT\n",
|
| 297 |
-
"Resume chunk: Amazon Inc, Tempe, AZ: Software Development Engineer | Seller Payment Services Dec 2023 - Aug 2024\n",
|
| 298 |
-
"β Established AWS Evidently setup to handle 50K+ daily API requests to new Lambda service using AWS CDK(TypeScript)\n",
|
| 299 |
-
"β Added metrics to monitor traο¬c and enhance service observability of the Lambda service through CloudWatch logs\n",
|
| 300 |
-
"β Developed SNS Event Publishers in Java using Spring Boot to process 10K+ daily events in an event-driven architecture\n",
|
| 301 |
-
"β Led load balancer migration planning for a microservice with a focus on safe rollbacks and minimum downtime\n",
|
| 302 |
-
"β Designed a dashboard for ALB migration to monitor traο¬c with high-severity alarms to enhance observability\n",
|
| 303 |
-
"β Directed weekly meetings with a 7-member agile team to analyze metrics and customer data, guiding decision-making for\n",
|
| 304 |
-
"live campaigns involving over 50K sellers\n",
|
| 305 |
-
"MetaJungle, Ozark, MO: Lead Backend Engineer Jun 2023 - Dec 2023\n",
|
| 306 |
-
"β Architected a scalable AWS cloud infrastructure for a Marketplace using Terraform IaC with ECS and Fargate\n",
|
| 307 |
-
"instances, reduced costs by 40% while maintaining high reliability using Blue/Green deployment strategy\n",
|
| 308 |
-
"β Engineered and managed Jenkins CI/CD pipeline allowing faster iterative development by reducing deployment time by\n",
|
| 309 |
-
"75% , leveraging Github hooks and Docker Containerization\n",
|
| 310 |
-
"β Migrated over 1.2TB on-premises Microsoft SQL Server database with over 2 million records to AWS RDS, utilizing\n",
|
| 311 |
-
"AWS DMS ensuring eο¬cient indexing and retrieval\n",
|
| 312 |
-
"β Developed 10+ RESTful APIs in Node.js to manage data for over 500 NFT collections and 10,000 listings from MongoDB\n",
|
| 313 |
-
"β Automated extraction and compression of 50,000+ images from Ethereum Blockchain and stored on AWS S3 using\n",
|
| 314 |
-
"Airο¬ow workο¬ows in Python, leading to almost 30% storage cost savings\n",
|
| 315 |
-
"Omnipresent Robot Technologies, Delhi, India: Software Engineer Jun 2018 - Jul 2021\n",
|
| 316 |
-
"β Engineered a distributed, scalable AI surveillance application with edge-device computation using Python, OpenCV,\n",
|
| 317 |
-
"and scikit-learn, ensuring security for 10,000+ daily park visitors\n",
|
| 318 |
-
"β Architected a distributed system for real-time video streaming using Apache Kafka and Python to process 50+ parallel\n",
|
| 319 |
-
"video streams, reducing latency by 60% by rigorous debugging and performance optimization\n",
|
| 320 |
-
"β Led the development of an analytics dashboard using Django, React and Postgres to show breach records, alerts, and\n",
|
| 321 |
-
"intuitive data visualizations using Google Charts, allowing data-driven decision making\n",
|
| 322 |
-
"β Developed a drone compliance platform using Django to automate ο¬ight authorization and authentication process,\n",
|
| 323 |
-
"leading to enhanced productivity of the drone engineering team\n",
|
| 324 |
-
"β Led collaboration of a team of engineers and drone operators to conduct real-world testing of the compliance system\n",
|
| 325 |
-
"β Mentored interns to understand software development best practices, coding standards, and version control systems\n",
|
| 326 |
-
"Resume chunk: ML Software Developer at ASU Jul 2022 - May 2023\n",
|
| 327 |
-
"β Trained deep learning models using PyTorch and Scikit to detect low-resolution objects in 15,000+ satellite images\n",
|
| 328 |
-
"β Executed adversarial attacks and utilized MLFlow for ο¬ne-tuning multi-class classiο¬cation machine learning model,\n",
|
| 329 |
-
"enhancing model robustness and improving accuracy by 20%\n",
|
| 330 |
-
"Mayhem Heroes Cybersecurity Open Source Hackathon Apr 2022\n",
|
| 331 |
-
"Integrated Mayhem into CI/CD pipeline for Open Source repos using GitHub Actions, reducing security risks by over 80%\n",
|
| 332 |
-
"Resume chunk: Master of Science in Information Technology\n",
|
| 333 |
-
"Arizona State University, Tempe, Arizona\n"
|
| 334 |
-
]
|
| 335 |
-
}
|
| 336 |
-
],
|
| 337 |
-
"source": [
|
| 338 |
-
"resume_text = \"\"\n",
|
| 339 |
-
"for chunk in chunks:\n",
|
| 340 |
-
" print(f\"Resume chunk: {chunk.page_content}\")\n",
|
| 341 |
-
" resume_text+= (chunk.page_content)"
|
| 342 |
-
]
|
| 343 |
-
},
|
| 344 |
-
{
|
| 345 |
-
"cell_type": "code",
|
| 346 |
-
"execution_count": 41,
|
| 347 |
-
"id": "b045de91",
|
| 348 |
-
"metadata": {},
|
| 349 |
-
"outputs": [],
|
| 350 |
-
"source": [
|
| 351 |
-
"from pydantic import BaseModel, Field\n",
|
| 352 |
-
"\n",
|
| 353 |
-
"class TavilyQuerySet(BaseModel):\n",
|
| 354 |
-
" query1: tuple[str, str] = Field(\n",
|
| 355 |
-
" ...,\n",
|
| 356 |
-
" description=\"DSL for Recent Developments + 1βsentence rationale\",\n",
|
| 357 |
-
" )\n",
|
| 358 |
-
" query2: tuple[str, str] = Field(\n",
|
| 359 |
-
" ...,\n",
|
| 360 |
-
" description=\"DSL for Recent News + rationale\",\n",
|
| 361 |
-
" )\n",
|
| 362 |
-
" query3: tuple[str, str]\n",
|
| 363 |
-
" query4: tuple[str, str]\n",
|
| 364 |
-
" query5: tuple[str, str]"
|
| 365 |
-
]
|
| 366 |
-
},
|
| 367 |
-
{
|
| 368 |
-
"cell_type": "code",
|
| 369 |
-
"execution_count": 42,
|
| 370 |
-
"id": "eda95e9a",
|
| 371 |
-
"metadata": {},
|
| 372 |
-
"outputs": [],
|
| 373 |
-
"source": [
|
| 374 |
-
"from langchain.output_parsers import PydanticOutputParser\n",
|
| 375 |
-
"parser = PydanticOutputParser(pydantic_object=TavilyQuerySet)\n",
|
| 376 |
-
"\n",
|
| 377 |
-
"messages = SystemMessage(content=f\"\"\"\n",
|
| 378 |
-
" You are a Tavily Search Query specialist. Follow the JSON schema below exactly:\n",
|
| 379 |
-
" {parser.get_format_instructions()}\n",
|
| 380 |
-
"\n",
|
| 381 |
-
" \n",
|
| 382 |
-
" Rules:\n",
|
| 383 |
-
" 1. Generate Tavily DSL only (no natural language outside the JSON).\n",
|
| 384 |
-
" 2. Map the job description into five categories:\n",
|
| 385 |
-
" β’ query1: recent developments\n",
|
| 386 |
-
" β’ query2: recent news\n",
|
| 387 |
-
" β’ query3:company profile\n",
|
| 388 |
-
" β’ query4: key customers & partners\n",
|
| 389 |
-
" β’ query5: culture & values\n",
|
| 390 |
-
" 3. Each value is a twoβelement list:\n",
|
| 391 |
-
" [<query string>, <oneβsentence rationale>]\n",
|
| 392 |
-
" 4. Use filters (source:, date:[now-30d TO now], site:β¦, etc.) where helpful.\n",
|
| 393 |
-
" 5. If information is missing in the JD, fall back sensibly\n",
|
| 394 |
-
" (e.g. search for βemployee testimonialsβ).\n",
|
| 395 |
-
" 6. Return **only** valid JSON.\n",
|
| 396 |
-
" \"\"\")"
|
| 397 |
-
]
|
| 398 |
-
},
|
| 399 |
-
{
|
| 400 |
-
"cell_type": "code",
|
| 401 |
-
"execution_count": 53,
|
| 402 |
-
"id": "9738103e",
|
| 403 |
-
"metadata": {},
|
| 404 |
-
"outputs": [
|
| 405 |
-
{
|
| 406 |
-
"data": {
|
| 407 |
-
"text/plain": [
|
| 408 |
-
"'The output should be formatted as a JSON instance that conforms to the JSON schema below.\\n\\nAs an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\\nthe object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\\n\\nHere is the output schema:\\n```\\n{\"properties\": {\"query1\": {\"description\": \"DSL for Recent Developments + 1βsentence rationale\", \"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query1\", \"type\": \"array\"}, \"query2\": {\"description\": \"DSL for Recent News + rationale\", \"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query2\", \"type\": \"array\"}, \"query3\": {\"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query3\", \"type\": \"array\"}, \"query4\": {\"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query4\", \"type\": \"array\"}, \"query5\": {\"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query5\", \"type\": \"array\"}}, \"required\": [\"query1\", \"query2\", \"query3\", \"query4\", \"query5\"]}\\n```'"
|
| 409 |
-
]
|
| 410 |
-
},
|
| 411 |
-
"execution_count": 53,
|
| 412 |
-
"metadata": {},
|
| 413 |
-
"output_type": "execute_result"
|
| 414 |
-
}
|
| 415 |
-
],
|
| 416 |
-
"source": [
|
| 417 |
-
"parser.get_format_instructions()"
|
| 418 |
-
]
|
| 419 |
-
},
|
| 420 |
-
{
|
| 421 |
-
"cell_type": "code",
|
| 422 |
-
"execution_count": 52,
|
| 423 |
-
"id": "c3174432",
|
| 424 |
-
"metadata": {},
|
| 425 |
-
"outputs": [
|
| 426 |
-
{
|
| 427 |
-
"data": {
|
| 428 |
-
"text/plain": [
|
| 429 |
-
"{'properties': {'query1': {'description': 'DSL for Recent Developments + 1βsentence rationale',\n",
|
| 430 |
-
" 'maxItems': 2,\n",
|
| 431 |
-
" 'minItems': 2,\n",
|
| 432 |
-
" 'prefixItems': [{'type': 'string'}, {'type': 'string'}],\n",
|
| 433 |
-
" 'title': 'Query1',\n",
|
| 434 |
-
" 'type': 'array'},\n",
|
| 435 |
-
" 'query2': {'description': 'DSL for Recent News + rationale',\n",
|
| 436 |
-
" 'maxItems': 2,\n",
|
| 437 |
-
" 'minItems': 2,\n",
|
| 438 |
-
" 'prefixItems': [{'type': 'string'}, {'type': 'string'}],\n",
|
| 439 |
-
" 'title': 'Query2',\n",
|
| 440 |
-
" 'type': 'array'},\n",
|
| 441 |
-
" 'query3': {'maxItems': 2,\n",
|
| 442 |
-
" 'minItems': 2,\n",
|
| 443 |
-
" 'prefixItems': [{'type': 'string'}, {'type': 'string'}],\n",
|
| 444 |
-
" 'title': 'Query3',\n",
|
| 445 |
-
" 'type': 'array'},\n",
|
| 446 |
-
" 'query4': {'maxItems': 2,\n",
|
| 447 |
-
" 'minItems': 2,\n",
|
| 448 |
-
" 'prefixItems': [{'type': 'string'}, {'type': 'string'}],\n",
|
| 449 |
-
" 'title': 'Query4',\n",
|
| 450 |
-
" 'type': 'array'},\n",
|
| 451 |
-
" 'query5': {'maxItems': 2,\n",
|
| 452 |
-
" 'minItems': 2,\n",
|
| 453 |
-
" 'prefixItems': [{'type': 'string'}, {'type': 'string'}],\n",
|
| 454 |
-
" 'title': 'Query5',\n",
|
| 455 |
-
" 'type': 'array'}},\n",
|
| 456 |
-
" 'required': ['query1', 'query2', 'query3', 'query4', 'query5'],\n",
|
| 457 |
-
" 'title': 'TavilyQuerySet',\n",
|
| 458 |
-
" 'type': 'object'}"
|
| 459 |
-
]
|
| 460 |
-
},
|
| 461 |
-
"execution_count": 52,
|
| 462 |
-
"metadata": {},
|
| 463 |
-
"output_type": "execute_result"
|
| 464 |
-
}
|
| 465 |
-
],
|
| 466 |
-
"source": [
|
| 467 |
-
"TavilyQuerySet.model_json_schema()"
|
| 468 |
-
]
|
| 469 |
-
},
|
| 470 |
-
{
|
| 471 |
-
"cell_type": "code",
|
| 472 |
-
"execution_count": 44,
|
| 473 |
-
"id": "5884df35",
|
| 474 |
-
"metadata": {},
|
| 475 |
-
"outputs": [
|
| 476 |
-
{
|
| 477 |
-
"name": "stdout",
|
| 478 |
-
"output_type": "stream",
|
| 479 |
-
"text": [
|
| 480 |
-
"================================\u001b[1m System Message \u001b[0m================================\n",
|
| 481 |
-
"\n",
|
| 482 |
-
"\n",
|
| 483 |
-
" You are a Tavily Search Query specialist. Follow the JSON schema below exactly:\n",
|
| 484 |
-
" The output should be formatted as a JSON instance that conforms to the JSON schema below.\n",
|
| 485 |
-
"\n",
|
| 486 |
-
"As an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\n",
|
| 487 |
-
"the object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n",
|
| 488 |
-
"\n",
|
| 489 |
-
"Here is the output schema:\n",
|
| 490 |
-
"```\n",
|
| 491 |
-
"{\"properties\": {\"query1\": {\"description\": \"DSL for Recent Developments + 1βsentence rationale\", \"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query1\", \"type\": \"array\"}, \"query2\": {\"description\": \"DSL for Recent News + rationale\", \"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query2\", \"type\": \"array\"}, \"query3\": {\"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query3\", \"type\": \"array\"}, \"query4\": {\"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query4\", \"type\": \"array\"}, \"query5\": {\"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query5\", \"type\": \"array\"}}, \"required\": [\"query1\", \"query2\", \"query3\", \"query4\", \"query5\"]}\n",
|
| 492 |
-
"```\n",
|
| 493 |
-
"\n",
|
| 494 |
-
"\n",
|
| 495 |
-
" Rules:\n",
|
| 496 |
-
" 1. Generate Tavily DSL only (no natural language outside the JSON).\n",
|
| 497 |
-
" 2. Map the job description into five categories:\n",
|
| 498 |
-
" β’ query1: recent developments\n",
|
| 499 |
-
" β’ query2: recent news\n",
|
| 500 |
-
" β’ query3:company profile\n",
|
| 501 |
-
" β’ query4: key customers & partners\n",
|
| 502 |
-
" β’ query5: culture & values\n",
|
| 503 |
-
" 3. Each value is a twoβelement list:\n",
|
| 504 |
-
" [<query string>, <oneβsentence rationale>]\n",
|
| 505 |
-
" 4. Use filters (source:, date:[now-30d TO now], site:β¦, etc.) where helpful.\n",
|
| 506 |
-
" 5. If information is missing in the JD, fall back sensibly\n",
|
| 507 |
-
" (e.g. search for βemployee testimonialsβ).\n",
|
| 508 |
-
" 6. Return **only** valid JSON.\n",
|
| 509 |
-
" \n"
|
| 510 |
-
]
|
| 511 |
-
}
|
| 512 |
-
],
|
| 513 |
-
"source": [
|
| 514 |
-
"messages.pretty_print()"
|
| 515 |
-
]
|
| 516 |
-
},
|
| 517 |
-
{
|
| 518 |
-
"cell_type": "code",
|
| 519 |
-
"execution_count": 46,
|
| 520 |
-
"id": "d2c3cc8b",
|
| 521 |
-
"metadata": {},
|
| 522 |
-
"outputs": [],
|
| 523 |
-
"source": [
|
| 524 |
-
"x = \"\"\"properties\": {\"query1\": [{\"query\": \"Shalin Mehta AND \\\"Computational Microscopy Platform\\\"\", \"rationale\": \"Recent developments within the company\"}, {\"query\": \"Shalin Mehta AND \\\"Biohub SF\\\"\", \"rationale\": \"Recent developments within the company\"}], \"query2\": [{\"query\": \"Chan Zuckerberg Biohub - San Francisco AND recent news\", \"rationale\": \"Recent news about the company\"}, {\"query\": \"COVID-19 AND Chan Zuckerberg Biohub - San Francisco\", \"rationale\": \"Recent news about the company\"}], \"query3\": [{\"query\": \"Shalin Mehta AND \\\"role: Software Engineer\\\"\", \"rationale\": \"Information about the company that relates to the role\"}, {\"query\": \"Chan Zuckerberg Biohub - San Francisco AND \\\"team: Bioengineering\\\"\", \"rationale\": \"Information about the company that relates to the role\"}], \"query4\": [{\"query\": \"key customers: Chan Zuckerberg Biohub\", \"rationale\": \"Key customers & partners\"}, {\"query\": \"partners: Chan Zuckerberg Biohub SF\", \"rationale\": \"Key customers & partners\"}], \"query5\": [{\"query\": \"company culture: Chan Zuckerberg Biohub\", \"rationale\": \"Culture & values of the company\"}, {\"query\": \"values: Chan Zuckerberg Biohub\", \"rationale\": \"Culture & values of the company\"}]}, \"required\": [\"query1\", \"query2\", \"query3\", \"query4\", \"query5\"]\"\"\""
|
| 525 |
-
]
|
| 526 |
-
},
|
| 527 |
-
{
|
| 528 |
-
"cell_type": "code",
|
| 529 |
-
"execution_count": 49,
|
| 530 |
-
"id": "7d8508a4",
|
| 531 |
-
"metadata": {},
|
| 532 |
-
"outputs": [
|
| 533 |
-
{
|
| 534 |
-
"name": "stdout",
|
| 535 |
-
"output_type": "stream",
|
| 536 |
-
"text": [
|
| 537 |
-
"properties\": {\"query1\": [{\"query\": \"Shalin Mehta AND \"Computational Microscopy Platform\"\", \"rationale\": \"Recent developments within the company\"}, {\"query\": \"Shalin Mehta AND \"Biohub SF\"\", \"rationale\": \"Recent developments within the company\"}], \"query2\": [{\"query\": \"Chan Zuckerberg Biohub - San Francisco AND recent news\", \"rationale\": \"Recent news about the company\"}, {\"query\": \"COVID-19 AND Chan Zuckerberg Biohub - San Francisco\", \"rationale\": \"Recent news about the company\"}], \"query3\": [{\"query\": \"Shalin Mehta AND \"role: Software Engineer\"\", \"rationale\": \"Information about the company that relates to the role\"}, {\"query\": \"Chan Zuckerberg Biohub - San Francisco AND \"team: Bioengineering\"\", \"rationale\": \"Information about the company that relates to the role\"}], \"query4\": [{\"query\": \"key customers: Chan Zuckerberg Biohub\", \"rationale\": \"Key customers & partners\"}, {\"query\": \"partners: Chan Zuckerberg Biohub SF\", \"rationale\": \"Key customers & partners\"}], \"query5\": [{\"query\": \"company culture: Chan Zuckerberg Biohub\", \"rationale\": \"Culture & values of the company\"}, {\"query\": \"values: Chan Zuckerberg Biohub\", \"rationale\": \"Culture & values of the company\"}]}, \"required\": [\"query1\", \"query2\", \"query3\", \"query4\", \"query5\"]\n"
|
| 538 |
-
]
|
| 539 |
-
}
|
| 540 |
-
],
|
| 541 |
-
"source": [
|
| 542 |
-
"print(x)"
|
| 543 |
-
]
|
| 544 |
-
},
|
| 545 |
-
{
|
| 546 |
-
"cell_type": "code",
|
| 547 |
-
"execution_count": 54,
|
| 548 |
-
"id": "1fab5ee9",
|
| 549 |
-
"metadata": {},
|
| 550 |
-
"outputs": [],
|
| 551 |
-
"source": [
|
| 552 |
-
"from langchain_core.prompts import (\n",
|
| 553 |
-
" PromptTemplate,\n",
|
| 554 |
-
")"
|
| 555 |
-
]
|
| 556 |
-
},
|
| 557 |
-
{
|
| 558 |
-
"cell_type": "code",
|
| 559 |
-
"execution_count": null,
|
| 560 |
-
"id": "e93695ff",
|
| 561 |
-
"metadata": {},
|
| 562 |
-
"outputs": [],
|
| 563 |
-
"source": [
|
| 564 |
-
"prompt = PromptTemplate.from_template(\"Below is the required job description and resume: {background_information}\", input_variables=[\"background_information\"])"
|
| 565 |
-
]
|
| 566 |
-
},
|
| 567 |
-
{
|
| 568 |
-
"cell_type": "code",
|
| 569 |
-
"execution_count": 55,
|
| 570 |
-
"id": "f5330010",
|
| 571 |
-
"metadata": {},
|
| 572 |
-
"outputs": [],
|
| 573 |
-
"source": [
|
| 574 |
-
"x = ('query1', ('recent developments within the company', 'The Associate Software engineer will build open source software tools for managing and processing 10-100 terabyte-scale datasets.'))"
|
| 575 |
-
]
|
| 576 |
-
},
|
| 577 |
-
{
|
| 578 |
-
"cell_type": "code",
|
| 579 |
-
"execution_count": 61,
|
| 580 |
-
"id": "5753afd4",
|
| 581 |
-
"metadata": {},
|
| 582 |
-
"outputs": [],
|
| 583 |
-
"source": [
|
| 584 |
-
"keys = ('q', ('y', 'z'))\n",
|
| 585 |
-
"\n",
|
| 586 |
-
"dict_x = dict(zip(keys, x))"
|
| 587 |
-
]
|
| 588 |
-
},
|
| 589 |
-
{
|
| 590 |
-
"cell_type": "code",
|
| 591 |
-
"execution_count": 63,
|
| 592 |
-
"id": "06d50119",
|
| 593 |
-
"metadata": {},
|
| 594 |
-
"outputs": [
|
| 595 |
-
{
|
| 596 |
-
"data": {
|
| 597 |
-
"text/plain": [
|
| 598 |
-
"('recent developments within the company',\n",
|
| 599 |
-
" 'The Associate Software engineer will build open source software tools for managing and processing 10-100 terabyte-scale datasets.')"
|
| 600 |
-
]
|
| 601 |
-
},
|
| 602 |
-
"execution_count": 63,
|
| 603 |
-
"metadata": {},
|
| 604 |
-
"output_type": "execute_result"
|
| 605 |
-
}
|
| 606 |
-
],
|
| 607 |
-
"source": [
|
| 608 |
-
"dict_x[('y', 'z')]"
|
| 609 |
-
]
|
| 610 |
-
},
|
| 611 |
-
{
|
| 612 |
-
"cell_type": "code",
|
| 613 |
-
"execution_count": null,
|
| 614 |
-
"id": "f03d758e",
|
| 615 |
-
"metadata": {},
|
| 616 |
-
"outputs": [],
|
| 617 |
-
"source": [
|
| 618 |
-
"from langchain.output_parsers import PydanticOutputParser, OutputFixingParser, RetryOutputParser\n",
|
| 619 |
-
"base_parser = PydanticOutputParser(pydantic_object=TavilyQuerySet)\n",
|
| 620 |
-
"\n"
|
| 621 |
-
]
|
| 622 |
-
},
|
| 623 |
-
{
|
| 624 |
-
"cell_type": "code",
|
| 625 |
-
"execution_count": 1,
|
| 626 |
-
"id": "d8dd9c74",
|
| 627 |
-
"metadata": {},
|
| 628 |
-
"outputs": [
|
| 629 |
-
{
|
| 630 |
-
"ename": "NameError",
|
| 631 |
-
"evalue": "name 'parser' is not defined",
|
| 632 |
-
"output_type": "error",
|
| 633 |
-
"traceback": [
|
| 634 |
-
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
| 635 |
-
"\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
|
| 636 |
-
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m format_instructions = \u001b[43mparser\u001b[49m.get_format_instructions()\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[34;01mollama\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m chat\n\u001b[32m 5\u001b[39m tavily_role_messages = SystemMessage(content=\n\u001b[32m 6\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\"\"\u001b[39m\n\u001b[32m 7\u001b[39m \u001b[33m When you reply, output **only** valid JSON that can be parsed\u001b[39m\n\u001b[32m (...)\u001b[39m\u001b[32m 31\u001b[39m \u001b[33m 5. Return **only** valid JSON that matches the schema exactly. No other fields\u001b[39m\n\u001b[32m 32\u001b[39m \u001b[33m \u001b[39m\u001b[33m\"\"\"\u001b[39m)\n",
|
| 637 |
-
"\u001b[31mNameError\u001b[39m: name 'parser' is not defined"
|
| 638 |
-
]
|
| 639 |
-
}
|
| 640 |
-
],
|
| 641 |
-
"source": [
|
| 642 |
-
"format_instructions = parser.get_format_instructions()\n",
|
| 643 |
-
"from ollama import chat\n",
|
| 644 |
-
"\n",
|
| 645 |
-
"\n",
|
| 646 |
-
"tavily_role_messages = SystemMessage(content=\n",
|
| 647 |
-
" f\"\"\"\n",
|
| 648 |
-
" When you reply, output **only** valid JSON that can be parsed\n",
|
| 649 |
-
" into the Pydantic model shown below. Do **not** wrap it in \"properties\"\n",
|
| 650 |
-
" or \"required\".:\n",
|
| 651 |
-
" \n",
|
| 652 |
-
" ------------------------------------------------\n",
|
| 653 |
-
"\n",
|
| 654 |
-
"\n",
|
| 655 |
-
" {format_instructions}\n",
|
| 656 |
-
"\n",
|
| 657 |
-
" \n",
|
| 658 |
-
" -------------------------------------------------\n",
|
| 659 |
-
"\n",
|
| 660 |
-
" Rules:\n",
|
| 661 |
-
" 1. Generate Tavily DSL only (no natural language outside the JSON).\n",
|
| 662 |
-
" 2. Map the job description into five categories:\n",
|
| 663 |
-
" β’ query1: recent developments within the company\n",
|
| 664 |
-
" β’ query2: recent news about the company\n",
|
| 665 |
-
" β’ query3: information about the company that relates to the role\n",
|
| 666 |
-
" β’ query4: key customers & partners\n",
|
| 667 |
-
" β’ query5: culture & values of the company\n",
|
| 668 |
-
" 3. Each value is a twoβelement list:\n",
|
| 669 |
-
" [<query string>, <oneβsentence rationale>]\n",
|
| 670 |
-
" 4. If information is missing in the JD, fall back sensibly\n",
|
| 671 |
-
" (e.g. search for βemployee testimonialsβ).\n",
|
| 672 |
-
" 5. Return **only** valid JSON that matches the schema exactly. No other fields\n",
|
| 673 |
-
" \"\"\")\n",
|
| 674 |
-
"\n",
|
| 675 |
-
"\n",
|
| 676 |
-
"response = chat(\n",
|
| 677 |
-
" messages=[{\n",
|
| 678 |
-
" tavily_role_messages,\n",
|
| 679 |
-
" input_message}\n",
|
| 680 |
-
" ],\n",
|
| 681 |
-
" model='llama3.2:latest',\n",
|
| 682 |
-
" format=TavilyQuerySet.model_json_schema(),\n",
|
| 683 |
-
" )"
|
| 684 |
-
]
|
| 685 |
-
},
|
| 686 |
-
{
|
| 687 |
-
"cell_type": "code",
|
| 688 |
-
"execution_count": 2,
|
| 689 |
-
"id": "8deb0abd",
|
| 690 |
-
"metadata": {},
|
| 691 |
-
"outputs": [],
|
| 692 |
-
"source": [
|
| 693 |
-
"p = ('query1', ['Recent developments within the company using computational microscopy platform', 'This project will require working on microscopes in a BSL-2 imaging laboratory'])"
|
| 694 |
-
]
|
| 695 |
-
},
|
| 696 |
-
{
|
| 697 |
-
"cell_type": "code",
|
| 698 |
-
"execution_count": 3,
|
| 699 |
-
"id": "d2fcab19",
|
| 700 |
-
"metadata": {},
|
| 701 |
-
"outputs": [
|
| 702 |
-
{
|
| 703 |
-
"data": {
|
| 704 |
-
"text/plain": [
|
| 705 |
-
"'Recent developments within the company using computational microscopy platform'"
|
| 706 |
-
]
|
| 707 |
-
},
|
| 708 |
-
"execution_count": 3,
|
| 709 |
-
"metadata": {},
|
| 710 |
-
"output_type": "execute_result"
|
| 711 |
-
}
|
| 712 |
-
],
|
| 713 |
-
"source": [
|
| 714 |
-
"p[1][0]"
|
| 715 |
-
]
|
| 716 |
-
},
|
| 717 |
-
{
|
| 718 |
-
"cell_type": "code",
|
| 719 |
-
"execution_count": 6,
|
| 720 |
-
"id": "55e3f46a",
|
| 721 |
-
"metadata": {},
|
| 722 |
-
"outputs": [],
|
| 723 |
-
"source": [
|
| 724 |
-
"COVER_LETTER_PROMPT = SystemMessage(content=\"\"\"You are my dedicated assistant for writing job application content, including cover letters, LinkedIn outreach messages, and responses to job-specific questions (e.g., experience, culture fit, or motivation).\n",
|
| 725 |
-
"\n",
|
| 726 |
-
"Your goal is to generate content that:\n",
|
| 727 |
-
"1. Reflects **my personality**, tone, and authentic voice, based on examples I provide.\n",
|
| 728 |
-
"2. Matches **my knowledge, experience, and interests**, which Iβll also share or update as needed.\n",
|
| 729 |
-
"3. Adopts **my writing style and energy** (e.g., grounded, confident, thoughtfulβbut not overly polished or generic).\n",
|
| 730 |
-
"4. Embeds **genuine enthusiasm or alignment** with the company or role, without sounding performative.\n",
|
| 731 |
-
"5. Avoids filler, clichΓ©s, or overused corporate phrasesβkeep it **authentic and specific**.\n",
|
| 732 |
-
"6. Learns over time by asking me relevant clarifying questions when needed (e.g., change in tone, new experience, updates to goals).\n",
|
| 733 |
-
"7. Balances job description alignment with personal storytelling, roughly in a 75:25 ratio.\n",
|
| 734 |
-
"8. Keeps outputs **concise** and within any given word or character limits.\"\"\")\n"
|
| 735 |
-
]
|
| 736 |
-
},
|
| 737 |
-
{
|
| 738 |
-
"cell_type": "code",
|
| 739 |
-
"execution_count": 7,
|
| 740 |
-
"id": "ea061e0e",
|
| 741 |
-
"metadata": {},
|
| 742 |
-
"outputs": [],
|
| 743 |
-
"source": [
|
| 744 |
-
"from langchain_core.prompts import (\n",
|
| 745 |
-
" ChatPromptTemplate,\n",
|
| 746 |
-
" HumanMessagePromptTemplate,\n",
|
| 747 |
-
" SystemMessagePromptTemplate,\n",
|
| 748 |
-
")\n",
|
| 749 |
-
"from langchain_core.messages import (\n",
|
| 750 |
-
" AIMessage,\n",
|
| 751 |
-
" HumanMessage,\n",
|
| 752 |
-
" SystemMessage,\n",
|
| 753 |
-
")\n",
|
| 754 |
-
"\n",
|
| 755 |
-
"FirstDraftGenerationPromptTemplate = ChatPromptTemplate.from_messages([COVER_LETTER_PROMPT])"
|
| 756 |
-
]
|
| 757 |
-
},
|
| 758 |
-
{
|
| 759 |
-
"cell_type": "code",
|
| 760 |
-
"execution_count": 8,
|
| 761 |
-
"id": "b96cbe64",
|
| 762 |
-
"metadata": {},
|
| 763 |
-
"outputs": [
|
| 764 |
-
{
|
| 765 |
-
"data": {
|
| 766 |
-
"text/plain": [
|
| 767 |
-
"ChatPromptTemplate(input_variables=[], input_types={}, partial_variables={}, messages=[SystemMessage(content='You are my dedicated assistant for writing job application content, including cover letters, LinkedIn outreach messages, and responses to job-specific questions (e.g., experience, culture fit, or motivation).\\n\\nYour goal is to generate content that:\\n1. Reflects **my personality**, tone, and authentic voice, based on examples I provide.\\n2. Matches **my knowledge, experience, and interests**, which Iβll also share or update as needed.\\n3. Adopts **my writing style and energy** (e.g., grounded, confident, thoughtfulβbut not overly polished or generic).\\n4. Embeds **genuine enthusiasm or alignment** with the company or role, without sounding performative.\\n5. Avoids filler, clichΓ©s, or overused corporate phrasesβkeep it **authentic and specific**.\\n6. Learns over time by asking me relevant clarifying questions when needed (e.g., change in tone, new experience, updates to goals).\\n7. Balances job description alignment with personal storytelling, roughly in a 75:25 ratio.\\n8. Keeps outputs **concise** and within any given word or character limits.', additional_kwargs={}, response_metadata={})])"
|
| 768 |
-
]
|
| 769 |
-
},
|
| 770 |
-
"execution_count": 8,
|
| 771 |
-
"metadata": {},
|
| 772 |
-
"output_type": "execute_result"
|
| 773 |
-
}
|
| 774 |
-
],
|
| 775 |
-
"source": [
|
| 776 |
-
"FirstDraftGenerationPromptTemplate"
|
| 777 |
-
]
|
| 778 |
-
},
|
| 779 |
-
{
|
| 780 |
-
"cell_type": "code",
|
| 781 |
-
"execution_count": null,
|
| 782 |
-
"id": "dfd03f8d",
|
| 783 |
-
"metadata": {},
|
| 784 |
-
"outputs": [],
|
| 785 |
-
"source": [
|
| 786 |
-
"current_application_session = \"Heello World\"\n",
|
| 787 |
-
"company_research_data = \"Company Research Data\""
|
| 788 |
-
]
|
| 789 |
-
},
|
| 790 |
-
{
|
| 791 |
-
"cell_type": "code",
|
| 792 |
-
"execution_count": 10,
|
| 793 |
-
"id": "c5fef665",
|
| 794 |
-
"metadata": {},
|
| 795 |
-
"outputs": [],
|
| 796 |
-
"source": [
|
| 797 |
-
"CurrentSessionContextMessage = HumanMessagePromptTemplate.from_template(\n",
|
| 798 |
-
" \"\"\"\n",
|
| 799 |
-
" # Resume and Job Description\n",
|
| 800 |
-
" {current_job_role}\n",
|
| 801 |
-
"\n",
|
| 802 |
-
" # Company Information\n",
|
| 803 |
-
" {company_research_data}\n",
|
| 804 |
-
"\n",
|
| 805 |
-
" Create a cover letter that highlights the match between my qualifications and the job requirements.\n",
|
| 806 |
-
" \"\"\",\n",
|
| 807 |
-
" input_variables=[\"current_job_role\",\n",
|
| 808 |
-
" \"company_research_data\"])"
|
| 809 |
-
]
|
| 810 |
-
},
|
| 811 |
-
{
|
| 812 |
-
"cell_type": "code",
|
| 813 |
-
"execution_count": 17,
|
| 814 |
-
"id": "c89ba644",
|
| 815 |
-
"metadata": {},
|
| 816 |
-
"outputs": [],
|
| 817 |
-
"source": [
|
| 818 |
-
"FirstDraftGenerationPromptTemplate.append(CurrentSessionContextMessage)"
|
| 819 |
-
]
|
| 820 |
-
},
|
| 821 |
-
{
|
| 822 |
-
"cell_type": "code",
|
| 823 |
-
"execution_count": 18,
|
| 824 |
-
"id": "6997c553",
|
| 825 |
-
"metadata": {},
|
| 826 |
-
"outputs": [],
|
| 827 |
-
"source": [
|
| 828 |
-
"chain = (\n",
|
| 829 |
-
" ({\"current_job_role\": lambda x: x[\"current_job_role\"],\n",
|
| 830 |
-
" \"company_research_data\": lambda x: x[\"company_research_data\"]})\n",
|
| 831 |
-
" | FirstDraftGenerationPromptTemplate\n",
|
| 832 |
-
" )"
|
| 833 |
-
]
|
| 834 |
-
},
|
| 835 |
-
{
|
| 836 |
-
"cell_type": "code",
|
| 837 |
-
"execution_count": 19,
|
| 838 |
-
"id": "55f51dbf",
|
| 839 |
-
"metadata": {},
|
| 840 |
-
"outputs": [
|
| 841 |
-
{
|
| 842 |
-
"data": {
|
| 843 |
-
"text/plain": [
|
| 844 |
-
"{\n",
|
| 845 |
-
" current_job_role: RunnableLambda(...),\n",
|
| 846 |
-
" company_research_data: RunnableLambda(...)\n",
|
| 847 |
-
"}\n",
|
| 848 |
-
"| ChatPromptTemplate(input_variables=[], input_types={}, partial_variables={}, messages=[SystemMessage(content='You are my dedicated assistant for writing job application content, including cover letters, LinkedIn outreach messages, and responses to job-specific questions (e.g., experience, culture fit, or motivation).\\n\\nYour goal is to generate content that:\\n1. Reflects **my personality**, tone, and authentic voice, based on examples I provide.\\n2. Matches **my knowledge, experience, and interests**, which Iβll also share or update as needed.\\n3. Adopts **my writing style and energy** (e.g., grounded, confident, thoughtfulβbut not overly polished or generic).\\n4. Embeds **genuine enthusiasm or alignment** with the company or role, without sounding performative.\\n5. Avoids filler, clichΓ©s, or overused corporate phrasesβkeep it **authentic and specific**.\\n6. Learns over time by asking me relevant clarifying questions when needed (e.g., change in tone, new experience, updates to goals).\\n7. Balances job description alignment with personal storytelling, roughly in a 75:25 ratio.\\n8. Keeps outputs **concise** and within any given word or character limits.', additional_kwargs={}, response_metadata={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['company_research_data', 'current_job_role'], input_types={}, partial_variables={}, template='\\n # Resume and Job Description\\n {current_job_role}\\n\\n # Company Information\\n {company_research_data}\\n\\n Create a cover letter that highlights the match between my qualifications and the job requirements.\\n '), additional_kwargs={})])"
|
| 849 |
-
]
|
| 850 |
-
},
|
| 851 |
-
"execution_count": 19,
|
| 852 |
-
"metadata": {},
|
| 853 |
-
"output_type": "execute_result"
|
| 854 |
-
}
|
| 855 |
-
],
|
| 856 |
-
"source": [
|
| 857 |
-
"chain"
|
| 858 |
-
]
|
| 859 |
-
},
|
| 860 |
-
{
|
| 861 |
-
"cell_type": "code",
|
| 862 |
-
"execution_count": null,
|
| 863 |
-
"id": "48c54667",
|
| 864 |
-
"metadata": {},
|
| 865 |
-
"outputs": [
|
| 866 |
-
{
|
| 867 |
-
"ename": "ModuleNotFoundError",
|
| 868 |
-
"evalue": "No module named 'utils'",
|
| 869 |
-
"output_type": "error",
|
| 870 |
-
"traceback": [
|
| 871 |
-
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
| 872 |
-
"\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)",
|
| 873 |
-
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[25]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_client\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m LLMClient\n\u001b[32m 3\u001b[39m LLM = LLMClient()\n\u001b[32m 4\u001b[39m llm = LLMClient().get_llm()\n",
|
| 874 |
-
"\u001b[31mModuleNotFoundError\u001b[39m: No module named 'utils'"
|
| 875 |
-
]
|
| 876 |
-
}
|
| 877 |
-
],
|
| 878 |
-
"source": [
|
| 879 |
-
"from job_writer.utils.llm_client import LLMClient\n",
|
| 880 |
-
"\n",
|
| 881 |
-
"LLM = LLMClient()\n",
|
| 882 |
-
"llm = LLMClient().get_llm()"
|
| 883 |
-
]
|
| 884 |
-
},
|
| 885 |
-
{
|
| 886 |
-
"cell_type": "code",
|
| 887 |
-
"execution_count": null,
|
| 888 |
-
"id": "421df9ca",
|
| 889 |
-
"metadata": {},
|
| 890 |
-
"outputs": [],
|
| 891 |
-
"source": [
|
| 892 |
-
"from job_writer.tools.TavilySearch import search_company\n",
|
| 893 |
-
"\n",
|
| 894 |
-
"# Test job description\n",
|
| 895 |
-
"test_job = \"\"\"\n",
|
| 896 |
-
"Software Engineer - Backend\n",
|
| 897 |
-
"OpenAI\n",
|
| 898 |
-
"\n",
|
| 899 |
-
"We are looking for experienced backend engineers to join our team. Our ideal candidate will have experience with one or more of the following technologies: Python, Java, C++. \n",
|
| 900 |
-
"\n",
|
| 901 |
-
"Responsibilities:\n",
|
| 902 |
-
"- Design and implement scalable and efficient backend systems\n",
|
| 903 |
-
"- Write clean, maintainable code\n",
|
| 904 |
-
"- Work with cross-functional teams\n",
|
| 905 |
-
"\n",
|
| 906 |
-
"Requirements:\n",
|
| 907 |
-
"- Strong proficiency in one or more programming languages\n",
|
| 908 |
-
"- Strong understanding of software design patterns and principles\n",
|
| 909 |
-
"- Experience with distributed systems\n",
|
| 910 |
-
"\"\"\"\n",
|
| 911 |
-
"\n",
|
| 912 |
-
"# Test the search_company function\n",
|
| 913 |
-
"results = search_company(test_job)\n",
|
| 914 |
-
"for query_key, data in results.items():\n",
|
| 915 |
-
" print(f\"\\n{query_key}:\")\n",
|
| 916 |
-
" print(f\"Query: {data['query']}\")\n",
|
| 917 |
-
" print(f\"Rationale: {data['rationale']}\")\n",
|
| 918 |
-
" if data['results']:\n",
|
| 919 |
-
" print(f\"First result: {data['results'][0][:200]}...\")\n",
|
| 920 |
-
" else:\n",
|
| 921 |
-
" print(\"No results found\")\n"
|
| 922 |
-
]
|
| 923 |
-
},
|
| 924 |
-
{
|
| 925 |
-
"cell_type": "code",
|
| 926 |
-
"execution_count": 1,
|
| 927 |
-
"id": "18f12ff8",
|
| 928 |
-
"metadata": {},
|
| 929 |
-
"outputs": [],
|
| 930 |
-
"source": [
|
| 931 |
-
"from langchain_core.prompts import (\n",
|
| 932 |
-
" ChatPromptTemplate,\n",
|
| 933 |
-
" HumanMessagePromptTemplate,\n",
|
| 934 |
-
" SystemMessagePromptTemplate,\n",
|
| 935 |
-
")\n",
|
| 936 |
-
"from langchain_core.messages import (\n",
|
| 937 |
-
" AIMessage,\n",
|
| 938 |
-
" HumanMessage,\n",
|
| 939 |
-
" SystemMessage,\n",
|
| 940 |
-
")"
|
| 941 |
-
]
|
| 942 |
-
},
|
| 943 |
-
{
|
| 944 |
-
"cell_type": "code",
|
| 945 |
-
"execution_count": 2,
|
| 946 |
-
"id": "3ba77224",
|
| 947 |
-
"metadata": {},
|
| 948 |
-
"outputs": [],
|
| 949 |
-
"source": [
|
| 950 |
-
"from job_writer.prompts.templates import (\n",
|
| 951 |
-
" TAVILY_QUERY_PROMPT\n",
|
| 952 |
-
")"
|
| 953 |
-
]
|
| 954 |
-
},
|
| 955 |
-
{
|
| 956 |
-
"cell_type": "code",
|
| 957 |
-
"execution_count": 3,
|
| 958 |
-
"id": "50bb7c0c",
|
| 959 |
-
"metadata": {},
|
| 960 |
-
"outputs": [],
|
| 961 |
-
"source": [
|
| 962 |
-
"tavily_search_prompt = ChatPromptTemplate.from_messages([\n",
|
| 963 |
-
" SystemMessage(content=TAVILY_QUERY_PROMPT),\n",
|
| 964 |
-
" HumanMessage(\n",
|
| 965 |
-
" \"Below is the required job description and resume: {background_information}\",\n",
|
| 966 |
-
" input_variables=[\"background_information\"]\n",
|
| 967 |
-
" )\n",
|
| 968 |
-
"])"
|
| 969 |
-
]
|
| 970 |
-
},
|
| 971 |
-
{
|
| 972 |
-
"cell_type": "code",
|
| 973 |
-
"execution_count": 5,
|
| 974 |
-
"id": "372e6346",
|
| 975 |
-
"metadata": {},
|
| 976 |
-
"outputs": [],
|
| 977 |
-
"source": [
|
| 978 |
-
"job_description = \"\"\"\n",
|
| 979 |
-
"Software Engineer - Backend\n",
|
| 980 |
-
"OpenAI\n",
|
| 981 |
-
"\n",
|
| 982 |
-
"We are looking for experienced backend engineers to join our team. Our ideal candidate will have experience with one or more of the following technologies: Python, Java, C++. \n",
|
| 983 |
-
"\n",
|
| 984 |
-
"Responsibilities:\n",
|
| 985 |
-
"- Design and implement scalable and efficient backend systems\n",
|
| 986 |
-
"- Write clean, maintainable code\n",
|
| 987 |
-
"- Work with cross-functional teams\n",
|
| 988 |
-
"\n",
|
| 989 |
-
"Requirements:\n",
|
| 990 |
-
"- Strong proficiency in one or more programming languages\n",
|
| 991 |
-
"- Strong understanding of software design patterns and principles\n",
|
| 992 |
-
"- Experience with distributed systems\n",
|
| 993 |
-
"\"\"\""
|
| 994 |
-
]
|
| 995 |
-
},
|
| 996 |
-
{
|
| 997 |
-
"cell_type": "code",
|
| 998 |
-
"execution_count": 6,
|
| 999 |
-
"id": "3a27365f",
|
| 1000 |
-
"metadata": {},
|
| 1001 |
-
"outputs": [
|
| 1002 |
-
{
|
| 1003 |
-
"data": {
|
| 1004 |
-
"text/plain": [
|
| 1005 |
-
"'System: \\n<Background>\\nSINCE THE USER IS APPPLYING FOR A JOB, THE QUERIES SHOULD BE WRITTEN IN A WAY THAT RESULST IN RELEVANT INFORMATION ABOUT THE COMPANY. THIS WILL HELP THE USER WRITE A MORE PERSONALIZED AND RELEVANT APPLICATION.\\n\\nCategory mapping (remember this!):\\n query1 : recent developments\\n query2 : recent news\\n query3 : role-related info\\n query4 : key customers & partners \\n query5 : culture & values\\n\\nNote: The above are just categories. The queries should be written in a way that results in relevant information about the company. Must include the company name in the query to ensure results have a higher confidence.\\n</Background>\\n\\n<Instructions>\\n 1. Each array must contain **exactly two** strings: [search_query, one_sentence_rationale] \\n 2. If data is missing, craft a sensible fallback query; never return an empty array. \\n 3. If the employer name cannot be found, use `\"UNKNOWN\"`. \\n 4. Escape JSON only where required.\\n 5. Query cannot be repeated. It will lead to irrelevant results.\\n</Instructions>\\n\\n<EXAMPLE>\\n JSON->\\n \"query1\": (\"....\", \"...\")\\n \"query2\": (\"....\", \"...\")\\n \"query3\": (\"....\", \"...\")\\n \"query4\": (\"....\", \"...\")\\n \"query5\": (\"....\", \"...\")\\n</EXAMPLE>\\n \\nHuman: Below is the required job description and resume: {background_information}'"
|
| 1006 |
-
]
|
| 1007 |
-
},
|
| 1008 |
-
"execution_count": 6,
|
| 1009 |
-
"metadata": {},
|
| 1010 |
-
"output_type": "execute_result"
|
| 1011 |
-
}
|
| 1012 |
-
],
|
| 1013 |
-
"source": [
|
| 1014 |
-
"tavily_search_prompt.format(background_information=job_description)"
|
| 1015 |
-
]
|
| 1016 |
-
},
|
| 1017 |
-
{
|
| 1018 |
-
"cell_type": "code",
|
| 1019 |
-
"execution_count": 8,
|
| 1020 |
-
"id": "6b973991",
|
| 1021 |
-
"metadata": {},
|
| 1022 |
-
"outputs": [
|
| 1023 |
-
{
|
| 1024 |
-
"name": "stdout",
|
| 1025 |
-
"output_type": "stream",
|
| 1026 |
-
"text": [
|
| 1027 |
-
"Initializing LLM with model llama3.2:latest and provider ollama in c:\\users\\risha\\python-dir\\knowledgebase\\job_writer\\utils\\llm_client.py\n",
|
| 1028 |
-
"Initializing LLM with model llama3.2:latest and provider ollama in c:\\users\\risha\\python-dir\\knowledgebase\\job_writer\\utils\\llm_client.py\n"
|
| 1029 |
-
]
|
| 1030 |
-
}
|
| 1031 |
-
],
|
| 1032 |
-
"source": [
|
| 1033 |
-
"from job_writer.utils.llm_client import LLMClient\n",
|
| 1034 |
-
"\n",
|
| 1035 |
-
"LLM = LLMClient()\n",
|
| 1036 |
-
"llm = LLMClient().get_llm()"
|
| 1037 |
-
]
|
| 1038 |
-
},
|
| 1039 |
-
{
|
| 1040 |
-
"cell_type": "code",
|
| 1041 |
-
"execution_count": null,
|
| 1042 |
-
"id": "5ff5ac65",
|
| 1043 |
-
"metadata": {},
|
| 1044 |
-
"outputs": [],
|
| 1045 |
-
"source": []
|
| 1046 |
-
}
|
| 1047 |
-
],
|
| 1048 |
-
"metadata": {
|
| 1049 |
-
"kernelspec": {
|
| 1050 |
-
"display_name": "Python 3",
|
| 1051 |
-
"language": "python",
|
| 1052 |
-
"name": "python3"
|
| 1053 |
-
},
|
| 1054 |
-
"language_info": {
|
| 1055 |
-
"codemirror_mode": {
|
| 1056 |
-
"name": "ipython",
|
| 1057 |
-
"version": 3
|
| 1058 |
-
},
|
| 1059 |
-
"file_extension": ".py",
|
| 1060 |
-
"mimetype": "text/x-python",
|
| 1061 |
-
"name": "python",
|
| 1062 |
-
"nbconvert_exporter": "python",
|
| 1063 |
-
"pygments_lexer": "ipython3",
|
| 1064 |
-
"version": "3.12.10"
|
| 1065 |
-
}
|
| 1066 |
-
},
|
| 1067 |
-
"nbformat": 4,
|
| 1068 |
-
"nbformat_minor": 5
|
| 1069 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/TavilySearch.py
DELETED
|
@@ -1,230 +0,0 @@
|
|
| 1 |
-
import logging
|
| 2 |
-
import os
|
| 3 |
-
import json
|
| 4 |
-
import asyncio
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
from langchain_core.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
|
| 8 |
-
from langchain_core.prompt_values import PromptValue
|
| 9 |
-
from langchain_community.tools.tavily_search import TavilySearchResults
|
| 10 |
-
from langchain_community.tools import tool
|
| 11 |
-
from langchain.output_parsers import PydanticOutputParser, RetryOutputParser
|
| 12 |
-
from openevals.llm import create_async_llm_as_judge
|
| 13 |
-
from openevals.prompts import (
|
| 14 |
-
RAG_RETRIEVAL_RELEVANCE_PROMPT,
|
| 15 |
-
RAG_HELPFULNESS_PROMPT
|
| 16 |
-
)
|
| 17 |
-
|
| 18 |
-
from ..utils.llm_client import LLMClient
|
| 19 |
-
from ..agents.output_schema import TavilyQuerySet
|
| 20 |
-
from ..prompts.templates import TAVILY_QUERY_PROMPT
|
| 21 |
-
from ..classes.classes import ResearchState
|
| 22 |
-
|
| 23 |
-
logger = logging.getLogger(__name__)
|
| 24 |
-
|
| 25 |
-
LLM = LLMClient()
|
| 26 |
-
llm_client = LLM.get_instance(model_name="ejschwar/llama3.2-better-prompts:latest", model_provider="ollama_llm")
|
| 27 |
-
llm_structured = llm_client.get_llm()
|
| 28 |
-
|
| 29 |
-
relevance_evaluator = create_async_llm_as_judge(
|
| 30 |
-
judge=llm_structured,
|
| 31 |
-
prompt=RAG_RETRIEVAL_RELEVANCE_PROMPT,
|
| 32 |
-
feedback_key="retrieval_relevance",
|
| 33 |
-
)
|
| 34 |
-
|
| 35 |
-
helpfulness_evaluator = create_async_llm_as_judge(
|
| 36 |
-
judge=llm_structured,
|
| 37 |
-
prompt=RAG_HELPFULNESS_PROMPT
|
| 38 |
-
+ '\nReturn "true" if the answer is helpful, and "false" otherwise.',
|
| 39 |
-
feedback_key="helpfulness",
|
| 40 |
-
)
|
| 41 |
-
|
| 42 |
-
@tool
|
| 43 |
-
def search_company(job_description: str, company_name: str) -> dict:
|
| 44 |
-
"""Gather information about a company to understand more about the role,
|
| 45 |
-
recent developments, culture, and values of the company."""
|
| 46 |
-
|
| 47 |
-
try:
|
| 48 |
-
# Get format instructions from the parser
|
| 49 |
-
base_parser = PydanticOutputParser(pydantic_object=TavilyQuerySet)
|
| 50 |
-
parser = RetryOutputParser.from_llm(llm_structured, base_parser)
|
| 51 |
-
format_instructions = parser.get_format_instructions()
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
# Create the prompt with both messages
|
| 55 |
-
chat_prompt_tavily: ChatPromptTemplate = ChatPromptTemplate.from_messages([
|
| 56 |
-
SystemMessagePromptTemplate.from_template(
|
| 57 |
-
TAVILY_QUERY_PROMPT,
|
| 58 |
-
input_variables=["company_name"]
|
| 59 |
-
),
|
| 60 |
-
HumanMessagePromptTemplate.from_template(
|
| 61 |
-
"Below is the required job description to parse:\n\n{job_description}",
|
| 62 |
-
input_variables=["job_description"]
|
| 63 |
-
)
|
| 64 |
-
])
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
chat_prompt_value: PromptValue = chat_prompt_tavily.format_prompt(
|
| 68 |
-
company_name=company_name,
|
| 69 |
-
job_description=job_description
|
| 70 |
-
)
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
# Format messages and get LLM response
|
| 74 |
-
chat_prompt_tavily_messages = chat_prompt_tavily.format_messages(
|
| 75 |
-
company_name=company_name,
|
| 76 |
-
job_description=job_description
|
| 77 |
-
)
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
# Get response from LLM
|
| 81 |
-
search_results_llm = llm_structured.invoke(chat_prompt_tavily_messages)
|
| 82 |
-
# logger.info("Raw LLM Response content: %s", search_results_llm.content)
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
try:
|
| 86 |
-
parsed_query_set: TavilyQuerySet = parser.parse_with_prompt(search_results_llm.content, chat_prompt_value)
|
| 87 |
-
logger.info("Parsed TavilyQuerySet: %s", parsed_query_set.model_dump_json(indent=2))
|
| 88 |
-
except json.JSONDecodeError as e:
|
| 89 |
-
logger.error("JSON decoding error while parsing LLM response: %s. LLM content was: %s", e, search_results_llm.content, exc_info=True)
|
| 90 |
-
raise
|
| 91 |
-
except Exception as e: # Catches PydanticValidationErrors and other parsing issues
|
| 92 |
-
logger.error("Error parsing TavilyQuerySet from LLM completion: %s. LLM content was: %s", e, search_results_llm.content, exc_info=True)
|
| 93 |
-
raise
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
# Initialize search with advanced parameters
|
| 97 |
-
search = TavilySearchResults(max_results=4, search_depth="advanced")
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
# Prepare the structure for storing queries, rationales, and Tavily results
|
| 101 |
-
company_research_data = {}
|
| 102 |
-
attempted_queries = []
|
| 103 |
-
query_attributes = [f"query{i}" for i in range(1, 6)]
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
for attr_name in query_attributes:
|
| 107 |
-
query_list = getattr(parsed_query_set, attr_name, None)
|
| 108 |
-
if query_list and isinstance(query_list, list) and len(query_list) > 0:
|
| 109 |
-
actual_query = query_list[0]
|
| 110 |
-
rationale = query_list[1] if len(query_list) > 1 else "N/A" # Handle if rationale is missing
|
| 111 |
-
company_research_data[attr_name] = {
|
| 112 |
-
'query': actual_query,
|
| 113 |
-
'rationale': rationale,
|
| 114 |
-
'results': []
|
| 115 |
-
}
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
# logger.info("Prepared company research structure: %s", json.dumps(company_research_data, indent=2))
|
| 119 |
-
# Execute each query and store results
|
| 120 |
-
for query_key, query_info in company_research_data.items():
|
| 121 |
-
try:
|
| 122 |
-
if not isinstance(query_info['query'], str) or not query_info['query'].strip():
|
| 123 |
-
logger.warning("Skipping Tavily search for %s due to invalid/empty query: '%s'", query_key, query_info['query'])
|
| 124 |
-
query_info['results'] = []
|
| 125 |
-
continue
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
logger.info("Executing Tavily search for %s: '%s'", query_key, query_info['query'])
|
| 129 |
-
# tool.invoke({"args": {'query': 'who won the last french open'}, "type": "tool_call", "id": "foo", "name": "tavily"})
|
| 130 |
-
tavily_api_results = search.invoke({"args": {'query': query_info['query']}, "type": "tool_call", "id": "job_search", "name": "tavily"})
|
| 131 |
-
attempted_queries.append(query_info['query'])
|
| 132 |
-
del query_info['query']
|
| 133 |
-
|
| 134 |
-
if tavily_api_results and isinstance(tavily_api_results, list) and len(tavily_api_results) > 0:
|
| 135 |
-
query_info['results'] = [result['content'] for result in tavily_api_results if 'content' in result]
|
| 136 |
-
else:
|
| 137 |
-
logger.info("No results or unexpected format from Tavily for %s.", query_key)
|
| 138 |
-
query_info['results'] = []
|
| 139 |
-
except Exception as e:
|
| 140 |
-
logger.error("Error executing Tavily search for query %s ('%s'): %s", query_key, query_info['query'], str(e), exc_info=True)
|
| 141 |
-
query_info['results'] = []
|
| 142 |
-
|
| 143 |
-
# print("Results: ", results)
|
| 144 |
-
return company_research_data, attempted_queries
|
| 145 |
-
|
| 146 |
-
except json.JSONDecodeError as e:
|
| 147 |
-
logger.error("JSON decoding error: %s", e)
|
| 148 |
-
raise
|
| 149 |
-
except AttributeError as e:
|
| 150 |
-
logger.error("Attribute error: %s", e)
|
| 151 |
-
raise
|
| 152 |
-
except Exception as e:
|
| 153 |
-
logger.error("Unexpected error: %s", e)
|
| 154 |
-
raise
|
| 155 |
-
|
| 156 |
-
async def relevance_filter(state: ResearchState) -> ResearchState:
|
| 157 |
-
try:
|
| 158 |
-
# Mark the current node
|
| 159 |
-
state["current_node"] = "relevance_filter"
|
| 160 |
-
|
| 161 |
-
# Check if company_research_data exists
|
| 162 |
-
if not state.get("company_research_data"):
|
| 163 |
-
print("ERROR: company_research_data not found in state")
|
| 164 |
-
return state
|
| 165 |
-
|
| 166 |
-
# Check if tavily_search results exist
|
| 167 |
-
if not state["company_research_data"].get("tavily_search"):
|
| 168 |
-
print("ERROR: tavily_search not found in company_research_data")
|
| 169 |
-
state["company_research_data"]["tavily_search"] = []
|
| 170 |
-
return state
|
| 171 |
-
|
| 172 |
-
# Initialize compiled_results if not present
|
| 173 |
-
if "compiled_results" not in state:
|
| 174 |
-
state["compiled_results"] = []
|
| 175 |
-
|
| 176 |
-
print("Filtering results...")
|
| 177 |
-
# Get the company research data which contains results for different queries
|
| 178 |
-
# Example: {'query1': {'rationale': ..., 'results': [...]}, 'query2': ...}
|
| 179 |
-
|
| 180 |
-
all_query_data = state["company_research_data"].get("tavily_search", {})
|
| 181 |
-
# print("All query data:", all_query_data)
|
| 182 |
-
filtered_results_for_current_run = [] # Stores results deemed relevant in this specific call
|
| 183 |
-
|
| 184 |
-
# Create a semaphore to limit concurrent tasks to 2
|
| 185 |
-
semaphore = asyncio.Semaphore(2)
|
| 186 |
-
|
| 187 |
-
async def evaluate_with_semaphore(query_result_item: dict):
|
| 188 |
-
# query_result_item is a dict like {'rationale': '...', 'results': [...]}
|
| 189 |
-
async with semaphore:
|
| 190 |
-
# Safely get the query to use for relevance evaluation
|
| 191 |
-
attempted_queries_list = state.get("attempted_search_queries", [])
|
| 192 |
-
input_query = attempted_queries_list[-1] if attempted_queries_list else "No query context available"
|
| 193 |
-
|
| 194 |
-
eval_result = await relevance_evaluator(
|
| 195 |
-
inputs=input_query, context=query_result_item # context is the whole result block for the query
|
| 196 |
-
)
|
| 197 |
-
return query_result_item, eval_result
|
| 198 |
-
|
| 199 |
-
# Create tasks for all results
|
| 200 |
-
tasks = [evaluate_with_semaphore(query_info) for query_info in all_query_data.values() if isinstance(query_info, dict) and "results" in query_info]
|
| 201 |
-
|
| 202 |
-
# Process tasks as they complete
|
| 203 |
-
for completed_task in asyncio.as_completed(tasks):
|
| 204 |
-
query_result_item, eval_result = await completed_task
|
| 205 |
-
if eval_result.get("score"): # Safely check for score
|
| 206 |
-
# Assuming query_result_item["results"] is a list of content strings
|
| 207 |
-
if isinstance(query_result_item.get("results"), list):
|
| 208 |
-
# print(f"Evaluated result: {query_result_item}")
|
| 209 |
-
filtered_results_for_current_run.extend(query_result_item["results"])
|
| 210 |
-
else:
|
| 211 |
-
# Handle cases where "results" might not be a list or is missing
|
| 212 |
-
logger.warning("Expected a list for 'results' in query_result_item, got: %s", type(query_result_item.get('results')))
|
| 213 |
-
|
| 214 |
-
logger.info("Filtered results for current run: %s",filtered_results_for_current_run)
|
| 215 |
-
|
| 216 |
-
# The error occurs at a line like the following (line 178 in your traceback):
|
| 217 |
-
# This print statement will now safely access "compiled_results"
|
| 218 |
-
# print("Compiled results (before append): ", state["compiled_results"]) # Append the newly filtered results to the main compiled_results list
|
| 219 |
-
state["compiled_results"].extend(filtered_results_for_current_run)
|
| 220 |
-
state["company_research_data"]["tavily_search"] = filtered_results_for_current_run
|
| 221 |
-
# logger.info(f"Compiled results (after append): {state['compiled_results']}")
|
| 222 |
-
return state
|
| 223 |
-
|
| 224 |
-
except Exception as e:
|
| 225 |
-
print(f"ERROR in relevance_filter: {e}")
|
| 226 |
-
import traceback
|
| 227 |
-
traceback.print_exc()
|
| 228 |
-
logger.error(f"Error in relevance_filter: {str(e)}")
|
| 229 |
-
# Return original state to avoid breaking the flow
|
| 230 |
-
return state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/llm_client.py
DELETED
|
@@ -1,141 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
LLM Client module for managing language model interactions.
|
| 3 |
-
"""
|
| 4 |
-
|
| 5 |
-
import os
|
| 6 |
-
from typing_extensions import Optional, Union
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
from langchain_core.language_models.chat_models import BaseChatModel
|
| 10 |
-
from langchain_core.language_models.llms import BaseLLM
|
| 11 |
-
from langchain_ollama import ChatOllama
|
| 12 |
-
from langchain_openai import ChatOpenAI
|
| 13 |
-
|
| 14 |
-
from .errors import ModelNotFoundError
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
class LLMClient:
|
| 18 |
-
"""
|
| 19 |
-
Client for managing language model interactions.
|
| 20 |
-
Provides a unified interface for different LLM backends.
|
| 21 |
-
"""
|
| 22 |
-
|
| 23 |
-
_instance = None # Singleton instance
|
| 24 |
-
|
| 25 |
-
@classmethod
|
| 26 |
-
def get_instance(cls, model_name: Optional[str] = None, model_provider: Optional[str] = None):
|
| 27 |
-
"""Get or create a singleton instance of the LLM client.
|
| 28 |
-
|
| 29 |
-
Args:
|
| 30 |
-
model_name: Optional model name to override the default
|
| 31 |
-
|
| 32 |
-
Returns:
|
| 33 |
-
LLMClient instance
|
| 34 |
-
"""
|
| 35 |
-
if cls._instance is None:
|
| 36 |
-
cls._instance = LLMClient(model_name, model_provider)
|
| 37 |
-
elif model_name is not None and cls._instance.model_name != model_name:
|
| 38 |
-
# Reinitialize if a different model is requested
|
| 39 |
-
cls._instance = LLMClient(model_name)
|
| 40 |
-
|
| 41 |
-
return cls._instance
|
| 42 |
-
|
| 43 |
-
def __init__(self, model_name: Optional[str] = None, model_provider: Optional[str] = None):
|
| 44 |
-
"""Initialize the LLM client with the specified model.
|
| 45 |
-
|
| 46 |
-
Args:
|
| 47 |
-
model_name: Name of the model to use (default: from environment or "llama3.2:latest")
|
| 48 |
-
"""
|
| 49 |
-
print("Initializing LLM Client with model:", model_name, "and provider:", model_provider)
|
| 50 |
-
self.model_name = model_name or os.getenv("DEFAULT_LLM_MODEL", "llama3.2:latest")
|
| 51 |
-
self.model_provider = model_provider or os.getenv("LLM_PROVIDER", "ollama").lower()
|
| 52 |
-
self.llm = self._initialize_llm()
|
| 53 |
-
|
| 54 |
-
def __str__(self):
|
| 55 |
-
return f"LLMClient(model_name={self.model_name}, provider={self.model_provider})"
|
| 56 |
-
|
| 57 |
-
def _initialize_llm(self) -> Union[BaseLLM, BaseChatModel]:
|
| 58 |
-
"""Initialize the appropriate LLM based on configuration.
|
| 59 |
-
|
| 60 |
-
Returns:
|
| 61 |
-
Initialized LLM instance
|
| 62 |
-
"""
|
| 63 |
-
print(f"Initializing LLM with model {self.model_name} and provider {self.model_provider} in {__file__}")
|
| 64 |
-
if self.model_provider == "ollama":
|
| 65 |
-
return self._initialize_llama()
|
| 66 |
-
elif self.model_provider == "openai":
|
| 67 |
-
return self._initialize_openai()
|
| 68 |
-
elif self.model_provider == "ollama_json":
|
| 69 |
-
return self._initialize_jsonllm()
|
| 70 |
-
else:
|
| 71 |
-
raise ValueError(f"Unsupported LLM provider: {self.model_provider}")
|
| 72 |
-
|
| 73 |
-
def _initialize_llama(self) -> BaseChatModel:
|
| 74 |
-
"""Initialize an Ollama LLM.
|
| 75 |
-
|
| 76 |
-
Returns:
|
| 77 |
-
Ollama LLM instance
|
| 78 |
-
"""
|
| 79 |
-
try:
|
| 80 |
-
# model = OllamaLLM(model=self.model_name, temperature=0.1, top_k=1, repeat_penalty=1.2)
|
| 81 |
-
model: ChatOllama = ChatOllama(model=self.model_name, temperature=0.1, top_k=1, repeat_penalty=1.2)
|
| 82 |
-
return model
|
| 83 |
-
except Exception as e:
|
| 84 |
-
raise ModelNotFoundError(f"Failed to initialize Ollama with model {self.model_name}: {e}") from e
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
def _initialize_jsonllm(self) -> BaseChatModel:
|
| 88 |
-
"""
|
| 89 |
-
Initialize a Mistral chat model.
|
| 90 |
-
Returns:
|
| 91 |
-
Mistral chat model instance
|
| 92 |
-
"""
|
| 93 |
-
try:
|
| 94 |
-
model: ChatOllama = ChatOllama(model=self.model_name, format='json', temperature=0.1, top_k=1, repeat_penalty=1.2)
|
| 95 |
-
return model
|
| 96 |
-
except Exception as e:
|
| 97 |
-
raise ModelNotFoundError(f"Failed to initialize Ollama with model {self.model_name}: {e}") from e
|
| 98 |
-
|
| 99 |
-
def _initialize_openai(self) -> BaseChatModel:
|
| 100 |
-
"""Initialize an OpenAI chat model.
|
| 101 |
-
|
| 102 |
-
Returns:
|
| 103 |
-
OpenAI chat model instance
|
| 104 |
-
"""
|
| 105 |
-
api_key = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJhcHAiLCJleHAiOjE3OTk5OTk5OTksInN1YiI6NjU1MDM3LCJhdWQiOiJXRUIiLCJpYXQiOjE2OTQwNzY4NTF9.hBcFcCqO1UF2Jb-m8Nv5u5zJPvQIuXUSZgyqggAD-ww"
|
| 106 |
-
# api_key = os.getenv("OPENAI_API_KEY")
|
| 107 |
-
if not api_key:
|
| 108 |
-
raise ValueError("OPENAI_API_KEY environment variable not set")
|
| 109 |
-
|
| 110 |
-
try:
|
| 111 |
-
return ChatOpenAI(model_name=self.model_name, api_key=api_key)
|
| 112 |
-
except Exception as e:
|
| 113 |
-
raise ModelNotFoundError(f"Failed to initialize Ollama with model {self.model_name}: {e}") from e
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
def get_llm(self) -> Union[BaseLLM, BaseChatModel]:
|
| 117 |
-
"""Get the initialized LLM instance.
|
| 118 |
-
|
| 119 |
-
Returns:
|
| 120 |
-
LLM instance
|
| 121 |
-
"""
|
| 122 |
-
if self.llm is None:
|
| 123 |
-
raise RuntimeError("LLM client not initialized")
|
| 124 |
-
return self.llm
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
def reinitialize(self, model_name: Optional[str] = None, provider: Optional[str] = None) -> None:
|
| 128 |
-
"""Reinitialize the LLM with a different model or provider.
|
| 129 |
-
|
| 130 |
-
Args:
|
| 131 |
-
model_name: New model name to use
|
| 132 |
-
provider: New provider to use
|
| 133 |
-
"""
|
| 134 |
-
print(f"Reinitializing LLM client from {self.model_name} to {model_name}")
|
| 135 |
-
if model_name:
|
| 136 |
-
self.model_name = model_name
|
| 137 |
-
if provider:
|
| 138 |
-
self.model_provider = provider.lower()
|
| 139 |
-
|
| 140 |
-
self.llm = self._initialize_llm()
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
workflow.py
DELETED
|
@@ -1,210 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Workflow runner for the job application writer.
|
| 3 |
-
|
| 4 |
-
This module provides functions for running the job application
|
| 5 |
-
writer graph in both interactive and batch modes.
|
| 6 |
-
"""
|
| 7 |
-
|
| 8 |
-
import asyncio
|
| 9 |
-
import argparse
|
| 10 |
-
import sys
|
| 11 |
-
|
| 12 |
-
from datetime import datetime
|
| 13 |
-
from langchain_core.tracers import ConsoleCallbackHandler
|
| 14 |
-
from langgraph.graph import StateGraph
|
| 15 |
-
from langfuse import Langfuse
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
from job_writer.nodes import Dataloading
|
| 19 |
-
from job_writer.nodes.research_workflow import research_workflow
|
| 20 |
-
from job_writer.classes import AppState, DataLoadState
|
| 21 |
-
from job_writer.agents.nodes import (
|
| 22 |
-
create_draft,
|
| 23 |
-
critique_draft,
|
| 24 |
-
finalize_document,
|
| 25 |
-
human_approval,
|
| 26 |
-
)
|
| 27 |
-
from job_writer.nodes import (
|
| 28 |
-
generate_variations,
|
| 29 |
-
self_consistency_vote
|
| 30 |
-
)
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
class JobWorkflow:
|
| 34 |
-
"""
|
| 35 |
-
Workflow runner for the job application writer.
|
| 36 |
-
Args:
|
| 37 |
-
resume: Resume text or file path
|
| 38 |
-
job_description: Job description text or URL
|
| 39 |
-
content:
|
| 40 |
-
Type of application material to generate
|
| 41 |
-
model_config: Configuration for language models
|
| 42 |
-
"""
|
| 43 |
-
|
| 44 |
-
#
|
| 45 |
-
def __init__(self, resume=None, job_description_source=None, content=None, model_configuration=None):
|
| 46 |
-
"""Initialize the Writing Workflow."""
|
| 47 |
-
print(f"Initializing Workflow for {content}")
|
| 48 |
-
self.resume = resume
|
| 49 |
-
self.job_description_source = job_description_source
|
| 50 |
-
self.content = content
|
| 51 |
-
self.model_configuration = model_configuration
|
| 52 |
-
|
| 53 |
-
# Initialize the app state
|
| 54 |
-
self.app_state = AppState(
|
| 55 |
-
resume_path=resume,
|
| 56 |
-
job_description_source=job_description_source,
|
| 57 |
-
company_research_data=None,
|
| 58 |
-
draft="",
|
| 59 |
-
feedback="",
|
| 60 |
-
final="",
|
| 61 |
-
content=content,
|
| 62 |
-
current_node=""
|
| 63 |
-
)
|
| 64 |
-
|
| 65 |
-
self.__init__nodes()
|
| 66 |
-
self._build_workflow()
|
| 67 |
-
|
| 68 |
-
self.langfuse = Langfuse()
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
def __init__nodes(self):
|
| 72 |
-
self.dataloading = Dataloading()
|
| 73 |
-
# self.createdraft = create_draft()
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
def _build_workflow(self):
|
| 77 |
-
# Build the graph with config
|
| 78 |
-
self.job_app_graph = StateGraph(DataLoadState)
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
self.job_app_graph.add_node("initialize_system", self.dataloading.system_setup)
|
| 82 |
-
self.job_app_graph.add_node("load", self.dataloading.run)
|
| 83 |
-
# self.job_app_graph.add_node("build_persona", select_persona)
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
# Add research workflow as a node
|
| 87 |
-
self.job_app_graph.add_node("research", research_workflow)
|
| 88 |
-
self.job_app_graph.add_node("create_draft", create_draft)
|
| 89 |
-
self.job_app_graph.add_node("variations", generate_variations)
|
| 90 |
-
self.job_app_graph.add_node("self_consistency", self_consistency_vote)
|
| 91 |
-
self.job_app_graph.add_node("critique", critique_draft)
|
| 92 |
-
self.job_app_graph.add_node("human_approval", human_approval)
|
| 93 |
-
self.job_app_graph.add_node("finalize", finalize_document)
|
| 94 |
-
|
| 95 |
-
self.job_app_graph.set_entry_point("initialize_system")
|
| 96 |
-
self.job_app_graph.set_finish_point("finalize")
|
| 97 |
-
|
| 98 |
-
self.job_app_graph.add_edge("initialize_system", "load")
|
| 99 |
-
self.job_app_graph.add_conditional_edges("load", self.dataloading.verify_inputs)
|
| 100 |
-
self.job_app_graph.add_edge("research", "create_draft")
|
| 101 |
-
self.job_app_graph.add_edge("create_draft", "variations")
|
| 102 |
-
self.job_app_graph.add_edge("variations", "self_consistency")
|
| 103 |
-
self.job_app_graph.add_edge("self_consistency", "critique")
|
| 104 |
-
self.job_app_graph.add_edge("critique", "human_approval")
|
| 105 |
-
self.job_app_graph.add_edge("human_approval", "finalize")
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
async def run(self) -> dict | None:
    """
    Compile and execute the job application writer workflow.

    Returns:
        The final graph state produced by ``ainvoke`` — a mapping that
        includes the generated content (the ``__main__`` caller reads
        ``result["final"]``) — or ``None`` if compilation or execution
        fails.  (Original annotation said ``str | None``, which did not
        match the actual return value.)
    """
    # Compile the graph up front so structural errors surface early.
    try:
        compiled_graph = self.compile()
    except Exception as e:
        print(f"Error compiling graph: {e}")
        return None

    # Set up run configuration: unique thread id / run name per session
    # so traces from separate runs do not collide.
    run_name = f"Job Application Writer - {self.app_state['content']} - {datetime.now().strftime('%Y-%m-%d-%H%M%S')}"
    config = {
        "configurable": {
            "thread_id": f"job_app_session_{datetime.now().strftime('%Y%m%d%H%M%S')}",
            "callbacks": [ConsoleCallbackHandler()],
            "run_name": run_name,
            "tags": ["job-application", self.app_state['content']]
        },
        # Guard against runaway graph cycles.
        "recursion_limit": 10
    }

    # Run the graph.
    try:
        self.app_state["current_node"] = "initialize_system"
        graph_output = await compiled_graph.ainvoke(self.app_state, config=config)
    except Exception as e:
        print(f"Error running graph: {e}")
        return None

    return graph_output
def compile(self):
    """Compile the assembled state graph into a runnable and return it."""
    return self.job_app_graph.compile()
def print_result(self, content_type, final_content):
    """Pretty-print the final generated content to the console."""
    divider = "=" * 80
    print(f"\n{divider}")
    print(f"FINAL {content_type.upper()}:")
    print(final_content)
    print(divider)
def save_result(self, content_type, final_content):
    """Write the final content to a timestamped text file; return its name."""
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_file = f"{content_type}_{stamp}.txt"
    with open(output_file, "w", encoding="utf-8") as fh:
        fh.write(final_content)
    print(f"\nSaved to {output_file}")
    return output_file
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description="Generate job application materials")
    parser.add_argument("--resume", required=True, help="Path to resume file or resume text")
    parser.add_argument("--job", required=True, help="Path/URL to job description or description text")
    parser.add_argument("--type", default="cover_letter",
                        choices=["cover_letter", "bullets", "linkedin_note"],
                        help="Type of application material to generate")
    parser.add_argument("--model", help="Ollama model to use")
    parser.add_argument("--temp", type=float, help="Temperature for generation")

    args = parser.parse_args()

    # Configure models if specified.  Temperatures are capped (0.25 / 0.2)
    # so generation stays conservative regardless of the CLI value.
    model_config = {}
    if args.model:
        model_config["model_name"] = args.model
    if args.temp is not None:
        model_config["temperature"] = min(0.25, args.temp)
        model_config["precise_temperature"] = min(0.2, args.temp)

    # Initialize the workflow.
    workflow = JobWorkflow(
        resume=args.resume,
        job_description_source=args.job,
        content=args.type,
        model_configuration=model_config
    )

    # Run the workflow.
    result = asyncio.run(workflow.run())

    # Single failure guard.  The original code checked `result` twice with
    # two `sys.exit(1)` branches; the second else-branch was unreachable
    # because the first failure had already exited the process.
    if not result:
        print("Error running workflow.")
        sys.exit(1)

    # Print the result to the console, then save it to a file.
    workflow.print_result(args.type, result["final"])
    workflow.save_result(args.type, result["final"])

    # Print a success message.
    print("Workflow completed successfully.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|