Commit ·
a01026b
1
Parent(s): 046508a
Refactor job writing agent: Improved code structure by introducing new data loading classes, enhancing logging practices, and ensuring safe environment variable access. Updated workflow logic for better readability and maintainability.
Browse files- src/job_writing_agent/__init__.py +90 -39
- src/job_writing_agent/agents/nodes.py +128 -162
- src/job_writing_agent/classes/classes.py +3 -1
- src/job_writing_agent/nodes/__init__.py +22 -2
- src/job_writing_agent/nodes/data_loading_workflow.py +259 -0
- src/job_writing_agent/nodes/initializing.py +0 -513
- src/job_writing_agent/nodes/job_description_loader.py +1 -1
- src/job_writing_agent/nodes/research_workflow.py +92 -48
- src/job_writing_agent/nodes/selfconsistency.py +75 -51
- src/job_writing_agent/nodes/system_initializer.py +73 -0
- src/job_writing_agent/nodes/validation_helper.py +124 -0
- src/job_writing_agent/nodes/variations.py +46 -28
- src/job_writing_agent/prompts/templates.py +75 -76
- src/job_writing_agent/prompts/test_templates.py +59 -0
- src/job_writing_agent/tools/SearchTool.py +43 -18
- src/job_writing_agent/utils/application_cli_interface.py +54 -33
- src/job_writing_agent/utils/config.py +28 -9
- src/job_writing_agent/utils/document_processing.py +36 -14
- src/job_writing_agent/utils/vector_store.py +42 -40
- src/job_writing_agent/workflow.py +85 -50
src/job_writing_agent/__init__.py
CHANGED
|
@@ -7,7 +7,8 @@ using LangChain and LangGraph with LangSmith observability.
|
|
| 7 |
|
| 8 |
__version__ = "0.1.0"
|
| 9 |
|
| 10 |
-
import os
|
|
|
|
| 11 |
import logging
|
| 12 |
from pathlib import Path
|
| 13 |
from dotenv import load_dotenv
|
|
@@ -16,77 +17,112 @@ from dotenv import load_dotenv
|
|
| 16 |
# Set up logging
|
| 17 |
logger = logging.getLogger(__name__)
|
| 18 |
logger.setLevel(logging.INFO)
|
| 19 |
-
log_dir = Path(__file__).parent /
|
| 20 |
log_dir.mkdir(exist_ok=True)
|
| 21 |
-
logger.addHandler(logging.FileHandler(log_dir /
|
| 22 |
-
logger.info(
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# Load environment variables from .env file
|
| 25 |
-
env_path = Path(__file__).parent /
|
| 26 |
|
| 27 |
|
| 28 |
def _set_env(var: str):
|
| 29 |
if not os.environ.get(var):
|
| 30 |
-
os.environ[var] = getpass
|
| 31 |
logger.info(f"{var} set to {os.environ[var]}")
|
| 32 |
|
|
|
|
| 33 |
if env_path.exists():
|
| 34 |
-
logger.info("Loading environment variables from %s",
|
| 35 |
load_dotenv(dotenv_path=env_path, override=True)
|
| 36 |
else:
|
| 37 |
-
logger.warning(
|
|
|
|
|
|
|
| 38 |
|
| 39 |
# Check for critical environment variables
|
| 40 |
if not os.getenv("TAVILY_API_KEY"):
|
| 41 |
-
logger.warning(
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
| 43 |
_set_env("TAVILY_API_KEY")
|
| 44 |
|
| 45 |
|
| 46 |
if not os.getenv("GEMINI_API_KEY"):
|
| 47 |
-
logger.warning(
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
| 49 |
_set_env("GEMINI_API_KEY")
|
| 50 |
|
| 51 |
|
| 52 |
if not os.getenv("PINECONE_API_KEY"):
|
| 53 |
-
logger.warning(
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
| 55 |
_set_env("PINECONE_API_KEY")
|
| 56 |
|
| 57 |
if not os.getenv("LANGFUSE_PUBLIC_KEY"):
|
| 58 |
-
logger.warning(
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
| 60 |
_set_env("LANGFUSE_PUBLIC_KEY")
|
| 61 |
|
| 62 |
if not os.getenv("LANGFUSE_SECRET_KEY"):
|
| 63 |
-
logger.warning(
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
| 65 |
_set_env("LANGFUSE_SECRET_KEY")
|
| 66 |
|
| 67 |
if not os.getenv("LANGSMITH_API_KEY"):
|
| 68 |
-
logger.warning(
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
| 70 |
_set_env("LANGSMITH_API_KEY")
|
| 71 |
|
| 72 |
if not os.getenv("OPENROUTER_API_KEY"):
|
| 73 |
-
logger.warning(
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
| 75 |
_set_env("OPENROUTER_API_KEY")
|
| 76 |
|
| 77 |
if not os.getenv("LANGSMITH_PROJECT"):
|
| 78 |
-
logger.warning(
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
| 80 |
_set_env("LANGSMITH_PROJECT")
|
| 81 |
|
| 82 |
if not os.getenv("LANGSMITH_ENDPOINT"):
|
| 83 |
-
logger.warning(
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
| 85 |
_set_env("LANGSMITH_ENDPOINT")
|
| 86 |
|
| 87 |
if not os.getenv("CEREBRAS_API_KEY"):
|
| 88 |
-
logger.warning(
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
| 90 |
_set_env("CEREBRAS_API_KEY")
|
| 91 |
|
| 92 |
os.environ["LANGSMITH_TRACING"] = "true"
|
|
@@ -111,33 +147,48 @@ from dotenv import load_dotenv
|
|
| 111 |
|
| 112 |
logger = logging.getLogger(__name__)
|
| 113 |
logger.setLevel(logging.INFO)
|
| 114 |
-
log_dir = Path(__file__).parent /
|
| 115 |
log_dir.mkdir(exist_ok=True)
|
| 116 |
-
logger.addHandler(logging.FileHandler(log_dir /
|
| 117 |
-
logger.info(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
-
env_path = Path(__file__).parent / '.env'
|
| 120 |
|
| 121 |
def _set_env(var: str):
|
| 122 |
if not os.environ.get(var):
|
| 123 |
os.environ[var] = getpass.getpass(f"{var}: ")
|
| 124 |
logger.info(f"{var} set to {os.environ[var]}")
|
| 125 |
|
|
|
|
| 126 |
def load_environment_variables(key_array):
|
| 127 |
for key in key_array:
|
| 128 |
if not os.getenv(key):
|
| 129 |
-
logger.warning(
|
|
|
|
|
|
|
| 130 |
_set_env(key)
|
| 131 |
|
|
|
|
| 132 |
if env_path.exists():
|
| 133 |
-
logger.info("Loading environment variables from %s",
|
| 134 |
load_dotenv(dotenv_path=env_path, override=True)
|
| 135 |
else:
|
| 136 |
-
logger.warning(
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
# Check for critical environment variables
|
| 141 |
load_environment_variables(environment_key_array)
|
| 142 |
|
| 143 |
-
__all__ = ["job_app_graph", "workflows/research_workflow"]
|
|
|
|
| 7 |
|
| 8 |
__version__ = "0.1.0"
|
| 9 |
|
| 10 |
+
import os
|
| 11 |
+
from getpass import getpass
|
| 12 |
import logging
|
| 13 |
from pathlib import Path
|
| 14 |
from dotenv import load_dotenv
|
|
|
|
| 17 |
# Set up logging
|
| 18 |
logger = logging.getLogger(__name__)
|
| 19 |
logger.setLevel(logging.INFO)
|
| 20 |
+
log_dir = Path(__file__).parent / "logs"
|
| 21 |
log_dir.mkdir(exist_ok=True)
|
| 22 |
+
logger.addHandler(logging.FileHandler(log_dir / "job_writer.log", mode="a"))
|
| 23 |
+
logger.info(
|
| 24 |
+
"Logger initialized. Writing to %s", Path(__file__).parent / "job_writer.log"
|
| 25 |
+
)
|
| 26 |
|
| 27 |
# Load environment variables from .env file
|
| 28 |
+
env_path = Path(__file__).parent / ".env"
|
| 29 |
|
| 30 |
|
| 31 |
def _set_env(var: str):
|
| 32 |
if not os.environ.get(var):
|
| 33 |
+
os.environ[var] = getpass(f"{var}: ")
|
| 34 |
logger.info(f"{var} set to {os.environ[var]}")
|
| 35 |
|
| 36 |
+
|
| 37 |
if env_path.exists():
|
| 38 |
+
logger.info("Loading environment variables from %s", env_path)
|
| 39 |
load_dotenv(dotenv_path=env_path, override=True)
|
| 40 |
else:
|
| 41 |
+
logger.warning(
|
| 42 |
+
".env file not found at %s. Using system environment variables.", env_path
|
| 43 |
+
)
|
| 44 |
|
| 45 |
# Check for critical environment variables
|
| 46 |
if not os.getenv("TAVILY_API_KEY"):
|
| 47 |
+
logger.warning(
|
| 48 |
+
"TAVILY_API_KEY environment variable is not set."
|
| 49 |
+
" Failed to get TAVILY_API_KEY at Path %s",
|
| 50 |
+
env_path,
|
| 51 |
+
)
|
| 52 |
_set_env("TAVILY_API_KEY")
|
| 53 |
|
| 54 |
|
| 55 |
if not os.getenv("GEMINI_API_KEY"):
|
| 56 |
+
logger.warning(
|
| 57 |
+
"GEMINI_API_KEY environment variable is not set. "
|
| 58 |
+
"Failed to get GEMINI_API_KEY at Path %s",
|
| 59 |
+
env_path,
|
| 60 |
+
)
|
| 61 |
_set_env("GEMINI_API_KEY")
|
| 62 |
|
| 63 |
|
| 64 |
if not os.getenv("PINECONE_API_KEY"):
|
| 65 |
+
logger.warning(
|
| 66 |
+
"PINECONE_API_KEY environment variable is not set."
|
| 67 |
+
" Failed to get PINECONE_API_KEY at Path %s",
|
| 68 |
+
env_path,
|
| 69 |
+
)
|
| 70 |
_set_env("PINECONE_API_KEY")
|
| 71 |
|
| 72 |
if not os.getenv("LANGFUSE_PUBLIC_KEY"):
|
| 73 |
+
logger.warning(
|
| 74 |
+
"LANGFUSE_PUBLIC_KEY environment variable is not set."
|
| 75 |
+
" Failed to get LANGFUSE_PUBLIC_KEY at Path %s",
|
| 76 |
+
env_path,
|
| 77 |
+
)
|
| 78 |
_set_env("LANGFUSE_PUBLIC_KEY")
|
| 79 |
|
| 80 |
if not os.getenv("LANGFUSE_SECRET_KEY"):
|
| 81 |
+
logger.warning(
|
| 82 |
+
"LANGFUSE_SECRET_KEY environment variable is not set."
|
| 83 |
+
" Failed to get LANGFUSE_SECRET_KEY at Path %s",
|
| 84 |
+
env_path,
|
| 85 |
+
)
|
| 86 |
_set_env("LANGFUSE_SECRET_KEY")
|
| 87 |
|
| 88 |
if not os.getenv("LANGSMITH_API_KEY"):
|
| 89 |
+
logger.warning(
|
| 90 |
+
"LANGSMITH_API_KEY environment variable is not set."
|
| 91 |
+
" Failed to get LANGSMITH_API_KEY at Path %s",
|
| 92 |
+
env_path,
|
| 93 |
+
)
|
| 94 |
_set_env("LANGSMITH_API_KEY")
|
| 95 |
|
| 96 |
if not os.getenv("OPENROUTER_API_KEY"):
|
| 97 |
+
logger.warning(
|
| 98 |
+
"OPENROUTER_API_KEY environment variable is not set."
|
| 99 |
+
" Failed to get OPENROUTER_API_KEY at Path %s",
|
| 100 |
+
env_path,
|
| 101 |
+
)
|
| 102 |
_set_env("OPENROUTER_API_KEY")
|
| 103 |
|
| 104 |
if not os.getenv("LANGSMITH_PROJECT"):
|
| 105 |
+
logger.warning(
|
| 106 |
+
"LANGSMITH_PROJECT environment variable is not set."
|
| 107 |
+
" Failed to get LANGSMITH_PROJECT at Path %s",
|
| 108 |
+
env_path,
|
| 109 |
+
)
|
| 110 |
_set_env("LANGSMITH_PROJECT")
|
| 111 |
|
| 112 |
if not os.getenv("LANGSMITH_ENDPOINT"):
|
| 113 |
+
logger.warning(
|
| 114 |
+
"LANGSMITH_ENDPOINT environment variable is not set."
|
| 115 |
+
" Failed to get LANGSMITH_ENDPOINT at Path %s",
|
| 116 |
+
env_path,
|
| 117 |
+
)
|
| 118 |
_set_env("LANGSMITH_ENDPOINT")
|
| 119 |
|
| 120 |
if not os.getenv("CEREBRAS_API_KEY"):
|
| 121 |
+
logger.warning(
|
| 122 |
+
"CEREBRAS_API_KEY environment variable is not set."
|
| 123 |
+
" Failed to get CEREBRAS_API_KEY at Path %s",
|
| 124 |
+
env_path,
|
| 125 |
+
)
|
| 126 |
_set_env("CEREBRAS_API_KEY")
|
| 127 |
|
| 128 |
os.environ["LANGSMITH_TRACING"] = "true"
|
|
|
|
| 147 |
|
| 148 |
logger = logging.getLogger(__name__)
|
| 149 |
logger.setLevel(logging.INFO)
|
| 150 |
+
log_dir = Path(__file__).parent / "logs"
|
| 151 |
log_dir.mkdir(exist_ok=True)
|
| 152 |
+
logger.addHandler(logging.FileHandler(log_dir / "job_writer.log", mode="a"))
|
| 153 |
+
logger.info(
|
| 154 |
+
"Logger initialized. Writing to %s", Path(__file__).parent / "job_writer.log"
|
| 155 |
+
)
|
| 156 |
+
|
| 157 |
+
env_path = Path(__file__).parent / ".env"
|
| 158 |
|
|
|
|
| 159 |
|
| 160 |
def _set_env(var: str):
|
| 161 |
if not os.environ.get(var):
|
| 162 |
os.environ[var] = getpass.getpass(f"{var}: ")
|
| 163 |
logger.info(f"{var} set to {os.environ[var]}")
|
| 164 |
|
| 165 |
+
|
| 166 |
def load_environment_variables(key_array):
|
| 167 |
for key in key_array:
|
| 168 |
if not os.getenv(key):
|
| 169 |
+
logger.warning(
|
| 170 |
+
f"{key} environment variable is not set. Failed to get {key} at Path {env_path}"
|
| 171 |
+
)
|
| 172 |
_set_env(key)
|
| 173 |
|
| 174 |
+
|
| 175 |
if env_path.exists():
|
| 176 |
+
logger.info("Loading environment variables from %s", env_path)
|
| 177 |
load_dotenv(dotenv_path=env_path, override=True)
|
| 178 |
else:
|
| 179 |
+
logger.warning(
|
| 180 |
+
".env file not found at %s. Using system environment variables.", env_path
|
| 181 |
+
)
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
environment_key_array = [
|
| 185 |
+
"TAVILY_API_KEY",
|
| 186 |
+
"GEMINI_API_KEY",
|
| 187 |
+
"PINECONE_API_KEY",
|
| 188 |
+
"LANGFUSE_PUBLIC_KEY",
|
| 189 |
+
"LANGFUSE_SECRET_KEY",
|
| 190 |
+
]
|
| 191 |
# Check for critical environment variables
|
| 192 |
load_environment_variables(environment_key_array)
|
| 193 |
|
| 194 |
+
__all__ = ["job_app_graph", "workflows/research_workflow"]
|
src/job_writing_agent/agents/nodes.py
CHANGED
|
@@ -5,21 +5,22 @@ This module contains all the node functions used in the job application
|
|
| 5 |
writer workflow graph, each handling a specific step in the process.
|
| 6 |
"""
|
| 7 |
|
|
|
|
| 8 |
import logging
|
| 9 |
from datetime import datetime
|
| 10 |
|
| 11 |
-
|
| 12 |
-
from langchain_core.output_parsers import StrOutputParser
|
| 13 |
from langchain_core.messages import SystemMessage
|
|
|
|
| 14 |
|
| 15 |
-
|
|
|
|
| 16 |
from ..prompts.templates import (
|
| 17 |
-
CRITIQUE_PROMPT,
|
| 18 |
-
PERSONA_DEVELOPMENT_PROMPT,
|
| 19 |
-
COVER_LETTER_PROMPT,
|
| 20 |
-
REVISION_PROMPT,
|
| 21 |
BULLET_POINTS_PROMPT,
|
|
|
|
|
|
|
| 22 |
LINKEDIN_NOTE_PROMPT,
|
|
|
|
| 23 |
)
|
| 24 |
from ..utils.llm_provider_factory import LLMFactory
|
| 25 |
|
|
@@ -30,132 +31,67 @@ CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
|
| 30 |
|
| 31 |
def create_draft(state: ResearchState) -> ResultState:
|
| 32 |
"""Create initial draft of the application material."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
# Create LLM inside function (lazy initialization)
|
| 34 |
llm_provider = LLMFactory()
|
| 35 |
llm = llm_provider.create_langchain(
|
| 36 |
-
"
|
|
|
|
|
|
|
| 37 |
)
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
# Determine which type of content we're creating
|
| 40 |
-
company_background_information = state.get("company_research_data", {})
|
| 41 |
|
| 42 |
content_category = state.get("content_category", "cover_letter")
|
| 43 |
|
| 44 |
-
#
|
| 45 |
-
original_resume_text = company_background_information.get("resume", "")
|
| 46 |
-
|
| 47 |
-
try:
|
| 48 |
-
# Not yet implemented
|
| 49 |
-
if state.get("vector_store"):
|
| 50 |
-
vector_store = state.get("vector_store")
|
| 51 |
-
|
| 52 |
-
# Extract key requirements from job description
|
| 53 |
-
prompt = PERSONA_DEVELOPMENT_PROMPT | llm | StrOutputParser()
|
| 54 |
-
|
| 55 |
-
if company_background_information:
|
| 56 |
-
key_requirements = prompt.invoke(
|
| 57 |
-
{
|
| 58 |
-
"job_description": company_background_information[
|
| 59 |
-
"job_description"
|
| 60 |
-
]
|
| 61 |
-
}
|
| 62 |
-
)
|
| 63 |
-
else:
|
| 64 |
-
return key_requirements
|
| 65 |
-
|
| 66 |
-
if not key_requirements:
|
| 67 |
-
print("Warning: No key requirements found in the job description.")
|
| 68 |
-
return state
|
| 69 |
-
|
| 70 |
-
# Use the key requirements to query for the most relevant resume parts
|
| 71 |
-
namespace = f"resume_{state['session_id']}"
|
| 72 |
-
relevant_docs = vector_store.retrieve_similar(
|
| 73 |
-
query=key_requirements, namespace=namespace, k=3
|
| 74 |
-
)
|
| 75 |
-
|
| 76 |
-
# Use these relevant sections with higher weight in the draft creation
|
| 77 |
-
highly_relevant_resume = "\n".join(
|
| 78 |
-
[doc.page_content for doc in relevant_docs]
|
| 79 |
-
)
|
| 80 |
-
# Combine highly relevant parts with full resume text
|
| 81 |
-
resume_text = f"""
|
| 82 |
-
# Most Relevant Experience
|
| 83 |
-
{highly_relevant_resume}
|
| 84 |
-
|
| 85 |
-
# Full Resume
|
| 86 |
-
{original_resume_text}
|
| 87 |
-
"""
|
| 88 |
-
# Update the company_background_information with the enhanced resume
|
| 89 |
-
company_background_information["resume"] = resume_text
|
| 90 |
-
except Exception as e:
|
| 91 |
-
logger.warning(f"Could not use vector search for relevant resume parts: {e}")
|
| 92 |
-
# Continue with regular resume text
|
| 93 |
-
|
| 94 |
-
# Select the appropriate prompt template based on application type and persona
|
| 95 |
logger.info(f"The candidate wants the Agent to assist with : {content_category}")
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
elif content_category == "linkedin_connect_request":
|
| 99 |
-
FirstDraftGenerationPromptTemplate = ChatPromptTemplate([LINKEDIN_NOTE_PROMPT])
|
| 100 |
-
else:
|
| 101 |
-
FirstDraftGenerationPromptTemplate = ChatPromptTemplate([COVER_LETTER_PROMPT])
|
| 102 |
-
|
| 103 |
-
# Create the draft using the selected prompt template
|
| 104 |
-
CurrentSessionContextMessage = HumanMessagePromptTemplate.from_template(
|
| 105 |
-
"""
|
| 106 |
-
Below is the Job Description, Candidate Resume, and Company Research Data enclosed in triple backticks.
|
| 107 |
-
|
| 108 |
-
**Job Description:**
|
| 109 |
-
|
| 110 |
-
'''
|
| 111 |
-
{current_job_role}
|
| 112 |
-
'''
|
| 113 |
-
|
| 114 |
-
**Candidate Resume:**
|
| 115 |
-
|
| 116 |
-
'''
|
| 117 |
-
{candidate_resume}
|
| 118 |
-
'''
|
| 119 |
-
|
| 120 |
-
**Company Research Data:**
|
| 121 |
-
|
| 122 |
-
'''
|
| 123 |
-
{company_research_data}
|
| 124 |
-
'''
|
| 125 |
-
""",
|
| 126 |
-
input_variables=[
|
| 127 |
-
"current_job_role",
|
| 128 |
-
"company_research_data",
|
| 129 |
-
"candidate_resume",
|
| 130 |
-
],
|
| 131 |
)
|
| 132 |
|
| 133 |
-
|
|
|
|
|
|
|
| 134 |
|
| 135 |
-
#
|
| 136 |
draft_generation_chain = (
|
| 137 |
(
|
| 138 |
{
|
| 139 |
"current_job_role": lambda x: x["current_job_role"],
|
| 140 |
-
"company_research_data": lambda x: x["company_research_data"],
|
| 141 |
"candidate_resume": lambda x: x["candidate_resume"],
|
|
|
|
|
|
|
| 142 |
}
|
| 143 |
)
|
| 144 |
-
|
|
| 145 |
| llm
|
| 146 |
)
|
| 147 |
|
| 148 |
-
# Prepare the inputs
|
| 149 |
application_background_data = {
|
| 150 |
-
"current_job_role": company_background_information
|
| 151 |
-
"
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
| 155 |
}
|
| 156 |
|
| 157 |
response = draft_generation_chain.invoke(application_background_data)
|
| 158 |
logger.info(f"Draft has been created: {response.content}")
|
|
|
|
| 159 |
app_state = ResultState(
|
| 160 |
draft=response.content,
|
| 161 |
feedback="",
|
|
@@ -176,31 +112,37 @@ def critique_draft(state: ResultState) -> ResultState:
|
|
| 176 |
try:
|
| 177 |
logger.info("Critiquing draft...")
|
| 178 |
|
| 179 |
-
#
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
)
|
| 184 |
-
|
| 185 |
-
job_description = str(state["company_research_data"].get("job_description", ""))
|
| 186 |
-
draft = str(state.get("draft", ""))
|
| 187 |
|
| 188 |
# Debug logging to verify values
|
| 189 |
logger.debug(f"Job description length: {len(job_description)}")
|
| 190 |
-
logger.debug(f"Draft length: {len(
|
| 191 |
|
| 192 |
-
|
|
|
|
| 193 |
logger.warning("Missing job_description or draft in state")
|
| 194 |
-
# Return state with empty feedback
|
| 195 |
return ResultState(
|
| 196 |
-
draft=
|
| 197 |
-
feedback=
|
| 198 |
critique_feedback="",
|
| 199 |
current_node="critique",
|
| 200 |
-
company_research_data=
|
| 201 |
-
output_data=
|
| 202 |
)
|
| 203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
# Use the same pattern as create_draft:
|
| 205 |
# 1. Create ChatPromptTemplate from SystemMessage
|
| 206 |
# 2. Append HumanMessagePromptTemplate with variables
|
|
@@ -213,10 +155,10 @@ def critique_draft(state: ResultState) -> ResultState:
|
|
| 213 |
)
|
| 214 |
|
| 215 |
# Create ChatPromptTemplate from SystemMessage (like line 90-94 in create_draft)
|
| 216 |
-
|
| 217 |
|
| 218 |
# Append HumanMessagePromptTemplate with variables (like line 97-124 in create_draft)
|
| 219 |
-
|
| 220 |
"""
|
| 221 |
# Job Description
|
| 222 |
{job_description}
|
|
@@ -235,7 +177,7 @@ def critique_draft(state: ResultState) -> ResultState:
|
|
| 235 |
input_variables=["job_description", "draft"],
|
| 236 |
)
|
| 237 |
|
| 238 |
-
|
| 239 |
|
| 240 |
# Create chain (like line 129-139 in create_draft)
|
| 241 |
critique_chain = (
|
|
@@ -243,15 +185,15 @@ def critique_draft(state: ResultState) -> ResultState:
|
|
| 243 |
"job_description": lambda x: x["job_description"],
|
| 244 |
"draft": lambda x: x["draft"],
|
| 245 |
}
|
| 246 |
-
|
|
| 247 |
| llm
|
| 248 |
)
|
| 249 |
|
| 250 |
-
# Invoke with input variables
|
| 251 |
critique = critique_chain.invoke(
|
| 252 |
{
|
| 253 |
"job_description": job_description,
|
| 254 |
-
"draft":
|
| 255 |
}
|
| 256 |
)
|
| 257 |
|
|
@@ -260,16 +202,15 @@ def critique_draft(state: ResultState) -> ResultState:
|
|
| 260 |
)
|
| 261 |
logger.info("Draft critique completed")
|
| 262 |
|
| 263 |
-
# Store the critique
|
| 264 |
-
|
| 265 |
-
draft=
|
| 266 |
-
feedback=
|
| 267 |
critique_feedback=critique_content,
|
| 268 |
current_node="critique",
|
| 269 |
-
company_research_data=
|
| 270 |
-
output_data=
|
| 271 |
)
|
| 272 |
-
return app_state
|
| 273 |
|
| 274 |
except Exception as e:
|
| 275 |
logger.error(f"Error in critique_draft: {e}", exc_info=True)
|
|
@@ -279,70 +220,84 @@ def critique_draft(state: ResultState) -> ResultState:
|
|
| 279 |
|
| 280 |
def human_approval(state: ResultState) -> ResultState:
|
| 281 |
"""Human-in-the-loop checkpoint for feedback on the draft."""
|
| 282 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
print("\n" + "=" * 80)
|
| 284 |
print("DRAFT FOR REVIEW:")
|
| 285 |
-
print(
|
| 286 |
print("\nAUTOMATIC CRITIQUE:")
|
| 287 |
-
print(
|
| 288 |
print("=" * 80)
|
| 289 |
print("\nPlease provide your feedback (press Enter to continue with no changes):")
|
| 290 |
|
| 291 |
# In a real implementation, this would be handled by the UI
|
| 292 |
human_feedback = input()
|
| 293 |
-
|
| 294 |
-
|
|
|
|
| 295 |
feedback=human_feedback,
|
| 296 |
-
critique_feedback=
|
| 297 |
current_node="human_approval",
|
| 298 |
-
company_research_data=
|
| 299 |
-
output_data=
|
| 300 |
)
|
| 301 |
-
return result_state
|
| 302 |
|
| 303 |
|
| 304 |
def finalize_document(state: ResultState) -> DataLoadState:
|
| 305 |
"""Incorporate feedback and finalize the document."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
|
| 307 |
# Create LLM inside function (lazy initialization)
|
| 308 |
llm_provider = LLMFactory()
|
| 309 |
llm = llm_provider.create_langchain(
|
| 310 |
-
"
|
|
|
|
|
|
|
| 311 |
)
|
| 312 |
|
| 313 |
-
# Create
|
| 314 |
revision_chain = (
|
| 315 |
{
|
| 316 |
-
"draft": lambda x: x
|
| 317 |
-
"feedback": lambda x: x
|
| 318 |
-
"critique_feedback": lambda x: x
|
| 319 |
}
|
| 320 |
| REVISION_PROMPT
|
| 321 |
| llm
|
| 322 |
)
|
| 323 |
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
# Invoke with input variables (like line 239 in critique_draft)
|
| 327 |
final_content = revision_chain.invoke(
|
| 328 |
{
|
| 329 |
-
"draft":
|
| 330 |
-
"feedback":
|
| 331 |
-
"critique_feedback":
|
| 332 |
}
|
| 333 |
)
|
| 334 |
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
|
|
|
| 340 |
current_node="finalize",
|
| 341 |
output_data=final_content.content
|
| 342 |
if hasattr(final_content, "content")
|
| 343 |
else str(final_content),
|
| 344 |
)
|
| 345 |
-
return app_state
|
| 346 |
|
| 347 |
|
| 348 |
"""
|
|
@@ -351,8 +306,19 @@ Conditional node to determine if next node should be 'draft' node or "research"
|
|
| 351 |
|
| 352 |
|
| 353 |
def determine_next_step(state: AppState) -> str:
|
| 354 |
-
"""
|
| 355 |
-
|
| 356 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
return "draft"
|
| 358 |
return "research"
|
|
|
|
| 5 |
writer workflow graph, each handling a specific step in the process.
|
| 6 |
"""
|
| 7 |
|
| 8 |
+
# Standard library imports
|
| 9 |
import logging
|
| 10 |
from datetime import datetime
|
| 11 |
|
| 12 |
+
# Third-party imports
|
|
|
|
| 13 |
from langchain_core.messages import SystemMessage
|
| 14 |
+
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
|
| 15 |
|
| 16 |
+
# Local imports
|
| 17 |
+
from ..classes.classes import AppState, DataLoadState, ResearchState, ResultState
|
| 18 |
from ..prompts.templates import (
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
BULLET_POINTS_PROMPT,
|
| 20 |
+
COVER_LETTER_PROMPT,
|
| 21 |
+
DRAFT_GENERATION_CONTEXT_PROMPT,
|
| 22 |
LINKEDIN_NOTE_PROMPT,
|
| 23 |
+
REVISION_PROMPT,
|
| 24 |
)
|
| 25 |
from ..utils.llm_provider_factory import LLMFactory
|
| 26 |
|
|
|
|
| 31 |
|
| 32 |
def create_draft(state: ResearchState) -> ResultState:
|
| 33 |
"""Create initial draft of the application material."""
|
| 34 |
+
# Validate state inputs
|
| 35 |
+
company_background_information = state.get("company_research_data", {})
|
| 36 |
+
if not company_background_information:
|
| 37 |
+
logger.error("Missing company_research_data in state")
|
| 38 |
+
raise ValueError("company_research_data is required in state")
|
| 39 |
+
|
| 40 |
# Create LLM inside function (lazy initialization)
|
| 41 |
llm_provider = LLMFactory()
|
| 42 |
llm = llm_provider.create_langchain(
|
| 43 |
+
"cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
|
| 44 |
+
provider="openrouter",
|
| 45 |
+
temperature=0.3,
|
| 46 |
)
|
| 47 |
|
| 48 |
+
draft_category_map = {
|
| 49 |
+
"cover_letter": COVER_LETTER_PROMPT,
|
| 50 |
+
"bullets": BULLET_POINTS_PROMPT,
|
| 51 |
+
"linkedin_connect_request": LINKEDIN_NOTE_PROMPT,
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
# Determine which type of content we're creating
|
|
|
|
| 55 |
|
| 56 |
content_category = state.get("content_category", "cover_letter")
|
| 57 |
|
| 58 |
+
# Select appropriate system message template based on content category
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
logger.info(f"The candidate wants the Agent to assist with : {content_category}")
|
| 60 |
+
system_message_template = draft_category_map.get(
|
| 61 |
+
content_category, COVER_LETTER_PROMPT
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
)
|
| 63 |
|
| 64 |
+
# Build the complete prompt template: system message + context
|
| 65 |
+
draft_prompt_template = ChatPromptTemplate([system_message_template])
|
| 66 |
+
draft_prompt_template.append(DRAFT_GENERATION_CONTEXT_PROMPT)
|
| 67 |
|
| 68 |
+
# Build the chain: input formatting -> prompt template -> LLM
|
| 69 |
draft_generation_chain = (
|
| 70 |
(
|
| 71 |
{
|
| 72 |
"current_job_role": lambda x: x["current_job_role"],
|
|
|
|
| 73 |
"candidate_resume": lambda x: x["candidate_resume"],
|
| 74 |
+
"company_research_data": lambda x: x["company_research_data"],
|
| 75 |
+
"current_date": lambda x: x["current_date"],
|
| 76 |
}
|
| 77 |
)
|
| 78 |
+
| draft_prompt_template
|
| 79 |
| llm
|
| 80 |
)
|
| 81 |
|
| 82 |
+
# Prepare the inputs with safe dictionary access
|
| 83 |
application_background_data = {
|
| 84 |
+
"current_job_role": company_background_information.get("job_description", ""),
|
| 85 |
+
"candidate_resume": company_background_information.get("resume", ""),
|
| 86 |
+
"company_research_data": company_background_information.get(
|
| 87 |
+
"company_research_data_summary", "Company Research Data is not available"
|
| 88 |
+
),
|
| 89 |
+
"current_date": CURRENT_DATE,
|
| 90 |
}
|
| 91 |
|
| 92 |
response = draft_generation_chain.invoke(application_background_data)
|
| 93 |
logger.info(f"Draft has been created: {response.content}")
|
| 94 |
+
|
| 95 |
app_state = ResultState(
|
| 96 |
draft=response.content,
|
| 97 |
feedback="",
|
|
|
|
| 112 |
try:
|
| 113 |
logger.info("Critiquing draft...")
|
| 114 |
|
| 115 |
+
# Validate and extract required state fields once at the start
|
| 116 |
+
company_research_data = state.get("company_research_data", {})
|
| 117 |
+
job_description = str(company_research_data.get("job_description", ""))
|
| 118 |
+
draft_content = str(state.get("draft", ""))
|
| 119 |
+
feedback = state.get("feedback", "")
|
| 120 |
+
output_data = state.get("output_data", "")
|
|
|
|
|
|
|
| 121 |
|
| 122 |
# Debug logging to verify values
|
| 123 |
logger.debug(f"Job description length: {len(job_description)}")
|
| 124 |
+
logger.debug(f"Draft length: {len(draft_content)}")
|
| 125 |
|
| 126 |
+
# Early return if required fields are missing
|
| 127 |
+
if not job_description or not draft_content:
|
| 128 |
logger.warning("Missing job_description or draft in state")
|
|
|
|
| 129 |
return ResultState(
|
| 130 |
+
draft=draft_content,
|
| 131 |
+
feedback=feedback,
|
| 132 |
critique_feedback="",
|
| 133 |
current_node="critique",
|
| 134 |
+
company_research_data=company_research_data,
|
| 135 |
+
output_data=output_data,
|
| 136 |
)
|
| 137 |
|
| 138 |
+
# Create LLM inside function (lazy initialization)
|
| 139 |
+
llm_provider = LLMFactory()
|
| 140 |
+
llm = llm_provider.create_langchain(
|
| 141 |
+
"cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
|
| 142 |
+
provider="openrouter",
|
| 143 |
+
temperature=0.3,
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
# Use the same pattern as create_draft:
|
| 147 |
# 1. Create ChatPromptTemplate from SystemMessage
|
| 148 |
# 2. Append HumanMessagePromptTemplate with variables
|
|
|
|
| 155 |
)
|
| 156 |
|
| 157 |
# Create ChatPromptTemplate from SystemMessage (like line 90-94 in create_draft)
|
| 158 |
+
critique_prompt_template = ChatPromptTemplate([critique_system_message])
|
| 159 |
|
| 160 |
# Append HumanMessagePromptTemplate with variables (like line 97-124 in create_draft)
|
| 161 |
+
critique_context_message = HumanMessagePromptTemplate.from_template(
|
| 162 |
"""
|
| 163 |
# Job Description
|
| 164 |
{job_description}
|
|
|
|
| 177 |
input_variables=["job_description", "draft"],
|
| 178 |
)
|
| 179 |
|
| 180 |
+
critique_prompt_template.append(critique_context_message)
|
| 181 |
|
| 182 |
# Create chain (like line 129-139 in create_draft)
|
| 183 |
critique_chain = (
|
|
|
|
| 185 |
"job_description": lambda x: x["job_description"],
|
| 186 |
"draft": lambda x: x["draft"],
|
| 187 |
}
|
| 188 |
+
| critique_prompt_template
|
| 189 |
| llm
|
| 190 |
)
|
| 191 |
|
| 192 |
+
# Invoke with validated input variables
|
| 193 |
critique = critique_chain.invoke(
|
| 194 |
{
|
| 195 |
"job_description": job_description,
|
| 196 |
+
"draft": draft_content,
|
| 197 |
}
|
| 198 |
)
|
| 199 |
|
|
|
|
| 202 |
)
|
| 203 |
logger.info("Draft critique completed")
|
| 204 |
|
| 205 |
+
# Store the critique - using validated variables from top of function
|
| 206 |
+
return ResultState(
|
| 207 |
+
draft=draft_content,
|
| 208 |
+
feedback=feedback,
|
| 209 |
critique_feedback=critique_content,
|
| 210 |
current_node="critique",
|
| 211 |
+
company_research_data=company_research_data,
|
| 212 |
+
output_data=output_data,
|
| 213 |
)
|
|
|
|
| 214 |
|
| 215 |
except Exception as e:
|
| 216 |
logger.error(f"Error in critique_draft: {e}", exc_info=True)
|
|
|
|
| 220 |
|
| 221 |
def human_approval(state: ResultState) -> ResultState:
|
| 222 |
"""Human-in-the-loop checkpoint for feedback on the draft."""
|
| 223 |
+
# Validate and extract all required state fields once
|
| 224 |
+
draft_content = state.get("draft", "")
|
| 225 |
+
critique_feedback_content = state.get("critique_feedback", "No critique available")
|
| 226 |
+
company_research_data = state.get("company_research_data", {})
|
| 227 |
+
output_data = state.get("output_data", "")
|
| 228 |
+
|
| 229 |
+
# Display draft and critique for review
|
| 230 |
print("\n" + "=" * 80)
|
| 231 |
print("DRAFT FOR REVIEW:")
|
| 232 |
+
print(draft_content)
|
| 233 |
print("\nAUTOMATIC CRITIQUE:")
|
| 234 |
+
print(critique_feedback_content)
|
| 235 |
print("=" * 80)
|
| 236 |
print("\nPlease provide your feedback (press Enter to continue with no changes):")
|
| 237 |
|
| 238 |
# In a real implementation, this would be handled by the UI
|
| 239 |
human_feedback = input()
|
| 240 |
+
|
| 241 |
+
return ResultState(
|
| 242 |
+
draft=draft_content,
|
| 243 |
feedback=human_feedback,
|
| 244 |
+
critique_feedback=critique_feedback_content,
|
| 245 |
current_node="human_approval",
|
| 246 |
+
company_research_data=company_research_data,
|
| 247 |
+
output_data=output_data,
|
| 248 |
)
|
|
|
|
| 249 |
|
| 250 |
|
| 251 |
def finalize_document(state: ResultState) -> DataLoadState:
|
| 252 |
"""Incorporate feedback and finalize the document."""
|
| 253 |
+
# Validate and extract all required state fields once
|
| 254 |
+
draft_content = state.get("draft", "")
|
| 255 |
+
feedback_content = state.get("feedback", "")
|
| 256 |
+
critique_feedback_content = state.get("critique_feedback", "")
|
| 257 |
+
company_research_data = state.get("company_research_data", {})
|
| 258 |
+
|
| 259 |
+
if not draft_content:
|
| 260 |
+
logger.warning("Missing draft in state for finalization")
|
| 261 |
|
| 262 |
# Create LLM inside function (lazy initialization)
|
| 263 |
llm_provider = LLMFactory()
|
| 264 |
llm = llm_provider.create_langchain(
|
| 265 |
+
"cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
|
| 266 |
+
provider="openrouter",
|
| 267 |
+
temperature=0.3,
|
| 268 |
)
|
| 269 |
|
| 270 |
+
# Create revision chain
|
| 271 |
revision_chain = (
|
| 272 |
{
|
| 273 |
+
"draft": lambda x: x.get("draft", ""),
|
| 274 |
+
"feedback": lambda x: x.get("feedback", ""),
|
| 275 |
+
"critique_feedback": lambda x: x.get("critique_feedback", ""),
|
| 276 |
}
|
| 277 |
| REVISION_PROMPT
|
| 278 |
| llm
|
| 279 |
)
|
| 280 |
|
| 281 |
+
# Invoke with validated input variables
|
|
|
|
|
|
|
| 282 |
final_content = revision_chain.invoke(
|
| 283 |
{
|
| 284 |
+
"draft": draft_content,
|
| 285 |
+
"feedback": feedback_content,
|
| 286 |
+
"critique_feedback": critique_feedback_content,
|
| 287 |
}
|
| 288 |
)
|
| 289 |
|
| 290 |
+
# Return final state using validated variables
|
| 291 |
+
return DataLoadState(
|
| 292 |
+
draft=draft_content,
|
| 293 |
+
feedback=feedback_content,
|
| 294 |
+
critique_feedback=critique_feedback_content,
|
| 295 |
+
company_research_data=company_research_data,
|
| 296 |
current_node="finalize",
|
| 297 |
output_data=final_content.content
|
| 298 |
if hasattr(final_content, "content")
|
| 299 |
else str(final_content),
|
| 300 |
)
|
|
|
|
| 301 |
|
| 302 |
|
| 303 |
"""
|
|
|
|
| 306 |
|
| 307 |
|
| 308 |
def determine_next_step(state: AppState) -> str:
|
| 309 |
+
"""
|
| 310 |
+
Determine next workflow step based on company name presence.
|
| 311 |
+
|
| 312 |
+
If the company name is missing within the AppState, we can't
|
| 313 |
+
create the content draft and therefore redirect to the research node.
|
| 314 |
+
|
| 315 |
+
Args:
|
| 316 |
+
state: Current application state
|
| 317 |
+
|
| 318 |
+
Returns:
|
| 319 |
+
Next node name: "draft" or "research"
|
| 320 |
+
"""
|
| 321 |
+
company_name = state.get("company_name", "")
|
| 322 |
+
if not company_name:
|
| 323 |
return "draft"
|
| 324 |
return "research"
|
src/job_writing_agent/classes/classes.py
CHANGED
|
@@ -74,7 +74,7 @@ class DataLoadState(MessagesState, total=False):
|
|
| 74 |
|
| 75 |
resume_path: str
|
| 76 |
job_description_source: str
|
| 77 |
-
|
| 78 |
resume: str
|
| 79 |
job_description: str
|
| 80 |
company_name: str
|
|
@@ -96,11 +96,13 @@ class ResearchState(MessagesState):
|
|
| 96 |
tavily_search: Dict[str, Any] Stores the results of the Tavily search
|
| 97 |
attempted_search_queries: List of queries used extracted from the job description
|
| 98 |
compiled_knowledge: Compiled knowledge from the research
|
|
|
|
| 99 |
"""
|
| 100 |
|
| 101 |
company_research_data: Dict[str, Any]
|
| 102 |
attempted_search_queries: List[str]
|
| 103 |
current_node: str
|
|
|
|
| 104 |
|
| 105 |
|
| 106 |
class ResultState(MessagesState):
|
|
|
|
| 74 |
|
| 75 |
resume_path: str
|
| 76 |
job_description_source: str
|
| 77 |
+
content_category: str # "cover_letter", "bullets", "linkedin_note"
|
| 78 |
resume: str
|
| 79 |
job_description: str
|
| 80 |
company_name: str
|
|
|
|
| 96 |
tavily_search: Dict[str, Any] Stores the results of the Tavily search
|
| 97 |
attempted_search_queries: List of queries used extracted from the job description
|
| 98 |
compiled_knowledge: Compiled knowledge from the research
|
| 99 |
+
content_category: Type of application material to generate
|
| 100 |
"""
|
| 101 |
|
| 102 |
company_research_data: Dict[str, Any]
|
| 103 |
attempted_search_queries: List[str]
|
| 104 |
current_node: str
|
| 105 |
+
content_category: str
|
| 106 |
|
| 107 |
|
| 108 |
class ResultState(MessagesState):
|
src/job_writing_agent/nodes/__init__.py
CHANGED
|
@@ -4,10 +4,30 @@ Created on Mon Oct 23 16:49:52 2023
|
|
| 4 |
@author: rishabhaggarwal
|
| 5 |
"""
|
| 6 |
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
# from .createdraft import CreateDraft
|
| 9 |
from .variations import generate_variations
|
| 10 |
from .selfconsistency import self_consistency_vote
|
| 11 |
from .research_workflow import research_workflow
|
| 12 |
|
| 13 |
-
__all__ = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
@author: rishabhaggarwal
|
| 5 |
"""
|
| 6 |
|
| 7 |
+
# Legacy import (deprecated - use new classes instead)
|
| 8 |
+
from .data_loading_workflow import data_loading_workflow
|
| 9 |
+
|
| 10 |
+
# New data loading classes following SOLID principles
|
| 11 |
+
from .resume_loader import ResumeLoader
|
| 12 |
+
from .job_description_loader import JobDescriptionLoader
|
| 13 |
+
from .system_initializer import SystemInitializer
|
| 14 |
+
from .validation_helper import ValidationHelper
|
| 15 |
+
|
| 16 |
+
# Other workflow components
|
| 17 |
# from .createdraft import CreateDraft
|
| 18 |
from .variations import generate_variations
|
| 19 |
from .selfconsistency import self_consistency_vote
|
| 20 |
from .research_workflow import research_workflow
|
| 21 |
|
| 22 |
+
__all__ = [
|
| 23 |
+
# New data loading classes
|
| 24 |
+
"ResumeLoader",
|
| 25 |
+
"JobDescriptionLoader",
|
| 26 |
+
"SystemInitializer",
|
| 27 |
+
"ValidationHelper",
|
| 28 |
+
"data_loading_workflow",
|
| 29 |
+
# Other components
|
| 30 |
+
"generate_variations",
|
| 31 |
+
"self_consistency_vote",
|
| 32 |
+
"research_workflow",
|
| 33 |
+
]
|
src/job_writing_agent/nodes/data_loading_workflow.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Data Loading Workflow Module
|
| 4 |
+
|
| 5 |
+
This module defines the data loading subgraph workflow, including all node
|
| 6 |
+
functions and the subgraph definition. It uses the separate loader classes
|
| 7 |
+
(ResumeLoader, JobDescriptionLoader, SystemInitializer, ValidationHelper)
|
| 8 |
+
following the Single Responsibility Principle.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import logging
|
| 12 |
+
from typing import Any
|
| 13 |
+
|
| 14 |
+
from langgraph.graph import StateGraph, END, START
|
| 15 |
+
|
| 16 |
+
from job_writing_agent.classes import DataLoadState
|
| 17 |
+
from job_writing_agent.nodes.resume_loader import ResumeLoader
|
| 18 |
+
from job_writing_agent.nodes.job_description_loader import JobDescriptionLoader
|
| 19 |
+
from job_writing_agent.nodes.system_initializer import SystemInitializer
|
| 20 |
+
from job_writing_agent.nodes.validation_helper import ValidationHelper
|
| 21 |
+
from job_writing_agent.utils.logging.logging_decorators import (
|
| 22 |
+
log_async,
|
| 23 |
+
log_execution,
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# ============================================================================
|
| 30 |
+
# Data Loading Subgraph Node Functions
|
| 31 |
+
# ============================================================================
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@log_async
|
| 35 |
+
async def set_agent_system_message_node(state: DataLoadState) -> DataLoadState:
|
| 36 |
+
"""
|
| 37 |
+
Node function to initialize system message in workflow state.
|
| 38 |
+
|
| 39 |
+
This node wraps the SystemInitializer.set_agent_system_message method
|
| 40 |
+
for use in the LangGraph workflow.
|
| 41 |
+
|
| 42 |
+
Parameters
|
| 43 |
+
----------
|
| 44 |
+
state: DataLoadState
|
| 45 |
+
Current workflow state.
|
| 46 |
+
|
| 47 |
+
Returns
|
| 48 |
+
-------
|
| 49 |
+
DataLoadState
|
| 50 |
+
Updated state with system message added to messages list.
|
| 51 |
+
"""
|
| 52 |
+
initializer = SystemInitializer()
|
| 53 |
+
return await initializer.set_agent_system_message(state)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@log_async
|
| 57 |
+
async def parse_resume_node(state: DataLoadState) -> DataLoadState:
|
| 58 |
+
"""
|
| 59 |
+
Node to parse resume in parallel with job description parsing.
|
| 60 |
+
|
| 61 |
+
Extracts resume parsing logic for parallel execution.
|
| 62 |
+
Returns only the resume data - reducer will merge with job description data.
|
| 63 |
+
|
| 64 |
+
Parameters
|
| 65 |
+
----------
|
| 66 |
+
state: DataLoadState
|
| 67 |
+
Current workflow state containing resume_path.
|
| 68 |
+
|
| 69 |
+
Returns
|
| 70 |
+
-------
|
| 71 |
+
DataLoadState
|
| 72 |
+
Partial state update with resume data in company_research_data.
|
| 73 |
+
LangGraph will merge this with other parallel updates.
|
| 74 |
+
"""
|
| 75 |
+
loader = ResumeLoader()
|
| 76 |
+
resume_src = state.get("resume_path")
|
| 77 |
+
|
| 78 |
+
resume_text = ""
|
| 79 |
+
if resume_src:
|
| 80 |
+
resume_text = await loader._load_resume(resume_src)
|
| 81 |
+
elif state.get("current_node") == "verify":
|
| 82 |
+
resume_text = await loader._prompt_user_for_resume()
|
| 83 |
+
|
| 84 |
+
# Return only the resume data - reducer will merge this with job description data
|
| 85 |
+
logger.info(f"Resume parsed: {len(resume_text)} characters")
|
| 86 |
+
return {
|
| 87 |
+
"company_research_data": {"resume": resume_text},
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
@log_async
|
| 92 |
+
async def parse_job_description_node(state: DataLoadState) -> DataLoadState:
|
| 93 |
+
"""
|
| 94 |
+
Node to parse job description in parallel with resume parsing.
|
| 95 |
+
|
| 96 |
+
Extracts job description parsing logic for parallel execution.
|
| 97 |
+
Returns only the job description data - reducer will merge with resume data.
|
| 98 |
+
|
| 99 |
+
Parameters
|
| 100 |
+
----------
|
| 101 |
+
state: DataLoadState
|
| 102 |
+
Current workflow state containing job_description_source.
|
| 103 |
+
|
| 104 |
+
Returns
|
| 105 |
+
-------
|
| 106 |
+
DataLoadState
|
| 107 |
+
Partial state update with job description and company name in
|
| 108 |
+
company_research_data. LangGraph will merge this with other parallel updates.
|
| 109 |
+
"""
|
| 110 |
+
loader = JobDescriptionLoader()
|
| 111 |
+
jd_src = state.get("job_description_source")
|
| 112 |
+
|
| 113 |
+
job_text = ""
|
| 114 |
+
company_name = ""
|
| 115 |
+
if jd_src:
|
| 116 |
+
job_text, company_name = await loader._load_job_description(jd_src)
|
| 117 |
+
elif state.get("current_node") == "verify":
|
| 118 |
+
job_text = await loader._prompt_user_for_job_description()
|
| 119 |
+
|
| 120 |
+
# Return only the job description data - reducer will merge this with resume data
|
| 121 |
+
logger.info(
|
| 122 |
+
f"Job description parsed: {len(job_text)} characters, company: {company_name}"
|
| 123 |
+
)
|
| 124 |
+
return {
|
| 125 |
+
"company_research_data": {
|
| 126 |
+
"job_description": job_text,
|
| 127 |
+
"company_name": company_name,
|
| 128 |
+
},
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
@log_execution
|
| 133 |
+
def aggregate_data_loading_results(state: DataLoadState) -> DataLoadState:
|
| 134 |
+
"""
|
| 135 |
+
Aggregate results from parallel resume and job description parsing nodes.
|
| 136 |
+
|
| 137 |
+
This node runs after both parse_resume_node and parse_job_description_node
|
| 138 |
+
complete. It ensures both results are present, normalizes values to strings,
|
| 139 |
+
and structures the final state.
|
| 140 |
+
|
| 141 |
+
Normalization is performed here (not in ValidationHelper) to follow SRP:
|
| 142 |
+
- This function: Aggregates and normalizes data
|
| 143 |
+
- ValidationHelper: Only validates data
|
| 144 |
+
|
| 145 |
+
Parameters
|
| 146 |
+
----------
|
| 147 |
+
state: DataLoadState
|
| 148 |
+
Current workflow state with parallel parsing results.
|
| 149 |
+
|
| 150 |
+
Returns
|
| 151 |
+
-------
|
| 152 |
+
DataLoadState
|
| 153 |
+
Updated state with normalized and structured company_research_data.
|
| 154 |
+
"""
|
| 155 |
+
# Ensure company_research_data exists
|
| 156 |
+
if "company_research_data" not in state:
|
| 157 |
+
state["company_research_data"] = {}
|
| 158 |
+
|
| 159 |
+
# Extract research data once, then get results from parallel nodes
|
| 160 |
+
company_research_data = state["company_research_data"]
|
| 161 |
+
resume_text = company_research_data.get("resume", "")
|
| 162 |
+
job_text = company_research_data.get("job_description", "")
|
| 163 |
+
company_name = company_research_data.get("company_name", "")
|
| 164 |
+
|
| 165 |
+
# Normalize values to strings (handles list, tuple, dict, str)
|
| 166 |
+
def normalize_value(value: list | tuple | dict | str | Any) -> str:
|
| 167 |
+
"""
|
| 168 |
+
Normalize a value to a string representation.
|
| 169 |
+
|
| 170 |
+
Args:
|
| 171 |
+
value: Value to normalize (list, tuple, dict, or any other type)
|
| 172 |
+
|
| 173 |
+
Returns:
|
| 174 |
+
String representation of the value
|
| 175 |
+
"""
|
| 176 |
+
if isinstance(value, (list, tuple)):
|
| 177 |
+
return " ".join(str(x) for x in value)
|
| 178 |
+
elif isinstance(value, dict):
|
| 179 |
+
return str(value)
|
| 180 |
+
else:
|
| 181 |
+
return str(value)
|
| 182 |
+
|
| 183 |
+
# Normalize all values
|
| 184 |
+
resume_text = normalize_value(resume_text) if resume_text else ""
|
| 185 |
+
job_text = normalize_value(job_text) if job_text else ""
|
| 186 |
+
company_name = normalize_value(company_name) if company_name else ""
|
| 187 |
+
|
| 188 |
+
# Validate both are present (log warnings but don't fail here - validation node will handle)
|
| 189 |
+
if not resume_text:
|
| 190 |
+
logger.warning("Resume text is empty after parsing")
|
| 191 |
+
if not job_text:
|
| 192 |
+
logger.warning("Job description text is empty after parsing")
|
| 193 |
+
|
| 194 |
+
# Ensure final structure is correct
|
| 195 |
+
state["company_research_data"] = {
|
| 196 |
+
"resume": resume_text,
|
| 197 |
+
"job_description": job_text,
|
| 198 |
+
"company_name": company_name,
|
| 199 |
+
}
|
| 200 |
+
state["current_node"] = "aggregate_results"
|
| 201 |
+
|
| 202 |
+
logger.info("Data loading results aggregated and normalized successfully")
|
| 203 |
+
return state
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
@log_execution
|
| 207 |
+
def verify_inputs_node(state: DataLoadState) -> DataLoadState:
|
| 208 |
+
"""
|
| 209 |
+
Verify that required inputs are present and set next_node for routing.
|
| 210 |
+
|
| 211 |
+
This node wraps the ValidationHelper.verify_inputs method for use in
|
| 212 |
+
the LangGraph workflow. It only validates - normalization is done in
|
| 213 |
+
aggregate_data_loading_results.
|
| 214 |
+
|
| 215 |
+
Parameters
|
| 216 |
+
----------
|
| 217 |
+
state: DataLoadState
|
| 218 |
+
Current workflow state with aggregated and normalized data.
|
| 219 |
+
|
| 220 |
+
Returns
|
| 221 |
+
-------
|
| 222 |
+
DataLoadState
|
| 223 |
+
Updated state with next_node set for routing ("load" or "research").
|
| 224 |
+
"""
|
| 225 |
+
validator = ValidationHelper()
|
| 226 |
+
return validator.verify_inputs(state)
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
# ============================================================================
|
| 230 |
+
# Data Loading Subgraph Definition
|
| 231 |
+
# ============================================================================
|
| 232 |
+
|
| 233 |
+
# Create data loading subgraph
|
| 234 |
+
data_loading_subgraph = StateGraph(DataLoadState)
|
| 235 |
+
|
| 236 |
+
# Add subgraph nodes
|
| 237 |
+
data_loading_subgraph.add_node(
|
| 238 |
+
"set_agent_system_message", set_agent_system_message_node
|
| 239 |
+
)
|
| 240 |
+
data_loading_subgraph.add_node("parse_resume", parse_resume_node)
|
| 241 |
+
data_loading_subgraph.add_node("parse_job_description", parse_job_description_node)
|
| 242 |
+
data_loading_subgraph.add_node("aggregate_results", aggregate_data_loading_results)
|
| 243 |
+
data_loading_subgraph.add_node("verify_inputs", verify_inputs_node)
|
| 244 |
+
|
| 245 |
+
# Add subgraph edges
|
| 246 |
+
data_loading_subgraph.add_edge(START, "set_agent_system_message")
|
| 247 |
+
# Parallel execution: both nodes start after set_agent_system_message
|
| 248 |
+
data_loading_subgraph.add_edge("set_agent_system_message", "parse_resume")
|
| 249 |
+
data_loading_subgraph.add_edge("set_agent_system_message", "parse_job_description")
|
| 250 |
+
# Both parallel nodes feed into aggregate (LangGraph waits for both)
|
| 251 |
+
data_loading_subgraph.add_edge("parse_resume", "aggregate_results")
|
| 252 |
+
data_loading_subgraph.add_edge("parse_job_description", "aggregate_results")
|
| 253 |
+
# Aggregate feeds into verification
|
| 254 |
+
data_loading_subgraph.add_edge("aggregate_results", "verify_inputs")
|
| 255 |
+
# Verification ends the subgraph
|
| 256 |
+
data_loading_subgraph.add_edge("verify_inputs", END)
|
| 257 |
+
|
| 258 |
+
# Compile data loading subgraph
|
| 259 |
+
data_loading_workflow = data_loading_subgraph.compile()
|
src/job_writing_agent/nodes/initializing.py
DELETED
|
@@ -1,513 +0,0 @@
|
|
| 1 |
-
# -*- coding: utf-8 -*-
|
| 2 |
-
"""
|
| 3 |
-
Job Application Writer - Initialization Module
|
| 4 |
-
|
| 5 |
-
This module provides the Dataloading class responsible for loading and validating
|
| 6 |
-
inputs required for the job-application workflow. It handles parsing resumes and
|
| 7 |
-
job descriptions, managing missing inputs, and populating application state.
|
| 8 |
-
|
| 9 |
-
The module includes utilities for:
|
| 10 |
-
- Parsing resume files and extracting text content
|
| 11 |
-
- Parsing job descriptions and extracting company information
|
| 12 |
-
- Orchestrating input loading with validation
|
| 13 |
-
- Providing user prompts for missing information during verification
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
-
import logging
|
| 17 |
-
from typing import Tuple, Optional
|
| 18 |
-
|
| 19 |
-
from langchain_core.documents import Document
|
| 20 |
-
from langchain_core.messages import SystemMessage
|
| 21 |
-
from langgraph.graph import StateGraph, END, START
|
| 22 |
-
|
| 23 |
-
from job_writing_agent.classes import DataLoadState
|
| 24 |
-
from job_writing_agent.utils.document_processing import (
|
| 25 |
-
parse_resume,
|
| 26 |
-
get_job_description,
|
| 27 |
-
)
|
| 28 |
-
from job_writing_agent.prompts.templates import agent_system_prompt
|
| 29 |
-
from job_writing_agent.utils.logging.logging_decorators import (
|
| 30 |
-
log_async,
|
| 31 |
-
log_execution,
|
| 32 |
-
log_errors,
|
| 33 |
-
)
|
| 34 |
-
|
| 35 |
-
logger = logging.getLogger(__name__)
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
# Note: Using centralized logging decorators from utils.logging.logging_decorators
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
class Dataloading:
|
| 42 |
-
"""
|
| 43 |
-
Helper class providing utility methods for loading and parsing data.
|
| 44 |
-
|
| 45 |
-
This class provides helper methods used by the data loading subgraph nodes.
|
| 46 |
-
The actual workflow orchestration is handled by the data_loading_workflow subgraph.
|
| 47 |
-
|
| 48 |
-
Methods
|
| 49 |
-
-------
|
| 50 |
-
set_agent_system_message(state: DataLoadState) -> DataLoadState
|
| 51 |
-
Adds the system prompt to the conversation state.
|
| 52 |
-
get_resume(resume_source) -> str
|
| 53 |
-
Parses a resume file and returns its plain‑text content.
|
| 54 |
-
parse_job_description(job_description_source) -> Tuple[str, str]
|
| 55 |
-
Parses a job description and returns its text and company name.
|
| 56 |
-
verify_inputs(state: DataLoadState) -> DataLoadState
|
| 57 |
-
Validates inputs and sets next_node for routing.
|
| 58 |
-
|
| 59 |
-
Private Methods (used by subgraph nodes)
|
| 60 |
-
-----------------------------------------
|
| 61 |
-
_load_resume(resume_source) -> str
|
| 62 |
-
Load resume content, raising if the source is missing.
|
| 63 |
-
_load_job_description(jd_source) -> Tuple[str, str]
|
| 64 |
-
Load job description text and company name, raising if missing.
|
| 65 |
-
_prompt_user(prompt_msg: str) -> str
|
| 66 |
-
Prompt the user for input (synchronous input wrapped for async use).
|
| 67 |
-
|
| 68 |
-
"""
|
| 69 |
-
|
| 70 |
-
def __init__(self):
|
| 71 |
-
"""Initialize Dataloading helper class."""
|
| 72 |
-
pass
|
| 73 |
-
|
| 74 |
-
# =======================================================================
|
| 75 |
-
# System/Initialization Methods
|
| 76 |
-
# =======================================================================
|
| 77 |
-
|
| 78 |
-
@log_async
|
| 79 |
-
async def set_agent_system_message(self, state: DataLoadState) -> DataLoadState:
|
| 80 |
-
"""Add the system prompt to the conversation state.
|
| 81 |
-
|
| 82 |
-
Parameters
|
| 83 |
-
----------
|
| 84 |
-
state: DataLoadState
|
| 85 |
-
Current workflow state.
|
| 86 |
-
|
| 87 |
-
Returns
|
| 88 |
-
-------
|
| 89 |
-
DataLoadState
|
| 90 |
-
Updated state with the system message and the next node identifier.
|
| 91 |
-
"""
|
| 92 |
-
agent_initialization_system_message = SystemMessage(content=agent_system_prompt)
|
| 93 |
-
messages = state.get("messages", [])
|
| 94 |
-
messages.append(agent_initialization_system_message)
|
| 95 |
-
return {
|
| 96 |
-
**state,
|
| 97 |
-
"messages": messages,
|
| 98 |
-
"current_node": "initialize_system",
|
| 99 |
-
}
|
| 100 |
-
|
| 101 |
-
# =======================================================================
|
| 102 |
-
# Public Parsing Methods
|
| 103 |
-
# =======================================================================
|
| 104 |
-
|
| 105 |
-
@log_async
|
| 106 |
-
@log_errors
|
| 107 |
-
async def get_resume(self, resume_source):
|
| 108 |
-
"""
|
| 109 |
-
Parse a resume file and return its plain‑text content.
|
| 110 |
-
|
| 111 |
-
This method extracts text from resume chunks, handling both Document
|
| 112 |
-
objects and plain strings. Empty or invalid chunks are skipped.
|
| 113 |
-
|
| 114 |
-
Parameters
|
| 115 |
-
----------
|
| 116 |
-
resume_source: Any
|
| 117 |
-
Path or file‑like object accepted by ``parse_resume``.
|
| 118 |
-
|
| 119 |
-
Returns
|
| 120 |
-
-------
|
| 121 |
-
str
|
| 122 |
-
Plain text content of the resume.
|
| 123 |
-
|
| 124 |
-
Raises
|
| 125 |
-
------
|
| 126 |
-
AssertionError
|
| 127 |
-
If resume_source is None.
|
| 128 |
-
Exception
|
| 129 |
-
If parsing fails.
|
| 130 |
-
"""
|
| 131 |
-
logger.info("Parsing resume...")
|
| 132 |
-
resume_text = ""
|
| 133 |
-
assert resume_source is not None
|
| 134 |
-
resume_chunks = parse_resume(resume_source)
|
| 135 |
-
for chunk in resume_chunks:
|
| 136 |
-
if hasattr(chunk, "page_content") and chunk.page_content:
|
| 137 |
-
resume_text += chunk.page_content
|
| 138 |
-
elif isinstance(chunk, str) and chunk:
|
| 139 |
-
resume_text += chunk
|
| 140 |
-
else:
|
| 141 |
-
logger.debug("Skipping empty or invalid chunk in resume: %s", chunk)
|
| 142 |
-
return resume_text
|
| 143 |
-
|
| 144 |
-
@log_async
|
| 145 |
-
@log_errors
|
| 146 |
-
async def parse_job_description(self, job_description_source):
|
| 147 |
-
"""
|
| 148 |
-
Parse a job description and return its text and company name.
|
| 149 |
-
|
| 150 |
-
Extracts both the job posting text and company name from the document.
|
| 151 |
-
Company name is extracted from document metadata if available.
|
| 152 |
-
|
| 153 |
-
Parameters
|
| 154 |
-
----------
|
| 155 |
-
job_description_source: Any
|
| 156 |
-
Source accepted by ``get_job_description`` (URL, file path, etc.).
|
| 157 |
-
|
| 158 |
-
Returns
|
| 159 |
-
-------
|
| 160 |
-
Tuple[str, str]
|
| 161 |
-
A tuple of (job_posting_text, company_name).
|
| 162 |
-
|
| 163 |
-
Raises
|
| 164 |
-
------
|
| 165 |
-
AssertionError
|
| 166 |
-
If job_description_source is None.
|
| 167 |
-
Exception
|
| 168 |
-
If parsing fails.
|
| 169 |
-
"""
|
| 170 |
-
company_name = ""
|
| 171 |
-
job_posting_text = ""
|
| 172 |
-
|
| 173 |
-
logger.info("Parsing job description from: %s", job_description_source)
|
| 174 |
-
assert job_description_source is not None, (
|
| 175 |
-
"Job description source cannot be None"
|
| 176 |
-
)
|
| 177 |
-
|
| 178 |
-
job_description_document: Optional[Document] = await get_job_description(
|
| 179 |
-
job_description_source
|
| 180 |
-
)
|
| 181 |
-
|
| 182 |
-
# Extract company name from metadata
|
| 183 |
-
if hasattr(job_description_document, "metadata") and isinstance(
|
| 184 |
-
job_description_document.metadata, dict
|
| 185 |
-
):
|
| 186 |
-
company_name = job_description_document.metadata.get("company_name", "")
|
| 187 |
-
if not company_name:
|
| 188 |
-
logger.warning("Company name not found in job description metadata.")
|
| 189 |
-
else:
|
| 190 |
-
logger.warning(
|
| 191 |
-
"Metadata attribute missing or not a dict in job description document."
|
| 192 |
-
)
|
| 193 |
-
|
| 194 |
-
# Extract job posting text
|
| 195 |
-
if hasattr(job_description_document, "page_content"):
|
| 196 |
-
job_posting_text = job_description_document.page_content or ""
|
| 197 |
-
if not job_posting_text:
|
| 198 |
-
logger.info("Parsed job posting text is empty.")
|
| 199 |
-
else:
|
| 200 |
-
logger.warning(
|
| 201 |
-
"page_content attribute missing in job description document."
|
| 202 |
-
)
|
| 203 |
-
|
| 204 |
-
return job_posting_text, company_name
|
| 205 |
-
|
| 206 |
-
@log_async
|
| 207 |
-
async def get_application_form_details(self, job_description_source):
|
| 208 |
-
"""
|
| 209 |
-
Placeholder for future method to get application form details.
|
| 210 |
-
|
| 211 |
-
This method will be implemented to extract form fields and requirements
|
| 212 |
-
from job application forms.
|
| 213 |
-
|
| 214 |
-
Parameters
|
| 215 |
-
----------
|
| 216 |
-
job_description_source: Any
|
| 217 |
-
Source of the job description or application form.
|
| 218 |
-
"""
|
| 219 |
-
# TODO: Implement form field extraction
|
| 220 |
-
pass
|
| 221 |
-
|
| 222 |
-
# =======================================================================
|
| 223 |
-
# Validation Methods
|
| 224 |
-
# =======================================================================
|
| 225 |
-
|
| 226 |
-
@log_execution
|
| 227 |
-
@log_errors
|
| 228 |
-
def verify_inputs(self, state: DataLoadState) -> DataLoadState:
|
| 229 |
-
"""
|
| 230 |
-
Validate inputs and set next_node for routing.
|
| 231 |
-
|
| 232 |
-
This method validates that both resume and job description are present
|
| 233 |
-
in the state, normalizes their values to strings, and sets the next_node
|
| 234 |
-
field for conditional routing in the main workflow.
|
| 235 |
-
|
| 236 |
-
Parameters
|
| 237 |
-
----------
|
| 238 |
-
state: DataLoadState
|
| 239 |
-
Current workflow state containing company_research_data.
|
| 240 |
-
|
| 241 |
-
Returns
|
| 242 |
-
-------
|
| 243 |
-
DataLoadState
|
| 244 |
-
Updated state with next_node set to "load" (if validation fails)
|
| 245 |
-
or "research" (if validation passes).
|
| 246 |
-
|
| 247 |
-
Raises
|
| 248 |
-
------
|
| 249 |
-
Exception
|
| 250 |
-
If normalization fails for any field.
|
| 251 |
-
"""
|
| 252 |
-
logger.info("Verifying loaded inputs!")
|
| 253 |
-
state["current_node"] = "verify"
|
| 254 |
-
|
| 255 |
-
# Validate required fields
|
| 256 |
-
company_research_data = state.get("company_research_data", {})
|
| 257 |
-
|
| 258 |
-
if not company_research_data.get("resume"):
|
| 259 |
-
logger.error("Resume is missing in company_research_data")
|
| 260 |
-
state["next_node"] = "load" # Loop back to load subgraph
|
| 261 |
-
return state
|
| 262 |
-
|
| 263 |
-
if not company_research_data.get("job_description"):
|
| 264 |
-
logger.error("Job description is missing in company_research_data")
|
| 265 |
-
state["next_node"] = "load" # Loop back to load subgraph
|
| 266 |
-
return state
|
| 267 |
-
|
| 268 |
-
# Normalize values to strings
|
| 269 |
-
for key in ["resume", "job_description"]:
|
| 270 |
-
try:
|
| 271 |
-
value = company_research_data[key]
|
| 272 |
-
if isinstance(value, (list, tuple)):
|
| 273 |
-
company_research_data[key] = " ".join(str(x) for x in value)
|
| 274 |
-
elif isinstance(value, dict):
|
| 275 |
-
company_research_data[key] = str(value)
|
| 276 |
-
else:
|
| 277 |
-
company_research_data[key] = str(value)
|
| 278 |
-
except Exception as e:
|
| 279 |
-
logger.warning("Error converting %s to string: %s", key, e)
|
| 280 |
-
state["next_node"] = "load"
|
| 281 |
-
return state
|
| 282 |
-
|
| 283 |
-
# All validations passed
|
| 284 |
-
state["next_node"] = "research"
|
| 285 |
-
logger.info("Inputs verified successfully, proceeding to research")
|
| 286 |
-
return state
|
| 287 |
-
|
| 288 |
-
# =======================================================================
|
| 289 |
-
# Private Helper Methods (used by subgraph nodes)
|
| 290 |
-
# =======================================================================
|
| 291 |
-
|
| 292 |
-
@log_async
|
| 293 |
-
@log_errors
|
| 294 |
-
async def _load_resume(self, resume_source) -> str:
|
| 295 |
-
"""
|
| 296 |
-
Load resume content, raising if the source is missing.
|
| 297 |
-
|
| 298 |
-
This is a wrapper around get_resume() that validates the source first.
|
| 299 |
-
Used by subgraph nodes for consistent error handling.
|
| 300 |
-
|
| 301 |
-
Parameters
|
| 302 |
-
----------
|
| 303 |
-
resume_source: Any
|
| 304 |
-
Path or file-like object for the resume.
|
| 305 |
-
|
| 306 |
-
Returns
|
| 307 |
-
-------
|
| 308 |
-
str
|
| 309 |
-
Plain text content of the resume.
|
| 310 |
-
|
| 311 |
-
Raises
|
| 312 |
-
------
|
| 313 |
-
ValueError
|
| 314 |
-
If resume_source is None or empty.
|
| 315 |
-
"""
|
| 316 |
-
if not resume_source:
|
| 317 |
-
raise ValueError("resume_source is required")
|
| 318 |
-
return await self.get_resume(resume_source)
|
| 319 |
-
|
| 320 |
-
@log_async
|
| 321 |
-
@log_errors
|
| 322 |
-
async def _load_job_description(self, jd_source) -> Tuple[str, str]:
|
| 323 |
-
"""
|
| 324 |
-
Load job description text and company name, raising if missing.
|
| 325 |
-
|
| 326 |
-
This is a wrapper around parse_job_description() that validates the source first.
|
| 327 |
-
Used by subgraph nodes for consistent error handling.
|
| 328 |
-
|
| 329 |
-
Parameters
|
| 330 |
-
----------
|
| 331 |
-
jd_source: Any
|
| 332 |
-
Source for the job description (URL, file path, etc.).
|
| 333 |
-
|
| 334 |
-
Returns
|
| 335 |
-
-------
|
| 336 |
-
Tuple[str, str]
|
| 337 |
-
A tuple of (job_posting_text, company_name).
|
| 338 |
-
|
| 339 |
-
Raises
|
| 340 |
-
------
|
| 341 |
-
ValueError
|
| 342 |
-
If jd_source is None or empty.
|
| 343 |
-
"""
|
| 344 |
-
if not jd_source:
|
| 345 |
-
raise ValueError("job_description_source is required")
|
| 346 |
-
return await self.parse_job_description(jd_source)
|
| 347 |
-
|
| 348 |
-
@log_async
|
| 349 |
-
@log_errors
|
| 350 |
-
async def _prompt_user(self, prompt_msg: str) -> str:
|
| 351 |
-
"""
|
| 352 |
-
Prompt the user for input (synchronous input wrapped for async use).
|
| 353 |
-
|
| 354 |
-
This method wraps the synchronous input() function to be used in async contexts.
|
| 355 |
-
In a production async UI, this would be replaced with an async input mechanism.
|
| 356 |
-
|
| 357 |
-
Parameters
|
| 358 |
-
----------
|
| 359 |
-
prompt_msg: str
|
| 360 |
-
Message to display to the user.
|
| 361 |
-
|
| 362 |
-
Returns
|
| 363 |
-
-------
|
| 364 |
-
str
|
| 365 |
-
User input string.
|
| 366 |
-
"""
|
| 367 |
-
# In a real async UI replace input with an async call.
|
| 368 |
-
return input(prompt_msg)
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
# ============================================================================
|
| 372 |
-
# Data Loading Subgraph Nodes
|
| 373 |
-
# ============================================================================
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
@log_async
|
| 377 |
-
async def parse_resume_node(state: DataLoadState) -> DataLoadState:
|
| 378 |
-
"""
|
| 379 |
-
Node to parse resume in parallel with job description parsing.
|
| 380 |
-
|
| 381 |
-
Extracts resume parsing logic from load_inputs for parallel execution.
|
| 382 |
-
Returns only the resume data - reducer will merge with job description data.
|
| 383 |
-
"""
|
| 384 |
-
dataloading = Dataloading()
|
| 385 |
-
resume_src = state.get("resume_path")
|
| 386 |
-
|
| 387 |
-
resume_text = ""
|
| 388 |
-
if resume_src:
|
| 389 |
-
resume_text = await dataloading._load_resume(resume_src)
|
| 390 |
-
elif state.get("current_node") == "verify":
|
| 391 |
-
resume_text = await dataloading._prompt_user(
|
| 392 |
-
"Please paste the resume in text format: "
|
| 393 |
-
)
|
| 394 |
-
|
| 395 |
-
# Return only the resume data - reducer will merge this with job description data
|
| 396 |
-
logger.info(f"Resume parsed: {len(resume_text)} characters")
|
| 397 |
-
# Return partial state update - LangGraph will merge this with other parallel updates
|
| 398 |
-
return {
|
| 399 |
-
"company_research_data": {"resume": resume_text},
|
| 400 |
-
}
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
@log_async
|
| 404 |
-
async def parse_job_description_node(state: DataLoadState) -> DataLoadState:
|
| 405 |
-
"""
|
| 406 |
-
Node to parse job description in parallel with resume parsing.
|
| 407 |
-
|
| 408 |
-
Extracts job description parsing logic from load_inputs for parallel execution.
|
| 409 |
-
Returns only the job description data - reducer will merge with resume data.
|
| 410 |
-
"""
|
| 411 |
-
dataloading = Dataloading()
|
| 412 |
-
jd_src = state.get("job_description_source")
|
| 413 |
-
|
| 414 |
-
job_text = ""
|
| 415 |
-
company_name = ""
|
| 416 |
-
if jd_src:
|
| 417 |
-
job_text, company_name = await dataloading._load_job_description(jd_src)
|
| 418 |
-
elif state.get("current_node") == "verify":
|
| 419 |
-
job_text = await dataloading._prompt_user(
|
| 420 |
-
"Please paste the job posting in text format: "
|
| 421 |
-
)
|
| 422 |
-
|
| 423 |
-
# Return only the job description data - reducer will merge this with resume data
|
| 424 |
-
logger.info(
|
| 425 |
-
f"Job description parsed: {len(job_text)} characters, company: {company_name}"
|
| 426 |
-
)
|
| 427 |
-
# Return partial state update - LangGraph will merge this with other parallel updates
|
| 428 |
-
return {
|
| 429 |
-
"company_research_data": {
|
| 430 |
-
"job_description": job_text,
|
| 431 |
-
"company_name": company_name,
|
| 432 |
-
},
|
| 433 |
-
}
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
@log_execution
|
| 437 |
-
def aggregate_data_loading_results(state: DataLoadState) -> DataLoadState:
|
| 438 |
-
"""
|
| 439 |
-
Aggregate results from parallel resume and job description parsing nodes.
|
| 440 |
-
|
| 441 |
-
This node runs after both parse_resume_node and parse_job_description_node
|
| 442 |
-
complete. It ensures both results are present and normalizes the state.
|
| 443 |
-
"""
|
| 444 |
-
# Ensure company_research_data exists
|
| 445 |
-
if "company_research_data" not in state:
|
| 446 |
-
state["company_research_data"] = {}
|
| 447 |
-
|
| 448 |
-
# Get results from parallel nodes
|
| 449 |
-
resume_text = state["company_research_data"].get("resume", "")
|
| 450 |
-
job_text = state["company_research_data"].get("job_description", "")
|
| 451 |
-
company_name = state["company_research_data"].get("company_name", "")
|
| 452 |
-
|
| 453 |
-
# Validate both are present
|
| 454 |
-
if not resume_text:
|
| 455 |
-
logger.warning("Resume text is empty after parsing")
|
| 456 |
-
if not job_text:
|
| 457 |
-
logger.warning("Job description text is empty after parsing")
|
| 458 |
-
|
| 459 |
-
# Ensure final structure is correct
|
| 460 |
-
state["company_research_data"] = {
|
| 461 |
-
"resume": resume_text,
|
| 462 |
-
"job_description": job_text,
|
| 463 |
-
"company_name": company_name,
|
| 464 |
-
}
|
| 465 |
-
state["current_node"] = "aggregate_results"
|
| 466 |
-
|
| 467 |
-
logger.info("Data loading results aggregated successfully")
|
| 468 |
-
return state
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
@log_execution
|
| 472 |
-
def verify_inputs_node(state: DataLoadState) -> DataLoadState:
|
| 473 |
-
"""
|
| 474 |
-
Verify that required inputs are present and set next_node for routing.
|
| 475 |
-
|
| 476 |
-
Modified from verify_inputs to return state with next_node instead of string.
|
| 477 |
-
"""
|
| 478 |
-
dataloading = Dataloading()
|
| 479 |
-
return dataloading.verify_inputs(state)
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
# ============================================================================
|
| 483 |
-
# Data Loading Subgraph
|
| 484 |
-
# ============================================================================
|
| 485 |
-
|
| 486 |
-
# Create data loading subgraph
|
| 487 |
-
data_loading_subgraph = StateGraph(DataLoadState)
|
| 488 |
-
|
| 489 |
-
# Add subgraph nodes
|
| 490 |
-
dataloading_instance = Dataloading()
|
| 491 |
-
data_loading_subgraph.add_node(
|
| 492 |
-
"set_agent_system_message", dataloading_instance.set_agent_system_message
|
| 493 |
-
)
|
| 494 |
-
data_loading_subgraph.add_node("parse_resume", parse_resume_node)
|
| 495 |
-
data_loading_subgraph.add_node("parse_job_description", parse_job_description_node)
|
| 496 |
-
data_loading_subgraph.add_node("aggregate_results", aggregate_data_loading_results)
|
| 497 |
-
data_loading_subgraph.add_node("verify_inputs", verify_inputs_node)
|
| 498 |
-
|
| 499 |
-
# Add subgraph edges
|
| 500 |
-
data_loading_subgraph.add_edge(START, "set_agent_system_message")
|
| 501 |
-
# Parallel execution: both nodes start after set_agent_system_message
|
| 502 |
-
data_loading_subgraph.add_edge("set_agent_system_message", "parse_resume")
|
| 503 |
-
data_loading_subgraph.add_edge("set_agent_system_message", "parse_job_description")
|
| 504 |
-
# Both parallel nodes feed into aggregate (LangGraph waits for both)
|
| 505 |
-
data_loading_subgraph.add_edge("parse_resume", "aggregate_results")
|
| 506 |
-
data_loading_subgraph.add_edge("parse_job_description", "aggregate_results")
|
| 507 |
-
# Aggregate feeds into verification
|
| 508 |
-
data_loading_subgraph.add_edge("aggregate_results", "verify_inputs")
|
| 509 |
-
# Verification ends the subgraph
|
| 510 |
-
data_loading_subgraph.add_edge("verify_inputs", END)
|
| 511 |
-
|
| 512 |
-
# Compile data loading subgraph
|
| 513 |
-
data_loading_workflow = data_loading_subgraph.compile()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/job_writing_agent/nodes/job_description_loader.py
CHANGED
|
@@ -166,7 +166,7 @@ class JobDescriptionLoader:
|
|
| 166 |
# TODO: Implement form field extraction
|
| 167 |
pass
|
| 168 |
|
| 169 |
-
async def
|
| 170 |
"""
|
| 171 |
Prompt the user for input (synchronous input wrapped for async use).
|
| 172 |
|
|
|
|
| 166 |
# TODO: Implement form field extraction
|
| 167 |
pass
|
| 168 |
|
| 169 |
+
async def _prompt_user_for_job_description(self) -> str:
|
| 170 |
"""
|
| 171 |
Prompt the user for input (synchronous input wrapped for async use).
|
| 172 |
|
src/job_writing_agent/nodes/research_workflow.py
CHANGED
|
@@ -1,17 +1,25 @@
|
|
| 1 |
# research_workflow.py
|
| 2 |
-
|
| 3 |
-
|
|
|
|
| 4 |
import asyncio
|
| 5 |
-
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
import dspy
|
| 9 |
-
from
|
| 10 |
-
|
| 11 |
-
|
| 12 |
from job_writing_agent.agents.output_schema import (
|
| 13 |
CompanyResearchDataSummarizationSchema,
|
| 14 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
from job_writing_agent.utils.llm_provider_factory import LLMFactory
|
| 16 |
|
| 17 |
logger = logging.getLogger(__name__)
|
|
@@ -25,12 +33,19 @@ EVAL_TIMEOUT = 15 # seconds per evaluation
|
|
| 25 |
|
| 26 |
def validate_research_inputs(state: ResearchState) -> tuple[bool, str, str]:
|
| 27 |
"""
|
| 28 |
-
Validate that required inputs are present.
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
"""
|
| 31 |
try:
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
|
| 35 |
if not company_name or not company_name.strip():
|
| 36 |
logger.error("Company name is missing or empty")
|
|
@@ -42,14 +57,14 @@ def validate_research_inputs(state: ResearchState) -> tuple[bool, str, str]:
|
|
| 42 |
|
| 43 |
return True, company_name.strip(), job_description.strip()
|
| 44 |
|
| 45 |
-
except (
|
| 46 |
logger.error(f"Invalid state structure: {e}")
|
| 47 |
return False, "", ""
|
| 48 |
|
| 49 |
|
| 50 |
def parse_dspy_queries_with_fallback(
|
| 51 |
-
raw_queries:
|
| 52 |
-
) ->
|
| 53 |
"""
|
| 54 |
Parse DSPy query output with multiple fallback strategies.
|
| 55 |
Returns a dict of query_id -> query_string.
|
|
@@ -88,7 +103,7 @@ def parse_dspy_queries_with_fallback(
|
|
| 88 |
return get_fallback_queries(company_name)
|
| 89 |
|
| 90 |
|
| 91 |
-
def get_fallback_queries(company_name: str) ->
|
| 92 |
"""
|
| 93 |
Generate basic fallback queries when DSPy fails.
|
| 94 |
"""
|
|
@@ -102,19 +117,27 @@ def get_fallback_queries(company_name: str) -> Dict[str, str]:
|
|
| 102 |
def company_research_data_summary(state: ResearchState) -> ResearchState:
|
| 103 |
"""
|
| 104 |
Summarize the filtered research data into a concise summary.
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
"""
|
| 107 |
try:
|
| 108 |
-
|
|
|
|
| 109 |
|
| 110 |
-
# Extract the current research data
|
| 111 |
company_research_data = state.get("company_research_data", {})
|
| 112 |
tavily_search_data = company_research_data.get("tavily_search", [])
|
| 113 |
|
| 114 |
# If no research data, skip summarization
|
| 115 |
if not tavily_search_data or len(tavily_search_data) == 0:
|
| 116 |
logger.warning("No research data to summarize. Skipping summarization.")
|
| 117 |
-
return
|
| 118 |
|
| 119 |
logger.info(f"Summarizing {len(tavily_search_data)} research result sets...")
|
| 120 |
|
|
@@ -127,7 +150,7 @@ def company_research_data_summary(state: ResearchState) -> ResearchState:
|
|
| 127 |
|
| 128 |
llm_provider = LLMFactory()
|
| 129 |
llm = llm_provider.create_dspy(
|
| 130 |
-
model="
|
| 131 |
provider="openrouter",
|
| 132 |
temperature=0.3,
|
| 133 |
)
|
|
@@ -137,29 +160,31 @@ def company_research_data_summary(state: ResearchState) -> ResearchState:
|
|
| 137 |
response = company_research_data_summarization(
|
| 138 |
company_research_data=company_research_data
|
| 139 |
)
|
| 140 |
-
# Extract the summary from the response
|
| 141 |
-
|
| 142 |
if hasattr(response, "company_research_data_summary"):
|
| 143 |
summary_json_str = response.company_research_data_summary
|
| 144 |
-
elif isinstance(response, dict)
|
| 145 |
-
summary_json_str = response
|
| 146 |
else:
|
| 147 |
logger.error(
|
| 148 |
f"Unexpected response format from summarization: {type(response)}"
|
| 149 |
)
|
| 150 |
-
return
|
| 151 |
|
| 152 |
-
#
|
| 153 |
-
|
|
|
|
| 154 |
summary_json_str
|
| 155 |
)
|
|
|
|
| 156 |
|
| 157 |
-
return
|
| 158 |
|
| 159 |
except Exception as e:
|
| 160 |
logger.error(f"Error in company_research_data_summary: {e}", exc_info=True)
|
| 161 |
# Return state unchanged on error
|
| 162 |
-
return
|
| 163 |
|
| 164 |
|
| 165 |
async def research_company_with_retry(state: ResearchState) -> ResearchState:
|
|
@@ -173,9 +198,16 @@ async def research_company_with_retry(state: ResearchState) -> ResearchState:
|
|
| 173 |
|
| 174 |
if not is_valid:
|
| 175 |
logger.error("Invalid inputs for research. Skipping research phase.")
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
logger.info(f"Researching company: {company_name}")
|
| 181 |
|
|
@@ -254,14 +286,17 @@ async def research_company_with_retry(state: ResearchState) -> ResearchState:
|
|
| 254 |
if len(search_results) == 0:
|
| 255 |
logger.warning("No search results returned")
|
| 256 |
|
| 257 |
-
# Store results
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
)
|
| 264 |
-
return state
|
| 265 |
|
| 266 |
except Exception as e:
|
| 267 |
logger.error(
|
|
@@ -273,22 +308,31 @@ async def research_company_with_retry(state: ResearchState) -> ResearchState:
|
|
| 273 |
await asyncio.sleep(RETRY_DELAY * (attempt + 1)) # Exponential backoff
|
| 274 |
else:
|
| 275 |
logger.error("All retry attempts exhausted. Using empty results.")
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
|
| 286 |
|
| 287 |
# Create research subgraph
|
| 288 |
research_subgraph = StateGraph(ResearchState)
|
| 289 |
|
| 290 |
# Add research subgraph nodes
|
| 291 |
-
research_subgraph.add_node("research_company",
|
| 292 |
research_subgraph.add_node("relevance_filter", filter_research_results_by_relevance)
|
| 293 |
research_subgraph.add_node(
|
| 294 |
"company_research_data_summary", company_research_data_summary
|
|
|
|
| 1 |
# research_workflow.py
|
| 2 |
+
"""Research workflow for company information gathering and filtering."""
|
| 3 |
+
|
| 4 |
+
# Standard library imports
|
| 5 |
import asyncio
|
| 6 |
+
import json
|
| 7 |
+
import logging
|
| 8 |
+
from typing import Any, Dict, cast
|
| 9 |
|
| 10 |
+
# Third-party imports
|
| 11 |
import dspy
|
| 12 |
+
from langgraph.graph import END, START, StateGraph
|
| 13 |
+
|
| 14 |
+
# Local imports
|
| 15 |
from job_writing_agent.agents.output_schema import (
|
| 16 |
CompanyResearchDataSummarizationSchema,
|
| 17 |
)
|
| 18 |
+
from job_writing_agent.classes.classes import ResearchState
|
| 19 |
+
from job_writing_agent.tools.SearchTool import (
|
| 20 |
+
TavilyResearchTool,
|
| 21 |
+
filter_research_results_by_relevance,
|
| 22 |
+
)
|
| 23 |
from job_writing_agent.utils.llm_provider_factory import LLMFactory
|
| 24 |
|
| 25 |
logger = logging.getLogger(__name__)
|
|
|
|
| 33 |
|
| 34 |
def validate_research_inputs(state: ResearchState) -> tuple[bool, str, str]:
|
| 35 |
"""
|
| 36 |
+
Validate that required inputs are present in research state.
|
| 37 |
+
|
| 38 |
+
Args:
|
| 39 |
+
state: Current research workflow state
|
| 40 |
+
|
| 41 |
+
Returns:
|
| 42 |
+
Tuple of (is_valid, company_name, job_description)
|
| 43 |
"""
|
| 44 |
try:
|
| 45 |
+
# Safe dictionary access with fallbacks
|
| 46 |
+
company_research_data = state.get("company_research_data", {})
|
| 47 |
+
company_name = company_research_data.get("company_name", "")
|
| 48 |
+
job_description = company_research_data.get("job_description", "")
|
| 49 |
|
| 50 |
if not company_name or not company_name.strip():
|
| 51 |
logger.error("Company name is missing or empty")
|
|
|
|
| 57 |
|
| 58 |
return True, company_name.strip(), job_description.strip()
|
| 59 |
|
| 60 |
+
except (TypeError, AttributeError) as e:
|
| 61 |
logger.error(f"Invalid state structure: {e}")
|
| 62 |
return False, "", ""
|
| 63 |
|
| 64 |
|
| 65 |
def parse_dspy_queries_with_fallback(
|
| 66 |
+
raw_queries: dict[str, Any], company_name: str
|
| 67 |
+
) -> dict[str, str]:
|
| 68 |
"""
|
| 69 |
Parse DSPy query output with multiple fallback strategies.
|
| 70 |
Returns a dict of query_id -> query_string.
|
|
|
|
| 103 |
return get_fallback_queries(company_name)
|
| 104 |
|
| 105 |
|
| 106 |
+
def get_fallback_queries(company_name: str) -> dict[str, str]:
|
| 107 |
"""
|
| 108 |
Generate basic fallback queries when DSPy fails.
|
| 109 |
"""
|
|
|
|
| 117 |
def company_research_data_summary(state: ResearchState) -> ResearchState:
|
| 118 |
"""
|
| 119 |
Summarize the filtered research data into a concise summary.
|
| 120 |
+
|
| 121 |
+
Replaces the raw tavily_search results with a summarized version using LLM.
|
| 122 |
+
|
| 123 |
+
Args:
|
| 124 |
+
state: Current research state with search results
|
| 125 |
+
|
| 126 |
+
Returns:
|
| 127 |
+
Updated state with research summary
|
| 128 |
"""
|
| 129 |
try:
|
| 130 |
+
# Update current node
|
| 131 |
+
updated_state = {**state, "current_node": "company_research_data_summary"}
|
| 132 |
|
| 133 |
+
# Extract the current research data with safe access
|
| 134 |
company_research_data = state.get("company_research_data", {})
|
| 135 |
tavily_search_data = company_research_data.get("tavily_search", [])
|
| 136 |
|
| 137 |
# If no research data, skip summarization
|
| 138 |
if not tavily_search_data or len(tavily_search_data) == 0:
|
| 139 |
logger.warning("No research data to summarize. Skipping summarization.")
|
| 140 |
+
return updated_state
|
| 141 |
|
| 142 |
logger.info(f"Summarizing {len(tavily_search_data)} research result sets...")
|
| 143 |
|
|
|
|
| 150 |
|
| 151 |
llm_provider = LLMFactory()
|
| 152 |
llm = llm_provider.create_dspy(
|
| 153 |
+
model="cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
|
| 154 |
provider="openrouter",
|
| 155 |
temperature=0.3,
|
| 156 |
)
|
|
|
|
| 160 |
response = company_research_data_summarization(
|
| 161 |
company_research_data=company_research_data
|
| 162 |
)
|
| 163 |
+
# Extract the summary from the response with safe access
|
| 164 |
+
summary_json_str = ""
|
| 165 |
if hasattr(response, "company_research_data_summary"):
|
| 166 |
summary_json_str = response.company_research_data_summary
|
| 167 |
+
elif isinstance(response, dict):
|
| 168 |
+
summary_json_str = response.get("company_research_data_summary", "")
|
| 169 |
else:
|
| 170 |
logger.error(
|
| 171 |
f"Unexpected response format from summarization: {type(response)}"
|
| 172 |
)
|
| 173 |
+
return updated_state
|
| 174 |
|
| 175 |
+
# Update state with summary using safe dictionary operations
|
| 176 |
+
updated_company_research_data = {**company_research_data}
|
| 177 |
+
updated_company_research_data["company_research_data_summary"] = (
|
| 178 |
summary_json_str
|
| 179 |
)
|
| 180 |
+
updated_state["company_research_data"] = updated_company_research_data
|
| 181 |
|
| 182 |
+
return updated_state
|
| 183 |
|
| 184 |
except Exception as e:
|
| 185 |
logger.error(f"Error in company_research_data_summary: {e}", exc_info=True)
|
| 186 |
# Return state unchanged on error
|
| 187 |
+
return updated_state
|
| 188 |
|
| 189 |
|
| 190 |
async def research_company_with_retry(state: ResearchState) -> ResearchState:
|
|
|
|
| 198 |
|
| 199 |
if not is_valid:
|
| 200 |
logger.error("Invalid inputs for research. Skipping research phase.")
|
| 201 |
+
return ResearchState(
|
| 202 |
+
company_research_data={
|
| 203 |
+
**state.get("company_research_data", {}),
|
| 204 |
+
"tavily_search": [],
|
| 205 |
+
},
|
| 206 |
+
attempted_search_queries=[],
|
| 207 |
+
current_node="research_company",
|
| 208 |
+
content_category=state.get("content_category", "cover_letter"),
|
| 209 |
+
messages=state.get("messages", []),
|
| 210 |
+
)
|
| 211 |
|
| 212 |
logger.info(f"Researching company: {company_name}")
|
| 213 |
|
|
|
|
| 286 |
if len(search_results) == 0:
|
| 287 |
logger.warning("No search results returned")
|
| 288 |
|
| 289 |
+
# Store results and return ResearchState
|
| 290 |
+
return ResearchState(
|
| 291 |
+
company_research_data={
|
| 292 |
+
**state.get("company_research_data", {}),
|
| 293 |
+
"tavily_search": search_results,
|
| 294 |
+
},
|
| 295 |
+
attempted_search_queries=list(queries.values()),
|
| 296 |
+
current_node="research_company",
|
| 297 |
+
content_category=state.get("content_category", "cover_letter"),
|
| 298 |
+
messages=state.get("messages", []),
|
| 299 |
)
|
|
|
|
| 300 |
|
| 301 |
except Exception as e:
|
| 302 |
logger.error(
|
|
|
|
| 308 |
await asyncio.sleep(RETRY_DELAY * (attempt + 1)) # Exponential backoff
|
| 309 |
else:
|
| 310 |
logger.error("All retry attempts exhausted. Using empty results.")
|
| 311 |
+
return ResearchState(
|
| 312 |
+
company_research_data={
|
| 313 |
+
**state.get("company_research_data", {}),
|
| 314 |
+
"tavily_search": [],
|
| 315 |
+
},
|
| 316 |
+
attempted_search_queries=[],
|
| 317 |
+
current_node="research_company",
|
| 318 |
+
content_category=state.get("content_category", "cover_letter"),
|
| 319 |
+
messages=state.get("messages", []),
|
| 320 |
+
)
|
| 321 |
|
| 322 |
+
return ResearchState(
|
| 323 |
+
company_research_data=state.get("company_research_data", {}),
|
| 324 |
+
attempted_search_queries=[],
|
| 325 |
+
current_node="research_company",
|
| 326 |
+
content_category=state.get("content_category", "cover_letter"),
|
| 327 |
+
messages=state.get("messages", []),
|
| 328 |
+
)
|
| 329 |
|
| 330 |
|
| 331 |
# Create research subgraph
|
| 332 |
research_subgraph = StateGraph(ResearchState)
|
| 333 |
|
| 334 |
# Add research subgraph nodes
|
| 335 |
+
research_subgraph.add_node("research_company", research_company_with_retry)
|
| 336 |
research_subgraph.add_node("relevance_filter", filter_research_results_by_relevance)
|
| 337 |
research_subgraph.add_node(
|
| 338 |
"company_research_data_summary", company_research_data_summary
|
src/job_writing_agent/nodes/selfconsistency.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
| 1 |
-
|
| 2 |
-
from datetime import datetime
|
| 3 |
import json
|
|
|
|
| 4 |
import re
|
|
|
|
| 5 |
|
|
|
|
| 6 |
from ..classes.classes import AppState
|
| 7 |
-
from ..prompts.templates import
|
| 8 |
from ..utils.llm_provider_factory import LLMFactory
|
| 9 |
|
| 10 |
|
|
@@ -14,82 +16,104 @@ CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
|
| 14 |
|
| 15 |
|
| 16 |
def self_consistency_vote(state: AppState) -> AppState:
|
| 17 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
# Create LLM inside function (lazy initialization)
|
| 19 |
llm_factory = LLMFactory()
|
| 20 |
-
|
| 21 |
model="google/gemma-3-12b-it:free", provider="openrouter", temperature=0.1
|
| 22 |
)
|
| 23 |
|
| 24 |
-
|
|
|
|
| 25 |
|
| 26 |
-
all_drafts = [
|
| 27 |
|
| 28 |
# First, have the LLM rate each draft
|
| 29 |
-
|
| 30 |
|
| 31 |
-
# Get resume and job summaries
|
| 32 |
try:
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
| 36 |
else:
|
| 37 |
-
resume_summary =
|
| 38 |
else:
|
| 39 |
-
resume_summary = str(
|
| 40 |
except Exception as e:
|
| 41 |
-
|
| 42 |
-
resume_summary = str(state
|
| 43 |
|
| 44 |
try:
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
):
|
| 49 |
-
job_summary = state["job_description_source"][0]
|
| 50 |
else:
|
| 51 |
-
job_summary = str(
|
| 52 |
except Exception as e:
|
| 53 |
-
|
| 54 |
-
job_summary = str(state
|
| 55 |
-
|
| 56 |
-
for
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
| 64 |
)
|
| 65 |
-
|
| 66 |
|
| 67 |
-
# Create
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
)
|
| 71 |
|
| 72 |
# Get the selected draft index with error handling
|
| 73 |
try:
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
# Extract just the first number found in the response
|
| 76 |
-
number_match = re.search(r"\d+",
|
| 77 |
if not number_match:
|
| 78 |
-
|
| 79 |
-
"
|
| 80 |
)
|
| 81 |
-
|
| 82 |
else:
|
| 83 |
-
|
| 84 |
# Validate the index is in range
|
| 85 |
-
if
|
| 86 |
-
|
| 87 |
-
f"
|
| 88 |
)
|
| 89 |
-
|
| 90 |
except (ValueError, TypeError) as e:
|
| 91 |
-
|
| 92 |
-
|
| 93 |
|
| 94 |
-
state
|
| 95 |
-
|
|
|
|
|
|
| 1 |
+
# Standard library imports
|
|
|
|
| 2 |
import json
|
| 3 |
+
import logging
|
| 4 |
import re
|
| 5 |
+
from datetime import datetime
|
| 6 |
|
| 7 |
+
# Local imports
|
| 8 |
from ..classes.classes import AppState
|
| 9 |
+
from ..prompts.templates import BEST_DRAFT_SELECTION_PROMPT, DRAFT_RATING_PROMPT
|
| 10 |
from ..utils.llm_provider_factory import LLMFactory
|
| 11 |
|
| 12 |
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
def self_consistency_vote(state: AppState) -> AppState:
|
| 19 |
+
"""
|
| 20 |
+
Choose the best draft from multiple variations using LLM-based voting.
|
| 21 |
+
|
| 22 |
+
This function rates all draft variations and selects the best one based on
|
| 23 |
+
criteria like relevance, professional tone, personalization, and persuasiveness.
|
| 24 |
+
|
| 25 |
+
Args:
|
| 26 |
+
state: Application state containing the original draft and variations
|
| 27 |
+
|
| 28 |
+
Returns:
|
| 29 |
+
Updated state with the best draft selected
|
| 30 |
+
"""
|
| 31 |
# Create LLM inside function (lazy initialization)
|
| 32 |
llm_factory = LLMFactory()
|
| 33 |
+
precise_llm = llm_factory.create_langchain(
|
| 34 |
model="google/gemma-3-12b-it:free", provider="openrouter", temperature=0.1
|
| 35 |
)
|
| 36 |
|
| 37 |
+
variations_data = state.get("variations", {"variations": []})
|
| 38 |
+
original_draft = state.get("draft", "")
|
| 39 |
|
| 40 |
+
all_drafts = [original_draft] + variations_data.get("variations", [])
|
| 41 |
|
| 42 |
# First, have the LLM rate each draft
|
| 43 |
+
draft_ratings = []
|
| 44 |
|
| 45 |
+
# Get resume and job summaries with safe dictionary access
|
| 46 |
try:
|
| 47 |
+
resume_path = state.get("resume_path", "")
|
| 48 |
+
if isinstance(resume_path, list) and len(resume_path) > 0:
|
| 49 |
+
if hasattr(resume_path[0], "page_content"):
|
| 50 |
+
resume_summary = resume_path[0].page_content
|
| 51 |
else:
|
| 52 |
+
resume_summary = resume_path[0]
|
| 53 |
else:
|
| 54 |
+
resume_summary = str(resume_path)
|
| 55 |
except Exception as e:
|
| 56 |
+
logger.warning(f"Error getting resume summary: {e}")
|
| 57 |
+
resume_summary = str(state.get("resume_path", ""))
|
| 58 |
|
| 59 |
try:
|
| 60 |
+
job_description_source = state.get("job_description_source", "")
|
| 61 |
+
if isinstance(job_description_source, list) and len(job_description_source) > 0:
|
| 62 |
+
job_summary = job_description_source[0]
|
|
|
|
|
|
|
| 63 |
else:
|
| 64 |
+
job_summary = str(job_description_source)
|
| 65 |
except Exception as e:
|
| 66 |
+
logger.warning(f"Error getting job summary: {e}")
|
| 67 |
+
job_summary = str(state.get("job_description_source", ""))
|
| 68 |
+
|
| 69 |
+
for draft_index, draft_content in enumerate(all_drafts):
|
| 70 |
+
# Create chain with proper prompt template invocation
|
| 71 |
+
rating_chain = DRAFT_RATING_PROMPT | precise_llm
|
| 72 |
+
rating_result = rating_chain.invoke(
|
| 73 |
+
{
|
| 74 |
+
"resume_summary": resume_summary,
|
| 75 |
+
"job_summary": job_summary,
|
| 76 |
+
"draft": draft_content,
|
| 77 |
+
"draft_number": draft_index + 1,
|
| 78 |
+
}
|
| 79 |
)
|
| 80 |
+
draft_ratings.append(rating_result)
|
| 81 |
|
| 82 |
+
# Create chain for draft selection with proper prompt template invocation
|
| 83 |
+
selection_chain = BEST_DRAFT_SELECTION_PROMPT | precise_llm
|
| 84 |
+
selection_result = selection_chain.invoke(
|
| 85 |
+
{
|
| 86 |
+
"ratings_json": json.dumps(draft_ratings, indent=2),
|
| 87 |
+
"num_drafts": len(all_drafts),
|
| 88 |
+
}
|
| 89 |
)
|
| 90 |
|
| 91 |
# Get the selected draft index with error handling
|
| 92 |
try:
|
| 93 |
+
selection_text = str(
|
| 94 |
+
selection_result.content
|
| 95 |
+
if hasattr(selection_result, "content")
|
| 96 |
+
else selection_result
|
| 97 |
+
).strip()
|
| 98 |
# Extract just the first number found in the response
|
| 99 |
+
number_match = re.search(r"\d+", selection_text)
|
| 100 |
if not number_match:
|
| 101 |
+
logger.warning(
|
| 102 |
+
"Could not extract draft number from LLM response. Using original draft."
|
| 103 |
)
|
| 104 |
+
best_draft_index = 0
|
| 105 |
else:
|
| 106 |
+
best_draft_index = int(number_match.group()) - 1
|
| 107 |
# Validate the index is in range
|
| 108 |
+
if best_draft_index < 0 or best_draft_index >= len(all_drafts):
|
| 109 |
+
logger.warning(
|
| 110 |
+
f"Selected draft index {best_draft_index + 1} out of range. Using original draft."
|
| 111 |
)
|
| 112 |
+
best_draft_index = 0
|
| 113 |
except (ValueError, TypeError) as e:
|
| 114 |
+
logger.warning(f"Error selecting best draft: {e}. Using original draft.")
|
| 115 |
+
best_draft_index = 0
|
| 116 |
|
| 117 |
+
# Update state with best draft using safe dictionary operations
|
| 118 |
+
updated_state = {**state, "draft": all_drafts[best_draft_index]}
|
| 119 |
+
return updated_state
|
src/job_writing_agent/nodes/system_initializer.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
System Initializer Module
|
| 4 |
+
|
| 5 |
+
This module provides the SystemInitializer class responsible for initializing
|
| 6 |
+
system messages in the workflow state. It follows the Single Responsibility
|
| 7 |
+
Principle by focusing solely on system message initialization.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
from typing import Optional
|
| 12 |
+
|
| 13 |
+
from langchain_core.messages import SystemMessage
|
| 14 |
+
|
| 15 |
+
from job_writing_agent.classes import DataLoadState
|
| 16 |
+
from job_writing_agent.prompts.templates import agent_system_prompt
|
| 17 |
+
from job_writing_agent.utils.logging.logging_decorators import log_async
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class SystemInitializer:
    """
    Responsible for initializing system messages in workflow state.

    Example:
        >>> initializer = SystemInitializer()
        >>> state = await initializer.set_agent_system_message(initial_state)
        >>>
        >>> # With custom prompt for testing
        >>> custom_prompt = "Custom system prompt"
        >>> initializer = SystemInitializer(system_prompt=custom_prompt)
    """

    def __init__(self, system_prompt: Optional[str] = None):
        """
        Initialize SystemInitializer with optional system prompt dependency injection.

        Parameters
        ----------
        system_prompt: Optional[str]
            System prompt text to use. Defaults to `agent_system_prompt` from
            prompts.templates. Can be injected for testing or custom prompts.
        """
        self._system_prompt = system_prompt or agent_system_prompt

    @log_async
    async def set_agent_system_message(self, state: DataLoadState) -> DataLoadState:
        """
        Add the system prompt to the conversation state.

        This method creates a SystemMessage from the configured prompt and
        adds it to the messages list in the workflow state.

        Parameters
        ----------
        state: DataLoadState
            Current workflow state containing a messages list.

        Returns
        -------
        DataLoadState
            Updated state with the system message added to the messages list
            and current_node set to "initialize_system".
        """
        # Copy the list before appending: the original implementation
        # appended to the list object held by the incoming state, mutating
        # the caller's state in place even though a new dict is returned.
        messages = list(state.get("messages", []))
        messages.append(SystemMessage(content=self._system_prompt))
        return {
            **state,
            "messages": messages,
            "current_node": "initialize_system",
        }
|
src/job_writing_agent/nodes/validation_helper.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Validation Helper Module
|
| 4 |
+
|
| 5 |
+
This module provides the ValidationHelper class responsible for validating
|
| 6 |
+
workflow inputs and setting routing decisions. It follows the Single
|
| 7 |
+
Responsibility Principle by focusing solely on input validation.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
|
| 12 |
+
from job_writing_agent.classes import DataLoadState
|
| 13 |
+
from job_writing_agent.utils.logging.logging_decorators import (
|
| 14 |
+
log_execution,
|
| 15 |
+
log_errors,
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class ValidationHelper:
    """
    Responsible for validating workflow inputs and setting routing decisions.

    Example:
        >>> validator = ValidationHelper()
        >>> validated_state = validator.verify_inputs(state)
        >>> next_node = validated_state.get("next_node")  # "load" or "research"
    """

    def __init__(self):
        """
        Initialize ValidationHelper.

        This class is stateless - no dependencies needed for validation logic.
        """

    @log_execution
    @log_errors
    def verify_inputs(self, state: DataLoadState) -> DataLoadState:
        """
        Validate inputs and set next_node for routing.

        This method validates that both resume and job description are present
        and non-empty in the state.

        Parameters
        ----------
        state: DataLoadState
            Current workflow state containing company_research_data.

        Returns
        -------
        DataLoadState
            Updated state with next_node set to "load" (if validation fails)
            or "research" (if validation passes).
        """
        logger.info("Verifying loaded inputs!")
        state["current_node"] = "verify"

        if not self._validate_resume(state):
            logger.error("Resume is missing or empty in company_research_data")
            state["next_node"] = "load"  # Loop back to load subgraph
            return state

        if not self._validate_job_description(state):
            logger.error("Job description is missing or empty in company_research_data")
            state["next_node"] = "load"  # Loop back to load subgraph
            return state

        # All validations passed
        state["next_node"] = "research"
        logger.info("Inputs verified successfully, proceeding to research")
        return state

    @staticmethod
    def _field_has_content(state: DataLoadState, field: str) -> bool:
        """
        Return True when company_research_data[field] is present and non-empty.

        Shared implementation for the per-field validators: a falsy value
        fails immediately; otherwise the value is stringified and stripped so
        whitespace-only strings also fail.

        Parameters
        ----------
        state: DataLoadState
            Current workflow state.
        field: str
            Key to look up inside company_research_data.

        Returns
        -------
        bool
            True if the field holds non-empty content, False otherwise.
        """
        value = state.get("company_research_data", {}).get(field, "")
        if not value:
            return False
        return bool(str(value).strip())

    def _validate_resume(self, state: DataLoadState) -> bool:
        """
        Validate that resume is present and non-empty in company_research_data.

        Parameters
        ----------
        state: DataLoadState
            Current workflow state.

        Returns
        -------
        bool
            True if resume is present and non-empty, False otherwise.
        """
        return self._field_has_content(state, "resume")

    def _validate_job_description(self, state: DataLoadState) -> bool:
        """
        Validate that job description is present and non-empty in company_research_data.

        Parameters
        ----------
        state: DataLoadState
            Current workflow state.

        Returns
        -------
        bool
            True if job description is present and non-empty, False otherwise.
        """
        return self._field_has_content(state, "job_description")
|
src/job_writing_agent/nodes/variations.py
CHANGED
|
@@ -1,22 +1,36 @@
|
|
|
|
|
| 1 |
import logging
|
| 2 |
from datetime import datetime
|
| 3 |
-
from typing_extensions import Dict, List
|
| 4 |
|
|
|
|
| 5 |
from langchain_core.documents import Document
|
| 6 |
|
| 7 |
-
|
| 8 |
from ..classes.classes import ResultState
|
| 9 |
-
from ..utils.llm_provider_factory import LLMFactory
|
| 10 |
from ..prompts.templates import VARIATION_PROMPT
|
| 11 |
-
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
# Constants
|
| 15 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 16 |
|
| 17 |
|
| 18 |
-
def generate_variations(state: ResultState) ->
|
| 19 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# Create LLM inside function (lazy initialization)
|
| 21 |
llm_provider = LLMFactory()
|
| 22 |
llm = llm_provider.create_langchain(
|
|
@@ -27,27 +41,30 @@ def generate_variations(state: ResultState) -> Dict[str, List[str]]:
|
|
| 27 |
|
| 28 |
# Get resume and job text, handling both string and Document types
|
| 29 |
try:
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
else
|
| 36 |
-
|
| 37 |
-
)
|
| 38 |
-
job_text = "\n".join(
|
| 39 |
-
chunk
|
| 40 |
-
for chunk in (
|
| 41 |
-
state["company_research_data"]["job_description"][:2]
|
| 42 |
-
if isinstance(state["company_research_data"]["job_description"], str)
|
| 43 |
-
else [state["company_research_data"]["job_description"]]
|
| 44 |
)
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
except Exception as e:
|
| 47 |
-
|
| 48 |
# Fallback to simple string handling
|
| 49 |
-
resume_text = str(
|
| 50 |
-
job_text = str(
|
| 51 |
|
| 52 |
# Generate variations with different temperatures and creativity settings
|
| 53 |
temp_variations = [
|
|
@@ -65,22 +82,23 @@ def generate_variations(state: ResultState) -> Dict[str, List[str]]:
|
|
| 65 |
|
| 66 |
# Use VARIATION_PROMPT directly with the configured LLM
|
| 67 |
variation = VARIATION_PROMPT.format_messages(
|
| 68 |
-
resume_excerpt=resume_text, job_excerpt=job_text, draft=
|
| 69 |
)
|
| 70 |
|
| 71 |
response = configured_llm.invoke(variation)
|
| 72 |
|
| 73 |
-
|
| 74 |
|
| 75 |
if response and response.strip(): # Only add non-empty variations
|
| 76 |
variations.append(response)
|
| 77 |
except Exception as e:
|
| 78 |
-
|
| 79 |
continue
|
| 80 |
|
| 81 |
# Ensure we have at least one variation
|
| 82 |
if not variations:
|
| 83 |
# If all variations failed, add the original draft as a fallback
|
| 84 |
-
|
|
|
|
| 85 |
|
| 86 |
return {"variations": variations}
|
|
|
|
| 1 |
+
# Standard library imports
|
| 2 |
import logging
|
| 3 |
from datetime import datetime
|
|
|
|
| 4 |
|
| 5 |
+
# Third-party imports
|
| 6 |
from langchain_core.documents import Document
|
| 7 |
|
| 8 |
+
# Local imports
|
| 9 |
from ..classes.classes import ResultState
|
|
|
|
| 10 |
from ..prompts.templates import VARIATION_PROMPT
|
| 11 |
+
from ..utils.llm_provider_factory import LLMFactory
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
# Constants
|
| 15 |
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
|
| 16 |
|
| 17 |
|
| 18 |
+
def generate_variations(state: ResultState) -> dict[str, list[str]]:
|
| 19 |
+
"""
|
| 20 |
+
Generate multiple variations of the draft for self-consistency voting.
|
| 21 |
+
|
| 22 |
+
Args:
|
| 23 |
+
state: Current result state with draft and research data
|
| 24 |
+
|
| 25 |
+
Returns:
|
| 26 |
+
Dictionary containing list of draft variations
|
| 27 |
+
"""
|
| 28 |
+
# Validate and extract all required state fields once
|
| 29 |
+
company_research_data = state.get("company_research_data", {})
|
| 30 |
+
draft_content = state.get("draft", "")
|
| 31 |
+
resume_data = company_research_data.get("resume", "")
|
| 32 |
+
job_description_data = company_research_data.get("job_description", "")
|
| 33 |
+
|
| 34 |
# Create LLM inside function (lazy initialization)
|
| 35 |
llm_provider = LLMFactory()
|
| 36 |
llm = llm_provider.create_langchain(
|
|
|
|
| 41 |
|
| 42 |
# Get resume and job text, handling both string and Document types
|
| 43 |
try:
|
| 44 |
+
# Extract resume text
|
| 45 |
+
if isinstance(resume_data, str):
|
| 46 |
+
resume_text = resume_data[:2000] # Limit to first 2000 chars
|
| 47 |
+
elif isinstance(resume_data, list):
|
| 48 |
+
resume_text = "\n".join(
|
| 49 |
+
doc.page_content if isinstance(doc, Document) else str(doc)
|
| 50 |
+
for doc in resume_data[:2]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
)
|
| 52 |
+
else:
|
| 53 |
+
resume_text = str(resume_data)
|
| 54 |
+
|
| 55 |
+
# Extract job description text
|
| 56 |
+
if isinstance(job_description_data, str):
|
| 57 |
+
job_text = job_description_data[:2000] # Limit to first 2000 chars
|
| 58 |
+
elif isinstance(job_description_data, list):
|
| 59 |
+
job_text = "\n".join(str(chunk) for chunk in job_description_data[:2])
|
| 60 |
+
else:
|
| 61 |
+
job_text = str(job_description_data)
|
| 62 |
+
|
| 63 |
except Exception as e:
|
| 64 |
+
logger.warning(f"Error processing resume/job text: {e}")
|
| 65 |
# Fallback to simple string handling
|
| 66 |
+
resume_text = str(resume_data)
|
| 67 |
+
job_text = str(job_description_data)
|
| 68 |
|
| 69 |
# Generate variations with different temperatures and creativity settings
|
| 70 |
temp_variations = [
|
|
|
|
| 82 |
|
| 83 |
# Use VARIATION_PROMPT directly with the configured LLM
|
| 84 |
variation = VARIATION_PROMPT.format_messages(
|
| 85 |
+
resume_excerpt=resume_text, job_excerpt=job_text, draft=draft_content
|
| 86 |
)
|
| 87 |
|
| 88 |
response = configured_llm.invoke(variation)
|
| 89 |
|
| 90 |
+
logger.debug(f"Generated variation with settings {settings}")
|
| 91 |
|
| 92 |
if response and response.strip(): # Only add non-empty variations
|
| 93 |
variations.append(response)
|
| 94 |
except Exception as e:
|
| 95 |
+
logger.warning(f"Error generating variation with settings {settings}: {e}")
|
| 96 |
continue
|
| 97 |
|
| 98 |
# Ensure we have at least one variation
|
| 99 |
if not variations:
|
| 100 |
# If all variations failed, add the original draft as a fallback
|
| 101 |
+
logger.warning("All variations failed, using original draft as fallback")
|
| 102 |
+
variations.append(draft_content)
|
| 103 |
|
| 104 |
return {"variations": variations}
|
src/job_writing_agent/prompts/templates.py
CHANGED
|
@@ -9,6 +9,7 @@ from langchain_core.prompts import (
|
|
| 9 |
ChatPromptTemplate,
|
| 10 |
SystemMessagePromptTemplate,
|
| 11 |
HumanMessagePromptTemplate,
|
|
|
|
| 12 |
)
|
| 13 |
from langchain_core.messages import SystemMessage, HumanMessage
|
| 14 |
|
|
@@ -36,63 +37,59 @@ PERSONA_DEVELOPMENT_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_message
|
|
| 36 |
|
| 37 |
|
| 38 |
# Draft generation prompts
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
"""
|
| 69 |
)
|
| 70 |
|
| 71 |
|
| 72 |
-
BULLET_POINTS_PROMPT
|
| 73 |
-
|
| 74 |
-
creates personalized application materials.
|
| 75 |
-
|
| 76 |
-
{persona_instruction}
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
| 84 |
)
|
| 85 |
|
| 86 |
|
| 87 |
-
LINKEDIN_NOTE_PROMPT
|
| 88 |
-
|
| 89 |
-
writer who creates personalized application materials.
|
| 90 |
-
{persona_instruction}
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
|
|
|
| 96 |
)
|
| 97 |
|
| 98 |
# Variation generation prompt
|
|
@@ -230,6 +227,35 @@ REVISION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
|
|
| 230 |
]
|
| 231 |
)
|
| 232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
# Tavily query prompt to build knowledge context about the company
|
| 234 |
|
| 235 |
TAVILY_QUERY_PROMPT = """
|
|
@@ -247,33 +273,6 @@ The user needs targeted search queries (with rationale) for Tavily Search to res
|
|
| 247 |
</Requirements>
|
| 248 |
"""
|
| 249 |
|
| 250 |
-
JOB_DESCRIPTION_PROMPT = """You are a JSON extraction specialist. Extract job information from the provided text and return ONLY valid JSON.
|
| 251 |
-
|
| 252 |
-
CRITICAL: Your response must be parseable by json.loads() - no markdown, no explanations, no extra text.
|
| 253 |
-
|
| 254 |
-
Extract these three fields in exact order:
|
| 255 |
-
1. job_description field - Complete job posting formatted in clean markdown with proper headers (## Job Description, ## Responsibilities, ## Requirements, etc.)
|
| 256 |
-
2. company_name field - Exact company name as mentioned
|
| 257 |
-
3. job_title field - Exact job title as posted
|
| 258 |
-
|
| 259 |
-
FORMATTING RULES:
|
| 260 |
-
- Use double quotes for all strings
|
| 261 |
-
- Escape internal quotes with \\"
|
| 262 |
-
- Escape newlines as \\\\n in the job description field
|
| 263 |
-
- Replace actual line breaks with \\\\n
|
| 264 |
-
- If any field is missing, use empty string ""
|
| 265 |
-
- No trailing commas
|
| 266 |
-
- No comments or extra whitespace
|
| 267 |
-
|
| 268 |
-
REQUIRED OUTPUT FORMAT:
|
| 269 |
-
{{
|
| 270 |
-
"job_description": "markdown formatted job description with \\\\n for line breaks",
|
| 271 |
-
"company_name": "exact company name",
|
| 272 |
-
"job_title": "exact job title"
|
| 273 |
-
}}
|
| 274 |
-
|
| 275 |
-
Return only the JSON object - no other text."""
|
| 276 |
-
|
| 277 |
agent_system_prompt = """I act as your personal job-application assistant.
|
| 278 |
My function is to help you research, analyze, and write compelling application
|
| 279 |
materials — primarily LinkedIn reach-outs, short written responses, and cover
|
|
|
|
| 9 |
ChatPromptTemplate,
|
| 10 |
SystemMessagePromptTemplate,
|
| 11 |
HumanMessagePromptTemplate,
|
| 12 |
+
AIMessagePromptTemplate,
|
| 13 |
)
|
| 14 |
from langchain_core.messages import SystemMessage, HumanMessage
|
| 15 |
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
# Draft generation prompts
|
| 40 |
+
COVER_LETTER_PROMPT = AIMessagePromptTemplate.from_template(
|
| 41 |
+
"""
|
| 42 |
+
I am CoverLetterGPT, a concise career writing assistant.
|
| 43 |
+
|
| 44 |
+
CORE OBJECTIVE
|
| 45 |
+
• Draft a 3‑paragraph cover letter (150‑180 words total) that targets hiring managers
|
| 46 |
+
and technical recruiters. Assume it may reach the CEO.
|
| 47 |
+
• Begin exactly with: "To Hiring Team,"
|
| 48 |
+
End exactly with: "Thanks, Rishabh"
|
| 49 |
+
• Tone: polite, casual, enthusiastic — but no em dashes (—) and no clichés.
|
| 50 |
+
• Every fact about achievements, skills, or company details must be traceable to the
|
| 51 |
+
provided resume, job description, or company research; otherwise, ask the user.
|
| 52 |
+
• If any critical detail is missing or ambiguous, STOP and ask a clarifying question
|
| 53 |
+
before writing the letter.
|
| 54 |
+
• Keep sentences tight; avoid filler like “I am excited to…” (enthusiasm comes
|
| 55 |
+
through precise language).
|
| 56 |
+
• Never exceed 180 words. Never fall below 150 words.
|
| 57 |
+
|
| 58 |
+
SELF‑EVALUATION (append after the letter)
|
| 59 |
+
After producing the cover letter, output an “### Evaluation” section containing:
|
| 60 |
+
Comprehensiveness (1‑5)
|
| 61 |
+
Evidence provided (1‑5)
|
| 62 |
+
Clarity of explanation (1‑5)
|
| 63 |
+
Potential limitations or biases (bullet list)
|
| 64 |
+
Areas for improvement (brief notes)
|
| 65 |
+
|
| 66 |
+
ERROR HANDLING
|
| 67 |
+
If word count, section order, or format rules are violated, regenerate until correct.
|
| 68 |
+
"""
|
|
|
|
| 69 |
)
|
| 70 |
|
| 71 |
|
| 72 |
+
BULLET_POINTS_PROMPT = AIMessagePromptTemplate.from_template(
|
| 73 |
+
"""I am an expert job application writer who creates personalized application materials.
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
+
Write 5-7 bullet points highlighting the candidate's
|
| 76 |
+
qualifications for this specific role.
|
| 77 |
+
Create content that genuinely reflects the candidate's
|
| 78 |
+
background and is tailored to the specific job.
|
| 79 |
+
Ensure the tone is professional, confident, and authentic.
|
| 80 |
+
Today is {current_date}.""",
|
| 81 |
+
input_variables=["current_date"],
|
| 82 |
)
|
| 83 |
|
| 84 |
|
| 85 |
+
LINKEDIN_NOTE_PROMPT = AIMessagePromptTemplate.from_template(
|
| 86 |
+
"""I am an expert job application writer who creates personalized application materials.
|
|
|
|
|
|
|
| 87 |
|
| 88 |
+
Write a brief LinkedIn connection note to a hiring manager or recruiter (100 words max).
|
| 89 |
+
Create content that genuinely reflects the candidate's background and is tailored to the specific job.
|
| 90 |
+
Ensure the tone is professional, confident, and authentic.
|
| 91 |
+
Today is {current_date}.""",
|
| 92 |
+
input_variables=["current_date"],
|
| 93 |
)
|
| 94 |
|
| 95 |
# Variation generation prompt
|
|
|
|
| 227 |
]
|
| 228 |
)
|
| 229 |
|
| 230 |
+
# Human-turn context block: wraps the job description, resume, and company
# research data in explicit START/END markers so the model can locate each
# section unambiguously.
DRAFT_GENERATION_CONTEXT_PROMPT = HumanMessagePromptTemplate.from_template(
    """
Below is the Job Description, Candidate Resume, and Company Research Data, each delimited by START/END markers and triple single quotes.

**Job Description:**

START OF JOB DESCRIPTION'''
{current_job_role}
'''END OF JOB DESCRIPTION

**Candidate Resume:**

START OF CANDIDATE RESUME'''
{candidate_resume}
'''END OF CANDIDATE RESUME

**Company Research Data:**

START OF COMPANY RESEARCH DATA'''
{company_research_data}
'''END OF COMPANY RESEARCH DATA
""",
    input_variables=[
        "current_job_role",
        "candidate_resume",
        "company_research_data",
    ],
)
|
| 258 |
+
|
| 259 |
# Tavily query prompt to build knowledge context about the company
|
| 260 |
|
| 261 |
TAVILY_QUERY_PROMPT = """
|
|
|
|
| 273 |
</Requirements>
|
| 274 |
"""
|
| 275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
agent_system_prompt = """I act as your personal job-application assistant.
|
| 277 |
My function is to help you research, analyze, and write compelling application
|
| 278 |
materials — primarily LinkedIn reach-outs, short written responses, and cover
|
src/job_writing_agent/prompts/test_templates.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Scratch/demo script exercising prompt-template construction with the
# project's LLM factory. Runs at import time (builds an LLM client and
# prints the assembled templates).
from langchain_core.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage

from job_writing_agent.utils.llm_provider_factory import LLMFactory


llm_provider = LLMFactory()
llm = llm_provider.create_langchain(
    "allenai/olmo-3.1-32b-think:free",
    provider="openrouter",
    temperature=0.1,
)


# Use PromptTemplate classes for variable interpolation
TEST_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
    [
        # SystemMessagePromptTemplate lets the system turn interpolate {current_date}
        SystemMessagePromptTemplate.from_template(
            "You can answer any question that the user asks. If you don't know the answer, say 'I don't know' and don't make up an answer. Todays date is {current_date}.",
            input_variables=["current_date"],
        ),
        # This AI turn is static text with no placeholders, so no
        # input_variables are declared (the original declared "current_date"
        # here even though the template contains no such variable).
        AIMessagePromptTemplate.from_template(
            "I am here to help you answer any question that you ask.",
        ),
    ]
)

# Map the incoming payload's current_date into the prompt, then pipe to the LLM.
prompt_test_chain = ({"current_date": lambda x: x["current_date"]}) | TEST_PROMPT | llm

# Test it
print(TEST_PROMPT)


BULLET_POINTS_PROMPT = SystemMessagePromptTemplate.from_template(
    """You are an expert job application writer who
creates personalized application materials.

{persona_instruction}

Write 5-7 bullet points highlighting the candidate's
qualifications for this specific role.
Create content that genuinely reflects the candidate's
background and is tailored to the specific job.
Ensure the tone is professional, confident, and authentic.
Today is {current_date}.""",
    input_variables=["persona_instruction", "current_date"],
)

print(BULLET_POINTS_PROMPT)
src/job_writing_agent/tools/SearchTool.py
CHANGED
|
@@ -1,14 +1,17 @@
|
|
|
|
|
|
|
|
| 1 |
import logging
|
| 2 |
import os
|
| 3 |
-
import asyncio
|
| 4 |
-
from dotenv import load_dotenv
|
| 5 |
from pathlib import Path
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
from langchain_tavily import TavilySearch
|
| 8 |
from openevals.llm import create_async_llm_as_judge
|
| 9 |
-
from openevals.prompts import
|
| 10 |
-
import dspy
|
| 11 |
|
|
|
|
| 12 |
from ..agents.output_schema import TavilySearchQueries
|
| 13 |
from ..classes.classes import ResearchState
|
| 14 |
from ..utils.llm_provider_factory import LLMFactory
|
|
@@ -21,7 +24,11 @@ env_path = Path(__file__).parent / ".env"
|
|
| 21 |
load_dotenv(dotenv_path=env_path, override=True)
|
| 22 |
|
| 23 |
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
|
| 27 |
class TavilyResearchTool:
|
|
@@ -30,7 +37,7 @@ class TavilyResearchTool:
|
|
| 30 |
job_description,
|
| 31 |
company_name,
|
| 32 |
max_results=5,
|
| 33 |
-
model_name="
|
| 34 |
):
|
| 35 |
# Create LLM inside __init__ (lazy initialization)
|
| 36 |
llm_provider = LLMFactory()
|
|
@@ -55,19 +62,34 @@ class TavilyResearchTool:
|
|
| 55 |
return response
|
| 56 |
|
| 57 |
def tavily_search_company(self, queries):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
query_results: list[list[str]] = []
|
| 59 |
-
for
|
| 60 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
search_query_response = self.tavily_searchtool.invoke(
|
| 62 |
-
{"query":
|
| 63 |
)
|
|
|
|
|
|
|
| 64 |
query_results.append(
|
| 65 |
-
[res
|
| 66 |
)
|
| 67 |
-
# print(f"Tavily Search Tool Response for query '{search_query_response['query']}': {query_results_map[search_query_response['query']]}")
|
| 68 |
except Exception as e:
|
| 69 |
logger.error(
|
| 70 |
-
f"Failed to perform company research using TavilySearchTool. Error
|
| 71 |
)
|
| 72 |
continue
|
| 73 |
|
|
@@ -120,10 +142,9 @@ async def filter_research_results_by_relevance(state: ResearchState) -> Research
|
|
| 120 |
try:
|
| 121 |
state["current_node"] = "filter_research_results_by_relevance"
|
| 122 |
|
| 123 |
-
# Extract
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
)
|
| 127 |
search_queries_used = state.get("attempted_search_queries", [])
|
| 128 |
|
| 129 |
# Validate data types
|
|
@@ -138,7 +159,9 @@ async def filter_research_results_by_relevance(state: ResearchState) -> Research
|
|
| 138 |
# Early exit if no results
|
| 139 |
if len(raw_search_results) == 0:
|
| 140 |
logger.info("No search results to filter.")
|
| 141 |
-
|
|
|
|
|
|
|
| 142 |
return state
|
| 143 |
|
| 144 |
logger.info(
|
|
@@ -201,6 +224,7 @@ async def filter_research_results_by_relevance(state: ResearchState) -> Research
|
|
| 201 |
logger.warning(
|
| 202 |
f"Evaluation timed out for query: {original_query[:60]}... (KEEPING result)"
|
| 203 |
)
|
|
|
|
| 204 |
return (search_result_content, True, "timeout")
|
| 205 |
|
| 206 |
except Exception as e:
|
|
@@ -248,8 +272,9 @@ async def filter_research_results_by_relevance(state: ResearchState) -> Research
|
|
| 248 |
else:
|
| 249 |
results_removed_count += 1
|
| 250 |
|
| 251 |
-
# Update
|
| 252 |
-
|
|
|
|
| 253 |
|
| 254 |
# Log filtering summary
|
| 255 |
total_evaluated = len(raw_search_results)
|
|
|
|
| 1 |
+
# Standard library imports
|
| 2 |
+
import asyncio
|
| 3 |
import logging
|
| 4 |
import os
|
|
|
|
|
|
|
| 5 |
from pathlib import Path
|
| 6 |
|
| 7 |
+
# Third-party imports
|
| 8 |
+
import dspy
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
from langchain_tavily import TavilySearch
|
| 11 |
from openevals.llm import create_async_llm_as_judge
|
| 12 |
+
from openevals.prompts import RAG_HELPFULNESS_PROMPT, RAG_RETRIEVAL_RELEVANCE_PROMPT
|
|
|
|
| 13 |
|
| 14 |
+
# Local imports
|
| 15 |
from ..agents.output_schema import TavilySearchQueries
|
| 16 |
from ..classes.classes import ResearchState
|
| 17 |
from ..utils.llm_provider_factory import LLMFactory
|
|
|
|
| 24 |
load_dotenv(dotenv_path=env_path, override=True)
|
| 25 |
|
| 26 |
|
| 27 |
+
# Safe environment variable access with validation
|
| 28 |
+
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
|
| 29 |
+
if not openrouter_api_key:
|
| 30 |
+
logger.error("OPENROUTER_API_KEY environment variable not set")
|
| 31 |
+
raise ValueError("OPENROUTER_API_KEY environment variable is required")
|
| 32 |
|
| 33 |
|
| 34 |
class TavilyResearchTool:
|
|
|
|
| 37 |
job_description,
|
| 38 |
company_name,
|
| 39 |
max_results=5,
|
| 40 |
+
model_name="cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
|
| 41 |
):
|
| 42 |
# Create LLM inside __init__ (lazy initialization)
|
| 43 |
llm_provider = LLMFactory()
|
|
|
|
| 62 |
return response
|
| 63 |
|
| 64 |
def tavily_search_company(self, queries):
|
| 65 |
+
"""
|
| 66 |
+
Execute Tavily searches for multiple queries.
|
| 67 |
+
|
| 68 |
+
Args:
|
| 69 |
+
queries: Dictionary of query identifiers to query strings
|
| 70 |
+
|
| 71 |
+
Returns:
|
| 72 |
+
List of search result lists, one per query
|
| 73 |
+
"""
|
| 74 |
query_results: list[list[str]] = []
|
| 75 |
+
for query_key in queries:
|
| 76 |
try:
|
| 77 |
+
query_string = queries.get(query_key, "")
|
| 78 |
+
if not query_string:
|
| 79 |
+
logger.warning(f"Empty query for key: {query_key}")
|
| 80 |
+
continue
|
| 81 |
+
|
| 82 |
search_query_response = self.tavily_searchtool.invoke(
|
| 83 |
+
{"query": query_string}
|
| 84 |
)
|
| 85 |
+
# Safe dictionary access for response
|
| 86 |
+
results = search_query_response.get("results", [])
|
| 87 |
query_results.append(
|
| 88 |
+
[res.get("content", "") for res in results if isinstance(res, dict)]
|
| 89 |
)
|
|
|
|
| 90 |
except Exception as e:
|
| 91 |
logger.error(
|
| 92 |
+
f"Failed to perform company research using TavilySearchTool. Error: {e}"
|
| 93 |
)
|
| 94 |
continue
|
| 95 |
|
|
|
|
| 142 |
try:
|
| 143 |
state["current_node"] = "filter_research_results_by_relevance"
|
| 144 |
|
| 145 |
+
# Extract and validate required state fields once
|
| 146 |
+
company_research_data = state.get("company_research_data", {})
|
| 147 |
+
raw_search_results = company_research_data.get("tavily_search", [])
|
|
|
|
| 148 |
search_queries_used = state.get("attempted_search_queries", [])
|
| 149 |
|
| 150 |
# Validate data types
|
|
|
|
| 159 |
# Early exit if no results
|
| 160 |
if len(raw_search_results) == 0:
|
| 161 |
logger.info("No search results to filter.")
|
| 162 |
+
# Update using the extracted variable
|
| 163 |
+
company_research_data["tavily_search"] = []
|
| 164 |
+
state["company_research_data"] = company_research_data
|
| 165 |
return state
|
| 166 |
|
| 167 |
logger.info(
|
|
|
|
| 224 |
logger.warning(
|
| 225 |
f"Evaluation timed out for query: {original_query[:60]}... (KEEPING result)"
|
| 226 |
)
|
| 227 |
+
# Keep the result on timeout to avoid losing potentially useful data
|
| 228 |
return (search_result_content, True, "timeout")
|
| 229 |
|
| 230 |
except Exception as e:
|
|
|
|
| 272 |
else:
|
| 273 |
results_removed_count += 1
|
| 274 |
|
| 275 |
+
# Update company_research_data with ONLY the relevant results
|
| 276 |
+
company_research_data["tavily_search"] = results_kept
|
| 277 |
+
state["company_research_data"] = company_research_data
|
| 278 |
|
| 279 |
# Log filtering summary
|
| 280 |
total_evaluated = len(raw_search_results)
|
src/job_writing_agent/utils/application_cli_interface.py
CHANGED
|
@@ -1,18 +1,29 @@
|
|
| 1 |
import argparse
|
| 2 |
-
import
|
| 3 |
from typing import Iterable
|
| 4 |
|
| 5 |
import requests
|
| 6 |
-
from requests.exceptions import RequestException
|
| 7 |
|
| 8 |
|
| 9 |
-
DEFAULT_MODEL = "
|
| 10 |
DEFAULT_CONTENT_TYPE = "cover_letter"
|
| 11 |
|
| 12 |
|
| 13 |
def readable_file(path: str) -> str:
|
| 14 |
-
"""
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
raise argparse.ArgumentTypeError(f"File not found: {path}")
|
| 17 |
if not path.lower().endswith((".pdf", ".md", ".json", ".txt")):
|
| 18 |
raise argparse.ArgumentTypeError(
|
|
@@ -22,7 +33,18 @@ def readable_file(path: str) -> str:
|
|
| 22 |
|
| 23 |
|
| 24 |
def valid_temp(temp: str) -> float:
|
| 25 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
value = float(temp)
|
| 27 |
if not (0 <= value <= 2):
|
| 28 |
raise argparse.ArgumentTypeError("Temperature must be between 0 and 2.")
|
|
@@ -31,42 +53,41 @@ def valid_temp(temp: str) -> float:
|
|
| 31 |
|
| 32 |
def is_valid_url(
|
| 33 |
job_posting: str, allowed_statuses: Iterable[int] | None = None
|
| 34 |
-
) ->
|
| 35 |
"""
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
successful codes).
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
Specific status codes that are considered “valid”.
|
| 48 |
-
If ``None`` (default) any 200‑399 status is accepted.
|
| 49 |
-
|
| 50 |
-
Returns
|
| 51 |
-
-------
|
| 52 |
-
bool
|
| 53 |
-
``True`` if the URL succeeded, ``False`` otherwise.
|
| 54 |
"""
|
| 55 |
if allowed_statuses is None:
|
| 56 |
# All 2xx and 3xx responses are considered “valid”
|
| 57 |
allowed_statuses = range(200, 400)
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
| 66 |
|
| 67 |
|
| 68 |
def handle_cli() -> argparse.Namespace:
|
| 69 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
parser = argparse.ArgumentParser(
|
| 71 |
description="""Assist the candidate in writing content for
|
| 72 |
job application such as answering to question in application
|
|
|
|
| 1 |
import argparse
|
| 2 |
+
from pathlib import Path
|
| 3 |
from typing import Iterable
|
| 4 |
|
| 5 |
import requests
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
+
DEFAULT_MODEL = "allenai/olmo-3.1-32b-think:free"
|
| 9 |
DEFAULT_CONTENT_TYPE = "cover_letter"
|
| 10 |
|
| 11 |
|
| 12 |
def readable_file(path: str) -> str:
|
| 13 |
+
"""
|
| 14 |
+
Validate that the file exists and has a supported extension.
|
| 15 |
+
|
| 16 |
+
Args:
|
| 17 |
+
path: File path to validate
|
| 18 |
+
|
| 19 |
+
Returns:
|
| 20 |
+
Original path string if valid
|
| 21 |
+
|
| 22 |
+
Raises:
|
| 23 |
+
ArgumentTypeError: If file doesn't exist or has unsupported extension
|
| 24 |
+
"""
|
| 25 |
+
file_path = Path(path)
|
| 26 |
+
if not file_path.is_file():
|
| 27 |
raise argparse.ArgumentTypeError(f"File not found: {path}")
|
| 28 |
if not path.lower().endswith((".pdf", ".md", ".json", ".txt")):
|
| 29 |
raise argparse.ArgumentTypeError(
|
|
|
|
| 33 |
|
| 34 |
|
| 35 |
def valid_temp(temp: str) -> float:
|
| 36 |
+
"""
|
| 37 |
+
Ensure temperature is within a reasonable range.
|
| 38 |
+
|
| 39 |
+
Args:
|
| 40 |
+
temp: Temperature value as string
|
| 41 |
+
|
| 42 |
+
Returns:
|
| 43 |
+
Temperature as float
|
| 44 |
+
|
| 45 |
+
Raises:
|
| 46 |
+
ArgumentTypeError: If temperature is outside valid range [0, 2]
|
| 47 |
+
"""
|
| 48 |
value = float(temp)
|
| 49 |
if not (0 <= value <= 2):
|
| 50 |
raise argparse.ArgumentTypeError("Temperature must be between 0 and 2.")
|
|
|
|
| 53 |
|
| 54 |
def is_valid_url(
    job_posting: str, allowed_statuses: Iterable[int] | None = None
) -> str:
    """
    Validate that a URL is reachable and returns an acceptable HTTP status.

    Defaults to any 2xx or 3xx response (common successful codes).

    Args:
        job_posting: The URL for the job posting
        allowed_statuses: Specific status codes that are considered valid.
            If None (default), any 200-399 status is accepted.

    Returns:
        URL of the job posting if successful, error message if failed
    """
    if allowed_statuses is None:
        # All 2xx and 3xx responses are considered "valid"
        allowed_statuses = range(200, 400)

    try:
        # Context manager closes the streamed connection deterministically;
        # the body is never read, so stream=True avoids downloading it.
        with requests.get(
            job_posting, timeout=30, allow_redirects=True, stream=True
        ) as response:
            # Honour the caller-supplied status whitelist. raise_for_status()
            # would ignore allowed_statuses and only reject 4xx/5xx.
            if response.status_code not in allowed_statuses:
                return f"Error: unexpected status code {response.status_code}"
            return job_posting
    except requests.exceptions.RequestException as e:
        return f"Error: {e.response.text if e.response else 'Unknown error'}"
|
| 82 |
|
| 83 |
|
| 84 |
def handle_cli() -> argparse.Namespace:
|
| 85 |
+
"""
|
| 86 |
+
Parse and validate CLI arguments for job application generator.
|
| 87 |
+
|
| 88 |
+
Returns:
|
| 89 |
+
Parsed command-line arguments namespace
|
| 90 |
+
"""
|
| 91 |
parser = argparse.ArgumentParser(
|
| 92 |
description="""Assist the candidate in writing content for
|
| 93 |
job application such as answering to question in application
|
src/job_writing_agent/utils/config.py
CHANGED
|
@@ -1,25 +1,44 @@
|
|
| 1 |
"""
|
| 2 |
Configuration utilities for the job writer application.
|
| 3 |
|
| 4 |
-
This module provides functions for initializing and configuring
|
| 5 |
language models and other resources.
|
| 6 |
"""
|
| 7 |
|
|
|
|
| 8 |
import os
|
| 9 |
-
|
|
|
|
| 10 |
from langchain.chat_models import init_chat_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
config = config or {}
|
| 15 |
-
|
| 16 |
# Model configuration with defaults
|
| 17 |
model_name = config.get("model_name", os.getenv("OLLAMA_MODEL", "llama3.2:latest"))
|
| 18 |
temperature = float(config.get("temperature", "0.3"))
|
| 19 |
precise_temperature = float(config.get("precise_temperature", "0.2"))
|
| 20 |
-
|
| 21 |
# Initialize models
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
| 24 |
|
| 25 |
-
return
|
|
|
|
| 1 |
"""
|
| 2 |
Configuration utilities for the job writer application.
|
| 3 |
|
| 4 |
+
This module provides functions for initializing and configuring
|
| 5 |
language models and other resources.
|
| 6 |
"""
|
| 7 |
|
| 8 |
+
# Standard library imports
|
| 9 |
import os
|
| 10 |
+
|
| 11 |
+
# Third-party imports
|
| 12 |
from langchain.chat_models import init_chat_model
|
| 13 |
+
from langchain_core.language_models.chat_models import BaseChatModel
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def init_models(
    config: dict[str, str | float] | None = None,
) -> tuple[BaseChatModel, BaseChatModel]:
    """
    Initialize language models based on configuration.

    Args:
        config: Optional configuration dictionary with keys:
            - model_name: Name of the model to use
            - temperature: Temperature for general LLM
            - precise_temperature: Temperature for precise LLM

    Returns:
        Tuple of (general_llm, precise_llm) instances
    """
    settings = config or {}

    # Resolve the model spec once; fall back to env var, then hard default.
    model_spec = "ollama:{}".format(
        settings.get("model_name", os.getenv("OLLAMA_MODEL", "llama3.2:latest"))
    )
    general_temp = float(settings.get("temperature", "0.3"))
    precise_temp = float(settings.get("precise_temperature", "0.2"))

    # One model instance per temperature profile.
    return (
        init_chat_model(model_spec, temperature=general_temp),
        init_chat_model(model_spec, temperature=precise_temp),
    )
|
src/job_writing_agent/utils/document_processing.py
CHANGED
|
@@ -2,27 +2,28 @@
|
|
| 2 |
Document processing utilities for parsing resumes and job descriptions.
|
| 3 |
"""
|
| 4 |
|
|
|
|
| 5 |
import logging
|
| 6 |
import os
|
| 7 |
import re
|
| 8 |
from pathlib import Path
|
| 9 |
from urllib.parse import urlparse
|
| 10 |
-
from typing_extensions import Dict, List, Any
|
| 11 |
-
|
| 12 |
|
|
|
|
| 13 |
import dspy
|
| 14 |
from langchain_community.document_loaders import PyPDFLoader, AsyncChromiumLoader
|
| 15 |
from langchain_community.document_transformers import Html2TextTransformer
|
|
|
|
| 16 |
from langchain_text_splitters import (
|
| 17 |
RecursiveCharacterTextSplitter,
|
| 18 |
MarkdownHeaderTextSplitter,
|
| 19 |
)
|
| 20 |
-
from langchain_core.documents import Document
|
| 21 |
from langfuse import observe
|
| 22 |
from pydantic import BaseModel, Field
|
|
|
|
| 23 |
|
| 24 |
-
# Local imports
|
| 25 |
-
from .errors import
|
| 26 |
|
| 27 |
# Set up logging
|
| 28 |
logger = logging.getLogger(__name__)
|
|
@@ -64,8 +65,8 @@ class ResumeSection(BaseModel):
|
|
| 64 |
class StructuredResume(BaseModel):
|
| 65 |
"""Model for a structured resume with sections."""
|
| 66 |
|
| 67 |
-
sections:
|
| 68 |
-
contact_info:
|
| 69 |
description="Contact information extracted from the resume"
|
| 70 |
)
|
| 71 |
|
|
@@ -122,7 +123,7 @@ def clean_resume_text(text: str) -> str:
|
|
| 122 |
|
| 123 |
|
| 124 |
@observe()
|
| 125 |
-
def extract_contact_info(text: str) ->
|
| 126 |
"""Extract contact information from resume text.
|
| 127 |
|
| 128 |
Args:
|
|
@@ -162,7 +163,7 @@ def extract_contact_info(text: str) -> Dict[str, str]:
|
|
| 162 |
|
| 163 |
|
| 164 |
@observe()
|
| 165 |
-
def identify_resume_sections(text: str) ->
|
| 166 |
"""Identify sections in a resume text.
|
| 167 |
|
| 168 |
Args:
|
|
@@ -231,16 +232,33 @@ def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
|
|
| 231 |
|
| 232 |
|
| 233 |
def _collapse_ws(text: str) -> str:
|
| 234 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
text = re.sub(r"\n\s*([•\-–])\s*", r"\n\1 ", text)
|
| 236 |
return re.sub(r"[ \t\r\f\v]+", " ", text).replace(" \n", "\n").strip()
|
| 237 |
|
| 238 |
|
| 239 |
def _is_heading(line: str) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
return line.isupper() and len(line.split()) <= 5 and not re.search(r"\d", line)
|
| 241 |
|
| 242 |
|
| 243 |
-
def parse_resume(file_path: str | Path) ->
|
| 244 |
"""
|
| 245 |
Load a résumé from PDF or TXT file → list[Document] chunks
|
| 246 |
(≈400 chars, 50‑char overlap) with {source, section} metadata.
|
|
@@ -326,7 +344,7 @@ async def get_job_description(file_path_or_url: str) -> Document:
|
|
| 326 |
)
|
| 327 |
|
| 328 |
|
| 329 |
-
async def scrape_job_description_from_web(urls:
|
| 330 |
"""This function will first scrape the data from the job listing.
|
| 331 |
Then using the recursive splitter using the different seperators,
|
| 332 |
it preserves the paragraphs, lines and words"""
|
|
@@ -393,11 +411,15 @@ async def parse_job_description_from_url(url: str) -> Document:
|
|
| 393 |
# 3. Process content with the LLM
|
| 394 |
try:
|
| 395 |
logger.info("Processing content with DSPy LLM...")
|
| 396 |
-
# Configure DSPy LM
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
dspy.configure(
|
| 398 |
lm=dspy.LM(
|
| 399 |
"cerebras/qwen-3-32b",
|
| 400 |
-
api_key=
|
| 401 |
temperature=0.1,
|
| 402 |
max_tokens=60000, # Note: This max_tokens is unusually high
|
| 403 |
)
|
|
|
|
| 2 |
Document processing utilities for parsing resumes and job descriptions.
|
| 3 |
"""
|
| 4 |
|
| 5 |
+
# Standard library imports
|
| 6 |
import logging
|
| 7 |
import os
|
| 8 |
import re
|
| 9 |
from pathlib import Path
|
| 10 |
from urllib.parse import urlparse
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# Third-party imports
|
| 13 |
import dspy
|
| 14 |
from langchain_community.document_loaders import PyPDFLoader, AsyncChromiumLoader
|
| 15 |
from langchain_community.document_transformers import Html2TextTransformer
|
| 16 |
+
from langchain_core.documents import Document
|
| 17 |
from langchain_text_splitters import (
|
| 18 |
RecursiveCharacterTextSplitter,
|
| 19 |
MarkdownHeaderTextSplitter,
|
| 20 |
)
|
|
|
|
| 21 |
from langfuse import observe
|
| 22 |
from pydantic import BaseModel, Field
|
| 23 |
+
from typing_extensions import Any
|
| 24 |
|
| 25 |
+
# Local imports
|
| 26 |
+
from .errors import JobDescriptionParsingError, LLMProcessingError, URLExtractionError
|
| 27 |
|
| 28 |
# Set up logging
|
| 29 |
logger = logging.getLogger(__name__)
|
|
|
|
| 65 |
class StructuredResume(BaseModel):
|
| 66 |
"""Model for a structured resume with sections."""
|
| 67 |
|
| 68 |
+
sections: list[ResumeSection] = Field(description="List of resume sections")
|
| 69 |
+
contact_info: dict[str, str] = Field(
|
| 70 |
description="Contact information extracted from the resume"
|
| 71 |
)
|
| 72 |
|
|
|
|
| 123 |
|
| 124 |
|
| 125 |
@observe()
|
| 126 |
+
def extract_contact_info(text: str) -> dict[str, str]:
|
| 127 |
"""Extract contact information from resume text.
|
| 128 |
|
| 129 |
Args:
|
|
|
|
| 163 |
|
| 164 |
|
| 165 |
@observe()
|
| 166 |
+
def identify_resume_sections(text: str) -> list[dict[str, Any]]:
|
| 167 |
"""Identify sections in a resume text.
|
| 168 |
|
| 169 |
Args:
|
|
|
|
| 232 |
|
| 233 |
|
| 234 |
def _collapse_ws(text: str) -> str:
|
| 235 |
+
"""
|
| 236 |
+
Collapse stray whitespace but keep bullet breaks.
|
| 237 |
+
|
| 238 |
+
Args:
|
| 239 |
+
text: Input text with potential whitespace issues
|
| 240 |
+
|
| 241 |
+
Returns:
|
| 242 |
+
Text with collapsed whitespace
|
| 243 |
+
"""
|
| 244 |
text = re.sub(r"\n\s*([•\-–])\s*", r"\n\1 ", text)
|
| 245 |
return re.sub(r"[ \t\r\f\v]+", " ", text).replace(" \n", "\n").strip()
|
| 246 |
|
| 247 |
|
| 248 |
def _is_heading(line: str) -> bool:
|
| 249 |
+
"""
|
| 250 |
+
Check if a line is a heading (all uppercase, short, no digits).
|
| 251 |
+
|
| 252 |
+
Args:
|
| 253 |
+
line: Line of text to check
|
| 254 |
+
|
| 255 |
+
Returns:
|
| 256 |
+
True if line appears to be a heading
|
| 257 |
+
"""
|
| 258 |
return line.isupper() and len(line.split()) <= 5 and not re.search(r"\d", line)
|
| 259 |
|
| 260 |
|
| 261 |
+
def parse_resume(file_path: str | Path) -> list[Document]:
|
| 262 |
"""
|
| 263 |
Load a résumé from PDF or TXT file → list[Document] chunks
|
| 264 |
(≈400 chars, 50‑char overlap) with {source, section} metadata.
|
|
|
|
| 344 |
)
|
| 345 |
|
| 346 |
|
| 347 |
+
async def scrape_job_description_from_web(urls: list[str]) -> str:
|
| 348 |
"""This function will first scrape the data from the job listing.
|
| 349 |
Then, using the recursive splitter with the different separators,
|
| 350 |
it preserves the paragraphs, lines and words"""
|
|
|
|
| 411 |
# 3. Process content with the LLM
|
| 412 |
try:
|
| 413 |
logger.info("Processing content with DSPy LLM...")
|
| 414 |
+
# Configure DSPy LM with safe environment variable access
|
| 415 |
+
cerebras_api_key = os.getenv("CEREBRAS_API_KEY")
|
| 416 |
+
if not cerebras_api_key:
|
| 417 |
+
raise ValueError("CEREBRAS_API_KEY environment variable not set")
|
| 418 |
+
|
| 419 |
dspy.configure(
|
| 420 |
lm=dspy.LM(
|
| 421 |
"cerebras/qwen-3-32b",
|
| 422 |
+
api_key=cerebras_api_key,
|
| 423 |
temperature=0.1,
|
| 424 |
max_tokens=60000, # Note: This max_tokens is unusually high
|
| 425 |
)
|
src/job_writing_agent/utils/vector_store.py
CHANGED
|
@@ -1,13 +1,12 @@
|
|
| 1 |
"""
|
| 2 |
Vector storage utilities for the job writer application.
|
| 3 |
|
| 4 |
-
This module provides functions for storing and retrieving
|
| 5 |
documents from vector databases.
|
| 6 |
"""
|
| 7 |
|
| 8 |
# Standard library imports
|
| 9 |
import os
|
| 10 |
-
from typing_extensions import List, Optional
|
| 11 |
|
| 12 |
# Third-party library imports
|
| 13 |
from langchain_core.documents import Document
|
|
@@ -18,38 +17,37 @@ from pinecone import Pinecone as PineconeClient, ServerlessSpec
|
|
| 18 |
# Default configuration
|
| 19 |
DEFAULT_PINECONE_INDEX = "job-writer-vector"
|
| 20 |
|
|
|
|
| 21 |
class VectorStoreManager:
|
| 22 |
"""Manager class for vector store operations."""
|
| 23 |
-
|
| 24 |
def __init__(
|
| 25 |
self,
|
| 26 |
index_name: str = DEFAULT_PINECONE_INDEX,
|
| 27 |
-
embedding_model: str = "llama3.2:latest"
|
| 28 |
):
|
| 29 |
"""Initialize the vector store manager.
|
| 30 |
-
|
| 31 |
Args:
|
| 32 |
api_key: Pinecone API key (will use env var if not provided)
|
| 33 |
index_name: Name of the Pinecone index to use
|
| 34 |
embedding_model: Name of the Ollama model to use for embeddings
|
| 35 |
"""
|
| 36 |
-
api_key= os.getenv("PINECONE_API_KEY")
|
| 37 |
if not api_key:
|
| 38 |
raise ValueError("Environment variable PINECONE_API_KEY not set.")
|
| 39 |
-
|
| 40 |
self.index_name = index_name
|
| 41 |
-
|
| 42 |
# Initialize embeddings
|
| 43 |
-
self.embeddings = OllamaEmbeddings(
|
| 44 |
-
|
| 45 |
-
)
|
| 46 |
-
|
| 47 |
# Initialize Pinecone client
|
| 48 |
self.client = PineconeClient(api_key=api_key)
|
| 49 |
-
|
| 50 |
# Ensure index exists
|
| 51 |
self._ensure_index_exists()
|
| 52 |
-
|
| 53 |
def _ensure_index_exists(self):
|
| 54 |
"""Make sure the required index exists, create if not."""
|
| 55 |
# Get embedding dimension from our embeddings model
|
|
@@ -60,7 +58,7 @@ class VectorStoreManager:
|
|
| 60 |
print(f"Error determining embedding dimension: {e}")
|
| 61 |
print("Falling back to default dimension of 384")
|
| 62 |
embedding_dim = 384 # Common default for Ollama embeddings
|
| 63 |
-
|
| 64 |
# Check if the index exists
|
| 65 |
index_exists = False
|
| 66 |
try:
|
|
@@ -69,7 +67,7 @@ class VectorStoreManager:
|
|
| 69 |
index_exists = self.index_name in index_list
|
| 70 |
except Exception as e:
|
| 71 |
print(f"Error checking Pinecone indexes: {e}")
|
| 72 |
-
|
| 73 |
# Create index if it doesn't exist
|
| 74 |
if not index_exists:
|
| 75 |
try:
|
|
@@ -78,20 +76,22 @@ class VectorStoreManager:
|
|
| 78 |
name=self.index_name,
|
| 79 |
dimension=embedding_dim,
|
| 80 |
spec=ServerlessSpec(region="us-east-1", cloud="aws"),
|
| 81 |
-
metric="cosine"
|
| 82 |
)
|
| 83 |
print(f"Successfully created index: {self.index_name}")
|
| 84 |
except Exception as e:
|
| 85 |
if "ALREADY_EXISTS" in str(e):
|
| 86 |
-
print(
|
|
|
|
|
|
|
| 87 |
else:
|
| 88 |
print(f"Error creating index: {e}")
|
| 89 |
else:
|
| 90 |
print(f"Using Pinecone Index: {self.index_name}")
|
| 91 |
-
|
| 92 |
-
def store_documents(self, docs:
|
| 93 |
"""Store documents in vector database.
|
| 94 |
-
|
| 95 |
Args:
|
| 96 |
docs: List of Document objects to store
|
| 97 |
namespace: Namespace to store documents under
|
|
@@ -99,58 +99,60 @@ class VectorStoreManager:
|
|
| 99 |
try:
|
| 100 |
# Get the index
|
| 101 |
index = self.client.Index(self.index_name)
|
| 102 |
-
|
| 103 |
# Create the vector store
|
| 104 |
vector_store = Pinecone(
|
| 105 |
index=index,
|
| 106 |
embedding=self.embeddings,
|
| 107 |
text_key="text",
|
| 108 |
-
namespace=namespace
|
| 109 |
)
|
| 110 |
-
|
| 111 |
# Add documents
|
| 112 |
vector_store.add_documents(docs)
|
| 113 |
-
print(
|
|
|
|
|
|
|
| 114 |
except Exception as e:
|
| 115 |
print(f"Error storing documents: {e}")
|
| 116 |
raise
|
| 117 |
-
|
| 118 |
-
def retrieve_similar(
|
|
|
|
|
|
|
| 119 |
"""Retrieve similar documents based on a query.
|
| 120 |
-
|
| 121 |
Args:
|
| 122 |
query: The query text to search for
|
| 123 |
namespace: Namespace to search in
|
| 124 |
k: Number of results to return
|
| 125 |
-
|
| 126 |
Returns:
|
| 127 |
List of Document objects
|
| 128 |
"""
|
| 129 |
try:
|
| 130 |
# Get the index
|
| 131 |
index = self.client.Index(self.index_name)
|
| 132 |
-
|
| 133 |
# Create the vector store
|
| 134 |
vectorstore = Pinecone(
|
| 135 |
index=index,
|
| 136 |
embedding=self.embeddings,
|
| 137 |
text_key="text",
|
| 138 |
-
namespace=namespace
|
| 139 |
)
|
| 140 |
-
|
| 141 |
# Search for similar documents
|
| 142 |
docs = vectorstore.similarity_search(query, k=k, namespace=namespace)
|
| 143 |
return docs
|
| 144 |
except Exception as e:
|
| 145 |
print(f"Error retrieving documents: {e}")
|
| 146 |
return []
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
|
| 151 |
-
VectorStoreManager = VectorStoreManager()
|
| 152 |
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
)
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
Vector storage utilities for the job writer application.
|
| 3 |
|
| 4 |
+
This module provides functions for storing and retrieving
|
| 5 |
documents from vector databases.
|
| 6 |
"""
|
| 7 |
|
| 8 |
# Standard library imports
|
| 9 |
import os
|
|
|
|
| 10 |
|
| 11 |
# Third-party library imports
|
| 12 |
from langchain_core.documents import Document
|
|
|
|
| 17 |
# Default configuration
|
| 18 |
DEFAULT_PINECONE_INDEX = "job-writer-vector"
|
| 19 |
|
| 20 |
+
|
| 21 |
class VectorStoreManager:
|
| 22 |
"""Manager class for vector store operations."""
|
| 23 |
+
|
| 24 |
def __init__(
    self,
    index_name: str = DEFAULT_PINECONE_INDEX,
    embedding_model: str = "llama3.2:latest",
):
    """Initialize the vector store manager.

    Reads the Pinecone API key from the PINECONE_API_KEY environment
    variable, builds the Ollama embeddings, and ensures the target
    index exists (creating it if necessary).

    Args:
        index_name: Name of the Pinecone index to use
        embedding_model: Name of the Ollama model to use for embeddings

    Raises:
        ValueError: If the PINECONE_API_KEY environment variable is not set.
    """
    api_key = os.getenv("PINECONE_API_KEY")
    if not api_key:
        raise ValueError("Environment variable PINECONE_API_KEY not set.")

    self.index_name = index_name

    # Initialize embeddings
    self.embeddings = OllamaEmbeddings(model=embedding_model)

    # Initialize Pinecone client
    self.client = PineconeClient(api_key=api_key)

    # Ensure index exists (may create it; see _ensure_index_exists)
    self._ensure_index_exists()
| 51 |
def _ensure_index_exists(self):
|
| 52 |
"""Make sure the required index exists, create if not."""
|
| 53 |
# Get embedding dimension from our embeddings model
|
|
|
|
| 58 |
print(f"Error determining embedding dimension: {e}")
|
| 59 |
print("Falling back to default dimension of 384")
|
| 60 |
embedding_dim = 384 # Common default for Ollama embeddings
|
| 61 |
+
|
| 62 |
# Check if the index exists
|
| 63 |
index_exists = False
|
| 64 |
try:
|
|
|
|
| 67 |
index_exists = self.index_name in index_list
|
| 68 |
except Exception as e:
|
| 69 |
print(f"Error checking Pinecone indexes: {e}")
|
| 70 |
+
|
| 71 |
# Create index if it doesn't exist
|
| 72 |
if not index_exists:
|
| 73 |
try:
|
|
|
|
| 76 |
name=self.index_name,
|
| 77 |
dimension=embedding_dim,
|
| 78 |
spec=ServerlessSpec(region="us-east-1", cloud="aws"),
|
| 79 |
+
metric="cosine",
|
| 80 |
)
|
| 81 |
print(f"Successfully created index: {self.index_name}")
|
| 82 |
except Exception as e:
|
| 83 |
if "ALREADY_EXISTS" in str(e):
|
| 84 |
+
print(
|
| 85 |
+
f"Index {self.index_name} already exists (created in another process)"
|
| 86 |
+
)
|
| 87 |
else:
|
| 88 |
print(f"Error creating index: {e}")
|
| 89 |
else:
|
| 90 |
print(f"Using Pinecone Index: {self.index_name}")
|
| 91 |
+
|
| 92 |
+
def store_documents(self, docs: list[Document], namespace: str) -> None:
    """Store documents in vector database.

    Args:
        docs: List of Document objects to store
        namespace: Namespace to store documents under
    """
    try:
        # Bind a vector store to the configured index and namespace.
        store = Pinecone(
            index=self.client.Index(self.index_name),
            embedding=self.embeddings,
            text_key="text",
            namespace=namespace,
        )
        store.add_documents(docs)
        print(
            f"Successfully stored {len(docs)} documents in namespace: {namespace}"
        )
    except Exception as e:
        # Surface the failure to the caller after reporting it.
        print(f"Error storing documents: {e}")
        raise
|
| 119 |
+
|
| 120 |
+
def retrieve_similar(
    self, query: str, namespace: str, k: int = 3
) -> list[Document]:
    """Retrieve similar documents based on a query.

    Args:
        query: The query text to search for
        namespace: Namespace to search in
        k: Number of results to return

    Returns:
        List of Document objects
    """
    try:
        # Build the vector store over the configured index, then search.
        store = Pinecone(
            index=self.client.Index(self.index_name),
            embedding=self.embeddings,
            text_key="text",
            namespace=namespace,
        )
        return store.similarity_search(query, k=k, namespace=namespace)
    except Exception as e:
        # Degrade gracefully: an empty result set instead of a crash.
        print(f"Error retrieving documents: {e}")
        return []
|
|
|
|
|
|
|
|
|
|
| 151 |
|
|
|
|
| 152 |
|
| 153 |
+
# Example usage (commented out to prevent auto-execution)
|
| 154 |
+
# vector_store_manager = VectorStoreManager()
|
| 155 |
+
# vector_store_manager.store_documents(
|
| 156 |
+
# docs=[Document(page_content="Sample content", metadata={"source": "test"})],
|
| 157 |
+
# namespace="test_namespace"
|
| 158 |
+
# )
|
src/job_writing_agent/workflow.py
CHANGED
|
@@ -3,33 +3,36 @@ Workflow runner for the job application writer.
|
|
| 3 |
This module provides the JobWorkflow class and CLI runner.
|
| 4 |
"""
|
| 5 |
|
|
|
|
| 6 |
import asyncio
|
| 7 |
import logging
|
| 8 |
-
import sys
|
| 9 |
import os
|
|
|
|
| 10 |
from datetime import datetime
|
| 11 |
from functools import cached_property
|
| 12 |
-
from typing import
|
| 13 |
|
|
|
|
| 14 |
from langchain_core.tracers import ConsoleCallbackHandler, LangChainTracer
|
| 15 |
from langgraph.graph import StateGraph
|
| 16 |
from langgraph.graph.state import CompiledStateGraph
|
| 17 |
|
|
|
|
| 18 |
from job_writing_agent.agents.nodes import (
|
| 19 |
create_draft,
|
| 20 |
critique_draft,
|
| 21 |
finalize_document,
|
| 22 |
human_approval,
|
| 23 |
)
|
| 24 |
-
from job_writing_agent.classes import DataLoadState
|
| 25 |
-
from job_writing_agent.nodes.
|
| 26 |
from job_writing_agent.nodes.research_workflow import research_workflow
|
| 27 |
from job_writing_agent.utils.application_cli_interface import handle_cli
|
| 28 |
-
from job_writing_agent.utils.result_utils import print_result, save_result
|
| 29 |
from job_writing_agent.utils.logging.logging_decorators import (
|
| 30 |
-
log_execution,
|
| 31 |
log_errors,
|
|
|
|
| 32 |
)
|
|
|
|
| 33 |
|
| 34 |
logger = logging.getLogger(__name__)
|
| 35 |
|
|
@@ -84,12 +87,62 @@ class JobWorkflow:
|
|
| 84 |
return {
|
| 85 |
"resume_path": self.resume,
|
| 86 |
"job_description_source": self.job_description_source,
|
| 87 |
-
"
|
| 88 |
"current_node": "",
|
| 89 |
"messages": [],
|
| 90 |
"company_research_data": {},
|
| 91 |
}
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
def job_app_graph(self) -> StateGraph:
|
| 94 |
"""
|
| 95 |
Build and configure the job application workflow graph.
|
|
@@ -111,58 +164,40 @@ class JobWorkflow:
|
|
| 111 |
StateGraph
|
| 112 |
Configured LangGraph state machine ready for compilation.
|
| 113 |
"""
|
| 114 |
-
|
| 115 |
|
| 116 |
# Add workflow nodes (subgraphs and individual nodes)
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
# Set entry and exit points
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
def route_after_load(state: DataLoadState) -> str:
|
| 130 |
-
"""
|
| 131 |
-
Route based on next_node set by data loading subgraph.
|
| 132 |
-
|
| 133 |
-
The data loading subgraph sets next_node to either "load" (if validation
|
| 134 |
-
fails) or "research" (if validation passes).
|
| 135 |
-
|
| 136 |
-
Parameters
|
| 137 |
-
----------
|
| 138 |
-
state: DataLoadState
|
| 139 |
-
Current workflow state.
|
| 140 |
-
|
| 141 |
-
Returns
|
| 142 |
-
-------
|
| 143 |
-
str
|
| 144 |
-
Next node name: "load" or "research".
|
| 145 |
-
"""
|
| 146 |
-
next_node = state.get("next_node", "research") # Default to research
|
| 147 |
-
logger.info(f"Routing after load: {next_node}")
|
| 148 |
-
return next_node
|
| 149 |
-
|
| 150 |
-
graph.add_conditional_edges(
|
| 151 |
"load",
|
| 152 |
-
route_after_load,
|
| 153 |
{
|
| 154 |
"load": "load", # Loop back to load subgraph if validation fails
|
| 155 |
-
"research": "
|
| 156 |
},
|
| 157 |
)
|
| 158 |
|
| 159 |
# Sequential edges for main workflow
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
|
|
|
| 164 |
|
| 165 |
-
return
|
| 166 |
|
| 167 |
def _get_callbacks(self) -> list:
|
| 168 |
"""
|
|
@@ -208,7 +243,7 @@ class JobWorkflow:
|
|
| 208 |
|
| 209 |
@log_execution
|
| 210 |
@log_errors
|
| 211 |
-
async def run(self) ->
|
| 212 |
"""
|
| 213 |
Execute the complete job application writer workflow.
|
| 214 |
|
|
@@ -289,7 +324,8 @@ class JobWorkflow:
|
|
| 289 |
Exception
|
| 290 |
If graph compilation fails (e.g., invalid edges, missing nodes).
|
| 291 |
"""
|
| 292 |
-
|
|
|
|
| 293 |
|
| 294 |
|
| 295 |
def main():
|
|
@@ -300,7 +336,6 @@ def main():
|
|
| 300 |
content=args.content_type,
|
| 301 |
)
|
| 302 |
result = asyncio.run(workflow.run())
|
| 303 |
-
# print(f"result: {result}")
|
| 304 |
if result:
|
| 305 |
print_result(args.content_type, result["output_data"])
|
| 306 |
save_result(args.content_type, result["output_data"])
|
|
|
|
| 3 |
This module provides the JobWorkflow class and CLI runner.
|
| 4 |
"""
|
| 5 |
|
| 6 |
+
# Standard library imports
|
| 7 |
import asyncio
|
| 8 |
import logging
|
|
|
|
| 9 |
import os
|
| 10 |
+
import sys
|
| 11 |
from datetime import datetime
|
| 12 |
from functools import cached_property
|
| 13 |
+
from typing import Any
|
| 14 |
|
| 15 |
+
# Third-party imports
|
| 16 |
from langchain_core.tracers import ConsoleCallbackHandler, LangChainTracer
|
| 17 |
from langgraph.graph import StateGraph
|
| 18 |
from langgraph.graph.state import CompiledStateGraph
|
| 19 |
|
| 20 |
+
# Local imports
|
| 21 |
from job_writing_agent.agents.nodes import (
|
| 22 |
create_draft,
|
| 23 |
critique_draft,
|
| 24 |
finalize_document,
|
| 25 |
human_approval,
|
| 26 |
)
|
| 27 |
+
from job_writing_agent.classes import DataLoadState, ResearchState
|
| 28 |
+
from job_writing_agent.nodes.data_loading_workflow import data_loading_workflow
|
| 29 |
from job_writing_agent.nodes.research_workflow import research_workflow
|
| 30 |
from job_writing_agent.utils.application_cli_interface import handle_cli
|
|
|
|
| 31 |
from job_writing_agent.utils.logging.logging_decorators import (
|
|
|
|
| 32 |
log_errors,
|
| 33 |
+
log_execution,
|
| 34 |
)
|
| 35 |
+
from job_writing_agent.utils.result_utils import print_result, save_result
|
| 36 |
|
| 37 |
logger = logging.getLogger(__name__)
|
| 38 |
|
|
|
|
| 87 |
return {
|
| 88 |
"resume_path": self.resume,
|
| 89 |
"job_description_source": self.job_description_source,
|
| 90 |
+
"content_category": self.content,
|
| 91 |
"current_node": "",
|
| 92 |
"messages": [],
|
| 93 |
"company_research_data": {},
|
| 94 |
}
|
| 95 |
|
| 96 |
+
# Conditional routing after data loading
|
| 97 |
+
def route_after_load(self, state: DataLoadState) -> str:
|
| 98 |
+
"""
|
| 99 |
+
Route based on next_node set by data loading subgraph.
|
| 100 |
+
|
| 101 |
+
The data loading subgraph sets next_node to either "load" (if validation
|
| 102 |
+
fails) or "research" (if validation passes).
|
| 103 |
+
|
| 104 |
+
Parameters
|
| 105 |
+
----------
|
| 106 |
+
state: DataLoadState
|
| 107 |
+
Current workflow state.
|
| 108 |
+
|
| 109 |
+
Returns
|
| 110 |
+
-------
|
| 111 |
+
str
|
| 112 |
+
Next node name: "load" or "research".
|
| 113 |
+
"""
|
| 114 |
+
next_node = state.get("next_node", "research") # Default to research
|
| 115 |
+
logger.info(f"Routing after load: {next_node}")
|
| 116 |
+
return next_node
|
| 117 |
+
|
| 118 |
+
def dataload_to_research_adapter(self, state: DataLoadState) -> ResearchState:
|
| 119 |
+
"""
|
| 120 |
+
Adapter to convert DataLoadState to ResearchState.
|
| 121 |
+
|
| 122 |
+
Extracts only fields needed for research workflow following the
|
| 123 |
+
adapter pattern recommended by LangGraph documentation.
|
| 124 |
+
|
| 125 |
+
Parameters
|
| 126 |
+
----------
|
| 127 |
+
state: DataLoadState
|
| 128 |
+
Current workflow state with loaded data.
|
| 129 |
+
|
| 130 |
+
Returns
|
| 131 |
+
-------
|
| 132 |
+
ResearchState
|
| 133 |
+
State formatted for research subgraph with required fields.
|
| 134 |
+
"""
|
| 135 |
+
logger.info("Adapter for converting DataLoadState to ResearchState")
|
| 136 |
+
|
| 137 |
+
return ResearchState(
|
| 138 |
+
company_research_data=state.get("company_research_data", {}),
|
| 139 |
+
attempted_search_queries=[],
|
| 140 |
+
current_node="",
|
| 141 |
+
content_category=state.get("content_category", ""),
|
| 142 |
+
messages=state.get("messages", []),
|
| 143 |
+
)
|
| 144 |
+
|
| 145 |
+
@cached_property
|
| 146 |
def job_app_graph(self) -> StateGraph:
|
| 147 |
"""
|
| 148 |
Build and configure the job application workflow graph.
|
|
|
|
| 164 |
StateGraph
|
| 165 |
Configured LangGraph state machine ready for compilation.
|
| 166 |
"""
|
| 167 |
+
agent_workflow_graph = StateGraph(DataLoadState)
|
| 168 |
|
| 169 |
# Add workflow nodes (subgraphs and individual nodes)
|
| 170 |
+
agent_workflow_graph.add_node("load", data_loading_workflow)
|
| 171 |
+
agent_workflow_graph.add_node(
|
| 172 |
+
"to_research_adapter", self.dataload_to_research_adapter
|
| 173 |
+
)
|
| 174 |
+
agent_workflow_graph.add_node("research", research_workflow)
|
| 175 |
+
agent_workflow_graph.add_node("create_draft", create_draft)
|
| 176 |
+
agent_workflow_graph.add_node("critique", critique_draft)
|
| 177 |
+
agent_workflow_graph.add_node("human_approval", human_approval)
|
| 178 |
+
agent_workflow_graph.add_node("finalize", finalize_document)
|
| 179 |
|
| 180 |
# Set entry and exit points
|
| 181 |
+
agent_workflow_graph.set_entry_point("load")
|
| 182 |
+
agent_workflow_graph.set_finish_point("finalize")
|
| 183 |
+
|
| 184 |
+
agent_workflow_graph.add_conditional_edges(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
"load",
|
| 186 |
+
self.route_after_load,
|
| 187 |
{
|
| 188 |
"load": "load", # Loop back to load subgraph if validation fails
|
| 189 |
+
"research": "to_research_adapter", # Route to adapter first
|
| 190 |
},
|
| 191 |
)
|
| 192 |
|
| 193 |
# Sequential edges for main workflow
|
| 194 |
+
agent_workflow_graph.add_edge("to_research_adapter", "research")
|
| 195 |
+
agent_workflow_graph.add_edge("research", "create_draft")
|
| 196 |
+
agent_workflow_graph.add_edge("create_draft", "critique")
|
| 197 |
+
agent_workflow_graph.add_edge("critique", "human_approval")
|
| 198 |
+
agent_workflow_graph.add_edge("human_approval", "finalize")
|
| 199 |
|
| 200 |
+
return agent_workflow_graph
|
| 201 |
|
| 202 |
def _get_callbacks(self) -> list:
|
| 203 |
"""
|
|
|
|
| 243 |
|
| 244 |
@log_execution
|
| 245 |
@log_errors
|
| 246 |
+
async def run(self) -> dict[str, Any] | None:
|
| 247 |
"""
|
| 248 |
Execute the complete job application writer workflow.
|
| 249 |
|
|
|
|
| 324 |
Exception
|
| 325 |
If graph compilation fails (e.g., invalid edges, missing nodes).
|
| 326 |
"""
|
| 327 |
+
compiled_graph = self.job_app_graph.compile()
|
| 328 |
+
return compiled_graph
|
| 329 |
|
| 330 |
|
| 331 |
def main():
|
|
|
|
| 336 |
content=args.content_type,
|
| 337 |
)
|
| 338 |
result = asyncio.run(workflow.run())
|
|
|
|
| 339 |
if result:
|
| 340 |
print_result(args.content_type, result["output_data"])
|
| 341 |
save_result(args.content_type, result["output_data"])
|