Rishabh2095 committed on
Commit
a01026b
·
1 Parent(s): 046508a

Refactor job writing agent: Improved code structure by introducing new data loading classes, enhancing logging practices, and ensuring safe environment variable access. Updated workflow logic for better readability and maintainability.

Browse files
src/job_writing_agent/__init__.py CHANGED
@@ -7,7 +7,8 @@ using LangChain and LangGraph with LangSmith observability.
7
 
8
  __version__ = "0.1.0"
9
 
10
- import os, getpass
 
11
  import logging
12
  from pathlib import Path
13
  from dotenv import load_dotenv
@@ -16,77 +17,112 @@ from dotenv import load_dotenv
16
  # Set up logging
17
  logger = logging.getLogger(__name__)
18
  logger.setLevel(logging.INFO)
19
- log_dir = Path(__file__).parent / 'logs'
20
  log_dir.mkdir(exist_ok=True)
21
- logger.addHandler(logging.FileHandler(log_dir / 'job_writer.log', mode='a'))
22
- logger.info("Logger initialized. Writing to %s", Path(__file__).parent / 'job_writer.log')
 
 
23
 
24
  # Load environment variables from .env file
25
- env_path = Path(__file__).parent / '.env'
26
 
27
 
28
  def _set_env(var: str):
29
  if not os.environ.get(var):
30
- os.environ[var] = getpass.getpass(f"{var}: ")
31
  logger.info(f"{var} set to {os.environ[var]}")
32
 
 
33
  if env_path.exists():
34
- logger.info("Loading environment variables from %s", env_path)
35
  load_dotenv(dotenv_path=env_path, override=True)
36
  else:
37
- logger.warning(".env file not found at %s. Using system environment variables.", env_path)
 
 
38
 
39
  # Check for critical environment variables
40
  if not os.getenv("TAVILY_API_KEY"):
41
- logger.warning("TAVILY_API_KEY environment variable is not set." \
42
- " Failed to get TAVILY_API_KEY at Path %s", env_path)
 
 
 
43
  _set_env("TAVILY_API_KEY")
44
 
45
 
46
  if not os.getenv("GEMINI_API_KEY"):
47
- logger.warning("GEMINI_API_KEY environment variable is not set. " \
48
- "Failed to get GEMINI_API_KEY at Path %s", env_path)
 
 
 
49
  _set_env("GEMINI_API_KEY")
50
 
51
 
52
  if not os.getenv("PINECONE_API_KEY"):
53
- logger.warning("PINECONE_API_KEY environment variable is not set." \
54
- " Failed to get PINECONE_API_KEY at Path %s", env_path)
 
 
 
55
  _set_env("PINECONE_API_KEY")
56
 
57
  if not os.getenv("LANGFUSE_PUBLIC_KEY"):
58
- logger.warning("LANGFUSE_PUBLIC_KEY environment variable is not set." \
59
- " Failed to get LANGFUSE_PUBLIC_KEY at Path %s", env_path)
 
 
 
60
  _set_env("LANGFUSE_PUBLIC_KEY")
61
 
62
  if not os.getenv("LANGFUSE_SECRET_KEY"):
63
- logger.warning("LANGFUSE_SECRET_KEY environment variable is not set." \
64
- " Failed to get LANGFUSE_SECRET_KEY at Path %s", env_path)
 
 
 
65
  _set_env("LANGFUSE_SECRET_KEY")
66
 
67
  if not os.getenv("LANGSMITH_API_KEY"):
68
- logger.warning("LANGSMITH_API_KEY environment variable is not set." \
69
- " Failed to get LANGSMITH_API_KEY at Path %s", env_path)
 
 
 
70
  _set_env("LANGSMITH_API_KEY")
71
 
72
  if not os.getenv("OPENROUTER_API_KEY"):
73
- logger.warning("OPENROUTER_API_KEY environment variable is not set." \
74
- " Failed to get OPENROUTER_API_KEY at Path %s", env_path)
 
 
 
75
  _set_env("OPENROUTER_API_KEY")
76
 
77
  if not os.getenv("LANGSMITH_PROJECT"):
78
- logger.warning("LANGSMITH_PROJECT environment variable is not set." \
79
- " Failed to get LANGSMITH_PROJECT at Path %s", env_path)
 
 
 
80
  _set_env("LANGSMITH_PROJECT")
81
 
82
  if not os.getenv("LANGSMITH_ENDPOINT"):
83
- logger.warning("LANGSMITH_ENDPOINT environment variable is not set." \
84
- " Failed to get LANGSMITH_ENDPOINT at Path %s", env_path)
 
 
 
85
  _set_env("LANGSMITH_ENDPOINT")
86
 
87
  if not os.getenv("CEREBRAS_API_KEY"):
88
- logger.warning("CEREBRAS_API_KEY environment variable is not set." \
89
- " Failed to get CEREBRAS_API_KEY at Path %s", env_path)
 
 
 
90
  _set_env("CEREBRAS_API_KEY")
91
 
92
  os.environ["LANGSMITH_TRACING"] = "true"
@@ -111,33 +147,48 @@ from dotenv import load_dotenv
111
 
112
  logger = logging.getLogger(__name__)
113
  logger.setLevel(logging.INFO)
114
- log_dir = Path(__file__).parent / 'logs'
115
  log_dir.mkdir(exist_ok=True)
116
- logger.addHandler(logging.FileHandler(log_dir / 'job_writer.log', mode='a'))
117
- logger.info("Logger initialized. Writing to %s", Path(__file__).parent / 'job_writer.log')
 
 
 
 
118
 
119
- env_path = Path(__file__).parent / '.env'
120
 
121
  def _set_env(var: str):
122
  if not os.environ.get(var):
123
  os.environ[var] = getpass.getpass(f"{var}: ")
124
  logger.info(f"{var} set to {os.environ[var]}")
125
 
 
126
  def load_environment_variables(key_array):
127
  for key in key_array:
128
  if not os.getenv(key):
129
- logger.warning(f"{key} environment variable is not set. Failed to get {key} at Path {env_path}")
 
 
130
  _set_env(key)
131
 
 
132
  if env_path.exists():
133
- logger.info("Loading environment variables from %s", env_path)
134
  load_dotenv(dotenv_path=env_path, override=True)
135
  else:
136
- logger.warning(".env file not found at %s. Using system environment variables.", env_path)
137
-
138
-
139
- environment_key_array = ["TAVILY_API_KEY", "GEMINI_API_KEY", "PINECONE_API_KEY", "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
 
 
 
 
 
 
 
 
140
  # Check for critical environment variables
141
  load_environment_variables(environment_key_array)
142
 
143
- __all__ = ["job_app_graph", "workflows/research_workflow"]
 
7
 
8
  __version__ = "0.1.0"
9
 
10
+ import os
11
+ from getpass import getpass
12
  import logging
13
  from pathlib import Path
14
  from dotenv import load_dotenv
 
17
  # Set up logging
18
  logger = logging.getLogger(__name__)
19
  logger.setLevel(logging.INFO)
20
+ log_dir = Path(__file__).parent / "logs"
21
  log_dir.mkdir(exist_ok=True)
22
+ logger.addHandler(logging.FileHandler(log_dir / "job_writer.log", mode="a"))
23
+ logger.info(
24
+ "Logger initialized. Writing to %s", Path(__file__).parent / "job_writer.log"
25
+ )
26
 
27
  # Load environment variables from .env file
28
+ env_path = Path(__file__).parent / ".env"
29
 
30
 
31
  def _set_env(var: str):
32
  if not os.environ.get(var):
33
+ os.environ[var] = getpass(f"{var}: ")
34
  logger.info(f"{var} set to {os.environ[var]}")
35
 
36
+
37
  if env_path.exists():
38
+ logger.info("Loading environment variables from %s", env_path)
39
  load_dotenv(dotenv_path=env_path, override=True)
40
  else:
41
+ logger.warning(
42
+ ".env file not found at %s. Using system environment variables.", env_path
43
+ )
44
 
45
  # Check for critical environment variables
46
  if not os.getenv("TAVILY_API_KEY"):
47
+ logger.warning(
48
+ "TAVILY_API_KEY environment variable is not set."
49
+ " Failed to get TAVILY_API_KEY at Path %s",
50
+ env_path,
51
+ )
52
  _set_env("TAVILY_API_KEY")
53
 
54
 
55
  if not os.getenv("GEMINI_API_KEY"):
56
+ logger.warning(
57
+ "GEMINI_API_KEY environment variable is not set. "
58
+ "Failed to get GEMINI_API_KEY at Path %s",
59
+ env_path,
60
+ )
61
  _set_env("GEMINI_API_KEY")
62
 
63
 
64
  if not os.getenv("PINECONE_API_KEY"):
65
+ logger.warning(
66
+ "PINECONE_API_KEY environment variable is not set."
67
+ " Failed to get PINECONE_API_KEY at Path %s",
68
+ env_path,
69
+ )
70
  _set_env("PINECONE_API_KEY")
71
 
72
  if not os.getenv("LANGFUSE_PUBLIC_KEY"):
73
+ logger.warning(
74
+ "LANGFUSE_PUBLIC_KEY environment variable is not set."
75
+ " Failed to get LANGFUSE_PUBLIC_KEY at Path %s",
76
+ env_path,
77
+ )
78
  _set_env("LANGFUSE_PUBLIC_KEY")
79
 
80
  if not os.getenv("LANGFUSE_SECRET_KEY"):
81
+ logger.warning(
82
+ "LANGFUSE_SECRET_KEY environment variable is not set."
83
+ " Failed to get LANGFUSE_SECRET_KEY at Path %s",
84
+ env_path,
85
+ )
86
  _set_env("LANGFUSE_SECRET_KEY")
87
 
88
  if not os.getenv("LANGSMITH_API_KEY"):
89
+ logger.warning(
90
+ "LANGSMITH_API_KEY environment variable is not set."
91
+ " Failed to get LANGSMITH_API_KEY at Path %s",
92
+ env_path,
93
+ )
94
  _set_env("LANGSMITH_API_KEY")
95
 
96
  if not os.getenv("OPENROUTER_API_KEY"):
97
+ logger.warning(
98
+ "OPENROUTER_API_KEY environment variable is not set."
99
+ " Failed to get OPENROUTER_API_KEY at Path %s",
100
+ env_path,
101
+ )
102
  _set_env("OPENROUTER_API_KEY")
103
 
104
  if not os.getenv("LANGSMITH_PROJECT"):
105
+ logger.warning(
106
+ "LANGSMITH_PROJECT environment variable is not set."
107
+ " Failed to get LANGSMITH_PROJECT at Path %s",
108
+ env_path,
109
+ )
110
  _set_env("LANGSMITH_PROJECT")
111
 
112
  if not os.getenv("LANGSMITH_ENDPOINT"):
113
+ logger.warning(
114
+ "LANGSMITH_ENDPOINT environment variable is not set."
115
+ " Failed to get LANGSMITH_ENDPOINT at Path %s",
116
+ env_path,
117
+ )
118
  _set_env("LANGSMITH_ENDPOINT")
119
 
120
  if not os.getenv("CEREBRAS_API_KEY"):
121
+ logger.warning(
122
+ "CEREBRAS_API_KEY environment variable is not set."
123
+ " Failed to get CEREBRAS_API_KEY at Path %s",
124
+ env_path,
125
+ )
126
  _set_env("CEREBRAS_API_KEY")
127
 
128
  os.environ["LANGSMITH_TRACING"] = "true"
 
147
 
148
  logger = logging.getLogger(__name__)
149
  logger.setLevel(logging.INFO)
150
+ log_dir = Path(__file__).parent / "logs"
151
  log_dir.mkdir(exist_ok=True)
152
+ logger.addHandler(logging.FileHandler(log_dir / "job_writer.log", mode="a"))
153
+ logger.info(
154
+ "Logger initialized. Writing to %s", Path(__file__).parent / "job_writer.log"
155
+ )
156
+
157
+ env_path = Path(__file__).parent / ".env"
158
 
 
159
 
160
  def _set_env(var: str):
161
  if not os.environ.get(var):
162
  os.environ[var] = getpass.getpass(f"{var}: ")
163
  logger.info(f"{var} set to {os.environ[var]}")
164
 
165
+
166
  def load_environment_variables(key_array):
167
  for key in key_array:
168
  if not os.getenv(key):
169
+ logger.warning(
170
+ f"{key} environment variable is not set. Failed to get {key} at Path {env_path}"
171
+ )
172
  _set_env(key)
173
 
174
+
175
  if env_path.exists():
176
+ logger.info("Loading environment variables from %s", env_path)
177
  load_dotenv(dotenv_path=env_path, override=True)
178
  else:
179
+ logger.warning(
180
+ ".env file not found at %s. Using system environment variables.", env_path
181
+ )
182
+
183
+
184
+ environment_key_array = [
185
+ "TAVILY_API_KEY",
186
+ "GEMINI_API_KEY",
187
+ "PINECONE_API_KEY",
188
+ "LANGFUSE_PUBLIC_KEY",
189
+ "LANGFUSE_SECRET_KEY",
190
+ ]
191
  # Check for critical environment variables
192
  load_environment_variables(environment_key_array)
193
 
194
+ __all__ = ["job_app_graph", "workflows/research_workflow"]
src/job_writing_agent/agents/nodes.py CHANGED
@@ -5,21 +5,22 @@ This module contains all the node functions used in the job application
5
  writer workflow graph, each handling a specific step in the process.
6
  """
7
 
 
8
  import logging
9
  from datetime import datetime
10
 
11
- from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
12
- from langchain_core.output_parsers import StrOutputParser
13
  from langchain_core.messages import SystemMessage
 
14
 
15
- from ..classes.classes import AppState, ResearchState, ResultState, DataLoadState
 
16
  from ..prompts.templates import (
17
- CRITIQUE_PROMPT,
18
- PERSONA_DEVELOPMENT_PROMPT,
19
- COVER_LETTER_PROMPT,
20
- REVISION_PROMPT,
21
  BULLET_POINTS_PROMPT,
 
 
22
  LINKEDIN_NOTE_PROMPT,
 
23
  )
24
  from ..utils.llm_provider_factory import LLMFactory
25
 
@@ -30,132 +31,67 @@ CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
30
 
31
  def create_draft(state: ResearchState) -> ResultState:
32
  """Create initial draft of the application material."""
 
 
 
 
 
 
33
  # Create LLM inside function (lazy initialization)
34
  llm_provider = LLMFactory()
35
  llm = llm_provider.create_langchain(
36
- "mistralai/mistral-7b-instruct:free", provider="openrouter", temperature=0.3
 
 
37
  )
38
 
 
 
 
 
 
 
39
  # Determine which type of content we're creating
40
- company_background_information = state.get("company_research_data", {})
41
 
42
  content_category = state.get("content_category", "cover_letter")
43
 
44
- # Get the original resume text from state (used later if vector search is available)
45
- original_resume_text = company_background_information.get("resume", "")
46
-
47
- try:
48
- # Not yet implemented
49
- if state.get("vector_store"):
50
- vector_store = state.get("vector_store")
51
-
52
- # Extract key requirements from job description
53
- prompt = PERSONA_DEVELOPMENT_PROMPT | llm | StrOutputParser()
54
-
55
- if company_background_information:
56
- key_requirements = prompt.invoke(
57
- {
58
- "job_description": company_background_information[
59
- "job_description"
60
- ]
61
- }
62
- )
63
- else:
64
- return key_requirements
65
-
66
- if not key_requirements:
67
- print("Warning: No key requirements found in the job description.")
68
- return state
69
-
70
- # Use the key requirements to query for the most relevant resume parts
71
- namespace = f"resume_{state['session_id']}"
72
- relevant_docs = vector_store.retrieve_similar(
73
- query=key_requirements, namespace=namespace, k=3
74
- )
75
-
76
- # Use these relevant sections with higher weight in the draft creation
77
- highly_relevant_resume = "\n".join(
78
- [doc.page_content for doc in relevant_docs]
79
- )
80
- # Combine highly relevant parts with full resume text
81
- resume_text = f"""
82
- # Most Relevant Experience
83
- {highly_relevant_resume}
84
-
85
- # Full Resume
86
- {original_resume_text}
87
- """
88
- # Update the company_background_information with the enhanced resume
89
- company_background_information["resume"] = resume_text
90
- except Exception as e:
91
- logger.warning(f"Could not use vector search for relevant resume parts: {e}")
92
- # Continue with regular resume text
93
-
94
- # Select the appropriate prompt template based on application type and persona
95
  logger.info(f"The candidate wants the Agent to assist with : {content_category}")
96
- if content_category == "bullets":
97
- FirstDraftGenerationPromptTemplate = ChatPromptTemplate([BULLET_POINTS_PROMPT])
98
- elif content_category == "linkedin_connect_request":
99
- FirstDraftGenerationPromptTemplate = ChatPromptTemplate([LINKEDIN_NOTE_PROMPT])
100
- else:
101
- FirstDraftGenerationPromptTemplate = ChatPromptTemplate([COVER_LETTER_PROMPT])
102
-
103
- # Create the draft using the selected prompt template
104
- CurrentSessionContextMessage = HumanMessagePromptTemplate.from_template(
105
- """
106
- Below is the Job Description, Candidate Resume, and Company Research Data enclosed in triple backticks.
107
-
108
- **Job Description:**
109
-
110
- '''
111
- {current_job_role}
112
- '''
113
-
114
- **Candidate Resume:**
115
-
116
- '''
117
- {candidate_resume}
118
- '''
119
-
120
- **Company Research Data:**
121
-
122
- '''
123
- {company_research_data}
124
- '''
125
- """,
126
- input_variables=[
127
- "current_job_role",
128
- "company_research_data",
129
- "candidate_resume",
130
- ],
131
  )
132
 
133
- FirstDraftGenerationPromptTemplate.append(CurrentSessionContextMessage)
 
 
134
 
135
- # Invoke the chain with the appropriate inputs
136
  draft_generation_chain = (
137
  (
138
  {
139
  "current_job_role": lambda x: x["current_job_role"],
140
- "company_research_data": lambda x: x["company_research_data"],
141
  "candidate_resume": lambda x: x["candidate_resume"],
 
 
142
  }
143
  )
144
- | FirstDraftGenerationPromptTemplate
145
  | llm
146
  )
147
 
148
- # Prepare the inputs
149
  application_background_data = {
150
- "current_job_role": company_background_information["job_description"],
151
- "company_research_data": company_background_information[
152
- "company_research_data_summary"
153
- ],
154
- "candidate_resume": company_background_information["resume"],
 
155
  }
156
 
157
  response = draft_generation_chain.invoke(application_background_data)
158
  logger.info(f"Draft has been created: {response.content}")
 
159
  app_state = ResultState(
160
  draft=response.content,
161
  feedback="",
@@ -176,31 +112,37 @@ def critique_draft(state: ResultState) -> ResultState:
176
  try:
177
  logger.info("Critiquing draft...")
178
 
179
- # Create LLM inside function (lazy initialization)
180
- llm_provider = LLMFactory()
181
- llm = llm_provider.create_langchain(
182
- "mistralai/mistral-7b-instruct:free", provider="openrouter", temperature=0.3
183
- )
184
-
185
- job_description = str(state["company_research_data"].get("job_description", ""))
186
- draft = str(state.get("draft", ""))
187
 
188
  # Debug logging to verify values
189
  logger.debug(f"Job description length: {len(job_description)}")
190
- logger.debug(f"Draft length: {len(draft)}")
191
 
192
- if not job_description or not draft:
 
193
  logger.warning("Missing job_description or draft in state")
194
- # Return state with empty feedback
195
  return ResultState(
196
- draft=draft,
197
- feedback="",
198
  critique_feedback="",
199
  current_node="critique",
200
- company_research_data=state["company_research_data"],
201
- output_data=state["output_data"],
202
  )
203
 
 
 
 
 
 
 
 
 
204
  # Use the same pattern as create_draft:
205
  # 1. Create ChatPromptTemplate from SystemMessage
206
  # 2. Append HumanMessagePromptTemplate with variables
@@ -213,10 +155,10 @@ def critique_draft(state: ResultState) -> ResultState:
213
  )
214
 
215
  # Create ChatPromptTemplate from SystemMessage (like line 90-94 in create_draft)
216
- CritiquePromptTemplate = ChatPromptTemplate([critique_system_message])
217
 
218
  # Append HumanMessagePromptTemplate with variables (like line 97-124 in create_draft)
219
- CritiqueContextMessage = HumanMessagePromptTemplate.from_template(
220
  """
221
  # Job Description
222
  {job_description}
@@ -235,7 +177,7 @@ def critique_draft(state: ResultState) -> ResultState:
235
  input_variables=["job_description", "draft"],
236
  )
237
 
238
- CritiquePromptTemplate.append(CritiqueContextMessage)
239
 
240
  # Create chain (like line 129-139 in create_draft)
241
  critique_chain = (
@@ -243,15 +185,15 @@ def critique_draft(state: ResultState) -> ResultState:
243
  "job_description": lambda x: x["job_description"],
244
  "draft": lambda x: x["draft"],
245
  }
246
- | CritiquePromptTemplate
247
  | llm
248
  )
249
 
250
- # Invoke with input variables (like line 150 in create_draft)
251
  critique = critique_chain.invoke(
252
  {
253
  "job_description": job_description,
254
- "draft": draft,
255
  }
256
  )
257
 
@@ -260,16 +202,15 @@ def critique_draft(state: ResultState) -> ResultState:
260
  )
261
  logger.info("Draft critique completed")
262
 
263
- # Store the critique for reference during revision
264
- app_state = ResultState(
265
- draft=state["draft"],
266
- feedback=state["feedback"],
267
  critique_feedback=critique_content,
268
  current_node="critique",
269
- company_research_data=state["company_research_data"],
270
- output_data=state["output_data"],
271
  )
272
- return app_state
273
 
274
  except Exception as e:
275
  logger.error(f"Error in critique_draft: {e}", exc_info=True)
@@ -279,70 +220,84 @@ def critique_draft(state: ResultState) -> ResultState:
279
 
280
  def human_approval(state: ResultState) -> ResultState:
281
  """Human-in-the-loop checkpoint for feedback on the draft."""
282
- # This is a placeholder function that would be replaced by actual UI interaction
 
 
 
 
 
 
283
  print("\n" + "=" * 80)
284
  print("DRAFT FOR REVIEW:")
285
- print(state["draft"])
286
  print("\nAUTOMATIC CRITIQUE:")
287
- print(state.get("critique_feedback", "No critique available"))
288
  print("=" * 80)
289
  print("\nPlease provide your feedback (press Enter to continue with no changes):")
290
 
291
  # In a real implementation, this would be handled by the UI
292
  human_feedback = input()
293
- result_state = ResultState(
294
- draft=state["draft"],
 
295
  feedback=human_feedback,
296
- critique_feedback=state["critique_feedback"],
297
  current_node="human_approval",
298
- company_research_data=state["company_research_data"],
299
- output_data=state["output_data"],
300
  )
301
- return result_state
302
 
303
 
304
  def finalize_document(state: ResultState) -> DataLoadState:
305
  """Incorporate feedback and finalize the document."""
 
 
 
 
 
 
 
 
306
 
307
  # Create LLM inside function (lazy initialization)
308
  llm_provider = LLMFactory()
309
  llm = llm_provider.create_langchain(
310
- "mistralai/mistral-7b-instruct:free", provider="openrouter", temperature=0.3
 
 
311
  )
312
 
313
- # Create chain like in critique_draft (line 229-236)
314
  revision_chain = (
315
  {
316
- "draft": lambda x: x["draft"],
317
- "feedback": lambda x: x["feedback"],
318
- "critique_feedback": lambda x: x["critique_feedback"],
319
  }
320
  | REVISION_PROMPT
321
  | llm
322
  )
323
 
324
- print(f"revision_chain: {revision_chain}")
325
-
326
- # Invoke with input variables (like line 239 in critique_draft)
327
  final_content = revision_chain.invoke(
328
  {
329
- "draft": state["draft"],
330
- "feedback": state["feedback"],
331
- "critique_feedback": state["critique_feedback"],
332
  }
333
  )
334
 
335
- app_state = DataLoadState(
336
- draft=state["draft"],
337
- feedback=state["feedback"],
338
- critique_feedback=state["critique_feedback"],
339
- company_research_data=state["company_research_data"],
 
340
  current_node="finalize",
341
  output_data=final_content.content
342
  if hasattr(final_content, "content")
343
  else str(final_content),
344
  )
345
- return app_state
346
 
347
 
348
  """
@@ -351,8 +306,19 @@ Conditional node to determine if next node should be 'draft' node or "research"
351
 
352
 
353
  def determine_next_step(state: AppState) -> str:
354
- """If the company name is missing within the AppState, we can't
355
- create the content draft and therefore redirected to the research node."""
356
- if not state["company_name"]:
 
 
 
 
 
 
 
 
 
 
 
357
  return "draft"
358
  return "research"
 
5
  writer workflow graph, each handling a specific step in the process.
6
  """
7
 
8
+ # Standard library imports
9
  import logging
10
  from datetime import datetime
11
 
12
+ # Third-party imports
 
13
  from langchain_core.messages import SystemMessage
14
+ from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
15
 
16
+ # Local imports
17
+ from ..classes.classes import AppState, DataLoadState, ResearchState, ResultState
18
  from ..prompts.templates import (
 
 
 
 
19
  BULLET_POINTS_PROMPT,
20
+ COVER_LETTER_PROMPT,
21
+ DRAFT_GENERATION_CONTEXT_PROMPT,
22
  LINKEDIN_NOTE_PROMPT,
23
+ REVISION_PROMPT,
24
  )
25
  from ..utils.llm_provider_factory import LLMFactory
26
 
 
31
 
32
  def create_draft(state: ResearchState) -> ResultState:
33
  """Create initial draft of the application material."""
34
+ # Validate state inputs
35
+ company_background_information = state.get("company_research_data", {})
36
+ if not company_background_information:
37
+ logger.error("Missing company_research_data in state")
38
+ raise ValueError("company_research_data is required in state")
39
+
40
  # Create LLM inside function (lazy initialization)
41
  llm_provider = LLMFactory()
42
  llm = llm_provider.create_langchain(
43
+ "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
44
+ provider="openrouter",
45
+ temperature=0.3,
46
  )
47
 
48
+ draft_category_map = {
49
+ "cover_letter": COVER_LETTER_PROMPT,
50
+ "bullets": BULLET_POINTS_PROMPT,
51
+ "linkedin_connect_request": LINKEDIN_NOTE_PROMPT,
52
+ }
53
+
54
  # Determine which type of content we're creating
 
55
 
56
  content_category = state.get("content_category", "cover_letter")
57
 
58
+ # Select appropriate system message template based on content category
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  logger.info(f"The candidate wants the Agent to assist with : {content_category}")
60
+ system_message_template = draft_category_map.get(
61
+ content_category, COVER_LETTER_PROMPT
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  )
63
 
64
+ # Build the complete prompt template: system message + context
65
+ draft_prompt_template = ChatPromptTemplate([system_message_template])
66
+ draft_prompt_template.append(DRAFT_GENERATION_CONTEXT_PROMPT)
67
 
68
+ # Build the chain: input formatting -> prompt template -> LLM
69
  draft_generation_chain = (
70
  (
71
  {
72
  "current_job_role": lambda x: x["current_job_role"],
 
73
  "candidate_resume": lambda x: x["candidate_resume"],
74
+ "company_research_data": lambda x: x["company_research_data"],
75
+ "current_date": lambda x: x["current_date"],
76
  }
77
  )
78
+ | draft_prompt_template
79
  | llm
80
  )
81
 
82
+ # Prepare the inputs with safe dictionary access
83
  application_background_data = {
84
+ "current_job_role": company_background_information.get("job_description", ""),
85
+ "candidate_resume": company_background_information.get("resume", ""),
86
+ "company_research_data": company_background_information.get(
87
+ "company_research_data_summary", "Company Research Data is not available"
88
+ ),
89
+ "current_date": CURRENT_DATE,
90
  }
91
 
92
  response = draft_generation_chain.invoke(application_background_data)
93
  logger.info(f"Draft has been created: {response.content}")
94
+
95
  app_state = ResultState(
96
  draft=response.content,
97
  feedback="",
 
112
  try:
113
  logger.info("Critiquing draft...")
114
 
115
+ # Validate and extract required state fields once at the start
116
+ company_research_data = state.get("company_research_data", {})
117
+ job_description = str(company_research_data.get("job_description", ""))
118
+ draft_content = str(state.get("draft", ""))
119
+ feedback = state.get("feedback", "")
120
+ output_data = state.get("output_data", "")
 
 
121
 
122
  # Debug logging to verify values
123
  logger.debug(f"Job description length: {len(job_description)}")
124
+ logger.debug(f"Draft length: {len(draft_content)}")
125
 
126
+ # Early return if required fields are missing
127
+ if not job_description or not draft_content:
128
  logger.warning("Missing job_description or draft in state")
 
129
  return ResultState(
130
+ draft=draft_content,
131
+ feedback=feedback,
132
  critique_feedback="",
133
  current_node="critique",
134
+ company_research_data=company_research_data,
135
+ output_data=output_data,
136
  )
137
 
138
+ # Create LLM inside function (lazy initialization)
139
+ llm_provider = LLMFactory()
140
+ llm = llm_provider.create_langchain(
141
+ "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
142
+ provider="openrouter",
143
+ temperature=0.3,
144
+ )
145
+
146
  # Use the same pattern as create_draft:
147
  # 1. Create ChatPromptTemplate from SystemMessage
148
  # 2. Append HumanMessagePromptTemplate with variables
 
155
  )
156
 
157
  # Create ChatPromptTemplate from SystemMessage (like line 90-94 in create_draft)
158
+ critique_prompt_template = ChatPromptTemplate([critique_system_message])
159
 
160
  # Append HumanMessagePromptTemplate with variables (like line 97-124 in create_draft)
161
+ critique_context_message = HumanMessagePromptTemplate.from_template(
162
  """
163
  # Job Description
164
  {job_description}
 
177
  input_variables=["job_description", "draft"],
178
  )
179
 
180
+ critique_prompt_template.append(critique_context_message)
181
 
182
  # Create chain (like line 129-139 in create_draft)
183
  critique_chain = (
 
185
  "job_description": lambda x: x["job_description"],
186
  "draft": lambda x: x["draft"],
187
  }
188
+ | critique_prompt_template
189
  | llm
190
  )
191
 
192
+ # Invoke with validated input variables
193
  critique = critique_chain.invoke(
194
  {
195
  "job_description": job_description,
196
+ "draft": draft_content,
197
  }
198
  )
199
 
 
202
  )
203
  logger.info("Draft critique completed")
204
 
205
+ # Store the critique - using validated variables from top of function
206
+ return ResultState(
207
+ draft=draft_content,
208
+ feedback=feedback,
209
  critique_feedback=critique_content,
210
  current_node="critique",
211
+ company_research_data=company_research_data,
212
+ output_data=output_data,
213
  )
 
214
 
215
  except Exception as e:
216
  logger.error(f"Error in critique_draft: {e}", exc_info=True)
 
220
 
221
  def human_approval(state: ResultState) -> ResultState:
222
  """Human-in-the-loop checkpoint for feedback on the draft."""
223
+ # Validate and extract all required state fields once
224
+ draft_content = state.get("draft", "")
225
+ critique_feedback_content = state.get("critique_feedback", "No critique available")
226
+ company_research_data = state.get("company_research_data", {})
227
+ output_data = state.get("output_data", "")
228
+
229
+ # Display draft and critique for review
230
  print("\n" + "=" * 80)
231
  print("DRAFT FOR REVIEW:")
232
+ print(draft_content)
233
  print("\nAUTOMATIC CRITIQUE:")
234
+ print(critique_feedback_content)
235
  print("=" * 80)
236
  print("\nPlease provide your feedback (press Enter to continue with no changes):")
237
 
238
  # In a real implementation, this would be handled by the UI
239
  human_feedback = input()
240
+
241
+ return ResultState(
242
+ draft=draft_content,
243
  feedback=human_feedback,
244
+ critique_feedback=critique_feedback_content,
245
  current_node="human_approval",
246
+ company_research_data=company_research_data,
247
+ output_data=output_data,
248
  )
 
249
 
250
 
251
  def finalize_document(state: ResultState) -> DataLoadState:
252
  """Incorporate feedback and finalize the document."""
253
+ # Validate and extract all required state fields once
254
+ draft_content = state.get("draft", "")
255
+ feedback_content = state.get("feedback", "")
256
+ critique_feedback_content = state.get("critique_feedback", "")
257
+ company_research_data = state.get("company_research_data", {})
258
+
259
+ if not draft_content:
260
+ logger.warning("Missing draft in state for finalization")
261
 
262
  # Create LLM inside function (lazy initialization)
263
  llm_provider = LLMFactory()
264
  llm = llm_provider.create_langchain(
265
+ "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
266
+ provider="openrouter",
267
+ temperature=0.3,
268
  )
269
 
270
+ # Create revision chain
271
  revision_chain = (
272
  {
273
+ "draft": lambda x: x.get("draft", ""),
274
+ "feedback": lambda x: x.get("feedback", ""),
275
+ "critique_feedback": lambda x: x.get("critique_feedback", ""),
276
  }
277
  | REVISION_PROMPT
278
  | llm
279
  )
280
 
281
+ # Invoke with validated input variables
 
 
282
  final_content = revision_chain.invoke(
283
  {
284
+ "draft": draft_content,
285
+ "feedback": feedback_content,
286
+ "critique_feedback": critique_feedback_content,
287
  }
288
  )
289
 
290
+ # Return final state using validated variables
291
+ return DataLoadState(
292
+ draft=draft_content,
293
+ feedback=feedback_content,
294
+ critique_feedback=critique_feedback_content,
295
+ company_research_data=company_research_data,
296
  current_node="finalize",
297
  output_data=final_content.content
298
  if hasattr(final_content, "content")
299
  else str(final_content),
300
  )
 
301
 
302
 
303
  """
 
306
 
307
 
308
  def determine_next_step(state: AppState) -> str:
309
+ """
310
+ Determine next workflow step based on company name presence.
311
+
312
+ If the company name is missing within the AppState, we can't
313
+ create the content draft and therefore redirect to the research node.
314
+
315
+ Args:
316
+ state: Current application state
317
+
318
+ Returns:
319
+ Next node name: "draft" or "research"
320
+ """
321
+ company_name = state.get("company_name", "")
322
+ if not company_name:
323
  return "draft"
324
  return "research"
src/job_writing_agent/classes/classes.py CHANGED
@@ -74,7 +74,7 @@ class DataLoadState(MessagesState, total=False):
74
 
75
  resume_path: str
76
  job_description_source: str
77
- content: str # "cover_letter", "bullets", "linkedin_note"
78
  resume: str
79
  job_description: str
80
  company_name: str
@@ -96,11 +96,13 @@ class ResearchState(MessagesState):
96
  tavily_search: Dict[str, Any] Stores the results of the Tavily search
97
  attempted_search_queries: List of queries used extracted from the job description
98
  compiled_knowledge: Compiled knowledge from the research
 
99
  """
100
 
101
  company_research_data: Dict[str, Any]
102
  attempted_search_queries: List[str]
103
  current_node: str
 
104
 
105
 
106
  class ResultState(MessagesState):
 
74
 
75
  resume_path: str
76
  job_description_source: str
77
+ content_category: str # "cover_letter", "bullets", "linkedin_note"
78
  resume: str
79
  job_description: str
80
  company_name: str
 
96
  tavily_search: Dict[str, Any] Stores the results of the Tavily search
97
  attempted_search_queries: List of queries used extracted from the job description
98
  compiled_knowledge: Compiled knowledge from the research
99
+ content_category: Type of application material to generate
100
  """
101
 
102
  company_research_data: Dict[str, Any]
103
  attempted_search_queries: List[str]
104
  current_node: str
105
+ content_category: str
106
 
107
 
108
  class ResultState(MessagesState):
src/job_writing_agent/nodes/__init__.py CHANGED
@@ -4,10 +4,30 @@ Created on Mon Oct 23 16:49:52 2023
4
  @author: rishabhaggarwal
5
  """
6
 
7
- from .initializing import Dataloading
 
 
 
 
 
 
 
 
 
8
  # from .createdraft import CreateDraft
9
  from .variations import generate_variations
10
  from .selfconsistency import self_consistency_vote
11
  from .research_workflow import research_workflow
12
 
13
- __all__ = ["Dataloading", "generate_variations", "self_consistency_vote", "research_workflow"]
 
 
 
 
 
 
 
 
 
 
 
 
4
  @author: rishabhaggarwal
5
  """
6
 
7
+ # Legacy import (deprecated - use new classes instead)
8
+ from .data_loading_workflow import data_loading_workflow
9
+
10
+ # New data loading classes following SOLID principles
11
+ from .resume_loader import ResumeLoader
12
+ from .job_description_loader import JobDescriptionLoader
13
+ from .system_initializer import SystemInitializer
14
+ from .validation_helper import ValidationHelper
15
+
16
+ # Other workflow components
17
  # from .createdraft import CreateDraft
18
  from .variations import generate_variations
19
  from .selfconsistency import self_consistency_vote
20
  from .research_workflow import research_workflow
21
 
22
+ __all__ = [
23
+ # New data loading classes
24
+ "ResumeLoader",
25
+ "JobDescriptionLoader",
26
+ "SystemInitializer",
27
+ "ValidationHelper",
28
+ "data_loading_workflow",
29
+ # Other components
30
+ "generate_variations",
31
+ "self_consistency_vote",
32
+ "research_workflow",
33
+ ]
src/job_writing_agent/nodes/data_loading_workflow.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Data Loading Workflow Module
4
+
5
+ This module defines the data loading subgraph workflow, including all node
6
+ functions and the subgraph definition. It uses the separate loader classes
7
+ (ResumeLoader, JobDescriptionLoader, SystemInitializer, ValidationHelper)
8
+ following the Single Responsibility Principle.
9
+ """
10
+
11
+ import logging
12
+ from typing import Any
13
+
14
+ from langgraph.graph import StateGraph, END, START
15
+
16
+ from job_writing_agent.classes import DataLoadState
17
+ from job_writing_agent.nodes.resume_loader import ResumeLoader
18
+ from job_writing_agent.nodes.job_description_loader import JobDescriptionLoader
19
+ from job_writing_agent.nodes.system_initializer import SystemInitializer
20
+ from job_writing_agent.nodes.validation_helper import ValidationHelper
21
+ from job_writing_agent.utils.logging.logging_decorators import (
22
+ log_async,
23
+ log_execution,
24
+ )
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ # ============================================================================
30
+ # Data Loading Subgraph Node Functions
31
+ # ============================================================================
32
+
33
+
34
+ @log_async
35
+ async def set_agent_system_message_node(state: DataLoadState) -> DataLoadState:
36
+ """
37
+ Node function to initialize system message in workflow state.
38
+
39
+ This node wraps the SystemInitializer.set_agent_system_message method
40
+ for use in the LangGraph workflow.
41
+
42
+ Parameters
43
+ ----------
44
+ state: DataLoadState
45
+ Current workflow state.
46
+
47
+ Returns
48
+ -------
49
+ DataLoadState
50
+ Updated state with system message added to messages list.
51
+ """
52
+ initializer = SystemInitializer()
53
+ return await initializer.set_agent_system_message(state)
54
+
55
+
56
+ @log_async
57
+ async def parse_resume_node(state: DataLoadState) -> DataLoadState:
58
+ """
59
+ Node to parse resume in parallel with job description parsing.
60
+
61
+ Extracts resume parsing logic for parallel execution.
62
+ Returns only the resume data - reducer will merge with job description data.
63
+
64
+ Parameters
65
+ ----------
66
+ state: DataLoadState
67
+ Current workflow state containing resume_path.
68
+
69
+ Returns
70
+ -------
71
+ DataLoadState
72
+ Partial state update with resume data in company_research_data.
73
+ LangGraph will merge this with other parallel updates.
74
+ """
75
+ loader = ResumeLoader()
76
+ resume_src = state.get("resume_path")
77
+
78
+ resume_text = ""
79
+ if resume_src:
80
+ resume_text = await loader._load_resume(resume_src)
81
+ elif state.get("current_node") == "verify":
82
+ resume_text = await loader._prompt_user_for_resume()
83
+
84
+ # Return only the resume data - reducer will merge this with job description data
85
+ logger.info(f"Resume parsed: {len(resume_text)} characters")
86
+ return {
87
+ "company_research_data": {"resume": resume_text},
88
+ }
89
+
90
+
91
+ @log_async
92
+ async def parse_job_description_node(state: DataLoadState) -> DataLoadState:
93
+ """
94
+ Node to parse job description in parallel with resume parsing.
95
+
96
+ Extracts job description parsing logic for parallel execution.
97
+ Returns only the job description data - reducer will merge with resume data.
98
+
99
+ Parameters
100
+ ----------
101
+ state: DataLoadState
102
+ Current workflow state containing job_description_source.
103
+
104
+ Returns
105
+ -------
106
+ DataLoadState
107
+ Partial state update with job description and company name in
108
+ company_research_data. LangGraph will merge this with other parallel updates.
109
+ """
110
+ loader = JobDescriptionLoader()
111
+ jd_src = state.get("job_description_source")
112
+
113
+ job_text = ""
114
+ company_name = ""
115
+ if jd_src:
116
+ job_text, company_name = await loader._load_job_description(jd_src)
117
+ elif state.get("current_node") == "verify":
118
+ job_text = await loader._prompt_user_for_job_description()
119
+
120
+ # Return only the job description data - reducer will merge this with resume data
121
+ logger.info(
122
+ f"Job description parsed: {len(job_text)} characters, company: {company_name}"
123
+ )
124
+ return {
125
+ "company_research_data": {
126
+ "job_description": job_text,
127
+ "company_name": company_name,
128
+ },
129
+ }
130
+
131
+
132
+ @log_execution
133
+ def aggregate_data_loading_results(state: DataLoadState) -> DataLoadState:
134
+ """
135
+ Aggregate results from parallel resume and job description parsing nodes.
136
+
137
+ This node runs after both parse_resume_node and parse_job_description_node
138
+ complete. It ensures both results are present, normalizes values to strings,
139
+ and structures the final state.
140
+
141
+ Normalization is performed here (not in ValidationHelper) to follow SRP:
142
+ - This function: Aggregates and normalizes data
143
+ - ValidationHelper: Only validates data
144
+
145
+ Parameters
146
+ ----------
147
+ state: DataLoadState
148
+ Current workflow state with parallel parsing results.
149
+
150
+ Returns
151
+ -------
152
+ DataLoadState
153
+ Updated state with normalized and structured company_research_data.
154
+ """
155
+ # Ensure company_research_data exists
156
+ if "company_research_data" not in state:
157
+ state["company_research_data"] = {}
158
+
159
+ # Extract research data once, then get results from parallel nodes
160
+ company_research_data = state["company_research_data"]
161
+ resume_text = company_research_data.get("resume", "")
162
+ job_text = company_research_data.get("job_description", "")
163
+ company_name = company_research_data.get("company_name", "")
164
+
165
+ # Normalize values to strings (handles list, tuple, dict, str)
166
+ def normalize_value(value: list | tuple | dict | str | Any) -> str:
167
+ """
168
+ Normalize a value to a string representation.
169
+
170
+ Args:
171
+ value: Value to normalize (list, tuple, dict, or any other type)
172
+
173
+ Returns:
174
+ String representation of the value
175
+ """
176
+ if isinstance(value, (list, tuple)):
177
+ return " ".join(str(x) for x in value)
178
+ elif isinstance(value, dict):
179
+ return str(value)
180
+ else:
181
+ return str(value)
182
+
183
+ # Normalize all values
184
+ resume_text = normalize_value(resume_text) if resume_text else ""
185
+ job_text = normalize_value(job_text) if job_text else ""
186
+ company_name = normalize_value(company_name) if company_name else ""
187
+
188
+ # Validate both are present (log warnings but don't fail here - validation node will handle)
189
+ if not resume_text:
190
+ logger.warning("Resume text is empty after parsing")
191
+ if not job_text:
192
+ logger.warning("Job description text is empty after parsing")
193
+
194
+ # Ensure final structure is correct
195
+ state["company_research_data"] = {
196
+ "resume": resume_text,
197
+ "job_description": job_text,
198
+ "company_name": company_name,
199
+ }
200
+ state["current_node"] = "aggregate_results"
201
+
202
+ logger.info("Data loading results aggregated and normalized successfully")
203
+ return state
204
+
205
+
206
+ @log_execution
207
+ def verify_inputs_node(state: DataLoadState) -> DataLoadState:
208
+ """
209
+ Verify that required inputs are present and set next_node for routing.
210
+
211
+ This node wraps the ValidationHelper.verify_inputs method for use in
212
+ the LangGraph workflow. It only validates - normalization is done in
213
+ aggregate_data_loading_results.
214
+
215
+ Parameters
216
+ ----------
217
+ state: DataLoadState
218
+ Current workflow state with aggregated and normalized data.
219
+
220
+ Returns
221
+ -------
222
+ DataLoadState
223
+ Updated state with next_node set for routing ("load" or "research").
224
+ """
225
+ validator = ValidationHelper()
226
+ return validator.verify_inputs(state)
227
+
228
+
229
+ # ============================================================================
230
+ # Data Loading Subgraph Definition
231
+ # ============================================================================
232
+
233
+ # Create data loading subgraph
234
+ data_loading_subgraph = StateGraph(DataLoadState)
235
+
236
+ # Add subgraph nodes
237
+ data_loading_subgraph.add_node(
238
+ "set_agent_system_message", set_agent_system_message_node
239
+ )
240
+ data_loading_subgraph.add_node("parse_resume", parse_resume_node)
241
+ data_loading_subgraph.add_node("parse_job_description", parse_job_description_node)
242
+ data_loading_subgraph.add_node("aggregate_results", aggregate_data_loading_results)
243
+ data_loading_subgraph.add_node("verify_inputs", verify_inputs_node)
244
+
245
+ # Add subgraph edges
246
+ data_loading_subgraph.add_edge(START, "set_agent_system_message")
247
+ # Parallel execution: both nodes start after set_agent_system_message
248
+ data_loading_subgraph.add_edge("set_agent_system_message", "parse_resume")
249
+ data_loading_subgraph.add_edge("set_agent_system_message", "parse_job_description")
250
+ # Both parallel nodes feed into aggregate (LangGraph waits for both)
251
+ data_loading_subgraph.add_edge("parse_resume", "aggregate_results")
252
+ data_loading_subgraph.add_edge("parse_job_description", "aggregate_results")
253
+ # Aggregate feeds into verification
254
+ data_loading_subgraph.add_edge("aggregate_results", "verify_inputs")
255
+ # Verification ends the subgraph
256
+ data_loading_subgraph.add_edge("verify_inputs", END)
257
+
258
+ # Compile data loading subgraph
259
+ data_loading_workflow = data_loading_subgraph.compile()
src/job_writing_agent/nodes/initializing.py DELETED
@@ -1,513 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- Job Application Writer - Initialization Module
4
-
5
- This module provides the Dataloading class responsible for loading and validating
6
- inputs required for the job-application workflow. It handles parsing resumes and
7
- job descriptions, managing missing inputs, and populating application state.
8
-
9
- The module includes utilities for:
10
- - Parsing resume files and extracting text content
11
- - Parsing job descriptions and extracting company information
12
- - Orchestrating input loading with validation
13
- - Providing user prompts for missing information during verification
14
- """
15
-
16
- import logging
17
- from typing import Tuple, Optional
18
-
19
- from langchain_core.documents import Document
20
- from langchain_core.messages import SystemMessage
21
- from langgraph.graph import StateGraph, END, START
22
-
23
- from job_writing_agent.classes import DataLoadState
24
- from job_writing_agent.utils.document_processing import (
25
- parse_resume,
26
- get_job_description,
27
- )
28
- from job_writing_agent.prompts.templates import agent_system_prompt
29
- from job_writing_agent.utils.logging.logging_decorators import (
30
- log_async,
31
- log_execution,
32
- log_errors,
33
- )
34
-
35
- logger = logging.getLogger(__name__)
36
-
37
-
38
- # Note: Using centralized logging decorators from utils.logging.logging_decorators
39
-
40
-
41
- class Dataloading:
42
- """
43
- Helper class providing utility methods for loading and parsing data.
44
-
45
- This class provides helper methods used by the data loading subgraph nodes.
46
- The actual workflow orchestration is handled by the data_loading_workflow subgraph.
47
-
48
- Methods
49
- -------
50
- set_agent_system_message(state: DataLoadState) -> DataLoadState
51
- Adds the system prompt to the conversation state.
52
- get_resume(resume_source) -> str
53
- Parses a resume file and returns its plain‑text content.
54
- parse_job_description(job_description_source) -> Tuple[str, str]
55
- Parses a job description and returns its text and company name.
56
- verify_inputs(state: DataLoadState) -> DataLoadState
57
- Validates inputs and sets next_node for routing.
58
-
59
- Private Methods (used by subgraph nodes)
60
- -----------------------------------------
61
- _load_resume(resume_source) -> str
62
- Load resume content, raising if the source is missing.
63
- _load_job_description(jd_source) -> Tuple[str, str]
64
- Load job description text and company name, raising if missing.
65
- _prompt_user(prompt_msg: str) -> str
66
- Prompt the user for input (synchronous input wrapped for async use).
67
-
68
- """
69
-
70
- def __init__(self):
71
- """Initialize Dataloading helper class."""
72
- pass
73
-
74
- # =======================================================================
75
- # System/Initialization Methods
76
- # =======================================================================
77
-
78
- @log_async
79
- async def set_agent_system_message(self, state: DataLoadState) -> DataLoadState:
80
- """Add the system prompt to the conversation state.
81
-
82
- Parameters
83
- ----------
84
- state: DataLoadState
85
- Current workflow state.
86
-
87
- Returns
88
- -------
89
- DataLoadState
90
- Updated state with the system message and the next node identifier.
91
- """
92
- agent_initialization_system_message = SystemMessage(content=agent_system_prompt)
93
- messages = state.get("messages", [])
94
- messages.append(agent_initialization_system_message)
95
- return {
96
- **state,
97
- "messages": messages,
98
- "current_node": "initialize_system",
99
- }
100
-
101
- # =======================================================================
102
- # Public Parsing Methods
103
- # =======================================================================
104
-
105
- @log_async
106
- @log_errors
107
- async def get_resume(self, resume_source):
108
- """
109
- Parse a resume file and return its plain‑text content.
110
-
111
- This method extracts text from resume chunks, handling both Document
112
- objects and plain strings. Empty or invalid chunks are skipped.
113
-
114
- Parameters
115
- ----------
116
- resume_source: Any
117
- Path or file‑like object accepted by ``parse_resume``.
118
-
119
- Returns
120
- -------
121
- str
122
- Plain text content of the resume.
123
-
124
- Raises
125
- ------
126
- AssertionError
127
- If resume_source is None.
128
- Exception
129
- If parsing fails.
130
- """
131
- logger.info("Parsing resume...")
132
- resume_text = ""
133
- assert resume_source is not None
134
- resume_chunks = parse_resume(resume_source)
135
- for chunk in resume_chunks:
136
- if hasattr(chunk, "page_content") and chunk.page_content:
137
- resume_text += chunk.page_content
138
- elif isinstance(chunk, str) and chunk:
139
- resume_text += chunk
140
- else:
141
- logger.debug("Skipping empty or invalid chunk in resume: %s", chunk)
142
- return resume_text
143
-
144
- @log_async
145
- @log_errors
146
- async def parse_job_description(self, job_description_source):
147
- """
148
- Parse a job description and return its text and company name.
149
-
150
- Extracts both the job posting text and company name from the document.
151
- Company name is extracted from document metadata if available.
152
-
153
- Parameters
154
- ----------
155
- job_description_source: Any
156
- Source accepted by ``get_job_description`` (URL, file path, etc.).
157
-
158
- Returns
159
- -------
160
- Tuple[str, str]
161
- A tuple of (job_posting_text, company_name).
162
-
163
- Raises
164
- ------
165
- AssertionError
166
- If job_description_source is None.
167
- Exception
168
- If parsing fails.
169
- """
170
- company_name = ""
171
- job_posting_text = ""
172
-
173
- logger.info("Parsing job description from: %s", job_description_source)
174
- assert job_description_source is not None, (
175
- "Job description source cannot be None"
176
- )
177
-
178
- job_description_document: Optional[Document] = await get_job_description(
179
- job_description_source
180
- )
181
-
182
- # Extract company name from metadata
183
- if hasattr(job_description_document, "metadata") and isinstance(
184
- job_description_document.metadata, dict
185
- ):
186
- company_name = job_description_document.metadata.get("company_name", "")
187
- if not company_name:
188
- logger.warning("Company name not found in job description metadata.")
189
- else:
190
- logger.warning(
191
- "Metadata attribute missing or not a dict in job description document."
192
- )
193
-
194
- # Extract job posting text
195
- if hasattr(job_description_document, "page_content"):
196
- job_posting_text = job_description_document.page_content or ""
197
- if not job_posting_text:
198
- logger.info("Parsed job posting text is empty.")
199
- else:
200
- logger.warning(
201
- "page_content attribute missing in job description document."
202
- )
203
-
204
- return job_posting_text, company_name
205
-
206
- @log_async
207
- async def get_application_form_details(self, job_description_source):
208
- """
209
- Placeholder for future method to get application form details.
210
-
211
- This method will be implemented to extract form fields and requirements
212
- from job application forms.
213
-
214
- Parameters
215
- ----------
216
- job_description_source: Any
217
- Source of the job description or application form.
218
- """
219
- # TODO: Implement form field extraction
220
- pass
221
-
222
- # =======================================================================
223
- # Validation Methods
224
- # =======================================================================
225
-
226
- @log_execution
227
- @log_errors
228
- def verify_inputs(self, state: DataLoadState) -> DataLoadState:
229
- """
230
- Validate inputs and set next_node for routing.
231
-
232
- This method validates that both resume and job description are present
233
- in the state, normalizes their values to strings, and sets the next_node
234
- field for conditional routing in the main workflow.
235
-
236
- Parameters
237
- ----------
238
- state: DataLoadState
239
- Current workflow state containing company_research_data.
240
-
241
- Returns
242
- -------
243
- DataLoadState
244
- Updated state with next_node set to "load" (if validation fails)
245
- or "research" (if validation passes).
246
-
247
- Raises
248
- ------
249
- Exception
250
- If normalization fails for any field.
251
- """
252
- logger.info("Verifying loaded inputs!")
253
- state["current_node"] = "verify"
254
-
255
- # Validate required fields
256
- company_research_data = state.get("company_research_data", {})
257
-
258
- if not company_research_data.get("resume"):
259
- logger.error("Resume is missing in company_research_data")
260
- state["next_node"] = "load" # Loop back to load subgraph
261
- return state
262
-
263
- if not company_research_data.get("job_description"):
264
- logger.error("Job description is missing in company_research_data")
265
- state["next_node"] = "load" # Loop back to load subgraph
266
- return state
267
-
268
- # Normalize values to strings
269
- for key in ["resume", "job_description"]:
270
- try:
271
- value = company_research_data[key]
272
- if isinstance(value, (list, tuple)):
273
- company_research_data[key] = " ".join(str(x) for x in value)
274
- elif isinstance(value, dict):
275
- company_research_data[key] = str(value)
276
- else:
277
- company_research_data[key] = str(value)
278
- except Exception as e:
279
- logger.warning("Error converting %s to string: %s", key, e)
280
- state["next_node"] = "load"
281
- return state
282
-
283
- # All validations passed
284
- state["next_node"] = "research"
285
- logger.info("Inputs verified successfully, proceeding to research")
286
- return state
287
-
288
- # =======================================================================
289
- # Private Helper Methods (used by subgraph nodes)
290
- # =======================================================================
291
-
292
- @log_async
293
- @log_errors
294
- async def _load_resume(self, resume_source) -> str:
295
- """
296
- Load resume content, raising if the source is missing.
297
-
298
- This is a wrapper around get_resume() that validates the source first.
299
- Used by subgraph nodes for consistent error handling.
300
-
301
- Parameters
302
- ----------
303
- resume_source: Any
304
- Path or file-like object for the resume.
305
-
306
- Returns
307
- -------
308
- str
309
- Plain text content of the resume.
310
-
311
- Raises
312
- ------
313
- ValueError
314
- If resume_source is None or empty.
315
- """
316
- if not resume_source:
317
- raise ValueError("resume_source is required")
318
- return await self.get_resume(resume_source)
319
-
320
- @log_async
321
- @log_errors
322
- async def _load_job_description(self, jd_source) -> Tuple[str, str]:
323
- """
324
- Load job description text and company name, raising if missing.
325
-
326
- This is a wrapper around parse_job_description() that validates the source first.
327
- Used by subgraph nodes for consistent error handling.
328
-
329
- Parameters
330
- ----------
331
- jd_source: Any
332
- Source for the job description (URL, file path, etc.).
333
-
334
- Returns
335
- -------
336
- Tuple[str, str]
337
- A tuple of (job_posting_text, company_name).
338
-
339
- Raises
340
- ------
341
- ValueError
342
- If jd_source is None or empty.
343
- """
344
- if not jd_source:
345
- raise ValueError("job_description_source is required")
346
- return await self.parse_job_description(jd_source)
347
-
348
- @log_async
349
- @log_errors
350
- async def _prompt_user(self, prompt_msg: str) -> str:
351
- """
352
- Prompt the user for input (synchronous input wrapped for async use).
353
-
354
- This method wraps the synchronous input() function to be used in async contexts.
355
- In a production async UI, this would be replaced with an async input mechanism.
356
-
357
- Parameters
358
- ----------
359
- prompt_msg: str
360
- Message to display to the user.
361
-
362
- Returns
363
- -------
364
- str
365
- User input string.
366
- """
367
- # In a real async UI replace input with an async call.
368
- return input(prompt_msg)
369
-
370
-
371
- # ============================================================================
372
- # Data Loading Subgraph Nodes
373
- # ============================================================================
374
-
375
-
376
- @log_async
377
- async def parse_resume_node(state: DataLoadState) -> DataLoadState:
378
- """
379
- Node to parse resume in parallel with job description parsing.
380
-
381
- Extracts resume parsing logic from load_inputs for parallel execution.
382
- Returns only the resume data - reducer will merge with job description data.
383
- """
384
- dataloading = Dataloading()
385
- resume_src = state.get("resume_path")
386
-
387
- resume_text = ""
388
- if resume_src:
389
- resume_text = await dataloading._load_resume(resume_src)
390
- elif state.get("current_node") == "verify":
391
- resume_text = await dataloading._prompt_user(
392
- "Please paste the resume in text format: "
393
- )
394
-
395
- # Return only the resume data - reducer will merge this with job description data
396
- logger.info(f"Resume parsed: {len(resume_text)} characters")
397
- # Return partial state update - LangGraph will merge this with other parallel updates
398
- return {
399
- "company_research_data": {"resume": resume_text},
400
- }
401
-
402
-
403
- @log_async
404
- async def parse_job_description_node(state: DataLoadState) -> DataLoadState:
405
- """
406
- Node to parse job description in parallel with resume parsing.
407
-
408
- Extracts job description parsing logic from load_inputs for parallel execution.
409
- Returns only the job description data - reducer will merge with resume data.
410
- """
411
- dataloading = Dataloading()
412
- jd_src = state.get("job_description_source")
413
-
414
- job_text = ""
415
- company_name = ""
416
- if jd_src:
417
- job_text, company_name = await dataloading._load_job_description(jd_src)
418
- elif state.get("current_node") == "verify":
419
- job_text = await dataloading._prompt_user(
420
- "Please paste the job posting in text format: "
421
- )
422
-
423
- # Return only the job description data - reducer will merge this with resume data
424
- logger.info(
425
- f"Job description parsed: {len(job_text)} characters, company: {company_name}"
426
- )
427
- # Return partial state update - LangGraph will merge this with other parallel updates
428
- return {
429
- "company_research_data": {
430
- "job_description": job_text,
431
- "company_name": company_name,
432
- },
433
- }
434
-
435
-
436
- @log_execution
437
- def aggregate_data_loading_results(state: DataLoadState) -> DataLoadState:
438
- """
439
- Aggregate results from parallel resume and job description parsing nodes.
440
-
441
- This node runs after both parse_resume_node and parse_job_description_node
442
- complete. It ensures both results are present and normalizes the state.
443
- """
444
- # Ensure company_research_data exists
445
- if "company_research_data" not in state:
446
- state["company_research_data"] = {}
447
-
448
- # Get results from parallel nodes
449
- resume_text = state["company_research_data"].get("resume", "")
450
- job_text = state["company_research_data"].get("job_description", "")
451
- company_name = state["company_research_data"].get("company_name", "")
452
-
453
- # Validate both are present
454
- if not resume_text:
455
- logger.warning("Resume text is empty after parsing")
456
- if not job_text:
457
- logger.warning("Job description text is empty after parsing")
458
-
459
- # Ensure final structure is correct
460
- state["company_research_data"] = {
461
- "resume": resume_text,
462
- "job_description": job_text,
463
- "company_name": company_name,
464
- }
465
- state["current_node"] = "aggregate_results"
466
-
467
- logger.info("Data loading results aggregated successfully")
468
- return state
469
-
470
-
471
- @log_execution
472
- def verify_inputs_node(state: DataLoadState) -> DataLoadState:
473
- """
474
- Verify that required inputs are present and set next_node for routing.
475
-
476
- Modified from verify_inputs to return state with next_node instead of string.
477
- """
478
- dataloading = Dataloading()
479
- return dataloading.verify_inputs(state)
480
-
481
-
482
- # ============================================================================
483
- # Data Loading Subgraph
484
- # ============================================================================
485
-
486
- # Create data loading subgraph
487
- data_loading_subgraph = StateGraph(DataLoadState)
488
-
489
- # Add subgraph nodes
490
- dataloading_instance = Dataloading()
491
- data_loading_subgraph.add_node(
492
- "set_agent_system_message", dataloading_instance.set_agent_system_message
493
- )
494
- data_loading_subgraph.add_node("parse_resume", parse_resume_node)
495
- data_loading_subgraph.add_node("parse_job_description", parse_job_description_node)
496
- data_loading_subgraph.add_node("aggregate_results", aggregate_data_loading_results)
497
- data_loading_subgraph.add_node("verify_inputs", verify_inputs_node)
498
-
499
- # Add subgraph edges
500
- data_loading_subgraph.add_edge(START, "set_agent_system_message")
501
- # Parallel execution: both nodes start after set_agent_system_message
502
- data_loading_subgraph.add_edge("set_agent_system_message", "parse_resume")
503
- data_loading_subgraph.add_edge("set_agent_system_message", "parse_job_description")
504
- # Both parallel nodes feed into aggregate (LangGraph waits for both)
505
- data_loading_subgraph.add_edge("parse_resume", "aggregate_results")
506
- data_loading_subgraph.add_edge("parse_job_description", "aggregate_results")
507
- # Aggregate feeds into verification
508
- data_loading_subgraph.add_edge("aggregate_results", "verify_inputs")
509
- # Verification ends the subgraph
510
- data_loading_subgraph.add_edge("verify_inputs", END)
511
-
512
- # Compile data loading subgraph
513
- data_loading_workflow = data_loading_subgraph.compile()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/job_writing_agent/nodes/job_description_loader.py CHANGED
@@ -166,7 +166,7 @@ class JobDescriptionLoader:
166
  # TODO: Implement form field extraction
167
  pass
168
 
169
- async def _prompt_user(self) -> str:
170
  """
171
  Prompt the user for input (synchronous input wrapped for async use).
172
 
 
166
  # TODO: Implement form field extraction
167
  pass
168
 
169
+ async def _prompt_user_for_job_description(self) -> str:
170
  """
171
  Prompt the user for input (synchronous input wrapped for async use).
172
 
src/job_writing_agent/nodes/research_workflow.py CHANGED
@@ -1,17 +1,25 @@
1
  # research_workflow.py
2
- import logging
3
- import json
 
4
  import asyncio
5
- from typing import Dict, Any, cast
 
 
6
 
7
- from langgraph.graph import StateGraph, END, START
8
  import dspy
9
- from job_writing_agent.tools.SearchTool import TavilyResearchTool
10
- from job_writing_agent.classes.classes import ResearchState
11
- from job_writing_agent.tools.SearchTool import filter_research_results_by_relevance
12
  from job_writing_agent.agents.output_schema import (
13
  CompanyResearchDataSummarizationSchema,
14
  )
 
 
 
 
 
15
  from job_writing_agent.utils.llm_provider_factory import LLMFactory
16
 
17
  logger = logging.getLogger(__name__)
@@ -25,12 +33,19 @@ EVAL_TIMEOUT = 15 # seconds per evaluation
25
 
26
  def validate_research_inputs(state: ResearchState) -> tuple[bool, str, str]:
27
  """
28
- Validate that required inputs are present.
29
- Returns: (is_valid, company_name, job_description)
 
 
 
 
 
30
  """
31
  try:
32
- company_name = state["company_research_data"].get("company_name", "")
33
- job_description = state["company_research_data"].get("job_description", "")
 
 
34
 
35
  if not company_name or not company_name.strip():
36
  logger.error("Company name is missing or empty")
@@ -42,14 +57,14 @@ def validate_research_inputs(state: ResearchState) -> tuple[bool, str, str]:
42
 
43
  return True, company_name.strip(), job_description.strip()
44
 
45
- except (KeyError, TypeError, AttributeError) as e:
46
  logger.error(f"Invalid state structure: {e}")
47
  return False, "", ""
48
 
49
 
50
  def parse_dspy_queries_with_fallback(
51
- raw_queries: Dict[str, Any], company_name: str
52
- ) -> Dict[str, str]:
53
  """
54
  Parse DSPy query output with multiple fallback strategies.
55
  Returns a dict of query_id -> query_string.
@@ -88,7 +103,7 @@ def parse_dspy_queries_with_fallback(
88
  return get_fallback_queries(company_name)
89
 
90
 
91
- def get_fallback_queries(company_name: str) -> Dict[str, str]:
92
  """
93
  Generate basic fallback queries when DSPy fails.
94
  """
@@ -102,19 +117,27 @@ def get_fallback_queries(company_name: str) -> Dict[str, str]:
102
  def company_research_data_summary(state: ResearchState) -> ResearchState:
103
  """
104
  Summarize the filtered research data into a concise summary.
105
- Replaces the raw tavily_search results with a summarized version.
 
 
 
 
 
 
 
106
  """
107
  try:
108
- state["current_node"] = "company_research_data_summary"
 
109
 
110
- # Extract the current research data
111
  company_research_data = state.get("company_research_data", {})
112
  tavily_search_data = company_research_data.get("tavily_search", [])
113
 
114
  # If no research data, skip summarization
115
  if not tavily_search_data or len(tavily_search_data) == 0:
116
  logger.warning("No research data to summarize. Skipping summarization.")
117
- return state
118
 
119
  logger.info(f"Summarizing {len(tavily_search_data)} research result sets...")
120
 
@@ -127,7 +150,7 @@ def company_research_data_summary(state: ResearchState) -> ResearchState:
127
 
128
  llm_provider = LLMFactory()
129
  llm = llm_provider.create_dspy(
130
- model="mistralai/mistral-7b-instruct:free",
131
  provider="openrouter",
132
  temperature=0.3,
133
  )
@@ -137,29 +160,31 @@ def company_research_data_summary(state: ResearchState) -> ResearchState:
137
  response = company_research_data_summarization(
138
  company_research_data=company_research_data
139
  )
140
- # Extract the summary from the response
141
- # The response should have a 'company_research_data_summary' field (JSON string)
142
  if hasattr(response, "company_research_data_summary"):
143
  summary_json_str = response.company_research_data_summary
144
- elif isinstance(response, dict) and "company_research_data_summary" in response:
145
- summary_json_str = response["company_research_data_summary"]
146
  else:
147
  logger.error(
148
  f"Unexpected response format from summarization: {type(response)}"
149
  )
150
- return state
151
 
152
- # Parse the JSON summary
153
- state["company_research_data"]["company_research_data_summary"] = (
 
154
  summary_json_str
155
  )
 
156
 
157
- return state
158
 
159
  except Exception as e:
160
  logger.error(f"Error in company_research_data_summary: {e}", exc_info=True)
161
  # Return state unchanged on error
162
- return state
163
 
164
 
165
  async def research_company_with_retry(state: ResearchState) -> ResearchState:
@@ -173,9 +198,16 @@ async def research_company_with_retry(state: ResearchState) -> ResearchState:
173
 
174
  if not is_valid:
175
  logger.error("Invalid inputs for research. Skipping research phase.")
176
- state["company_research_data"]["tavily_search"] = []
177
- state["attempted_search_queries"] = []
178
- return state
 
 
 
 
 
 
 
179
 
180
  logger.info(f"Researching company: {company_name}")
181
 
@@ -254,14 +286,17 @@ async def research_company_with_retry(state: ResearchState) -> ResearchState:
254
  if len(search_results) == 0:
255
  logger.warning("No search results returned")
256
 
257
- # Store results
258
- state["attempted_search_queries"] = list(queries.values())
259
- state["company_research_data"]["tavily_search"] = search_results
260
-
261
- logger.info(
262
- f"Research completed successfully with {len(search_results)} result sets"
 
 
 
 
263
  )
264
- return state
265
 
266
  except Exception as e:
267
  logger.error(
@@ -273,22 +308,31 @@ async def research_company_with_retry(state: ResearchState) -> ResearchState:
273
  await asyncio.sleep(RETRY_DELAY * (attempt + 1)) # Exponential backoff
274
  else:
275
  logger.error("All retry attempts exhausted. Using empty results.")
276
- state["company_research_data"]["tavily_search"] = []
277
- state["attempted_search_queries"] = []
278
-
279
- return state
280
-
 
 
 
 
 
281
 
282
- async def research_company(state: ResearchState) -> ResearchState:
283
- """Wrapper to call the retry version."""
284
- return await research_company_with_retry(state)
 
 
 
 
285
 
286
 
287
  # Create research subgraph
288
  research_subgraph = StateGraph(ResearchState)
289
 
290
  # Add research subgraph nodes
291
- research_subgraph.add_node("research_company", research_company)
292
  research_subgraph.add_node("relevance_filter", filter_research_results_by_relevance)
293
  research_subgraph.add_node(
294
  "company_research_data_summary", company_research_data_summary
 
1
  # research_workflow.py
2
+ """Research workflow for company information gathering and filtering."""
3
+
4
+ # Standard library imports
5
  import asyncio
6
+ import json
7
+ import logging
8
+ from typing import Any, Dict, cast
9
 
10
+ # Third-party imports
11
  import dspy
12
+ from langgraph.graph import END, START, StateGraph
13
+
14
+ # Local imports
15
  from job_writing_agent.agents.output_schema import (
16
  CompanyResearchDataSummarizationSchema,
17
  )
18
+ from job_writing_agent.classes.classes import ResearchState
19
+ from job_writing_agent.tools.SearchTool import (
20
+ TavilyResearchTool,
21
+ filter_research_results_by_relevance,
22
+ )
23
  from job_writing_agent.utils.llm_provider_factory import LLMFactory
24
 
25
  logger = logging.getLogger(__name__)
 
33
 
34
  def validate_research_inputs(state: ResearchState) -> tuple[bool, str, str]:
35
  """
36
+ Validate that required inputs are present in research state.
37
+
38
+ Args:
39
+ state: Current research workflow state
40
+
41
+ Returns:
42
+ Tuple of (is_valid, company_name, job_description)
43
  """
44
  try:
45
+ # Safe dictionary access with fallbacks
46
+ company_research_data = state.get("company_research_data", {})
47
+ company_name = company_research_data.get("company_name", "")
48
+ job_description = company_research_data.get("job_description", "")
49
 
50
  if not company_name or not company_name.strip():
51
  logger.error("Company name is missing or empty")
 
57
 
58
  return True, company_name.strip(), job_description.strip()
59
 
60
+ except (TypeError, AttributeError) as e:
61
  logger.error(f"Invalid state structure: {e}")
62
  return False, "", ""
63
 
64
 
65
  def parse_dspy_queries_with_fallback(
66
+ raw_queries: dict[str, Any], company_name: str
67
+ ) -> dict[str, str]:
68
  """
69
  Parse DSPy query output with multiple fallback strategies.
70
  Returns a dict of query_id -> query_string.
 
103
  return get_fallback_queries(company_name)
104
 
105
 
106
+ def get_fallback_queries(company_name: str) -> dict[str, str]:
107
  """
108
  Generate basic fallback queries when DSPy fails.
109
  """
 
117
  def company_research_data_summary(state: ResearchState) -> ResearchState:
118
  """
119
  Summarize the filtered research data into a concise summary.
120
+
121
+ Replaces the raw tavily_search results with a summarized version using LLM.
122
+
123
+ Args:
124
+ state: Current research state with search results
125
+
126
+ Returns:
127
+ Updated state with research summary
128
  """
129
  try:
130
+ # Update current node
131
+ updated_state = {**state, "current_node": "company_research_data_summary"}
132
 
133
+ # Extract the current research data with safe access
134
  company_research_data = state.get("company_research_data", {})
135
  tavily_search_data = company_research_data.get("tavily_search", [])
136
 
137
  # If no research data, skip summarization
138
  if not tavily_search_data or len(tavily_search_data) == 0:
139
  logger.warning("No research data to summarize. Skipping summarization.")
140
+ return updated_state
141
 
142
  logger.info(f"Summarizing {len(tavily_search_data)} research result sets...")
143
 
 
150
 
151
  llm_provider = LLMFactory()
152
  llm = llm_provider.create_dspy(
153
+ model="cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
154
  provider="openrouter",
155
  temperature=0.3,
156
  )
 
160
  response = company_research_data_summarization(
161
  company_research_data=company_research_data
162
  )
163
+ # Extract the summary from the response with safe access
164
+ summary_json_str = ""
165
  if hasattr(response, "company_research_data_summary"):
166
  summary_json_str = response.company_research_data_summary
167
+ elif isinstance(response, dict):
168
+ summary_json_str = response.get("company_research_data_summary", "")
169
  else:
170
  logger.error(
171
  f"Unexpected response format from summarization: {type(response)}"
172
  )
173
+ return updated_state
174
 
175
+ # Update state with summary using safe dictionary operations
176
+ updated_company_research_data = {**company_research_data}
177
+ updated_company_research_data["company_research_data_summary"] = (
178
  summary_json_str
179
  )
180
+ updated_state["company_research_data"] = updated_company_research_data
181
 
182
+ return updated_state
183
 
184
  except Exception as e:
185
  logger.error(f"Error in company_research_data_summary: {e}", exc_info=True)
186
  # Return state unchanged on error
187
+ return updated_state
188
 
189
 
190
  async def research_company_with_retry(state: ResearchState) -> ResearchState:
 
198
 
199
  if not is_valid:
200
  logger.error("Invalid inputs for research. Skipping research phase.")
201
+ return ResearchState(
202
+ company_research_data={
203
+ **state.get("company_research_data", {}),
204
+ "tavily_search": [],
205
+ },
206
+ attempted_search_queries=[],
207
+ current_node="research_company",
208
+ content_category=state.get("content_category", "cover_letter"),
209
+ messages=state.get("messages", []),
210
+ )
211
 
212
  logger.info(f"Researching company: {company_name}")
213
 
 
286
  if len(search_results) == 0:
287
  logger.warning("No search results returned")
288
 
289
+ # Store results and return ResearchState
290
+ return ResearchState(
291
+ company_research_data={
292
+ **state.get("company_research_data", {}),
293
+ "tavily_search": search_results,
294
+ },
295
+ attempted_search_queries=list(queries.values()),
296
+ current_node="research_company",
297
+ content_category=state.get("content_category", "cover_letter"),
298
+ messages=state.get("messages", []),
299
  )
 
300
 
301
  except Exception as e:
302
  logger.error(
 
308
  await asyncio.sleep(RETRY_DELAY * (attempt + 1)) # Exponential backoff
309
  else:
310
  logger.error("All retry attempts exhausted. Using empty results.")
311
+ return ResearchState(
312
+ company_research_data={
313
+ **state.get("company_research_data", {}),
314
+ "tavily_search": [],
315
+ },
316
+ attempted_search_queries=[],
317
+ current_node="research_company",
318
+ content_category=state.get("content_category", "cover_letter"),
319
+ messages=state.get("messages", []),
320
+ )
321
 
322
+ return ResearchState(
323
+ company_research_data=state.get("company_research_data", {}),
324
+ attempted_search_queries=[],
325
+ current_node="research_company",
326
+ content_category=state.get("content_category", "cover_letter"),
327
+ messages=state.get("messages", []),
328
+ )
329
 
330
 
331
  # Create research subgraph
332
  research_subgraph = StateGraph(ResearchState)
333
 
334
  # Add research subgraph nodes
335
+ research_subgraph.add_node("research_company", research_company_with_retry)
336
  research_subgraph.add_node("relevance_filter", filter_research_results_by_relevance)
337
  research_subgraph.add_node(
338
  "company_research_data_summary", company_research_data_summary
src/job_writing_agent/nodes/selfconsistency.py CHANGED
@@ -1,10 +1,12 @@
1
- import logging
2
- from datetime import datetime
3
  import json
 
4
  import re
 
5
 
 
6
  from ..classes.classes import AppState
7
- from ..prompts.templates import DRAFT_RATING_PROMPT, BEST_DRAFT_SELECTION_PROMPT
8
  from ..utils.llm_provider_factory import LLMFactory
9
 
10
 
@@ -14,82 +16,104 @@ CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
14
 
15
 
16
  def self_consistency_vote(state: AppState) -> AppState:
17
- """Choose the best draft from multiple variations."""
 
 
 
 
 
 
 
 
 
 
 
18
  # Create LLM inside function (lazy initialization)
19
  llm_factory = LLMFactory()
20
- llm_precise = llm_factory.create_langchain(
21
  model="google/gemma-3-12b-it:free", provider="openrouter", temperature=0.1
22
  )
23
 
24
- variations = state.get("variations", {"variations": []})
 
25
 
26
- all_drafts = [state["draft"]] + variations["variations"]
27
 
28
  # First, have the LLM rate each draft
29
- ratings = []
30
 
31
- # Get resume and job summaries, handling different formats
32
  try:
33
- if isinstance(state["resume_path"], list) and len(state["resume_path"]) > 0:
34
- if hasattr(state["resume_path"][0], "page_content"):
35
- resume_summary = state["resume_path"][0].page_content
 
36
  else:
37
- resume_summary = state["resume_path"][0]
38
  else:
39
- resume_summary = str(state["resume_path"])
40
  except Exception as e:
41
- print(f"Warning: Error getting resume summary: {e}")
42
- resume_summary = str(state["resume_path"])
43
 
44
  try:
45
- if (
46
- isinstance(state["job_description_source"], list)
47
- and len(state["job_description_source"]) > 0
48
- ):
49
- job_summary = state["job_description_source"][0]
50
  else:
51
- job_summary = str(state["job_description_source"])
52
  except Exception as e:
53
- print(f"Warning: Error getting job summary: {e}")
54
- job_summary = str(state["job_description_source"])
55
-
56
- for i, draft in enumerate(all_drafts):
57
- rating = llm_precise.invoke(
58
- DRAFT_RATING_PROMPT.format(
59
- resume_summary=resume_summary,
60
- job_summary=job_summary,
61
- draft=draft,
62
- draft_number=i + 1,
63
- )
 
 
64
  )
65
- ratings.append(rating)
66
 
67
- # Create a clearer, more structured prompt for draft selection
68
- selection_prompt = BEST_DRAFT_SELECTION_PROMPT.format(
69
- ratings_json=json.dumps(ratings, indent=2), num_drafts=len(all_drafts)
 
 
 
 
70
  )
71
 
72
  # Get the selected draft index with error handling
73
  try:
74
- selection = llm_precise.invoke(selection_prompt).strip()
 
 
 
 
75
  # Extract just the first number found in the response
76
- number_match = re.search(r"\d+", selection)
77
  if not number_match:
78
- print(
79
- "Warning: Could not extract draft number from LLM response. Using original draft."
80
  )
81
- best_draft_idx = 0
82
  else:
83
- best_draft_idx = int(number_match.group()) - 1
84
  # Validate the index is in range
85
- if best_draft_idx < 0 or best_draft_idx >= len(all_drafts):
86
- print(
87
- f"Warning: Selected draft index {best_draft_idx + 1} out of range. Using original draft."
88
  )
89
- best_draft_idx = 0
90
  except (ValueError, TypeError) as e:
91
- print(f"Warning: Error selecting best draft: {e}. Using original draft.")
92
- best_draft_idx = 0
93
 
94
- state["draft"] = all_drafts[best_draft_idx]
95
- return state
 
 
1
+ # Standard library imports
 
2
  import json
3
+ import logging
4
  import re
5
+ from datetime import datetime
6
 
7
+ # Local imports
8
  from ..classes.classes import AppState
9
+ from ..prompts.templates import BEST_DRAFT_SELECTION_PROMPT, DRAFT_RATING_PROMPT
10
  from ..utils.llm_provider_factory import LLMFactory
11
 
12
 
 
16
 
17
 
18
  def self_consistency_vote(state: AppState) -> AppState:
19
+ """
20
+ Choose the best draft from multiple variations using LLM-based voting.
21
+
22
+ This function rates all draft variations and selects the best one based on
23
+ criteria like relevance, professional tone, personalization, and persuasiveness.
24
+
25
+ Args:
26
+ state: Application state containing the original draft and variations
27
+
28
+ Returns:
29
+ Updated state with the best draft selected
30
+ """
31
  # Create LLM inside function (lazy initialization)
32
  llm_factory = LLMFactory()
33
+ precise_llm = llm_factory.create_langchain(
34
  model="google/gemma-3-12b-it:free", provider="openrouter", temperature=0.1
35
  )
36
 
37
+ variations_data = state.get("variations", {"variations": []})
38
+ original_draft = state.get("draft", "")
39
 
40
+ all_drafts = [original_draft] + variations_data.get("variations", [])
41
 
42
  # First, have the LLM rate each draft
43
+ draft_ratings = []
44
 
45
+ # Get resume and job summaries with safe dictionary access
46
  try:
47
+ resume_path = state.get("resume_path", "")
48
+ if isinstance(resume_path, list) and len(resume_path) > 0:
49
+ if hasattr(resume_path[0], "page_content"):
50
+ resume_summary = resume_path[0].page_content
51
  else:
52
+ resume_summary = resume_path[0]
53
  else:
54
+ resume_summary = str(resume_path)
55
  except Exception as e:
56
+ logger.warning(f"Error getting resume summary: {e}")
57
+ resume_summary = str(state.get("resume_path", ""))
58
 
59
  try:
60
+ job_description_source = state.get("job_description_source", "")
61
+ if isinstance(job_description_source, list) and len(job_description_source) > 0:
62
+ job_summary = job_description_source[0]
 
 
63
  else:
64
+ job_summary = str(job_description_source)
65
  except Exception as e:
66
+ logger.warning(f"Error getting job summary: {e}")
67
+ job_summary = str(state.get("job_description_source", ""))
68
+
69
+ for draft_index, draft_content in enumerate(all_drafts):
70
+ # Create chain with proper prompt template invocation
71
+ rating_chain = DRAFT_RATING_PROMPT | precise_llm
72
+ rating_result = rating_chain.invoke(
73
+ {
74
+ "resume_summary": resume_summary,
75
+ "job_summary": job_summary,
76
+ "draft": draft_content,
77
+ "draft_number": draft_index + 1,
78
+ }
79
  )
80
+ draft_ratings.append(rating_result)
81
 
82
+ # Create chain for draft selection with proper prompt template invocation
83
+ selection_chain = BEST_DRAFT_SELECTION_PROMPT | precise_llm
84
+ selection_result = selection_chain.invoke(
85
+ {
86
+ "ratings_json": json.dumps(draft_ratings, indent=2),
87
+ "num_drafts": len(all_drafts),
88
+ }
89
  )
90
 
91
  # Get the selected draft index with error handling
92
  try:
93
+ selection_text = str(
94
+ selection_result.content
95
+ if hasattr(selection_result, "content")
96
+ else selection_result
97
+ ).strip()
98
  # Extract just the first number found in the response
99
+ number_match = re.search(r"\d+", selection_text)
100
  if not number_match:
101
+ logger.warning(
102
+ "Could not extract draft number from LLM response. Using original draft."
103
  )
104
+ best_draft_index = 0
105
  else:
106
+ best_draft_index = int(number_match.group()) - 1
107
  # Validate the index is in range
108
+ if best_draft_index < 0 or best_draft_index >= len(all_drafts):
109
+ logger.warning(
110
+ f"Selected draft index {best_draft_index + 1} out of range. Using original draft."
111
  )
112
+ best_draft_index = 0
113
  except (ValueError, TypeError) as e:
114
+ logger.warning(f"Error selecting best draft: {e}. Using original draft.")
115
+ best_draft_index = 0
116
 
117
+ # Update state with best draft using safe dictionary operations
118
+ updated_state = {**state, "draft": all_drafts[best_draft_index]}
119
+ return updated_state
src/job_writing_agent/nodes/system_initializer.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ System Initializer Module
4
+
5
+ This module provides the SystemInitializer class responsible for initializing
6
+ system messages in the workflow state. It follows the Single Responsibility
7
+ Principle by focusing solely on system message initialization.
8
+ """
9
+
10
+ import logging
11
+ from typing import Optional
12
+
13
+ from langchain_core.messages import SystemMessage
14
+
15
+ from job_writing_agent.classes import DataLoadState
16
+ from job_writing_agent.prompts.templates import agent_system_prompt
17
+ from job_writing_agent.utils.logging.logging_decorators import log_async
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ class SystemInitializer:
23
+ """
24
+ Responsible for initializing system messages in workflow state.
25
+
26
+ Example:
27
+ >>> initializer = SystemInitializer()
28
+ >>> state = await initializer.set_agent_system_message(initial_state)
29
+ >>>
30
+ >>> # With custom prompt for testing
31
+ >>> custom_prompt = "Custom system prompt"
32
+ >>> initializer = SystemInitializer(system_prompt=custom_prompt)
33
+ """
34
+
35
+ def __init__(self, system_prompt: Optional[str] = None):
36
+ """
37
+ Initialize SystemInitializer with optional system prompt dependency injection.
38
+
39
+ Parameters
40
+ ----------
41
+ system_prompt: Optional[str]
42
+ System prompt text to use. Defaults to `agent_system_prompt` from
43
+ prompts.templates. Can be injected for testing or custom prompts.
44
+ """
45
+ self._system_prompt = system_prompt or agent_system_prompt
46
+
47
+ @log_async
48
+ async def set_agent_system_message(self, state: DataLoadState) -> DataLoadState:
49
+ """
50
+ Add the system prompt to the conversation state.
51
+
52
+ This method creates a SystemMessage from the configured prompt and
53
+ adds it to the messages list in the workflow state.
54
+
55
+ Parameters
56
+ ----------
57
+ state: DataLoadState
58
+ Current workflow state containing messages list.
59
+
60
+ Returns
61
+ -------
62
+ DataLoadState
63
+ Updated state with the system message added to messages list
64
+ and current_node set to "initialize_system".
65
+ """
66
+ agent_initialization_system_message = SystemMessage(content=self._system_prompt)
67
+ messages = state.get("messages", [])
68
+ messages.append(agent_initialization_system_message)
69
+ return {
70
+ **state,
71
+ "messages": messages,
72
+ "current_node": "initialize_system",
73
+ }
src/job_writing_agent/nodes/validation_helper.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Validation Helper Module
4
+
5
+ This module provides the ValidationHelper class responsible for validating
6
+ workflow inputs and setting routing decisions. It follows the Single
7
+ Responsibility Principle by focusing solely on input validation.
8
+ """
9
+
10
+ import logging
11
+
12
+ from job_writing_agent.classes import DataLoadState
13
+ from job_writing_agent.utils.logging.logging_decorators import (
14
+ log_execution,
15
+ log_errors,
16
+ )
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ class ValidationHelper:
22
+ """
23
+ Responsible for validating workflow inputs and setting routing decisions.
24
+
25
+
26
+ Example:
27
+ >>> validator = ValidationHelper()
28
+ >>> validated_state = validator.verify_inputs(state)
29
+ >>> next_node = validated_state.get("next_node") # "load" or "research"
30
+ """
31
+
32
+ def __init__(self):
33
+ """
34
+ Initialize ValidationHelper.
35
+
36
+ This class is stateless - no dependencies needed for validation logic.
37
+ """
38
+ pass
39
+
40
+ @log_execution
41
+ @log_errors
42
+ def verify_inputs(self, state: DataLoadState) -> DataLoadState:
43
+ """
44
+ Validate inputs and set next_node for routing.
45
+
46
+ This method validates that both resume and job description are present
47
+ and non-empty in the state.
48
+ Parameters
49
+ ----------
50
+ state: DataLoadState
51
+ Current workflow state containing company_research_data.
52
+
53
+ Returns
54
+ -------
55
+ DataLoadState
56
+ Updated state with next_node set to "load" (if validation fails)
57
+ or "research" (if validation passes).
58
+ """
59
+ logger.info("Verifying loaded inputs!")
60
+ state["current_node"] = "verify"
61
+
62
+ # Validate required fields using helper methods
63
+ if not self._validate_resume(state):
64
+ logger.error("Resume is missing or empty in company_research_data")
65
+ state["next_node"] = "load" # Loop back to load subgraph
66
+ return state
67
+
68
+ if not self._validate_job_description(state):
69
+ logger.error("Job description is missing or empty in company_research_data")
70
+ state["next_node"] = "load" # Loop back to load subgraph
71
+ return state
72
+
73
+ # All validations passed
74
+ state["next_node"] = "research"
75
+ logger.info("Inputs verified successfully, proceeding to research")
76
+ return state
77
+
78
+ def _validate_resume(self, state: DataLoadState) -> bool:
79
+ """
80
+ Validate that resume is present and non-empty in company_research_data.
81
+
82
+ Private helper method for better code organization.
83
+
84
+ Parameters
85
+ ----------
86
+ state: DataLoadState
87
+ Current workflow state.
88
+
89
+ Returns
90
+ -------
91
+ bool
92
+ True if resume is present and non-empty, False otherwise.
93
+ """
94
+ company_research_data = state.get("company_research_data", {})
95
+ resume = company_research_data.get("resume", "")
96
+ # Handle various types: convert to string and check if non-empty
97
+ if not resume:
98
+ return False
99
+ resume_str = str(resume).strip()
100
+ return bool(resume_str)
101
+
102
+ def _validate_job_description(self, state: DataLoadState) -> bool:
103
+ """
104
+ Validate that job description is present and non-empty in company_research_data.
105
+
106
+ Private helper method for better code organization.
107
+
108
+ Parameters
109
+ ----------
110
+ state: DataLoadState
111
+ Current workflow state.
112
+
113
+ Returns
114
+ -------
115
+ bool
116
+ True if job description is present and non-empty, False otherwise.
117
+ """
118
+ company_research_data = state.get("company_research_data", {})
119
+ job_description = company_research_data.get("job_description", "")
120
+ # Handle various types: convert to string and check if non-empty
121
+ if not job_description:
122
+ return False
123
+ job_desc_str = str(job_description).strip()
124
+ return bool(job_desc_str)
src/job_writing_agent/nodes/variations.py CHANGED
@@ -1,22 +1,36 @@
 
1
  import logging
2
  from datetime import datetime
3
- from typing_extensions import Dict, List
4
 
 
5
  from langchain_core.documents import Document
6
 
7
-
8
  from ..classes.classes import ResultState
9
- from ..utils.llm_provider_factory import LLMFactory
10
  from ..prompts.templates import VARIATION_PROMPT
11
-
12
 
13
  logger = logging.getLogger(__name__)
14
  # Constants
15
  CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
16
 
17
 
18
- def generate_variations(state: ResultState) -> Dict[str, List[str]]:
19
- """Generate multiple variations of the draft for self-consistency voting."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  # Create LLM inside function (lazy initialization)
21
  llm_provider = LLMFactory()
22
  llm = llm_provider.create_langchain(
@@ -27,27 +41,30 @@ def generate_variations(state: ResultState) -> Dict[str, List[str]]:
27
 
28
  # Get resume and job text, handling both string and Document types
29
  try:
30
- resume_text = "\n".join(
31
- doc.page_content if isinstance(doc, Document) else doc
32
- for doc in (
33
- state["resume"][:2]
34
- if isinstance(state["company_research_data"]["resume"], str)
35
- else [state["resume"]]
36
- )
37
- )
38
- job_text = "\n".join(
39
- chunk
40
- for chunk in (
41
- state["company_research_data"]["job_description"][:2]
42
- if isinstance(state["company_research_data"]["job_description"], str)
43
- else [state["company_research_data"]["job_description"]]
44
  )
45
- )
 
 
 
 
 
 
 
 
 
 
46
  except Exception as e:
47
- print(f"Warning: Error processing resume/job text: {e}")
48
  # Fallback to simple string handling
49
- resume_text = str(state["company_research_data"]["resume"])
50
- job_text = str(state["company_research_data"]["job_description"])
51
 
52
  # Generate variations with different temperatures and creativity settings
53
  temp_variations = [
@@ -65,22 +82,23 @@ def generate_variations(state: ResultState) -> Dict[str, List[str]]:
65
 
66
  # Use VARIATION_PROMPT directly with the configured LLM
67
  variation = VARIATION_PROMPT.format_messages(
68
- resume_excerpt=resume_text, job_excerpt=job_text, draft=state["draft"]
69
  )
70
 
71
  response = configured_llm.invoke(variation)
72
 
73
- print(f"Response for setting: {variation} has a response: {response}")
74
 
75
  if response and response.strip(): # Only add non-empty variations
76
  variations.append(response)
77
  except Exception as e:
78
- print(f"Warning: Error generating variation with settings {settings}: {e}")
79
  continue
80
 
81
  # Ensure we have at least one variation
82
  if not variations:
83
  # If all variations failed, add the original draft as a fallback
84
- variations.append(state["draft"])
 
85
 
86
  return {"variations": variations}
 
1
+ # Standard library imports
2
  import logging
3
  from datetime import datetime
 
4
 
5
+ # Third-party imports
6
  from langchain_core.documents import Document
7
 
8
+ # Local imports
9
  from ..classes.classes import ResultState
 
10
  from ..prompts.templates import VARIATION_PROMPT
11
+ from ..utils.llm_provider_factory import LLMFactory
12
 
13
  logger = logging.getLogger(__name__)
14
  # Constants
15
  CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
16
 
17
 
18
+ def generate_variations(state: ResultState) -> dict[str, list[str]]:
19
+ """
20
+ Generate multiple variations of the draft for self-consistency voting.
21
+
22
+ Args:
23
+ state: Current result state with draft and research data
24
+
25
+ Returns:
26
+ Dictionary containing list of draft variations
27
+ """
28
+ # Validate and extract all required state fields once
29
+ company_research_data = state.get("company_research_data", {})
30
+ draft_content = state.get("draft", "")
31
+ resume_data = company_research_data.get("resume", "")
32
+ job_description_data = company_research_data.get("job_description", "")
33
+
34
  # Create LLM inside function (lazy initialization)
35
  llm_provider = LLMFactory()
36
  llm = llm_provider.create_langchain(
 
41
 
42
  # Get resume and job text, handling both string and Document types
43
  try:
44
+ # Extract resume text
45
+ if isinstance(resume_data, str):
46
+ resume_text = resume_data[:2000] # Limit to first 2000 chars
47
+ elif isinstance(resume_data, list):
48
+ resume_text = "\n".join(
49
+ doc.page_content if isinstance(doc, Document) else str(doc)
50
+ for doc in resume_data[:2]
 
 
 
 
 
 
 
51
  )
52
+ else:
53
+ resume_text = str(resume_data)
54
+
55
+ # Extract job description text
56
+ if isinstance(job_description_data, str):
57
+ job_text = job_description_data[:2000] # Limit to first 2000 chars
58
+ elif isinstance(job_description_data, list):
59
+ job_text = "\n".join(str(chunk) for chunk in job_description_data[:2])
60
+ else:
61
+ job_text = str(job_description_data)
62
+
63
  except Exception as e:
64
+ logger.warning(f"Error processing resume/job text: {e}")
65
  # Fallback to simple string handling
66
+ resume_text = str(resume_data)
67
+ job_text = str(job_description_data)
68
 
69
  # Generate variations with different temperatures and creativity settings
70
  temp_variations = [
 
82
 
83
  # Use VARIATION_PROMPT directly with the configured LLM
84
  variation = VARIATION_PROMPT.format_messages(
85
+ resume_excerpt=resume_text, job_excerpt=job_text, draft=draft_content
86
  )
87
 
88
  response = configured_llm.invoke(variation)
89
 
90
+ logger.debug(f"Generated variation with settings {settings}")
91
 
92
  if response and response.strip(): # Only add non-empty variations
93
  variations.append(response)
94
  except Exception as e:
95
+ logger.warning(f"Error generating variation with settings {settings}: {e}")
96
  continue
97
 
98
  # Ensure we have at least one variation
99
  if not variations:
100
  # If all variations failed, add the original draft as a fallback
101
+ logger.warning("All variations failed, using original draft as fallback")
102
+ variations.append(draft_content)
103
 
104
  return {"variations": variations}
src/job_writing_agent/prompts/templates.py CHANGED
@@ -9,6 +9,7 @@ from langchain_core.prompts import (
9
  ChatPromptTemplate,
10
  SystemMessagePromptTemplate,
11
  HumanMessagePromptTemplate,
 
12
  )
13
  from langchain_core.messages import SystemMessage, HumanMessage
14
 
@@ -36,63 +37,59 @@ PERSONA_DEVELOPMENT_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_message
36
 
37
 
38
  # Draft generation prompts
39
-
40
- COVER_LETTER_PROMPT: SystemMessage = SystemMessage(
41
- content="""
42
- You are CoverLetterGPT, a concise career‑writing assistant.
43
-
44
- CORE OBJECTIVE
45
- Draft a 3‑paragraph cover letter (150‑180 words total) that targets hiring managers
46
- and technical recruiters. Assume it may reach the CEO.
47
- Begin exactly with: "To Hiring Team,"
48
- End exactly with: "Thanks, Rishabh"
49
- Tone: polite, casual, enthusiastic but no em dashes (—) and no clichés.
50
- Every fact about achievements, skills, or company details must be traceable to the
51
- provided resume, job description, or company research; otherwise, ask the user.
52
- If any critical detail is missing or ambiguous, STOP and ask a clarifying question
53
- before writing the letter.
54
- Keep sentences tight; avoid filler like “I am excited to…” (enthusiasm comes
55
- through precise language).
56
- • Never exceed 180 words. Never fall below 150 words.
57
-
58
- SELF‑EVALUATION (append after the letter)
59
- After producing the cover letter, output an “### Evaluation” section containing:
60
- Comprehensiveness (1‑5)
61
- Evidence provided (1‑5)
62
- Clarity of explanation (1‑5)
63
- Potential limitations or biases (bullet list)
64
- Areas for improvement (brief notes)
65
-
66
- ERROR HANDLING
67
- If word count, section order, or format rules are violated, regenerate until correct.
68
- """
69
  )
70
 
71
 
72
- BULLET_POINTS_PROMPT: SystemMessage = SystemMessage(
73
- content="""You are an expert job application writer who
74
- creates personalized application materials.
75
-
76
- {persona_instruction}
77
 
78
- Write 5-7 bullet points highlighting the candidate's
79
- qualifications for this specific role.
80
- Create content that genuinely reflects the candidate's
81
- background and is tailored to the specific job.
82
- Ensure the tone is professional, confident, and authentic.
83
- Today is {current_date}."""
 
84
  )
85
 
86
 
87
- LINKEDIN_NOTE_PROMPT: SystemMessage = SystemMessage(
88
- content="""You are an expert job application
89
- writer who creates personalized application materials.
90
- {persona_instruction}
91
 
92
- Write a brief LinkedIn connection note to a hiring manager or recruiter (150 words max).
93
- Create content that genuinely reflects the candidate's background and is tailored to the specific job.
94
- Ensure the tone is professional, confident, and authentic.
95
- Today is {current_date}."""
 
96
  )
97
 
98
  # Variation generation prompt
@@ -230,6 +227,35 @@ REVISION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
230
  ]
231
  )
232
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  # Tavily query prompt to build knowledge context about the company
234
 
235
  TAVILY_QUERY_PROMPT = """
@@ -247,33 +273,6 @@ The user needs targeted search queries (with rationale) for Tavily Search to res
247
  </Requirements>
248
  """
249
 
250
- JOB_DESCRIPTION_PROMPT = """You are a JSON extraction specialist. Extract job information from the provided text and return ONLY valid JSON.
251
-
252
- CRITICAL: Your response must be parseable by json.loads() - no markdown, no explanations, no extra text.
253
-
254
- Extract these three fields in exact order:
255
- 1. job_description field - Complete job posting formatted in clean markdown with proper headers (## Job Description, ## Responsibilities, ## Requirements, etc.)
256
- 2. company_name field - Exact company name as mentioned
257
- 3. job_title field - Exact job title as posted
258
-
259
- FORMATTING RULES:
260
- - Use double quotes for all strings
261
- - Escape internal quotes with \\"
262
- - Escape newlines as \\\\n in the job description field
263
- - Replace actual line breaks with \\\\n
264
- - If any field is missing, use empty string ""
265
- - No trailing commas
266
- - No comments or extra whitespace
267
-
268
- REQUIRED OUTPUT FORMAT:
269
- {{
270
- "job_description": "markdown formatted job description with \\\\n for line breaks",
271
- "company_name": "exact company name",
272
- "job_title": "exact job title"
273
- }}
274
-
275
- Return only the JSON object - no other text."""
276
-
277
  agent_system_prompt = """I act as your personal job-application assistant.
278
  My function is to help you research, analyze, and write compelling application
279
  materials — primarily LinkedIn reach-outs, short written responses, and cover
 
9
  ChatPromptTemplate,
10
  SystemMessagePromptTemplate,
11
  HumanMessagePromptTemplate,
12
+ AIMessagePromptTemplate,
13
  )
14
  from langchain_core.messages import SystemMessage, HumanMessage
15
 
 
37
 
38
 
39
  # Draft generation prompts
40
+ COVER_LETTER_PROMPT = AIMessagePromptTemplate.from_template(
41
+ """
42
+ I am CoverLetterGPT, a concise career writing assistant.
43
+
44
+ CORE OBJECTIVE
45
+ Draft a 3‑paragraph cover letter (150‑180 words total) that targets hiring managers
46
+ and technical recruiters. Assume it may reach the CEO.
47
+ Begin exactly with: "To Hiring Team,"
48
+ End exactly with: "Thanks, Rishabh"
49
+ Tone: polite, casual, enthusiastic — but no em dashes (—) and no clichés.
50
+ Every fact about achievements, skills, or company details must be traceable to the
51
+ provided resume, job description, or company research; otherwise, ask the user.
52
+ If any critical detail is missing or ambiguous, STOP and ask a clarifying question
53
+ before writing the letter.
54
+ Keep sentences tight; avoid filler like “I am excited to…” (enthusiasm comes
55
+ through precise language).
56
+ Never exceed 180 words. Never fall below 150 words.
57
+
58
+ SELF‑EVALUATION (append after the letter)
59
+ After producing the cover letter, output an “### Evaluation” section containing:
60
+ Comprehensiveness (1‑5)
61
+ Evidence provided (1‑5)
62
+ Clarity of explanation (1‑5)
63
+ Potential limitations or biases (bullet list)
64
+ Areas for improvement (brief notes)
65
+
66
+ ERROR HANDLING
67
+ If word count, section order, or format rules are violated, regenerate until correct.
68
+ """
 
69
  )
70
 
71
 
72
+ BULLET_POINTS_PROMPT = AIMessagePromptTemplate.from_template(
73
+ """I am an expert job application writer who creates personalized application materials.
 
 
 
74
 
75
+ Write 5-7 bullet points highlighting the candidate's
76
+ qualifications for this specific role.
77
+ Create content that genuinely reflects the candidate's
78
+ background and is tailored to the specific job.
79
+ Ensure the tone is professional, confident, and authentic.
80
+ Today is {current_date}.""",
81
+ input_variables=["current_date"],
82
  )
83
 
84
 
85
+ LINKEDIN_NOTE_PROMPT = AIMessagePromptTemplate.from_template(
86
+ """I am an expert job application writer who creates personalized application materials.
 
 
87
 
88
+ Write a brief LinkedIn connection note to a hiring manager or recruiter (100 words max).
89
+ Create content that genuinely reflects the candidate's background and is tailored to the specific job.
90
+ Ensure the tone is professional, confident, and authentic.
91
+ Today is {current_date}.""",
92
+ input_variables=["current_date"],
93
  )
94
 
95
  # Variation generation prompt
 
227
  ]
228
  )
229
 
230
+ DRAFT_GENERATION_CONTEXT_PROMPT = HumanMessagePromptTemplate.from_template(
231
+ """
232
+ Below is the Job Description, Candidate Resume, and Company Research Data enclosed in triple backticks.
233
+
234
+ **Job Description:**
235
+
236
+ START OF JOB DESCRIPTION'''
237
+ {current_job_role}
238
+ '''END OF JOB DESCRIPTION
239
+
240
+ **Candidate Resume:**
241
+
242
+ START OF CANDIDATE RESUME'''
243
+ {candidate_resume}
244
+ '''END OF CANDIDATE RESUME
245
+
246
+ **Company Research Data:**
247
+
248
+ START OF COMPANY RESEARCH DATA'''
249
+ {company_research_data}
250
+ '''END OF COMPANY RESEARCH DATA
251
+ """,
252
+ input_variables=[
253
+ "current_job_role",
254
+ "candidate_resume",
255
+ "company_research_data",
256
+ ],
257
+ )
258
+
259
  # Tavily query prompt to build knowledge context about the company
260
 
261
  TAVILY_QUERY_PROMPT = """
 
273
  </Requirements>
274
  """
275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  agent_system_prompt = """I act as your personal job-application assistant.
277
  My function is to help you research, analyze, and write compelling application
278
  materials — primarily LinkedIn reach-outs, short written responses, and cover
src/job_writing_agent/prompts/test_templates.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.prompts import (
2
+ ChatPromptTemplate,
3
+ SystemMessagePromptTemplate,
4
+ AIMessagePromptTemplate,
5
+ HumanMessagePromptTemplate,
6
+ )
7
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
8
+
9
+ from job_writing_agent.utils.llm_provider_factory import LLMFactory
10
+
11
+
12
+ llm_provider = LLMFactory()
13
+ llm = llm_provider.create_langchain(
14
+ "allenai/olmo-3.1-32b-think:free",
15
+ provider="openrouter",
16
+ temperature=0.1,
17
+ )
18
+
19
+
20
+ # Use PromptTemplate classes for variable interpolation
21
+ TEST_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
22
+ [
23
+ # Use SystemMessagePromptTemplate for SystemMessage with variables
24
+ SystemMessagePromptTemplate.from_template(
25
+ "You can answer any question that the user asks. If you don't know the answer, say 'I don't know' and don't make up an answer. Todays date is {current_date}.",
26
+ input_variables=["current_date"],
27
+ ),
28
+ # Use AIMessagePromptTemplate for AIMessage with variables (if needed)
29
+ # Or use AIMessage directly if no variables
30
+ AIMessagePromptTemplate.from_template(
31
+ "I am here to help you answer any question that you ask.",
32
+ input_variables=["current_date"],
33
+ ),
34
+ ]
35
+ )
36
+
37
+ # Now the chain will work correctly
38
+ prompt_test_chain = ({"current_date": lambda x: x["current_date"]}) | TEST_PROMPT | llm
39
+
40
+ # Test it
41
+ print(TEST_PROMPT)
42
+
43
+
44
+ BULLET_POINTS_PROMPT = SystemMessagePromptTemplate.from_template(
45
+ """You are an expert job application writer who
46
+ creates personalized application materials.
47
+
48
+ {persona_instruction}
49
+
50
+ Write 5-7 bullet points highlighting the candidate's
51
+ qualifications for this specific role.
52
+ Create content that genuinely reflects the candidate's
53
+ background and is tailored to the specific job.
54
+ Ensure the tone is professional, confident, and authentic.
55
+ Today is {current_date}.""",
56
+ input_variables=["persona_instruction", "current_date"],
57
+ )
58
+
59
+ print(BULLET_POINTS_PROMPT)
src/job_writing_agent/tools/SearchTool.py CHANGED
@@ -1,14 +1,17 @@
 
 
1
  import logging
2
  import os
3
- import asyncio
4
- from dotenv import load_dotenv
5
  from pathlib import Path
6
 
 
 
 
7
  from langchain_tavily import TavilySearch
8
  from openevals.llm import create_async_llm_as_judge
9
- from openevals.prompts import RAG_RETRIEVAL_RELEVANCE_PROMPT, RAG_HELPFULNESS_PROMPT
10
- import dspy
11
 
 
12
  from ..agents.output_schema import TavilySearchQueries
13
  from ..classes.classes import ResearchState
14
  from ..utils.llm_provider_factory import LLMFactory
@@ -21,7 +24,11 @@ env_path = Path(__file__).parent / ".env"
21
  load_dotenv(dotenv_path=env_path, override=True)
22
 
23
 
24
- openrouter_api_key = os.environ["OPENROUTER_API_KEY"]
 
 
 
 
25
 
26
 
27
  class TavilyResearchTool:
@@ -30,7 +37,7 @@ class TavilyResearchTool:
30
  job_description,
31
  company_name,
32
  max_results=5,
33
- model_name="mistralai/mistral-7b-instruct:free",
34
  ):
35
  # Create LLM inside __init__ (lazy initialization)
36
  llm_provider = LLMFactory()
@@ -55,19 +62,34 @@ class TavilyResearchTool:
55
  return response
56
 
57
  def tavily_search_company(self, queries):
 
 
 
 
 
 
 
 
 
58
  query_results: list[list[str]] = []
59
- for query in queries:
60
  try:
 
 
 
 
 
61
  search_query_response = self.tavily_searchtool.invoke(
62
- {"query": queries[query]}
63
  )
 
 
64
  query_results.append(
65
- [res["content"] for res in search_query_response["results"]]
66
  )
67
- # print(f"Tavily Search Tool Response for query '{search_query_response['query']}': {query_results_map[search_query_response['query']]}")
68
  except Exception as e:
69
  logger.error(
70
- f"Failed to perform company research using TavilySearchTool. Error : {e}"
71
  )
72
  continue
73
 
@@ -120,10 +142,9 @@ async def filter_research_results_by_relevance(state: ResearchState) -> Research
120
  try:
121
  state["current_node"] = "filter_research_results_by_relevance"
122
 
123
- # Extract search data from state
124
- raw_search_results = state.get("company_research_data", {}).get(
125
- "tavily_search", []
126
- )
127
  search_queries_used = state.get("attempted_search_queries", [])
128
 
129
  # Validate data types
@@ -138,7 +159,9 @@ async def filter_research_results_by_relevance(state: ResearchState) -> Research
138
  # Early exit if no results
139
  if len(raw_search_results) == 0:
140
  logger.info("No search results to filter.")
141
- state["company_research_data"]["tavily_search"] = []
 
 
142
  return state
143
 
144
  logger.info(
@@ -201,6 +224,7 @@ async def filter_research_results_by_relevance(state: ResearchState) -> Research
201
  logger.warning(
202
  f"Evaluation timed out for query: {original_query[:60]}... (KEEPING result)"
203
  )
 
204
  return (search_result_content, True, "timeout")
205
 
206
  except Exception as e:
@@ -248,8 +272,9 @@ async def filter_research_results_by_relevance(state: ResearchState) -> Research
248
  else:
249
  results_removed_count += 1
250
 
251
- # Update state with ONLY the relevant results
252
- state["company_research_data"]["tavily_search"] = results_kept
 
253
 
254
  # Log filtering summary
255
  total_evaluated = len(raw_search_results)
 
1
+ # Standard library imports
2
+ import asyncio
3
  import logging
4
  import os
 
 
5
  from pathlib import Path
6
 
7
+ # Third-party imports
8
+ import dspy
9
+ from dotenv import load_dotenv
10
  from langchain_tavily import TavilySearch
11
  from openevals.llm import create_async_llm_as_judge
12
+ from openevals.prompts import RAG_HELPFULNESS_PROMPT, RAG_RETRIEVAL_RELEVANCE_PROMPT
 
13
 
14
+ # Local imports
15
  from ..agents.output_schema import TavilySearchQueries
16
  from ..classes.classes import ResearchState
17
  from ..utils.llm_provider_factory import LLMFactory
 
24
  load_dotenv(dotenv_path=env_path, override=True)
25
 
26
 
27
+ # Safe environment variable access with validation
28
+ openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
29
+ if not openrouter_api_key:
30
+ logger.error("OPENROUTER_API_KEY environment variable not set")
31
+ raise ValueError("OPENROUTER_API_KEY environment variable is required")
32
 
33
 
34
  class TavilyResearchTool:
 
37
  job_description,
38
  company_name,
39
  max_results=5,
40
+ model_name="cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
41
  ):
42
  # Create LLM inside __init__ (lazy initialization)
43
  llm_provider = LLMFactory()
 
62
  return response
63
 
64
  def tavily_search_company(self, queries):
65
+ """
66
+ Execute Tavily searches for multiple queries.
67
+
68
+ Args:
69
+ queries: Dictionary of query identifiers to query strings
70
+
71
+ Returns:
72
+ List of search result lists, one per query
73
+ """
74
  query_results: list[list[str]] = []
75
+ for query_key in queries:
76
  try:
77
+ query_string = queries.get(query_key, "")
78
+ if not query_string:
79
+ logger.warning(f"Empty query for key: {query_key}")
80
+ continue
81
+
82
  search_query_response = self.tavily_searchtool.invoke(
83
+ {"query": query_string}
84
  )
85
+ # Safe dictionary access for response
86
+ results = search_query_response.get("results", [])
87
  query_results.append(
88
+ [res.get("content", "") for res in results if isinstance(res, dict)]
89
  )
 
90
  except Exception as e:
91
  logger.error(
92
+ f"Failed to perform company research using TavilySearchTool. Error: {e}"
93
  )
94
  continue
95
 
 
142
  try:
143
  state["current_node"] = "filter_research_results_by_relevance"
144
 
145
+ # Extract and validate required state fields once
146
+ company_research_data = state.get("company_research_data", {})
147
+ raw_search_results = company_research_data.get("tavily_search", [])
 
148
  search_queries_used = state.get("attempted_search_queries", [])
149
 
150
  # Validate data types
 
159
  # Early exit if no results
160
  if len(raw_search_results) == 0:
161
  logger.info("No search results to filter.")
162
+ # Update using the extracted variable
163
+ company_research_data["tavily_search"] = []
164
+ state["company_research_data"] = company_research_data
165
  return state
166
 
167
  logger.info(
 
224
  logger.warning(
225
  f"Evaluation timed out for query: {original_query[:60]}... (KEEPING result)"
226
  )
227
+ # Keep the result on timeout to avoid losing potentially useful data
228
  return (search_result_content, True, "timeout")
229
 
230
  except Exception as e:
 
272
  else:
273
  results_removed_count += 1
274
 
275
+ # Update company_research_data with ONLY the relevant results
276
+ company_research_data["tavily_search"] = results_kept
277
+ state["company_research_data"] = company_research_data
278
 
279
  # Log filtering summary
280
  total_evaluated = len(raw_search_results)
src/job_writing_agent/utils/application_cli_interface.py CHANGED
@@ -1,18 +1,29 @@
1
  import argparse
2
- import os
3
  from typing import Iterable
4
 
5
  import requests
6
- from requests.exceptions import RequestException
7
 
8
 
9
- DEFAULT_MODEL = "mistralai/mistral-7b-instruct:free"
10
  DEFAULT_CONTENT_TYPE = "cover_letter"
11
 
12
 
13
  def readable_file(path: str) -> str:
14
- """Validate and return contents of a readable file."""
15
- if not os.path.isfile(path):
 
 
 
 
 
 
 
 
 
 
 
 
16
  raise argparse.ArgumentTypeError(f"File not found: {path}")
17
  if not path.lower().endswith((".pdf", ".md", ".json", ".txt")):
18
  raise argparse.ArgumentTypeError(
@@ -22,7 +33,18 @@ def readable_file(path: str) -> str:
22
 
23
 
24
  def valid_temp(temp: str) -> float:
25
- """Ensure temperature is within a reasonable range."""
 
 
 
 
 
 
 
 
 
 
 
26
  value = float(temp)
27
  if not (0 <= value <= 2):
28
  raise argparse.ArgumentTypeError("Temperature must be between 0 and 2.")
@@ -31,42 +53,41 @@ def valid_temp(temp: str) -> float:
31
 
32
  def is_valid_url(
33
  job_posting: str, allowed_statuses: Iterable[int] | None = None
34
- ) -> bool:
35
  """
36
- Returns ``True`` if *url* is reachable and its HTTP status code is in
37
- `allowed_statuses`. Defaults to any 2xx or 3xx response (common
38
- successful codes).
39
-
40
- Parameters
41
- ----------
42
- job_posting : str
43
- The URL for the job posting.
44
- timeout : float, optional
45
- Timeout for the request (seconds). Defaults to 10.
46
- allowed_statuses : Iterable[int] | None, optional
47
- Specific status codes that are considered “valid”.
48
- If ``None`` (default) any 200‑399 status is accepted.
49
-
50
- Returns
51
- -------
52
- bool
53
- ``True`` if the URL succeeded, ``False`` otherwise.
54
  """
55
  if allowed_statuses is None:
56
  # All 2xx and 3xx responses are considered “valid”
57
  allowed_statuses = range(200, 400)
58
 
59
- with requests.get(
60
- job_posting, timeout=30, allow_redirects=True, stream=True
61
- ) as resp:
62
- if resp.status_code in allowed_statuses:
63
- return job_posting
64
- else:
65
- raise RequestException("Job Posting could not be reached")
 
66
 
67
 
68
  def handle_cli() -> argparse.Namespace:
69
- """Parse and validate CLI arguments for job application generator."""
 
 
 
 
 
70
  parser = argparse.ArgumentParser(
71
  description="""Assist the candidate in writing content for
72
  job application such as answering to question in application
 
1
  import argparse
2
+ from pathlib import Path
3
  from typing import Iterable
4
 
5
  import requests
 
6
 
7
 
8
+ DEFAULT_MODEL = "allenai/olmo-3.1-32b-think:free"
9
  DEFAULT_CONTENT_TYPE = "cover_letter"
10
 
11
 
12
  def readable_file(path: str) -> str:
13
+ """
14
+ Validate that the file exists and has a supported extension.
15
+
16
+ Args:
17
+ path: File path to validate
18
+
19
+ Returns:
20
+ Original path string if valid
21
+
22
+ Raises:
23
+ ArgumentTypeError: If file doesn't exist or has unsupported extension
24
+ """
25
+ file_path = Path(path)
26
+ if not file_path.is_file():
27
  raise argparse.ArgumentTypeError(f"File not found: {path}")
28
  if not path.lower().endswith((".pdf", ".md", ".json", ".txt")):
29
  raise argparse.ArgumentTypeError(
 
33
 
34
 
35
  def valid_temp(temp: str) -> float:
36
+ """
37
+ Ensure temperature is within a reasonable range.
38
+
39
+ Args:
40
+ temp: Temperature value as string
41
+
42
+ Returns:
43
+ Temperature as float
44
+
45
+ Raises:
46
+ ArgumentTypeError: If temperature is outside valid range [0, 2]
47
+ """
48
  value = float(temp)
49
  if not (0 <= value <= 2):
50
  raise argparse.ArgumentTypeError("Temperature must be between 0 and 2.")
 
53
 
54
  def is_valid_url(
55
  job_posting: str, allowed_statuses: Iterable[int] | None = None
56
+ ) -> str:
57
  """
58
+ Validate that a URL is reachable and returns an acceptable HTTP status.
59
+
60
+ Defaults to any 2xx or 3xx response (common successful codes).
61
+
62
+ Args:
63
+ job_posting: The URL for the job posting
64
+ allowed_statuses: Specific status codes that are considered valid.
65
+ If None (default), any 200-399 status is accepted.
66
+
67
+ Returns:
68
+ URL of the job posting if successful, error message if failed
 
 
 
 
 
 
 
69
  """
70
  if allowed_statuses is None:
71
  # All 2xx and 3xx responses are considered “valid”
72
  allowed_statuses = range(200, 400)
73
 
74
+ try:
75
+ response = requests.get(
76
+ job_posting, timeout=30, allow_redirects=True, stream=True
77
+ )
78
+ response.raise_for_status()
79
+ return job_posting
80
+ except requests.exceptions.RequestException as e:
81
+ return f"Error: {e.response.text if e.response else 'Unknown error'}"
82
 
83
 
84
  def handle_cli() -> argparse.Namespace:
85
+ """
86
+ Parse and validate CLI arguments for job application generator.
87
+
88
+ Returns:
89
+ Parsed command-line arguments namespace
90
+ """
91
  parser = argparse.ArgumentParser(
92
  description="""Assist the candidate in writing content for
93
  job application such as answering to question in application
src/job_writing_agent/utils/config.py CHANGED
@@ -1,25 +1,44 @@
1
  """
2
  Configuration utilities for the job writer application.
3
 
4
- This module provides functions for initializing and configuring
5
  language models and other resources.
6
  """
7
 
 
8
  import os
9
- from typing_extensions import Dict, Any, Tuple, Optional
 
10
  from langchain.chat_models import init_chat_model
 
 
 
 
 
 
 
 
11
 
12
- def init_models(config: Optional[Dict[str, Any]] = None) -> Tuple[Any, Any]:
13
- """Initialize language models based on configuration."""
 
 
 
 
 
 
 
14
  config = config or {}
15
-
16
  # Model configuration with defaults
17
  model_name = config.get("model_name", os.getenv("OLLAMA_MODEL", "llama3.2:latest"))
18
  temperature = float(config.get("temperature", "0.3"))
19
  precise_temperature = float(config.get("precise_temperature", "0.2"))
20
-
21
  # Initialize models
22
- llm = init_chat_model(f"ollama:{model_name}", temperature=temperature)
23
- llm_precise = init_chat_model(f"ollama:{model_name}", temperature=precise_temperature)
 
 
24
 
25
- return llm, llm_precise
 
1
  """
2
  Configuration utilities for the job writer application.
3
 
4
+ This module provides functions for initializing and configuring
5
  language models and other resources.
6
  """
7
 
8
+ # Standard library imports
9
  import os
10
+
11
+ # Third-party imports
12
  from langchain.chat_models import init_chat_model
13
+ from langchain_core.language_models.chat_models import BaseChatModel
14
+
15
+
16
+ def init_models(
17
+ config: dict[str, str | float] | None = None,
18
+ ) -> tuple[BaseChatModel, BaseChatModel]:
19
+ """
20
+ Initialize language models based on configuration.
21
 
22
+ Args:
23
+ config: Optional configuration dictionary with keys:
24
+ - model_name: Name of the model to use
25
+ - temperature: Temperature for general LLM
26
+ - precise_temperature: Temperature for precise LLM
27
+
28
+ Returns:
29
+ Tuple of (general_llm, precise_llm) instances
30
+ """
31
  config = config or {}
32
+
33
  # Model configuration with defaults
34
  model_name = config.get("model_name", os.getenv("OLLAMA_MODEL", "llama3.2:latest"))
35
  temperature = float(config.get("temperature", "0.3"))
36
  precise_temperature = float(config.get("precise_temperature", "0.2"))
37
+
38
  # Initialize models
39
+ general_llm = init_chat_model(f"ollama:{model_name}", temperature=temperature)
40
+ precise_llm = init_chat_model(
41
+ f"ollama:{model_name}", temperature=precise_temperature
42
+ )
43
 
44
+ return general_llm, precise_llm
src/job_writing_agent/utils/document_processing.py CHANGED
@@ -2,27 +2,28 @@
2
  Document processing utilities for parsing resumes and job descriptions.
3
  """
4
 
 
5
  import logging
6
  import os
7
  import re
8
  from pathlib import Path
9
  from urllib.parse import urlparse
10
- from typing_extensions import Dict, List, Any
11
-
12
 
 
13
  import dspy
14
  from langchain_community.document_loaders import PyPDFLoader, AsyncChromiumLoader
15
  from langchain_community.document_transformers import Html2TextTransformer
 
16
  from langchain_text_splitters import (
17
  RecursiveCharacterTextSplitter,
18
  MarkdownHeaderTextSplitter,
19
  )
20
- from langchain_core.documents import Document
21
  from langfuse import observe
22
  from pydantic import BaseModel, Field
 
23
 
24
- # Local imports - using relative imports
25
- from .errors import URLExtractionError, LLMProcessingError, JobDescriptionParsingError
26
 
27
  # Set up logging
28
  logger = logging.getLogger(__name__)
@@ -64,8 +65,8 @@ class ResumeSection(BaseModel):
64
  class StructuredResume(BaseModel):
65
  """Model for a structured resume with sections."""
66
 
67
- sections: List[ResumeSection] = Field(description="List of resume sections")
68
- contact_info: Dict[str, str] = Field(
69
  description="Contact information extracted from the resume"
70
  )
71
 
@@ -122,7 +123,7 @@ def clean_resume_text(text: str) -> str:
122
 
123
 
124
  @observe()
125
- def extract_contact_info(text: str) -> Dict[str, str]:
126
  """Extract contact information from resume text.
127
 
128
  Args:
@@ -162,7 +163,7 @@ def extract_contact_info(text: str) -> Dict[str, str]:
162
 
163
 
164
  @observe()
165
- def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
166
  """Identify sections in a resume text.
167
 
168
  Args:
@@ -231,16 +232,33 @@ def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
231
 
232
 
233
  def _collapse_ws(text: str) -> str:
234
- """Collapse stray whitespace but keep bullet breaks."""
 
 
 
 
 
 
 
 
235
  text = re.sub(r"\n\s*([•\-–])\s*", r"\n\1 ", text)
236
  return re.sub(r"[ \t\r\f\v]+", " ", text).replace(" \n", "\n").strip()
237
 
238
 
239
  def _is_heading(line: str) -> bool:
 
 
 
 
 
 
 
 
 
240
  return line.isupper() and len(line.split()) <= 5 and not re.search(r"\d", line)
241
 
242
 
243
- def parse_resume(file_path: str | Path) -> List[Document]:
244
  """
245
  Load a résumé from PDF or TXT file → list[Document] chunks
246
  (≈400 chars, 50‑char overlap) with {source, section} metadata.
@@ -326,7 +344,7 @@ async def get_job_description(file_path_or_url: str) -> Document:
326
  )
327
 
328
 
329
- async def scrape_job_description_from_web(urls: List[str]):
330
  """This function will first scrape the data from the job listing.
331
  Then using the recursive splitter using the different seperators,
332
  it preserves the paragraphs, lines and words"""
@@ -393,11 +411,15 @@ async def parse_job_description_from_url(url: str) -> Document:
393
  # 3. Process content with the LLM
394
  try:
395
  logger.info("Processing content with DSPy LLM...")
396
- # Configure DSPy LM (it's good practice to do this here if it can change)
 
 
 
 
397
  dspy.configure(
398
  lm=dspy.LM(
399
  "cerebras/qwen-3-32b",
400
- api_key=os.environ.get("CEREBRAS_API_KEY"),
401
  temperature=0.1,
402
  max_tokens=60000, # Note: This max_tokens is unusually high
403
  )
 
2
  Document processing utilities for parsing resumes and job descriptions.
3
  """
4
 
5
+ # Standard library imports
6
  import logging
7
  import os
8
  import re
9
  from pathlib import Path
10
  from urllib.parse import urlparse
 
 
11
 
12
+ # Third-party imports
13
  import dspy
14
  from langchain_community.document_loaders import PyPDFLoader, AsyncChromiumLoader
15
  from langchain_community.document_transformers import Html2TextTransformer
16
+ from langchain_core.documents import Document
17
  from langchain_text_splitters import (
18
  RecursiveCharacterTextSplitter,
19
  MarkdownHeaderTextSplitter,
20
  )
 
21
  from langfuse import observe
22
  from pydantic import BaseModel, Field
23
+ from typing_extensions import Any
24
 
25
+ # Local imports
26
+ from .errors import JobDescriptionParsingError, LLMProcessingError, URLExtractionError
27
 
28
  # Set up logging
29
  logger = logging.getLogger(__name__)
 
65
  class StructuredResume(BaseModel):
66
  """Model for a structured resume with sections."""
67
 
68
+ sections: list[ResumeSection] = Field(description="List of resume sections")
69
+ contact_info: dict[str, str] = Field(
70
  description="Contact information extracted from the resume"
71
  )
72
 
 
123
 
124
 
125
  @observe()
126
+ def extract_contact_info(text: str) -> dict[str, str]:
127
  """Extract contact information from resume text.
128
 
129
  Args:
 
163
 
164
 
165
  @observe()
166
+ def identify_resume_sections(text: str) -> list[dict[str, Any]]:
167
  """Identify sections in a resume text.
168
 
169
  Args:
 
232
 
233
 
234
  def _collapse_ws(text: str) -> str:
235
+ """
236
+ Collapse stray whitespace but keep bullet breaks.
237
+
238
+ Args:
239
+ text: Input text with potential whitespace issues
240
+
241
+ Returns:
242
+ Text with collapsed whitespace
243
+ """
244
  text = re.sub(r"\n\s*([•\-–])\s*", r"\n\1 ", text)
245
  return re.sub(r"[ \t\r\f\v]+", " ", text).replace(" \n", "\n").strip()
246
 
247
 
248
  def _is_heading(line: str) -> bool:
249
+ """
250
+ Check if a line is a heading (all uppercase, short, no digits).
251
+
252
+ Args:
253
+ line: Line of text to check
254
+
255
+ Returns:
256
+ True if line appears to be a heading
257
+ """
258
  return line.isupper() and len(line.split()) <= 5 and not re.search(r"\d", line)
259
 
260
 
261
+ def parse_resume(file_path: str | Path) -> list[Document]:
262
  """
263
  Load a résumé from PDF or TXT file → list[Document] chunks
264
  (≈400 chars, 50‑char overlap) with {source, section} metadata.
 
344
  )
345
 
346
 
347
+ async def scrape_job_description_from_web(urls: list[str]) -> str:
348
  """This function will first scrape the data from the job listing.
349
  Then using the recursive splitter using the different seperators,
350
  it preserves the paragraphs, lines and words"""
 
411
  # 3. Process content with the LLM
412
  try:
413
  logger.info("Processing content with DSPy LLM...")
414
+ # Configure DSPy LM with safe environment variable access
415
+ cerebras_api_key = os.getenv("CEREBRAS_API_KEY")
416
+ if not cerebras_api_key:
417
+ raise ValueError("CEREBRAS_API_KEY environment variable not set")
418
+
419
  dspy.configure(
420
  lm=dspy.LM(
421
  "cerebras/qwen-3-32b",
422
+ api_key=cerebras_api_key,
423
  temperature=0.1,
424
  max_tokens=60000, # Note: This max_tokens is unusually high
425
  )
src/job_writing_agent/utils/vector_store.py CHANGED
@@ -1,13 +1,12 @@
1
  """
2
  Vector storage utilities for the job writer application.
3
 
4
- This module provides functions for storing and retrieving
5
  documents from vector databases.
6
  """
7
 
8
  # Standard library imports
9
  import os
10
- from typing_extensions import List, Optional
11
 
12
  # Third-party library imports
13
  from langchain_core.documents import Document
@@ -18,38 +17,37 @@ from pinecone import Pinecone as PineconeClient, ServerlessSpec
18
  # Default configuration
19
  DEFAULT_PINECONE_INDEX = "job-writer-vector"
20
 
 
21
  class VectorStoreManager:
22
  """Manager class for vector store operations."""
23
-
24
  def __init__(
25
  self,
26
  index_name: str = DEFAULT_PINECONE_INDEX,
27
- embedding_model: str = "llama3.2:latest"
28
  ):
29
  """Initialize the vector store manager.
30
-
31
  Args:
32
  api_key: Pinecone API key (will use env var if not provided)
33
  index_name: Name of the Pinecone index to use
34
  embedding_model: Name of the Ollama model to use for embeddings
35
  """
36
- api_key= os.getenv("PINECONE_API_KEY")
37
  if not api_key:
38
  raise ValueError("Environment variable PINECONE_API_KEY not set.")
39
-
40
  self.index_name = index_name
41
-
42
  # Initialize embeddings
43
- self.embeddings = OllamaEmbeddings(
44
- model=embedding_model
45
- )
46
-
47
  # Initialize Pinecone client
48
  self.client = PineconeClient(api_key=api_key)
49
-
50
  # Ensure index exists
51
  self._ensure_index_exists()
52
-
53
  def _ensure_index_exists(self):
54
  """Make sure the required index exists, create if not."""
55
  # Get embedding dimension from our embeddings model
@@ -60,7 +58,7 @@ class VectorStoreManager:
60
  print(f"Error determining embedding dimension: {e}")
61
  print("Falling back to default dimension of 384")
62
  embedding_dim = 384 # Common default for Ollama embeddings
63
-
64
  # Check if the index exists
65
  index_exists = False
66
  try:
@@ -69,7 +67,7 @@ class VectorStoreManager:
69
  index_exists = self.index_name in index_list
70
  except Exception as e:
71
  print(f"Error checking Pinecone indexes: {e}")
72
-
73
  # Create index if it doesn't exist
74
  if not index_exists:
75
  try:
@@ -78,20 +76,22 @@ class VectorStoreManager:
78
  name=self.index_name,
79
  dimension=embedding_dim,
80
  spec=ServerlessSpec(region="us-east-1", cloud="aws"),
81
- metric="cosine"
82
  )
83
  print(f"Successfully created index: {self.index_name}")
84
  except Exception as e:
85
  if "ALREADY_EXISTS" in str(e):
86
- print(f"Index {self.index_name} already exists (created in another process)")
 
 
87
  else:
88
  print(f"Error creating index: {e}")
89
  else:
90
  print(f"Using Pinecone Index: {self.index_name}")
91
-
92
- def store_documents(self, docs: List[Document], namespace: str) -> None:
93
  """Store documents in vector database.
94
-
95
  Args:
96
  docs: List of Document objects to store
97
  namespace: Namespace to store documents under
@@ -99,58 +99,60 @@ class VectorStoreManager:
99
  try:
100
  # Get the index
101
  index = self.client.Index(self.index_name)
102
-
103
  # Create the vector store
104
  vector_store = Pinecone(
105
  index=index,
106
  embedding=self.embeddings,
107
  text_key="text",
108
- namespace=namespace
109
  )
110
-
111
  # Add documents
112
  vector_store.add_documents(docs)
113
- print(f"Successfully stored {len(docs)} documents in namespace: {namespace}")
 
 
114
  except Exception as e:
115
  print(f"Error storing documents: {e}")
116
  raise
117
-
118
- def retrieve_similar(self, query: str, namespace: str, k: int = 3):
 
 
119
  """Retrieve similar documents based on a query.
120
-
121
  Args:
122
  query: The query text to search for
123
  namespace: Namespace to search in
124
  k: Number of results to return
125
-
126
  Returns:
127
  List of Document objects
128
  """
129
  try:
130
  # Get the index
131
  index = self.client.Index(self.index_name)
132
-
133
  # Create the vector store
134
  vectorstore = Pinecone(
135
  index=index,
136
  embedding=self.embeddings,
137
  text_key="text",
138
- namespace=namespace
139
  )
140
-
141
  # Search for similar documents
142
  docs = vectorstore.similarity_search(query, k=k, namespace=namespace)
143
  return docs
144
  except Exception as e:
145
  print(f"Error retrieving documents: {e}")
146
  return []
147
-
148
-
149
-
150
 
151
- VectorStoreManager = VectorStoreManager()
152
 
153
- VectorStoreManager.store_documents(
154
- docs=[Document(page_content="Sample content", metadata={"source": "test"})],
155
- namespace="test_namespace"
156
- )
 
 
 
1
  """
2
  Vector storage utilities for the job writer application.
3
 
4
+ This module provides functions for storing and retrieving
5
  documents from vector databases.
6
  """
7
 
8
  # Standard library imports
9
  import os
 
10
 
11
  # Third-party library imports
12
  from langchain_core.documents import Document
 
17
  # Default configuration
18
  DEFAULT_PINECONE_INDEX = "job-writer-vector"
19
 
20
+
21
  class VectorStoreManager:
22
  """Manager class for vector store operations."""
23
+
24
  def __init__(
25
  self,
26
  index_name: str = DEFAULT_PINECONE_INDEX,
27
+ embedding_model: str = "llama3.2:latest",
28
  ):
29
  """Initialize the vector store manager.
30
+
31
  Args:
32
  api_key: Pinecone API key (will use env var if not provided)
33
  index_name: Name of the Pinecone index to use
34
  embedding_model: Name of the Ollama model to use for embeddings
35
  """
36
+ api_key = os.getenv("PINECONE_API_KEY")
37
  if not api_key:
38
  raise ValueError("Environment variable PINECONE_API_KEY not set.")
39
+
40
  self.index_name = index_name
41
+
42
  # Initialize embeddings
43
+ self.embeddings = OllamaEmbeddings(model=embedding_model)
44
+
 
 
45
  # Initialize Pinecone client
46
  self.client = PineconeClient(api_key=api_key)
47
+
48
  # Ensure index exists
49
  self._ensure_index_exists()
50
+
51
  def _ensure_index_exists(self):
52
  """Make sure the required index exists, create if not."""
53
  # Get embedding dimension from our embeddings model
 
58
  print(f"Error determining embedding dimension: {e}")
59
  print("Falling back to default dimension of 384")
60
  embedding_dim = 384 # Common default for Ollama embeddings
61
+
62
  # Check if the index exists
63
  index_exists = False
64
  try:
 
67
  index_exists = self.index_name in index_list
68
  except Exception as e:
69
  print(f"Error checking Pinecone indexes: {e}")
70
+
71
  # Create index if it doesn't exist
72
  if not index_exists:
73
  try:
 
76
  name=self.index_name,
77
  dimension=embedding_dim,
78
  spec=ServerlessSpec(region="us-east-1", cloud="aws"),
79
+ metric="cosine",
80
  )
81
  print(f"Successfully created index: {self.index_name}")
82
  except Exception as e:
83
  if "ALREADY_EXISTS" in str(e):
84
+ print(
85
+ f"Index {self.index_name} already exists (created in another process)"
86
+ )
87
  else:
88
  print(f"Error creating index: {e}")
89
  else:
90
  print(f"Using Pinecone Index: {self.index_name}")
91
+
92
+ def store_documents(self, docs: list[Document], namespace: str) -> None:
93
  """Store documents in vector database.
94
+
95
  Args:
96
  docs: List of Document objects to store
97
  namespace: Namespace to store documents under
 
99
  try:
100
  # Get the index
101
  index = self.client.Index(self.index_name)
102
+
103
  # Create the vector store
104
  vector_store = Pinecone(
105
  index=index,
106
  embedding=self.embeddings,
107
  text_key="text",
108
+ namespace=namespace,
109
  )
110
+
111
  # Add documents
112
  vector_store.add_documents(docs)
113
+ print(
114
+ f"Successfully stored {len(docs)} documents in namespace: {namespace}"
115
+ )
116
  except Exception as e:
117
  print(f"Error storing documents: {e}")
118
  raise
119
+
120
+ def retrieve_similar(
121
+ self, query: str, namespace: str, k: int = 3
122
+ ) -> list[Document]:
123
  """Retrieve similar documents based on a query.
124
+
125
  Args:
126
  query: The query text to search for
127
  namespace: Namespace to search in
128
  k: Number of results to return
129
+
130
  Returns:
131
  List of Document objects
132
  """
133
  try:
134
  # Get the index
135
  index = self.client.Index(self.index_name)
136
+
137
  # Create the vector store
138
  vectorstore = Pinecone(
139
  index=index,
140
  embedding=self.embeddings,
141
  text_key="text",
142
+ namespace=namespace,
143
  )
144
+
145
  # Search for similar documents
146
  docs = vectorstore.similarity_search(query, k=k, namespace=namespace)
147
  return docs
148
  except Exception as e:
149
  print(f"Error retrieving documents: {e}")
150
  return []
 
 
 
151
 
 
152
 
153
+ # Example usage (commented out to prevent auto-execution)
154
+ # vector_store_manager = VectorStoreManager()
155
+ # vector_store_manager.store_documents(
156
+ # docs=[Document(page_content="Sample content", metadata={"source": "test"})],
157
+ # namespace="test_namespace"
158
+ # )
src/job_writing_agent/workflow.py CHANGED
@@ -3,33 +3,36 @@ Workflow runner for the job application writer.
3
  This module provides the JobWorkflow class and CLI runner.
4
  """
5
 
 
6
  import asyncio
7
  import logging
8
- import sys
9
  import os
 
10
  from datetime import datetime
11
  from functools import cached_property
12
- from typing import Optional, Dict, Any
13
 
 
14
  from langchain_core.tracers import ConsoleCallbackHandler, LangChainTracer
15
  from langgraph.graph import StateGraph
16
  from langgraph.graph.state import CompiledStateGraph
17
 
 
18
  from job_writing_agent.agents.nodes import (
19
  create_draft,
20
  critique_draft,
21
  finalize_document,
22
  human_approval,
23
  )
24
- from job_writing_agent.classes import DataLoadState
25
- from job_writing_agent.nodes.initializing import data_loading_workflow
26
  from job_writing_agent.nodes.research_workflow import research_workflow
27
  from job_writing_agent.utils.application_cli_interface import handle_cli
28
- from job_writing_agent.utils.result_utils import print_result, save_result
29
  from job_writing_agent.utils.logging.logging_decorators import (
30
- log_execution,
31
  log_errors,
 
32
  )
 
33
 
34
  logger = logging.getLogger(__name__)
35
 
@@ -84,12 +87,62 @@ class JobWorkflow:
84
  return {
85
  "resume_path": self.resume,
86
  "job_description_source": self.job_description_source,
87
- "content": self.content,
88
  "current_node": "",
89
  "messages": [],
90
  "company_research_data": {},
91
  }
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  def job_app_graph(self) -> StateGraph:
94
  """
95
  Build and configure the job application workflow graph.
@@ -111,58 +164,40 @@ class JobWorkflow:
111
  StateGraph
112
  Configured LangGraph state machine ready for compilation.
113
  """
114
- graph = StateGraph(DataLoadState)
115
 
116
  # Add workflow nodes (subgraphs and individual nodes)
117
- graph.add_node("load", data_loading_workflow)
118
- graph.add_node("research", research_workflow)
119
- graph.add_node("create_draft", create_draft)
120
- graph.add_node("critique", critique_draft)
121
- graph.add_node("human_approval", human_approval)
122
- graph.add_node("finalize", finalize_document)
 
 
 
123
 
124
  # Set entry and exit points
125
- graph.set_entry_point("load")
126
- graph.set_finish_point("finalize")
127
-
128
- # Conditional routing after data loading
129
- def route_after_load(state: DataLoadState) -> str:
130
- """
131
- Route based on next_node set by data loading subgraph.
132
-
133
- The data loading subgraph sets next_node to either "load" (if validation
134
- fails) or "research" (if validation passes).
135
-
136
- Parameters
137
- ----------
138
- state: DataLoadState
139
- Current workflow state.
140
-
141
- Returns
142
- -------
143
- str
144
- Next node name: "load" or "research".
145
- """
146
- next_node = state.get("next_node", "research") # Default to research
147
- logger.info(f"Routing after load: {next_node}")
148
- return next_node
149
-
150
- graph.add_conditional_edges(
151
  "load",
152
- route_after_load,
153
  {
154
  "load": "load", # Loop back to load subgraph if validation fails
155
- "research": "research", # Proceed to research if validation passes
156
  },
157
  )
158
 
159
  # Sequential edges for main workflow
160
- graph.add_edge("research", "create_draft")
161
- graph.add_edge("create_draft", "critique")
162
- graph.add_edge("critique", "human_approval")
163
- graph.add_edge("human_approval", "finalize")
 
164
 
165
- return graph
166
 
167
  def _get_callbacks(self) -> list:
168
  """
@@ -208,7 +243,7 @@ class JobWorkflow:
208
 
209
  @log_execution
210
  @log_errors
211
- async def run(self) -> Optional[Dict[str, Any]]:
212
  """
213
  Execute the complete job application writer workflow.
214
 
@@ -289,7 +324,8 @@ class JobWorkflow:
289
  Exception
290
  If graph compilation fails (e.g., invalid edges, missing nodes).
291
  """
292
- return self.job_app_graph.compile()
 
293
 
294
 
295
  def main():
@@ -300,7 +336,6 @@ def main():
300
  content=args.content_type,
301
  )
302
  result = asyncio.run(workflow.run())
303
- # print(f"result: {result}")
304
  if result:
305
  print_result(args.content_type, result["output_data"])
306
  save_result(args.content_type, result["output_data"])
 
3
  This module provides the JobWorkflow class and CLI runner.
4
  """
5
 
6
+ # Standard library imports
7
  import asyncio
8
  import logging
 
9
  import os
10
+ import sys
11
  from datetime import datetime
12
  from functools import cached_property
13
+ from typing import Any
14
 
15
+ # Third-party imports
16
  from langchain_core.tracers import ConsoleCallbackHandler, LangChainTracer
17
  from langgraph.graph import StateGraph
18
  from langgraph.graph.state import CompiledStateGraph
19
 
20
+ # Local imports
21
  from job_writing_agent.agents.nodes import (
22
  create_draft,
23
  critique_draft,
24
  finalize_document,
25
  human_approval,
26
  )
27
+ from job_writing_agent.classes import DataLoadState, ResearchState
28
+ from job_writing_agent.nodes.data_loading_workflow import data_loading_workflow
29
  from job_writing_agent.nodes.research_workflow import research_workflow
30
  from job_writing_agent.utils.application_cli_interface import handle_cli
 
31
  from job_writing_agent.utils.logging.logging_decorators import (
 
32
  log_errors,
33
+ log_execution,
34
  )
35
+ from job_writing_agent.utils.result_utils import print_result, save_result
36
 
37
  logger = logging.getLogger(__name__)
38
 
 
87
  return {
88
  "resume_path": self.resume,
89
  "job_description_source": self.job_description_source,
90
+ "content_category": self.content,
91
  "current_node": "",
92
  "messages": [],
93
  "company_research_data": {},
94
  }
95
 
96
+ # Conditional routing after data loading
97
+ def route_after_load(self, state: DataLoadState) -> str:
98
+ """
99
+ Route based on next_node set by data loading subgraph.
100
+
101
+ The data loading subgraph sets next_node to either "load" (if validation
102
+ fails) or "research" (if validation passes).
103
+
104
+ Parameters
105
+ ----------
106
+ state: DataLoadState
107
+ Current workflow state.
108
+
109
+ Returns
110
+ -------
111
+ str
112
+ Next node name: "load" or "research".
113
+ """
114
+ next_node = state.get("next_node", "research") # Default to research
115
+ logger.info(f"Routing after load: {next_node}")
116
+ return next_node
117
+
118
+ def dataload_to_research_adapter(self, state: DataLoadState) -> ResearchState:
119
+ """
120
+ Adapter to convert DataLoadState to ResearchState.
121
+
122
+ Extracts only fields needed for research workflow following the
123
+ adapter pattern recommended by LangGraph documentation.
124
+
125
+ Parameters
126
+ ----------
127
+ state: DataLoadState
128
+ Current workflow state with loaded data.
129
+
130
+ Returns
131
+ -------
132
+ ResearchState
133
+ State formatted for research subgraph with required fields.
134
+ """
135
+ logger.info("Adapter for converting DataLoadState to ResearchState")
136
+
137
+ return ResearchState(
138
+ company_research_data=state.get("company_research_data", {}),
139
+ attempted_search_queries=[],
140
+ current_node="",
141
+ content_category=state.get("content_category", ""),
142
+ messages=state.get("messages", []),
143
+ )
144
+
145
+ @cached_property
146
  def job_app_graph(self) -> StateGraph:
147
  """
148
  Build and configure the job application workflow graph.
 
164
  StateGraph
165
  Configured LangGraph state machine ready for compilation.
166
  """
167
+ agent_workflow_graph = StateGraph(DataLoadState)
168
 
169
  # Add workflow nodes (subgraphs and individual nodes)
170
+ agent_workflow_graph.add_node("load", data_loading_workflow)
171
+ agent_workflow_graph.add_node(
172
+ "to_research_adapter", self.dataload_to_research_adapter
173
+ )
174
+ agent_workflow_graph.add_node("research", research_workflow)
175
+ agent_workflow_graph.add_node("create_draft", create_draft)
176
+ agent_workflow_graph.add_node("critique", critique_draft)
177
+ agent_workflow_graph.add_node("human_approval", human_approval)
178
+ agent_workflow_graph.add_node("finalize", finalize_document)
179
 
180
  # Set entry and exit points
181
+ agent_workflow_graph.set_entry_point("load")
182
+ agent_workflow_graph.set_finish_point("finalize")
183
+
184
+ agent_workflow_graph.add_conditional_edges(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  "load",
186
+ self.route_after_load,
187
  {
188
  "load": "load", # Loop back to load subgraph if validation fails
189
+ "research": "to_research_adapter", # Route to adapter first
190
  },
191
  )
192
 
193
  # Sequential edges for main workflow
194
+ agent_workflow_graph.add_edge("to_research_adapter", "research")
195
+ agent_workflow_graph.add_edge("research", "create_draft")
196
+ agent_workflow_graph.add_edge("create_draft", "critique")
197
+ agent_workflow_graph.add_edge("critique", "human_approval")
198
+ agent_workflow_graph.add_edge("human_approval", "finalize")
199
 
200
+ return agent_workflow_graph
201
 
202
  def _get_callbacks(self) -> list:
203
  """
 
243
 
244
  @log_execution
245
  @log_errors
246
+ async def run(self) -> dict[str, Any] | None:
247
  """
248
  Execute the complete job application writer workflow.
249
 
 
324
  Exception
325
  If graph compilation fails (e.g., invalid edges, missing nodes).
326
  """
327
+ compiled_graph = self.job_app_graph.compile()
328
+ return compiled_graph
329
 
330
 
331
  def main():
 
336
  content=args.content_type,
337
  )
338
  result = asyncio.run(workflow.run())
 
339
  if result:
340
  print_result(args.content_type, result["output_data"])
341
  save_result(args.content_type, result["output_data"])