Rishabh2095 committed on
Commit
69e77a3
·
1 Parent(s): 9842097

Update recursion limit in job workflow and replace LLM model references with Google Gemma. Add new data loading and research subgraph modules for modularity

Browse files
src/job_writing_agent/agents/nodes.py CHANGED
@@ -41,7 +41,7 @@ def create_draft(state: ResearchState) -> ResultState:
41
  # Create LLM inside function (lazy initialization)
42
  llm_provider = LLMFactory()
43
  llm = llm_provider.create_langchain(
44
- "mistralai/devstral-2512:free",
45
  provider="openrouter",
46
  temperature=0.3,
47
  )
@@ -139,7 +139,7 @@ def critique_draft(state: ResultState) -> ResultState:
139
  # Create LLM inside function (lazy initialization)
140
  llm_provider = LLMFactory()
141
  llm = llm_provider.create_langchain(
142
- "mistralai/devstral-2512:free",
143
  provider="openrouter",
144
  temperature=0.3,
145
  )
@@ -269,7 +269,7 @@ def finalize_document(state: ResultState) -> ResultState:
269
  # Create LLM inside function (lazy initialization)
270
  llm_provider = LLMFactory()
271
  llm = llm_provider.create_langchain(
272
- "mistralai/devstral-2512:free",
273
  provider="openrouter",
274
  temperature=0.3,
275
  )
 
41
  # Create LLM inside function (lazy initialization)
42
  llm_provider = LLMFactory()
43
  llm = llm_provider.create_langchain(
44
+ "google/gemma-3-27b-it:free",
45
  provider="openrouter",
46
  temperature=0.3,
47
  )
 
139
  # Create LLM inside function (lazy initialization)
140
  llm_provider = LLMFactory()
141
  llm = llm_provider.create_langchain(
142
+ "google/gemma-3-27b-it:free",
143
  provider="openrouter",
144
  temperature=0.3,
145
  )
 
269
  # Create LLM inside function (lazy initialization)
270
  llm_provider = LLMFactory()
271
  llm = llm_provider.create_langchain(
272
+ "google/gemma-3-27b-it:free",
273
  provider="openrouter",
274
  temperature=0.3,
275
  )
src/job_writing_agent/graph/background_research_subgraph.py ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # research_workflow.py
2
+ """Research workflow for company information gathering and filtering."""
3
+
4
+ # Standard library imports
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ from typing import Any, Dict, cast
9
+
10
+ # Third-party imports
11
+ import dspy
12
+ from langgraph.graph import StateGraph
13
+
14
+ # Local imports
15
+ from job_writing_agent.agents.output_schema import (
16
+ CompanyResearchDataSummarizationSchema,
17
+ )
18
+ from job_writing_agent.classes.classes import ResearchState, CompanyResearchData
19
+ from job_writing_agent.tools.SearchTool import (
20
+ TavilyResearchTool,
21
+ filter_research_results_by_relevance,
22
+ )
23
+ from job_writing_agent.utils.llm_provider_factory import LLMFactory
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+ # Configuration
28
+ MAX_RETRIES = 3
29
+ RETRY_DELAY = 2 # seconds
30
+ QUERY_TIMEOUT = 30 # seconds
31
+ EVAL_TIMEOUT = 15 # seconds per evaluation
32
+
33
+
34
def validate_research_inputs(state: ResearchState) -> tuple[bool, str, str]:
    """
    Check that the research state carries a usable company name and job description.

    Returns:
        Tuple (is_valid, company_name, job_description). On any failure the
        flag is False and both strings are empty; on success the strings are
        whitespace-trimmed.
    """
    try:
        # Fall back to an empty container when the field is unset.
        research = state.company_research_data or CompanyResearchData()
        name = research.company_name
        description = research.job_description

        # De Morgan form of "missing or blank".
        if not (name and name.strip()):
            logger.error("Company name is missing or empty")
            return False, "", ""

        if not (description and description.strip()):
            logger.error("Job description is missing or empty")
            return False, "", ""

        return True, name.strip(), description.strip()

    except (TypeError, AttributeError) as e:
        # A malformed state object is treated the same as missing inputs.
        logger.error(f"Invalid state structure: {e}")
        return False, "", ""
63
+
64
+
65
def parse_dspy_queries_with_fallback(
    raw_queries: dict[str, Any], company_name: str
) -> dict[str, str]:
    """
    Extract a mapping of query_id -> query_string from raw DSPy output.

    Falls back to get_fallback_queries() whenever the payload is absent,
    malformed JSON, or yields no usable query strings.
    """
    try:
        # Guard clause: no "search_queries" field at all -> fallback.
        if not (isinstance(raw_queries, dict) and "search_queries" in raw_queries):
            logger.warning("Could not parse DSPy queries. Using fallback.")
            return get_fallback_queries(company_name)

        payload = raw_queries["search_queries"]

        # The field may arrive as a JSON-encoded string; decode it first.
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except json.JSONDecodeError as e:
                logger.warning(f"JSON decode failed: {e}. Using fallback queries.")
                return get_fallback_queries(company_name)

        if isinstance(payload, dict):
            extracted: dict[str, str] = {}
            for query_id, candidate in payload.items():
                if isinstance(candidate, str):
                    extracted[query_id] = candidate
                elif isinstance(candidate, list) and candidate:
                    # Lists of alternatives: keep only the first entry.
                    extracted[query_id] = str(candidate[0])
            if extracted:
                return extracted

        # Nothing usable was extracted.
        logger.warning("Could not parse DSPy queries. Using fallback.")
        return get_fallback_queries(company_name)

    except Exception as e:
        logger.error(f"Error parsing DSPy queries: {e}. Using fallback.")
        return get_fallback_queries(company_name)
104
+
105
+
106
def get_fallback_queries(company_name: str) -> dict[str, str]:
    """
    Build three generic search queries used when DSPy query generation fails.
    """
    topics = (
        "company culture and values",
        "recent news and achievements",
        "mission statement and goals",
    )
    return {
        f"query{index}": f"{company_name} {topic}"
        for index, topic in enumerate(topics, start=1)
    }
115
+
116
+
117
def company_research_data_summary(state: ResearchState) -> dict[str, Any]:
    """
    Summarize the filtered research data into a concise summary.

    Replaces the raw tavily_search results with a summarized version using LLM
    (a DSPy ChainOfThought program over CompanyResearchDataSummarizationSchema).

    Args:
        state: Current research state with search results

    Returns:
        Updated state dict (full copy of the state plus the new
        company_research_data); on any error, a minimal update that only
        sets current_node so the workflow can continue.
    """
    try:
        # Update current node
        # NOTE(review): relies on state.__dict__ exposing all state fields —
        # assumes a plain / pydantic-v1-style object; confirm for pydantic v2.
        updated_state = {
            **state.__dict__,
            "current_node": "company_research_data_summary",
        }

        # Extract the current research data with safe access
        company_research_data = state.company_research_data or CompanyResearchData()
        tavily_search_data = company_research_data.tavily_search

        # If no research data, skip summarization
        if not tavily_search_data or len(tavily_search_data) == 0:
            logger.warning("No research data to summarize. Skipping summarization.")
            return updated_state

        logger.info(f"Summarizing {len(tavily_search_data)} research result sets...")

        # Create DSPy summarization chain
        company_research_data_summarization = dspy.ChainOfThought(
            CompanyResearchDataSummarizationSchema
        )

        # Initialize LLM provider (lazy, per-call)
        # NOTE(review): other modules in this commit were migrated to
        # "google/gemma-3-27b-it:free"; this summarizer still uses the liquid
        # model — confirm this divergence is intentional.
        llm_provider = LLMFactory()
        llm = llm_provider.create_dspy(
            model="liquid/lfm-2.5-1.2b-instruct:free",
            provider="openrouter",
            temperature=0.3,
        )

        # Generate summary using DSPy (JSONAdapter forces structured output)
        with dspy.context(lm=llm, adapter=dspy.JSONAdapter()):
            response = company_research_data_summarization(
                company_research_data=company_research_data
            )

        # Extract the summary from the response with safe access: DSPy may
        # return a Prediction (attribute access) or a plain dict.
        summary_json_str = ""
        if hasattr(response, "company_research_data_summary"):
            summary_json_str = response.company_research_data_summary
        elif isinstance(response, dict):
            summary_json_str = response.get("company_research_data_summary", "")
        else:
            logger.error(
                f"Unexpected response format from summarization: {type(response)}"
            )
            return updated_state

        # Update state with summary using safe dictionary operations
        # (rebuild the model rather than mutating the existing instance).
        updated_company_research_data = {**company_research_data.__dict__}
        updated_company_research_data["company_research_data_summary"] = (
            summary_json_str
        )
        updated_state["company_research_data"] = CompanyResearchData(
            **updated_company_research_data
        )

        return updated_state

    except Exception as e:
        logger.error(f"Error in company_research_data_summary: {e}", exc_info=True)
        # Return minimal update so we don't reference updated_state (may be undefined).
        return {"current_node": "company_research_data_summary"}
193
+
194
+
195
async def research_company_with_retry(state: ResearchState) -> dict[str, Any]:
    """
    Research company with retry logic and timeouts.

    Generates Tavily search queries (DSPy), runs the searches, and returns a
    partial state update. Each attempt bounds query generation by
    QUERY_TIMEOUT and searching by QUERY_TIMEOUT * len(queries); failures are
    retried up to MAX_RETRIES times with linearly increasing backoff. On
    invalid inputs or total failure, returns an update with empty results so
    downstream nodes still receive a well-typed state.
    """
    state.current_node = "research_company"

    # Validate inputs
    is_valid, company_name, job_description = validate_research_inputs(state)

    if not is_valid:
        logger.error("Invalid inputs for research. Skipping research phase.")
        cr = state.company_research_data or CompanyResearchData()
        return {
            "company_research_data": cr.model_copy(update={"tavily_search": []}),
            "attempted_search_queries": [],
            "current_node": "research_company",
        }

    logger.info(f"Researching company: {company_name}")

    # Try with retries
    for attempt in range(MAX_RETRIES):
        try:
            # Create tool instance (fresh per attempt)
            tavily_search = TavilyResearchTool(
                job_description=job_description, company_name=company_name
            )

            # Generate queries with timeout; to_thread keeps the sync DSPy
            # call off the event loop.
            queries_task = asyncio.create_task(
                asyncio.to_thread(tavily_search.create_tavily_queries)
            )

            try:
                raw_queries = await asyncio.wait_for(
                    queries_task, timeout=QUERY_TIMEOUT
                )
            except asyncio.TimeoutError:
                logger.warning(
                    f"Query generation timed out (attempt {attempt + 1}/{MAX_RETRIES})"
                )
                if attempt < MAX_RETRIES - 1:
                    await asyncio.sleep(RETRY_DELAY)
                    continue
                else:
                    # Last attempt: let the outer except produce the empty result.
                    raise

            # Parse queries with fallback
            # Convert DSPy Prediction to dict if needed
            if hasattr(raw_queries, "dict"):
                raw_queries_dict = cast(Dict[str, Any], raw_queries.dict())
            elif hasattr(raw_queries, "__dict__"):
                raw_queries_dict = cast(Dict[str, Any], raw_queries.__dict__)
            elif isinstance(raw_queries, dict):
                raw_queries_dict = cast(Dict[str, Any], raw_queries)
            else:
                raw_queries_dict = cast(Dict[str, Any], dict(raw_queries))

            queries = parse_dspy_queries_with_fallback(raw_queries_dict, company_name)

            if not queries:
                logger.warning("No valid queries generated")
                queries = get_fallback_queries(company_name)

            logger.info(
                f"Generated {len(queries)} search queries: {list(queries.keys())}"
            )

            # Perform searches with timeout (scaled by number of queries)
            search_task = asyncio.create_task(
                asyncio.to_thread(tavily_search.tavily_search_company, queries)
            )

            try:
                search_results = await asyncio.wait_for(
                    search_task, timeout=QUERY_TIMEOUT * len(queries)
                )
            except asyncio.TimeoutError:
                logger.warning(
                    f"Search timed out (attempt {attempt + 1}/{MAX_RETRIES})"
                )
                if attempt < MAX_RETRIES - 1:
                    await asyncio.sleep(RETRY_DELAY)
                    continue
                else:
                    raise

            # Validate results: downstream expects a list.
            if not isinstance(search_results, list):
                logger.warning(f"Invalid search results type: {type(search_results)}")
                search_results = []

            if len(search_results) == 0:
                logger.warning("No search results returned")

            # Return partial state update with CompanyResearchData (typed), not raw dict.
            cr = state.company_research_data or CompanyResearchData()
            return {
                "company_research_data": cr.model_copy(update={"tavily_search": search_results}),
                "attempted_search_queries": list(queries.values()),
                "current_node": "research_company",
            }

        except Exception as e:
            logger.error(
                f"Error in research_company (attempt {attempt + 1}/{MAX_RETRIES}): {e}",
                exc_info=True,
            )

            if attempt < MAX_RETRIES - 1:
                await asyncio.sleep(RETRY_DELAY * (attempt + 1))  # Exponential backoff
            else:
                logger.error("All retry attempts exhausted. Using empty results.")
                cr = state.company_research_data or CompanyResearchData()
                return {
                    "company_research_data": cr.model_copy(update={"tavily_search": []}),
                    "attempted_search_queries": [],
                    "current_node": "research_company",
                }

    # Defensive: every loop path above returns or raises, so this is only
    # reachable if MAX_RETRIES is ever set to 0.
    cr = state.company_research_data or CompanyResearchData()
    return {
        "company_research_data": cr,
        "attempted_search_queries": [],
        "current_node": "research_company",
    }
321
+
322
+
323
# ---------------------------------------------------------------------------
# Research subgraph wiring (built and compiled once at import time).
# Linear pipeline: research_company -> relevance_filter ->
# company_research_data_summary; exported as background_research_workflow.
# ---------------------------------------------------------------------------

# Create research subgraph
research_subgraph = StateGraph(ResearchState)

# Add research subgraph nodes
research_subgraph.add_node("research_company", research_company_with_retry)
research_subgraph.add_node("relevance_filter", filter_research_results_by_relevance)
research_subgraph.add_node(
    "company_research_data_summary", company_research_data_summary
)

# Set entry and finish points
research_subgraph.set_entry_point("research_company")
research_subgraph.set_finish_point("company_research_data_summary")

# Add research subgraph edges
research_subgraph.add_edge("research_company", "relevance_filter")
research_subgraph.add_edge("relevance_filter", "company_research_data_summary")

# Compile research subgraph
background_research_workflow = research_subgraph.compile()
src/job_writing_agent/graph/data_loading_subgraph.py ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Data Loading Subgraph Module
4
+
5
+ This module defines the data loading subgraph, including all node
6
+ functions and the subgraph definition. It uses the separate loader classes
7
+ (ResumeLoader, JobDescriptionLoader, SystemInitializer) following the
8
+ Single Responsibility Principle.
9
+
10
+ Advanced patterns used:
11
+ - **Decorators**: @log_async (cross-cutting logging), @retry_async_load (tenacity
12
+ retry for I/O nodes). Stack order: retry innermost, log_async outermost so we
13
+ log once per logical call and retry inside.
14
+ - **Type hints**: Literal for conditional edge targets (type-safe routing),
15
+ StateUpdate alias for partial state dicts.
16
+ - **functools.wraps**: Preserved in log_async and in custom retry decorator
17
+ so __name__ and docstrings are correct for debugging and LangGraph.
18
+ """
19
+
20
+ import logging
21
+ from typing import Any, Literal, Callable
22
+ from functools import wraps
23
+
24
+ from langgraph.graph import StateGraph
25
+ from tenacity import (
26
+ retry,
27
+ retry_if_exception_type,
28
+ stop_after_attempt,
29
+ wait_exponential,
30
+ )
31
+
32
+ from job_writing_agent.classes import (
33
+ DataLoadState,
34
+ CompanyResearchData,
35
+ DataLoadingNodes,
36
+ AgentWorkflowNodes,
37
+ )
38
+ from job_writing_agent.nodes.graph_interrupt import GraphInterrupt
39
+ from job_writing_agent.nodes.resume_loader import ResumeLoader
40
+ from job_writing_agent.nodes.job_description_loader import JobDescriptionLoader
41
+ from job_writing_agent.nodes.system_initializer import SystemInitializer
42
+ from job_writing_agent.utils.document_processing import analyze_candidate_job_fit
43
+ from job_writing_agent.utils.logging.logging_decorators import log_async
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+ # Type alias for LangGraph partial state updates (nodes return dicts that get merged).
48
+ StateUpdate = dict[str, Any]
49
+
50
+ # Retry config for I/O-bound load nodes (resume, job description).
51
+ _LOAD_RETRY_ATTEMPTS = 3
52
+ _LOAD_RETRY_MIN_WAIT = 1
53
+ _LOAD_RETRY_MAX_WAIT = 10
54
+
55
+
56
def _get_cr(state: DataLoadState) -> CompanyResearchData:
    """Safe accessor: the state's company_research_data, or an empty instance."""
    existing = state.company_research_data
    return existing or CompanyResearchData()
59
+
60
+
61
def retry_async_load(func: Callable) -> Callable:
    """
    Decorator: retry async load functions with exponential backoff.

    Backed by tenacity; only transient I/O failures (OSError, ConnectionError,
    TimeoutError) trigger a retry, and the last exception is re-raised once
    attempts are exhausted. Apply below @log_async so we log once per logical
    invocation while retries happen inside; wraps preserves __name__ etc.
    """
    # Build the tenacity policy once, then apply it to the wrapper.
    retry_policy = retry(
        stop=stop_after_attempt(_LOAD_RETRY_ATTEMPTS),
        wait=wait_exponential(
            multiplier=1, min=_LOAD_RETRY_MIN_WAIT, max=_LOAD_RETRY_MAX_WAIT
        ),
        retry=retry_if_exception_type((OSError, ConnectionError, TimeoutError)),
        reraise=True,
    )

    @retry_policy
    @wraps(func)
    async def wrapper(state: DataLoadState) -> StateUpdate:
        return await func(state)

    return wrapper
82
+
83
+
84
# Module-level interrupt helper shared by the prompt_user_* nodes below.
graph_interrupt = GraphInterrupt()
85
+
86
+
87
+ # ============================================================================
88
+ # Data Loading Subgraph Node Functions
89
+ # ============================================================================
90
+
91
+
92
@log_async
async def set_agent_system_message_node(state: DataLoadState) -> StateUpdate:
    """
    Initialize the system message in the workflow state.

    Thin LangGraph adapter around SystemInitializer.set_agent_system_message.

    Parameters
    ----------
    state: DataLoadState
        Current workflow state.

    Returns
    -------
    StateUpdate
        Partial state update with system message added to messages list.
    """
    return await SystemInitializer().set_agent_system_message(state)
112
+
113
+
114
@log_async
@retry_async_load
async def load_resume_node(state: DataLoadState) -> StateUpdate:
    """
    Load the resume from the configured file path; runs before job parsing.

    Returns only the resume field — LangGraph merges this partial update into
    the state. Transient I/O errors are retried via @retry_async_load. When no
    path is configured, an empty resume is stored so the router can detect it
    and prompt the user instead.

    Parameters
    ----------
    state: DataLoadState
        Current workflow state containing resume_path.

    Returns
    -------
    StateUpdate
        Partial state update with resume data in company_research_data.
    """
    source = state.workflow_inputs.resume_file_path_
    resume_text = ""
    if source:
        resume_text = await ResumeLoader().load_resume(source)
        logger.info(f"Resume loaded: {len(resume_text)} characters")
    current = _get_cr(state)
    return {"company_research_data": current.model_copy(update={"resume": resume_text})}
142
+
143
+
144
def prompt_user_for_resume_node(state: DataLoadState) -> StateUpdate:
    """
    Pause the graph and ask the user to paste their resume via chat.

    Reached when no resume path was given or extraction produced nothing.
    The interrupt payload is rendered by the frontend; the client resumes
    with Command(resume=user_input). Empty input routes back to this node.
    """
    logger.info("Resume missing or empty, prompting user to paste resume via chat")
    prompt = "Please paste your resume in text format:"
    return graph_interrupt.request_input_for_field(state, "resume", prompt, "resume")
157
+
158
+
159
def prompt_user_for_job_description_node(state: DataLoadState) -> StateUpdate:
    """
    Pause the graph and ask the user to paste the job description via chat.

    Reached when no URL was given or extraction produced nothing. The
    interrupt payload is rendered by the frontend; the client resumes with
    Command(job_description=user_input). Empty input routes back to this node.
    """
    logger.info("Job description missing or empty, prompting user to paste via chat")
    prompt = "Please paste the job description:"
    return graph_interrupt.request_input_for_field(
        state, "job_description", prompt, "job description"
    )
172
+
173
+
174
def route_after_resume_load(
    state: DataLoadState,
) -> Literal["prompt_user_for_resume", "load_job_description"]:
    """Route after load_resume: prompt the user when the resume is empty, otherwise continue to job-description loading."""
    cr = state.company_research_data
    resume_text = str((cr.resume if cr else "") or "").strip()
    if resume_text:
        logger.info("Resume is present, routing to load_job_description")
        return "load_job_description"
    logger.info("Resume is empty, routing to prompt_user_for_resume")
    return "prompt_user_for_resume"
186
+
187
+
188
def route_after_job_load(
    state: DataLoadState,
) -> Literal["prompt_user_for_job_description", "candidate_job_fit_analysis"]:
    """Route after load_job_description: prompt the user when the job description is empty, otherwise continue to fit analysis."""
    cr = state.company_research_data
    description = str((cr.job_description if cr else "") or "").strip()
    if description:
        logger.info("Job description is present, routing to candidate_job_fit_analysis")
        return "candidate_job_fit_analysis"
    logger.info(
        "Job description is empty, routing to prompt_user_for_job_description"
    )
    return "prompt_user_for_job_description"
202
+
203
+
204
@log_async
@retry_async_load
async def load_job_description_node(state: DataLoadState) -> StateUpdate:
    """
    Load the job description from the configured URL.

    Runs after the resume is loaded (or supplied via interrupt). Returns the
    job description and company name inside company_research_data; LangGraph
    merges this partial update. Transient I/O errors are retried via
    @retry_async_load. With no URL configured, empty strings are stored so
    the router can prompt the user.

    Parameters
    ----------
    state: DataLoadState
        Current workflow state containing job_description_url_.

    Returns
    -------
    StateUpdate
        Partial state update with job description and company name in
        company_research_data (resume is carried along unchanged).
    """
    source = state.workflow_inputs.job_description_url_
    job_text, company_name = "", ""
    if source:
        job_text, company_name = await JobDescriptionLoader().load_job_description(
            source
        )

    current = _get_cr(state)
    logger.info(
        f"Job description loaded: {len(job_text)} characters, company: {company_name}"
    )
    merged_fields = {
        "resume": current.resume or "",
        "job_description": job_text,
        "company_name": company_name,
    }
    return {"company_research_data": current.model_copy(update=merged_fields)}
245
+
246
+
247
@log_async
async def candidate_job_fit_analysis_node(state: DataLoadState) -> StateUpdate:
    """
    Run the DSPy candidate-job fit analysis once resume and job description exist.

    Feeds the loaded resume and job description into analyze_candidate_job_fit
    and stores the resulting insights for the downstream content-generation
    nodes (cover letter, bullets, LinkedIn note).

    Parameters
    ----------
    state: DataLoadState
        Workflow state with resume and job description already loaded.

    Returns
    -------
    StateUpdate
        Partial update carrying candidate_job_fit_analysis inside
        company_research_data, plus next_node set to
        AgentWorkflowNodes.RESEARCH for main-graph routing.
    """
    research = _get_cr(state)
    resume = research.resume or ""
    job_desc = research.job_description or ""

    # Routing should guarantee both inputs are non-empty; warn if not.
    if not resume.strip():
        logger.warning("Resume is empty in candidate_job_fit_analysis_node")
    if not job_desc.strip():
        logger.warning("Job description is empty in candidate_job_fit_analysis_node")

    analysis = await analyze_candidate_job_fit(
        resume_text=resume,
        job_description=job_desc,
        company_name=research.company_name or "",
    )
    logger.info("Candidate-job fit analysis node completed")

    return {
        "company_research_data": research.model_copy(
            update={"candidate_job_fit_analysis": analysis}
        ),
        "next_node": AgentWorkflowNodes.RESEARCH,
    }
291
+
292
+
293
# ============================================================================
# Data Loading Subgraph Definition
# ============================================================================
# Flow: set_agent_system_message -> load_resume -> [prompt_user_for_resume?]
# -> load_job_description -> [prompt_user_for_job_description?]
# -> candidate_job_fit_analysis. Conditional edges divert to a prompt node
# only when the corresponding field came back empty.
#
# NOTE(review): the routers return the literal strings
# "prompt_user_for_resume" / "load_job_description" / etc., while the
# conditional-edge maps key on N.*.value — confirm the DataLoadingNodes enum
# values equal those exact literals, otherwise routing will fail at runtime.

N = DataLoadingNodes  # Shorthand for graph construction

data_loading_subgraph = StateGraph(DataLoadState)

data_loading_subgraph.add_node(
    N.SET_AGENT_SYSTEM_MESSAGE.value, set_agent_system_message_node
)
data_loading_subgraph.add_node(N.LOAD_RESUME.value, load_resume_node)
data_loading_subgraph.add_node(N.LOAD_JOB_DESCRIPTION.value, load_job_description_node)
data_loading_subgraph.add_node(
    N.PROMPT_USER_FOR_RESUME.value, prompt_user_for_resume_node
)
data_loading_subgraph.add_node(
    N.PROMPT_USER_FOR_JOB_DESCRIPTION.value, prompt_user_for_job_description_node
)
data_loading_subgraph.add_node(
    N.CANDIDATE_JOB_FIT_ANALYSIS.value, candidate_job_fit_analysis_node
)

data_loading_subgraph.set_entry_point(N.SET_AGENT_SYSTEM_MESSAGE.value)
data_loading_subgraph.set_finish_point(N.CANDIDATE_JOB_FIT_ANALYSIS.value)
data_loading_subgraph.add_edge(N.SET_AGENT_SYSTEM_MESSAGE.value, N.LOAD_RESUME.value)
data_loading_subgraph.add_conditional_edges(
    N.LOAD_RESUME.value,
    route_after_resume_load,
    {
        N.PROMPT_USER_FOR_RESUME.value: N.PROMPT_USER_FOR_RESUME.value,
        N.LOAD_JOB_DESCRIPTION.value: N.LOAD_JOB_DESCRIPTION.value,
    },
)
data_loading_subgraph.add_edge(
    N.PROMPT_USER_FOR_RESUME.value, N.LOAD_JOB_DESCRIPTION.value
)
data_loading_subgraph.add_conditional_edges(
    N.LOAD_JOB_DESCRIPTION.value,
    route_after_job_load,
    {
        N.PROMPT_USER_FOR_JOB_DESCRIPTION.value: N.PROMPT_USER_FOR_JOB_DESCRIPTION.value,
        N.CANDIDATE_JOB_FIT_ANALYSIS.value: N.CANDIDATE_JOB_FIT_ANALYSIS.value,
    },
)
data_loading_subgraph.add_edge(
    N.PROMPT_USER_FOR_JOB_DESCRIPTION.value, N.CANDIDATE_JOB_FIT_ANALYSIS.value
)

data_loading_workflow = data_loading_subgraph.compile(name="Data Load Subgraph")
src/job_writing_agent/nodes/research_workflow.py CHANGED
@@ -150,7 +150,7 @@ def company_research_data_summary(state: ResearchState) -> ResearchState:
150
 
151
  llm_provider = LLMFactory()
152
  llm = llm_provider.create_dspy(
153
- model="mistralai/devstral-2512:free",
154
  provider="openrouter",
155
  temperature=0.3,
156
  )
 
150
 
151
  llm_provider = LLMFactory()
152
  llm = llm_provider.create_dspy(
153
+ model="google/gemma-3-27b-it:free",
154
  provider="openrouter",
155
  temperature=0.3,
156
  )
src/job_writing_agent/tools/SearchTool.py CHANGED
@@ -37,7 +37,7 @@ class TavilyResearchTool:
37
  job_description,
38
  company_name,
39
  max_results=5,
40
- model_name="mistralai/devstral-2512:free",
41
  ):
42
  # Create LLM inside __init__ (lazy initialization)
43
  llm_provider = LLMFactory()
 
37
  job_description,
38
  company_name,
39
  max_results=5,
40
+ model_name="google/gemma-3-27b-it:free",
41
  ):
42
  # Create LLM inside __init__ (lazy initialization)
43
  llm_provider = LLMFactory()
src/job_writing_agent/workflow.py CHANGED
@@ -155,7 +155,7 @@ class JobWorkflow:
155
  "session_id": thread_id,
156
  },
157
  "tags": ["job-application-workflow", self.content],
158
- "recursion_limit": 2,
159
  }
160
 
161
  @log_execution
 
155
  "session_id": thread_id,
156
  },
157
  "tags": ["job-application-workflow", self.content],
158
+ "recursion_limit": 10,
159
  }
160
 
161
  @log_execution