Rishabh2095 committed on
Commit
a01026b
·
1 Parent(s): 046508a

Refactor job writing agent: Improved code structure by introducing new data loading classes, enhancing logging practices, and ensuring safe environment variable access. Updated workflow logic for better readability and maintainability.

Browse files
src/job_writing_agent/__init__.py CHANGED
@@ -7,7 +7,8 @@ using LangChain and LangGraph with LangSmith observability.
7
 
8
  __version__ = "0.1.0"
9
 
10
- import os, getpass
 
11
  import logging
12
  from pathlib import Path
13
  from dotenv import load_dotenv
@@ -16,77 +17,112 @@ from dotenv import load_dotenv
16
  # Set up logging
17
  logger = logging.getLogger(__name__)
18
  logger.setLevel(logging.INFO)
19
- log_dir = Path(__file__).parent / 'logs'
20
  log_dir.mkdir(exist_ok=True)
21
- logger.addHandler(logging.FileHandler(log_dir / 'job_writer.log', mode='a'))
22
- logger.info("Logger initialized. Writing to %s", Path(__file__).parent / 'job_writer.log')
 
 
23
 
24
  # Load environment variables from .env file
25
- env_path = Path(__file__).parent / '.env'
26
 
27
 
28
  def _set_env(var: str):
29
  if not os.environ.get(var):
30
- os.environ[var] = getpass.getpass(f"{var}: ")
31
  logger.info(f"{var} set to {os.environ[var]}")
32
 
 
33
  if env_path.exists():
34
- logger.info("Loading environment variables from %s", env_path)
35
  load_dotenv(dotenv_path=env_path, override=True)
36
  else:
37
- logger.warning(".env file not found at %s. Using system environment variables.", env_path)
 
 
38
 
39
  # Check for critical environment variables
40
  if not os.getenv("TAVILY_API_KEY"):
41
- logger.warning("TAVILY_API_KEY environment variable is not set." \
42
- " Failed to get TAVILY_API_KEY at Path %s", env_path)
 
 
 
43
  _set_env("TAVILY_API_KEY")
44
 
45
 
46
  if not os.getenv("GEMINI_API_KEY"):
47
- logger.warning("GEMINI_API_KEY environment variable is not set. " \
48
- "Failed to get GEMINI_API_KEY at Path %s", env_path)
 
 
 
49
  _set_env("GEMINI_API_KEY")
50
 
51
 
52
  if not os.getenv("PINECONE_API_KEY"):
53
- logger.warning("PINECONE_API_KEY environment variable is not set." \
54
- " Failed to get PINECONE_API_KEY at Path %s", env_path)
 
 
 
55
  _set_env("PINECONE_API_KEY")
56
 
57
  if not os.getenv("LANGFUSE_PUBLIC_KEY"):
58
- logger.warning("LANGFUSE_PUBLIC_KEY environment variable is not set." \
59
- " Failed to get LANGFUSE_PUBLIC_KEY at Path %s", env_path)
 
 
 
60
  _set_env("LANGFUSE_PUBLIC_KEY")
61
 
62
  if not os.getenv("LANGFUSE_SECRET_KEY"):
63
- logger.warning("LANGFUSE_SECRET_KEY environment variable is not set." \
64
- " Failed to get LANGFUSE_SECRET_KEY at Path %s", env_path)
 
 
 
65
  _set_env("LANGFUSE_SECRET_KEY")
66
 
67
  if not os.getenv("LANGSMITH_API_KEY"):
68
- logger.warning("LANGSMITH_API_KEY environment variable is not set." \
69
- " Failed to get LANGSMITH_API_KEY at Path %s", env_path)
 
 
 
70
  _set_env("LANGSMITH_API_KEY")
71
 
72
  if not os.getenv("OPENROUTER_API_KEY"):
73
- logger.warning("OPENROUTER_API_KEY environment variable is not set." \
74
- " Failed to get OPENROUTER_API_KEY at Path %s", env_path)
 
 
 
75
  _set_env("OPENROUTER_API_KEY")
76
 
77
  if not os.getenv("LANGSMITH_PROJECT"):
78
- logger.warning("LANGSMITH_PROJECT environment variable is not set." \
79
- " Failed to get LANGSMITH_PROJECT at Path %s", env_path)
 
 
 
80
  _set_env("LANGSMITH_PROJECT")
81
 
82
  if not os.getenv("LANGSMITH_ENDPOINT"):
83
- logger.warning("LANGSMITH_ENDPOINT environment variable is not set." \
84
- " Failed to get LANGSMITH_ENDPOINT at Path %s", env_path)
 
 
 
85
  _set_env("LANGSMITH_ENDPOINT")
86
 
87
  if not os.getenv("CEREBRAS_API_KEY"):
88
- logger.warning("CEREBRAS_API_KEY environment variable is not set." \
89
- " Failed to get CEREBRAS_API_KEY at Path %s", env_path)
 
 
 
90
  _set_env("CEREBRAS_API_KEY")
91
 
92
  os.environ["LANGSMITH_TRACING"] = "true"
@@ -111,33 +147,48 @@ from dotenv import load_dotenv
111
 
112
  logger = logging.getLogger(__name__)
113
  logger.setLevel(logging.INFO)
114
- log_dir = Path(__file__).parent / 'logs'
115
  log_dir.mkdir(exist_ok=True)
116
- logger.addHandler(logging.FileHandler(log_dir / 'job_writer.log', mode='a'))
117
- logger.info("Logger initialized. Writing to %s", Path(__file__).parent / 'job_writer.log')
 
 
 
 
118
 
119
- env_path = Path(__file__).parent / '.env'
120
 
121
  def _set_env(var: str):
122
  if not os.environ.get(var):
123
  os.environ[var] = getpass.getpass(f"{var}: ")
124
  logger.info(f"{var} set to {os.environ[var]}")
125
 
 
126
  def load_environment_variables(key_array):
127
  for key in key_array:
128
  if not os.getenv(key):
129
- logger.warning(f"{key} environment variable is not set. Failed to get {key} at Path {env_path}")
 
 
130
  _set_env(key)
131
 
 
132
  if env_path.exists():
133
- logger.info("Loading environment variables from %s", env_path)
134
  load_dotenv(dotenv_path=env_path, override=True)
135
  else:
136
- logger.warning(".env file not found at %s. Using system environment variables.", env_path)
137
-
138
-
139
- environment_key_array = ["TAVILY_API_KEY", "GEMINI_API_KEY", "PINECONE_API_KEY", "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]
 
 
 
 
 
 
 
 
140
  # Check for critical environment variables
141
  load_environment_variables(environment_key_array)
142
 
143
- __all__ = ["job_app_graph", "workflows/research_workflow"]
 
7
 
8
  __version__ = "0.1.0"
9
 
10
+ import os
11
+ from getpass import getpass
12
  import logging
13
  from pathlib import Path
14
  from dotenv import load_dotenv
 
17
  # Set up logging
18
  logger = logging.getLogger(__name__)
19
  logger.setLevel(logging.INFO)
20
+ log_dir = Path(__file__).parent / "logs"
21
  log_dir.mkdir(exist_ok=True)
22
+ logger.addHandler(logging.FileHandler(log_dir / "job_writer.log", mode="a"))
23
+ logger.info(
24
+ "Logger initialized. Writing to %s", Path(__file__).parent / "job_writer.log"
25
+ )
26
 
27
  # Load environment variables from .env file
28
+ env_path = Path(__file__).parent / ".env"
29
 
30
 
31
  def _set_env(var: str):
32
  if not os.environ.get(var):
33
+ os.environ[var] = getpass(f"{var}: ")
34
  logger.info(f"{var} set to {os.environ[var]}")
35
 
36
+
37
  if env_path.exists():
38
+ logger.info("Loading environment variables from %s", env_path)
39
  load_dotenv(dotenv_path=env_path, override=True)
40
  else:
41
+ logger.warning(
42
+ ".env file not found at %s. Using system environment variables.", env_path
43
+ )
44
 
45
  # Check for critical environment variables
46
  if not os.getenv("TAVILY_API_KEY"):
47
+ logger.warning(
48
+ "TAVILY_API_KEY environment variable is not set."
49
+ " Failed to get TAVILY_API_KEY at Path %s",
50
+ env_path,
51
+ )
52
  _set_env("TAVILY_API_KEY")
53
 
54
 
55
  if not os.getenv("GEMINI_API_KEY"):
56
+ logger.warning(
57
+ "GEMINI_API_KEY environment variable is not set. "
58
+ "Failed to get GEMINI_API_KEY at Path %s",
59
+ env_path,
60
+ )
61
  _set_env("GEMINI_API_KEY")
62
 
63
 
64
  if not os.getenv("PINECONE_API_KEY"):
65
+ logger.warning(
66
+ "PINECONE_API_KEY environment variable is not set."
67
+ " Failed to get PINECONE_API_KEY at Path %s",
68
+ env_path,
69
+ )
70
  _set_env("PINECONE_API_KEY")
71
 
72
  if not os.getenv("LANGFUSE_PUBLIC_KEY"):
73
+ logger.warning(
74
+ "LANGFUSE_PUBLIC_KEY environment variable is not set."
75
+ " Failed to get LANGFUSE_PUBLIC_KEY at Path %s",
76
+ env_path,
77
+ )
78
  _set_env("LANGFUSE_PUBLIC_KEY")
79
 
80
  if not os.getenv("LANGFUSE_SECRET_KEY"):
81
+ logger.warning(
82
+ "LANGFUSE_SECRET_KEY environment variable is not set."
83
+ " Failed to get LANGFUSE_SECRET_KEY at Path %s",
84
+ env_path,
85
+ )
86
  _set_env("LANGFUSE_SECRET_KEY")
87
 
88
  if not os.getenv("LANGSMITH_API_KEY"):
89
+ logger.warning(
90
+ "LANGSMITH_API_KEY environment variable is not set."
91
+ " Failed to get LANGSMITH_API_KEY at Path %s",
92
+ env_path,
93
+ )
94
  _set_env("LANGSMITH_API_KEY")
95
 
96
  if not os.getenv("OPENROUTER_API_KEY"):
97
+ logger.warning(
98
+ "OPENROUTER_API_KEY environment variable is not set."
99
+ " Failed to get OPENROUTER_API_KEY at Path %s",
100
+ env_path,
101
+ )
102
  _set_env("OPENROUTER_API_KEY")
103
 
104
  if not os.getenv("LANGSMITH_PROJECT"):
105
+ logger.warning(
106
+ "LANGSMITH_PROJECT environment variable is not set."
107
+ " Failed to get LANGSMITH_PROJECT at Path %s",
108
+ env_path,
109
+ )
110
  _set_env("LANGSMITH_PROJECT")
111
 
112
  if not os.getenv("LANGSMITH_ENDPOINT"):
113
+ logger.warning(
114
+ "LANGSMITH_ENDPOINT environment variable is not set."
115
+ " Failed to get LANGSMITH_ENDPOINT at Path %s",
116
+ env_path,
117
+ )
118
  _set_env("LANGSMITH_ENDPOINT")
119
 
120
  if not os.getenv("CEREBRAS_API_KEY"):
121
+ logger.warning(
122
+ "CEREBRAS_API_KEY environment variable is not set."
123
+ " Failed to get CEREBRAS_API_KEY at Path %s",
124
+ env_path,
125
+ )
126
  _set_env("CEREBRAS_API_KEY")
127
 
128
  os.environ["LANGSMITH_TRACING"] = "true"
 
147
 
148
  logger = logging.getLogger(__name__)
149
  logger.setLevel(logging.INFO)
150
+ log_dir = Path(__file__).parent / "logs"
151
  log_dir.mkdir(exist_ok=True)
152
+ logger.addHandler(logging.FileHandler(log_dir / "job_writer.log", mode="a"))
153
+ logger.info(
154
+ "Logger initialized. Writing to %s", Path(__file__).parent / "job_writer.log"
155
+ )
156
+
157
+ env_path = Path(__file__).parent / ".env"
158
 
 
159
 
160
  def _set_env(var: str):
161
  if not os.environ.get(var):
162
  os.environ[var] = getpass.getpass(f"{var}: ")
163
  logger.info(f"{var} set to {os.environ[var]}")
164
 
165
+
166
  def load_environment_variables(key_array):
167
  for key in key_array:
168
  if not os.getenv(key):
169
+ logger.warning(
170
+ f"{key} environment variable is not set. Failed to get {key} at Path {env_path}"
171
+ )
172
  _set_env(key)
173
 
174
+
175
  if env_path.exists():
176
+ logger.info("Loading environment variables from %s", env_path)
177
  load_dotenv(dotenv_path=env_path, override=True)
178
  else:
179
+ logger.warning(
180
+ ".env file not found at %s. Using system environment variables.", env_path
181
+ )
182
+
183
+
184
+ environment_key_array = [
185
+ "TAVILY_API_KEY",
186
+ "GEMINI_API_KEY",
187
+ "PINECONE_API_KEY",
188
+ "LANGFUSE_PUBLIC_KEY",
189
+ "LANGFUSE_SECRET_KEY",
190
+ ]
191
  # Check for critical environment variables
192
  load_environment_variables(environment_key_array)
193
 
194
+ __all__ = ["job_app_graph", "workflows/research_workflow"]
src/job_writing_agent/agents/nodes.py CHANGED
@@ -5,21 +5,22 @@ This module contains all the node functions used in the job application
5
  writer workflow graph, each handling a specific step in the process.
6
  """
7
 
 
8
  import logging
9
  from datetime import datetime
10
 
11
- from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
12
- from langchain_core.output_parsers import StrOutputParser
13
  from langchain_core.messages import SystemMessage
 
14
 
15
- from ..classes.classes import AppState, ResearchState, ResultState, DataLoadState
 
16
  from ..prompts.templates import (
17
- CRITIQUE_PROMPT,
18
- PERSONA_DEVELOPMENT_PROMPT,
19
- COVER_LETTER_PROMPT,
20
- REVISION_PROMPT,
21
  BULLET_POINTS_PROMPT,
 
 
22
  LINKEDIN_NOTE_PROMPT,
 
23
  )
24
  from ..utils.llm_provider_factory import LLMFactory
25
 
@@ -30,132 +31,67 @@ CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
30
 
31
  def create_draft(state: ResearchState) -> ResultState:
32
  """Create initial draft of the application material."""
 
 
 
 
 
 
33
  # Create LLM inside function (lazy initialization)
34
  llm_provider = LLMFactory()
35
  llm = llm_provider.create_langchain(
36
- "mistralai/mistral-7b-instruct:free", provider="openrouter", temperature=0.3
 
 
37
  )
38
 
 
 
 
 
 
 
39
  # Determine which type of content we're creating
40
- company_background_information = state.get("company_research_data", {})
41
 
42
  content_category = state.get("content_category", "cover_letter")
43
 
44
- # Get the original resume text from state (used later if vector search is available)
45
- original_resume_text = company_background_information.get("resume", "")
46
-
47
- try:
48
- # Not yet implemented
49
- if state.get("vector_store"):
50
- vector_store = state.get("vector_store")
51
-
52
- # Extract key requirements from job description
53
- prompt = PERSONA_DEVELOPMENT_PROMPT | llm | StrOutputParser()
54
-
55
- if company_background_information:
56
- key_requirements = prompt.invoke(
57
- {
58
- "job_description": company_background_information[
59
- "job_description"
60
- ]
61
- }
62
- )
63
- else:
64
- return key_requirements
65
-
66
- if not key_requirements:
67
- print("Warning: No key requirements found in the job description.")
68
- return state
69
-
70
- # Use the key requirements to query for the most relevant resume parts
71
- namespace = f"resume_{state['session_id']}"
72
- relevant_docs = vector_store.retrieve_similar(
73
- query=key_requirements, namespace=namespace, k=3
74
- )
75
-
76
- # Use these relevant sections with higher weight in the draft creation
77
- highly_relevant_resume = "\n".join(
78
- [doc.page_content for doc in relevant_docs]
79
- )
80
- # Combine highly relevant parts with full resume text
81
- resume_text = f"""
82
- # Most Relevant Experience
83
- {highly_relevant_resume}
84
-
85
- # Full Resume
86
- {original_resume_text}
87
- """
88
- # Update the company_background_information with the enhanced resume
89
- company_background_information["resume"] = resume_text
90
- except Exception as e:
91
- logger.warning(f"Could not use vector search for relevant resume parts: {e}")
92
- # Continue with regular resume text
93
-
94
- # Select the appropriate prompt template based on application type and persona
95
  logger.info(f"The candidate wants the Agent to assist with : {content_category}")
96
- if content_category == "bullets":
97
- FirstDraftGenerationPromptTemplate = ChatPromptTemplate([BULLET_POINTS_PROMPT])
98
- elif content_category == "linkedin_connect_request":
99
- FirstDraftGenerationPromptTemplate = ChatPromptTemplate([LINKEDIN_NOTE_PROMPT])
100
- else:
101
- FirstDraftGenerationPromptTemplate = ChatPromptTemplate([COVER_LETTER_PROMPT])
102
-
103
- # Create the draft using the selected prompt template
104
- CurrentSessionContextMessage = HumanMessagePromptTemplate.from_template(
105
- """
106
- Below is the Job Description, Candidate Resume, and Company Research Data enclosed in triple backticks.
107
-
108
- **Job Description:**
109
-
110
- '''
111
- {current_job_role}
112
- '''
113
-
114
- **Candidate Resume:**
115
-
116
- '''
117
- {candidate_resume}
118
- '''
119
-
120
- **Company Research Data:**
121
-
122
- '''
123
- {company_research_data}
124
- '''
125
- """,
126
- input_variables=[
127
- "current_job_role",
128
- "company_research_data",
129
- "candidate_resume",
130
- ],
131
  )
132
 
133
- FirstDraftGenerationPromptTemplate.append(CurrentSessionContextMessage)
 
 
134
 
135
- # Invoke the chain with the appropriate inputs
136
  draft_generation_chain = (
137
  (
138
  {
139
  "current_job_role": lambda x: x["current_job_role"],
140
- "company_research_data": lambda x: x["company_research_data"],
141
  "candidate_resume": lambda x: x["candidate_resume"],
 
 
142
  }
143
  )
144
- | FirstDraftGenerationPromptTemplate
145
  | llm
146
  )
147
 
148
- # Prepare the inputs
149
  application_background_data = {
150
- "current_job_role": company_background_information["job_description"],
151
- "company_research_data": company_background_information[
152
- "company_research_data_summary"
153
- ],
154
- "candidate_resume": company_background_information["resume"],
 
155
  }
156
 
157
  response = draft_generation_chain.invoke(application_background_data)
158
  logger.info(f"Draft has been created: {response.content}")
 
159
  app_state = ResultState(
160
  draft=response.content,
161
  feedback="",
@@ -176,31 +112,37 @@ def critique_draft(state: ResultState) -> ResultState:
176
  try:
177
  logger.info("Critiquing draft...")
178
 
179
- # Create LLM inside function (lazy initialization)
180
- llm_provider = LLMFactory()
181
- llm = llm_provider.create_langchain(
182
- "mistralai/mistral-7b-instruct:free", provider="openrouter", temperature=0.3
183
- )
184
-
185
- job_description = str(state["company_research_data"].get("job_description", ""))
186
- draft = str(state.get("draft", ""))
187
 
188
  # Debug logging to verify values
189
  logger.debug(f"Job description length: {len(job_description)}")
190
- logger.debug(f"Draft length: {len(draft)}")
191
 
192
- if not job_description or not draft:
 
193
  logger.warning("Missing job_description or draft in state")
194
- # Return state with empty feedback
195
  return ResultState(
196
- draft=draft,
197
- feedback="",
198
  critique_feedback="",
199
  current_node="critique",
200
- company_research_data=state["company_research_data"],
201
- output_data=state["output_data"],
202
  )
203
 
 
 
 
 
 
 
 
 
204
  # Use the same pattern as create_draft:
205
  # 1. Create ChatPromptTemplate from SystemMessage
206
  # 2. Append HumanMessagePromptTemplate with variables
@@ -213,10 +155,10 @@ def critique_draft(state: ResultState) -> ResultState:
213
  )
214
 
215
  # Create ChatPromptTemplate from SystemMessage (like line 90-94 in create_draft)
216
- CritiquePromptTemplate = ChatPromptTemplate([critique_system_message])
217
 
218
  # Append HumanMessagePromptTemplate with variables (like line 97-124 in create_draft)
219
- CritiqueContextMessage = HumanMessagePromptTemplate.from_template(
220
  """
221
  # Job Description
222
  {job_description}
@@ -235,7 +177,7 @@ def critique_draft(state: ResultState) -> ResultState:
235
  input_variables=["job_description", "draft"],
236
  )
237
 
238
- CritiquePromptTemplate.append(CritiqueContextMessage)
239
 
240
  # Create chain (like line 129-139 in create_draft)
241
  critique_chain = (
@@ -243,15 +185,15 @@ def critique_draft(state: ResultState) -> ResultState:
243
  "job_description": lambda x: x["job_description"],
244
  "draft": lambda x: x["draft"],
245
  }
246
- | CritiquePromptTemplate
247
  | llm
248
  )
249
 
250
- # Invoke with input variables (like line 150 in create_draft)
251
  critique = critique_chain.invoke(
252
  {
253
  "job_description": job_description,
254
- "draft": draft,
255
  }
256
  )
257
 
@@ -260,16 +202,15 @@ def critique_draft(state: ResultState) -> ResultState:
260
  )
261
  logger.info("Draft critique completed")
262
 
263
- # Store the critique for reference during revision
264
- app_state = ResultState(
265
- draft=state["draft"],
266
- feedback=state["feedback"],
267
  critique_feedback=critique_content,
268
  current_node="critique",
269
- company_research_data=state["company_research_data"],
270
- output_data=state["output_data"],
271
  )
272
- return app_state
273
 
274
  except Exception as e:
275
  logger.error(f"Error in critique_draft: {e}", exc_info=True)
@@ -279,70 +220,84 @@ def critique_draft(state: ResultState) -> ResultState:
279
 
280
  def human_approval(state: ResultState) -> ResultState:
281
  """Human-in-the-loop checkpoint for feedback on the draft."""
282
- # This is a placeholder function that would be replaced by actual UI interaction
 
 
 
 
 
 
283
  print("\n" + "=" * 80)
284
  print("DRAFT FOR REVIEW:")
285
- print(state["draft"])
286
  print("\nAUTOMATIC CRITIQUE:")
287
- print(state.get("critique_feedback", "No critique available"))
288
  print("=" * 80)
289
  print("\nPlease provide your feedback (press Enter to continue with no changes):")
290
 
291
  # In a real implementation, this would be handled by the UI
292
  human_feedback = input()
293
- result_state = ResultState(
294
- draft=state["draft"],
 
295
  feedback=human_feedback,
296
- critique_feedback=state["critique_feedback"],
297
  current_node="human_approval",
298
- company_research_data=state["company_research_data"],
299
- output_data=state["output_data"],
300
  )
301
- return result_state
302
 
303
 
304
  def finalize_document(state: ResultState) -> DataLoadState:
305
  """Incorporate feedback and finalize the document."""
 
 
 
 
 
 
 
 
306
 
307
  # Create LLM inside function (lazy initialization)
308
  llm_provider = LLMFactory()
309
  llm = llm_provider.create_langchain(
310
- "mistralai/mistral-7b-instruct:free", provider="openrouter", temperature=0.3
 
 
311
  )
312
 
313
- # Create chain like in critique_draft (line 229-236)
314
  revision_chain = (
315
  {
316
- "draft": lambda x: x["draft"],
317
- "feedback": lambda x: x["feedback"],
318
- "critique_feedback": lambda x: x["critique_feedback"],
319
  }
320
  | REVISION_PROMPT
321
  | llm
322
  )
323
 
324
- print(f"revision_chain: {revision_chain}")
325
-
326
- # Invoke with input variables (like line 239 in critique_draft)
327
  final_content = revision_chain.invoke(
328
  {
329
- "draft": state["draft"],
330
- "feedback": state["feedback"],
331
- "critique_feedback": state["critique_feedback"],
332
  }
333
  )
334
 
335
- app_state = DataLoadState(
336
- draft=state["draft"],
337
- feedback=state["feedback"],
338
- critique_feedback=state["critique_feedback"],
339
- company_research_data=state["company_research_data"],
 
340
  current_node="finalize",
341
  output_data=final_content.content
342
  if hasattr(final_content, "content")
343
  else str(final_content),
344
  )
345
- return app_state
346
 
347
 
348
  """
@@ -351,8 +306,19 @@ Conditional node to determine if next node should be 'draft' node or "research"
351
 
352
 
353
  def determine_next_step(state: AppState) -> str:
354
- """If the company name is missing within the AppState, we can't
355
- create the content draft and therefore redirected to the research node."""
356
- if not state["company_name"]:
 
 
 
 
 
 
 
 
 
 
 
357
  return "draft"
358
  return "research"
 
5
  writer workflow graph, each handling a specific step in the process.
6
  """
7
 
8
+ # Standard library imports
9
  import logging
10
  from datetime import datetime
11
 
12
+ # Third-party imports
 
13
  from langchain_core.messages import SystemMessage
14
+ from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
15
 
16
+ # Local imports
17
+ from ..classes.classes import AppState, DataLoadState, ResearchState, ResultState
18
  from ..prompts.templates import (
 
 
 
 
19
  BULLET_POINTS_PROMPT,
20
+ COVER_LETTER_PROMPT,
21
+ DRAFT_GENERATION_CONTEXT_PROMPT,
22
  LINKEDIN_NOTE_PROMPT,
23
+ REVISION_PROMPT,
24
  )
25
  from ..utils.llm_provider_factory import LLMFactory
26
 
 
31
 
32
  def create_draft(state: ResearchState) -> ResultState:
33
  """Create initial draft of the application material."""
34
+ # Validate state inputs
35
+ company_background_information = state.get("company_research_data", {})
36
+ if not company_background_information:
37
+ logger.error("Missing company_research_data in state")
38
+ raise ValueError("company_research_data is required in state")
39
+
40
  # Create LLM inside function (lazy initialization)
41
  llm_provider = LLMFactory()
42
  llm = llm_provider.create_langchain(
43
+ "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
44
+ provider="openrouter",
45
+ temperature=0.3,
46
  )
47
 
48
+ draft_category_map = {
49
+ "cover_letter": COVER_LETTER_PROMPT,
50
+ "bullets": BULLET_POINTS_PROMPT,
51
+ "linkedin_connect_request": LINKEDIN_NOTE_PROMPT,
52
+ }
53
+
54
  # Determine which type of content we're creating
 
55
 
56
  content_category = state.get("content_category", "cover_letter")
57
 
58
+ # Select appropriate system message template based on content category
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  logger.info(f"The candidate wants the Agent to assist with : {content_category}")
60
+ system_message_template = draft_category_map.get(
61
+ content_category, COVER_LETTER_PROMPT
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  )
63
 
64
+ # Build the complete prompt template: system message + context
65
+ draft_prompt_template = ChatPromptTemplate([system_message_template])
66
+ draft_prompt_template.append(DRAFT_GENERATION_CONTEXT_PROMPT)
67
 
68
+ # Build the chain: input formatting -> prompt template -> LLM
69
  draft_generation_chain = (
70
  (
71
  {
72
  "current_job_role": lambda x: x["current_job_role"],
 
73
  "candidate_resume": lambda x: x["candidate_resume"],
74
+ "company_research_data": lambda x: x["company_research_data"],
75
+ "current_date": lambda x: x["current_date"],
76
  }
77
  )
78
+ | draft_prompt_template
79
  | llm
80
  )
81
 
82
+ # Prepare the inputs with safe dictionary access
83
  application_background_data = {
84
+ "current_job_role": company_background_information.get("job_description", ""),
85
+ "candidate_resume": company_background_information.get("resume", ""),
86
+ "company_research_data": company_background_information.get(
87
+ "company_research_data_summary", "Company Research Data is not available"
88
+ ),
89
+ "current_date": CURRENT_DATE,
90
  }
91
 
92
  response = draft_generation_chain.invoke(application_background_data)
93
  logger.info(f"Draft has been created: {response.content}")
94
+
95
  app_state = ResultState(
96
  draft=response.content,
97
  feedback="",
 
112
  try:
113
  logger.info("Critiquing draft...")
114
 
115
+ # Validate and extract required state fields once at the start
116
+ company_research_data = state.get("company_research_data", {})
117
+ job_description = str(company_research_data.get("job_description", ""))
118
+ draft_content = str(state.get("draft", ""))
119
+ feedback = state.get("feedback", "")
120
+ output_data = state.get("output_data", "")
 
 
121
 
122
  # Debug logging to verify values
123
  logger.debug(f"Job description length: {len(job_description)}")
124
+ logger.debug(f"Draft length: {len(draft_content)}")
125
 
126
+ # Early return if required fields are missing
127
+ if not job_description or not draft_content:
128
  logger.warning("Missing job_description or draft in state")
 
129
  return ResultState(
130
+ draft=draft_content,
131
+ feedback=feedback,
132
  critique_feedback="",
133
  current_node="critique",
134
+ company_research_data=company_research_data,
135
+ output_data=output_data,
136
  )
137
 
138
+ # Create LLM inside function (lazy initialization)
139
+ llm_provider = LLMFactory()
140
+ llm = llm_provider.create_langchain(
141
+ "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
142
+ provider="openrouter",
143
+ temperature=0.3,
144
+ )
145
+
146
  # Use the same pattern as create_draft:
147
  # 1. Create ChatPromptTemplate from SystemMessage
148
  # 2. Append HumanMessagePromptTemplate with variables
 
155
  )
156
 
157
  # Create ChatPromptTemplate from SystemMessage (like line 90-94 in create_draft)
158
+ critique_prompt_template = ChatPromptTemplate([critique_system_message])
159
 
160
  # Append HumanMessagePromptTemplate with variables (like line 97-124 in create_draft)
161
+ critique_context_message = HumanMessagePromptTemplate.from_template(
162
  """
163
  # Job Description
164
  {job_description}
 
177
  input_variables=["job_description", "draft"],
178
  )
179
 
180
+ critique_prompt_template.append(critique_context_message)
181
 
182
  # Create chain (like line 129-139 in create_draft)
183
  critique_chain = (
 
185
  "job_description": lambda x: x["job_description"],
186
  "draft": lambda x: x["draft"],
187
  }
188
+ | critique_prompt_template
189
  | llm
190
  )
191
 
192
+ # Invoke with validated input variables
193
  critique = critique_chain.invoke(
194
  {
195
  "job_description": job_description,
196
+ "draft": draft_content,
197
  }
198
  )
199
 
 
202
  )
203
  logger.info("Draft critique completed")
204
 
205
+ # Store the critique - using validated variables from top of function
206
+ return ResultState(
207
+ draft=draft_content,
208
+ feedback=feedback,
209
  critique_feedback=critique_content,
210
  current_node="critique",
211
+ company_research_data=company_research_data,
212
+ output_data=output_data,
213
  )
 
214
 
215
  except Exception as e:
216
  logger.error(f"Error in critique_draft: {e}", exc_info=True)
 
220
 
221
  def human_approval(state: ResultState) -> ResultState:
222
  """Human-in-the-loop checkpoint for feedback on the draft."""
223
+ # Validate and extract all required state fields once
224
+ draft_content = state.get("draft", "")
225
+ critique_feedback_content = state.get("critique_feedback", "No critique available")
226
+ company_research_data = state.get("company_research_data", {})
227
+ output_data = state.get("output_data", "")
228
+
229
+ # Display draft and critique for review
230
  print("\n" + "=" * 80)
231
  print("DRAFT FOR REVIEW:")
232
+ print(draft_content)
233
  print("\nAUTOMATIC CRITIQUE:")
234
+ print(critique_feedback_content)
235
  print("=" * 80)
236
  print("\nPlease provide your feedback (press Enter to continue with no changes):")
237
 
238
  # In a real implementation, this would be handled by the UI
239
  human_feedback = input()
240
+
241
+ return ResultState(
242
+ draft=draft_content,
243
  feedback=human_feedback,
244
+ critique_feedback=critique_feedback_content,
245
  current_node="human_approval",
246
+ company_research_data=company_research_data,
247
+ output_data=output_data,
248
  )
 
249
 
250
 
251
  def finalize_document(state: ResultState) -> DataLoadState:
252
  """Incorporate feedback and finalize the document."""
253
+ # Validate and extract all required state fields once
254
+ draft_content = state.get("draft", "")
255
+ feedback_content = state.get("feedback", "")
256
+ critique_feedback_content = state.get("critique_feedback", "")
257
+ company_research_data = state.get("company_research_data", {})
258
+
259
+ if not draft_content:
260
+ logger.warning("Missing draft in state for finalization")
261
 
262
  # Create LLM inside function (lazy initialization)
263
  llm_provider = LLMFactory()
264
  llm = llm_provider.create_langchain(
265
+ "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
266
+ provider="openrouter",
267
+ temperature=0.3,
268
  )
269
 
270
+ # Create revision chain
271
  revision_chain = (
272
  {
273
+ "draft": lambda x: x.get("draft", ""),
274
+ "feedback": lambda x: x.get("feedback", ""),
275
+ "critique_feedback": lambda x: x.get("critique_feedback", ""),
276
  }
277
  | REVISION_PROMPT
278
  | llm
279
  )
280
 
281
+ # Invoke with validated input variables
 
 
282
  final_content = revision_chain.invoke(
283
  {
284
+ "draft": draft_content,
285
+ "feedback": feedback_content,
286
+ "critique_feedback": critique_feedback_content,
287
  }
288
  )
289
 
290
+ # Return final state using validated variables
291
+ return DataLoadState(
292
+ draft=draft_content,
293
+ feedback=feedback_content,
294
+ critique_feedback=critique_feedback_content,
295
+ company_research_data=company_research_data,
296
  current_node="finalize",
297
  output_data=final_content.content
298
  if hasattr(final_content, "content")
299
  else str(final_content),
300
  )
 
301
 
302
 
303
  """
 
306
 
307
 
308
  def determine_next_step(state: AppState) -> str:
309
+ """
310
+ Determine next workflow step based on company name presence.
311
+
312
+ If the company name is missing within the AppState, we can't
313
+ create the content draft and therefore redirect to the research node.
314
+
315
+ Args:
316
+ state: Current application state
317
+
318
+ Returns:
319
+ Next node name: "draft" or "research"
320
+ """
321
+ company_name = state.get("company_name", "")
322
+ if not company_name:
323
  return "draft"
324
  return "research"
src/job_writing_agent/classes/classes.py CHANGED
@@ -74,7 +74,7 @@ class DataLoadState(MessagesState, total=False):
74
 
75
  resume_path: str
76
  job_description_source: str
77
- content: str # "cover_letter", "bullets", "linkedin_note"
78
  resume: str
79
  job_description: str
80
  company_name: str
@@ -96,11 +96,13 @@ class ResearchState(MessagesState):
96
  tavily_search: Dict[str, Any] Stores the results of the Tavily search
97
  attempted_search_queries: List of queries used extracted from the job description
98
  compiled_knowledge: Compiled knowledge from the research
 
99
  """
100
 
101
  company_research_data: Dict[str, Any]
102
  attempted_search_queries: List[str]
103
  current_node: str
 
104
 
105
 
106
  class ResultState(MessagesState):
 
74
 
75
  resume_path: str
76
  job_description_source: str
77
+ content_category: str # "cover_letter", "bullets", "linkedin_note"
78
  resume: str
79
  job_description: str
80
  company_name: str
 
96
  tavily_search: Dict[str, Any] Stores the results of the Tavily search
97
  attempted_search_queries: List of queries used extracted from the job description
98
  compiled_knowledge: Compiled knowledge from the research
99
+ content_category: Type of application material to generate
100
  """
101
 
102
  company_research_data: Dict[str, Any]
103
  attempted_search_queries: List[str]
104
  current_node: str
105
+ content_category: str
106
 
107
 
108
  class ResultState(MessagesState):
src/job_writing_agent/nodes/__init__.py CHANGED
@@ -4,10 +4,30 @@ Created on Mon Oct 23 16:49:52 2023
4
  @author: rishabhaggarwal
5
  """
6
 
7
- from .initializing import Dataloading
 
 
 
 
 
 
 
 
 
8
  # from .createdraft import CreateDraft
9
  from .variations import generate_variations
10
  from .selfconsistency import self_consistency_vote
11
  from .research_workflow import research_workflow
12
 
13
- __all__ = ["Dataloading", "generate_variations", "self_consistency_vote", "research_workflow"]
 
 
 
 
 
 
 
 
 
 
 
 
4
  @author: rishabhaggarwal
5
  """
6
 
7
+ # Legacy import (deprecated - use new classes instead)
8
+ from .data_loading_workflow import data_loading_workflow
9
+
10
+ # New data loading classes following SOLID principles
11
+ from .resume_loader import ResumeLoader
12
+ from .job_description_loader import JobDescriptionLoader
13
+ from .system_initializer import SystemInitializer
14
+ from .validation_helper import ValidationHelper
15
+
16
+ # Other workflow components
17
  # from .createdraft import CreateDraft
18
  from .variations import generate_variations
19
  from .selfconsistency import self_consistency_vote
20
  from .research_workflow import research_workflow
21
 
22
+ __all__ = [
23
+ # New data loading classes
24
+ "ResumeLoader",
25
+ "JobDescriptionLoader",
26
+ "SystemInitializer",
27
+ "ValidationHelper",
28
+ "data_loading_workflow",
29
+ # Other components
30
+ "generate_variations",
31
+ "self_consistency_vote",
32
+ "research_workflow",
33
+ ]
src/job_writing_agent/nodes/data_loading_workflow.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Data Loading Workflow Module
4
+
5
+ This module defines the data loading subgraph workflow, including all node
6
+ functions and the subgraph definition. It uses the separate loader classes
7
+ (ResumeLoader, JobDescriptionLoader, SystemInitializer, ValidationHelper)
8
+ following the Single Responsibility Principle.
9
+ """
10
+
11
+ import logging
12
+ from typing import Any
13
+
14
+ from langgraph.graph import StateGraph, END, START
15
+
16
+ from job_writing_agent.classes import DataLoadState
17
+ from job_writing_agent.nodes.resume_loader import ResumeLoader
18
+ from job_writing_agent.nodes.job_description_loader import JobDescriptionLoader
19
+ from job_writing_agent.nodes.system_initializer import SystemInitializer
20
+ from job_writing_agent.nodes.validation_helper import ValidationHelper
21
+ from job_writing_agent.utils.logging.logging_decorators import (
22
+ log_async,
23
+ log_execution,
24
+ )
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ # ============================================================================
30
+ # Data Loading Subgraph Node Functions
31
+ # ============================================================================
32
+
33
+
34
+ @log_async
35
+ async def set_agent_system_message_node(state: DataLoadState) -> DataLoadState:
36
+ """
37
+ Node function to initialize system message in workflow state.
38
+
39
+ This node wraps the SystemInitializer.set_agent_system_message method
40
+ for use in the LangGraph workflow.
41
+
42
+ Parameters
43
+ ----------
44
+ state: DataLoadState
45
+ Current workflow state.
46
+
47
+ Returns
48
+ -------
49
+ DataLoadState
50
+ Updated state with system message added to messages list.
51
+ """
52
+ initializer = SystemInitializer()
53
+ return await initializer.set_agent_system_message(state)
54
+
55
+
56
+ @log_async
57
+ async def parse_resume_node(state: DataLoadState) -> DataLoadState:
58
+ """
59
+ Node to parse resume in parallel with job description parsing.
60
+
61
+ Extracts resume parsing logic for parallel execution.
62
+ Returns only the resume data - reducer will merge with job description data.
63
+
64
+ Parameters
65
+ ----------
66
+ state: DataLoadState
67
+ Current workflow state containing resume_path.
68
+
69
+ Returns
70
+ -------
71
+ DataLoadState
72
+ Partial state update with resume data in company_research_data.
73
+ LangGraph will merge this with other parallel updates.
74
+ """
75
+ loader = ResumeLoader()
76
+ resume_src = state.get("resume_path")
77
+
78
+ resume_text = ""
79
+ if resume_src:
80
+ resume_text = await loader._load_resume(resume_src)
81
+ elif state.get("current_node") == "verify":
82
+ resume_text = await loader._prompt_user_for_resume()
83
+
84
+ # Return only the resume data - reducer will merge this with job description data
85
+ logger.info(f"Resume parsed: {len(resume_text)} characters")
86
+ return {
87
+ "company_research_data": {"resume": resume_text},
88
+ }
89
+
90
+
91
+ @log_async
92
+ async def parse_job_description_node(state: DataLoadState) -> DataLoadState:
93
+ """
94
+ Node to parse job description in parallel with resume parsing.
95
+
96
+ Extracts job description parsing logic for parallel execution.
97
+ Returns only the job description data - reducer will merge with resume data.
98
+
99
+ Parameters
100
+ ----------
101
+ state: DataLoadState
102
+ Current workflow state containing job_description_source.
103
+
104
+ Returns
105
+ -------
106
+ DataLoadState
107
+ Partial state update with job description and company name in
108
+ company_research_data. LangGraph will merge this with other parallel updates.
109
+ """
110
+ loader = JobDescriptionLoader()
111
+ jd_src = state.get("job_description_source")
112
+
113
+ job_text = ""
114
+ company_name = ""
115
+ if jd_src:
116
+ job_text, company_name = await loader._load_job_description(jd_src)
117
+ elif state.get("current_node") == "verify":
118
+ job_text = await loader._prompt_user_for_job_description()
119
+
120
+ # Return only the job description data - reducer will merge this with resume data
121
+ logger.info(
122
+ f"Job description parsed: {len(job_text)} characters, company: {company_name}"
123
+ )
124
+ return {
125
+ "company_research_data": {
126
+ "job_description": job_text,
127
+ "company_name": company_name,
128
+ },
129
+ }
130
+
131
+
132
+ @log_execution
133
+ def aggregate_data_loading_results(state: DataLoadState) -> DataLoadState:
134
+ """
135
+ Aggregate results from parallel resume and job description parsing nodes.
136
+
137
+ This node runs after both parse_resume_node and parse_job_description_node
138
+ complete. It ensures both results are present, normalizes values to strings,
139
+ and structures the final state.
140
+
141
+ Normalization is performed here (not in ValidationHelper) to follow SRP:
142
+ - This function: Aggregates and normalizes data
143
+ - ValidationHelper: Only validates data
144
+
145
+ Parameters
146
+ ----------
147
+ state: DataLoadState
148
+ Current workflow state with parallel parsing results.
149
+
150
+ Returns
151
+ -------
152
+ DataLoadState
153
+ Updated state with normalized and structured company_research_data.
154
+ """
155
+ # Ensure company_research_data exists
156
+ if "company_research_data" not in state:
157
+ state["company_research_data"] = {}
158
+
159
+ # Extract research data once, then get results from parallel nodes
160
+ company_research_data = state["company_research_data"]
161
+ resume_text = company_research_data.get("resume", "")
162
+ job_text = company_research_data.get("job_description", "")
163
+ company_name = company_research_data.get("company_name", "")
164
+
165
+ # Normalize values to strings (handles list, tuple, dict, str)
166
+ def normalize_value(value: list | tuple | dict | str | Any) -> str:
167
+ """
168
+ Normalize a value to a string representation.
169
+
170
+ Args:
171
+ value: Value to normalize (list, tuple, dict, or any other type)
172
+
173
+ Returns:
174
+ String representation of the value
175
+ """
176
+ if isinstance(value, (list, tuple)):
177
+ return " ".join(str(x) for x in value)
178
+ elif isinstance(value, dict):
179
+ return str(value)
180
+ else:
181
+ return str(value)
182
+
183
+ # Normalize all values
184
+ resume_text = normalize_value(resume_text) if resume_text else ""
185
+ job_text = normalize_value(job_text) if job_text else ""
186
+ company_name = normalize_value(company_name) if company_name else ""
187
+
188
+ # Validate both are present (log warnings but don't fail here - validation node will handle)
189
+ if not resume_text:
190
+ logger.warning("Resume text is empty after parsing")
191
+ if not job_text:
192
+ logger.warning("Job description text is empty after parsing")
193
+
194
+ # Ensure final structure is correct
195
+ state["company_research_data"] = {
196
+ "resume": resume_text,
197
+ "job_description": job_text,
198
+ "company_name": company_name,
199
+ }
200
+ state["current_node"] = "aggregate_results"
201
+
202
+ logger.info("Data loading results aggregated and normalized successfully")
203
+ return state
204
+
205
+
206
+ @log_execution
207
+ def verify_inputs_node(state: DataLoadState) -> DataLoadState:
208
+ """
209
+ Verify that required inputs are present and set next_node for routing.
210
+
211
+ This node wraps the ValidationHelper.verify_inputs method for use in
212
+ the LangGraph workflow. It only validates - normalization is done in
213
+ aggregate_data_loading_results.
214
+
215
+ Parameters
216
+ ----------
217
+ state: DataLoadState
218
+ Current workflow state with aggregated and normalized data.
219
+
220
+ Returns
221
+ -------
222
+ DataLoadState
223
+ Updated state with next_node set for routing ("load" or "research").
224
+ """
225
+ validator = ValidationHelper()
226
+ return validator.verify_inputs(state)
227
+
228
+
229
+ # ============================================================================
230
+ # Data Loading Subgraph Definition
231
+ # ============================================================================
232
+
233
+ # Create data loading subgraph
234
+ data_loading_subgraph = StateGraph(DataLoadState)
235
+
236
+ # Add subgraph nodes
237
+ data_loading_subgraph.add_node(
238
+ "set_agent_system_message", set_agent_system_message_node
239
+ )
240
+ data_loading_subgraph.add_node("parse_resume", parse_resume_node)
241
+ data_loading_subgraph.add_node("parse_job_description", parse_job_description_node)
242
+ data_loading_subgraph.add_node("aggregate_results", aggregate_data_loading_results)
243
+ data_loading_subgraph.add_node("verify_inputs", verify_inputs_node)
244
+
245
+ # Add subgraph edges
246
+ data_loading_subgraph.add_edge(START, "set_agent_system_message")
247
+ # Parallel execution: both nodes start after set_agent_system_message
248
+ data_loading_subgraph.add_edge("set_agent_system_message", "parse_resume")
249
+ data_loading_subgraph.add_edge("set_agent_system_message", "parse_job_description")
250
+ # Both parallel nodes feed into aggregate (LangGraph waits for both)
251
+ data_loading_subgraph.add_edge("parse_resume", "aggregate_results")
252
+ data_loading_subgraph.add_edge("parse_job_description", "aggregate_results")
253
+ # Aggregate feeds into verification
254
+ data_loading_subgraph.add_edge("aggregate_results", "verify_inputs")
255
+ # Verification ends the subgraph
256
+ data_loading_subgraph.add_edge("verify_inputs", END)
257
+
258
+ # Compile data loading subgraph
259
+ data_loading_workflow = data_loading_subgraph.compile()
src/job_writing_agent/nodes/initializing.py DELETED
@@ -1,513 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- Job Application Writer - Initialization Module
4
-
5
- This module provides the Dataloading class responsible for loading and validating
6
- inputs required for the job-application workflow. It handles parsing resumes and
7
- job descriptions, managing missing inputs, and populating application state.
8
-
9
- The module includes utilities for:
10
- - Parsing resume files and extracting text content
11
- - Parsing job descriptions and extracting company information
12
- - Orchestrating input loading with validation
13
- - Providing user prompts for missing information during verification
14
- """
15
-
16
- import logging
17
- from typing import Tuple, Optional
18
-
19
- from langchain_core.documents import Document
20
- from langchain_core.messages import SystemMessage
21
- from langgraph.graph import StateGraph, END, START
22
-
23
- from job_writing_agent.classes import DataLoadState
24
- from job_writing_agent.utils.document_processing import (
25
- parse_resume,
26
- get_job_description,
27
- )
28
- from job_writing_agent.prompts.templates import agent_system_prompt
29
- from job_writing_agent.utils.logging.logging_decorators import (
30
- log_async,
31
- log_execution,
32
- log_errors,
33
- )
34
-
35
- logger = logging.getLogger(__name__)
36
-
37
-
38
- # Note: Using centralized logging decorators from utils.logging.logging_decorators
39
-
40
-
41
- class Dataloading:
42
- """
43
- Helper class providing utility methods for loading and parsing data.
44
-
45
- This class provides helper methods used by the data loading subgraph nodes.
46
- The actual workflow orchestration is handled by the data_loading_workflow subgraph.
47
-
48
- Methods
49
- -------
50
- set_agent_system_message(state: DataLoadState) -> DataLoadState
51
- Adds the system prompt to the conversation state.
52
- get_resume(resume_source) -> str
53
- Parses a resume file and returns its plain‑text content.
54
- parse_job_description(job_description_source) -> Tuple[str, str]
55
- Parses a job description and returns its text and company name.
56
- verify_inputs(state: DataLoadState) -> DataLoadState
57
- Validates inputs and sets next_node for routing.
58
-
59
- Private Methods (used by subgraph nodes)
60
- -----------------------------------------
61
- _load_resume(resume_source) -> str
62
- Load resume content, raising if the source is missing.
63
- _load_job_description(jd_source) -> Tuple[str, str]
64
- Load job description text and company name, raising if missing.
65
- _prompt_user(prompt_msg: str) -> str
66
- Prompt the user for input (synchronous input wrapped for async use).
67
-
68
- """
69
-
70
- def __init__(self):
71
- """Initialize Dataloading helper class."""
72
- pass
73
-
74
- # =======================================================================
75
- # System/Initialization Methods
76
- # =======================================================================
77
-
78
- @log_async
79
- async def set_agent_system_message(self, state: DataLoadState) -> DataLoadState:
80
- """Add the system prompt to the conversation state.
81
-
82
- Parameters
83
- ----------
84
- state: DataLoadState
85
- Current workflow state.
86
-
87
- Returns
88
- -------
89
- DataLoadState
90
- Updated state with the system message and the next node identifier.
91
- """
92
- agent_initialization_system_message = SystemMessage(content=agent_system_prompt)
93
- messages = state.get("messages", [])
94
- messages.append(agent_initialization_system_message)
95
- return {
96
- **state,
97
- "messages": messages,
98
- "current_node": "initialize_system",
99
- }
100
-
101
- # =======================================================================
102
- # Public Parsing Methods
103
- # =======================================================================
104
-
105
- @log_async
106
- @log_errors
107
- async def get_resume(self, resume_source):
108
- """
109
- Parse a resume file and return its plain‑text content.
110
-
111
- This method extracts text from resume chunks, handling both Document
112
- objects and plain strings. Empty or invalid chunks are skipped.
113
-
114
- Parameters
115
- ----------
116
- resume_source: Any
117
- Path or file‑like object accepted by ``parse_resume``.
118
-
119
- Returns
120
- -------
121
- str
122
- Plain text content of the resume.
123
-
124
- Raises
125
- ------
126
- AssertionError
127
- If resume_source is None.
128
- Exception
129
- If parsing fails.
130
- """
131
- logger.info("Parsing resume...")
132
- resume_text = ""
133
- assert resume_source is not None
134
- resume_chunks = parse_resume(resume_source)
135
- for chunk in resume_chunks:
136
- if hasattr(chunk, "page_content") and chunk.page_content:
137
- resume_text += chunk.page_content
138
- elif isinstance(chunk, str) and chunk:
139
- resume_text += chunk
140
- else:
141
- logger.debug("Skipping empty or invalid chunk in resume: %s", chunk)
142
- return resume_text
143
-
144
- @log_async
145
- @log_errors
146
- async def parse_job_description(self, job_description_source):
147
- """
148
- Parse a job description and return its text and company name.
149
-
150
- Extracts both the job posting text and company name from the document.
151
- Company name is extracted from document metadata if available.
152
-
153
- Parameters
154
- ----------
155
- job_description_source: Any
156
- Source accepted by ``get_job_description`` (URL, file path, etc.).
157
-
158
- Returns
159
- -------
160
- Tuple[str, str]
161
- A tuple of (job_posting_text, company_name).
162
-
163
- Raises
164
- ------
165
- AssertionError
166
- If job_description_source is None.
167
- Exception
168
- If parsing fails.
169
- """
170
- company_name = ""
171
- job_posting_text = ""
172
-
173
- logger.info("Parsing job description from: %s", job_description_source)
174
- assert job_description_source is not None, (
175
- "Job description source cannot be None"
176
- )
177
-
178
- job_description_document: Optional[Document] = await get_job_description(
179
- job_description_source
180
- )
181
-
182
- # Extract company name from metadata
183
- if hasattr(job_description_document, "metadata") and isinstance(
184
- job_description_document.metadata, dict
185
- ):
186
- company_name = job_description_document.metadata.get("company_name", "")
187
- if not company_name:
188
- logger.warning("Company name not found in job description metadata.")
189
- else:
190
- logger.warning(
191
- "Metadata attribute missing or not a dict in job description document."
192
- )
193
-
194
- # Extract job posting text
195
- if hasattr(job_description_document, "page_content"):
196
- job_posting_text = job_description_document.page_content or ""
197
- if not job_posting_text:
198
- logger.info("Parsed job posting text is empty.")
199
- else:
200
- logger.warning(
201
- "page_content attribute missing in job description document."
202
- )
203
-
204
- return job_posting_text, company_name
205
-
206
- @log_async
207
- async def get_application_form_details(self, job_description_source):
208
- """
209
- Placeholder for future method to get application form details.
210
-
211
- This method will be implemented to extract form fields and requirements
212
- from job application forms.
213
-
214
- Parameters
215
- ----------
216
- job_description_source: Any
217
- Source of the job description or application form.
218
- """
219
- # TODO: Implement form field extraction
220
- pass
221
-
222
- # =======================================================================
223
- # Validation Methods
224
- # =======================================================================
225
-
226
- @log_execution
227
- @log_errors
228
- def verify_inputs(self, state: DataLoadState) -> DataLoadState:
229
- """
230
- Validate inputs and set next_node for routing.
231
-
232
- This method validates that both resume and job description are present
233
- in the state, normalizes their values to strings, and sets the next_node
234
- field for conditional routing in the main workflow.
235
-
236
- Parameters
237
- ----------
238
- state: DataLoadState
239
- Current workflow state containing company_research_data.
240
-
241
- Returns
242
- -------
243
- DataLoadState
244
- Updated state with next_node set to "load" (if validation fails)
245
- or "research" (if validation passes).
246
-
247
- Raises
248
- ------
249
- Exception
250
- If normalization fails for any field.
251
- """
252
- logger.info("Verifying loaded inputs!")
253
- state["current_node"] = "verify"
254
-
255
- # Validate required fields
256
- company_research_data = state.get("company_research_data", {})
257
-
258
- if not company_research_data.get("resume"):
259
- logger.error("Resume is missing in company_research_data")
260
- state["next_node"] = "load" # Loop back to load subgraph
261
- return state
262
-
263
- if not company_research_data.get("job_description"):
264
- logger.error("Job description is missing in company_research_data")
265
- state["next_node"] = "load" # Loop back to load subgraph
266
- return state
267
-
268
- # Normalize values to strings
269
- for key in ["resume", "job_description"]:
270
- try:
271
- value = company_research_data[key]
272
- if isinstance(value, (list, tuple)):
273
- company_research_data[key] = " ".join(str(x) for x in value)
274
- elif isinstance(value, dict):
275
- company_research_data[key] = str(value)
276
- else:
277
- company_research_data[key] = str(value)
278
- except Exception as e:
279
- logger.warning("Error converting %s to string: %s", key, e)
280
- state["next_node"] = "load"
281
- return state
282
-
283
- # All validations passed
284
- state["next_node"] = "research"
285
- logger.info("Inputs verified successfully, proceeding to research")
286
- return state
287
-
288
- # =======================================================================
289
- # Private Helper Methods (used by subgraph nodes)
290
- # =======================================================================
291
-
292
- @log_async
293
- @log_errors
294
- async def _load_resume(self, resume_source) -> str:
295
- """
296
- Load resume content, raising if the source is missing.
297
-
298
- This is a wrapper around get_resume() that validates the source first.
299
- Used by subgraph nodes for consistent error handling.
300
-
301
- Parameters
302
- ----------
303
- resume_source: Any
304
- Path or file-like object for the resume.
305
-
306
- Returns
307
- -------
308
- str
309
- Plain text content of the resume.
310
-
311
- Raises
312
- ------
313
- ValueError
314
- If resume_source is None or empty.
315
- """
316
- if not resume_source:
317
- raise ValueError("resume_source is required")
318
- return await self.get_resume(resume_source)
319
-
320
- @log_async
321
- @log_errors
322
- async def _load_job_description(self, jd_source) -> Tuple[str, str]:
323
- """
324
- Load job description text and company name, raising if missing.
325
-
326
- This is a wrapper around parse_job_description() that validates the source first.
327
- Used by subgraph nodes for consistent error handling.
328
-
329
- Parameters
330
- ----------
331
- jd_source: Any
332
- Source for the job description (URL, file path, etc.).
333
-
334
- Returns
335
- -------
336
- Tuple[str, str]
337
- A tuple of (job_posting_text, company_name).
338
-
339
- Raises
340
- ------
341
- ValueError
342
- If jd_source is None or empty.
343
- """
344
- if not jd_source:
345
- raise ValueError("job_description_source is required")
346
- return await self.parse_job_description(jd_source)
347
-
348
- @log_async
349
- @log_errors
350
- async def _prompt_user(self, prompt_msg: str) -> str:
351
- """
352
- Prompt the user for input (synchronous input wrapped for async use).
353
-
354
- This method wraps the synchronous input() function to be used in async contexts.
355
- In a production async UI, this would be replaced with an async input mechanism.
356
-
357
- Parameters
358
- ----------
359
- prompt_msg: str
360
- Message to display to the user.
361
-
362
- Returns
363
- -------
364
- str
365
- User input string.
366
- """
367
- # In a real async UI replace input with an async call.
368
- return input(prompt_msg)
369
-
370
-
371
- # ============================================================================
372
- # Data Loading Subgraph Nodes
373
- # ============================================================================
374
-
375
-
376
- @log_async
377
- async def parse_resume_node(state: DataLoadState) -> DataLoadState:
378
- """
379
- Node to parse resume in parallel with job description parsing.
380
-
381
- Extracts resume parsing logic from load_inputs for parallel execution.
382
- Returns only the resume data - reducer will merge with job description data.
383
- """
384
- dataloading = Dataloading()
385
- resume_src = state.get("resume_path")
386
-
387
- resume_text = ""
388
- if resume_src:
389
- resume_text = await dataloading._load_resume(resume_src)
390
- elif state.get("current_node") == "verify":
391
- resume_text = await dataloading._prompt_user(
392
- "Please paste the resume in text format: "
393
- )
394
-
395
- # Return only the resume data - reducer will merge this with job description data
396
- logger.info(f"Resume parsed: {len(resume_text)} characters")
397
- # Return partial state update - LangGraph will merge this with other parallel updates
398
- return {
399
- "company_research_data": {"resume": resume_text},
400
- }
401
-
402
-
403
- @log_async
404
- async def parse_job_description_node(state: DataLoadState) -> DataLoadState:
405
- """
406
- Node to parse job description in parallel with resume parsing.
407
-
408
- Extracts job description parsing logic from load_inputs for parallel execution.
409
- Returns only the job description data - reducer will merge with resume data.
410
- """
411
- dataloading = Dataloading()
412
- jd_src = state.get("job_description_source")
413
-
414
- job_text = ""
415
- company_name = ""
416
- if jd_src:
417
- job_text, company_name = await dataloading._load_job_description(jd_src)
418
- elif state.get("current_node") == "verify":
419
- job_text = await dataloading._prompt_user(
420
- "Please paste the job posting in text format: "
421
- )
422
-
423
- # Return only the job description data - reducer will merge this with resume data
424
- logger.info(
425
- f"Job description parsed: {len(job_text)} characters, company: {company_name}"
426
- )
427
- # Return partial state update - LangGraph will merge this with other parallel updates
428
- return {
429
- "company_research_data": {
430
- "job_description": job_text,
431
- "company_name": company_name,
432
- },
433
- }
434
-
435
-
436
- @log_execution
437
- def aggregate_data_loading_results(state: DataLoadState) -> DataLoadState:
438
- """
439
- Aggregate results from parallel resume and job description parsing nodes.
440
-
441
- This node runs after both parse_resume_node and parse_job_description_node
442
- complete. It ensures both results are present and normalizes the state.
443
- """
444
- # Ensure company_research_data exists
445
- if "company_research_data" not in state:
446
- state["company_research_data"] = {}
447
-
448
- # Get results from parallel nodes
449
- resume_text = state["company_research_data"].get("resume", "")
450
- job_text = state["company_research_data"].get("job_description", "")
451
- company_name = state["company_research_data"].get("company_name", "")
452
-
453
- # Validate both are present
454
- if not resume_text:
455
- logger.warning("Resume text is empty after parsing")
456
- if not job_text:
457
- logger.warning("Job description text is empty after parsing")
458
-
459
- # Ensure final structure is correct
460
- state["company_research_data"] = {
461
- "resume": resume_text,
462
- "job_description": job_text,
463
- "company_name": company_name,
464
- }
465
- state["current_node"] = "aggregate_results"
466
-
467
- logger.info("Data loading results aggregated successfully")
468
- return state
469
-
470
-
471
- @log_execution
472
- def verify_inputs_node(state: DataLoadState) -> DataLoadState:
473
- """
474
- Verify that required inputs are present and set next_node for routing.
475
-
476
- Modified from verify_inputs to return state with next_node instead of string.
477
- """
478
- dataloading = Dataloading()
479
- return dataloading.verify_inputs(state)
480
-
481
-
482
- # ============================================================================
483
- # Data Loading Subgraph
484
- # ============================================================================
485
-
486
- # Create data loading subgraph
487
- data_loading_subgraph = StateGraph(DataLoadState)
488
-
489
- # Add subgraph nodes
490
- dataloading_instance = Dataloading()
491
- data_loading_subgraph.add_node(
492
- "set_agent_system_message", dataloading_instance.set_agent_system_message
493
- )
494
- data_loading_subgraph.add_node("parse_resume", parse_resume_node)
495
- data_loading_subgraph.add_node("parse_job_description", parse_job_description_node)
496
- data_loading_subgraph.add_node("aggregate_results", aggregate_data_loading_results)
497
- data_loading_subgraph.add_node("verify_inputs", verify_inputs_node)
498
-
499
- # Add subgraph edges
500
- data_loading_subgraph.add_edge(START, "set_agent_system_message")
501
- # Parallel execution: both nodes start after set_agent_system_message
502
- data_loading_subgraph.add_edge("set_agent_system_message", "parse_resume")
503
- data_loading_subgraph.add_edge("set_agent_system_message", "parse_job_description")
504
- # Both parallel nodes feed into aggregate (LangGraph waits for both)
505
- data_loading_subgraph.add_edge("parse_resume", "aggregate_results")
506
- data_loading_subgraph.add_edge("parse_job_description", "aggregate_results")
507
- # Aggregate feeds into verification
508
- data_loading_subgraph.add_edge("aggregate_results", "verify_inputs")
509
- # Verification ends the subgraph
510
- data_loading_subgraph.add_edge("verify_inputs", END)
511
-
512
- # Compile data loading subgraph
513
- data_loading_workflow = data_loading_subgraph.compile()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/job_writing_agent/nodes/job_description_loader.py CHANGED
@@ -166,7 +166,7 @@ class JobDescriptionLoader:
166
  # TODO: Implement form field extraction
167
  pass
168
 
169
- async def _prompt_user(self) -> str:
170
  """
171
  Prompt the user for input (synchronous input wrapped for async use).
172
 
 
166
  # TODO: Implement form field extraction
167
  pass
168
 
169
+ async def _prompt_user_for_job_description(self) -> str:
170
  """
171
  Prompt the user for input (synchronous input wrapped for async use).
172
 
src/job_writing_agent/nodes/research_workflow.py CHANGED
@@ -1,17 +1,25 @@
1
  # research_workflow.py
2
- import logging
3
- import json
 
4
  import asyncio
5
- from typing import Dict, Any, cast
 
 
6
 
7
- from langgraph.graph import StateGraph, END, START
8
  import dspy
9
- from job_writing_agent.tools.SearchTool import TavilyResearchTool
10
- from job_writing_agent.classes.classes import ResearchState
11
- from job_writing_agent.tools.SearchTool import filter_research_results_by_relevance
12
  from job_writing_agent.agents.output_schema import (
13
  CompanyResearchDataSummarizationSchema,
14
  )
 
 
 
 
 
15
  from job_writing_agent.utils.llm_provider_factory import LLMFactory
16
 
17
  logger = logging.getLogger(__name__)
@@ -25,12 +33,19 @@ EVAL_TIMEOUT = 15 # seconds per evaluation
25
 
26
  def validate_research_inputs(state: ResearchState) -> tuple[bool, str, str]:
27
  """
28
- Validate that required inputs are present.
29
- Returns: (is_valid, company_name, job_description)
 
 
 
 
 
30
  """
31
  try:
32
- company_name = state["company_research_data"].get("company_name", "")
33
- job_description = state["company_research_data"].get("job_description", "")
 
 
34
 
35
  if not company_name or not company_name.strip():
36
  logger.error("Company name is missing or empty")
@@ -42,14 +57,14 @@ def validate_research_inputs(state: ResearchState) -> tuple[bool, str, str]:
42
 
43
  return True, company_name.strip(), job_description.strip()
44
 
45
- except (KeyError, TypeError, AttributeError) as e:
46
  logger.error(f"Invalid state structure: {e}")
47
  return False, "", ""
48
 
49
 
50
  def parse_dspy_queries_with_fallback(
51
- raw_queries: Dict[str, Any], company_name: str
52
- ) -> Dict[str, str]:
53
  """
54
  Parse DSPy query output with multiple fallback strategies.
55
  Returns a dict of query_id -> query_string.
@@ -88,7 +103,7 @@ def parse_dspy_queries_with_fallback(
88
  return get_fallback_queries(company_name)
89
 
90
 
91
- def get_fallback_queries(company_name: str) -> Dict[str, str]:
92
  """
93
  Generate basic fallback queries when DSPy fails.
94
  """
@@ -102,19 +117,27 @@ def get_fallback_queries(company_name: str) -> Dict[str, str]:
102
  def company_research_data_summary(state: ResearchState) -> ResearchState:
103
  """
104
  Summarize the filtered research data into a concise summary.
105
- Replaces the raw tavily_search results with a summarized version.
 
 
 
 
 
 
 
106
  """
107
  try:
108
- state["current_node"] = "company_research_data_summary"
 
109
 
110
- # Extract the current research data
111
  company_research_data = state.get("company_research_data", {})
112
  tavily_search_data = company_research_data.get("tavily_search", [])
113
 
114
  # If no research data, skip summarization
115
  if not tavily_search_data or len(tavily_search_data) == 0:
116
  logger.warning("No research data to summarize. Skipping summarization.")
117
- return state
118
 
119
  logger.info(f"Summarizing {len(tavily_search_data)} research result sets...")
120
 
@@ -127,7 +150,7 @@ def company_research_data_summary(state: ResearchState) -> ResearchState:
127
 
128
  llm_provider = LLMFactory()
129
  llm = llm_provider.create_dspy(
130
- model="mistralai/mistral-7b-instruct:free",
131
  provider="openrouter",
132
  temperature=0.3,
133
  )
@@ -137,29 +160,31 @@ def company_research_data_summary(state: ResearchState) -> ResearchState:
137
  response = company_research_data_summarization(
138
  company_research_data=company_research_data
139
  )
140
- # Extract the summary from the response
141
- # The response should have a 'company_research_data_summary' field (JSON string)
142
  if hasattr(response, "company_research_data_summary"):
143
  summary_json_str = response.company_research_data_summary
144
- elif isinstance(response, dict) and "company_research_data_summary" in response:
145
- summary_json_str = response["company_research_data_summary"]
146
  else:
147
  logger.error(
148
  f"Unexpected response format from summarization: {type(response)}"
149
  )
150
- return state
151
 
152
- # Parse the JSON summary
153
- state["company_research_data"]["company_research_data_summary"] = (
 
154
  summary_json_str
155
  )
 
156
 
157
- return state
158
 
159
  except Exception as e:
160
  logger.error(f"Error in company_research_data_summary: {e}", exc_info=True)
161
  # Return state unchanged on error
162
- return state
163
 
164
 
165
  async def research_company_with_retry(state: ResearchState) -> ResearchState:
@@ -173,9 +198,16 @@ async def research_company_with_retry(state: ResearchState) -> ResearchState:
173
 
174
  if not is_valid:
175
  logger.error("Invalid inputs for research. Skipping research phase.")
176
- state["company_research_data"]["tavily_search"] = []
177
- state["attempted_search_queries"] = []
178
- return state
 
 
 
 
 
 
 
179
 
180
  logger.info(f"Researching company: {company_name}")
181
 
@@ -254,14 +286,17 @@ async def research_company_with_retry(state: ResearchState) -> ResearchState:
254
  if len(search_results) == 0:
255
  logger.warning("No search results returned")
256
 
257
- # Store results
258
- state["attempted_search_queries"] = list(queries.values())
259
- state["company_research_data"]["tavily_search"] = search_results
260
-
261
- logger.info(
262
- f"Research completed successfully with {len(search_results)} result sets"
 
 
 
 
263
  )
264
- return state
265
 
266
  except Exception as e:
267
  logger.error(
@@ -273,22 +308,31 @@ async def research_company_with_retry(state: ResearchState) -> ResearchState:
273
  await asyncio.sleep(RETRY_DELAY * (attempt + 1)) # Exponential backoff
274
  else:
275
  logger.error("All retry attempts exhausted. Using empty results.")
276
- state["company_research_data"]["tavily_search"] = []
277
- state["attempted_search_queries"] = []
278
-
279
- return state
280
-
 
 
 
 
 
281
 
282
- async def research_company(state: ResearchState) -> ResearchState:
283
- """Wrapper to call the retry version."""
284
- return await research_company_with_retry(state)
 
 
 
 
285
 
286
 
287
  # Create research subgraph
288
  research_subgraph = StateGraph(ResearchState)
289
 
290
  # Add research subgraph nodes
291
- research_subgraph.add_node("research_company", research_company)
292
  research_subgraph.add_node("relevance_filter", filter_research_results_by_relevance)
293
  research_subgraph.add_node(
294
  "company_research_data_summary", company_research_data_summary
 
1
  # research_workflow.py
2
+ """Research workflow for company information gathering and filtering."""
3
+
4
+ # Standard library imports
5
  import asyncio
6
+ import json
7
+ import logging
8
+ from typing import Any, Dict, cast
9
 
10
+ # Third-party imports
11
  import dspy
12
+ from langgraph.graph import END, START, StateGraph
13
+
14
+ # Local imports
15
  from job_writing_agent.agents.output_schema import (
16
  CompanyResearchDataSummarizationSchema,
17
  )
18
+ from job_writing_agent.classes.classes import ResearchState
19
+ from job_writing_agent.tools.SearchTool import (
20
+ TavilyResearchTool,
21
+ filter_research_results_by_relevance,
22
+ )
23
  from job_writing_agent.utils.llm_provider_factory import LLMFactory
24
 
25
  logger = logging.getLogger(__name__)
 
33
 
34
  def validate_research_inputs(state: ResearchState) -> tuple[bool, str, str]:
35
  """
36
+ Validate that required inputs are present in research state.
37
+
38
+ Args:
39
+ state: Current research workflow state
40
+
41
+ Returns:
42
+ Tuple of (is_valid, company_name, job_description)
43
  """
44
  try:
45
+ # Safe dictionary access with fallbacks
46
+ company_research_data = state.get("company_research_data", {})
47
+ company_name = company_research_data.get("company_name", "")
48
+ job_description = company_research_data.get("job_description", "")
49
 
50
  if not company_name or not company_name.strip():
51
  logger.error("Company name is missing or empty")
 
57
 
58
  return True, company_name.strip(), job_description.strip()
59
 
60
+ except (TypeError, AttributeError) as e:
61
  logger.error(f"Invalid state structure: {e}")
62
  return False, "", ""
63
 
64
 
65
  def parse_dspy_queries_with_fallback(
66
+ raw_queries: dict[str, Any], company_name: str
67
+ ) -> dict[str, str]:
68
  """
69
  Parse DSPy query output with multiple fallback strategies.
70
  Returns a dict of query_id -> query_string.
 
103
  return get_fallback_queries(company_name)
104
 
105
 
106
+ def get_fallback_queries(company_name: str) -> dict[str, str]:
107
  """
108
  Generate basic fallback queries when DSPy fails.
109
  """
 
117
  def company_research_data_summary(state: ResearchState) -> ResearchState:
118
  """
119
  Summarize the filtered research data into a concise summary.
120
+
121
+ Replaces the raw tavily_search results with a summarized version using LLM.
122
+
123
+ Args:
124
+ state: Current research state with search results
125
+
126
+ Returns:
127
+ Updated state with research summary
128
  """
129
  try:
130
+ # Update current node
131
+ updated_state = {**state, "current_node": "company_research_data_summary"}
132
 
133
+ # Extract the current research data with safe access
134
  company_research_data = state.get("company_research_data", {})
135
  tavily_search_data = company_research_data.get("tavily_search", [])
136
 
137
  # If no research data, skip summarization
138
  if not tavily_search_data or len(tavily_search_data) == 0:
139
  logger.warning("No research data to summarize. Skipping summarization.")
140
+ return updated_state
141
 
142
  logger.info(f"Summarizing {len(tavily_search_data)} research result sets...")
143
 
 
150
 
151
  llm_provider = LLMFactory()
152
  llm = llm_provider.create_dspy(
153
+ model="cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
154
  provider="openrouter",
155
  temperature=0.3,
156
  )
 
160
  response = company_research_data_summarization(
161
  company_research_data=company_research_data
162
  )
163
+ # Extract the summary from the response with safe access
164
+ summary_json_str = ""
165
  if hasattr(response, "company_research_data_summary"):
166
  summary_json_str = response.company_research_data_summary
167
+ elif isinstance(response, dict):
168
+ summary_json_str = response.get("company_research_data_summary", "")
169
  else:
170
  logger.error(
171
  f"Unexpected response format from summarization: {type(response)}"
172
  )
173
+ return updated_state
174
 
175
+ # Update state with summary using safe dictionary operations
176
+ updated_company_research_data = {**company_research_data}
177
+ updated_company_research_data["company_research_data_summary"] = (
178
  summary_json_str
179
  )
180
+ updated_state["company_research_data"] = updated_company_research_data
181
 
182
+ return updated_state
183
 
184
  except Exception as e:
185
  logger.error(f"Error in company_research_data_summary: {e}", exc_info=True)
186
  # Return state unchanged on error
187
+ return updated_state
188
 
189
 
190
  async def research_company_with_retry(state: ResearchState) -> ResearchState:
 
198
 
199
  if not is_valid:
200
  logger.error("Invalid inputs for research. Skipping research phase.")
201
+ return ResearchState(
202
+ company_research_data={
203
+ **state.get("company_research_data", {}),
204
+ "tavily_search": [],
205
+ },
206
+ attempted_search_queries=[],
207
+ current_node="research_company",
208
+ content_category=state.get("content_category", "cover_letter"),
209
+ messages=state.get("messages", []),
210
+ )
211
 
212
  logger.info(f"Researching company: {company_name}")
213
 
 
286
  if len(search_results) == 0:
287
  logger.warning("No search results returned")
288
 
289
+ # Store results and return ResearchState
290
+ return ResearchState(
291
+ company_research_data={
292
+ **state.get("company_research_data", {}),
293
+ "tavily_search": search_results,
294
+ },
295
+ attempted_search_queries=list(queries.values()),
296
+ current_node="research_company",
297
+ content_category=state.get("content_category", "cover_letter"),
298
+ messages=state.get("messages", []),
299
  )
 
300
 
301
  except Exception as e:
302
  logger.error(
 
308
  await asyncio.sleep(RETRY_DELAY * (attempt + 1)) # Exponential backoff
309
  else:
310
  logger.error("All retry attempts exhausted. Using empty results.")
311
+ return ResearchState(
312
+ company_research_data={
313
+ **state.get("company_research_data", {}),
314
+ "tavily_search": [],
315
+ },
316
+ attempted_search_queries=[],
317
+ current_node="research_company",
318
+ content_category=state.get("content_category", "cover_letter"),
319
+ messages=state.get("messages", []),
320
+ )
321
 
322
+ return ResearchState(
323
+ company_research_data=state.get("company_research_data", {}),
324
+ attempted_search_queries=[],
325
+ current_node="research_company",
326
+ content_category=state.get("content_category", "cover_letter"),
327
+ messages=state.get("messages", []),
328
+ )
329
 
330
 
331
  # Create research subgraph
332
  research_subgraph = StateGraph(ResearchState)
333
 
334
  # Add research subgraph nodes
335
+ research_subgraph.add_node("research_company", research_company_with_retry)
336
  research_subgraph.add_node("relevance_filter", filter_research_results_by_relevance)
337
  research_subgraph.add_node(
338
  "company_research_data_summary", company_research_data_summary
src/job_writing_agent/nodes/selfconsistency.py CHANGED
@@ -1,10 +1,12 @@
1
- import logging
2
- from datetime import datetime
3
  import json
 
4
  import re
 
5
 
 
6
  from ..classes.classes import AppState
7
- from ..prompts.templates import DRAFT_RATING_PROMPT, BEST_DRAFT_SELECTION_PROMPT
8
  from ..utils.llm_provider_factory import LLMFactory
9
 
10
 
@@ -14,82 +16,104 @@ CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
14
 
15
 
16
  def self_consistency_vote(state: AppState) -> AppState:
17
- """Choose the best draft from multiple variations."""
 
 
 
 
 
 
 
 
 
 
 
18
  # Create LLM inside function (lazy initialization)
19
  llm_factory = LLMFactory()
20
- llm_precise = llm_factory.create_langchain(
21
  model="google/gemma-3-12b-it:free", provider="openrouter", temperature=0.1
22
  )
23
 
24
- variations = state.get("variations", {"variations": []})
 
25
 
26
- all_drafts = [state["draft"]] + variations["variations"]
27
 
28
  # First, have the LLM rate each draft
29
- ratings = []
30
 
31
- # Get resume and job summaries, handling different formats
32
  try:
33
- if isinstance(state["resume_path"], list) and len(state["resume_path"]) > 0:
34
- if hasattr(state["resume_path"][0], "page_content"):
35
- resume_summary = state["resume_path"][0].page_content
 
36
  else:
37
- resume_summary = state["resume_path"][0]
38
  else:
39
- resume_summary = str(state["resume_path"])
40
  except Exception as e:
41
- print(f"Warning: Error getting resume summary: {e}")
42
- resume_summary = str(state["resume_path"])
43
 
44
  try:
45
- if (
46
- isinstance(state["job_description_source"], list)
47
- and len(state["job_description_source"]) > 0
48
- ):
49
- job_summary = state["job_description_source"][0]
50
  else:
51
- job_summary = str(state["job_description_source"])
52
  except Exception as e:
53
- print(f"Warning: Error getting job summary: {e}")
54
- job_summary = str(state["job_description_source"])
55
-
56
- for i, draft in enumerate(all_drafts):
57
- rating = llm_precise.invoke(
58
- DRAFT_RATING_PROMPT.format(
59
- resume_summary=resume_summary,
60
- job_summary=job_summary,
61
- draft=draft,
62
- draft_number=i + 1,
63
- )
 
 
64
  )
65
- ratings.append(rating)
66
 
67
- # Create a clearer, more structured prompt for draft selection
68
- selection_prompt = BEST_DRAFT_SELECTION_PROMPT.format(
69
- ratings_json=json.dumps(ratings, indent=2), num_drafts=len(all_drafts)
 
 
 
 
70
  )
71
 
72
  # Get the selected draft index with error handling
73
  try:
74
- selection = llm_precise.invoke(selection_prompt).strip()
 
 
 
 
75
  # Extract just the first number found in the response
76
- number_match = re.search(r"\d+", selection)
77
  if not number_match:
78
- print(
79
- "Warning: Could not extract draft number from LLM response. Using original draft."
80
  )
81
- best_draft_idx = 0
82
  else:
83
- best_draft_idx = int(number_match.group()) - 1
84
  # Validate the index is in range
85
- if best_draft_idx < 0 or best_draft_idx >= len(all_drafts):
86
- print(
87
- f"Warning: Selected draft index {best_draft_idx + 1} out of range. Using original draft."
88
  )
89
- best_draft_idx = 0
90
  except (ValueError, TypeError) as e:
91
- print(f"Warning: Error selecting best draft: {e}. Using original draft.")
92
- best_draft_idx = 0
93
 
94
- state["draft"] = all_drafts[best_draft_idx]
95
- return state
 
 
1
+ # Standard library imports
 
2
  import json
3
+ import logging
4
  import re
5
+ from datetime import datetime
6
 
7
+ # Local imports
8
  from ..classes.classes import AppState
9
+ from ..prompts.templates import BEST_DRAFT_SELECTION_PROMPT, DRAFT_RATING_PROMPT
10
  from ..utils.llm_provider_factory import LLMFactory
11
 
12
 
 
16
 
17
 
18
  def self_consistency_vote(state: AppState) -> AppState:
19
+ """
20
+ Choose the best draft from multiple variations using LLM-based voting.
21
+
22
+ This function rates all draft variations and selects the best one based on
23
+ criteria like relevance, professional tone, personalization, and persuasiveness.
24
+
25
+ Args:
26
+ state: Application state containing the original draft and variations
27
+
28
+ Returns:
29
+ Updated state with the best draft selected
30
+ """
31
  # Create LLM inside function (lazy initialization)
32
  llm_factory = LLMFactory()
33
+ precise_llm = llm_factory.create_langchain(
34
  model="google/gemma-3-12b-it:free", provider="openrouter", temperature=0.1
35
  )
36
 
37
+ variations_data = state.get("variations", {"variations": []})
38
+ original_draft = state.get("draft", "")
39
 
40
+ all_drafts = [original_draft] + variations_data.get("variations", [])
41
 
42
  # First, have the LLM rate each draft
43
+ draft_ratings = []
44
 
45
+ # Get resume and job summaries with safe dictionary access
46
  try:
47
+ resume_path = state.get("resume_path", "")
48
+ if isinstance(resume_path, list) and len(resume_path) > 0:
49
+ if hasattr(resume_path[0], "page_content"):
50
+ resume_summary = resume_path[0].page_content
51
  else:
52
+ resume_summary = resume_path[0]
53
  else:
54
+ resume_summary = str(resume_path)
55
  except Exception as e:
56
+ logger.warning(f"Error getting resume summary: {e}")
57
+ resume_summary = str(state.get("resume_path", ""))
58
 
59
  try:
60
+ job_description_source = state.get("job_description_source", "")
61
+ if isinstance(job_description_source, list) and len(job_description_source) > 0:
62
+ job_summary = job_description_source[0]
 
 
63
  else:
64
+ job_summary = str(job_description_source)
65
  except Exception as e:
66
+ logger.warning(f"Error getting job summary: {e}")
67
+ job_summary = str(state.get("job_description_source", ""))
68
+
69
+ for draft_index, draft_content in enumerate(all_drafts):
70
+ # Create chain with proper prompt template invocation
71
+ rating_chain = DRAFT_RATING_PROMPT | precise_llm
72
+ rating_result = rating_chain.invoke(
73
+ {
74
+ "resume_summary": resume_summary,
75
+ "job_summary": job_summary,
76
+ "draft": draft_content,
77
+ "draft_number": draft_index + 1,
78
+ }
79
  )
80
+ draft_ratings.append(rating_result)
81
 
82
+ # Create chain for draft selection with proper prompt template invocation
83
+ selection_chain = BEST_DRAFT_SELECTION_PROMPT | precise_llm
84
+ selection_result = selection_chain.invoke(
85
+ {
86
+ "ratings_json": json.dumps(draft_ratings, indent=2),
87
+ "num_drafts": len(all_drafts),
88
+ }
89
  )
90
 
91
  # Get the selected draft index with error handling
92
  try:
93
+ selection_text = str(
94
+ selection_result.content
95
+ if hasattr(selection_result, "content")
96
+ else selection_result
97
+ ).strip()
98
  # Extract just the first number found in the response
99
+ number_match = re.search(r"\d+", selection_text)
100
  if not number_match:
101
+ logger.warning(
102
+ "Could not extract draft number from LLM response. Using original draft."
103
  )
104
+ best_draft_index = 0
105
  else:
106
+ best_draft_index = int(number_match.group()) - 1
107
  # Validate the index is in range
108
+ if best_draft_index < 0 or best_draft_index >= len(all_drafts):
109
+ logger.warning(
110
+ f"Selected draft index {best_draft_index + 1} out of range. Using original draft."
111
  )
112
+ best_draft_index = 0
113
  except (ValueError, TypeError) as e:
114
+ logger.warning(f"Error selecting best draft: {e}. Using original draft.")
115
+ best_draft_index = 0
116
 
117
+ # Update state with best draft using safe dictionary operations
118
+ updated_state = {**state, "draft": all_drafts[best_draft_index]}
119
+ return updated_state
src/job_writing_agent/nodes/system_initializer.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ System Initializer Module
4
+
5
+ This module provides the SystemInitializer class responsible for initializing
6
+ system messages in the workflow state. It follows the Single Responsibility
7
+ Principle by focusing solely on system message initialization.
8
+ """
9
+
10
+ import logging
11
+ from typing import Optional
12
+
13
+ from langchain_core.messages import SystemMessage
14
+
15
+ from job_writing_agent.classes import DataLoadState
16
+ from job_writing_agent.prompts.templates import agent_system_prompt
17
+ from job_writing_agent.utils.logging.logging_decorators import log_async
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ class SystemInitializer:
23
+ """
24
+ Responsible for initializing system messages in workflow state.
25
+
26
+ Example:
27
+ >>> initializer = SystemInitializer()
28
+ >>> state = await initializer.set_agent_system_message(initial_state)
29
+ >>>
30
+ >>> # With custom prompt for testing
31
+ >>> custom_prompt = "Custom system prompt"
32
+ >>> initializer = SystemInitializer(system_prompt=custom_prompt)
33
+ """
34
+
35
+ def __init__(self, system_prompt: Optional[str] = None):
36
+ """
37
+ Initialize SystemInitializer with optional system prompt dependency injection.
38
+
39
+ Parameters
40
+ ----------
41
+ system_prompt: Optional[str]
42
+ System prompt text to use. Defaults to `agent_system_prompt` from
43
+ prompts.templates. Can be injected for testing or custom prompts.
44
+ """
45
+ self._system_prompt = system_prompt or agent_system_prompt
46
+
47
+ @log_async
48
+ async def set_agent_system_message(self, state: DataLoadState) -> DataLoadState:
49
+ """
50
+ Add the system prompt to the conversation state.
51
+
52
+ This method creates a SystemMessage from the configured prompt and
53
+ adds it to the messages list in the workflow state.
54
+
55
+ Parameters
56
+ ----------
57
+ state: DataLoadState
58
+ Current workflow state containing messages list.
59
+
60
+ Returns
61
+ -------
62
+ DataLoadState
63
+ Updated state with the system message added to messages list
64
+ and current_node set to "initialize_system".
65
+ """
66
+ agent_initialization_system_message = SystemMessage(content=self._system_prompt)
67
+ messages = state.get("messages", [])
68
+ messages.append(agent_initialization_system_message)
69
+ return {
70
+ **state,
71
+ "messages": messages,
72
+ "current_node": "initialize_system",
73
+ }
src/job_writing_agent/nodes/validation_helper.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Validation Helper Module
4
+
5
+ This module provides the ValidationHelper class responsible for validating
6
+ workflow inputs and setting routing decisions. It follows the Single
7
+ Responsibility Principle by focusing solely on input validation.
8
+ """
9
+
10
+ import logging
11
+
12
+ from job_writing_agent.classes import DataLoadState
13
+ from job_writing_agent.utils.logging.logging_decorators import (
14
+ log_execution,
15
+ log_errors,
16
+ )
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ class ValidationHelper:
22
+ """
23
+ Responsible for validating workflow inputs and setting routing decisions.
24
+
25
+
26
+ Example:
27
+ >>> validator = ValidationHelper()
28
+ >>> validated_state = validator.verify_inputs(state)
29
+ >>> next_node = validated_state.get("next_node") # "load" or "research"
30
+ """
31
+
32
+ def __init__(self):
33
+ """
34
+ Initialize ValidationHelper.
35
+
36
+ This class is stateless - no dependencies needed for validation logic.
37
+ """
38
+ pass
39
+
40
+ @log_execution
41
+ @log_errors
42
+ def verify_inputs(self, state: DataLoadState) -> DataLoadState:
43
+ """
44
+ Validate inputs and set next_node for routing.
45
+
46
+ This method validates that both resume and job description are present
47
+ and non-empty in the state.
48
+ Parameters
49
+ ----------
50
+ state: DataLoadState
51
+ Current workflow state containing company_research_data.
52
+
53
+ Returns
54
+ -------
55
+ DataLoadState
56
+ Updated state with next_node set to "load" (if validation fails)
57
+ or "research" (if validation passes).
58
+ """
59
+ logger.info("Verifying loaded inputs!")
60
+ state["current_node"] = "verify"
61
+
62
+ # Validate required fields using helper methods
63
+ if not self._validate_resume(state):
64
+ logger.error("Resume is missing or empty in company_research_data")
65
+ state["next_node"] = "load" # Loop back to load subgraph
66
+ return state
67
+
68
+ if not self._validate_job_description(state):
69
+ logger.error("Job description is missing or empty in company_research_data")
70
+ state["next_node"] = "load" # Loop back to load subgraph
71
+ return state
72
+
73
+ # All validations passed
74
+ state["next_node"] = "research"
75
+ logger.info("Inputs verified successfully, proceeding to research")
76
+ return state
77
+
78
+ def _validate_resume(self, state: DataLoadState) -> bool:
79
+ """
80
+ Validate that resume is present and non-empty in company_research_data.
81
+
82
+ Private helper method for better code organization.
83
+
84
+ Parameters
85
+ ----------
86
+ state: DataLoadState
87
+ Current workflow state.
88
+
89
+ Returns
90
+ -------
91
+ bool
92
+ True if resume is present and non-empty, False otherwise.
93
+ """
94
+ company_research_data = state.get("company_research_data", {})
95
+ resume = company_research_data.get("resume", "")
96
+ # Handle various types: convert to string and check if non-empty
97
+ if not resume:
98
+ return False
99
+ resume_str = str(resume).strip()
100
+ return bool(resume_str)
101
+
102
+ def _validate_job_description(self, state: DataLoadState) -> bool:
103
+ """
104
+ Validate that job description is present and non-empty in company_research_data.
105
+
106
+ Private helper method for better code organization.
107
+
108
+ Parameters
109
+ ----------
110
+ state: DataLoadState
111
+ Current workflow state.
112
+
113
+ Returns
114
+ -------
115
+ bool
116
+ True if job description is present and non-empty, False otherwise.
117
+ """
118
+ company_research_data = state.get("company_research_data", {})
119
+ job_description = company_research_data.get("job_description", "")
120
+ # Handle various types: convert to string and check if non-empty
121
+ if not job_description:
122
+ return False
123
+ job_desc_str = str(job_description).strip()
124
+ return bool(job_desc_str)
src/job_writing_agent/nodes/variations.py CHANGED
@@ -1,22 +1,36 @@
 
1
  import logging
2
  from datetime import datetime
3
- from typing_extensions import Dict, List
4
 
 
5
  from langchain_core.documents import Document
6
 
7
-
8
  from ..classes.classes import ResultState
9
- from ..utils.llm_provider_factory import LLMFactory
10
  from ..prompts.templates import VARIATION_PROMPT
11
-
12
 
13
  logger = logging.getLogger(__name__)
14
  # Constants
15
  CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
16
 
17
 
18
- def generate_variations(state: ResultState) -> Dict[str, List[str]]:
19
- """Generate multiple variations of the draft for self-consistency voting."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  # Create LLM inside function (lazy initialization)
21
  llm_provider = LLMFactory()
22
  llm = llm_provider.create_langchain(
@@ -27,27 +41,30 @@ def generate_variations(state: ResultState) -> Dict[str, List[str]]:
27
 
28
  # Get resume and job text, handling both string and Document types
29
  try:
30
- resume_text = "\n".join(
31
- doc.page_content if isinstance(doc, Document) else doc
32
- for doc in (
33
- state["resume"][:2]
34
- if isinstance(state["company_research_data"]["resume"], str)
35
- else [state["resume"]]
36
- )
37
- )
38
- job_text = "\n".join(
39
- chunk
40
- for chunk in (
41
- state["company_research_data"]["job_description"][:2]
42
- if isinstance(state["company_research_data"]["job_description"], str)
43
- else [state["company_research_data"]["job_description"]]
44
  )
45
- )
 
 
 
 
 
 
 
 
 
 
46
  except Exception as e:
47
- print(f"Warning: Error processing resume/job text: {e}")
48
  # Fallback to simple string handling
49
- resume_text = str(state["company_research_data"]["resume"])
50
- job_text = str(state["company_research_data"]["job_description"])
51
 
52
  # Generate variations with different temperatures and creativity settings
53
  temp_variations = [
@@ -65,22 +82,23 @@ def generate_variations(state: ResultState) -> Dict[str, List[str]]:
65
 
66
  # Use VARIATION_PROMPT directly with the configured LLM
67
  variation = VARIATION_PROMPT.format_messages(
68
- resume_excerpt=resume_text, job_excerpt=job_text, draft=state["draft"]
69
  )
70
 
71
  response = configured_llm.invoke(variation)
72
 
73
- print(f"Response for setting: {variation} has a response: {response}")
74
 
75
  if response and response.strip(): # Only add non-empty variations
76
  variations.append(response)
77
  except Exception as e:
78
- print(f"Warning: Error generating variation with settings {settings}: {e}")
79
  continue
80
 
81
  # Ensure we have at least one variation
82
  if not variations:
83
  # If all variations failed, add the original draft as a fallback
84
- variations.append(state["draft"])
 
85
 
86
  return {"variations": variations}
 
1
+ # Standard library imports
2
  import logging
3
  from datetime import datetime
 
4
 
5
+ # Third-party imports
6
  from langchain_core.documents import Document
7
 
8
+ # Local imports
9
  from ..classes.classes import ResultState
 
10
  from ..prompts.templates import VARIATION_PROMPT
11
+ from ..utils.llm_provider_factory import LLMFactory
12
 
13
  logger = logging.getLogger(__name__)
14
  # Constants
15
  CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")
16
 
17
 
18
+ def generate_variations(state: ResultState) -> dict[str, list[str]]:
19
+ """
20
+ Generate multiple variations of the draft for self-consistency voting.
21
+
22
+ Args:
23
+ state: Current result state with draft and research data
24
+
25
+ Returns:
26
+ Dictionary containing list of draft variations
27
+ """
28
+ # Validate and extract all required state fields once
29
+ company_research_data = state.get("company_research_data", {})
30
+ draft_content = state.get("draft", "")
31
+ resume_data = company_research_data.get("resume", "")
32
+ job_description_data = company_research_data.get("job_description", "")
33
+
34
  # Create LLM inside function (lazy initialization)
35
  llm_provider = LLMFactory()
36
  llm = llm_provider.create_langchain(
 
41
 
42
  # Get resume and job text, handling both string and Document types
43
  try:
44
+ # Extract resume text
45
+ if isinstance(resume_data, str):
46
+ resume_text = resume_data[:2000] # Limit to first 2000 chars
47
+ elif isinstance(resume_data, list):
48
+ resume_text = "\n".join(
49
+ doc.page_content if isinstance(doc, Document) else str(doc)
50
+ for doc in resume_data[:2]
 
 
 
 
 
 
 
51
  )
52
+ else:
53
+ resume_text = str(resume_data)
54
+
55
+ # Extract job description text
56
+ if isinstance(job_description_data, str):
57
+ job_text = job_description_data[:2000] # Limit to first 2000 chars
58
+ elif isinstance(job_description_data, list):
59
+ job_text = "\n".join(str(chunk) for chunk in job_description_data[:2])
60
+ else:
61
+ job_text = str(job_description_data)
62
+
63
  except Exception as e:
64
+ logger.warning(f"Error processing resume/job text: {e}")
65
  # Fallback to simple string handling
66
+ resume_text = str(resume_data)
67
+ job_text = str(job_description_data)
68
 
69
  # Generate variations with different temperatures and creativity settings
70
  temp_variations = [
 
82
 
83
  # Use VARIATION_PROMPT directly with the configured LLM
84
  variation = VARIATION_PROMPT.format_messages(
85
+ resume_excerpt=resume_text, job_excerpt=job_text, draft=draft_content
86
  )
87
 
88
  response = configured_llm.invoke(variation)
89
 
90
+ logger.debug(f"Generated variation with settings {settings}")
91
 
92
  if response and response.strip(): # Only add non-empty variations
93
  variations.append(response)
94
  except Exception as e:
95
+ logger.warning(f"Error generating variation with settings {settings}: {e}")
96
  continue
97
 
98
  # Ensure we have at least one variation
99
  if not variations:
100
  # If all variations failed, add the original draft as a fallback
101
+ logger.warning("All variations failed, using original draft as fallback")
102
+ variations.append(draft_content)
103
 
104
  return {"variations": variations}
src/job_writing_agent/prompts/templates.py CHANGED
@@ -9,6 +9,7 @@ from langchain_core.prompts import (
9
  ChatPromptTemplate,
10
  SystemMessagePromptTemplate,
11
  HumanMessagePromptTemplate,
 
12
  )
13
  from langchain_core.messages import SystemMessage, HumanMessage
14
 
@@ -36,63 +37,59 @@ PERSONA_DEVELOPMENT_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_message
36
 
37
 
38
  # Draft generation prompts
39
-
40
- COVER_LETTER_PROMPT: SystemMessage = SystemMessage(
41
- content="""
42
- You are CoverLetterGPT, a concise career‑writing assistant.
43
-
44
- CORE OBJECTIVE
45
- Draft a 3‑paragraph cover letter (150‑180 words total) that targets hiring managers
46
- and technical recruiters. Assume it may reach the CEO.
47
- Begin exactly with: "To Hiring Team,"
48
- End exactly with: "Thanks, Rishabh"
49
- Tone: polite, casual, enthusiastic but no em dashes (—) and no clichés.
50
- Every fact about achievements, skills, or company details must be traceable to the
51
- provided resume, job description, or company research; otherwise, ask the user.
52
- If any critical detail is missing or ambiguous, STOP and ask a clarifying question
53
- before writing the letter.
54
- Keep sentences tight; avoid filler like “I am excited to…” (enthusiasm comes
55
- through precise language).
56
- • Never exceed 180 words. Never fall below 150 words.
57
-
58
- SELF‑EVALUATION (append after the letter)
59
- After producing the cover letter, output an “### Evaluation” section containing:
60
- Comprehensiveness (1‑5)
61
- Evidence provided (1‑5)
62
- Clarity of explanation (1‑5)
63
- Potential limitations or biases (bullet list)
64
- Areas for improvement (brief notes)
65
-
66
- ERROR HANDLING
67
- If word count, section order, or format rules are violated, regenerate until correct.
68
- """
69
  )
70
 
71
 
72
- BULLET_POINTS_PROMPT: SystemMessage = SystemMessage(
73
- content="""You are an expert job application writer who
74
- creates personalized application materials.
75
-
76
- {persona_instruction}
77
 
78
- Write 5-7 bullet points highlighting the candidate's
79
- qualifications for this specific role.
80
- Create content that genuinely reflects the candidate's
81
- background and is tailored to the specific job.
82
- Ensure the tone is professional, confident, and authentic.
83
- Today is {current_date}."""
 
84
  )
85
 
86
 
87
- LINKEDIN_NOTE_PROMPT: SystemMessage = SystemMessage(
88
- content="""You are an expert job application
89
- writer who creates personalized application materials.
90
- {persona_instruction}
91
 
92
- Write a brief LinkedIn connection note to a hiring manager or recruiter (150 words max).
93
- Create content that genuinely reflects the candidate's background and is tailored to the specific job.
94
- Ensure the tone is professional, confident, and authentic.
95
- Today is {current_date}."""
 
96
  )
97
 
98
  # Variation generation prompt
@@ -230,6 +227,35 @@ REVISION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
230
  ]
231
  )
232
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  # Tavily query prompt to build knowledge context about the company
234
 
235
  TAVILY_QUERY_PROMPT = """
@@ -247,33 +273,6 @@ The user needs targeted search queries (with rationale) for Tavily Search to res
247
  </Requirements>
248
  """
249
 
250
- JOB_DESCRIPTION_PROMPT = """You are a JSON extraction specialist. Extract job information from the provided text and return ONLY valid JSON.
251
-
252
- CRITICAL: Your response must be parseable by json.loads() - no markdown, no explanations, no extra text.
253
-
254
- Extract these three fields in exact order:
255
- 1. job_description field - Complete job posting formatted in clean markdown with proper headers (## Job Description, ## Responsibilities, ## Requirements, etc.)
256
- 2. company_name field - Exact company name as mentioned
257
- 3. job_title field - Exact job title as posted
258
-
259
- FORMATTING RULES:
260
- - Use double quotes for all strings
261
- - Escape internal quotes with \\"
262
- - Escape newlines as \\\\n in the job description field
263
- - Replace actual line breaks with \\\\n
264
- - If any field is missing, use empty string ""
265
- - No trailing commas
266
- - No comments or extra whitespace
267
-
268
- REQUIRED OUTPUT FORMAT:
269
- {{
270
- "job_description": "markdown formatted job description with \\\\n for line breaks",
271
- "company_name": "exact company name",
272
- "job_title": "exact job title"
273
- }}
274
-
275
- Return only the JSON object - no other text."""
276
-
277
  agent_system_prompt = """I act as your personal job-application assistant.
278
  My function is to help you research, analyze, and write compelling application
279
  materials — primarily LinkedIn reach-outs, short written responses, and cover
 
9
  ChatPromptTemplate,
10
  SystemMessagePromptTemplate,
11
  HumanMessagePromptTemplate,
12
+ AIMessagePromptTemplate,
13
  )
14
  from langchain_core.messages import SystemMessage, HumanMessage
15
 
 
37
 
38
 
39
  # Draft generation prompts
40
+ COVER_LETTER_PROMPT = AIMessagePromptTemplate.from_template(
41
+ """
42
+ I am CoverLetterGPT, a concise career writing assistant.
43
+
44
+ CORE OBJECTIVE
45
+ Draft a 3‑paragraph cover letter (150‑180 words total) that targets hiring managers
46
+ and technical recruiters. Assume it may reach the CEO.
47
+ Begin exactly with: "To Hiring Team,"
48
+ End exactly with: "Thanks, Rishabh"
49
+ Tone: polite, casual, enthusiastic — but no em dashes (—) and no clichés.
50
+ Every fact about achievements, skills, or company details must be traceable to the
51
+ provided resume, job description, or company research; otherwise, ask the user.
52
+ If any critical detail is missing or ambiguous, STOP and ask a clarifying question
53
+ before writing the letter.
54
+ Keep sentences tight; avoid filler like “I am excited to…” (enthusiasm comes
55
+ through precise language).
56
+ Never exceed 180 words. Never fall below 150 words.
57
+
58
+ SELF‑EVALUATION (append after the letter)
59
+ After producing the cover letter, output an “### Evaluation” section containing:
60
+ Comprehensiveness (1‑5)
61
+ Evidence provided (1‑5)
62
+ Clarity of explanation (1‑5)
63
+ Potential limitations or biases (bullet list)
64
+ Areas for improvement (brief notes)
65
+
66
+ ERROR HANDLING
67
+ If word count, section order, or format rules are violated, regenerate until correct.
68
+ """
 
69
  )
70
 
71
 
72
+ BULLET_POINTS_PROMPT = AIMessagePromptTemplate.from_template(
73
+ """I am an expert job application writer who creates personalized application materials.
 
 
 
74
 
75
+ Write 5-7 bullet points highlighting the candidate's
76
+ qualifications for this specific role.
77
+ Create content that genuinely reflects the candidate's
78
+ background and is tailored to the specific job.
79
+ Ensure the tone is professional, confident, and authentic.
80
+ Today is {current_date}.""",
81
+ input_variables=["current_date"],
82
  )
83
 
84
 
85
+ LINKEDIN_NOTE_PROMPT = AIMessagePromptTemplate.from_template(
86
+ """I am an expert job application writer who creates personalized application materials.
 
 
87
 
88
+ Write a brief LinkedIn connection note to a hiring manager or recruiter (100 words max).
89
+ Create content that genuinely reflects the candidate's background and is tailored to the specific job.
90
+ Ensure the tone is professional, confident, and authentic.
91
+ Today is {current_date}.""",
92
+ input_variables=["current_date"],
93
  )
94
 
95
  # Variation generation prompt
 
227
  ]
228
  )
229
 
230
+ DRAFT_GENERATION_CONTEXT_PROMPT = HumanMessagePromptTemplate.from_template(
231
+ """
232
+ Below is the Job Description, Candidate Resume, and Company Research Data enclosed in triple backticks.
233
+
234
+ **Job Description:**
235
+
236
+ START OF JOB DESCRIPTION'''
237
+ {current_job_role}
238
+ '''END OF JOB DESCRIPTION
239
+
240
+ **Candidate Resume:**
241
+
242
+ START OF CANDIDATE RESUME'''
243
+ {candidate_resume}
244
+ '''END OF CANDIDATE RESUME
245
+
246
+ **Company Research Data:**
247
+
248
+ START OF COMPANY RESEARCH DATA'''
249
+ {company_research_data}
250
+ '''END OF COMPANY RESEARCH DATA
251
+ """,
252
+ input_variables=[
253
+ "current_job_role",
254
+ "candidate_resume",
255
+ "company_research_data",
256
+ ],
257
+ )
258
+
259
  # Tavily query prompt to build knowledge context about the company
260
 
261
  TAVILY_QUERY_PROMPT = """
 
273
  </Requirements>
274
  """
275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  agent_system_prompt = """I act as your personal job-application assistant.
277
  My function is to help you research, analyze, and write compelling application
278
  materials — primarily LinkedIn reach-outs, short written responses, and cover
src/job_writing_agent/prompts/test_templates.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.prompts import (
2
+ ChatPromptTemplate,
3
+ SystemMessagePromptTemplate,
4
+ AIMessagePromptTemplate,
5
+ HumanMessagePromptTemplate,
6
+ )
7
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
8
+
9
+ from job_writing_agent.utils.llm_provider_factory import LLMFactory
10
+
11
+
12
+ llm_provider = LLMFactory()
13
+ llm = llm_provider.create_langchain(
14
+ "allenai/olmo-3.1-32b-think:free",
15
+ provider="openrouter",
16
+ temperature=0.1,
17
+ )
18
+
19
+
20
+ # Use PromptTemplate classes for variable interpolation
21
+ TEST_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages(
22
+ [
23
+ # Use SystemMessagePromptTemplate for SystemMessage with variables
24
+ SystemMessagePromptTemplate.from_template(
25
+ "You can answer any question that the user asks. If you don't know the answer, say 'I don't know' and don't make up an answer. Todays date is {current_date}.",
26
+ input_variables=["current_date"],
27
+ ),
28
+ # Use AIMessagePromptTemplate for AIMessage with variables (if needed)
29
+ # Or use AIMessage directly if no variables
30
+ AIMessagePromptTemplate.from_template(
31
+ "I am here to help you answer any question that you ask.",
32
+ input_variables=["current_date"],
33
+ ),
34
+ ]
35
+ )
36
+
37
+ # Now the chain will work correctly
38
+ prompt_test_chain = ({"current_date": lambda x: x["current_date"]}) | TEST_PROMPT | llm
39
+
40
+ # Test it
41
+ print(TEST_PROMPT)
42
+
43
+
44
+ BULLET_POINTS_PROMPT = SystemMessagePromptTemplate.from_template(
45
+ """You are an expert job application writer who
46
+ creates personalized application materials.
47
+
48
+ {persona_instruction}
49
+
50
+ Write 5-7 bullet points highlighting the candidate's
51
+ qualifications for this specific role.
52
+ Create content that genuinely reflects the candidate's
53
+ background and is tailored to the specific job.
54
+ Ensure the tone is professional, confident, and authentic.
55
+ Today is {current_date}.""",
56
+ input_variables=["persona_instruction", "current_date"],
57
+ )
58
+
59
+ print(BULLET_POINTS_PROMPT)
src/job_writing_agent/tools/SearchTool.py CHANGED
@@ -1,14 +1,17 @@
 
 
1
  import logging
2
  import os
3
- import asyncio
4
- from dotenv import load_dotenv
5
  from pathlib import Path
6
 
 
 
 
7
  from langchain_tavily import TavilySearch
8
  from openevals.llm import create_async_llm_as_judge
9
- from openevals.prompts import RAG_RETRIEVAL_RELEVANCE_PROMPT, RAG_HELPFULNESS_PROMPT
10
- import dspy
11
 
 
12
  from ..agents.output_schema import TavilySearchQueries
13
  from ..classes.classes import ResearchState
14
  from ..utils.llm_provider_factory import LLMFactory
@@ -21,7 +24,11 @@ env_path = Path(__file__).parent / ".env"
21
  load_dotenv(dotenv_path=env_path, override=True)
22
 
23
 
24
- openrouter_api_key = os.environ["OPENROUTER_API_KEY"]
 
 
 
 
25
 
26
 
27
  class TavilyResearchTool:
@@ -30,7 +37,7 @@ class TavilyResearchTool:
30
  job_description,
31
  company_name,
32
  max_results=5,
33
- model_name="mistralai/mistral-7b-instruct:free",
34
  ):
35
  # Create LLM inside __init__ (lazy initialization)
36
  llm_provider = LLMFactory()
@@ -55,19 +62,34 @@ class TavilyResearchTool:
55
  return response
56
 
57
  def tavily_search_company(self, queries):
 
 
 
 
 
 
 
 
 
58
  query_results: list[list[str]] = []
59
- for query in queries:
60
  try:
 
 
 
 
 
61
  search_query_response = self.tavily_searchtool.invoke(
62
- {"query": queries[query]}
63
  )
 
 
64
  query_results.append(
65
- [res["content"] for res in search_query_response["results"]]
66
  )
67
- # print(f"Tavily Search Tool Response for query '{search_query_response['query']}': {query_results_map[search_query_response['query']]}")
68
  except Exception as e:
69
  logger.error(
70
- f"Failed to perform company research using TavilySearchTool. Error : {e}"
71
  )
72
  continue
73
 
@@ -120,10 +142,9 @@ async def filter_research_results_by_relevance(state: ResearchState) -> Research
120
  try:
121
  state["current_node"] = "filter_research_results_by_relevance"
122
 
123
- # Extract search data from state
124
- raw_search_results = state.get("company_research_data", {}).get(
125
- "tavily_search", []
126
- )
127
  search_queries_used = state.get("attempted_search_queries", [])
128
 
129
  # Validate data types
@@ -138,7 +159,9 @@ async def filter_research_results_by_relevance(state: ResearchState) -> Research
138
  # Early exit if no results
139
  if len(raw_search_results) == 0:
140
  logger.info("No search results to filter.")
141
- state["company_research_data"]["tavily_search"] = []
 
 
142
  return state
143
 
144
  logger.info(
@@ -201,6 +224,7 @@ async def filter_research_results_by_relevance(state: ResearchState) -> Research
201
  logger.warning(
202
  f"Evaluation timed out for query: {original_query[:60]}... (KEEPING result)"
203
  )
 
204
  return (search_result_content, True, "timeout")
205
 
206
  except Exception as e:
@@ -248,8 +272,9 @@ async def filter_research_results_by_relevance(state: ResearchState) -> Research
248
  else:
249
  results_removed_count += 1
250
 
251
- # Update state with ONLY the relevant results
252
- state["company_research_data"]["tavily_search"] = results_kept
 
253
 
254
  # Log filtering summary
255
  total_evaluated = len(raw_search_results)
 
1
+ # Standard library imports
2
+ import asyncio
3
  import logging
4
  import os
 
 
5
  from pathlib import Path
6
 
7
+ # Third-party imports
8
+ import dspy
9
+ from dotenv import load_dotenv
10
  from langchain_tavily import TavilySearch
11
  from openevals.llm import create_async_llm_as_judge
12
+ from openevals.prompts import RAG_HELPFULNESS_PROMPT, RAG_RETRIEVAL_RELEVANCE_PROMPT
 
13
 
14
+ # Local imports
15
  from ..agents.output_schema import TavilySearchQueries
16
  from ..classes.classes import ResearchState
17
  from ..utils.llm_provider_factory import LLMFactory
 
24
  load_dotenv(dotenv_path=env_path, override=True)
25
 
26
 
27
+ # Safe environment variable access with validation
28
+ openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
29
+ if not openrouter_api_key:
30
+ logger.error("OPENROUTER_API_KEY environment variable not set")
31
+ raise ValueError("OPENROUTER_API_KEY environment variable is required")
32
 
33
 
34
  class TavilyResearchTool:
 
37
  job_description,
38
  company_name,
39
  max_results=5,
40
+ model_name="cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
41
  ):
42
  # Create LLM inside __init__ (lazy initialization)
43
  llm_provider = LLMFactory()
 
62
  return response
63
 
64
  def tavily_search_company(self, queries):
65
+ """
66
+ Execute Tavily searches for multiple queries.
67
+
68
+ Args:
69
+ queries: Dictionary of query identifiers to query strings
70
+
71
+ Returns:
72
+ List of search result lists, one per query
73
+ """
74
  query_results: list[list[str]] = []
75
+ for query_key in queries:
76
  try:
77
+ query_string = queries.get(query_key, "")
78
+ if not query_string:
79
+ logger.warning(f"Empty query for key: {query_key}")
80
+ continue
81
+
82
  search_query_response = self.tavily_searchtool.invoke(
83
+ {"query": query_string}
84
  )
85
+ # Safe dictionary access for response
86
+ results = search_query_response.get("results", [])
87
  query_results.append(
88
+ [res.get("content", "") for res in results if isinstance(res, dict)]
89
  )
 
90
  except Exception as e:
91
  logger.error(
92
+ f"Failed to perform company research using TavilySearchTool. Error: {e}"
93
  )
94
  continue
95
 
 
142
  try:
143
  state["current_node"] = "filter_research_results_by_relevance"
144
 
145
+ # Extract and validate required state fields once
146
+ company_research_data = state.get("company_research_data", {})
147
+ raw_search_results = company_research_data.get("tavily_search", [])
 
148
  search_queries_used = state.get("attempted_search_queries", [])
149
 
150
  # Validate data types
 
159
  # Early exit if no results
160
  if len(raw_search_results) == 0:
161
  logger.info("No search results to filter.")
162
+ # Update using the extracted variable
163
+ company_research_data["tavily_search"] = []
164
+ state["company_research_data"] = company_research_data
165
  return state
166
 
167
  logger.info(
 
224
  logger.warning(
225
  f"Evaluation timed out for query: {original_query[:60]}... (KEEPING result)"
226
  )
227
+ # Keep the result on timeout to avoid losing potentially useful data
228
  return (search_result_content, True, "timeout")
229
 
230
  except Exception as e:
 
272
  else:
273
  results_removed_count += 1
274
 
275
+ # Update company_research_data with ONLY the relevant results
276
+ company_research_data["tavily_search"] = results_kept
277
+ state["company_research_data"] = company_research_data
278
 
279
  # Log filtering summary
280
  total_evaluated = len(raw_search_results)
src/job_writing_agent/utils/application_cli_interface.py CHANGED
@@ -1,18 +1,29 @@
1
  import argparse
2
- import os
3
  from typing import Iterable
4
 
5
  import requests
6
- from requests.exceptions import RequestException
7
 
8
 
9
- DEFAULT_MODEL = "mistralai/mistral-7b-instruct:free"
10
  DEFAULT_CONTENT_TYPE = "cover_letter"
11
 
12
 
13
  def readable_file(path: str) -> str:
14
- """Validate and return contents of a readable file."""
15
- if not os.path.isfile(path):
 
 
 
 
 
 
 
 
 
 
 
 
16
  raise argparse.ArgumentTypeError(f"File not found: {path}")
17
  if not path.lower().endswith((".pdf", ".md", ".json", ".txt")):
18
  raise argparse.ArgumentTypeError(
@@ -22,7 +33,18 @@ def readable_file(path: str) -> str:
22
 
23
 
24
  def valid_temp(temp: str) -> float:
25
- """Ensure temperature is within a reasonable range."""
 
 
 
 
 
 
 
 
 
 
 
26
  value = float(temp)
27
  if not (0 <= value <= 2):
28
  raise argparse.ArgumentTypeError("Temperature must be between 0 and 2.")
@@ -31,42 +53,41 @@ def valid_temp(temp: str) -> float:
31
 
32
  def is_valid_url(
33
  job_posting: str, allowed_statuses: Iterable[int] | None = None
34
- ) -> bool:
35
  """
36
- Returns ``True`` if *url* is reachable and its HTTP status code is in
37
- `allowed_statuses`. Defaults to any 2xx or 3xx response (common
38
- successful codes).
39
-
40
- Parameters
41
- ----------
42
- job_posting : str
43
- The URL for the job posting.
44
- timeout : float, optional
45
- Timeout for the request (seconds). Defaults to 10.
46
- allowed_statuses : Iterable[int] | None, optional
47
- Specific status codes that are considered “valid”.
48
- If ``None`` (default) any 200‑399 status is accepted.
49
-
50
- Returns
51
- -------
52
- bool
53
- ``True`` if the URL succeeded, ``False`` otherwise.
54
  """
55
  if allowed_statuses is None:
56
  # All 2xx and 3xx responses are considered “valid”
57
  allowed_statuses = range(200, 400)
58
 
59
- with requests.get(
60
- job_posting, timeout=30, allow_redirects=True, stream=True
61
- ) as resp:
62
- if resp.status_code in allowed_statuses:
63
- return job_posting
64
- else:
65
- raise RequestException("Job Posting could not be reached")
 
66
 
67
 
68
  def handle_cli() -> argparse.Namespace:
69
- """Parse and validate CLI arguments for job application generator."""
 
 
 
 
 
70
  parser = argparse.ArgumentParser(
71
  description="""Assist the candidate in writing content for
72
  job application such as answering to question in application
 
1
  import argparse
2
+ from pathlib import Path
3
  from typing import Iterable
4
 
5
  import requests
 
6
 
7
 
8
+ DEFAULT_MODEL = "allenai/olmo-3.1-32b-think:free"
9
  DEFAULT_CONTENT_TYPE = "cover_letter"
10
 
11
 
12
  def readable_file(path: str) -> str:
13
+ """
14
+ Validate that the file exists and has a supported extension.
15
+
16
+ Args:
17
+ path: File path to validate
18
+
19
+ Returns:
20
+ Original path string if valid
21
+
22
+ Raises:
23
+ ArgumentTypeError: If file doesn't exist or has unsupported extension
24
+ """
25
+ file_path = Path(path)
26
+ if not file_path.is_file():
27
  raise argparse.ArgumentTypeError(f"File not found: {path}")
28
  if not path.lower().endswith((".pdf", ".md", ".json", ".txt")):
29
  raise argparse.ArgumentTypeError(
 
33
 
34
 
35
  def valid_temp(temp: str) -> float:
36
+ """
37
+ Ensure temperature is within a reasonable range.
38
+
39
+ Args:
40
+ temp: Temperature value as string
41
+
42
+ Returns:
43
+ Temperature as float
44
+
45
+ Raises:
46
+ ArgumentTypeError: If temperature is outside valid range [0, 2]
47
+ """
48
  value = float(temp)
49
  if not (0 <= value <= 2):
50
  raise argparse.ArgumentTypeError("Temperature must be between 0 and 2.")
 
53
 
54
  def is_valid_url(
55
  job_posting: str, allowed_statuses: Iterable[int] | None = None
56
+ ) -> str:
57
  """
58
+ Validate that a URL is reachable and returns an acceptable HTTP status.
59
+
60
+ Defaults to any 2xx or 3xx response (common successful codes).
61
+
62
+ Args:
63
+ job_posting: The URL for the job posting
64
+ allowed_statuses: Specific status codes that are considered valid.
65
+ If None (default), any 200-399 status is accepted.
66
+
67
+ Returns:
68
+ URL of the job posting if successful, error message if failed
 
 
 
 
 
 
 
69
  """
70
  if allowed_statuses is None:
71
  # All 2xx and 3xx responses are considered “valid”
72
  allowed_statuses = range(200, 400)
73
 
74
+ try:
75
+ response = requests.get(
76
+ job_posting, timeout=30, allow_redirects=True, stream=True
77
+ )
78
+ response.raise_for_status()
79
+ return job_posting
80
+ except requests.exceptions.RequestException as e:
81
+ return f"Error: {e.response.text if e.response else 'Unknown error'}"
82
 
83
 
84
  def handle_cli() -> argparse.Namespace:
85
+ """
86
+ Parse and validate CLI arguments for job application generator.
87
+
88
+ Returns:
89
+ Parsed command-line arguments namespace
90
+ """
91
  parser = argparse.ArgumentParser(
92
  description="""Assist the candidate in writing content for
93
  job application such as answering to question in application
src/job_writing_agent/utils/config.py CHANGED
@@ -1,25 +1,44 @@
1
  """
2
  Configuration utilities for the job writer application.
3
 
4
- This module provides functions for initializing and configuring
5
  language models and other resources.
6
  """
7
 
 
8
  import os
9
- from typing_extensions import Dict, Any, Tuple, Optional
 
10
  from langchain.chat_models import init_chat_model
 
 
 
 
 
 
 
 
11
 
12
- def init_models(config: Optional[Dict[str, Any]] = None) -> Tuple[Any, Any]:
13
- """Initialize language models based on configuration."""
 
 
 
 
 
 
 
14
  config = config or {}
15
-
16
  # Model configuration with defaults
17
  model_name = config.get("model_name", os.getenv("OLLAMA_MODEL", "llama3.2:latest"))
18
  temperature = float(config.get("temperature", "0.3"))
19
  precise_temperature = float(config.get("precise_temperature", "0.2"))
20
-
21
  # Initialize models
22
- llm = init_chat_model(f"ollama:{model_name}", temperature=temperature)
23
- llm_precise = init_chat_model(f"ollama:{model_name}", temperature=precise_temperature)
 
 
24
 
25
- return llm, llm_precise
 
1
  """
2
  Configuration utilities for the job writer application.
3
 
4
+ This module provides functions for initializing and configuring
5
  language models and other resources.
6
  """
7
 
8
+ # Standard library imports
9
  import os
10
+
11
+ # Third-party imports
12
  from langchain.chat_models import init_chat_model
13
+ from langchain_core.language_models.chat_models import BaseChatModel
14
+
15
+
16
+ def init_models(
17
+ config: dict[str, str | float] | None = None,
18
+ ) -> tuple[BaseChatModel, BaseChatModel]:
19
+ """
20
+ Initialize language models based on configuration.
21
 
22
+ Args:
23
+ config: Optional configuration dictionary with keys:
24
+ - model_name: Name of the model to use
25
+ - temperature: Temperature for general LLM
26
+ - precise_temperature: Temperature for precise LLM
27
+
28
+ Returns:
29
+ Tuple of (general_llm, precise_llm) instances
30
+ """
31
  config = config or {}
32
+
33
  # Model configuration with defaults
34
  model_name = config.get("model_name", os.getenv("OLLAMA_MODEL", "llama3.2:latest"))
35
  temperature = float(config.get("temperature", "0.3"))
36
  precise_temperature = float(config.get("precise_temperature", "0.2"))
37
+
38
  # Initialize models
39
+ general_llm = init_chat_model(f"ollama:{model_name}", temperature=temperature)
40
+ precise_llm = init_chat_model(
41
+ f"ollama:{model_name}", temperature=precise_temperature
42
+ )
43
 
44
+ return general_llm, precise_llm
src/job_writing_agent/utils/document_processing.py CHANGED
@@ -2,27 +2,28 @@
2
  Document processing utilities for parsing resumes and job descriptions.
3
  """
4
 
 
5
  import logging
6
  import os
7
  import re
8
  from pathlib import Path
9
  from urllib.parse import urlparse
10
- from typing_extensions import Dict, List, Any
11
-
12
 
 
13
  import dspy
14
  from langchain_community.document_loaders import PyPDFLoader, AsyncChromiumLoader
15
  from langchain_community.document_transformers import Html2TextTransformer
 
16
  from langchain_text_splitters import (
17
  RecursiveCharacterTextSplitter,
18
  MarkdownHeaderTextSplitter,
19
  )
20
- from langchain_core.documents import Document
21
  from langfuse import observe
22
  from pydantic import BaseModel, Field
 
23
 
24
- # Local imports - using relative imports
25
- from .errors import URLExtractionError, LLMProcessingError, JobDescriptionParsingError
26
 
27
  # Set up logging
28
  logger = logging.getLogger(__name__)
@@ -64,8 +65,8 @@ class ResumeSection(BaseModel):
64
  class StructuredResume(BaseModel):
65
  """Model for a structured resume with sections."""
66
 
67
- sections: List[ResumeSection] = Field(description="List of resume sections")
68
- contact_info: Dict[str, str] = Field(
69
  description="Contact information extracted from the resume"
70
  )
71
 
@@ -122,7 +123,7 @@ def clean_resume_text(text: str) -> str:
122
 
123
 
124
  @observe()
125
- def extract_contact_info(text: str) -> Dict[str, str]:
126
  """Extract contact information from resume text.
127
 
128
  Args:
@@ -162,7 +163,7 @@ def extract_contact_info(text: str) -> Dict[str, str]:
162
 
163
 
164
  @observe()
165
- def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
166
  """Identify sections in a resume text.
167
 
168
  Args:
@@ -231,16 +232,33 @@ def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
231
 
232
 
233
  def _collapse_ws(text: str) -> str:
234
- """Collapse stray whitespace but keep bullet breaks."""
 
 
 
 
 
 
 
 
235
  text = re.sub(r"\n\s*([•\-–])\s*", r"\n\1 ", text)
236
  return re.sub(r"[ \t\r\f\v]+", " ", text).replace(" \n", "\n").strip()
237
 
238
 
239
  def _is_heading(line: str) -> bool:
 
 
 
 
 
 
 
 
 
240
  return line.isupper() and len(line.split()) <= 5 and not re.search(r"\d", line)
241
 
242
 
243
- def parse_resume(file_path: str | Path) -> List[Document]:
244
  """
245
  Load a résumé from PDF or TXT file → list[Document] chunks
246
  (≈400 chars, 50‑char overlap) with {source, section} metadata.
@@ -326,7 +344,7 @@ async def get_job_description(file_path_or_url: str) -> Document:
326
  )
327
 
328
 
329
- async def scrape_job_description_from_web(urls: List[str]):
330
  """This function will first scrape the data from the job listing.
331
  Then using the recursive splitter using the different seperators,
332
  it preserves the paragraphs, lines and words"""
@@ -393,11 +411,15 @@ async def parse_job_description_from_url(url: str) -> Document:
393
  # 3. Process content with the LLM
394
  try:
395
  logger.info("Processing content with DSPy LLM...")
396
- # Configure DSPy LM (it's good practice to do this here if it can change)
 
 
 
 
397
  dspy.configure(
398
  lm=dspy.LM(
399
  "cerebras/qwen-3-32b",
400
- api_key=os.environ.get("CEREBRAS_API_KEY"),
401
  temperature=0.1,
402
  max_tokens=60000, # Note: This max_tokens is unusually high
403
  )
 
2
  Document processing utilities for parsing resumes and job descriptions.
3
  """
4
 
5
+ # Standard library imports
6
  import logging
7
  import os
8
  import re
9
  from pathlib import Path
10
  from urllib.parse import urlparse
 
 
11
 
12
+ # Third-party imports
13
  import dspy
14
  from langchain_community.document_loaders import PyPDFLoader, AsyncChromiumLoader
15
  from langchain_community.document_transformers import Html2TextTransformer
16
+ from langchain_core.documents import Document
17
  from langchain_text_splitters import (
18
  RecursiveCharacterTextSplitter,
19
  MarkdownHeaderTextSplitter,
20
  )
 
21
  from langfuse import observe
22
  from pydantic import BaseModel, Field
23
+ from typing_extensions import Any
24
 
25
+ # Local imports
26
+ from .errors import JobDescriptionParsingError, LLMProcessingError, URLExtractionError
27
 
28
  # Set up logging
29
  logger = logging.getLogger(__name__)
 
65
  class StructuredResume(BaseModel):
66
  """Model for a structured resume with sections."""
67
 
68
+ sections: list[ResumeSection] = Field(description="List of resume sections")
69
+ contact_info: dict[str, str] = Field(
70
  description="Contact information extracted from the resume"
71
  )
72
 
 
123
 
124
 
125
  @observe()
126
+ def extract_contact_info(text: str) -> dict[str, str]:
127
  """Extract contact information from resume text.
128
 
129
  Args:
 
163
 
164
 
165
  @observe()
166
+ def identify_resume_sections(text: str) -> list[dict[str, Any]]:
167
  """Identify sections in a resume text.
168
 
169
  Args:
 
232
 
233
 
234
  def _collapse_ws(text: str) -> str:
235
+ """
236
+ Collapse stray whitespace but keep bullet breaks.
237
+
238
+ Args:
239
+ text: Input text with potential whitespace issues
240
+
241
+ Returns:
242
+ Text with collapsed whitespace
243
+ """
244
  text = re.sub(r"\n\s*([•\-–])\s*", r"\n\1 ", text)
245
  return re.sub(r"[ \t\r\f\v]+", " ", text).replace(" \n", "\n").strip()
246
 
247
 
248
  def _is_heading(line: str) -> bool:
249
+ """
250
+ Check if a line is a heading (all uppercase, short, no digits).
251
+
252
+ Args:
253
+ line: Line of text to check
254
+
255
+ Returns:
256
+ True if line appears to be a heading
257
+ """
258
  return line.isupper() and len(line.split()) <= 5 and not re.search(r"\d", line)
259
 
260
 
261
+ def parse_resume(file_path: str | Path) -> list[Document]:
262
  """
263
  Load a résumé from PDF or TXT file → list[Document] chunks
264
  (≈400 chars, 50‑char overlap) with {source, section} metadata.
 
344
  )
345
 
346
 
347
+ async def scrape_job_description_from_web(urls: list[str]) -> str:
348
  """This function will first scrape the data from the job listing.
349
  Then using the recursive splitter using the different seperators,
350
  it preserves the paragraphs, lines and words"""
 
411
  # 3. Process content with the LLM
412
  try:
413
  logger.info("Processing content with DSPy LLM...")
414
+ # Configure DSPy LM with safe environment variable access
415
+ cerebras_api_key = os.getenv("CEREBRAS_API_KEY")
416
+ if not cerebras_api_key:
417
+ raise ValueError("CEREBRAS_API_KEY environment variable not set")
418
+
419
  dspy.configure(
420
  lm=dspy.LM(
421
  "cerebras/qwen-3-32b",
422
+ api_key=cerebras_api_key,
423
  temperature=0.1,
424
  max_tokens=60000, # Note: This max_tokens is unusually high
425
  )
src/job_writing_agent/utils/vector_store.py CHANGED
@@ -1,13 +1,12 @@
1
  """
2
  Vector storage utilities for the job writer application.
3
 
4
- This module provides functions for storing and retrieving
5
  documents from vector databases.
6
  """
7
 
8
  # Standard library imports
9
  import os
10
- from typing_extensions import List, Optional
11
 
12
  # Third-party library imports
13
  from langchain_core.documents import Document
@@ -18,38 +17,37 @@ from pinecone import Pinecone as PineconeClient, ServerlessSpec
18
  # Default configuration
19
  DEFAULT_PINECONE_INDEX = "job-writer-vector"
20
 
 
21
  class VectorStoreManager:
22
  """Manager class for vector store operations."""
23
-
24
  def __init__(
25
  self,
26
  index_name: str = DEFAULT_PINECONE_INDEX,
27
- embedding_model: str = "llama3.2:latest"
28
  ):
29
  """Initialize the vector store manager.
30
-
31
  Args:
32
  api_key: Pinecone API key (will use env var if not provided)
33
  index_name: Name of the Pinecone index to use
34
  embedding_model: Name of the Ollama model to use for embeddings
35
  """
36
- api_key= os.getenv("PINECONE_API_KEY")
37
  if not api_key:
38
  raise ValueError("Environment variable PINECONE_API_KEY not set.")
39
-
40
  self.index_name = index_name
41
-
42
  # Initialize embeddings
43
- self.embeddings = OllamaEmbeddings(
44
- model=embedding_model
45
- )
46
-
47
  # Initialize Pinecone client
48
  self.client = PineconeClient(api_key=api_key)
49
-
50
  # Ensure index exists
51
  self._ensure_index_exists()
52
-
53
  def _ensure_index_exists(self):
54
  """Make sure the required index exists, create if not."""
55
  # Get embedding dimension from our embeddings model
@@ -60,7 +58,7 @@ class VectorStoreManager:
60
  print(f"Error determining embedding dimension: {e}")
61
  print("Falling back to default dimension of 384")
62
  embedding_dim = 384 # Common default for Ollama embeddings
63
-
64
  # Check if the index exists
65
  index_exists = False
66
  try:
@@ -69,7 +67,7 @@ class VectorStoreManager:
69
  index_exists = self.index_name in index_list
70
  except Exception as e:
71
  print(f"Error checking Pinecone indexes: {e}")
72
-
73
  # Create index if it doesn't exist
74
  if not index_exists:
75
  try:
@@ -78,20 +76,22 @@ class VectorStoreManager:
78
  name=self.index_name,
79
  dimension=embedding_dim,
80
  spec=ServerlessSpec(region="us-east-1", cloud="aws"),
81
- metric="cosine"
82
  )
83
  print(f"Successfully created index: {self.index_name}")
84
  except Exception as e:
85
  if "ALREADY_EXISTS" in str(e):
86
- print(f"Index {self.index_name} already exists (created in another process)")
 
 
87
  else:
88
  print(f"Error creating index: {e}")
89
  else:
90
  print(f"Using Pinecone Index: {self.index_name}")
91
-
92
- def store_documents(self, docs: List[Document], namespace: str) -> None:
93
  """Store documents in vector database.
94
-
95
  Args:
96
  docs: List of Document objects to store
97
  namespace: Namespace to store documents under
@@ -99,58 +99,60 @@ class VectorStoreManager:
99
  try:
100
  # Get the index
101
  index = self.client.Index(self.index_name)
102
-
103
  # Create the vector store
104
  vector_store = Pinecone(
105
  index=index,
106
  embedding=self.embeddings,
107
  text_key="text",
108
- namespace=namespace
109
  )
110
-
111
  # Add documents
112
  vector_store.add_documents(docs)
113
- print(f"Successfully stored {len(docs)} documents in namespace: {namespace}")
 
 
114
  except Exception as e:
115
  print(f"Error storing documents: {e}")
116
  raise
117
-
118
- def retrieve_similar(self, query: str, namespace: str, k: int = 3):
 
 
119
  """Retrieve similar documents based on a query.
120
-
121
  Args:
122
  query: The query text to search for
123
  namespace: Namespace to search in
124
  k: Number of results to return
125
-
126
  Returns:
127
  List of Document objects
128
  """
129
  try:
130
  # Get the index
131
  index = self.client.Index(self.index_name)
132
-
133
  # Create the vector store
134
  vectorstore = Pinecone(
135
  index=index,
136
  embedding=self.embeddings,
137
  text_key="text",
138
- namespace=namespace
139
  )
140
-
141
  # Search for similar documents
142
  docs = vectorstore.similarity_search(query, k=k, namespace=namespace)
143
  return docs
144
  except Exception as e:
145
  print(f"Error retrieving documents: {e}")
146
  return []
147
-
148
-
149
-
150
 
151
- VectorStoreManager = VectorStoreManager()
152
 
153
- VectorStoreManager.store_documents(
154
- docs=[Document(page_content="Sample content", metadata={"source": "test"})],
155
- namespace="test_namespace"
156
- )
 
 
 
1
  """
2
  Vector storage utilities for the job writer application.
3
 
4
+ This module provides functions for storing and retrieving
5
  documents from vector databases.
6
  """
7
 
8
  # Standard library imports
9
  import os
 
10
 
11
  # Third-party library imports
12
  from langchain_core.documents import Document
 
17
  # Default configuration
18
  DEFAULT_PINECONE_INDEX = "job-writer-vector"
19
 
20
+
21
  class VectorStoreManager:
22
  """Manager class for vector store operations."""
23
+
24
  def __init__(
25
  self,
26
  index_name: str = DEFAULT_PINECONE_INDEX,
27
+ embedding_model: str = "llama3.2:latest",
28
  ):
29
  """Initialize the vector store manager.
30
+
31
  Args:
32
  api_key: Pinecone API key (will use env var if not provided)
33
  index_name: Name of the Pinecone index to use
34
  embedding_model: Name of the Ollama model to use for embeddings
35
  """
36
+ api_key = os.getenv("PINECONE_API_KEY")
37
  if not api_key:
38
  raise ValueError("Environment variable PINECONE_API_KEY not set.")
39
+
40
  self.index_name = index_name
41
+
42
  # Initialize embeddings
43
+ self.embeddings = OllamaEmbeddings(model=embedding_model)
44
+
 
 
45
  # Initialize Pinecone client
46
  self.client = PineconeClient(api_key=api_key)
47
+
48
  # Ensure index exists
49
  self._ensure_index_exists()
50
+
51
  def _ensure_index_exists(self):
52
  """Make sure the required index exists, create if not."""
53
  # Get embedding dimension from our embeddings model
 
58
  print(f"Error determining embedding dimension: {e}")
59
  print("Falling back to default dimension of 384")
60
  embedding_dim = 384 # Common default for Ollama embeddings
61
+
62
  # Check if the index exists
63
  index_exists = False
64
  try:
 
67
  index_exists = self.index_name in index_list
68
  except Exception as e:
69
  print(f"Error checking Pinecone indexes: {e}")
70
+
71
  # Create index if it doesn't exist
72
  if not index_exists:
73
  try:
 
76
  name=self.index_name,
77
  dimension=embedding_dim,
78
  spec=ServerlessSpec(region="us-east-1", cloud="aws"),
79
+ metric="cosine",
80
  )
81
  print(f"Successfully created index: {self.index_name}")
82
  except Exception as e:
83
  if "ALREADY_EXISTS" in str(e):
84
+ print(
85
+ f"Index {self.index_name} already exists (created in another process)"
86
+ )
87
  else:
88
  print(f"Error creating index: {e}")
89
  else:
90
  print(f"Using Pinecone Index: {self.index_name}")
91
+
92
+ def store_documents(self, docs: list[Document], namespace: str) -> None:
93
  """Store documents in vector database.
94
+
95
  Args:
96
  docs: List of Document objects to store
97
  namespace: Namespace to store documents under
 
99
  try:
100
  # Get the index
101
  index = self.client.Index(self.index_name)
102
+
103
  # Create the vector store
104
  vector_store = Pinecone(
105
  index=index,
106
  embedding=self.embeddings,
107
  text_key="text",
108
+ namespace=namespace,
109
  )
110
+
111
  # Add documents
112
  vector_store.add_documents(docs)
113
+ print(
114
+ f"Successfully stored {len(docs)} documents in namespace: {namespace}"
115
+ )
116
  except Exception as e:
117
  print(f"Error storing documents: {e}")
118
  raise
119
+
120
+ def retrieve_similar(
121
+ self, query: str, namespace: str, k: int = 3
122
+ ) -> list[Document]:
123
  """Retrieve similar documents based on a query.
124
+
125
  Args:
126
  query: The query text to search for
127
  namespace: Namespace to search in
128
  k: Number of results to return
129
+
130
  Returns:
131
  List of Document objects
132
  """
133
  try:
134
  # Get the index
135
  index = self.client.Index(self.index_name)
136
+
137
  # Create the vector store
138
  vectorstore = Pinecone(
139
  index=index,
140
  embedding=self.embeddings,
141
  text_key="text",
142
+ namespace=namespace,
143
  )
144
+
145
  # Search for similar documents
146
  docs = vectorstore.similarity_search(query, k=k, namespace=namespace)
147
  return docs
148
  except Exception as e:
149
  print(f"Error retrieving documents: {e}")
150
  return []
 
 
 
151
 
 
152
 
153
+ # Example usage (commented out to prevent auto-execution)
154
+ # vector_store_manager = VectorStoreManager()
155
+ # vector_store_manager.store_documents(
156
+ # docs=[Document(page_content="Sample content", metadata={"source": "test"})],
157
+ # namespace="test_namespace"
158
+ # )
src/job_writing_agent/workflow.py CHANGED
@@ -3,33 +3,36 @@ Workflow runner for the job application writer.
3
  This module provides the JobWorkflow class and CLI runner.
4
  """
5
 
 
6
  import asyncio
7
  import logging
8
- import sys
9
  import os
 
10
  from datetime import datetime
11
  from functools import cached_property
12
- from typing import Optional, Dict, Any
13
 
 
14
  from langchain_core.tracers import ConsoleCallbackHandler, LangChainTracer
15
  from langgraph.graph import StateGraph
16
  from langgraph.graph.state import CompiledStateGraph
17
 
 
18
  from job_writing_agent.agents.nodes import (
19
  create_draft,
20
  critique_draft,
21
  finalize_document,
22
  human_approval,
23
  )
24
- from job_writing_agent.classes import DataLoadState
25
- from job_writing_agent.nodes.initializing import data_loading_workflow
26
  from job_writing_agent.nodes.research_workflow import research_workflow
27
  from job_writing_agent.utils.application_cli_interface import handle_cli
28
- from job_writing_agent.utils.result_utils import print_result, save_result
29
  from job_writing_agent.utils.logging.logging_decorators import (
30
- log_execution,
31
  log_errors,
 
32
  )
 
33
 
34
  logger = logging.getLogger(__name__)
35
 
@@ -84,12 +87,62 @@ class JobWorkflow:
84
  return {
85
  "resume_path": self.resume,
86
  "job_description_source": self.job_description_source,
87
- "content": self.content,
88
  "current_node": "",
89
  "messages": [],
90
  "company_research_data": {},
91
  }
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  def job_app_graph(self) -> StateGraph:
94
  """
95
  Build and configure the job application workflow graph.
@@ -111,58 +164,40 @@ class JobWorkflow:
111
  StateGraph
112
  Configured LangGraph state machine ready for compilation.
113
  """
114
- graph = StateGraph(DataLoadState)
115
 
116
  # Add workflow nodes (subgraphs and individual nodes)
117
- graph.add_node("load", data_loading_workflow)
118
- graph.add_node("research", research_workflow)
119
- graph.add_node("create_draft", create_draft)
120
- graph.add_node("critique", critique_draft)
121
- graph.add_node("human_approval", human_approval)
122
- graph.add_node("finalize", finalize_document)
 
 
 
123
 
124
  # Set entry and exit points
125
- graph.set_entry_point("load")
126
- graph.set_finish_point("finalize")
127
-
128
- # Conditional routing after data loading
129
- def route_after_load(state: DataLoadState) -> str:
130
- """
131
- Route based on next_node set by data loading subgraph.
132
-
133
- The data loading subgraph sets next_node to either "load" (if validation
134
- fails) or "research" (if validation passes).
135
-
136
- Parameters
137
- ----------
138
- state: DataLoadState
139
- Current workflow state.
140
-
141
- Returns
142
- -------
143
- str
144
- Next node name: "load" or "research".
145
- """
146
- next_node = state.get("next_node", "research") # Default to research
147
- logger.info(f"Routing after load: {next_node}")
148
- return next_node
149
-
150
- graph.add_conditional_edges(
151
  "load",
152
- route_after_load,
153
  {
154
  "load": "load", # Loop back to load subgraph if validation fails
155
- "research": "research", # Proceed to research if validation passes
156
  },
157
  )
158
 
159
  # Sequential edges for main workflow
160
- graph.add_edge("research", "create_draft")
161
- graph.add_edge("create_draft", "critique")
162
- graph.add_edge("critique", "human_approval")
163
- graph.add_edge("human_approval", "finalize")
 
164
 
165
- return graph
166
 
167
  def _get_callbacks(self) -> list:
168
  """
@@ -208,7 +243,7 @@ class JobWorkflow:
208
 
209
  @log_execution
210
  @log_errors
211
- async def run(self) -> Optional[Dict[str, Any]]:
212
  """
213
  Execute the complete job application writer workflow.
214
 
@@ -289,7 +324,8 @@ class JobWorkflow:
289
  Exception
290
  If graph compilation fails (e.g., invalid edges, missing nodes).
291
  """
292
- return self.job_app_graph.compile()
 
293
 
294
 
295
  def main():
@@ -300,7 +336,6 @@ def main():
300
  content=args.content_type,
301
  )
302
  result = asyncio.run(workflow.run())
303
- # print(f"result: {result}")
304
  if result:
305
  print_result(args.content_type, result["output_data"])
306
  save_result(args.content_type, result["output_data"])
 
3
  This module provides the JobWorkflow class and CLI runner.
4
  """
5
 
6
+ # Standard library imports
7
  import asyncio
8
  import logging
 
9
  import os
10
+ import sys
11
  from datetime import datetime
12
  from functools import cached_property
13
+ from typing import Any
14
 
15
+ # Third-party imports
16
  from langchain_core.tracers import ConsoleCallbackHandler, LangChainTracer
17
  from langgraph.graph import StateGraph
18
  from langgraph.graph.state import CompiledStateGraph
19
 
20
+ # Local imports
21
  from job_writing_agent.agents.nodes import (
22
  create_draft,
23
  critique_draft,
24
  finalize_document,
25
  human_approval,
26
  )
27
+ from job_writing_agent.classes import DataLoadState, ResearchState
28
+ from job_writing_agent.nodes.data_loading_workflow import data_loading_workflow
29
  from job_writing_agent.nodes.research_workflow import research_workflow
30
  from job_writing_agent.utils.application_cli_interface import handle_cli
 
31
  from job_writing_agent.utils.logging.logging_decorators import (
 
32
  log_errors,
33
+ log_execution,
34
  )
35
+ from job_writing_agent.utils.result_utils import print_result, save_result
36
 
37
  logger = logging.getLogger(__name__)
38
 
 
87
  return {
88
  "resume_path": self.resume,
89
  "job_description_source": self.job_description_source,
90
+ "content_category": self.content,
91
  "current_node": "",
92
  "messages": [],
93
  "company_research_data": {},
94
  }
95
 
96
+ # Conditional routing after data loading
97
+ def route_after_load(self, state: DataLoadState) -> str:
98
+ """
99
+ Route based on next_node set by data loading subgraph.
100
+
101
+ The data loading subgraph sets next_node to either "load" (if validation
102
+ fails) or "research" (if validation passes).
103
+
104
+ Parameters
105
+ ----------
106
+ state: DataLoadState
107
+ Current workflow state.
108
+
109
+ Returns
110
+ -------
111
+ str
112
+ Next node name: "load" or "research".
113
+ """
114
+ next_node = state.get("next_node", "research") # Default to research
115
+ logger.info(f"Routing after load: {next_node}")
116
+ return next_node
117
+
118
+ def dataload_to_research_adapter(self, state: DataLoadState) -> ResearchState:
119
+ """
120
+ Adapter to convert DataLoadState to ResearchState.
121
+
122
+ Extracts only fields needed for research workflow following the
123
+ adapter pattern recommended by LangGraph documentation.
124
+
125
+ Parameters
126
+ ----------
127
+ state: DataLoadState
128
+ Current workflow state with loaded data.
129
+
130
+ Returns
131
+ -------
132
+ ResearchState
133
+ State formatted for research subgraph with required fields.
134
+ """
135
+ logger.info("Adapter for converting DataLoadState to ResearchState")
136
+
137
+ return ResearchState(
138
+ company_research_data=state.get("company_research_data", {}),
139
+ attempted_search_queries=[],
140
+ current_node="",
141
+ content_category=state.get("content_category", ""),
142
+ messages=state.get("messages", []),
143
+ )
144
+
145
+ @cached_property
146
  def job_app_graph(self) -> StateGraph:
147
  """
148
  Build and configure the job application workflow graph.
 
164
  StateGraph
165
  Configured LangGraph state machine ready for compilation.
166
  """
167
+ agent_workflow_graph = StateGraph(DataLoadState)
168
 
169
  # Add workflow nodes (subgraphs and individual nodes)
170
+ agent_workflow_graph.add_node("load", data_loading_workflow)
171
+ agent_workflow_graph.add_node(
172
+ "to_research_adapter", self.dataload_to_research_adapter
173
+ )
174
+ agent_workflow_graph.add_node("research", research_workflow)
175
+ agent_workflow_graph.add_node("create_draft", create_draft)
176
+ agent_workflow_graph.add_node("critique", critique_draft)
177
+ agent_workflow_graph.add_node("human_approval", human_approval)
178
+ agent_workflow_graph.add_node("finalize", finalize_document)
179
 
180
  # Set entry and exit points
181
+ agent_workflow_graph.set_entry_point("load")
182
+ agent_workflow_graph.set_finish_point("finalize")
183
+
184
+ agent_workflow_graph.add_conditional_edges(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  "load",
186
+ self.route_after_load,
187
  {
188
  "load": "load", # Loop back to load subgraph if validation fails
189
+ "research": "to_research_adapter", # Route to adapter first
190
  },
191
  )
192
 
193
  # Sequential edges for main workflow
194
+ agent_workflow_graph.add_edge("to_research_adapter", "research")
195
+ agent_workflow_graph.add_edge("research", "create_draft")
196
+ agent_workflow_graph.add_edge("create_draft", "critique")
197
+ agent_workflow_graph.add_edge("critique", "human_approval")
198
+ agent_workflow_graph.add_edge("human_approval", "finalize")
199
 
200
+ return agent_workflow_graph
201
 
202
  def _get_callbacks(self) -> list:
203
  """
 
243
 
244
  @log_execution
245
  @log_errors
246
+ async def run(self) -> dict[str, Any] | None:
247
  """
248
  Execute the complete job application writer workflow.
249
 
 
324
  Exception
325
  If graph compilation fails (e.g., invalid edges, missing nodes).
326
  """
327
+ compiled_graph = self.job_app_graph.compile()
328
+ return compiled_graph
329
 
330
 
331
  def main():
 
336
  content=args.content_type,
337
  )
338
  result = asyncio.run(workflow.run())
 
339
  if result:
340
  print_result(args.content_type, result["output_data"])
341
  save_result(args.content_type, result["output_data"])