Rishabh2095 commited on
Commit
a8b79ed
·
0 Parent(s):

First Commit

Browse files
.gitignore ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+ MANIFEST
23
+
24
+ # Python Virtual Environments
25
+ env/
26
+ venv/
27
+ ENV/
28
+ env.bak/
29
+ venv.bak/
30
+ .env
31
+ .venv
32
+
33
+ # Jupyter Notebook
34
+ .ipynb_checkpoints
35
+ */.ipynb_checkpoints/*
36
+
37
+ # IDE specific files
38
+ .idea/
39
+ .vscode/
40
+ *.swp
41
+ *.swo
42
+ *.swn
43
+ .DS_Store
44
+
45
+ # API keys and secrets
46
+ .env
47
+ .secrets
48
+ *.pem
49
+ *.key
50
+ langsmith_api_key.txt
51
+
52
+ # Logs and databases
53
+ *.log
54
+ *.sql
55
+ *.sqlite
56
+ logs/
57
+
58
+ # Local development settings
59
+ local_settings.py
60
+
61
+ # Pytest and coverage reports
62
+ .pytest_cache/
63
+ htmlcov/
64
+ .tox/
65
+ .coverage
66
+ .coverage.*
67
+ coverage.xml
68
+ *.cover
69
+ .hypothesis/
70
+ .pylintrcls
71
+
72
+ # Documentation
73
+ docs/_build/
74
+ site/
75
+
76
+ # Type checking
77
+ .mypy_cache/
78
+ .dmypy.json
79
+ dmypy.json
80
+ .pyre/
81
+
82
+ # LangChain related
83
+ .langchain.db
84
+ langsmith.db
85
+ .langgraph_api/
86
+
87
+ # Temporary files
88
+ tmp/
89
+ tests/
90
+ temp/
91
+ *.tmp
92
+ *.temp
93
+
94
+ # LangGraph specific
95
+ langgraph.db
96
+ *.db
97
+
98
+ # Output files (if you generate reports/documents)
99
+ output/
100
+ reports/
101
+ generated/
102
+
103
+ # Test artifacts
104
+ .pytest_cache/
105
+ test-results/
106
+ test_output/
107
+
108
+ # OS specific
109
+ Thumbs.db
110
+ ehthumbs.db
111
+ Desktop.ini
112
+
113
+ # Github
114
+ .github/
115
+
116
+ # Miscellaneous
117
+ parsed_text.json
README.md ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Job Writer Module
2
+
3
+ A modular, well-structured package for creating tailored job applications using LangChain and LangGraph with LangSmith observability.
4
+
5
+ ## Features
6
+
7
+ - Creates personalized job application materials based on resumes and job descriptions
8
+ - Supports multiple application types: cover letters, bullet points, and LinkedIn messages
9
+ - Uses RAG for personalization and web search for company research
10
+ - Provides human-in-the-loop feedback integration
11
+ - Implements self-consistency voting for quality control
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ # Install the package and its dependencies
17
+ pip install -e .
18
+
19
+ # Install development dependencies (including linting tools)
20
+ pip install -r requirements-dev.txt
21
+ ```
22
+
23
+ ## Code Standards and Linting
24
+
25
+ This project uses several tools to ensure code quality:
26
+
27
+ 1. **Black** - Code formatter that enforces consistent style
28
+ 2. **isort** - Sorts imports according to best practices
29
+ 3. **Flake8** - Style guide enforcement
30
+ 4. **mypy** - Static type checking
31
+
32
+ ### Running the Linters
33
+
34
+ ```bash
35
+ # Format code with Black
36
+ black job_writer/
37
+
38
+ # Sort imports
39
+ isort job_writer/
40
+
41
+ # Check style with Flake8
42
+ flake8 job_writer/
43
+
44
+ # Type checking with mypy
45
+ mypy job_writer/
46
+ ```
47
+
48
+ ### Pre-commit Hooks
49
+
50
+ We use pre-commit hooks to automatically run linters before each commit:
51
+
52
+ ```bash
53
+ # Install the pre-commit hooks
54
+ pip install pre-commit
55
+ pre-commit install
56
+
57
+ # You can also run the hooks manually
58
+ pre-commit run --all-files
59
+ ```
60
+
61
+ ## Usage Example
62
+
63
+ ```python
64
+ import asyncio
65
+ from job_writer.workflow import run_job_application_writer
66
+
67
+ # Run the job application writer
68
+ result = asyncio.run(run_job_application_writer(
69
+ resume_path="path/to/resume.pdf",
70
+ job_desc_path="https://example.com/job-posting",
71
+ content="cover_letter"
72
+ ))
73
+
74
+ print(result["final"])
75
+ ```
76
+
77
+ Alternatively, you can use the command-line interface:
78
+
79
+ ```bash
80
+ python -m job_writer.workflow --resume path/to/resume.pdf --job https://example.com/job-posting --type cover_letter
81
+ ```
__init__.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Job Application Writer Package

A modular, well-structured package for creating tailored job applications
using LangChain and LangGraph with LangSmith observability.
"""

__version__ = "0.1.0"

import getpass
import logging
import os
from pathlib import Path

from dotenv import load_dotenv
from langfuse import Langfuse


# Set up logging: package-level logger writing to <package>/logs/job_writer.log.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
log_dir = Path(__file__).parent / 'logs'
log_dir.mkdir(exist_ok=True)
log_file = log_dir / 'job_writer.log'
logger.addHandler(logging.FileHandler(log_file, mode='a'))
# BUG FIX: the original message reported the wrong path (parent dir, not logs/).
logger.info("Logger initialized. Writing to %s", log_file)

# Load environment variables from .env file
env_path = Path(__file__).parent / '.env'


def _set_env(var: str) -> None:
    """Interactively prompt for *var* if it is not already in the environment.

    SECURITY FIX: the original logged the secret's value after reading it;
    only the variable *name* is logged now.
    """
    if not os.environ.get(var):
        os.environ[var] = getpass.getpass(f"{var}: ")
        logger.info("%s has been set.", var)


if env_path.exists():
    logger.info("Loading environment variables from %s", env_path)
    load_dotenv(dotenv_path=env_path, override=True)
else:
    logger.warning(".env file not found at %s. Using system environment variables.", env_path)

# Check for critical environment variables; prompt for any that are missing.
# (Replaces five copy-pasted stanzas with a single loop.)
_REQUIRED_ENV_VARS = (
    "TAVILY_API_KEY",
    "GEMINI_API_KEY",
    "PINECONE_API_KEY",
    "LANGFUSE_PUBLIC_KEY",
    "LANGFUSE_SECRET_KEY",
)
for _var in _REQUIRED_ENV_VARS:
    if not os.getenv(_var):
        logger.warning("%s environment variable is not set."
                       " Failed to get %s at Path %s", _var, _var, env_path)
        _set_env(_var)

# BUG FIX: "workflows/research_workflow" is not a valid Python identifier and
# could never be exported via `from package import *`; export only real names.
__all__: list[str] = ["job_app_graph"]
agents/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Agent modules for job application generation.
3
+ """
agents/nodes.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Node functions for the job application writer LangGraph.
3
+
4
+ This module contains all the node functions used in the job application
5
+ writer workflow graph, each handling a specific step in the process.
6
+ """
7
+
8
+ import logging
9
+ from datetime import datetime
10
+
11
+ from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
12
+ from langchain_core.output_parsers import StrOutputParser
13
+
14
+ from ..classes.classes import AppState
15
+ from ..prompts.templates import (
16
+ CRITIQUE_PROMPT,
17
+ PERSONA_DEVELOPMENT_PROMPT,
18
+ COVER_LETTER_PROMPT,
19
+ REVISION_PROMPT,
20
+ BULLET_POINTS_PROMPT,
21
+ LINKEDIN_NOTE_PROMPT,
22
+ )
23
+ from ..utils.llm_client import LLMClient
24
+
25
# Module-level logger for the node functions in this file.
logger = logging.getLogger(__name__)

# Constants
# Human-readable date, e.g. "Monday, January 01, 2024" (frozen at import time).
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")

# NOTE(review): two separate LLMClient instances are constructed here; `LLM`
# appears unused by the node functions below — confirm whether it can be
# removed or whether `llm` should be derived from it.
LLM = LLMClient()
llm = LLMClient().get_llm()
31
+
32
+
33
def create_draft(state: AppState) -> AppState:
    """Create the initial draft of the application material.

    When a vector store is present on the state, the job description is first
    distilled into key requirements which are used to retrieve the most
    relevant resume sections. The draft itself is produced by the prompt
    template matching the requested content category.

    Args:
        state: Workflow state; reads ``company_research_data``,
            ``content_category``, ``vector_store`` and ``session_id``.

    Returns:
        The same state with ``draft`` populated (or unchanged on early exit).
    """
    current_application_session = state.get("company_research_data", {})
    content_category = state.get("content_category", "cover_letter")

    try:
        if state.get("vector_store"):
            vector_store = state.get("vector_store")

            # Extract key requirements from the job description.
            prompt = PERSONA_DEVELOPMENT_PROMPT | llm | StrOutputParser()

            if current_application_session:
                key_requirements = prompt.invoke(
                    {"job_description": current_application_session["job_description"]}
                )
            else:
                # BUG FIX: the original returned the *unbound* name
                # `key_requirements` here, raising NameError. Without session
                # data there is nothing to draft from, so return state as-is.
                return state

            if not key_requirements:
                print("Warning: No key requirements found in the job description.")
                return state

            # Use the key requirements to query for the most relevant resume parts.
            namespace = f"resume_{state['session_id']}"
            relevant_docs = vector_store.retrieve_similar(
                query=key_requirements,
                namespace=namespace,
                k=3,
            )

            # Weight the most relevant sections ahead of the full resume.
            highly_relevant_resume = "\n".join(doc.page_content for doc in relevant_docs)
            # BUG FIX: the original interpolated `resume_text` into its own
            # initializer (UnboundLocalError); read the full resume from the
            # session data instead. NOTE(review): `resume_text` is not used
            # further below — confirm whether it should feed the prompt inputs.
            full_resume = current_application_session.get("resume", "")
            resume_text = f"""
            # Most Relevant Experience
            {highly_relevant_resume}

            # Full Resume
            {full_resume}
            """
    except Exception as e:
        print(f"Warning: Could not use vector search for relevant resume parts: {e}")
        # Continue with the regular resume text.

    # Select the appropriate prompt template based on application type.
    print(f"Content category: {content_category}")
    if content_category == "bullets":
        FirstDraftGenerationPromptTemplate = ChatPromptTemplate([BULLET_POINTS_PROMPT])
    elif content_category == "linkedin_connect_request":
        FirstDraftGenerationPromptTemplate = ChatPromptTemplate([LINKEDIN_NOTE_PROMPT])
    else:
        FirstDraftGenerationPromptTemplate = ChatPromptTemplate([COVER_LETTER_PROMPT])

    # Human message carrying the per-session context (job description + research).
    # BUG FIX: `#company_research_data` was a literal, so the research data was
    # never injected even though it is declared in input_variables; it is now a
    # real `{company_research_data}` placeholder.
    CurrentSessionContextMessage = HumanMessagePromptTemplate.from_template(
        """
        Below is the Job Description and Resume enclosed in triple backticks.

        Job Description and Resume:

        ```
        {current_job_role}

        ```
        Use the Company Research Data below in to create a cover letter that highlights the match between my qualifications and the job requirements and aligns with the company's values and culture.
        Company Research Data:
        {company_research_data}

        Create a cover letter that highlights the match between my qualifications and the job requirements.
        """,
        input_variables=["current_job_role",
                         "company_research_data"])

    FirstDraftGenerationPromptTemplate.append(CurrentSessionContextMessage)

    # Invoke the chain with the appropriate inputs.
    chain = (
        {"current_job_role": lambda x: x["current_job_role"],
         "company_research_data": lambda x: x["company_research_data"]}
        | FirstDraftGenerationPromptTemplate
        | llm
    )

    # Prepare the inputs.
    inputs = {
        "current_job_role": current_application_session['job_description'],
        "company_research_data": current_application_session["tavily_search"]}

    response = chain.invoke(inputs)
    print(f"Draft created: {response}")
    state["draft"] = response
    return state
126
+
127
def critique_draft(state: AppState) -> AppState:
    """Generate an automatic critique of the current draft.

    The critique is stored on the state so it can be shown alongside the
    draft during the human-feedback step.
    """
    critique_request = CRITIQUE_PROMPT.format(
        job_description=state["job_description"][0],
        draft=state["draft"],
    )
    state["critique"] = llm.invoke(critique_request)
    return state
137
+
138
+
139
def human_approval(state: AppState) -> AppState:
    """Human-in-the-loop checkpoint for feedback on the draft.

    Placeholder for real UI interaction: prints the draft and the automatic
    critique to the console, then collects free-form feedback via input().
    """
    separator = "=" * 80
    review_screen = (
        "\n" + separator,
        "DRAFT FOR REVIEW:",
        state["draft"],
        "\nAUTOMATIC CRITIQUE:",
        state.get("critique", "No critique available"),
        separator,
        "\nPlease provide your feedback (press Enter to continue with no changes):",
    )
    for line in review_screen:
        print(line)

    # In a real implementation, this would be handled by the UI.
    state["feedback"] = input()
    return state
154
+
155
+
156
def finalize_document(state: AppState) -> AppState:
    """Incorporate human feedback and produce the final document.

    Blank feedback accepts the draft verbatim; otherwise the draft is
    revised by the LLM according to the feedback.
    """
    feedback = state["feedback"]

    # No actionable feedback: the draft is the final version.
    if not feedback.strip():
        state["final"] = state["draft"]
        return state

    revision_request = REVISION_PROMPT.format(
        draft=state["draft"],
        feedback=feedback,
    )
    state["final"] = llm.invoke(revision_request)
    return state
169
+
170
+
171
# Decision function for conditional routing
def determine_next_step(state: AppState) -> str:
    """Route to company research when a company name is known; without one
    there is nothing to research, so go straight to drafting."""
    return "research" if state["company_name"] else "draft"
agents/output_schema.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pydantic import BaseModel, Field, field_validator
from typing import List, Optional


class TavilyQuerySet(BaseModel):
    """Structured set of up to five Tavily search queries.

    Each ``queryN`` field, when provided, must be a list containing exactly
    one string: the query text. (The field descriptions previously claimed
    each list held a query *and its rationale*, contradicting the validator.)
    """

    query1: Optional[List[str]] = Field(default=None, description="First search query as a one-item list, e.g., ['query text']")
    query2: Optional[List[str]] = Field(default=None, description="Second search query as a one-item list")
    query3: Optional[List[str]] = Field(default=None, description="Third search query as a one-item list")
    query4: Optional[List[str]] = Field(default=None, description="Fourth search query as a one-item list")
    query5: Optional[List[str]] = Field(default=None, description="Fifth search query as a one-item list")

    @field_validator("query1", "query2", "query3", "query4", "query5", mode="after")
    @classmethod
    def ensure_single_query(cls, v):
        """Ensure each provided query list contains exactly one string: [query].

        Renamed from ``ensure_len_two``, which contradicted the ``len != 1``
        check it actually performs.
        """
        if v is not None:  # Only validate if the list is actually provided
            if len(v) != 1:
                raise ValueError("Each query list, when provided, must contain exactly one string: the query text.")
        return v
classes/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .classes import AppState, ResearchState, DataLoadState
2
+
3
+ __all__ = ["AppState", "ResearchState", "DataLoadState"]
classes/classes.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
State definitions for the Job Writer LangGraph Workflow.
"""

from typing_extensions import List, Dict, Any
from langgraph.graph import MessagesState


class AppState(MessagesState):
    """
    State container for the job application writer workflow.

    Attributes:
        resume_path: Filesystem path to the candidate's resume
        job_description_source: Path or URL of the job description
        company_research_data: Aggregated research data about the company
        draft: Current draft of the application material
        feedback: Human feedback on the draft
        final: Final version of the application material
        content: Type of application material to generate
        current_node: Name of the most recently executed graph node
    """
    resume_path: str
    job_description_source: str
    company_research_data: Dict[str, Any]
    draft: str
    feedback: str
    final: str
    content: str  # "cover_letter", "bullets", "linkedin_note"
    current_node: str


class DataLoadState(MessagesState):
    """
    State container for the data-loading phase of the workflow.

    Attributes:
        resume_path: Filesystem path to the candidate's resume
        job_description_source: Path or URL of the job description
        resume: Raw resume text
        job_description: Raw job description text
        company_name: Extracted company name
        current_node: Name of the most recently executed graph node
        company_research_data: Aggregated research data about the company
    """
    resume_path: str
    job_description_source: str
    resume: str
    job_description: str
    company_name: str
    current_node: str
    company_research_data: Dict[str, Any]


class ResearchState(MessagesState):
    """
    State container for the company-research phase of the workflow.

    Attributes:
        company_research_data: Research data, including Tavily search results
        attempted_search_queries: Queries derived from the job description
        current_node: Name of the most recently executed graph node
    """
    company_research_data: Dict[str, Any]
    attempted_search_queries: List[str]
    current_node: str
langgraph.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dependencies": [
3
+ "."
4
+ ],
5
+ "graphs": {
6
+ "job_application": "langgraph_init:job_app_graph"
7
+ },
8
+ "env": "./.env",
9
+ "python_version": "3.11"
10
+ }
langgraph_init.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
from .workflow import JobWorkflow

# Compiled application graph, referenced by langgraph.json as the
# "job_application" entry point.
job_app_graph = JobWorkflow().compile()
nodes/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Mon Oct 23 16:49:52 2023
4
+ @author: rishabhaggarwal
5
+ """
6
+
7
+ from .initializing import Dataloading
8
+ # from .createdraft import CreateDraft
9
+ from .variations import generate_variations
10
+ from .selfconsistency import self_consistency_vote
11
+ from .research_workflow import research_workflow
12
+
13
+ __all__ = ["Dataloading", "generate_variations", "self_consistency_vote", "research_workflow"]
nodes/createdraft.py ADDED
File without changes
nodes/initializing.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Mon Oct 23 16:49:52 2023
4
+ @author: rishabhaggarwal
5
+ """
6
+ import os
7
+ import logging
8
+ from typing_extensions import Literal
9
+
10
+ from langchain_core.documents import Document
11
+ from langchain_core.messages import SystemMessage
12
+
13
+ from job_writer.classes import AppState, DataLoadState
14
+ from job_writer.utils.document_processing import (
15
+ parse_resume,
16
+ get_job_description
17
+ )
18
+
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class Dataloading:
    """
    Initialize the state for the job application writer workflow.

    Loads and parses the resume and job description, seeds the assistant
    persona via a system message, and verifies the loaded inputs.
    """
    def __init__(self):
        pass

    async def system_setup(self, state: AppState) -> DataLoadState:
        """Initialize conversation by setting up a persona through System Prompt.

        Verifies the resume path and appends the persona SystemMessage to the
        running message list.
        NOTE(review): all path-validation failures only log and fall through;
        execution continues with an invalid path — confirm whether these
        should raise (the commented-out raises suggest that was considered).
        """

        resume_path = state.get("resume_path")

        # Verify if the resume file path provided is valid
        if not resume_path:
            logger.error("Resume path is not provided in the state.")
        elif not os.path.exists(resume_path):
            logger.error("Resume file does not exist at path: %s", resume_path)
            # Similar handling as above:
            # raise FileNotFoundError(f"Resume file not found: {resume_path}")
        elif not os.path.isfile(resume_path):
            logger.error("The path provided for the resume is not a file: %s", resume_path)
            # Similar handling:
            # raise ValueError(f"Resume path is not a file: {resume_path}")
        else:
            logger.info("Resume path verified: %s", resume_path)

        persona_init_message = SystemMessage(
            content="You are my dedicated assistant for writing job application content, "
                    "including cover letters, LinkedIn outreach messages, and responses to "
                    "job-specfific questions (e.g., experience, culture fit, or motivation)."
        )
        messages = state.get("messages", [])
        messages.append(persona_init_message)

        return {
            **state,
            "messages": messages,
            "current_node": "initialize_system"
        }

    async def get_resume(self, resume_source):
        """
        Parse the resume at *resume_source* and return its concatenated text.

        Accepts chunks that are Document-like (having a non-empty
        ``page_content``) or plain non-empty strings; anything else is
        skipped with a debug log. Re-raises any parsing error after printing.
        """
        try:
            print("Parsing resume....")
            resume_text = ""
            resume_chunks = parse_resume(resume_source)
            for chunk in resume_chunks:
                if hasattr(chunk, 'page_content') and chunk.page_content:
                    resume_text += chunk.page_content
                elif isinstance(chunk, str) and chunk:  # If parse_resume (util) returns list of strings
                    resume_text += chunk
                else:
                    logger.debug("Skipping empty or invalid chunk in resume: %s", chunk)
                    continue
            return resume_text
        except Exception as e:
            print(f"Error parsing resume: {e}")
            raise e

    async def parse_job_description(self, job_description_source):
        """Parse the job description and return (job_posting_text, company_name).

        Both values default to "" when the parsed Document (or the relevant
        attribute/metadata key) is missing. Re-raises on parsing errors.
        """
        try:
            logger.info("Parsing job description from: %s", job_description_source)
            document: Document = get_job_description(job_description_source)

            company_name = ""
            job_posting_text = ""

            if document:
                # Extract company name from metadata
                if hasattr(document, 'metadata') and isinstance(document.metadata, dict):
                    company_name = document.metadata.get("company_name", "")
                    if not company_name:
                        logger.warning("Company name not found in job description metadata.")
                else:
                    logger.warning("Metadata attribute not found or not a dictionary in the Document for job description.")

                # Extract the job posting text from page_content
                if hasattr(document, 'page_content'):
                    job_posting_text = document.page_content
                    if not job_posting_text:
                        logger.info("Parsed job posting text is empty.")
                else:
                    logger.warning("page_content attribute not found in the Document for job description.")
            else:
                logger.warning("get_job_description returned None for source: %s", job_description_source)

            return job_posting_text, company_name

        except Exception as e:
            logger.error("Error parsing job description from source '%s': %s", job_description_source, e, exc_info=True)
            raise e

    async def load_inputs(self, state: DataLoadState) -> AppState:
        """
        Parse the resume and job description to prepare the data from the context
        which is required for the job application writer for the current state.

        On job-description errors a placeholder text is substituted so the
        workflow can continue; resume errors are re-raised.
        """

        resume_source = state.get("resume_path", "")
        job_description_source = state.get("job_description_source", None)

        # Initialize result containers
        resume_text = ""
        job_posting_text = ""
        company_name = ""
        resume_chunks = []

        # Handle job description input
        if job_description_source:
            try:
                job_posting_text, company_name = await self.parse_job_description(job_description_source)
                print(f"Job description parsing complete. Length: {len(job_posting_text) if job_posting_text else 0}")

                # Ensure job_posting_text is not empty
                if not job_posting_text:
                    print("WARNING: Job posting text is empty after parsing.")
                    job_posting_text = "No job description available. Please check the URL or provide a different source."
            except Exception as e:
                print(f"Error parsing job description: {e} in file {__file__}")
                # Set a default value to prevent errors
                job_posting_text = "Error parsing job description."
                company_name = "Unknown Company"

        if resume_source:
            try:
                resume_text = await self.get_resume(resume_source)
            except Exception as e:
                print(f"Error parsing resume: {e} in file {__file__}")
                raise e

        # If either is missing, prompt the user.
        # NOTE(review): these fallbacks wrap the text in lists
        # ([Document(...)] / [job_text]) while the rest of the code treats
        # resume/job_description as plain strings — confirm intended types.
        if state["current_node"] == "verify" and not resume_text:
            resume_chunks = input("Please paste the resume in text format: ")
            resume_text = [Document(page_content=resume_chunks, metadata={"source": "resume"})]

        if state["current_node"] == "verify" and not job_posting_text:
            job_text = input("Please paste the job posting in text format: ")
            job_posting_text = [job_text]

        # Bundle parsed inputs (and extracted company name) into the state.
        state["company_research_data"] = {'resume': resume_text, 'job_description': job_posting_text, 'company_name': company_name}

        state["current_node"] = "load_inputs"

        return state

    def validate_data_load_state(self, state: DataLoadState):
        # NOTE(review): attribute access (state.company_research_data) on a
        # MessagesState is used here, while every other method uses item
        # access (state["..."]) — confirm this works at runtime. Also note
        # asserts are stripped under `python -O`.
        assert state.company_research_data.get("resume"), "Resume is missing in company_research_data"
        assert state.company_research_data.get("job_description"), "Job description is missing"

    def verify_inputs(self, state: AppState) -> Literal["load", "research"]:
        """Verify that required inputs are present.

        Returns "research" when resume and job description are loaded, or
        "load" to route back to data loading.
        NOTE(review): the asserts below fire *before* the missing-data branch,
        which makes the "load" fallback unreachable when data is actually
        missing — confirm the intended order.
        """

        print("Verifying Inputs")
        state["current_node"] = "verify"

        logger.info("Verifying loaded inputs!")

        assert state["company_research_data"].get("resume"), "Resume is missing in company_research_data"
        assert state["company_research_data"].get("job_description"), "Job description is missing"

        if not state.get("company_research_data"):
            missing_items = []
            if not state.get("company_research_data").get("resume", ""):
                missing_items.append("resume")
            if not state.get("company_research_data").get("job_description", ""):
                missing_items.append("job description")
            print(f'Missing required data: {", ".join(missing_items)}')

            return "load"

        # Normalize state content to strings
        for key in ["resume", "job_description"]:
            try:
                if isinstance(state["company_research_data"][key], (list, tuple)):
                    state["company_research_data"][key] = " ".join(str(x) for x in state["company_research_data"][key])
                elif isinstance(state["company_research_data"][key], dict):
                    state["company_research_data"][key] = str(state["company_research_data"][key])
                else:
                    state["company_research_data"][key] = str(state["company_research_data"][key])
            except Exception as e:
                logger.warning("Error converting %s to string: %s", key, e)
                raise e

        return "research"

    async def run(self, state: DataLoadState) -> AppState:
        """
        Run the InitializeState class to initialize
        the state for the job application writer workflow.
        """
        state = await self.load_inputs(state)
        return state
nodes/research_workflow.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ This module performs the research phase of the job application writing process.
4
+ One of the stages is Tavily Search which will be use to search for the company
5
+ """
6
+ import logging
7
+ from langgraph.graph import StateGraph, START, END
8
+
9
+ from job_writer.tools.TavilySearch import relevance_filter, search_company
10
+ from job_writer.classes.classes import ResearchState
11
+
12
logger = logging.getLogger(__name__)

# Set up logging
# NOTE(review): `logger` is assigned twice (this duplicates the line above),
# and basicConfig here configures the root logger as an import side effect —
# consider removing one assignment and moving basicConfig to the entrypoint.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
17
+
18
+
19
async def research_company(state: ResearchState) -> ResearchState:
    """Research the company if name is available.

    Invokes the `search_company` tool with the job description and company
    name, then stores the results and the attempted queries on the state.
    On any error the workflow is kept alive with empty results.

    Args:
        state: Research state; reads ``company_research_data``.

    Returns:
        The same state with ``attempted_search_queries`` and
        ``company_research_data["tavily_search"]`` populated.
    """
    state["current_node"] = "research_company"

    try:
        # Extract values from state
        company_name = state["company_research_data"].get("company_name", "")
        job_description = state["company_research_data"].get("job_description", "")

        # IDIOM FIX: use lazy %-style logging args instead of eager f-strings.
        logger.info("Researching company: %s", company_name)
        # Call search_company via invoke (tool interface); it is expected to
        # return a (results, attempted_queries) tuple.
        result = search_company.invoke({
            "job_description": job_description,
            "company_name": company_name
        })
        # Unpack the tuple, tolerating a bare-result return.
        if isinstance(result, tuple) and len(result) == 2:
            results, attempted_tavily_query_list = result
        else:
            results = result
            attempted_tavily_query_list = []

        logger.info("Search completed with results and %d queries", len(attempted_tavily_query_list))

        # Store results in state.
        state["attempted_search_queries"] = attempted_tavily_query_list
        state["company_research_data"]["tavily_search"] = results

    except Exception as e:
        # ROBUSTNESS FIX: include the traceback when logging the failure.
        logger.error("Error in research_company: %s", e, exc_info=True)
        # Provide empty results to avoid breaking the workflow.
        state["company_research_data"]["tavily_search"] = {"error": str(e), "tavily_search": []}
        state["attempted_search_queries"] = []

    return state
56
+
57
print("\n\n\nInitializing research workflow...\n\n\n")
# Create research subgraph over the ResearchState schema.
research_subgraph = StateGraph(ResearchState)

# Add research subgraph nodes: the Tavily search step, then a relevance
# filter over its results.
research_subgraph.add_node("research_company", research_company)
research_subgraph.add_node("relevance_filter", relevance_filter)

# Add research subgraph edges:
# START -> research_company -> relevance_filter -> END (linear pipeline).
research_subgraph.add_edge(START, "research_company")
research_subgraph.add_edge("research_company", "relevance_filter")
research_subgraph.add_edge("relevance_filter", END)

# Compile research subgraph into a runnable workflow (imported by nodes/__init__).
research_workflow = research_subgraph.compile()
73
+
74
+
75
+ # class ResearchWorkflow:
76
+
77
+ # def __init__(self):
78
+ # self.research_workflow = research_workflow
79
+
80
+
81
+
nodes/selfconsistency.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Self-consistency voting: rate multiple draft variations and keep the best."""
import json
import logging
import re
from datetime import datetime

from ..classes.classes import AppState
from ..prompts.templates import (
    DRAFT_RATING_PROMPT,
    BEST_DRAFT_SELECTION_PROMPT
)
from ..utils.llm_client import LLMClient

logger = logging.getLogger(__name__)
# Constants
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")

# BUG FIX: the original referenced `llm_precise`, `json` and `re` without ever
# importing/defining them (the client lines were commented out), guaranteeing
# a NameError at runtime. The precise client is now instantiated explicitly.
llm_precise = LLMClient().get_llm()


def self_consistency_vote(state: AppState) -> AppState:
    """Choose the best draft from multiple variations.

    Rates the original draft plus each variation against the resume and job
    summaries, asks the LLM to pick the best one, and stores the winner in
    ``state["draft"]``. Falls back to the original draft on any selection
    problem.
    """
    variations = state.get("variations", {"variations": []})

    # Draft 1 is always the original; variations follow.
    all_drafts = [state["draft"]] + variations["variations"]

    # First, have the LLM rate each draft
    ratings = []

    # Get resume and job summaries, handling different formats
    # (list of Documents, list of strings, or a bare value).
    try:
        if isinstance(state["resume"], list) and len(state["resume"]) > 0:
            if hasattr(state["resume"][0], 'page_content'):
                resume_summary = state["resume"][0].page_content
            else:
                resume_summary = state["resume"][0]
        else:
            resume_summary = str(state["resume"])
    except Exception as e:
        print(f"Warning: Error getting resume summary: {e}")
        resume_summary = str(state["resume"])

    try:
        if isinstance(state["job_description"], list) and len(state["job_description"]) > 0:
            job_summary = state["job_description"][0]
        else:
            job_summary = str(state["job_description"])
    except Exception as e:
        print(f"Warning: Error getting job summary: {e}")
        job_summary = str(state["job_description"])

    for i, draft in enumerate(all_drafts):
        rating = llm_precise.invoke(DRAFT_RATING_PROMPT.format(
            resume_summary=resume_summary,
            job_summary=job_summary,
            draft=draft,
            draft_number=i+1
        ))
        ratings.append(rating)

    # Create a clearer, more structured prompt for draft selection
    selection_prompt = BEST_DRAFT_SELECTION_PROMPT.format(
        ratings_json=json.dumps(ratings, indent=2),
        num_drafts=len(all_drafts)
    )

    # Get the selected draft index with error handling
    try:
        selection = llm_precise.invoke(selection_prompt).strip()
        # Extract just the first number found in the response
        number_match = re.search(r'\d+', selection)
        if not number_match:
            print("Warning: Could not extract draft number from LLM response. Using original draft.")
            best_draft_idx = 0
        else:
            # LLM answers are 1-based; convert to a 0-based index.
            best_draft_idx = int(number_match.group()) - 1
            # Validate the index is in range
            if best_draft_idx < 0 or best_draft_idx >= len(all_drafts):
                print(f"Warning: Selected draft index {best_draft_idx + 1} out of range. Using original draft.")
                best_draft_idx = 0
    except (ValueError, TypeError) as e:
        print(f"Warning: Error selecting best draft: {e}. Using original draft.")
        best_draft_idx = 0

    state["draft"] = all_drafts[best_draft_idx]
    return state
nodes/test_workflow.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing_extensions import List, Dict, Any, Optional
2
+ from langgraph.graph import MessagesState, StateGraph
3
+
4
class DataLoadState(MessagesState):
    """
    State container for the job application writer workflow.

    Extends LangGraph's ``MessagesState``, so the conversation ``messages``
    list is carried alongside the fields below.
    """
    resume_path: str                # filesystem path to the candidate's resume file
    job_description_source: str     # source of the job posting (URL or raw text)
    resume: str                     # extracted resume text
    job_description: str            # extracted job description text
    company_name: str               # company name taken from the job posting
    current_node: str               # name of the workflow node currently executing
    company_research_data: Dict[str, Any]   # company research results, keyed by category


# Minimal graph used to smoke-test that LangGraph accepts the state schema.
test_graph = StateGraph(DataLoadState)
nodes/variations.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from datetime import datetime
3
+ from typing_extensions import Dict, List
4
+
5
+ from langchain_core.documents import Document
6
+
7
+
8
+ from ..classes.classes import AppState
9
+ from ..utils.llm_client import LLMClient
10
+ from ..prompts.templates import (
11
+ VARIATION_PROMPT
12
+ )
13
+
14
+
15
logger = logging.getLogger(__name__)
# Constants
# Human-readable date stamp, e.g. "Monday, January 01, 2025".
# NOTE(review): evaluated once at import time, so long-running processes keep
# the start-up date — confirm that is intended.
CURRENT_DATE = datetime.now().strftime("%A, %B %d, %Y")

# NOTE(review): two LLMClient instances are constructed here; `LLM` is not
# referenced in this module — confirm whether it can be removed.
LLM = LLMClient()
llm = LLMClient().get_llm()
21
+
22
def generate_variations(state: AppState) -> Dict[str, List[str]]:
    """Generate multiple variations of the draft for self-consistency voting.

    Reads ``state["resume"]``, ``state["job_description"]`` and
    ``state["draft"]``, invokes the LLM once per sampling configuration, and
    collects the non-empty results.  The original draft is always available as
    a fallback, so the returned list is never empty.

    Args:
        state: Workflow state.  ``resume`` / ``job_description`` may be plain
            strings or lists of chunks (``str`` or ``Document``).

    Returns:
        ``{"variations": [...]}`` with at least one entry.
    """

    def _as_text(value, max_chunks: int = 2) -> str:
        """Join up to ``max_chunks`` chunks of a str/Document/list value into text."""
        if isinstance(value, str):
            return value
        if isinstance(value, Document):
            return value.page_content
        if isinstance(value, list):
            return "\n".join(
                chunk.page_content if isinstance(chunk, Document) else str(chunk)
                for chunk in value[:max_chunks]
            )
        return str(value)

    variations: List[str] = []

    # Bug fix: the previous version keyed its isinstance checks on
    # state["company_research_data"] while reading state["resume"], and sliced
    # with [:2] when the value was a *string* (two characters, not two chunks).
    # Normalise both inputs through a single helper instead.
    try:
        resume_text = _as_text(state["resume"])
        job_text = _as_text(state["job_description"])
    except Exception as e:
        logger.warning("Error processing resume/job text: %s", e)
        resume_text = str(state.get("resume", ""))
        job_text = str(state.get("job_description", ""))

    # Sampling settings used to diversify the generated drafts.
    temp_variations = [
        {"temperature": 0.7, "top_p": 0.9},    # More conservative
        {"temperature": 0.75, "top_p": 0.92},  # Balanced
        {"temperature": 0.8, "top_p": 0.95},   # More creative
        {"temperature": 0.7, "top_p": 0.85},   # Alternative conservative
        {"temperature": 0.8, "top_p": 0.98},   # Most creative
    ]

    for settings in temp_variations:
        try:
            # Configure the shared LLM with this variation's sampling settings.
            configured_llm = llm.with_config(configurable=settings)

            messages = VARIATION_PROMPT.format_messages(
                resume_excerpt=resume_text,
                job_excerpt=job_text,
                draft=state["draft"],
            )

            response = configured_llm.invoke(messages)

            # Bug fix: chat models return message objects, not strings, so the
            # old `response.strip()` raised AttributeError and every variation
            # was silently discarded.  Extract the text content first.
            text = getattr(response, "content", response)
            if isinstance(text, str) and text.strip():  # keep non-empty output only
                variations.append(text)
        except Exception as e:
            logger.warning("Error generating variation with settings %s: %s", settings, e)
            continue

    # Ensure downstream voting always has at least one candidate.
    if not variations:
        variations.append(state["draft"])

    return {"variations": variations}
prompts.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 1. Title / One-Line Summary
2
+ > *E.g.* “embed_query returns empty vector with OllamaEmbeddings”
3
+
4
+ ---
5
+
6
+ ## 2. Goal / Expected Behavior
7
+ - What you’re trying to achieve
8
+ *E.g.* “Index documents with OllamaEmbeddings and query via Pinecone, then feed them into Llama3.2 for answer generation.”
9
+
10
+ ---
11
+
12
+ ## 3. Environment
13
+ - **Python**:
14
+ - **langchain**:
15
+ - **Ollama CLI / Daemon**:
16
+ - **OS** (and version):
17
+ - **Other dependencies**:
18
+
19
+ ---
20
+
21
+ ## 4. Minimal Reproducible Code
22
+ ```python
23
+ # Paste just enough code to reproduce the issue:
24
+ from langchain.embeddings import OllamaEmbeddings
25
+ emb = OllamaEmbeddings(model="ollama/llama3.2-embed")
26
+ vec = emb.embed_query("hello")
27
+ print(len(vec)) # unexpected result
prompts/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Prompt templates for job application generation.
3
+ """
prompts/templates.py ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Prompt templates for the job application writer.
3
+
4
+ This module contains all prompt templates used throughout the job application
5
+ generation process, organized by task.
6
+ """
7
+
8
+ from langchain_core.prompts import ChatPromptTemplate
9
+ from langchain_core.messages import SystemMessage, HumanMessage
10
+
11
+
12
+ # Persona selection prompts
13
+
14
# Persona-selection prompt: decides whether to address a recruiter or a hiring
# manager.
# Bug fix: the original wrapped the templates in static SystemMessage /
# HumanMessage objects.  ChatPromptTemplate treats those as literal messages
# and never substitutes {job_description}; (role, template) tuples register it
# as a real input variable.
PERSONA_DEVELOPMENT_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages([
    ("system", """
    You are my dedicated Job‑Application Writing Assistant.
    MISSION
    • Draft cover letters, LinkedIn messages, and answer's to questions within the job applications.
    • Sound like me: grounded, confident, clear—never fluffy or journalistic.
    • You will be provided "STYLE & LANGUAGE RULES" and "SELF‑EVALUATION CHECKLIST" to follow.
    """),
    ("human", """Analyze this job description and determine if it's better to write as if addressing a recruiter
    or a hiring manager. Return ONLY 'recruiter' or 'hiring_manager':

    {job_description}"""),
])
27
+
28
+
29
+ # Draft generation prompts
30
+
31
# Static system prompt for cover-letter generation.  Contains no template
# placeholders, so it can be passed to the model as-is.
COVER_LETTER_PROMPT: SystemMessage = SystemMessage(content=
    """
    You are CoverLetterGPT, a concise career‑writing assistant.

    CORE OBJECTIVE
    • Draft a 3‑paragraph cover letter (150‑180 words total) that targets hiring managers
    and technical recruiters. Assume it may reach the CEO.
    • Begin exactly with: "To Hiring Team,"
    End exactly with: "Thanks, Rishabh"
    • Tone: polite, casual, enthusiastic — but no em dashes (—) and no clichés.
    • Every fact about achievements, skills, or company details must be traceable to the
    provided resume, job description, or company research; otherwise, ask the user.
    • If any critical detail is missing or ambiguous, STOP and ask a clarifying question
    before writing the letter.
    • Keep sentences tight; avoid filler like “I am excited to…” (enthusiasm comes
    through precise language).
    • Never exceed 180 words. Never fall below 150 words.

    SELF‑EVALUATION (append after the letter)
    After producing the cover letter, output an “### Evaluation” section containing:
    Comprehensiveness (1‑5)
    Evidence provided (1‑5)
    Clarity of explanation (1‑5)
    Potential limitations or biases (bullet list)
    Areas for improvement (brief notes)

    ERROR HANDLING
    If word count, section order, or format rules are violated, regenerate until correct.
    """
)
61
+
62
+
63
+
64
# System prompt for generating qualification bullet points.
# NOTE(review): {persona_instruction} and {current_date} are plain text inside
# a static SystemMessage — they are NOT ChatPromptTemplate variables, so the
# caller must fill them (e.g. via str.format on .content) — TODO confirm.
BULLET_POINTS_PROMPT: SystemMessage = SystemMessage(content=
    """You are an expert job application writer who
    creates personalized application materials.

    {persona_instruction}

    Write 5-7 bullet points highlighting the candidate's
    qualifications for this specific role.
    Create content that genuinely reflects the candidate's
    background and is tailored to the specific job.
    Ensure the tone is professional, confident, and authentic.
    Today is {current_date}.""")
76
+
77
+
78
# System prompt for writing a short LinkedIn connection note.
# NOTE(review): {persona_instruction} and {current_date} are plain text inside
# a static SystemMessage — they are NOT ChatPromptTemplate variables, so the
# caller must fill them (e.g. via str.format on .content) — TODO confirm.
LINKEDIN_NOTE_PROMPT: SystemMessage = SystemMessage(content="""You are an expert job application
    writer who creates personalized application materials.
    {persona_instruction}

    Write a brief LinkedIn connection note to a hiring manager or recruiter (150 words max).
    Create content that genuinely reflects the candidate's background and is tailored to the specific job.
    Ensure the tone is professional, confident, and authentic.
    Today is {current_date}.""")
86
+
87
+ # Variation generation prompt
88
# Variation generation prompt.
# Bug fix: the original used static SystemMessage/HumanMessage objects, so
# format_messages(resume_excerpt=..., job_excerpt=..., draft=...) — as called
# in nodes/variations.py — never substituted the placeholders.  (role,
# template) tuples register them as real input variables.
VARIATION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages([
    ("system", "You are an expert job application writer. Create a variation of the given draft."),
    ("human", """
    # Resume Excerpt
    {resume_excerpt}

    # Job Description Excerpt
    {job_excerpt}

    # Original Draft
    {draft}

    Create a variation of this draft with the same key points but different wording or structure.
    """),
])
103
+
104
+
105
+ # Critique prompt
106
+
107
# Critique prompt.
# Bug fix: static SystemMessage/HumanMessage objects are never formatted by
# ChatPromptTemplate, so {job_description} and {draft} stayed literal.  (role,
# template) tuples make them real input variables.
CRITIQUE_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages([
    ("system", "You are a professional editor who specializes in job applications. Provide constructive feedback."),
    ("human", """
    # Job Description
    {job_description}

    # Current Draft
    {draft}

    Critique this draft and suggest specific improvements. Focus on:
    1. How well it targets the job requirements
    2. Professional tone and language
    3. Clarity and impact
    4. Grammar and style

    Return your critique in a constructive, actionable format.
    """),
])
125
+
126
+
127
+ # Draft rating prompt
128
+
129
# Draft rating prompt: asks for a JSON object of 1-10 scores.
# Bug fix: static SystemMessage/HumanMessage objects are never formatted by
# ChatPromptTemplate, so {resume_summary}, {job_summary}, {draft_number} and
# {draft} stayed literal when the caller invoked .format(...).  (role,
# template) tuples make them real input variables.
DRAFT_RATING_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages([
    ("system", "You evaluate job application materials for effectiveness, appropriateness, and impact."),
    ("human", """
    # Resume Summary
    {resume_summary}

    # Job Description Summary
    {job_summary}

    # Draft #{draft_number}
    {draft}

    Rate this draft on a scale of 1-10 for:
    1. Relevance to the job requirements
    2. Professional tone
    3. Personalization
    4. Persuasiveness
    5. Clarity

    Return ONLY a JSON object with these ratings and a brief explanation for each.
    """),
])
151
+
152
+
153
+ # Best draft selection prompt
154
+
155
# Best-draft selection prompt: the model must answer with a single number.
# Bug fix: static SystemMessage/HumanMessage objects are never formatted by
# ChatPromptTemplate, so {num_drafts} and {ratings_json} stayed literal.
# (role, template) tuples make them real input variables.
BEST_DRAFT_SELECTION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages([
    ("system", """You are a job application expert who selects the best draft based on multiple ratings.
    You MUST return ONLY a single number between 1 and the number of drafts.
    For example, if draft #2 is best, return ONLY '2'.
    Do NOT include ANY other text, explanations, or characters in your response."""),
    ("human", """Here are the ratings for {num_drafts} different drafts:

    {ratings_json}

    Based on these ratings, return ONLY the number of the best draft (1-{num_drafts}).
    Your entire response must be just one number.
    Example: If draft #2 is best, return ONLY '2'.
    """),
])
169
+
170
+
171
# Revision prompt: rewrite a draft according to critique feedback.
# Bug fix: static SystemMessage/HumanMessage objects are never formatted by
# ChatPromptTemplate, so {draft} and {feedback} stayed literal.  (role,
# template) tuples make them real input variables.
REVISION_PROMPT: ChatPromptTemplate = ChatPromptTemplate.from_messages([
    ("system", "You are an expert job application writer. Revise the draft based on feedback."),
    ("human", """
    # Original Draft
    {draft}

    # Feedback
    {feedback}

    Revise the draft to incorporate this feedback while maintaining professionalism and impact.
    Return the complete, final version.
    """),
])
184
+
185
# Tavily query prompt to build knowledge context about the company.
# The positional {} receives the company name via str.format(company_name).
# Bug fix: the literal JSON braces in <OutputFormat> are now doubled ({{ }})
# — previously they were single braces, so str.format() raised on every call.
TAVILY_QUERY_PROMPT = '''
<Context>
The user needs targeted search queries (with rationale) for Tavily Search to research company {} and inform a personalized cover letter.
</Context>

<Requirements>
- Output a JSON object with five fields:
  - Keys: recent_developments, recent_news, role_info, customers_partners, culture_values
  - Each value: an array of exactly two strings: [search query for Tavily Search, reasoning].
- Always include the company name in the search query to boost relevance.
- If any data is missing, supply a sensible fallback query that still references the company.
- Do not repeat queries across fields.
</Requirements>

<OutputFormat>
```json
{{
    "recent_developments": ["…", "…"],
    "recent_news":         ["…", "…"],
    "role_info":           ["…", "…"],
    "customers_partners":  ["…", "…"],
    "culture_values":      ["…", "…"]
}}
```
</OutputFormat>
'''
213
+
214
# Job-description extraction prompt: asks the model for strict JSON output.
# The {{ }} in REQUIRED OUTPUT FORMAT are already doubled, so str.format()
# can safely be applied to this template without touching the JSON example.
JOB_DESCRIPTION_PROMPT = """You are a JSON extraction specialist. Extract job information from the provided text and return ONLY valid JSON.

CRITICAL: Your response must be parseable by json.loads() - no markdown, no explanations, no extra text.

Extract these three fields in exact order:
1. job_description field - Complete job posting formatted in clean markdown with proper headers (## Job Description, ## Responsibilities, ## Requirements, etc.)
2. company_name field - Exact company name as mentioned
3. job_title field - Exact job title as posted

FORMATTING RULES:
- Use double quotes for all strings
- Escape internal quotes with \\"
- Escape newlines as \\\\n in the job description field
- Replace actual line breaks with \\\\n
- If any field is missing, use empty string ""
- No trailing commas
- No comments or extra whitespace

REQUIRED OUTPUT FORMAT:
{{
"job_description": "markdown formatted job description with \\\\n for line breaks",
"company_name": "exact company name",
"job_title": "exact job title"
}}

Return only the JSON object - no other text."""
setup.py ADDED
File without changes
testing.ipynb ADDED
@@ -0,0 +1,1069 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "id": "d26f6647",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from langchain.prompts import ChatPromptTemplate\n",
11
+ "from langchain_core.messages import AIMessage, HumanMessage, SystemMessage"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "markdown",
16
+ "id": "f337ecb5",
17
+ "metadata": {},
18
+ "source": []
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 9,
23
+ "id": "92b12890",
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "messages = ChatPromptTemplate.from_messages([SystemMessage(content=f\"\"\"\n",
28
+ " You are a Tavily Search Query specialist. Follow the JSON schema below exactly:\n",
29
+ "\n",
30
+ " Rules:\n",
31
+ " 1. Generate Tavily DSL only (no natural language outside the JSON).\n",
32
+ " 2. Map the job description into five categories:\n",
33
+ " • query1: recent developments\n",
34
+ " • query2: recent news\n",
35
+ " • query3:company profile\n",
36
+ " • query4: key customers & partners\n",
37
+ " • query5: culture & values\n",
38
+ " 3. Each value is a two‑element list:\n",
39
+ " [<query string>, <one‑sentence rationale>]\n",
40
+ " 4. Use filters (source:, date:[now-30d TO now], site:…, etc.) where helpful.\n",
41
+ " 5. If information is missing in the JD, fall back sensibly\n",
42
+ " (e.g. search for “employee testimonials”).\n",
43
+ " 6. Return **only** valid JSON.\n",
44
+ " \"\"\"\n",
45
+ " )\n",
46
+ " , HumanMessage(content=\"Hello World\")])"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 6,
52
+ "id": "e38c3632",
53
+ "metadata": {},
54
+ "outputs": [],
55
+ "source": [
56
+ "input_message = ChatPromptTemplate.from_messages([HumanMessage(content=\"Hello World\")])\n"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": 11,
62
+ "id": "dac1ec19",
63
+ "metadata": {},
64
+ "outputs": [
65
+ {
66
+ "name": "stdout",
67
+ "output_type": "stream",
68
+ "text": [
69
+ "================================\u001b[1m System Message \u001b[0m================================\n",
70
+ "\n",
71
+ "\n",
72
+ " You are a Tavily Search Query specialist. Follow the JSON schema below exactly:\n",
73
+ "\n",
74
+ " Rules:\n",
75
+ " 1. Generate Tavily DSL only (no natural language outside the JSON).\n",
76
+ " 2. Map the job description into five categories:\n",
77
+ " • query1: recent developments\n",
78
+ " • query2: recent news\n",
79
+ " • query3:company profile\n",
80
+ " • query4: key customers & partners\n",
81
+ " • query5: culture & values\n",
82
+ " 3. Each value is a two‑element list:\n",
83
+ " [<query string>, <one‑sentence rationale>]\n",
84
+ " 4. Use filters (source:, date:[now-30d TO now], site:…, etc.) where helpful.\n",
85
+ " 5. If information is missing in the JD, fall back sensibly\n",
86
+ " (e.g. search for “employee testimonials”).\n",
87
+ " 6. Return **only** valid JSON.\n",
88
+ " \n",
89
+ "\n",
90
+ "================================\u001b[1m Human Message \u001b[0m=================================\n",
91
+ "\n",
92
+ "Hello World\n"
93
+ ]
94
+ }
95
+ ],
96
+ "source": [
97
+ "messages.pretty_print()"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": 14,
103
+ "id": "7ebd0d0d",
104
+ "metadata": {},
105
+ "outputs": [],
106
+ "source": [
107
+ "from langchain.prompts import (\n",
108
+ " ChatPromptTemplate,\n",
109
+ " HumanMessagePromptTemplate,\n",
110
+ " SystemMessagePromptTemplate,\n",
111
+ ")\n",
112
+ "\n",
113
+ "input_message = HumanMessagePromptTemplate.from_template(\"Below is the required job description and resume: {background_information}\", input_variables=[\"background_information\"])"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": 17,
119
+ "id": "cd6b3cb8",
120
+ "metadata": {},
121
+ "outputs": [
122
+ {
123
+ "data": {
124
+ "text/plain": [
125
+ "HumanMessage(content='Below is the required job description and resume: This is Rishabh', additional_kwargs={}, response_metadata={})"
126
+ ]
127
+ },
128
+ "execution_count": 17,
129
+ "metadata": {},
130
+ "output_type": "execute_result"
131
+ }
132
+ ],
133
+ "source": [
134
+ "input_message.format(background_information=\"This is Rishabh\")"
135
+ ]
136
+ },
137
+ {
138
+ "cell_type": "code",
139
+ "execution_count": 18,
140
+ "id": "c9628bed",
141
+ "metadata": {},
142
+ "outputs": [],
143
+ "source": [
144
+ "import re\n",
145
+ "from pathlib import Path\n",
146
+ "from typing import List\n",
147
+ "\n",
148
+ "from langchain_community.document_loaders import PyPDFLoader\n",
149
+ "from langchain.text_splitter import (\n",
150
+ " MarkdownHeaderTextSplitter,\n",
151
+ " RecursiveCharacterTextSplitter,\n",
152
+ ")\n",
153
+ "from langchain.schema import Document"
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": 29,
159
+ "id": "c352da72",
160
+ "metadata": {},
161
+ "outputs": [],
162
+ "source": [
163
+ "def _collapse_ws(text: str) -> str:\n",
164
+ " \"\"\"Collapse stray whitespace but keep bullet breaks.\"\"\"\n",
165
+ " text = re.sub(r\"\\n\\s*([•\\-–])\\s*\", r\"\\n\\1 \", text)\n",
166
+ " return re.sub(r\"[ \\t\\r\\f\\v]+\", \" \", text).replace(\" \\n\", \"\\n\").strip()\n",
167
+ "\n",
168
+ "\n",
169
+ "def _is_heading(line: str) -> bool:\n",
170
+ " return (\n",
171
+ " line.isupper()\n",
172
+ " and len(line.split()) <= 5\n",
173
+ " and not re.search(r\"\\d\", line)\n",
174
+ " )\n",
175
+ "\n",
176
+ "\n",
177
+ "def parse_resume(pdf_path: str | Path) -> List[Document]:\n",
178
+ " \"\"\"\n",
179
+ " Load a single‑page résumé PDF → list[Document] chunks\n",
180
+ " (≈400 chars, 50‑char overlap) with {source, section} metadata.\n",
181
+ " \"\"\"\n",
182
+ " text = PyPDFLoader(str(pdf_path), extraction_mode=\"layout\").load()[0].page_content\n",
183
+ " print(text)\n",
184
+ " text = _collapse_ws(text)\n",
185
+ "\n",
186
+ " # Tag headings with \"###\" so Markdown splitter can see them\n",
187
+ " tagged_lines = [\n",
188
+ " f\"### {ln}\" if _is_heading(ln) else ln\n",
189
+ " for ln in text.splitlines()\n",
190
+ " ]\n",
191
+ " md_text = \"\\n\".join(tagged_lines)\n",
192
+ "\n",
193
+ " if \"###\" in md_text:\n",
194
+ " splitter = MarkdownHeaderTextSplitter(\n",
195
+ " headers_to_split_on=[(\"###\", \"section\")]\n",
196
+ " )\n",
197
+ " chunks = splitter.split_text(md_text) # already returns Documents\n",
198
+ " else:\n",
199
+ " print(f\"No headings found.\")\n",
200
+ " splitter = RecursiveCharacterTextSplitter(\n",
201
+ " chunk_size=400, chunk_overlap=50\n",
202
+ " )\n",
203
+ " chunks = [\n",
204
+ " Document(page_content=chunk, metadata={})\n",
205
+ " for chunk in splitter.split_text(md_text)\n",
206
+ " ]\n",
207
+ "\n",
208
+ " # Attach metadata\n",
209
+ " for doc in chunks:\n",
210
+ " doc.metadata.setdefault(\"source\", str(pdf_path))\n",
211
+ " # section already present if header‑splitter was used\n",
212
+ " return chunks\n"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": 31,
218
+ "id": "14e062e4",
219
+ "metadata": {},
220
+ "outputs": [
221
+ {
222
+ "name": "stdout",
223
+ "output_type": "stream",
224
+ "text": [
225
+ "Rishabh Aggarwal\n",
226
+ " (602) 580-5734 • raggar15@asu.edu • LinkedIn • Tempe, AZ\n",
227
+ "TECHNICAL SKILLS\n",
228
+ "Programming Languages: Python, Java, JavaScript, Bash, HTML, CSS\n",
229
+ "Databases: SQL (PostgreSQL, MySQL, SQLite), NoSQL (MongoDB, Redis, DynamoDB, Pinecone)\n",
230
+ "Frameworks/Tools: SpringBoot, React, JUnit, Node.js, RESTful APIs, Django, Kafka, Airflow, FastAPI, Pydantic, Tableau\n",
231
+ "DevOps/Cloud: AWS, GCP, GitHub Actions, Docker, Jenkins, Terraform, Kubernetes, MLFlow, GitLab\n",
232
+ "AI Tools/Frameworks: PyTorch, Tensorflow, scikit-learn, LangGraph, LangChain, LangSmith, ChatGPT\n",
233
+ "PROFESSIONAL EXPERIENCE\n",
234
+ "Amazon Inc, Tempe, AZ: Software Development Engineer | Seller Payment Services Dec 2023 - Aug 2024\n",
235
+ "● Established AWS Evidently setup to handle 50K+ daily API requests to new Lambda service using AWS CDK(TypeScript)\n",
236
+ "● Added metrics to monitor traffic and enhance service observability of the Lambda service through CloudWatch logs\n",
237
+ "● Developed SNS Event Publishers in Java using Spring Boot to process 10K+ daily events in an event-driven architecture\n",
238
+ "● Led load balancer migration planning for a microservice with a focus on safe rollbacks and minimum downtime\n",
239
+ "● Designed a dashboard for ALB migration to monitor traffic with high-severity alarms to enhance observability\n",
240
+ "● Directed weekly meetings with a 7-member agile team to analyze metrics and customer data, guiding decision-making for\n",
241
+ " live campaigns involving over 50K sellers\n",
242
+ "MetaJungle, Ozark, MO: Lead Backend Engineer Jun 2023 - Dec 2023\n",
243
+ "● Architected a scalable AWS cloud infrastructure for a Marketplace using Terraform IaC with ECS and Fargate\n",
244
+ " instances, reduced costs by 40% while maintaining high reliability using Blue/Green deployment strategy\n",
245
+ "● Engineered and managed Jenkins CI/CD pipeline allowing faster iterative development by reducing deployment time by\n",
246
+ " 75% , leveraging Github hooks and Docker Containerization\n",
247
+ "● Migrated over 1.2TB on-premises Microsoft SQL Server database with over 2 million records to AWS RDS, utilizing\n",
248
+ " AWS DMS ensuring efficient indexing and retrieval\n",
249
+ "● Developed 10+ RESTful APIs in Node.js to manage data for over 500 NFT collections and 10,000 listings from MongoDB\n",
250
+ "● Automated extraction and compression of 50,000+ images from Ethereum Blockchain and stored on AWS S3 using\n",
251
+ " Airflow workflows in Python, leading to almost 30% storage cost savings\n",
252
+ "Omnipresent Robot Technologies, Delhi, India: Software Engineer Jun 2018 - Jul 2021\n",
253
+ "● Engineered a distributed, scalable AI surveillance application with edge-device computation using Python, OpenCV,\n",
254
+ " and scikit-learn, ensuring security for 10,000+ daily park visitors\n",
255
+ "● Architected a distributed system for real-time video streaming using Apache Kafka and Python to process 50+ parallel\n",
256
+ " video streams, reducing latency by 60% by rigorous debugging and performance optimization\n",
257
+ "● Led the development of an analytics dashboard using Django, React and Postgres to show breach records, alerts, and\n",
258
+ " intuitive data visualizations using Google Charts, allowing data-driven decision making\n",
259
+ "● Developed a drone compliance platform using Django to automate flight authorization and authentication process,\n",
260
+ " leading to enhanced productivity of the drone engineering team\n",
261
+ "● Led collaboration of a team of engineers and drone operators to conduct real-world testing of the compliance system\n",
262
+ "● Mentored interns to understand software development best practices, coding standards, and version control systems\n",
263
+ "ADDITIONAL EXPERIENCE\n",
264
+ "ML Software Developer at ASU Jul 2022 - May 2023\n",
265
+ "● Trained deep learning models using PyTorch and Scikit to detect low-resolution objects in 15,000+ satellite images\n",
266
+ "● Executed adversarial attacks and utilized MLFlow for fine-tuning multi-class classification machine learning model,\n",
267
+ " enhancing model robustness and improving accuracy by 20%\n",
268
+ "Mayhem Heroes Cybersecurity Open Source Hackathon Apr 2022\n",
269
+ "Integrated Mayhem into CI/CD pipeline for Open Source repos using GitHub Actions, reducing security risks by over 80%\n",
270
+ " EDUCATION\n",
271
+ "Master of Science in Information Technology\n",
272
+ "Arizona State University, Tempe, Arizona\n"
273
+ ]
274
+ }
275
+ ],
276
+ "source": [
277
+ "chunks = parse_resume(\"C:\\\\Users\\\\risha\\\\Downloads\\\\Rishabh_SDE_Resume.pdf\")"
278
+ ]
279
+ },
280
+ {
281
+ "cell_type": "code",
282
+ "execution_count": 40,
283
+ "id": "0100cc62",
284
+ "metadata": {},
285
+ "outputs": [
286
+ {
287
+ "name": "stdout",
288
+ "output_type": "stream",
289
+ "text": [
290
+ "Resume chunk: Rishabh Aggarwal\n",
291
+ "(602) 580-5734 • raggar15@asu.edu • LinkedIn • Tempe, AZ\n",
292
+ "Resume chunk: Programming Languages: Python, Java, JavaScript, Bash, HTML, CSS\n",
293
+ "Databases: SQL (PostgreSQL, MySQL, SQLite), NoSQL (MongoDB, Redis, DynamoDB, Pinecone)\n",
294
+ "Frameworks/Tools: SpringBoot, React, JUnit, Node.js, RESTful APIs, Django, Kafka, Airflow, FastAPI, Pydantic, Tableau\n",
295
+ "DevOps/Cloud: AWS, GCP, GitHub Actions, Docker, Jenkins, Terraform, Kubernetes, MLFlow, GitLab\n",
296
+ "AI Tools/Frameworks: PyTorch, Tensorflow, scikit-learn, LangGraph, LangChain, LangSmith, ChatGPT\n",
297
+ "Resume chunk: Amazon Inc, Tempe, AZ: Software Development Engineer | Seller Payment Services Dec 2023 - Aug 2024\n",
298
+ "● Established AWS Evidently setup to handle 50K+ daily API requests to new Lambda service using AWS CDK(TypeScript)\n",
299
+ "● Added metrics to monitor traffic and enhance service observability of the Lambda service through CloudWatch logs\n",
300
+ "● Developed SNS Event Publishers in Java using Spring Boot to process 10K+ daily events in an event-driven architecture\n",
301
+ "● Led load balancer migration planning for a microservice with a focus on safe rollbacks and minimum downtime\n",
302
+ "● Designed a dashboard for ALB migration to monitor traffic with high-severity alarms to enhance observability\n",
303
+ "● Directed weekly meetings with a 7-member agile team to analyze metrics and customer data, guiding decision-making for\n",
304
+ "live campaigns involving over 50K sellers\n",
305
+ "MetaJungle, Ozark, MO: Lead Backend Engineer Jun 2023 - Dec 2023\n",
306
+ "● Architected a scalable AWS cloud infrastructure for a Marketplace using Terraform IaC with ECS and Fargate\n",
307
+ "instances, reduced costs by 40% while maintaining high reliability using Blue/Green deployment strategy\n",
308
+ "● Engineered and managed Jenkins CI/CD pipeline allowing faster iterative development by reducing deployment time by\n",
309
+ "75% , leveraging Github hooks and Docker Containerization\n",
310
+ "● Migrated over 1.2TB on-premises Microsoft SQL Server database with over 2 million records to AWS RDS, utilizing\n",
311
+ "AWS DMS ensuring efficient indexing and retrieval\n",
312
+ "● Developed 10+ RESTful APIs in Node.js to manage data for over 500 NFT collections and 10,000 listings from MongoDB\n",
313
+ "● Automated extraction and compression of 50,000+ images from Ethereum Blockchain and stored on AWS S3 using\n",
314
+ "Airflow workflows in Python, leading to almost 30% storage cost savings\n",
315
+ "Omnipresent Robot Technologies, Delhi, India: Software Engineer Jun 2018 - Jul 2021\n",
316
+ "● Engineered a distributed, scalable AI surveillance application with edge-device computation using Python, OpenCV,\n",
317
+ "and scikit-learn, ensuring security for 10,000+ daily park visitors\n",
318
+ "● Architected a distributed system for real-time video streaming using Apache Kafka and Python to process 50+ parallel\n",
319
+ "video streams, reducing latency by 60% by rigorous debugging and performance optimization\n",
320
+ "● Led the development of an analytics dashboard using Django, React and Postgres to show breach records, alerts, and\n",
321
+ "intuitive data visualizations using Google Charts, allowing data-driven decision making\n",
322
+ "● Developed a drone compliance platform using Django to automate flight authorization and authentication process,\n",
323
+ "leading to enhanced productivity of the drone engineering team\n",
324
+ "● Led collaboration of a team of engineers and drone operators to conduct real-world testing of the compliance system\n",
325
+ "● Mentored interns to understand software development best practices, coding standards, and version control systems\n",
326
+ "Resume chunk: ML Software Developer at ASU Jul 2022 - May 2023\n",
327
+ "● Trained deep learning models using PyTorch and Scikit to detect low-resolution objects in 15,000+ satellite images\n",
328
+ "● Executed adversarial attacks and utilized MLFlow for fine-tuning multi-class classification machine learning model,\n",
329
+ "enhancing model robustness and improving accuracy by 20%\n",
330
+ "Mayhem Heroes Cybersecurity Open Source Hackathon Apr 2022\n",
331
+ "Integrated Mayhem into CI/CD pipeline for Open Source repos using GitHub Actions, reducing security risks by over 80%\n",
332
+ "Resume chunk: Master of Science in Information Technology\n",
333
+ "Arizona State University, Tempe, Arizona\n"
334
+ ]
335
+ }
336
+ ],
337
+ "source": [
338
+ "resume_text = \"\"\n",
339
+ "for chunk in chunks:\n",
340
+ " print(f\"Resume chunk: {chunk.page_content}\")\n",
341
+ " resume_text+= (chunk.page_content)"
342
+ ]
343
+ },
344
+ {
345
+ "cell_type": "code",
346
+ "execution_count": 41,
347
+ "id": "b045de91",
348
+ "metadata": {},
349
+ "outputs": [],
350
+ "source": [
351
+ "from pydantic import BaseModel, Field\n",
352
+ "\n",
353
+ "class TavilyQuerySet(BaseModel):\n",
354
+ " query1: tuple[str, str] = Field(\n",
355
+ " ...,\n",
356
+ " description=\"DSL for Recent Developments + 1‑sentence rationale\",\n",
357
+ " )\n",
358
+ " query2: tuple[str, str] = Field(\n",
359
+ " ...,\n",
360
+ " description=\"DSL for Recent News + rationale\",\n",
361
+ " )\n",
362
+ " query3: tuple[str, str]\n",
363
+ " query4: tuple[str, str]\n",
364
+ " query5: tuple[str, str]"
365
+ ]
366
+ },
367
+ {
368
+ "cell_type": "code",
369
+ "execution_count": 42,
370
+ "id": "eda95e9a",
371
+ "metadata": {},
372
+ "outputs": [],
373
+ "source": [
374
+ "from langchain.output_parsers import PydanticOutputParser\n",
375
+ "parser = PydanticOutputParser(pydantic_object=TavilyQuerySet)\n",
376
+ "\n",
377
+ "messages = SystemMessage(content=f\"\"\"\n",
378
+ " You are a Tavily Search Query specialist. Follow the JSON schema below exactly:\n",
379
+ " {parser.get_format_instructions()}\n",
380
+ "\n",
381
+ " \n",
382
+ " Rules:\n",
383
+ " 1. Generate Tavily DSL only (no natural language outside the JSON).\n",
384
+ " 2. Map the job description into five categories:\n",
385
+ " • query1: recent developments\n",
386
+ " • query2: recent news\n",
387
+ "    • query3: company profile\n",
388
+ " • query4: key customers & partners\n",
389
+ " • query5: culture & values\n",
390
+ " 3. Each value is a two‑element list:\n",
391
+ " [<query string>, <one‑sentence rationale>]\n",
392
+ " 4. Use filters (source:, date:[now-30d TO now], site:…, etc.) where helpful.\n",
393
+ " 5. If information is missing in the JD, fall back sensibly\n",
394
+ " (e.g. search for “employee testimonials”).\n",
395
+ " 6. Return **only** valid JSON.\n",
396
+ " \"\"\")"
397
+ ]
398
+ },
399
+ {
400
+ "cell_type": "code",
401
+ "execution_count": 53,
402
+ "id": "9738103e",
403
+ "metadata": {},
404
+ "outputs": [
405
+ {
406
+ "data": {
407
+ "text/plain": [
408
+ "'The output should be formatted as a JSON instance that conforms to the JSON schema below.\\n\\nAs an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\\nthe object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\\n\\nHere is the output schema:\\n```\\n{\"properties\": {\"query1\": {\"description\": \"DSL for Recent Developments + 1‑sentence rationale\", \"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query1\", \"type\": \"array\"}, \"query2\": {\"description\": \"DSL for Recent News + rationale\", \"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query2\", \"type\": \"array\"}, \"query3\": {\"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query3\", \"type\": \"array\"}, \"query4\": {\"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query4\", \"type\": \"array\"}, \"query5\": {\"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query5\", \"type\": \"array\"}}, \"required\": [\"query1\", \"query2\", \"query3\", \"query4\", \"query5\"]}\\n```'"
409
+ ]
410
+ },
411
+ "execution_count": 53,
412
+ "metadata": {},
413
+ "output_type": "execute_result"
414
+ }
415
+ ],
416
+ "source": [
417
+ "parser.get_format_instructions()"
418
+ ]
419
+ },
420
+ {
421
+ "cell_type": "code",
422
+ "execution_count": 52,
423
+ "id": "c3174432",
424
+ "metadata": {},
425
+ "outputs": [
426
+ {
427
+ "data": {
428
+ "text/plain": [
429
+ "{'properties': {'query1': {'description': 'DSL for Recent Developments + 1‑sentence rationale',\n",
430
+ " 'maxItems': 2,\n",
431
+ " 'minItems': 2,\n",
432
+ " 'prefixItems': [{'type': 'string'}, {'type': 'string'}],\n",
433
+ " 'title': 'Query1',\n",
434
+ " 'type': 'array'},\n",
435
+ " 'query2': {'description': 'DSL for Recent News + rationale',\n",
436
+ " 'maxItems': 2,\n",
437
+ " 'minItems': 2,\n",
438
+ " 'prefixItems': [{'type': 'string'}, {'type': 'string'}],\n",
439
+ " 'title': 'Query2',\n",
440
+ " 'type': 'array'},\n",
441
+ " 'query3': {'maxItems': 2,\n",
442
+ " 'minItems': 2,\n",
443
+ " 'prefixItems': [{'type': 'string'}, {'type': 'string'}],\n",
444
+ " 'title': 'Query3',\n",
445
+ " 'type': 'array'},\n",
446
+ " 'query4': {'maxItems': 2,\n",
447
+ " 'minItems': 2,\n",
448
+ " 'prefixItems': [{'type': 'string'}, {'type': 'string'}],\n",
449
+ " 'title': 'Query4',\n",
450
+ " 'type': 'array'},\n",
451
+ " 'query5': {'maxItems': 2,\n",
452
+ " 'minItems': 2,\n",
453
+ " 'prefixItems': [{'type': 'string'}, {'type': 'string'}],\n",
454
+ " 'title': 'Query5',\n",
455
+ " 'type': 'array'}},\n",
456
+ " 'required': ['query1', 'query2', 'query3', 'query4', 'query5'],\n",
457
+ " 'title': 'TavilyQuerySet',\n",
458
+ " 'type': 'object'}"
459
+ ]
460
+ },
461
+ "execution_count": 52,
462
+ "metadata": {},
463
+ "output_type": "execute_result"
464
+ }
465
+ ],
466
+ "source": [
467
+ "TavilyQuerySet.model_json_schema()"
468
+ ]
469
+ },
470
+ {
471
+ "cell_type": "code",
472
+ "execution_count": 44,
473
+ "id": "5884df35",
474
+ "metadata": {},
475
+ "outputs": [
476
+ {
477
+ "name": "stdout",
478
+ "output_type": "stream",
479
+ "text": [
480
+ "================================\u001b[1m System Message \u001b[0m================================\n",
481
+ "\n",
482
+ "\n",
483
+ " You are a Tavily Search Query specialist. Follow the JSON schema below exactly:\n",
484
+ " The output should be formatted as a JSON instance that conforms to the JSON schema below.\n",
485
+ "\n",
486
+ "As an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\n",
487
+ "the object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n",
488
+ "\n",
489
+ "Here is the output schema:\n",
490
+ "```\n",
491
+ "{\"properties\": {\"query1\": {\"description\": \"DSL for Recent Developments + 1‑sentence rationale\", \"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query1\", \"type\": \"array\"}, \"query2\": {\"description\": \"DSL for Recent News + rationale\", \"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query2\", \"type\": \"array\"}, \"query3\": {\"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query3\", \"type\": \"array\"}, \"query4\": {\"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query4\", \"type\": \"array\"}, \"query5\": {\"maxItems\": 2, \"minItems\": 2, \"prefixItems\": [{\"type\": \"string\"}, {\"type\": \"string\"}], \"title\": \"Query5\", \"type\": \"array\"}}, \"required\": [\"query1\", \"query2\", \"query3\", \"query4\", \"query5\"]}\n",
492
+ "```\n",
493
+ "\n",
494
+ "\n",
495
+ " Rules:\n",
496
+ " 1. Generate Tavily DSL only (no natural language outside the JSON).\n",
497
+ " 2. Map the job description into five categories:\n",
498
+ " • query1: recent developments\n",
499
+ " • query2: recent news\n",
500
+ " • query3:company profile\n",
501
+ " • query4: key customers & partners\n",
502
+ " • query5: culture & values\n",
503
+ " 3. Each value is a two‑element list:\n",
504
+ " [<query string>, <one‑sentence rationale>]\n",
505
+ " 4. Use filters (source:, date:[now-30d TO now], site:…, etc.) where helpful.\n",
506
+ " 5. If information is missing in the JD, fall back sensibly\n",
507
+ " (e.g. search for “employee testimonials”).\n",
508
+ " 6. Return **only** valid JSON.\n",
509
+ " \n"
510
+ ]
511
+ }
512
+ ],
513
+ "source": [
514
+ "messages.pretty_print()"
515
+ ]
516
+ },
517
+ {
518
+ "cell_type": "code",
519
+ "execution_count": 46,
520
+ "id": "d2c3cc8b",
521
+ "metadata": {},
522
+ "outputs": [],
523
+ "source": [
524
+ "x = \"\"\"properties\": {\"query1\": [{\"query\": \"Shalin Mehta AND \\\"Computational Microscopy Platform\\\"\", \"rationale\": \"Recent developments within the company\"}, {\"query\": \"Shalin Mehta AND \\\"Biohub SF\\\"\", \"rationale\": \"Recent developments within the company\"}], \"query2\": [{\"query\": \"Chan Zuckerberg Biohub - San Francisco AND recent news\", \"rationale\": \"Recent news about the company\"}, {\"query\": \"COVID-19 AND Chan Zuckerberg Biohub - San Francisco\", \"rationale\": \"Recent news about the company\"}], \"query3\": [{\"query\": \"Shalin Mehta AND \\\"role: Software Engineer\\\"\", \"rationale\": \"Information about the company that relates to the role\"}, {\"query\": \"Chan Zuckerberg Biohub - San Francisco AND \\\"team: Bioengineering\\\"\", \"rationale\": \"Information about the company that relates to the role\"}], \"query4\": [{\"query\": \"key customers: Chan Zuckerberg Biohub\", \"rationale\": \"Key customers & partners\"}, {\"query\": \"partners: Chan Zuckerberg Biohub SF\", \"rationale\": \"Key customers & partners\"}], \"query5\": [{\"query\": \"company culture: Chan Zuckerberg Biohub\", \"rationale\": \"Culture & values of the company\"}, {\"query\": \"values: Chan Zuckerberg Biohub\", \"rationale\": \"Culture & values of the company\"}]}, \"required\": [\"query1\", \"query2\", \"query3\", \"query4\", \"query5\"]\"\"\""
525
+ ]
526
+ },
527
+ {
528
+ "cell_type": "code",
529
+ "execution_count": 49,
530
+ "id": "7d8508a4",
531
+ "metadata": {},
532
+ "outputs": [
533
+ {
534
+ "name": "stdout",
535
+ "output_type": "stream",
536
+ "text": [
537
+ "properties\": {\"query1\": [{\"query\": \"Shalin Mehta AND \"Computational Microscopy Platform\"\", \"rationale\": \"Recent developments within the company\"}, {\"query\": \"Shalin Mehta AND \"Biohub SF\"\", \"rationale\": \"Recent developments within the company\"}], \"query2\": [{\"query\": \"Chan Zuckerberg Biohub - San Francisco AND recent news\", \"rationale\": \"Recent news about the company\"}, {\"query\": \"COVID-19 AND Chan Zuckerberg Biohub - San Francisco\", \"rationale\": \"Recent news about the company\"}], \"query3\": [{\"query\": \"Shalin Mehta AND \"role: Software Engineer\"\", \"rationale\": \"Information about the company that relates to the role\"}, {\"query\": \"Chan Zuckerberg Biohub - San Francisco AND \"team: Bioengineering\"\", \"rationale\": \"Information about the company that relates to the role\"}], \"query4\": [{\"query\": \"key customers: Chan Zuckerberg Biohub\", \"rationale\": \"Key customers & partners\"}, {\"query\": \"partners: Chan Zuckerberg Biohub SF\", \"rationale\": \"Key customers & partners\"}], \"query5\": [{\"query\": \"company culture: Chan Zuckerberg Biohub\", \"rationale\": \"Culture & values of the company\"}, {\"query\": \"values: Chan Zuckerberg Biohub\", \"rationale\": \"Culture & values of the company\"}]}, \"required\": [\"query1\", \"query2\", \"query3\", \"query4\", \"query5\"]\n"
538
+ ]
539
+ }
540
+ ],
541
+ "source": [
542
+ "print(x)"
543
+ ]
544
+ },
545
+ {
546
+ "cell_type": "code",
547
+ "execution_count": 54,
548
+ "id": "1fab5ee9",
549
+ "metadata": {},
550
+ "outputs": [],
551
+ "source": [
552
+ "from langchain_core.prompts import (\n",
553
+ " PromptTemplate,\n",
554
+ ")"
555
+ ]
556
+ },
557
+ {
558
+ "cell_type": "code",
559
+ "execution_count": null,
560
+ "id": "e93695ff",
561
+ "metadata": {},
562
+ "outputs": [],
563
+ "source": [
564
+ "prompt = PromptTemplate.from_template(\"Below is the required job description and resume: {background_information}\", input_variables=[\"background_information\"])"
565
+ ]
566
+ },
567
+ {
568
+ "cell_type": "code",
569
+ "execution_count": 55,
570
+ "id": "f5330010",
571
+ "metadata": {},
572
+ "outputs": [],
573
+ "source": [
574
+ "x = ('query1', ('recent developments within the company', 'The Associate Software engineer will build open source software tools for managing and processing 10-100 terabyte-scale datasets.'))"
575
+ ]
576
+ },
577
+ {
578
+ "cell_type": "code",
579
+ "execution_count": 61,
580
+ "id": "5753afd4",
581
+ "metadata": {},
582
+ "outputs": [],
583
+ "source": [
584
+ "keys = ('q', ('y', 'z'))\n",
585
+ "\n",
586
+ "dict_x = dict(zip(keys, x))"
587
+ ]
588
+ },
589
+ {
590
+ "cell_type": "code",
591
+ "execution_count": 63,
592
+ "id": "06d50119",
593
+ "metadata": {},
594
+ "outputs": [
595
+ {
596
+ "data": {
597
+ "text/plain": [
598
+ "('recent developments within the company',\n",
599
+ " 'The Associate Software engineer will build open source software tools for managing and processing 10-100 terabyte-scale datasets.')"
600
+ ]
601
+ },
602
+ "execution_count": 63,
603
+ "metadata": {},
604
+ "output_type": "execute_result"
605
+ }
606
+ ],
607
+ "source": [
608
+ "dict_x[('y', 'z')]"
609
+ ]
610
+ },
611
+ {
612
+ "cell_type": "code",
613
+ "execution_count": null,
614
+ "id": "f03d758e",
615
+ "metadata": {},
616
+ "outputs": [],
617
+ "source": [
618
+ "from langchain.output_parsers import PydanticOutputParser, OutputFixingParser, RetryOutputParser\n",
619
+ "base_parser = PydanticOutputParser(pydantic_object=TavilyQuerySet)\n",
620
+ "\n"
621
+ ]
622
+ },
623
+ {
624
+ "cell_type": "code",
625
+ "execution_count": 1,
626
+ "id": "d8dd9c74",
627
+ "metadata": {},
628
+ "outputs": [
629
+ {
630
+ "ename": "NameError",
631
+ "evalue": "name 'parser' is not defined",
632
+ "output_type": "error",
633
+ "traceback": [
634
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
635
+ "\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
636
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m format_instructions = \u001b[43mparser\u001b[49m.get_format_instructions()\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[34;01mollama\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m chat\n\u001b[32m 5\u001b[39m tavily_role_messages = SystemMessage(content=\n\u001b[32m 6\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\"\"\u001b[39m\n\u001b[32m 7\u001b[39m \u001b[33m When you reply, output **only** valid JSON that can be parsed\u001b[39m\n\u001b[32m (...)\u001b[39m\u001b[32m 31\u001b[39m \u001b[33m 5. Return **only** valid JSON that matches the schema exactly. No other fields\u001b[39m\n\u001b[32m 32\u001b[39m \u001b[33m \u001b[39m\u001b[33m\"\"\"\u001b[39m)\n",
637
+ "\u001b[31mNameError\u001b[39m: name 'parser' is not defined"
638
+ ]
639
+ }
640
+ ],
641
+ "source": [
642
+ "format_instructions = parser.get_format_instructions()\n",
643
+ "from ollama import chat\n",
644
+ "\n",
645
+ "\n",
646
+ "tavily_role_messages = SystemMessage(content=\n",
647
+ " f\"\"\"\n",
648
+ " When you reply, output **only** valid JSON that can be parsed\n",
649
+ " into the Pydantic model shown below. Do **not** wrap it in \"properties\"\n",
650
+ "    or \"required\":\n",
651
+ " \n",
652
+ " ------------------------------------------------\n",
653
+ "\n",
654
+ "\n",
655
+ " {format_instructions}\n",
656
+ "\n",
657
+ " \n",
658
+ " -------------------------------------------------\n",
659
+ "\n",
660
+ " Rules:\n",
661
+ " 1. Generate Tavily DSL only (no natural language outside the JSON).\n",
662
+ " 2. Map the job description into five categories:\n",
663
+ " • query1: recent developments within the company\n",
664
+ " • query2: recent news about the company\n",
665
+ " • query3: information about the company that relates to the role\n",
666
+ " • query4: key customers & partners\n",
667
+ " • query5: culture & values of the company\n",
668
+ " 3. Each value is a two‑element list:\n",
669
+ " [<query string>, <one‑sentence rationale>]\n",
670
+ " 4. If information is missing in the JD, fall back sensibly\n",
671
+ " (e.g. search for “employee testimonials”).\n",
672
+ " 5. Return **only** valid JSON that matches the schema exactly. No other fields\n",
673
+ " \"\"\")\n",
674
+ "\n",
675
+ "\n",
676
+ "response = chat(\n",
677
+ " messages=[{\n",
678
+ " tavily_role_messages,\n",
679
+ " input_message}\n",
680
+ " ],\n",
681
+ " model='llama3.2:latest',\n",
682
+ " format=TavilyQuerySet.model_json_schema(),\n",
683
+ " )"
684
+ ]
685
+ },
686
+ {
687
+ "cell_type": "code",
688
+ "execution_count": 2,
689
+ "id": "8deb0abd",
690
+ "metadata": {},
691
+ "outputs": [],
692
+ "source": [
693
+ "p = ('query1', ['Recent developments within the company using computational microscopy platform', 'This project will require working on microscopes in a BSL-2 imaging laboratory'])"
694
+ ]
695
+ },
696
+ {
697
+ "cell_type": "code",
698
+ "execution_count": 3,
699
+ "id": "d2fcab19",
700
+ "metadata": {},
701
+ "outputs": [
702
+ {
703
+ "data": {
704
+ "text/plain": [
705
+ "'Recent developments within the company using computational microscopy platform'"
706
+ ]
707
+ },
708
+ "execution_count": 3,
709
+ "metadata": {},
710
+ "output_type": "execute_result"
711
+ }
712
+ ],
713
+ "source": [
714
+ "p[1][0]"
715
+ ]
716
+ },
717
+ {
718
+ "cell_type": "code",
719
+ "execution_count": 6,
720
+ "id": "55e3f46a",
721
+ "metadata": {},
722
+ "outputs": [],
723
+ "source": [
724
+ "COVER_LETTER_PROMPT = SystemMessage(content=\"\"\"You are my dedicated assistant for writing job application content, including cover letters, LinkedIn outreach messages, and responses to job-specific questions (e.g., experience, culture fit, or motivation).\n",
725
+ "\n",
726
+ "Your goal is to generate content that:\n",
727
+ "1. Reflects **my personality**, tone, and authentic voice, based on examples I provide.\n",
728
+ "2. Matches **my knowledge, experience, and interests**, which I’ll also share or update as needed.\n",
729
+ "3. Adopts **my writing style and energy** (e.g., grounded, confident, thoughtful—but not overly polished or generic).\n",
730
+ "4. Embeds **genuine enthusiasm or alignment** with the company or role, without sounding performative.\n",
731
+ "5. Avoids filler, clichés, or overused corporate phrases—keep it **authentic and specific**.\n",
732
+ "6. Learns over time by asking me relevant clarifying questions when needed (e.g., change in tone, new experience, updates to goals).\n",
733
+ "7. Balances job description alignment with personal storytelling, roughly in a 75:25 ratio.\n",
734
+ "8. Keeps outputs **concise** and within any given word or character limits.\"\"\")\n"
735
+ ]
736
+ },
737
+ {
738
+ "cell_type": "code",
739
+ "execution_count": 7,
740
+ "id": "ea061e0e",
741
+ "metadata": {},
742
+ "outputs": [],
743
+ "source": [
744
+ "from langchain_core.prompts import (\n",
745
+ " ChatPromptTemplate,\n",
746
+ " HumanMessagePromptTemplate,\n",
747
+ " SystemMessagePromptTemplate,\n",
748
+ ")\n",
749
+ "from langchain_core.messages import (\n",
750
+ " AIMessage,\n",
751
+ " HumanMessage,\n",
752
+ " SystemMessage,\n",
753
+ ")\n",
754
+ "\n",
755
+ "FirstDraftGenerationPromptTemplate = ChatPromptTemplate.from_messages([COVER_LETTER_PROMPT])"
756
+ ]
757
+ },
758
+ {
759
+ "cell_type": "code",
760
+ "execution_count": 8,
761
+ "id": "b96cbe64",
762
+ "metadata": {},
763
+ "outputs": [
764
+ {
765
+ "data": {
766
+ "text/plain": [
767
+ "ChatPromptTemplate(input_variables=[], input_types={}, partial_variables={}, messages=[SystemMessage(content='You are my dedicated assistant for writing job application content, including cover letters, LinkedIn outreach messages, and responses to job-specific questions (e.g., experience, culture fit, or motivation).\\n\\nYour goal is to generate content that:\\n1. Reflects **my personality**, tone, and authentic voice, based on examples I provide.\\n2. Matches **my knowledge, experience, and interests**, which I’ll also share or update as needed.\\n3. Adopts **my writing style and energy** (e.g., grounded, confident, thoughtful—but not overly polished or generic).\\n4. Embeds **genuine enthusiasm or alignment** with the company or role, without sounding performative.\\n5. Avoids filler, clichés, or overused corporate phrases—keep it **authentic and specific**.\\n6. Learns over time by asking me relevant clarifying questions when needed (e.g., change in tone, new experience, updates to goals).\\n7. Balances job description alignment with personal storytelling, roughly in a 75:25 ratio.\\n8. Keeps outputs **concise** and within any given word or character limits.', additional_kwargs={}, response_metadata={})])"
768
+ ]
769
+ },
770
+ "execution_count": 8,
771
+ "metadata": {},
772
+ "output_type": "execute_result"
773
+ }
774
+ ],
775
+ "source": [
776
+ "FirstDraftGenerationPromptTemplate"
777
+ ]
778
+ },
779
+ {
780
+ "cell_type": "code",
781
+ "execution_count": null,
782
+ "id": "dfd03f8d",
783
+ "metadata": {},
784
+ "outputs": [],
785
+ "source": [
786
+ "current_application_session = \"Heello World\"\n",
787
+ "company_research_data = \"Company Research Data\""
788
+ ]
789
+ },
790
+ {
791
+ "cell_type": "code",
792
+ "execution_count": 10,
793
+ "id": "c5fef665",
794
+ "metadata": {},
795
+ "outputs": [],
796
+ "source": [
797
+ "CurrentSessionContextMessage = HumanMessagePromptTemplate.from_template(\n",
798
+ " \"\"\"\n",
799
+ " # Resume and Job Description\n",
800
+ " {current_job_role}\n",
801
+ "\n",
802
+ " # Company Information\n",
803
+ " {company_research_data}\n",
804
+ "\n",
805
+ " Create a cover letter that highlights the match between my qualifications and the job requirements.\n",
806
+ " \"\"\",\n",
807
+ " input_variables=[\"current_job_role\",\n",
808
+ " \"company_research_data\"])"
809
+ ]
810
+ },
811
+ {
812
+ "cell_type": "code",
813
+ "execution_count": 17,
814
+ "id": "c89ba644",
815
+ "metadata": {},
816
+ "outputs": [],
817
+ "source": [
818
+ "FirstDraftGenerationPromptTemplate.append(CurrentSessionContextMessage)"
819
+ ]
820
+ },
821
+ {
822
+ "cell_type": "code",
823
+ "execution_count": 18,
824
+ "id": "6997c553",
825
+ "metadata": {},
826
+ "outputs": [],
827
+ "source": [
828
+ "chain = (\n",
829
+ " ({\"current_job_role\": lambda x: x[\"current_job_role\"],\n",
830
+ " \"company_research_data\": lambda x: x[\"company_research_data\"]})\n",
831
+ " | FirstDraftGenerationPromptTemplate\n",
832
+ " )"
833
+ ]
834
+ },
835
+ {
836
+ "cell_type": "code",
837
+ "execution_count": 19,
838
+ "id": "55f51dbf",
839
+ "metadata": {},
840
+ "outputs": [
841
+ {
842
+ "data": {
843
+ "text/plain": [
844
+ "{\n",
845
+ " current_job_role: RunnableLambda(...),\n",
846
+ " company_research_data: RunnableLambda(...)\n",
847
+ "}\n",
848
+ "| ChatPromptTemplate(input_variables=[], input_types={}, partial_variables={}, messages=[SystemMessage(content='You are my dedicated assistant for writing job application content, including cover letters, LinkedIn outreach messages, and responses to job-specific questions (e.g., experience, culture fit, or motivation).\\n\\nYour goal is to generate content that:\\n1. Reflects **my personality**, tone, and authentic voice, based on examples I provide.\\n2. Matches **my knowledge, experience, and interests**, which I’ll also share or update as needed.\\n3. Adopts **my writing style and energy** (e.g., grounded, confident, thoughtful—but not overly polished or generic).\\n4. Embeds **genuine enthusiasm or alignment** with the company or role, without sounding performative.\\n5. Avoids filler, clichés, or overused corporate phrases—keep it **authentic and specific**.\\n6. Learns over time by asking me relevant clarifying questions when needed (e.g., change in tone, new experience, updates to goals).\\n7. Balances job description alignment with personal storytelling, roughly in a 75:25 ratio.\\n8. Keeps outputs **concise** and within any given word or character limits.', additional_kwargs={}, response_metadata={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['company_research_data', 'current_job_role'], input_types={}, partial_variables={}, template='\\n # Resume and Job Description\\n {current_job_role}\\n\\n # Company Information\\n {company_research_data}\\n\\n Create a cover letter that highlights the match between my qualifications and the job requirements.\\n '), additional_kwargs={})])"
849
+ ]
850
+ },
851
+ "execution_count": 19,
852
+ "metadata": {},
853
+ "output_type": "execute_result"
854
+ }
855
+ ],
856
+ "source": [
857
+ "chain"
858
+ ]
859
+ },
860
+ {
861
+ "cell_type": "code",
862
+ "execution_count": null,
863
+ "id": "48c54667",
864
+ "metadata": {},
865
+ "outputs": [
866
+ {
867
+ "ename": "ModuleNotFoundError",
868
+ "evalue": "No module named 'utils'",
869
+ "output_type": "error",
870
+ "traceback": [
871
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
872
+ "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)",
873
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[25]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[34;01mutils\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mllm_client\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m LLMClient\n\u001b[32m 3\u001b[39m LLM = LLMClient()\n\u001b[32m 4\u001b[39m llm = LLMClient().get_llm()\n",
874
+ "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'utils'"
875
+ ]
876
+ }
877
+ ],
878
+ "source": [
879
+ "from job_writer.utils.llm_client import LLMClient\n",
880
+ "\n",
881
+ "LLM = LLMClient()\n",
882
+ "llm = LLMClient().get_llm()"
883
+ ]
884
+ },
885
+ {
886
+ "cell_type": "code",
887
+ "execution_count": null,
888
+ "id": "421df9ca",
889
+ "metadata": {},
890
+ "outputs": [],
891
+ "source": [
892
+ "from job_writer.tools.TavilySearch import search_company\n",
893
+ "\n",
894
+ "# Test job description\n",
895
+ "test_job = \"\"\"\n",
896
+ "Software Engineer - Backend\n",
897
+ "OpenAI\n",
898
+ "\n",
899
+ "We are looking for experienced backend engineers to join our team. Our ideal candidate will have experience with one or more of the following technologies: Python, Java, C++. \n",
900
+ "\n",
901
+ "Responsibilities:\n",
902
+ "- Design and implement scalable and efficient backend systems\n",
903
+ "- Write clean, maintainable code\n",
904
+ "- Work with cross-functional teams\n",
905
+ "\n",
906
+ "Requirements:\n",
907
+ "- Strong proficiency in one or more programming languages\n",
908
+ "- Strong understanding of software design patterns and principles\n",
909
+ "- Experience with distributed systems\n",
910
+ "\"\"\"\n",
911
+ "\n",
912
+ "# Test the search_company function\n",
913
+ "results = search_company(test_job)\n",
914
+ "for query_key, data in results.items():\n",
915
+ " print(f\"\\n{query_key}:\")\n",
916
+ " print(f\"Query: {data['query']}\")\n",
917
+ " print(f\"Rationale: {data['rationale']}\")\n",
918
+ " if data['results']:\n",
919
+ " print(f\"First result: {data['results'][0][:200]}...\")\n",
920
+ " else:\n",
921
+ " print(\"No results found\")\n"
922
+ ]
923
+ },
924
+ {
925
+ "cell_type": "code",
926
+ "execution_count": 1,
927
+ "id": "18f12ff8",
928
+ "metadata": {},
929
+ "outputs": [],
930
+ "source": [
931
+ "from langchain_core.prompts import (\n",
932
+ " ChatPromptTemplate,\n",
933
+ " HumanMessagePromptTemplate,\n",
934
+ " SystemMessagePromptTemplate,\n",
935
+ ")\n",
936
+ "from langchain_core.messages import (\n",
937
+ " AIMessage,\n",
938
+ " HumanMessage,\n",
939
+ " SystemMessage,\n",
940
+ ")"
941
+ ]
942
+ },
943
+ {
944
+ "cell_type": "code",
945
+ "execution_count": 2,
946
+ "id": "3ba77224",
947
+ "metadata": {},
948
+ "outputs": [],
949
+ "source": [
950
+ "from job_writer.prompts.templates import (\n",
951
+ " TAVILY_QUERY_PROMPT\n",
952
+ ")"
953
+ ]
954
+ },
955
+ {
956
+ "cell_type": "code",
957
+ "execution_count": 3,
958
+ "id": "50bb7c0c",
959
+ "metadata": {},
960
+ "outputs": [],
961
+ "source": [
962
+ "tavily_search_prompt = ChatPromptTemplate.from_messages([\n",
963
+ " SystemMessage(content=TAVILY_QUERY_PROMPT),\n",
964
+ " HumanMessage(\n",
965
+ " \"Below is the required job description and resume: {background_information}\",\n",
966
+ " input_variables=[\"background_information\"]\n",
967
+ " )\n",
968
+ "])"
969
+ ]
970
+ },
971
+ {
972
+ "cell_type": "code",
973
+ "execution_count": 5,
974
+ "id": "372e6346",
975
+ "metadata": {},
976
+ "outputs": [],
977
+ "source": [
978
+ "job_description = \"\"\"\n",
979
+ "Software Engineer - Backend\n",
980
+ "OpenAI\n",
981
+ "\n",
982
+ "We are looking for experienced backend engineers to join our team. Our ideal candidate will have experience with one or more of the following technologies: Python, Java, C++. \n",
983
+ "\n",
984
+ "Responsibilities:\n",
985
+ "- Design and implement scalable and efficient backend systems\n",
986
+ "- Write clean, maintainable code\n",
987
+ "- Work with cross-functional teams\n",
988
+ "\n",
989
+ "Requirements:\n",
990
+ "- Strong proficiency in one or more programming languages\n",
991
+ "- Strong understanding of software design patterns and principles\n",
992
+ "- Experience with distributed systems\n",
993
+ "\"\"\""
994
+ ]
995
+ },
996
+ {
997
+ "cell_type": "code",
998
+ "execution_count": 6,
999
+ "id": "3a27365f",
1000
+ "metadata": {},
1001
+ "outputs": [
1002
+ {
1003
+ "data": {
1004
+ "text/plain": [
1005
+ "'System: \\n<Background>\\nSINCE THE USER IS APPPLYING FOR A JOB, THE QUERIES SHOULD BE WRITTEN IN A WAY THAT RESULST IN RELEVANT INFORMATION ABOUT THE COMPANY. THIS WILL HELP THE USER WRITE A MORE PERSONALIZED AND RELEVANT APPLICATION.\\n\\nCategory mapping (remember this!):\\n query1 : recent developments\\n query2 : recent news\\n query3 : role-related info\\n query4 : key customers & partners \\n query5 : culture & values\\n\\nNote: The above are just categories. The queries should be written in a way that results in relevant information about the company. Must include the company name in the query to ensure results have a higher confidence.\\n</Background>\\n\\n<Instructions>\\n 1. Each array must contain **exactly two** strings: [search_query, one_sentence_rationale] \\n 2. If data is missing, craft a sensible fallback query; never return an empty array. \\n 3. If the employer name cannot be found, use `\"UNKNOWN\"`. \\n 4. Escape JSON only where required.\\n 5. Query cannot be repeated. It will lead to irrelevant results.\\n</Instructions>\\n\\n<EXAMPLE>\\n JSON->\\n \"query1\": (\"....\", \"...\")\\n \"query2\": (\"....\", \"...\")\\n \"query3\": (\"....\", \"...\")\\n \"query4\": (\"....\", \"...\")\\n \"query5\": (\"....\", \"...\")\\n</EXAMPLE>\\n \\nHuman: Below is the required job description and resume: {background_information}'"
1006
+ ]
1007
+ },
1008
+ "execution_count": 6,
1009
+ "metadata": {},
1010
+ "output_type": "execute_result"
1011
+ }
1012
+ ],
1013
+ "source": [
1014
+ "tavily_search_prompt.format(background_information=job_description)"
1015
+ ]
1016
+ },
1017
+ {
1018
+ "cell_type": "code",
1019
+ "execution_count": 8,
1020
+ "id": "6b973991",
1021
+ "metadata": {},
1022
+ "outputs": [
1023
+ {
1024
+ "name": "stdout",
1025
+ "output_type": "stream",
1026
+ "text": [
1027
+ "Initializing LLM with model llama3.2:latest and provider ollama in c:\\users\\risha\\python-dir\\knowledgebase\\job_writer\\utils\\llm_client.py\n",
1028
+ "Initializing LLM with model llama3.2:latest and provider ollama in c:\\users\\risha\\python-dir\\knowledgebase\\job_writer\\utils\\llm_client.py\n"
1029
+ ]
1030
+ }
1031
+ ],
1032
+ "source": [
1033
+ "from job_writer.utils.llm_client import LLMClient\n",
1034
+ "\n",
1035
+ "LLM = LLMClient()\n",
1036
+ "llm = LLMClient().get_llm()"
1037
+ ]
1038
+ },
1039
+ {
1040
+ "cell_type": "code",
1041
+ "execution_count": null,
1042
+ "id": "5ff5ac65",
1043
+ "metadata": {},
1044
+ "outputs": [],
1045
+ "source": []
1046
+ }
1047
+ ],
1048
+ "metadata": {
1049
+ "kernelspec": {
1050
+ "display_name": "Python 3",
1051
+ "language": "python",
1052
+ "name": "python3"
1053
+ },
1054
+ "language_info": {
1055
+ "codemirror_mode": {
1056
+ "name": "ipython",
1057
+ "version": 3
1058
+ },
1059
+ "file_extension": ".py",
1060
+ "mimetype": "text/x-python",
1061
+ "name": "python",
1062
+ "nbconvert_exporter": "python",
1063
+ "pygments_lexer": "ipython3",
1064
+ "version": "3.12.10"
1065
+ }
1066
+ },
1067
+ "nbformat": 4,
1068
+ "nbformat_minor": 5
1069
+ }
tools/TavilySearch.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import json
4
+ import asyncio
5
+
6
+
7
+ from langchain_core.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
8
+ from langchain_core.prompt_values import PromptValue
9
+ from langchain_community.tools.tavily_search import TavilySearchResults
10
+ from langchain_community.tools import tool
11
+ from langchain.output_parsers import PydanticOutputParser, RetryOutputParser
12
+ from openevals.llm import create_async_llm_as_judge
13
+ from openevals.prompts import (
14
+ RAG_RETRIEVAL_RELEVANCE_PROMPT,
15
+ RAG_HELPFULNESS_PROMPT
16
+ )
17
+
18
+ from ..utils.llm_client import LLMClient
19
+ from ..agents.output_schema import TavilyQuerySet
20
+ from ..prompts.templates import TAVILY_QUERY_PROMPT
21
+ from ..classes.classes import ResearchState
22
+
23
logger = logging.getLogger(__name__)

# Shared LLM used both for query generation and as the judge backing the
# evaluators below.
# FIX: the provider must be one of the values LLMClient supports
# ("ollama", "openai", "ollama_json"). The previous value "ollama_llm"
# made LLMClient._initialize_llm() raise ValueError at import time whenever
# this module was the first to create the singleton.
LLM = LLMClient()
llm_client = LLM.get_instance(
    model_name="ejschwar/llama3.2-better-prompts:latest",
    model_provider="ollama",
)
llm_structured = llm_client.get_llm()

# Async LLM-as-judge that scores whether retrieved context is relevant to
# the query that produced it.
relevance_evaluator = create_async_llm_as_judge(
    judge=llm_structured,
    prompt=RAG_RETRIEVAL_RELEVANCE_PROMPT,
    feedback_key="retrieval_relevance",
)

# Async judge for answer helpfulness; the prompt suffix forces a boolean
# verdict so the score can be used directly as a filter.
helpfulness_evaluator = create_async_llm_as_judge(
    judge=llm_structured,
    prompt=RAG_HELPFULNESS_PROMPT
    + '\nReturn "true" if the answer is helpful, and "false" otherwise.',
    feedback_key="helpfulness",
)
41
+
42
@tool
def search_company(job_description: str, company_name: str) -> tuple[dict, list]:
    """Research a company on the web to support a job application.

    An LLM turns the job description into five targeted Tavily queries
    (recent developments, news, role info, customers/partners, culture),
    each query is executed, and the result snippets are collected.

    Args:
        job_description: Full text of the job posting.
        company_name: Name of the employer to research.

    Returns:
        Tuple ``(company_research_data, attempted_queries)`` where
        ``company_research_data`` maps "query1".."query5" to
        ``{"rationale": str, "results": list[str]}`` and
        ``attempted_queries`` lists the query strings actually sent to
        Tavily. (The previous ``-> dict`` annotation was wrong — the
        function has always returned this 2-tuple.)

    Raises:
        json.JSONDecodeError: If the LLM output cannot be decoded.
        Exception: Any parsing/search failure is logged and re-raised.
    """
    try:
        # Pydantic parser wrapped in an LLM-backed retry for malformed output.
        base_parser = PydanticOutputParser(pydantic_object=TavilyQuerySet)
        parser = RetryOutputParser.from_llm(llm_structured, base_parser)
        # (Removed unused `format_instructions` local from the original.)

        # System message carries the query-writing instructions; the human
        # message supplies the job description to mine.
        chat_prompt_tavily: ChatPromptTemplate = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(
                TAVILY_QUERY_PROMPT,
                input_variables=["company_name"]
            ),
            HumanMessagePromptTemplate.from_template(
                "Below is the required job description to parse:\n\n{job_description}",
                input_variables=["job_description"]
            )
        ])

        # PromptValue is required by RetryOutputParser.parse_with_prompt so
        # the retry call can see the original instructions.
        chat_prompt_value: PromptValue = chat_prompt_tavily.format_prompt(
            company_name=company_name,
            job_description=job_description
        )

        chat_prompt_tavily_messages = chat_prompt_tavily.format_messages(
            company_name=company_name,
            job_description=job_description
        )

        # Ask the LLM for the five-query JSON payload.
        search_results_llm = llm_structured.invoke(chat_prompt_tavily_messages)

        try:
            parsed_query_set: TavilyQuerySet = parser.parse_with_prompt(
                search_results_llm.content, chat_prompt_value
            )
            logger.info("Parsed TavilyQuerySet: %s", parsed_query_set.model_dump_json(indent=2))
        except json.JSONDecodeError as e:
            logger.error("JSON decoding error while parsing LLM response: %s. LLM content was: %s",
                         e, search_results_llm.content, exc_info=True)
            raise
        except Exception as e:  # Pydantic validation and other parsing issues
            logger.error("Error parsing TavilyQuerySet from LLM completion: %s. LLM content was: %s",
                         e, search_results_llm.content, exc_info=True)
            raise

        # Tavily client: 4 hits per query, advanced search depth.
        search = TavilySearchResults(max_results=4, search_depth="advanced")

        company_research_data = {}
        attempted_queries = []

        # Collect the query/rationale pairs the LLM produced.
        for attr_name in (f"query{i}" for i in range(1, 6)):
            query_list = getattr(parsed_query_set, attr_name, None)
            if query_list and isinstance(query_list, list) and len(query_list) > 0:
                company_research_data[attr_name] = {
                    'query': query_list[0],
                    # Rationale may be missing if the LLM returned only one item.
                    'rationale': query_list[1] if len(query_list) > 1 else "N/A",
                    'results': []
                }

        # Execute each query and store the result snippets.
        for query_key, query_info in company_research_data.items():
            # FIX: keep the query text in a local. The original code deleted
            # query_info['query'] before the except handler re-read it, so a
            # late failure raised KeyError and masked the real search error.
            query_text = query_info.get('query')
            try:
                if not isinstance(query_text, str) or not query_text.strip():
                    logger.warning("Skipping Tavily search for %s due to invalid/empty query: '%s'",
                                   query_key, query_text)
                    query_info['results'] = []
                    continue

                logger.info("Executing Tavily search for %s: '%s'", query_key, query_text)
                tavily_api_results = search.invoke({
                    "args": {'query': query_text},
                    "type": "tool_call",
                    "id": "job_search",
                    "name": "tavily",
                })
                attempted_queries.append(query_text)
                # Downstream consumers only need rationale + results.
                del query_info['query']

                if tavily_api_results and isinstance(tavily_api_results, list) and len(tavily_api_results) > 0:
                    query_info['results'] = [result['content'] for result in tavily_api_results if 'content' in result]
                else:
                    logger.info("No results or unexpected format from Tavily for %s.", query_key)
                    query_info['results'] = []
            except Exception as e:
                logger.error("Error executing Tavily search for query %s ('%s'): %s",
                             query_key, query_text, str(e), exc_info=True)
                query_info['results'] = []

        return company_research_data, attempted_queries

    except json.JSONDecodeError as e:
        logger.error("JSON decoding error: %s", e)
        raise
    except AttributeError as e:
        logger.error("Attribute error: %s", e)
        raise
    except Exception as e:
        logger.error("Unexpected error: %s", e)
        raise
155
+
156
async def relevance_filter(state: ResearchState) -> ResearchState:
    """Filter Tavily search results for relevance via an async LLM judge.

    Reads ``state["company_research_data"]["tavily_search"]`` (a mapping of
    query keys to ``{"rationale": ..., "results": [...]}``), scores each
    query's result block with ``relevance_evaluator`` (at most 2 concurrent
    judge calls), and keeps only result snippets from blocks the judge
    scored relevant. Relevant snippets are appended to
    ``state["compiled_results"]`` and also replace the ``tavily_search``
    entry as a flat list.

    Args:
        state: Mutable research state shared across graph nodes.

    Returns:
        The (mutated) state. On any error the state is returned unchanged
        so the surrounding graph keeps running.
    """
    try:
        # Mark the current node for downstream observability.
        state["current_node"] = "relevance_filter"

        # Guard: nothing to filter without research data.
        if not state.get("company_research_data"):
            print("ERROR: company_research_data not found in state")
            return state

        # Guard: make sure the tavily_search slot exists; initialize empty
        # and bail out since there is nothing to score.
        if not state["company_research_data"].get("tavily_search"):
            print("ERROR: tavily_search not found in company_research_data")
            state["company_research_data"]["tavily_search"] = []
            return state

        # Initialize the cross-run accumulator if not present.
        if "compiled_results" not in state:
            state["compiled_results"] = []

        print("Filtering results...")
        # Mapping of query keys to their rationale + raw result snippets,
        # e.g. {'query1': {'rationale': ..., 'results': [...]}, ...}.
        # NOTE(review): at the end of this function "tavily_search" is
        # overwritten with a flat list, so .values() below would misbehave
        # on a second pass — confirm this node runs at most once per graph.
        all_query_data = state["company_research_data"].get("tavily_search", {})
        filtered_results_for_current_run = []  # results deemed relevant in this call

        # Limit concurrent judge calls to 2 (local LLM throughput).
        semaphore = asyncio.Semaphore(2)

        async def evaluate_with_semaphore(query_result_item: dict):
            # query_result_item is a dict like {'rationale': '...', 'results': [...]}
            async with semaphore:
                # NOTE(review): this uses the LAST attempted query as the
                # relevance context for EVERY result block, not the query
                # that actually produced the block — verify intent.
                attempted_queries_list = state.get("attempted_search_queries", [])
                input_query = attempted_queries_list[-1] if attempted_queries_list else "No query context available"

                eval_result = await relevance_evaluator(
                    inputs=input_query, context=query_result_item  # context is the whole result block for the query
                )
                return query_result_item, eval_result

        # One judging task per query block that actually has results.
        tasks = [evaluate_with_semaphore(query_info) for query_info in all_query_data.values() if isinstance(query_info, dict) and "results" in query_info]

        # Collect verdicts as they complete; keep snippets from blocks the
        # judge scored as relevant (truthy "score").
        for completed_task in asyncio.as_completed(tasks):
            query_result_item, eval_result = await completed_task
            if eval_result.get("score"):  # Safely check for score
                # query_result_item["results"] is expected to be a list of
                # content strings produced by search_company.
                if isinstance(query_result_item.get("results"), list):
                    filtered_results_for_current_run.extend(query_result_item["results"])
                else:
                    # Defensive: "results" missing or of an unexpected type.
                    logger.warning("Expected a list for 'results' in query_result_item, got: %s", type(query_result_item.get('results')))

        logger.info("Filtered results for current run: %s", filtered_results_for_current_run)

        # Accumulate across runs, and replace the raw search payload with
        # the filtered flat list of snippets.
        state["compiled_results"].extend(filtered_results_for_current_run)
        state["company_research_data"]["tavily_search"] = filtered_results_for_current_run
        return state

    except Exception as e:
        # Broad catch by design: this node must never break the graph flow.
        print(f"ERROR in relevance_filter: {e}")
        import traceback
        traceback.print_exc()
        logger.error(f"Error in relevance_filter: {str(e)}")
        # Return original state to avoid breaking the flow
        return state
tools/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
"""
Tools package: Tavily-backed company research utilities.

Created on Mon Oct 23 16:49:52 2023
@author: rishabhaggarwal
"""

from .TavilySearch import search_company, relevance_filter

# Public API of the tools package.
__all__ = ["search_company", "relevance_filter"]
utils/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Utility modules for the job_writer package.
3
+ """
utils/config.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration utilities for the job writer application.
3
+
4
+ This module provides functions for initializing and configuring
5
+ language models and other resources.
6
+ """
7
+
8
+ import os
9
+ from typing_extensions import Dict, Any, Tuple, Optional
10
+ from langchain.chat_models import init_chat_model
11
+
12
def init_models(config: Optional[Dict[str, Any]] = None) -> Tuple[Any, Any]:
    """Initialize language models based on configuration.

    Args:
        config: Optional mapping that may carry "model_name",
            "temperature" and "precise_temperature"; environment
            variables and hard-coded defaults fill any gaps.

    Returns:
        Tuple ``(llm, llm_precise)`` of Ollama-backed chat models, the
        second one configured with a slightly lower temperature.
    """
    cfg = config or {}

    # Resolve the model and the two sampling temperatures.
    model = cfg.get("model_name", os.getenv("OLLAMA_MODEL", "llama3.2:latest"))
    temp = float(cfg.get("temperature", "0.3"))
    precise_temp = float(cfg.get("precise_temperature", "0.2"))

    return (
        init_chat_model(f"ollama:{model}", temperature=temp),
        init_chat_model(f"ollama:{model}", temperature=precise_temp),
    )
utils/document_processing.py ADDED
@@ -0,0 +1,443 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Document processing utilities for parsing resumes and job descriptions.
3
+ """
4
+
5
+ import logging
6
+ import os
7
+ import re
8
+ import json
9
+
10
+ from pathlib import Path
11
+ from urllib.parse import urlparse
12
+ from typing_extensions import Dict, List, Any
13
+
14
+
15
+ # Langchain imports
16
+ from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
17
+ from langchain_text_splitters import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter
18
+ from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
19
+ from langchain_core.messages import SystemMessage
20
+ from langchain_core.documents import Document
21
+ from langchain_core.output_parsers.json import JsonOutputParser
22
+ from langfuse.decorators import observe, langfuse_context
23
+ from pydantic import BaseModel, Field
24
+
25
+ # Local imports - using relative imports
26
+ from .errors import URLExtractionError, LLMProcessingError, JobDescriptionParsingError
27
+ from .llm_client import LLMClient
28
+ from ..prompts.templates import JOB_DESCRIPTION_PROMPT
29
+
30
# Set up logging
logger = logging.getLogger(__name__)
# NOTE(review): basicConfig() at import time configures the ROOT logger as a
# side effect of importing this library module — confirm this is intended.
logging.basicConfig(level=logging.INFO)


# Default résumé location; empty string when the env var is unset.
DEFAULT_RESUME_PATH: str = os.getenv("DEFAULT_RESUME_PATH", "")


# Most Occurring Resume Section Headers (consumed by identify_resume_sections)
RESUME_SECTIONS: list[str] = [
    "EDUCATION", "EXPERIENCE", "SKILLS", "WORK EXPERIENCE",
    "PROFESSIONAL EXPERIENCE", "PROJECTS", "CERTIFICATIONS",
    "SUMMARY", "OBJECTIVE", "CONTACT", "PUBLICATIONS",
    "AWARDS", "LANGUAGES", "INTERESTS", "REFERENCES"
]

# Initialize LLM client (singleton) in JSON mode; used by
# parse_job_desc_from_url to extract structured job-description fields.
LLM: LLMClient = LLMClient()

llm_client: LLMClient = LLM.get_instance(
    model_name="ejschwar/llama3.2-better-prompts:latest",
    model_provider="ollama_json")
llm_structured = llm_client.get_llm()
54
+
55
+
56
class ResumeSection(BaseModel):
    """Model for a structured resume section (one titled block of text)."""
    title: str = Field(description="The section title (e.g., 'Experience', 'Education')")
    content: str = Field(description="The full content of this section")
60
+
61
+
62
class StructuredResume(BaseModel):
    """Model for a structured resume: its sections plus contact details."""
    sections: List[ResumeSection] = Field(description="List of resume sections")
    contact_info: Dict[str, str] = Field(description="Contact information extracted from the resume")
66
+
67
class JobDescriptionComponents(BaseModel):
    """Model for job description components returned by the extraction LLM."""
    company_name: str = Field(description="The company name")
    job_description: str = Field(description="The job description")
    reasoning: str = Field(description="The reasoning for the extracted information")
72
+
73
@observe()
def clean_resume_text(text: str) -> str:
    """Clean and normalize resume text extracted from a PDF.

    Removes page numbers, collapses whitespace, repairs words hyphenated
    across line breaks, and normalizes bullet glyphs to markdown bullets.

    Args:
        text: Raw text extracted from resume.

    Returns:
        Cleaned text.
    """
    # FIX: remove header/footer page numbers FIRST. The whitespace collapse
    # below turns newlines into spaces, so the previous ordering meant this
    # pattern could never match.
    text = re.sub(r'\n\s*\d+\s*\n', '\n', text)

    # Collapse all runs of whitespace (including newlines) to single spaces.
    text = re.sub(r'\s+', ' ', text)

    # Fix words hyphenated across line breaks by PDF extraction.
    text = re.sub(r'([a-z])- ([a-z])', r'\1\2', text)

    # Replace bullet variations with standard markdown bullets.
    text = re.sub(r'[•●○◘◙♦♣♠★]', '* ', text)

    return text.strip()
96
+
97
@observe()
def extract_contact_info(text: str) -> Dict[str, str]:
    """Extract contact information from resume text.

    Args:
        text: Resume text to extract from.

    Returns:
        Dictionary with any of the keys 'email', 'phone', 'linkedin',
        'name' that could be found.
    """
    info: Dict[str, str] = {}

    # Field name -> first-match regex. Patterns are unchanged from the
    # original implementation.
    patterns = {
        'email': r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b',
        'phone': r'(\+\d{1,3}[-.\s]?)?(\(?\d{3}\)?[-.\s]?)?\d{3}[-.\s]?\d{4}',
        'linkedin': r'linkedin\.com/in/[a-zA-Z0-9_-]+/?',
    }
    for field, pattern in patterns.items():
        match = re.search(pattern, text)
        if match:
            value = match.group(0)
            # LinkedIn handles are stored as a full URL.
            info[field] = 'https://www.' + value if field == 'linkedin' else value

    # Heuristic: the candidate's name is usually the short, digit-free
    # first line of the resume. An LLM would be more accurate.
    first_line = text.strip().split('\n')[0].strip()
    if len(first_line) < 40 and not any(ch.isdigit() for ch in first_line):
        info['name'] = first_line

    return info
131
+
132
@observe()
def identify_resume_sections(text: str) -> List[Dict[str, Any]]:
    """Identify sections in a resume text using known header keywords.

    Args:
        text: Full resume text.

    Returns:
        List of dicts with 'title' (lower-cased header) and 'content'
        (the section text, INCLUDING its header line — the previous
        in-code comment claimed the header was excluded, which was
        wrong). If no known headers are found, the whole text is
        returned as a single 'resume' section.
    """
    sections: List[Dict[str, Any]] = []

    # (Removed ~20 lines of dead, commented-out LLM-based section
    # extraction that the regex path below had replaced.)

    # Match any known header surrounded only by punctuation/whitespace on
    # its own line, case-insensitively.
    section_pattern = r'(?:^|\n)(?:[^a-zA-Z\d\s]|\s)*(' + '|'.join(RESUME_SECTIONS) + r')(?:[^a-zA-Z\d\s]|\s)*(?:$|\n)'
    matches = list(re.finditer(section_pattern, text, re.IGNORECASE))

    if not matches:
        # No recognizable headers: treat the whole resume as one section.
        sections.append({
            "title": "resume",
            "content": text,
        })
        return sections

    # Process each section.
    for i, match in enumerate(matches):
        section_title = match.group(1).strip()
        start_pos = match.start()

        # Section runs until the next header (or the end of the text).
        end_pos = matches[i + 1].start() if i < len(matches) - 1 else len(text)

        # The slice starts at the header itself, so the header line is
        # part of the stored content.
        section_content = text[start_pos:end_pos].strip()

        sections.append({
            "title": section_title.lower(),
            "content": section_content
        })

    return sections
196
+
197
+
198
+ def _collapse_ws(text: str) -> str:
199
+ """Collapse stray whitespace but keep bullet breaks."""
200
+ text = re.sub(r"\n\s*([•\-–])\s*", r"\n\1 ", text)
201
+ return re.sub(r"[ \t\r\f\v]+", " ", text).replace(" \n", "\n").strip()
202
+
203
+
204
+ def _is_heading(line: str) -> bool:
205
+ return (
206
+ line.isupper()
207
+ and len(line.split()) <= 5
208
+ and not re.search(r"\d", line)
209
+ )
210
+
211
def parse_resume(file_path: str | Path) -> List[Document]:
    """
    Load a résumé from a PDF or TXT file into chunked Documents.

    Short ALL-CAPS lines are tagged as Markdown "###" headers so the
    Markdown splitter can attach a {section} metadata entry; otherwise a
    character splitter (≈400 chars, 50-char overlap) is used. Every chunk
    gets {source} metadata.

    Args:
        file_path: Path to a .pdf or .txt résumé.

    Returns:
        List of Document chunks with {source, section} metadata.

    Raises:
        ValueError: For unsupported, unreadable, or empty files.
    """
    file_extension = Path(file_path).suffix.lower()

    # Handle different file types
    if file_extension == '.pdf':
        # FIX: join ALL pages. The previous code used only .load()[0],
        # which silently truncated multi-page résumés to page one.
        pages = PyPDFLoader(str(file_path), extraction_mode="layout").load()
        text = "\n".join(page.page_content for page in pages)
    elif file_extension == '.txt':
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                text = f.read()
            if not text.strip():
                raise ValueError("File is empty")
        except Exception as e:
            logger.error(f"Error reading text file: {str(e)}")
            raise ValueError(f"Could not read text file: {file_path}. Error: {str(e)}")
    else:
        raise ValueError(f"Unsupported resume file type: {file_path}. Supported types: .pdf, .txt")

    text = _collapse_ws(text)

    # Tag headings with "###" so the Markdown splitter can see them.
    tagged_lines = [
        f"### {ln}" if _is_heading(ln) else ln
        for ln in text.splitlines()]

    md_text = "\n".join(tagged_lines)

    if "###" in md_text:
        splitter = MarkdownHeaderTextSplitter(
            headers_to_split_on=[("###", "section")]
        )
        chunks = splitter.split_text(md_text)  # already returns Documents
    else:
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=400, chunk_overlap=50
        )
        chunks = [Document(page_content=chunk, metadata={}) for chunk in splitter.split_text(md_text)]

    # Attach metadata to every chunk, regardless of which splitter ran.
    for doc in chunks:
        doc.metadata.setdefault("source", str(file_path))
        # "section" is already present when the header splitter was used.

    return chunks
257
+
258
+
259
def get_job_description(file_path_or_url: str) -> Document:
    """Load a job description from a URL or a local .txt file.

    Args:
        file_path_or_url: http(s) URL of a job posting, or path to a
            local .txt file.

    Returns:
        Document containing the job description (URL inputs are parsed by
        parse_job_desc_from_url).

    Raises:
        ValueError: If the file is empty, unreadable, or of an
            unsupported type.
    """
    # URLs are delegated to the LLM-backed web extractor.
    if file_path_or_url.startswith(('http://', 'https://')):
        return parse_job_desc_from_url(file_path_or_url)

    # Handle local files based on extension.
    file_extension = Path(file_path_or_url).suffix.lower()

    if file_extension == '.txt':
        try:
            with open(file_path_or_url, 'r', encoding='utf-8') as f:
                content = f.read()
            # Raised inside the try so it is re-wrapped like any read error.
            if not content.strip():
                raise ValueError(f"File is empty: {file_path_or_url}")
            return Document(page_content=content, metadata={"source": file_path_or_url})
        except Exception as e:
            logger.error(f"Error reading text file: {str(e)}")
            raise ValueError(f"Could not read text file: {file_path_or_url}. Error: {str(e)}")

    # FIX: the previous message advertised .pdf/.docx/.md support that this
    # function never implemented; keep the message honest.
    raise ValueError(f"Unsupported file type: {file_path_or_url}. Supported types: .txt (or an http(s) URL)")
290
+
291
+
292
def parse_job_desc_from_url(url: str) -> Document:
    """Extract a job description from a URL.

    Loads the page, splits it, and asks the JSON-mode LLM to pull out the
    company name and the job description text.

    Args:
        url: URL of the job posting.

    Returns:
        Document with the job description as page_content and
        {"company_name": ...} metadata on the success path.
        NOTE(review): several fallback paths instead return a plain list
        ``[extracted_text, company_name]`` — callers must handle both
        shapes until the return type is unified.

    Raises:
        ValueError: If URL format is invalid.
        URLExtractionError: If content extraction fails.
        LLMProcessingError: If LLM processing fails (and no raw text is
            available to fall back to).
        JobDescriptionParsingError: For any other unexpected failure.
    """

    logger.info("Starting job description extraction from URL: %s", url)
    extracted_text = None

    try:
        # Validate URL format before doing any network work.
        parsed_url = urlparse(url)
        if not all([parsed_url.scheme, parsed_url.netloc]):
            logger.error("Invalid URL format: %s", url)
            raise ValueError("URL must start with http:// or https://")

        # --- Stage 1: fetch and split the page content -------------------
        try:
            loader = WebBaseLoader(url)
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000,
                chunk_overlap=200,
                separators=["\n\n", "\n", ". ", " ", ""]
            )
            document_splitted = loader.load_and_split(text_splitter=text_splitter)

            if not document_splitted:
                logger.error("No content could be extracted from URL: %s", url)
                raise URLExtractionError("No content could be extracted from URL")

            # Re-join the chunks into one string for the LLM prompt.
            extracted_text = " ".join(doc.page_content for doc in document_splitted)
            logger.info("Successfully extracted %d characters from URL", len(extracted_text))

        except Exception as e:
            # Any fetch/split failure (including the raise above) is
            # surfaced uniformly as URLExtractionError.
            raise URLExtractionError(f"Failed to extract content from URL: {str(e)}") from e

        # --- Stage 2: structure the text with the LLM --------------------
        if not llm_structured:
            logger.warning("LLM not available, returning raw extracted text")
            # NOTE(review): list fallback, not a Document — see docstring.
            return [extracted_text, "Unknown Company"]

        try:
            output_parser: JsonOutputParser = JsonOutputParser(pydantic_object=JobDescriptionComponents)

            human_prompt = "Below is the job description enclosed in triple quotes:\n\n '''{extracted_text}'''\n\n"

            job_description_parser_system_message = SystemMessagePromptTemplate.from_template(
                template=JOB_DESCRIPTION_PROMPT,
                input_variables=[])
            job_description_parser_human_message = HumanMessagePromptTemplate.from_template(
                template=human_prompt,
                input_variables=["extracted_text"])
            chat_prompt = ChatPromptTemplate.from_messages([job_description_parser_system_message, job_description_parser_human_message])

            chain = chat_prompt | llm_structured | output_parser

            try:
                # Invoke the chain; wrap raw invocation failures separately
                # so they are not mistaken for parsing failures.
                try:
                    result = chain.invoke({"extracted_text": extracted_text})
                except Exception as e:
                    logger.error("LLM invocation failed: %s", str(e))
                    raise LLMProcessingError(f"LLM invocation failed: {str(e)}") from e
                print("LLM processing result: ", result)
                # Handle different types of LLM results.
                if isinstance(result, JobDescriptionComponents):
                    # Direct Pydantic model -> plain dict.
                    result = result.model_dump()
                if isinstance(result, dict):
                    print("LLM returned a dictionary, converting to JobDescriptionComponents model", result)
                else:
                    # Unexpected result type.
                    print(f"Unexpected LLM result type: {type(result)}")
                    logger.error("Unexpected LLM result type: %s", type(result))
                    raise LLMProcessingError("Invalid LLM response format")

                # Validate required fields.
                if not result.get("job_description") or not result.get("company_name"):
                    logger.warning("LLM returned empty required fields")
                    raise LLMProcessingError("Missing required fields in LLM response")

                logger.info("Successfully processed job description with LLM")
                # Success path: wrap the result in a Document.
                job_doc = Document(
                    page_content=result["job_description"],
                    metadata={"company_name": result["company_name"]}
                )

                return job_doc

            except Exception as e:
                # LLMProcessingError is re-raised untouched; other errors
                # may be recoverable JSON formatting problems.
                if isinstance(e, LLMProcessingError):
                    raise

                # Try to recover from JSON parsing errors by fishing the
                # JSON object out of the parser's error message.
                error_msg = str(e)
                if "Invalid json output" in error_msg:
                    logger.warning("Attempting to recover from invalid JSON output")

                    output = error_msg.split("Invalid json output:", 1)[1].strip()
                    start = output.find('{')
                    end = output.rfind('}') + 1

                    if start >= 0 and end > start:
                        try:
                            clean_json = output[start:end]
                            result = output_parser.parse(clean_json)
                            if hasattr(result, "job_description") and hasattr(result, "company_name"):
                                # NOTE(review): list fallback, not a Document.
                                return [result.job_description, result.company_name]
                        except json.JSONDecodeError as json_e:
                            logger.error("Failed to recover from JSON error: %s", json_e)

                raise LLMProcessingError(f"Failed to process job description with LLM: {str(e)}") from e

        except Exception as e:
            if isinstance(e, LLMProcessingError):
                # NOTE(review): this branch looks inverted — it warns about
                # a fallback but then re-raises when text IS available, and
                # returns [None, ...] when it is not. Verify intent.
                if extracted_text:
                    logger.warning("LLM processing failed, falling back to raw text")
                    raise e
                return [extracted_text, "Unknown Company"]
            raise LLMProcessingError(f"Failed to process job description with LLM: {str(e)}") from e

    except ValueError as e:
        logger.error("URL validation error: %s", str(e))
        raise
    except URLExtractionError as e:
        logger.error("Content extraction error: %s", str(e))
        raise
    except LLMProcessingError as e:
        # Last-resort fallback: hand back the raw page text if we got any.
        if extracted_text:
            logger.warning("Using extracted text as fallback")
            return [extracted_text, "Unknown Company"]
        raise
    except Exception as e:
        logger.error("Unexpected error during job description parsing: %s", str(e))
        raise JobDescriptionParsingError(f"Failed to parse job description: {str(e)}") from e
utils/errors.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ class ModelNotFoundError(Exception):
2
+ """Exception raised when a requested model is not found."""
3
+ def __init__(self, model_name: str):
4
+ super().__init__(f"Model '{model_name}' not found.")
5
+ self.model_name = model_name
6
+
7
+ def __str__(self):
8
+ return f"ModelNotFoundError: {self.model_name}"
9
+
10
class URLExtractionError(Exception):
    """Raised when content cannot be extracted from a URL."""


class LLMProcessingError(Exception):
    """Raised when LLM processing fails."""


class JobDescriptionParsingError(Exception):
    """Base class for job description parsing errors."""
utils/langfuse_handler.py ADDED
File without changes
utils/llm_client.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLM Client module for managing language model interactions.
3
+ """
4
+
5
+ import os
6
+ from typing_extensions import Optional, Union
7
+
8
+
9
+ from langchain_core.language_models.chat_models import BaseChatModel
10
+ from langchain_core.language_models.llms import BaseLLM
11
+ from langchain_ollama import ChatOllama
12
+ from langchain_openai import ChatOpenAI
13
+
14
+ from .errors import ModelNotFoundError
15
+
16
+
17
class LLMClient:
    """
    Client for managing language model interactions.

    Provides a unified interface over the supported backends:
    "ollama" (chat), "ollama_json" (chat constrained to JSON output)
    and "openai". Normally used process-wide via get_instance().
    """

    _instance = None  # Singleton instance shared through get_instance()

    @classmethod
    def get_instance(cls, model_name: Optional[str] = None, model_provider: Optional[str] = None):
        """Get or create the singleton LLM client.

        Args:
            model_name: Optional model name to override the default.
            model_provider: Optional provider used when the instance is
                (re)created.

        Returns:
            The shared LLMClient instance.
        """
        if cls._instance is None:
            cls._instance = LLMClient(model_name, model_provider)
        elif model_name is not None and cls._instance.model_name != model_name:
            # FIX: forward the provider on reinitialization. Previously it
            # was dropped here, so switching models silently fell back to
            # the environment-default provider.
            cls._instance = LLMClient(model_name, model_provider)
        # NOTE(review): requesting the same model with a *different*
        # provider reuses the existing instance unchanged — confirm that
        # is the intended singleton semantics.
        return cls._instance

    def __init__(self, model_name: Optional[str] = None, model_provider: Optional[str] = None):
        """Initialize the LLM client with the specified model.

        Args:
            model_name: Model to use (default: DEFAULT_LLM_MODEL env var
                or "llama3.2:latest").
            model_provider: Backend to use (default: LLM_PROVIDER env var
                or "ollama").
        """
        print("Initializing LLM Client with model:", model_name, "and provider:", model_provider)
        self.model_name = model_name or os.getenv("DEFAULT_LLM_MODEL", "llama3.2:latest")
        self.model_provider = model_provider or os.getenv("LLM_PROVIDER", "ollama").lower()
        self.llm = self._initialize_llm()

    def __str__(self):
        return f"LLMClient(model_name={self.model_name}, provider={self.model_provider})"

    def _initialize_llm(self) -> Union[BaseLLM, BaseChatModel]:
        """Initialize the appropriate LLM based on configuration.

        Returns:
            Initialized LLM instance.

        Raises:
            ValueError: If the configured provider is not supported.
        """
        print(f"Initializing LLM with model {self.model_name} and provider {self.model_provider} in {__file__}")
        if self.model_provider == "ollama":
            return self._initialize_llama()
        elif self.model_provider == "openai":
            return self._initialize_openai()
        elif self.model_provider == "ollama_json":
            return self._initialize_jsonllm()
        else:
            raise ValueError(f"Unsupported LLM provider: {self.model_provider}")

    def _initialize_llama(self) -> BaseChatModel:
        """Initialize an Ollama chat model.

        Returns:
            ChatOllama with deterministic-leaning sampling settings.

        Raises:
            ModelNotFoundError: If the model cannot be initialized.
        """
        try:
            model: ChatOllama = ChatOllama(model=self.model_name, temperature=0.1, top_k=1, repeat_penalty=1.2)
            return model
        except Exception as e:
            raise ModelNotFoundError(f"Failed to initialize Ollama with model {self.model_name}: {e}") from e

    def _initialize_jsonllm(self) -> BaseChatModel:
        """Initialize an Ollama chat model constrained to JSON output.

        (The previous docstring incorrectly described this as a Mistral
        model; it is ChatOllama with format='json'.)

        Returns:
            ChatOllama instance emitting JSON.

        Raises:
            ModelNotFoundError: If the model cannot be initialized.
        """
        try:
            model: ChatOllama = ChatOllama(model=self.model_name, format='json', temperature=0.1, top_k=1, repeat_penalty=1.2)
            return model
        except Exception as e:
            raise ModelNotFoundError(f"Failed to initialize Ollama with model {self.model_name}: {e}") from e

    def _initialize_openai(self) -> BaseChatModel:
        """Initialize an OpenAI chat model.

        Returns:
            ChatOpenAI instance.

        Raises:
            ValueError: If OPENAI_API_KEY is not set.
            ModelNotFoundError: If the model cannot be initialized.
        """
        # SECURITY FIX: the key must come from the environment. The previous
        # revision hard-coded a bearer token in source control; that
        # credential must be treated as compromised and rotated.
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY environment variable not set")

        try:
            return ChatOpenAI(model_name=self.model_name, api_key=api_key)
        except Exception as e:
            # FIX: message previously said "Ollama" on the OpenAI path.
            raise ModelNotFoundError(f"Failed to initialize OpenAI with model {self.model_name}: {e}") from e

    def get_llm(self) -> Union[BaseLLM, BaseChatModel]:
        """Get the initialized LLM instance.

        Returns:
            LLM instance.

        Raises:
            RuntimeError: If the client was never initialized.
        """
        if self.llm is None:
            raise RuntimeError("LLM client not initialized")
        return self.llm

    def reinitialize(self, model_name: Optional[str] = None, provider: Optional[str] = None) -> None:
        """Reinitialize the LLM with a different model or provider.

        Args:
            model_name: New model name to use.
            provider: New provider to use.
        """
        print(f"Reinitializing LLM client from {self.model_name} to {model_name}")
        if model_name:
            self.model_name = model_name
        if provider:
            self.model_provider = provider.lower()

        self.llm = self._initialize_llm()
+
utils/vector_store.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Vector storage utilities for the job writer application.
3
+
4
+ This module provides functions for storing and retrieving
5
+ documents from vector databases.
6
+ """
7
+
8
+ # Standard library imports
9
+ import os
10
+ from typing_extensions import List, Optional
11
+
12
+ # Third-party library imports
13
+ from langchain_core.documents import Document
14
+ from langchain_community.vectorstores import Pinecone
15
+ from langchain_ollama import OllamaEmbeddings
16
+ from pinecone import Pinecone as PineconeClient, ServerlessSpec
17
+
18
+ # Default configuration
19
+ DEFAULT_PINECONE_INDEX = "job-writer-vector"
20
+
21
class VectorStoreManager:
    """Manager class for vector store operations.

    Wraps a Pinecone index plus an Ollama embedding model, and exposes
    namespace-scoped store/retrieve helpers.
    """

    def __init__(
        self,
        index_name: str = DEFAULT_PINECONE_INDEX,
        embedding_model: str = "llama3.2:latest"
    ):
        """Initialize the vector store manager.

        The Pinecone API key is read from the ``PINECONE_API_KEY``
        environment variable (the old docstring documented a nonexistent
        ``api_key`` parameter).

        Args:
            index_name: Name of the Pinecone index to use
            embedding_model: Name of the Ollama model to use for embeddings

        Raises:
            ValueError: If ``PINECONE_API_KEY`` is not set.
        """
        api_key = os.getenv("PINECONE_API_KEY")
        if not api_key:
            raise ValueError("Environment variable PINECONE_API_KEY not set.")

        self.index_name = index_name

        # Initialize embeddings
        self.embeddings = OllamaEmbeddings(
            model=embedding_model
        )

        # Initialize Pinecone client
        self.client = PineconeClient(api_key=api_key)

        # Ensure index exists
        self._ensure_index_exists()

    def _ensure_index_exists(self):
        """Make sure the required index exists, create if not."""
        # Determine the embedding dimension by embedding a throwaway query;
        # the index must be created with a matching dimension.
        try:
            sample_embedding = self.embeddings.embed_query("Test query")
            embedding_dim = len(sample_embedding)
        except Exception as e:
            print(f"Error determining embedding dimension: {e}")
            print("Falling back to default dimension of 384")
            embedding_dim = 384  # Common default for Ollama embeddings

        # Check if the index exists
        index_exists = False
        try:
            index_list = self.client.list_indexes()
            index_list = [i.name for i in index_list]
            index_exists = self.index_name in index_list
        except Exception as e:
            print(f"Error checking Pinecone indexes: {e}")

        # Create index if it doesn't exist
        if not index_exists:
            try:
                print(f"Creating new index: {self.index_name}")
                self.client.create_index(
                    name=self.index_name,
                    dimension=embedding_dim,
                    spec=ServerlessSpec(region="us-east-1", cloud="aws"),
                    metric="cosine"
                )
                print(f"Successfully created index: {self.index_name}")
            except Exception as e:
                # Another process may have created the index between our
                # existence check and the create call — treat as success.
                if "ALREADY_EXISTS" in str(e):
                    print(f"Index {self.index_name} already exists (created in another process)")
                else:
                    print(f"Error creating index: {e}")
        else:
            print(f"Using Pinecone Index: {self.index_name}")

    def _vector_store(self, namespace):
        """Build a LangChain Pinecone vector store bound to *namespace*.

        Shared by :meth:`store_documents` and :meth:`retrieve_similar`,
        which previously duplicated this construction inline.
        """
        index = self.client.Index(self.index_name)
        return Pinecone(
            index=index,
            embedding=self.embeddings,
            text_key="text",
            namespace=namespace
        )

    def store_documents(self, docs: List[Document], namespace: str) -> None:
        """Store documents in vector database.

        Args:
            docs: List of Document objects to store
            namespace: Namespace to store documents under

        Raises:
            Exception: Re-raises any failure from the underlying store.
        """
        try:
            vector_store = self._vector_store(namespace)
            vector_store.add_documents(docs)
            print(f"Successfully stored {len(docs)} documents in namespace: {namespace}")
        except Exception as e:
            print(f"Error storing documents: {e}")
            raise

    def retrieve_similar(self, query: str, namespace: str, k: int = 3):
        """Retrieve similar documents based on a query.

        Args:
            query: The query text to search for
            namespace: Namespace to search in
            k: Number of results to return

        Returns:
            List of Document objects; empty list on error (best-effort).
        """
        try:
            vectorstore = self._vector_store(namespace)
            docs = vectorstore.similarity_search(query, k=k, namespace=namespace)
            return docs
        except Exception as e:
            # Best-effort retrieval: swallow the error, return no results.
            print(f"Error retrieving documents: {e}")
            return []
147
+
148
+
149
+
150
+
151
if __name__ == "__main__":
    # Smoke test, run only when this module is executed directly.
    # The previous code ran at import time, opening a Pinecone connection on
    # every import AND rebinding the name VectorStoreManager from the class
    # to an instance, which broke any later `VectorStoreManager(...)` call.
    manager = VectorStoreManager()

    manager.store_documents(
        docs=[Document(page_content="Sample content", metadata={"source": "test"})],
        namespace="test_namespace"
    )
workflow.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Workflow runner for the job application writer.
3
+
4
+ This module provides functions for running the job application
5
+ writer graph in both interactive and batch modes.
6
+ """
7
+
8
+ import asyncio
9
+ import argparse
10
+ import sys
11
+
12
+ from datetime import datetime
13
+ from langchain_core.tracers import ConsoleCallbackHandler
14
+ from langgraph.graph import StateGraph
15
+ from langfuse import Langfuse
16
+
17
+
18
+ from job_writer.nodes import Dataloading
19
+ from job_writer.nodes.research_workflow import research_workflow
20
+ from job_writer.classes import AppState, DataLoadState
21
+ from job_writer.agents.nodes import (
22
+ create_draft,
23
+ critique_draft,
24
+ finalize_document,
25
+ human_approval,
26
+ )
27
+ from job_writer.nodes import (
28
+ generate_variations,
29
+ self_consistency_vote
30
+ )
31
+
32
+
33
+ class JobWorkflow:
34
+ """
35
+ Workflow runner for the job application writer.
36
+ Args:
37
+ resume: Resume text or file path
38
+ job_description: Job description text or URL
39
+ content:
40
+ Type of application material to generate
41
+ model_config: Configuration for language models
42
+ """
43
+
44
+ #
45
+ def __init__(self, resume=None, job_description_source=None, content=None, model_configuration=None):
46
+ """Initialize the Writing Workflow."""
47
+ print(f"Initializing Workflow for {content}")
48
+ self.resume = resume
49
+ self.job_description_source = job_description_source
50
+ self.content = content
51
+ self.model_configuration = model_configuration
52
+
53
+ # Initialize the app state
54
+ self.app_state = AppState(
55
+ resume_path=resume,
56
+ job_description_source=job_description_source,
57
+ company_research_data=None,
58
+ draft="",
59
+ feedback="",
60
+ final="",
61
+ content=content,
62
+ current_node=""
63
+ )
64
+
65
+ self.__init__nodes()
66
+ self._build_workflow()
67
+
68
+ self.langfuse = Langfuse()
69
+
70
+
71
+ def __init__nodes(self):
72
+ self.dataloading = Dataloading()
73
+ # self.createdraft = create_draft()
74
+
75
+
76
+ def _build_workflow(self):
77
+ # Build the graph with config
78
+ self.job_app_graph = StateGraph(DataLoadState)
79
+
80
+
81
+ self.job_app_graph.add_node("initialize_system", self.dataloading.system_setup)
82
+ self.job_app_graph.add_node("load", self.dataloading.run)
83
+ # self.job_app_graph.add_node("build_persona", select_persona)
84
+
85
+
86
+ # Add research workflow as a node
87
+ self.job_app_graph.add_node("research", research_workflow)
88
+ self.job_app_graph.add_node("create_draft", create_draft)
89
+ self.job_app_graph.add_node("variations", generate_variations)
90
+ self.job_app_graph.add_node("self_consistency", self_consistency_vote)
91
+ self.job_app_graph.add_node("critique", critique_draft)
92
+ self.job_app_graph.add_node("human_approval", human_approval)
93
+ self.job_app_graph.add_node("finalize", finalize_document)
94
+
95
+ self.job_app_graph.set_entry_point("initialize_system")
96
+ self.job_app_graph.set_finish_point("finalize")
97
+
98
+ self.job_app_graph.add_edge("initialize_system", "load")
99
+ self.job_app_graph.add_conditional_edges("load", self.dataloading.verify_inputs)
100
+ self.job_app_graph.add_edge("research", "create_draft")
101
+ self.job_app_graph.add_edge("create_draft", "variations")
102
+ self.job_app_graph.add_edge("variations", "self_consistency")
103
+ self.job_app_graph.add_edge("self_consistency", "critique")
104
+ self.job_app_graph.add_edge("critique", "human_approval")
105
+ self.job_app_graph.add_edge("human_approval", "finalize")
106
+
107
+
108
+ async def run(self) -> str | None:
109
+ """
110
+ Run the job application writer workflow.
111
+ """
112
+ # Compile the graph
113
+ try:
114
+ compiled_graph = self.compile()
115
+ except Exception as e:
116
+ print(f"Error compiling graph: {e}")
117
+ return
118
+ # Set up run configuration
119
+ run_name = f"Job Application Writer - {self.app_state['content']} - {datetime.now().strftime('%Y-%m-%d-%H%M%S')}"
120
+ config = {
121
+ "configurable": {
122
+ "thread_id": f"job_app_session_{datetime.now().strftime('%Y%m%d%H%M%S')}",
123
+ "callbacks": [ConsoleCallbackHandler()],
124
+ "run_name": run_name,
125
+ "tags": ["job-application", self.app_state['content']]
126
+ },
127
+ "recursion_limit": 10
128
+ }
129
+ # Run the graph
130
+ try:
131
+ self.app_state["current_node"] = "initialize_system"
132
+ graph_output = await compiled_graph.ainvoke(self.app_state, config=config)
133
+ except Exception as e:
134
+ print(f"Error running graph: {e}")
135
+ return
136
+
137
+ return graph_output
138
+
139
+
140
+ def compile(self):
141
+ """Compile the graph."""
142
+ graph = self.job_app_graph.compile()
143
+ return graph
144
+
145
+ def print_result(self, content_type, final_content):
146
+ """Print the final generated content to the console."""
147
+ print("\n" + "="*80)
148
+ print(f"FINAL {content_type.upper()}:")
149
+ print(final_content)
150
+ print("="*80)
151
+
152
+
153
+ def save_result(self, content_type, final_content):
154
+ """Save the final generated content to a file and return the filename."""
155
+ output_file = f"{content_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
156
+ with open(output_file, "w", encoding="utf-8") as f:
157
+ f.write(final_content)
158
+ print(f"\nSaved to {output_file}")
159
+ return output_file
160
+
161
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description="Generate job application materials")
    parser.add_argument("--resume", required=True, help="Path to resume file or resume text")
    parser.add_argument("--job", required=True, help="Path/URL to job description or description text")
    parser.add_argument("--type", default="cover_letter",
                        choices=["cover_letter", "bullets", "linkedin_note"],
                        help="Type of application material to generate")
    parser.add_argument("--model", help="Ollama model to use")
    parser.add_argument("--temp", type=float, help="Temperature for generation")

    args = parser.parse_args()

    # Configure models if specified
    model_config = {}
    if args.model:
        model_config["model_name"] = args.model
    if args.temp is not None:
        # Clamp user-supplied temperatures to conservative ceilings.
        model_config["temperature"] = min(0.25, args.temp)
        model_config["precise_temperature"] = min(0.2, args.temp)

    # Initialize the workflow
    workflow = JobWorkflow(
        resume=args.resume,
        job_description_source=args.job,
        content=args.type,
        model_configuration=model_config
    )

    # Run the workflow
    result = asyncio.run(workflow.run())

    # Single failure check. The original tested `result` twice; since the
    # first failure branch already called sys.exit(1), the second else
    # ("Error saving result.") was unreachable dead code.
    if not result:
        print("Error running workflow.")
        sys.exit(1)

    # Print the result to the console, then save it to a file.
    workflow.print_result(args.type, result["final"])
    workflow.save_result(args.type, result["final"])

    print("Workflow completed successfully.")