Jayandhan Soruban
New APIs added
8437d61
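# Two-agent job application flow: an analyzer agent scrapes the application form with
# Playwright, and a filler agent fills and submits it using the resume defined below.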
import json
import asyncio
import os
from dotenv import load_dotenv
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tools import tool
from langchain_google_genai import ChatGoogleGenerativeAI
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
load_dotenv()
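# Example .env entry (placeholder value, not a real key):
#   GOOGLE_API_KEY=your-google-ai-studio-key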
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise ValueError("GOOGLE_API_KEY not found. Make sure it's set in the .env file.")

llm_model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    google_api_key=api_key
)
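# Module-level Playwright page handle; main() assigns it so the filler tools
# (fill_text_field, upload_resume, click_element) all act on the same browser page.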
page = None
resume_text = """Name: Johnathan R. Smith
Phone: +91-XXXXXXXXXX | Email: johnsmith@email.com
| LinkedIn: linkedin.com/in/johnsmith | Location: Bangalore, India
Executive Summary
Strategic and results-driven professional with 12+ years of experience in Technology Management, AI/ML Solutions, and Enterprise Software Development. Proven track record of leading cross-functional teams, delivering large-scale digital transformation projects, and driving business growth through innovative technology solutions. Adept at stakeholder management, process automation, and mentoring high-performance teams.
Core Competencies
AI/ML & Generative AI Solutions
Cloud Computing (AWS, Azure, GCP)
Enterprise Application Development
Project & Program Management (Agile/Scrum)
Stakeholder & Client Engagement
Strategic Roadmap Planning
Data Engineering & Analytics
Leadership & People Management
Professional Experience
Senior Engineering Manager – Infosys Ltd, Bangalore
Jan 2018 – Present
Spearheaded AI-driven digital transformation projects worth $10M+, improving client efficiency by 30%.
Directed a 40+ member engineering team across India, US, and Europe.
Designed and deployed a Generative AI-based HR Assistant handling 100k+ queries monthly with 95% accuracy.
Established cloud migration roadmap, moving legacy ERP systems to AWS with zero downtime.
Mentored mid-level managers and engineers, resulting in 20+ team members promoted internally.
Key Achievement:
Reduced project turnaround time by 25% by implementing Agile-Scaled frameworks across 5 business units.
Project Lead – Wipro Technologies, Hyderabad
Aug 2013 – Dec 2017
Led the development of enterprise AI chatbots and RPA solutions for banking & retail clients.
Implemented data preprocessing pipelines for large-scale analytics projects (~5TB datasets).
Coordinated with C-suite stakeholders to define KPIs, saving clients $2M annually.
Conducted regular training programs to upskill 100+ employees on AI/ML adoption.
Software Engineer – Tata Consultancy Services, Chennai
Jul 2010 – Jul 2013
Built scalable web applications serving 1M+ users across telecom and finance domains.
Improved system performance by 40% by optimizing backend algorithms.
Collaborated with product managers to translate business requirements into technical deliverables.
Education
MBA, Technology Management – IIM Bangalore (2017)
B.Tech, Computer Science – Anna University (2010)
Certifications
AWS Certified Solutions Architect – Professional
PMP® – Project Management Professional
DeepLearning.AI – Generative AI Specialization
Awards & Recognition
Infosys Excellence Award (2021): For leading enterprise-wide AI adoption.
Best Innovator (2016): Wipro Technologies for automation framework.
Publications & Speaking Engagements
Speaker at NASSCOM 2023 – “Agentic AI in Enterprise Solutions”
Published article in Analytics India Magazine – “RAG Systems for HR Automation”
Technical Skills
Languages: Python, Java, C++
Frameworks: LangChain, TensorFlow, PyTorch, FastAPI
Databases: PostgreSQL, MongoDB, Qdrant, Neo4j
Tools: Docker, Kubernetes, Git, Jenkins
References
Available on request
"""
resume_file_path = "john_doe_resume.txt"
with open(resume_file_path, "w") as f:
    f.write(resume_text)
# --- 3. TOOLS DEFINITION ---
@tool
async def scrape_website(url: str, headful: bool = False) -> dict:
    """
    Scrapes a job posting page, clicks its 'Apply' button, and extracts the application
    form's controls (xpath, label, type, etc.) into a dictionary.
    """
    print(f"Scraping URL: {url}...")

    # Helper: best-effort label extraction for a form control.
    async def extract_label(page, el):
        try:
            if el_id := await el.get_attribute("id"):
                if label := await page.query_selector(f'label[for="{el_id}"]'):
                    if t := (await label.inner_text()).strip():
                        return t
            if aria := await el.get_attribute("aria-label"):
                return aria.strip()
            if pl := await el.get_attribute("placeholder"):
                return pl.strip()
            if prev := await page.evaluate("e => e.previousElementSibling?.innerText", el):
                if prev.strip():
                    return prev.strip()
        except Exception:
            pass
        return None

    # Helper: build an absolute XPath for an element handle.
    async def unique_xpath_for_element(page, handle):
        return await page.evaluate("""(e) => {
            function idx(n){let i=1,s=n.previousElementSibling;while(s){if(s.nodeName===n.nodeName)i++;s=s.previousElementSibling}return i}
            let seg='';while(e&&e.nodeType===1){let n=e.nodeName.toLowerCase(),i=idx(e);seg='/'+n+'['+i+']'+seg;e=e.parentElement}return seg;
        }""", handle)

    async with async_playwright() as p:
        # headful=True should show the browser window, so invert it for Playwright's headless flag.
        browser = await p.chromium.launch(headless=not headful)
        context = await browser.new_context()
        page = await context.new_page()
        try:
            await page.goto(url, wait_until="networkidle", timeout=30000)
            apply_button_selector = "text=/Apply for this job/i"
            print(f"Looking for the 'Apply' button with selector: '{apply_button_selector}'...")
            await page.wait_for_selector(apply_button_selector, state='visible', timeout=15000)
            await page.click(apply_button_selector)
            print("Successfully clicked the 'Apply' button.")
            form_ready_selector = "text=/resume/i"
            print(f"Waiting for form to be ready by looking for a reliable keyword: '{form_ready_selector}'...")
            await page.wait_for_selector(form_ready_selector, state='visible', timeout=10000)
            print("Application form is now visible and ready for scraping.")
        except Exception as e:
            await browser.close()
            return {"error": f"An unexpected error occurred during page interaction: {str(e)}"}

        forms_data = []
        # Prefer the enclosing <form>; fall back to the nearest div containing inputs or buttons.
        form_container = page.locator(form_ready_selector).locator("xpath=ancestor::form").first
        if not await form_container.is_visible():
            form_container = page.locator(form_ready_selector).locator("xpath=ancestor::div[.//input or .//button]").first

        if await form_container.is_visible():
            controls_data = []
            elems = await form_container.locator("input, textarea, select, button, [role='button']").all()
            for el in elems:
                try:
                    if not (el_handle := await el.element_handle()):
                        continue
                    tag = await el.evaluate("e => e.tagName.toLowerCase()")
                    xpath = await unique_xpath_for_element(page, el_handle)
                    controls_data.append({
                        "xpath": xpath,
                        "tag": tag,
                        "label_text": await extract_label(page, el),
                        "input_type": await el.get_attribute("type") or None,
                        "button_text": (await el.inner_text()).strip() if (tag == "button" or await el.get_attribute("role") == "button") else "",
                        "name_attr": await el.get_attribute("name"),
                        "required": await el.get_attribute("required") is not None,
                        "visible": await el.is_visible(),
                    })
                except Exception as e:
                    print(f"Could not process an element: {e}")
            forms_data.append({"controls": controls_data})

        result = {"application_url": page.url, "forms": forms_data}
        await browser.close()
        print("Scraping finished successfully.")
        return result
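# Illustrative shape of the dictionary scrape_website returns (the field values below are
# made-up examples, not taken from a real page):
# {
#     "application_url": "https://jobs.ashbyhq.com/.../application",
#     "forms": [{
#         "controls": [{
#             "xpath": "/html[1]/body[1]/div[1]/form[1]/input[1]",
#             "tag": "input", "label_text": "Full name", "input_type": "text",
#             "button_text": "", "name_attr": "name", "required": True, "visible": True
#         }]
#     }]
# }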
@tool
async def fill_text_field(xpath: str, value: str) -> str:
    """Fills a text input field identified by its XPath with the provided value."""
    global page
    try:
        print(f"FILLING field at '{xpath}' with value '{value}'...")
        # Use the explicit xpath engine so absolute paths like /html[1]/... resolve correctly.
        await page.locator(f"xpath={xpath}").fill(value)
        return f"Successfully filled field at xpath {xpath}."
    except Exception as e:
        return f"Error filling field at xpath {xpath}: {e}"

@tool
async def upload_resume(xpath: str, file_path: str) -> str:
    """Uploads a file to a file input element identified by its XPath."""
    global page
    try:
        print(f"UPLOADING file '{file_path}' to input at '{xpath}'...")
        await page.locator(f"xpath={xpath}").set_input_files(file_path)
        return f"Successfully set file input at {xpath} to '{file_path}'."
    except Exception as e:
        return f"Error uploading file at xpath {xpath}: {e}"

@tool
async def click_element(xpath: str) -> str:
    """Clicks an element on the page identified by its XPath (e.g., a submit button)."""
    global page
    try:
        print(f"CLICKING element at '{xpath}'...")
        await page.locator(f"xpath={xpath}").click()
        return f"Successfully clicked element at xpath {xpath}."
    except Exception as e:
        return f"Error clicking element at xpath {xpath}: {e}"
# --- 4. AGENT DEFINITIONS ---
analyzer_system_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a Job Apply Agent. Your goal is to analyze a webpage. Use the `scrape_website` tool to get the form data from the URL."),
    ("human", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])
analyzer_agent = create_tool_calling_agent(llm_model, [scrape_website], analyzer_system_prompt)
analyzer_executor = AgentExecutor(agent=analyzer_agent, tools=[scrape_website], verbose=True, return_intermediate_steps=True)

filler_system_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are an expert job application assistant. Your goal is to accurately fill out and submit a job application form.
You will be given:
1. A "form_data" JSON object which is a map of the application page, including the XPath for every field.
2. A "resume_data" JSON object containing the applicant's personal information.
3. The full text of the applicant's resume for context.
4. A file path for the applicant's resume file.
Your instructions are:
1. **Prioritize Resume Upload:** The absolute first step is to find the `input` field with `type='file'` and use the `upload_resume` tool.
2. **Fill Known Fields:** Go through each control in the `form_data.controls` list. For each, find the corresponding information in the `resume_data`. Use the `fill_text_field` tool for all text inputs.
3. **Generate Answers for Unknown Questions:** If you encounter a `textarea` for a question that is NOT in `resume_data`, you MUST generate a concise, professional answer (2-3 sentences) based on the provided resume context. Then, use `fill_text_field` to input your generated answer. DO NOT skip these fields if they are required.
4. **Handle Optional Fields:** For optional, non-essential fields like demographic questions (age, gender, ethnicity), you should skip them. Do not call any tools for these.
5. **Final Submission:** After all required fields are filled, find the control for the 'Submit Application' button and use the `click_element` tool to submit the form.
Think step-by-step. Announce which field you are filling before calling the tool.
"""),
    ("human", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])
filler_tools = [fill_text_field, upload_resume, click_element]
filler_agent = create_tool_calling_agent(llm_model, filler_tools, filler_system_prompt)
filler_executor = AgentExecutor(agent=filler_agent, tools=filler_tools, verbose=True)
# --- 5. MAIN ORCHESTRATION LOGIC ---
async def main():
    job_url = "https://jobs.ashbyhq.com/ashby/81eb43b9-e8f1-412c-8b9f-3c81b377248d"

    print("--- PARSING RESUME ---")
    parsing_prompt = f"""
Extract the following information from the resume text into a valid JSON object.
Do NOT include any extra text, comments, or markdown formatting like ```json.
Your entire response must be only the JSON object itself.
- fullName
- email
- phone
- linkedinURL
Resume:
{resume_text}
"""
    response = await llm_model.ainvoke(parsing_prompt)
    try:
        json_start = response.content.find('{')
        json_end = response.content.rfind('}') + 1
        if json_start != -1 and json_end != 0:
            clean_json_str = response.content[json_start:json_end]
            resume_data = json.loads(clean_json_str)
            print("Resume parsed successfully:", resume_data)
        else:
            raise json.JSONDecodeError("Could not find JSON object in LLM response.", response.content, 0)
    except json.JSONDecodeError as e:
        print(f"Error parsing resume JSON from LLM response: {e}")
        print("Raw LLM response was:\n", response.content)
        return

    print("\n--- PHASE 1: ANALYZING JOB PAGE ---")
    analyzer_input = {"input": f"Scrape the website at the following URL: {job_url}"}
    analysis_result = await analyzer_executor.ainvoke(analyzer_input)

    # Extract the scrape_website tool's raw output from the agent's intermediate steps.
    if 'intermediate_steps' in analysis_result and analysis_result['intermediate_steps']:
        # Each intermediate step is an (action, observation) tuple; take the last one.
        last_step_tuple = analysis_result['intermediate_steps'][-1]
        # The tool's output dictionary is the observation, i.e. the second element of the tuple.
        tool_output_dict = last_step_tuple[1]
    else:
        print("Analysis failed. No tool output found in intermediate steps.")
        return

    # All further checks are performed on the extracted dictionary.
    if "error" in tool_output_dict:
        print(f"Analysis tool returned an error: {tool_output_dict['error']}")
        return
    if not tool_output_dict.get("forms"):
        print("Analysis failed. Could not find 'forms' in the tool output. Exiting.")
        return

    application_url = tool_output_dict.get("application_url", job_url)
    form_data = tool_output_dict["forms"]

    print("\n--- PHASE 2: FILLING APPLICATION ---")
    global page
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        page = await browser.new_page()
        print(f"Navigating to application page: {application_url}")
        await page.goto(application_url, wait_until="networkidle")
        await page.wait_for_timeout(2000)

        filler_task_prompt = f"""
Here is the form data map:
{json.dumps(form_data, indent=2)}
Here is the applicant's resume data:
{json.dumps(resume_data, indent=2)}
Here is the full resume text for context on essay questions:
---
{resume_text}
---
The resume file is located at the local path:
'{os.path.abspath(resume_file_path)}'
Please fill out and submit the application based on these details.
"""
        await filler_executor.ainvoke({"input": filler_task_prompt})

        print("\nApplication process finished. Browser will close in 30 seconds.")
        await asyncio.sleep(30)
        await browser.close()


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\nExecution stopped by user.")