# Repository page header captured with the file (not part of the program):
# CareerBoost / Agent.py
# Musabbirkm's picture
# Upload 6 files
# 0af67a4 verified
import os
import json
import random
import asyncio
import aiohttp
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.agents import AgentType, initialize_agent, Tool
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.tools import Tool
from duckduckgo_search import DDGS
from tenacity import retry, stop_after_attempt, wait_exponential
from functools import lru_cache
import re
import http.client
import urllib.parse
# Random User-Agent
def get_random_user_agent():
    """Return one browser User-Agent string chosen at random.

    The pool covers Windows, macOS, Linux, Android, iOS, tablet, game-console
    and smart-TV browsers. Per the original author's note, rotating the
    header is meant to reduce the chance of being blocked for repetitive
    scraping.

    Returns:
        str: A single User-Agent header value.
    """
    windows = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
    )
    macos = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0',
    )
    linux = (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/91.0.4472.124 Safari/537.36',
    )
    android_phones = (
        'Mozilla/5.0 (Linux; Android 10; SM-G975F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.120 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 11; Pixel 5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.120 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 9; SM-G960F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.120 Mobile Safari/537.36',
    )
    ios_devices = (
        'Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1',
        'Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1',
        # Chrome on iOS
        'Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/91.0.4472.124 Mobile/15E148 Safari/604.1',
    )
    tablets = (
        'Mozilla/5.0 (Linux; Android 10; SM-T860) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.120 Safari/537.36',
        'Mozilla/5.0 (Linux; Android 11; Lenovo TB-X606F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.120 Safari/537.36',
        # Same string as the iPad entry above; kept so the selection
        # probabilities stay exactly as they were.
        'Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1',
    )
    consoles = (
        'Mozilla/5.0 (PlayStation 4 8.52) AppleWebKit/605.1.15 (KHTML, like Gecko)',
        'Mozilla/5.0 (Nintendo Switch; WifiWebAuthApplet) AppleWebKit/609.4 (KHTML, like Gecko) NF/6.0.2.19.3 NintendoBrowser/5.1.0.22401',
    )
    smart_tvs = (
        'Mozilla/5.0 (Web0S; Linux/SmartTV) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 WebAppManager',
        'Mozilla/5.0 (SMART-TV; Linux; Tizen 5.5) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/3.0 Chrome/91.0.4472.124 Safari/537.36',
    )
    # Concatenation keeps the entries in their original order.
    pool = (
        windows + macos + linux + android_phones
        + ios_devices + tablets + consoles + smart_tvs
    )
    return random.choice(pool)
# Read both API keys from the environment and fail fast (at import time) when
# either one is missing or empty.
google_api_key = os.environ.get("GOOGLE_API_KEY")
rapidapi_key = os.environ.get("RAPIDAPI_KEY")
if not google_api_key:
    raise ValueError("Google API key not found.")
if not rapidapi_key:
    raise ValueError("RapidAPI key not found.")
# Shared Gemini chat model used by the agents and the interview helper.
llm = ChatGoogleGenerativeAI(google_api_key=google_api_key, model="gemini-1.5-flash")
# DuckDuckGo search
def duckduckgo_search(query: str) -> str:
    """Run a DuckDuckGo text search and return the hits as a JSON string.

    Args:
        query: Search text passed to ``DDGS.text`` (at most 20 results are
            requested).

    Returns:
        Pretty-printed JSON: the list of results on success, or an object of
        the form ``{"error": "Error in WebSearch: ..."}`` if anything raises.
    """
    try:
        with DDGS() as client:
            hits = list(client.text(query, max_results=20))
        return json.dumps(hits, indent=2)
    except Exception as exc:
        # Best-effort tool: report the failure as data instead of raising.
        failure = {"error": f"Error in WebSearch: {exc}"}
        return json.dumps(failure, indent=2)
# Wrap duckduckgo_search as a LangChain tool named "WebSearch". The description
# is the text shown to the agent, so the "questionss" typo is fixed here.
duckduckgo_tool = Tool(
    name="WebSearch",
    func=duckduckgo_search,
    description="Use this tool to search the web for job listings or interview questions",
)
# Job finding agent
# Prompt describing the job-search assistant's role, tool order and output
# format. Its placeholders ({tools}, {tool_names}, {input}, {chat_history},
# {agent_scratchpad}) are exactly the declared input_variables.
# NOTE(review): the instructions rely on a "JobScraper" tool, but the only tool
# handed to job_agent in this file is WebSearch — confirm JobScraper is
# registered somewhere or adjust the wording.
job_prompt = PromptTemplate(
    input_variables=["input", "chat_history", "tools", "tool_names", "agent_scratchpad"],
    template="""
You are an advanced job search assistant focused on finding job vacancies worldwide, with a special emphasis on India.
Your tasks:
- Find relevant job listings based on the field and location, including job titles, companies, locations, and links.
- Use the JobScraper tool first to get detailed job listings from job boards like Naukri.com, Shine.com, LinkedIn, and Indeed and other indian job boards.
- If JobScraper fails or returns no valid jobs, use WebSearch to find job-related information and extract relevant details.
- Avoid duplicate listings by checking job titles, companies, and locations.
- Format the output as a numbered list with: Title, Company, Location, Link, Source.
- If no jobs are found, clearly state so and provide any relevant links from WebSearch.
- Include all valid job details from JobScraper observations in the final answer.
Available tools: {tools}
Tool names: {tool_names}
User input: {input}
Chat history: {chat_history}
Agent scratchpad: {agent_scratchpad}
"""
)
# Conversation buffer for the job agent, stored under the "chat_history" key.
job_memory = ConversationBufferMemory(memory_key="chat_history")
# Zero-shot ReAct agent whose only tool is WebSearch; parsing errors in the
# model output are handled by the executor instead of raising.
# NOTE(review): `custom_prompt` is not one of initialize_agent's named
# parameters (prompt customisation normally goes through `agent_kwargs`) —
# verify that job_prompt is actually applied.
job_agent = initialize_agent(
    tools=[duckduckgo_tool],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    memory=job_memory,
    handle_parsing_errors=True,
    custom_prompt=job_prompt
)
# RapidAPI job searcher
def rapid_job_seacrher(job: str, location: str, pages: int = 1, country: str = "in",
                       timeout: float = 30.0) -> str:
    """Query the JSearch endpoint on RapidAPI and return job listings as JSON.

    The (misspelled) function name is kept so existing callers keep working.

    Args:
        job: Job title or field; combined with ``location`` into the search
            query ``"<job> jobs in <location>"``.
        location: Place to search in.
        pages: Value sent as ``num_pages``.
        country: Value sent as ``country``.
        timeout: Socket timeout in seconds for the HTTPS connection.

    Returns:
        A pretty-printed JSON string. On success it is a list of objects with
        the keys ``title``, ``company``, ``location``, ``link`` and ``source``.
        Every failure (network error, non-2xx status, undecodable body, empty
        result) is reported as an object with a single ``error`` key.
    """
    headers = {
        'x-rapidapi-key': rapidapi_key,
        'x-rapidapi-host': "jsearch.p.rapidapi.com",
    }
    # Encode every parameter, not just the query text; quote_via=quote keeps
    # spaces as %20, as the original request did.
    params = urllib.parse.urlencode(
        {
            "query": f"{job} jobs in {location}",
            "page": 1,
            "num_pages": pages,
            "country": country,
            "date_posted": "all",
        },
        quote_via=urllib.parse.quote,
    )

    conn = http.client.HTTPSConnection("jsearch.p.rapidapi.com", timeout=timeout)
    try:
        conn.request("GET", f"/search?{params}", headers=headers)
        res = conn.getresponse()
        status, reason = res.status, res.reason
        data = res.read()
    except (OSError, http.client.HTTPException) as e:
        # Match the other failure paths: report the problem as JSON.
        return json.dumps({"error": f"Error contacting job search API: {str(e)}"}, indent=2)
    finally:
        # The connection was previously never closed.
        conn.close()

    if not 200 <= status < 300:
        # Otherwise an auth or quota failure would surface as "No jobs found."
        return json.dumps(
            {"error": f"Job search API returned HTTP {status} {reason}"}, indent=2
        )

    try:
        data_json = json.loads(data.decode("utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        return json.dumps({"error": f"Error parsing JSON: {str(e)}"}, indent=2)

    # Tolerate a non-object payload and an explicit `"data": null`.
    listings = data_json.get('data') if isinstance(data_json, dict) else None

    results = []
    for entry in listings or []:
        if not isinstance(entry, dict):
            continue
        place_parts = [part for part in (entry.get('job_city'), entry.get('job_state')) if part]
        results.append({
            # `or 'N/A'` also covers fields that are present but null/empty.
            "title": entry.get('job_title') or 'N/A',
            "company": entry.get('employer_name') or 'N/A',
            "location": ", ".join(place_parts) if place_parts else "N/A",
            "link": entry.get('job_apply_link') or 'N/A',
            "source": "RapidAPI",
        })
    return json.dumps(results if results else {"error": "No jobs found."}, indent=2)
# #Remove common markdown characters from text using regex
# def remove_markdown(text: str) -> str:
# patterns = [
# (r'^#+ ?', ''),
# (r'\*\*(.*?)\*\*', r'\1'),
# (r'\*(.*?)\*', r'\1'),
# (r'^- ?', ''),
# (r'\[([^\]]+)\]\([^\)]+\)', r'\1'),
# (r'^\s*:\s*', ''),
# (r'`{1,3}[^`]+`{1,3}', lambda m: m.group(0).replace('`', ''))
# ]
# cleaned_text = text
# for pattern, replacement in patterns:
# cleaned_text = re.sub(pattern, replacement, cleaned_text, flags=re.MULTILINE)
# cleaned_text = re.sub(r'\n\s*\n', '\n', cleaned_text).strip()
# return cleaned_text
# Interview preparation
def _normalize_job_field(raw: str) -> str:
    """Trim *raw* and drop a leading ``job_field=`` label and wrapping quotes."""
    cleaned = re.sub(r"^\s*job_field\s*=\s*", "", raw).strip()
    if len(cleaned) >= 2 and cleaned[0] == cleaned[-1] and cleaned[0] in "\"'":
        cleaned = cleaned[1:-1].strip()
    return cleaned


def interview_preparer(job_field: str) -> str:
    """Generate 10 interview questions with answers for ``job_field``.

    Runs three DuckDuckGo searches (general, reddit.com, quora.com) through
    ``duckduckgo_search``, passes the collected results to the shared ``llm``
    as context, and returns the model's text.

    Args:
        job_field: The job field, e.g. ``"data science"``. Because the
            interview agent prompt in this file asks for
            ``Action Input: job_field="..."``, a leading ``job_field=`` label
            and surrounding quotes are stripped before use.

    Returns:
        The generated questions and answers as plain text, or a string
        starting with ``"Error"`` when the input is unusable or generation
        fails.
    """
    try:
        if not isinstance(job_field, str):
            return "Error: Invalid job field provided."
        job_field = _normalize_job_field(job_field)
        if not job_field:
            # Covers empty and whitespace-only input.
            return "Error: Invalid job field provided."

        search_queries = [
            f"{job_field} interview questions 2022-2025",
            f"site:reddit.com {job_field} interview questions",
            f"site:quora.com {job_field} interview questions"
        ]
        search_results = []
        for query in search_queries:
            try:
                parsed = json.loads(duckduckgo_search(query))
            except Exception as e:
                search_results.append({"source": query, "error": str(e)})
                continue
            # duckduckgo_search reports failures as {"error": ...} JSON, which
            # the old `startswith("Error")` test could never match. Skip them
            # so error payloads are not passed to the model as context.
            if isinstance(parsed, dict) and "error" in parsed:
                continue
            search_results.append(parsed)
        combined_results = json.dumps(search_results, indent=2)

        # Named `qa_prompt` to avoid shadowing the module-level interview_prompt.
        qa_prompt = PromptTemplate(
            input_variables=["job_field", "search_results"],
            template="""
You are an interview preparation expert. Generate exactly 10 interview questions with detailed, professional answers for {job_field}. Do NOT provide links or references to external resources; focus on self-contained questions and answers.
Requirements:
- Include 4 technical questions, 3 behavioral questions, and 3 situational questions.
- Incorporate trends and frequently asked questions from 2022-2025.
- Use the search results for context to inform answers, but do not include raw search data or URLs in the output: {search_results}.
- Format as plain text with question numbers, type (Technical/Behavioral/Situational), questions, and answers.
Example:
1. Technical: [Question]
Answer: [Detailed answer]
"""
        )
        response = llm.invoke(qa_prompt.format(job_field=job_field, search_results=combined_results))
        return response.content
    except Exception as e:
        return f"Error generating interview questions: {str(e)}"
# Wrap interview_preparer as a LangChain tool named "InterviewPreparer"; the
# tool's single string input is passed to the function as job_field.
interview_tool = Tool(
    name="InterviewPreparer",
    func=interview_preparer,
    description="Generate 10 interview questions and answers for a job field (4 technical, 3 behavioral, 3 situational)."
)
# Agent-level prompt for interview preparation: it tells the model to extract
# the job field, call InterviewPreparer exactly once, and return the tool output
# verbatim. Placeholders match the declared input_variables.
interview_prompt = PromptTemplate(
    input_variables=["input", "chat_history", "tools", "tool_names", "agent_scratchpad"],
    template="""
You are an interview preparation assistant. Your task is to:
- Extract the job field from the user input (e.g., 'Prepare interview for data science' → job_field='data science').
- Use the InterviewPreparer tool exactly once to generate 10 interview questions with answers (4 technical, 3 behavioral, 3 situational).
- Do NOT attempt to create questions manually or simulate the tool's output.
- If the job field is unclear, return an error message asking for clarification.
- In the Final Answer, return only the tool's output as plain text, with no additional commentary.
Follow this strict format:
Thought: [Your reasoning]
Action: InterviewPreparer
Action Input: job_field="[job_field]"
Observation: [Tool output]
Final Answer: [Tool output]
Available tools: {tools}
Tool names: {tool_names}
User input: {input}
Chat history: {chat_history}
Agent scratchpad: {agent_scratchpad}
"""
)
# Conversation buffer for the interview agent, stored under "chat_history".
interview_memory = ConversationBufferMemory(memory_key="chat_history")
# Zero-shot ReAct agent whose only tool is InterviewPreparer.
# NOTE(review): `custom_prompt` is not one of initialize_agent's named
# parameters (prompt customisation normally goes through `agent_kwargs`) —
# verify that interview_prompt is actually applied.
interview_agent = initialize_agent(
    tools=[interview_tool],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    memory=interview_memory,
    handle_parsing_errors=True,
    custom_prompt=interview_prompt
)
# CV creator
# Separate Gemini client for CV generation: same model and API key as `llm`,
# but with temperature 0.1 and output limited to 2048 tokens.
cv_llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=google_api_key,
    temperature=0.1,
    max_output_tokens=2048
)
# Prompt for the CV writer; only {job_field} and {experience} are filled in at
# call time.
# NOTE(review): the name, email and phone are hard-coded placeholder values
# ("John Doe"), so every generated CV carries them — confirm this is intended.
cv_prompt = PromptTemplate(
    input_variables=["job_field", "experience"],
    template="""
You are a professional CV writer. Create a concise, ATS-friendly CV for a {job_field} position based on the following details:
- User Details: Name: John Doe, Email: john.doe@example.com, Phone: +91-9876543210
- Experience and skills: {experience}
Include sections for Summary, Skills, Experience, and Education. Format as plain text for clarity.
"""
)
def generate_cv(job_field: str, experience: str) -> str:
    """Produce a plain-text CV for ``job_field`` using ``cv_prompt`` and ``cv_llm``.

    Args:
        job_field: Target position inserted into the prompt.
        experience: Free-text experience and skills inserted into the prompt.

    Returns:
        The model's response text, or ``"Error generating CV: <reason>"`` if
        formatting the prompt or calling the model raises.
    """
    try:
        filled_prompt = cv_prompt.format(experience=experience, job_field=job_field)
        return cv_llm.invoke(filled_prompt).content
    except Exception as exc:
        return f"Error generating CV: {exc}"