Rishabh2095's picture
Stop tracking job_writer.log
45de167
import argparse
import socket
import tempfile
from pathlib import Path
from typing import Iterable
import re
import requests
from urllib3.exceptions import NameResolutionError
# Defaults used by the CLI when the corresponding option is omitted.
DEFAULT_MODEL = "allenai/olmo-3.1-32b-think:free"
DEFAULT_CONTENT_TYPE = "cover_letter"
DEFAULT_MODEL_TEMPERATURE = 0.2

# Timeout handed to every outbound ``requests.get`` call in this module.
DEFAULT_TIMEOUT = 30

# Inclusive bounds accepted for the sampling temperature.
TEMP_MIN, TEMP_MAX = 0.0, 2.0

# Extensions a local resume file may end with (compared case-insensitively).
SUPPORTED_FILE_EXTENSIONS = {".pdf", ".md", ".json", ".txt"}

# Kinds of application material the CLI can be asked to generate.
VALID_CONTENT_TYPES = ["cover_letter", "bullets", "linkedin_note"]

# Google Docs patterns and export formats.
# Capture group 1 of the pattern is the document ID.
GOOGLE_DOCS_PATTERN = r'https://docs\.google\.com/document/d/([a-zA-Z0-9-_]+)'
# Maps each supported export format to its MIME type.
GOOGLE_DOCS_EXPORT_FORMATS = {
    'pdf': 'application/pdf',
    'txt': 'text/plain',
    'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
}
def is_google_docs_url(url: str) -> bool:
    """
    Tell whether a string starts with a Google Docs document link.

    The check is anchored at the beginning of the string, so a Google Docs
    link embedded later in the text does not count.

    Args:
        url: Candidate string to test

    Returns:
        True when the string begins with a Google Docs document URL, else False
    """
    leading_match = re.match(GOOGLE_DOCS_PATTERN, url)
    return leading_match is not None
def extract_google_docs_id(url: str) -> str | None:
    """
    Pull the document ID out of a Google Docs URL.

    The pattern is searched for anywhere in the string, and the ID is the
    pattern's first capture group.

    Args:
        url: String expected to contain a Google Docs document URL

    Returns:
        The document ID, or None when no Google Docs URL is present
    """
    found = re.search(GOOGLE_DOCS_PATTERN, url)
    if found is None:
        return None
    return found.group(1)
def download_google_docs(url: str, export_format: str = 'txt') -> str:
    """
    Download a Google Docs document and save it to a temporary file.

    The document is fetched through the ``/export?format=...`` endpoint built
    from the document ID found in ``url``. The temporary file is created with
    ``delete=False``, so it stays on disk after this function returns and the
    caller is responsible for removing it.

    Args:
        url: Google Docs sharing URL
        export_format: Export format ('pdf', 'txt', 'docx')

    Returns:
        Path to the downloaded temporary file; its suffix is ``.<export_format>``

    Raises:
        ArgumentTypeError: If the URL has no document ID, the format is
            unsupported, or the HTTP request fails (the original requests
            exception is attached as ``__cause__``)
    """
    doc_id = extract_google_docs_id(url)
    if not doc_id:
        raise argparse.ArgumentTypeError(f"Invalid Google Docs URL: {url}")
    if export_format not in GOOGLE_DOCS_EXPORT_FORMATS:
        raise argparse.ArgumentTypeError(
            f"Unsupported export format: {export_format}. "
            f"Supported formats: {list(GOOGLE_DOCS_EXPORT_FORMATS.keys())}"
        )

    export_url = f"https://docs.google.com/document/d/{doc_id}/export?format={export_format}"

    # Only the network interaction is guarded; file-system errors while
    # writing the temp file propagate unchanged, as they did before.
    try:
        response = requests.get(export_url, timeout=DEFAULT_TIMEOUT, allow_redirects=True)
        response.raise_for_status()
        payload = response.content
    except requests.exceptions.RequestException as e:
        # Chain explicitly so the underlying network error stays inspectable.
        raise argparse.ArgumentTypeError(
            f"Failed to download Google Docs document: {e}"
        ) from e

    # Create temporary file with appropriate extension
    suffix = f".{export_format}"
    with tempfile.NamedTemporaryFile(mode='wb', suffix=suffix, delete=False) as tmp_file:
        tmp_file.write(payload)
    return tmp_file.name
def is_readable_file(path: str) -> str:
    """
    Resolve a resume argument to a usable file path.

    A Google Docs URL is downloaded to a temporary file; anything else is
    treated as a local path that must exist and end with a supported extension.

    Args:
        path: Local file path or Google Docs URL

    Returns:
        The unchanged ``path`` for a valid local file, or the path of the
        downloaded temporary file for a Google Docs URL

    Raises:
        ArgumentTypeError: If the local file is missing, its extension is not
            supported, or the Google Docs download fails in both formats
    """
    if not is_google_docs_url(path):
        # Local file: existence is checked before the extension.
        if not Path(path).is_file():
            raise argparse.ArgumentTypeError(f"File not found: {path}")
        allowed_suffixes = tuple(SUPPORTED_FILE_EXTENSIONS)
        if path.lower().endswith(allowed_suffixes):
            return path
        raise argparse.ArgumentTypeError(
            "Only text files (.txt, .md, .pdf, .json) are supported."
        )

    # Google Docs: plain text is attempted first, PDF is the fallback.
    try:
        return download_google_docs(path, 'txt')
    except argparse.ArgumentTypeError:
        return download_google_docs(path, 'pdf')
def valid_temp(temp: str) -> float:
    """
    Parse a temperature argument and check it against the allowed bounds.

    Args:
        temp: Temperature as given on the command line

    Returns:
        The parsed temperature as a float

    Raises:
        ArgumentTypeError: If the value is not within [TEMP_MIN, TEMP_MAX]
        ValueError: If ``temp`` cannot be converted by ``float()``
    """
    parsed = float(temp)
    # Chained comparison is kept on purpose: it is False for NaN, so NaN is
    # rejected along with out-of-range numbers.
    within_bounds = TEMP_MIN <= parsed <= TEMP_MAX
    if within_bounds:
        return parsed
    raise argparse.ArgumentTypeError(f"Temperature must be between {TEMP_MIN} and {TEMP_MAX}.")
def is_valid_url(job_posting: str, allowed_statuses: Iterable[int] | None = None) -> str:
    """
    Validate that a URL is reachable by issuing an HTTP GET request.

    Args:
        job_posting: URL to check
        allowed_statuses: HTTP status codes treated as success; defaults to
            every code from 200 through 399

    Returns:
        The unchanged ``job_posting`` string when the request succeeds with an
        allowed status

    Raises:
        ArgumentTypeError: If the domain cannot be resolved, the connection
            fails, the request times out, the URL is malformed, any other
            requests error occurs, or the response status is not allowed.
            Network errors are attached as ``__cause__``.
    """
    if allowed_statuses is None:
        allowed_statuses = range(200, 400)

    try:
        response = requests.get(job_posting, timeout=DEFAULT_TIMEOUT, allow_redirects=True)
    except socket.gaierror as e:
        # Defensive fallback for a resolver error that reaches us unwrapped.
        raise argparse.ArgumentTypeError(f"Domain name resolution failed: {e}") from e
    except requests.exceptions.Timeout as e:
        # Must precede ConnectionError: requests' ConnectTimeout inherits from
        # both classes, and would otherwise be reported as a generic
        # connection failure instead of a timeout.
        raise argparse.ArgumentTypeError(f"Request timed out: {e}") from e
    except requests.exceptions.ConnectionError as e:
        # Check if this ConnectionError was caused by a NameResolutionError
        detail = str(e)
        if "NameResolutionError" in detail or "Failed to resolve" in detail:
            raise argparse.ArgumentTypeError(
                f"ConnectionError. Domain name could not be resolved: {job_posting}"
            ) from e
        raise argparse.ArgumentTypeError(f"Connection failed: {e}") from e
    except requests.exceptions.InvalidURL as e:
        raise argparse.ArgumentTypeError(f"Invalid URL format: {e}") from e
    except requests.exceptions.RequestException as e:
        raise argparse.ArgumentTypeError(f"URL validation failed: {e}") from e

    # Status check lives outside the try: it cannot raise a requests error.
    if response.status_code not in allowed_statuses:
        raise argparse.ArgumentTypeError(f"URL returned status {response.status_code}")
    return job_posting
def handle_cli(argv: list[str] | None = None) -> argparse.Namespace:
    """
    Parse and validate CLI arguments for job application generator.

    Validation happens through the ``type=`` callables: the resume is checked
    (or downloaded) by ``is_readable_file``, the job posting URL is fetched by
    ``is_valid_url``, and the temperature is range-checked by ``valid_temp``.

    Args:
        argv: Argument list to parse instead of the process command line.
            When None (the default), argparse reads ``sys.argv[1:]``.

    Returns:
        Parsed command-line arguments namespace
    """
    parser = argparse.ArgumentParser(
        description="""Assist the candidate in writing content for
        job application such as answering to question in application
        process, cover letters and more."""
    )
    parser.add_argument(
        "-r",
        "--resume",
        required=True,
        metavar="resume",
        type=is_readable_file,
        help="""
        Provide the path to the file containing the candidate's resume. \
        It can be a local file path or a Google Docs sharing URL.
        Supported formats are .pdf, .md, .txt, and .json.
        For Google Docs, the document will be downloaded automatically.
        """,
    )
    parser.add_argument(
        "-j",
        "--jd-source",
        required=True,
        metavar="jd_source",
        type=is_valid_url,
        # The value is validated with an HTTP request, so only a reachable
        # URL is accepted; the help text states that explicitly.
        help="URL to the job posting. The URL must be reachable; it is checked with an HTTP request.",
    )
    parser.add_argument(
        "-t",
        "--content_type",
        default=DEFAULT_CONTENT_TYPE,
        choices=VALID_CONTENT_TYPES,
        help=f"Type of application material to generate (default: {DEFAULT_CONTENT_TYPE}).",
    )
    parser.add_argument(
        "-m",
        "--model",
        default=DEFAULT_MODEL,
        metavar="model_name",
        help=f"Model to use (default: {DEFAULT_MODEL}).",
    )
    parser.add_argument(
        "--temp",
        type=valid_temp,
        default=DEFAULT_MODEL_TEMPERATURE,
        metavar="model_temperature",
        help=f"Temperature for the LLM, {TEMP_MIN}-{TEMP_MAX}.",
    )
    parser.add_argument("--version", action="version", version="%(prog)s 1.0")
    return parser.parse_args(argv)