File size: 7,811 Bytes
8acadd7 45de167 a01026b 046508a 45de167 8acadd7 45de167 8acadd7 a01026b 8acadd7 45de167 8acadd7 45de167 a01026b 45de167 a01026b 45de167 a01026b 45de167 a01026b 45de167 a01026b 45de167 a01026b 45de167 a01026b 45de167 a01026b 8acadd7 45de167 8acadd7 a01026b 8acadd7 45de167 8acadd7 45de167 8acadd7 a01026b 45de167 a01026b 45de167 a01026b 45de167 8acadd7 a01026b 8acadd7 45de167 8acadd7 45de167 8acadd7 45de167 8acadd7 45de167 8acadd7 45de167 8acadd7 45de167 8acadd7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 |
import argparse
import socket
import tempfile
from pathlib import Path
from typing import Iterable
import re
import requests
from urllib3.exceptions import NameResolutionError
# --- Generation defaults ---------------------------------------------------
DEFAULT_MODEL = "allenai/olmo-3.1-32b-think:free"
DEFAULT_CONTENT_TYPE = "cover_letter"
DEFAULT_MODEL_TEMPERATURE = 0.2
# Passed as ``timeout=`` to requests.get (requests interprets it in seconds).
DEFAULT_TIMEOUT = 30

# --- Validation limits -----------------------------------------------------
# File suffixes accepted for a local resume file (compared case-insensitively).
SUPPORTED_FILE_EXTENSIONS = {".pdf", ".md", ".json", ".txt"}
VALID_CONTENT_TYPES = ["cover_letter", "bullets", "linkedin_note"]
# Inclusive bounds accepted for the model temperature.
TEMP_MIN, TEMP_MAX = 0.0, 2.0

# --- Google Docs patterns and export formats -------------------------------
# Group 1 captures the document ID. The hyphen is placed last in the character
# class so it is unambiguously a literal rather than part of a range.
GOOGLE_DOCS_PATTERN = r'https://docs\.google\.com/document/d/([a-zA-Z0-9_-]+)'
# Maps the ``format=`` value of the export URL to the MIME type of that format.
GOOGLE_DOCS_EXPORT_FORMATS = {
    'pdf': 'application/pdf',
    'txt': 'text/plain',
    'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
}
def is_google_docs_url(url: str) -> bool:
    """
    Tell whether a string starts with a Google Docs document link.

    Args:
        url: Candidate string to test

    Returns:
        True when GOOGLE_DOCS_PATTERN matches at the beginning of ``url``,
        False otherwise
    """
    # re.match anchors at the start of the string, so a link embedded later
    # in the text does not count.
    return re.match(GOOGLE_DOCS_PATTERN, url) is not None
def extract_google_docs_id(url: str) -> str | None:
    """
    Pull the document ID out of a Google Docs URL.

    Args:
        url: String expected to contain a Google Docs document link

    Returns:
        The first capture group of GOOGLE_DOCS_PATTERN (the document ID),
        or None when the pattern occurs nowhere in ``url``
    """
    # re.search scans the whole string, unlike the anchored re.match.
    found = re.search(GOOGLE_DOCS_PATTERN, url)
    if found is None:
        return None
    return found.group(1)
def download_google_docs(url: str, export_format: str = 'txt') -> str:
    """
    Download a Google Docs document and save it to a temporary file.

    The temporary file is created with ``delete=False``, so it stays on disk
    after this function returns; removing it is left to the caller.

    Args:
        url: Google Docs sharing URL
        export_format: Export format ('pdf', 'txt', 'docx')

    Returns:
        Path to downloaded temporary file

    Raises:
        ArgumentTypeError: If the URL carries no document ID, the format is
            unsupported, the HTTP request fails, or the response is an HTML
            page instead of the exported document
    """
    doc_id = extract_google_docs_id(url)
    if not doc_id:
        raise argparse.ArgumentTypeError(f"Invalid Google Docs URL: {url}")
    if export_format not in GOOGLE_DOCS_EXPORT_FORMATS:
        raise argparse.ArgumentTypeError(
            f"Unsupported export format: {export_format}. "
            f"Supported formats: {list(GOOGLE_DOCS_EXPORT_FORMATS.keys())}"
        )

    export_url = f"https://docs.google.com/document/d/{doc_id}/export?format={export_format}"

    # Only the network call lives inside the try, so the handler cannot mask
    # unrelated failures; the original exception is kept as the cause.
    try:
        response = requests.get(export_url, timeout=DEFAULT_TIMEOUT, allow_redirects=True)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        raise argparse.ArgumentTypeError(
            f"Failed to download Google Docs document: {e}"
        ) from e

    # None of the supported export formats is HTML. An HTML body with a
    # successful status is presumably a sign-in or error page (e.g. the
    # document is not shared by link), which must not be saved as the export.
    content_type = response.headers.get("Content-Type", "")
    if content_type.lower().startswith("text/html"):
        raise argparse.ArgumentTypeError(
            "Failed to download Google Docs document: received an HTML page "
            "instead of the exported file. Check that the document is shared "
            "so that anyone with the link can view it."
        )

    # Create temporary file with appropriate extension
    suffix = f".{export_format}"
    with tempfile.NamedTemporaryFile(mode='wb', suffix=suffix, delete=False) as tmp_file:
        tmp_file.write(response.content)
    return tmp_file.name
def is_readable_file(path: str) -> str:
    """
    argparse ``type=`` validator for the resume argument.

    Accepts either a local file with a supported extension or a Google Docs
    URL, which is downloaded to a temporary file.

    Args:
        path: File path or Google Docs URL to validate

    Returns:
        ``path`` unchanged for a valid local file, or the path of the
        downloaded temporary file for a Google Docs URL

    Raises:
        ArgumentTypeError: If the file doesn't exist, has an unsupported
            extension, or the Google Docs download fails
    """
    if not is_google_docs_url(path):
        # Local file: it must exist and carry one of the supported suffixes.
        if not Path(path).is_file():
            raise argparse.ArgumentTypeError(f"File not found: {path}")
        allowed_suffixes = tuple(SUPPORTED_FILE_EXTENSIONS)
        if path.lower().endswith(allowed_suffixes):
            return path
        raise argparse.ArgumentTypeError(
            "Only text files (.txt, .md, .pdf, .json) are supported."
        )

    # Google Docs link: plain text is attempted first; if that export is
    # rejected, one more attempt is made with PDF (its error propagates).
    try:
        return download_google_docs(path, 'txt')
    except argparse.ArgumentTypeError:
        return download_google_docs(path, 'pdf')
def valid_temp(temp: str) -> float:
    """
    argparse ``type=`` validator for the model temperature.

    Args:
        temp: Temperature value as string

    Returns:
        The temperature converted to float

    Raises:
        ArgumentTypeError: If the value lies outside [TEMP_MIN, TEMP_MAX]
        ValueError: If ``temp`` cannot be converted by ``float()``
    """
    parsed = float(temp)
    # Both bounds are inclusive; NaN fails the comparison and is rejected too.
    if TEMP_MIN <= parsed <= TEMP_MAX:
        return parsed
    raise argparse.ArgumentTypeError(f"Temperature must be between {TEMP_MIN} and {TEMP_MAX}.")
def is_valid_url(job_posting: str, allowed_statuses: Iterable[int] | None = None) -> str:
    """
    Validate that a URL is reachable by issuing a GET request to it.

    Args:
        job_posting: URL to check
        allowed_statuses: HTTP status codes treated as success; defaults to
            every code from 200 through 399

    Returns:
        ``job_posting`` unchanged when the request succeeds with an allowed
        status code

    Raises:
        ArgumentTypeError: If the request fails (DNS, connection, timeout,
            malformed URL, ...) or the status code is not allowed
    """
    if allowed_statuses is None:
        allowed_statuses = range(200, 400)

    try:
        response = requests.get(job_posting, timeout=DEFAULT_TIMEOUT, allow_redirects=True)
    except socket.gaierror as e:
        raise argparse.ArgumentTypeError(f"Domain name resolution failed: {e}") from e
    except requests.exceptions.Timeout as e:
        # Must come before ConnectionError: requests' ConnectTimeout derives
        # from both ConnectionError and Timeout, and would otherwise be
        # reported as a generic connection failure.
        raise argparse.ArgumentTypeError(f"Request timed out: {e}") from e
    except requests.exceptions.ConnectionError as e:
        # Check if this ConnectionError was caused by a name-resolution
        # failure; the message text is inspected because the underlying
        # urllib3 error is wrapped by requests.
        detail = str(e)
        if "NameResolutionError" in detail or "Failed to resolve" in detail:
            raise argparse.ArgumentTypeError(
                f"ConnectionError. Domain name could not be resolved: {job_posting}"
            ) from e
        raise argparse.ArgumentTypeError(f"Connection failed: {e}") from e
    except requests.exceptions.InvalidURL as e:
        raise argparse.ArgumentTypeError(f"Invalid URL format: {e}") from e
    except requests.exceptions.RequestException as e:
        # Catch-all for the remaining requests errors (e.g. a missing scheme).
        raise argparse.ArgumentTypeError(f"URL validation failed: {e}") from e

    # Checked outside the try so the handlers above only guard the request.
    if response.status_code not in allowed_statuses:
        raise argparse.ArgumentTypeError(f"URL returned status {response.status_code}")
    return job_posting
def handle_cli() -> argparse.Namespace:
    """
    Parse and validate CLI arguments for job application generator.

    Validation happens during parsing through the ``type=`` callables, so
    parsing may read the local filesystem and perform HTTP requests (resume
    download from Google Docs, reachability check of the job posting URL).

    Returns:
        Parsed command-line arguments namespace with the attributes
        ``resume``, ``jd_source``, ``content_type``, ``model`` and ``temp``
    """
    parser = argparse.ArgumentParser(
        description="""Assist the candidate in writing content for
        job application such as answering to question in application
        process, cover letters and more."""
    )
    parser.add_argument(
        "-r",
        "--resume",
        required=True,
        metavar="resume",
        type=is_readable_file,
        help="""
        Provide the path to the file containing the candidate's resume. \
        It can be a local file path or a Google Docs sharing URL.
        Supported formats are .pdf, .md, .txt, and .json.
        For Google Docs, the document will be downloaded automatically.
        """,
    )
    parser.add_argument(
        "-j",
        "--jd-source",
        required=True,
        metavar="jd_source",
        type=is_valid_url,
        # The value is validated by is_valid_url, which fetches it over HTTP,
        # so only a reachable URL is accepted; the help text says so instead
        # of also promising raw job-description text.
        help="URL to the job posting. The URL must be reachable.",
    )
    parser.add_argument(
        "-t",
        "--content_type",
        default=DEFAULT_CONTENT_TYPE,
        choices=VALID_CONTENT_TYPES,
        help=f"Type of application material to generate (default: {DEFAULT_CONTENT_TYPE}).",
    )
    parser.add_argument(
        "-m",
        "--model",
        default=DEFAULT_MODEL,
        metavar="model_name",
        help=f"Model to use (default: {DEFAULT_MODEL}).",
    )
    parser.add_argument(
        "--temp",
        type=valid_temp,
        default=DEFAULT_MODEL_TEMPERATURE,
        metavar="model_temperature",
        help=f"Temperature for the LLM, {TEMP_MIN}-{TEMP_MAX}.",
    )
    parser.add_argument("--version", action="version", version="%(prog)s 1.0")
    return parser.parse_args()
|