Spaces:

khirodsahoo93
/

resume-optimizer

Runtime error

App Files Files Community

resume-optimizer / app.py

khirodsahoo93

Upload app.py

5b70e06 verified 4 months ago

raw

history blame contribute delete

29 kB

	"""
	Job Application Optimizer - AI-Powered Resume Tailoring

	Supported Models:
	- GPT-4o (OpenAI) - Premium, fastest, most accurate
	- Claude-3.5-Sonnet (Anthropic) - Premium, excellent for professional writing

	⚠️ ETHICAL NOTICE:
	This tool ONLY optimizes existing resume content. It NEVER fabricates experience.
	All outputs remain truthful to your original resume.
	"""

	import os
	import io
	import sys
	import zipfile
	import tempfile
	import httpx
	from openai import OpenAI
	import anthropic
	import gradio as gr

	try:
	from PyPDF2 import PdfReader
	except ImportError:
	print("Warning: PyPDF2 not available")
	PdfReader = None

	try:
	from reportlab.lib.pagesizes import letter
	from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
	from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
	from reportlab.lib.units import inch
	except ImportError:
	print("Warning: reportlab not available")

	# Try to load from .env file if available
	try:
	from dotenv import load_dotenv
	load_dotenv()
	except ImportError:
	pass

	# PASSWORD PROTECTION
	# Password used for the Gradio login, read from the APP_PASSWORD environment
	# variable when it is set.
	# NOTE(review): the fallback below is a hard-coded default that is visible to
	# anyone who can read this file - set APP_PASSWORD for any real deployment.
	APP_PASSWORD = os.environ.get("APP_PASSWORD", "jobapp123") # Default password

	# Lazy initialization of AI clients.
	# Clients are created on first use and cached per API key so that repeated
	# calls reuse one HTTP connection pool instead of opening a new, never-closed
	# httpx.Client for every request.
	_OPENAI_CLIENTS = {}

	def get_openai_client():
		"""Return an OpenAI client configured from the OPENAI_API_KEY variable.

		The client is built on the first call for a given key and reused on
		later calls. It uses an httpx client with a 60 second timeout and a
		small connection pool.

		Returns:
			An ``OpenAI`` client instance.

		Raises:
			ValueError: if OPENAI_API_KEY is unset or empty.
		"""
		api_key = os.environ.get("OPENAI_API_KEY")
		if not api_key:
			raise ValueError("OPENAI_API_KEY not found")

		client = _OPENAI_CLIENTS.get(api_key)
		if client is None:
			http_client = httpx.Client(
				timeout=60.0,
				limits=httpx.Limits(max_keepalive_connections=5, max_connections=10)
			)
			client = OpenAI(api_key=api_key, http_client=http_client)
			_OPENAI_CLIENTS[api_key] = client
		return client

	# Anthropic clients cached per API key, so repeated calls share one HTTP
	# connection pool rather than leaking a new httpx.Client per request.
	_ANTHROPIC_CLIENTS = {}

	def get_anthropic_client():
		"""Return an Anthropic client configured from ANTHROPIC_API_KEY.

		The client is built on the first call for a given key and reused on
		later calls. It uses an httpx client with a 60 second timeout and a
		small connection pool.

		Returns:
			An ``anthropic.Anthropic`` client instance.

		Raises:
			ValueError: if ANTHROPIC_API_KEY is unset or empty.
		"""
		api_key = os.environ.get("ANTHROPIC_API_KEY")
		if not api_key:
			raise ValueError("ANTHROPIC_API_KEY not found")

		client = _ANTHROPIC_CLIENTS.get(api_key)
		if client is None:
			http_client = httpx.Client(
				timeout=60.0,
				limits=httpx.Limits(max_keepalive_connections=5, max_connections=10)
			)
			client = anthropic.Anthropic(api_key=api_key, http_client=http_client)
			_ANTHROPIC_CLIENTS[api_key] = client
		return client

	# Model configurations
	# Model identifiers passed as `model=` to the OpenAI chat-completions call
	# and the Anthropic messages call respectively.
	OPENAI_MODEL = "gpt-4o"
	CLAUDE_MODEL = "claude-3-5-sonnet-20240620"

	# System prompts with tone variations
	def get_linkedin_system_prompt(tone, word_limit):
		"""Build the system prompt for the LinkedIn recruiter message.

		Args:
			tone: "Casual", "Semi-Professional" or "Professional"; any other
				value falls back to the "Semi-Professional" wording.
			word_limit: Target length, interpolated into the prompt text.

		Returns:
			The system prompt string.
		"""
		wording_by_tone = {
			"Casual": "friendly, conversational, and approachable",
			"Semi-Professional": "balanced between friendly and professional, warm yet respectful",
			"Professional": "formal, polished, and business-like"
		}
		# Unknown tones use the middle-of-the-road wording.
		if tone in wording_by_tone:
			style = wording_by_tone[tone]
		else:
			style = wording_by_tone["Semi-Professional"]

		prompt = f"""You are a professional career coach writing personalized LinkedIn messages.
	Write a message that is {style}.
	Keep it to approximately {word_limit} words (be flexible by ±10 words).
	Express genuine interest and mention 1-2 key qualifications from the resume that match the job.
	DO NOT fabricate any experience."""
		return prompt

	def get_email_system_prompt(tone, word_limit):
		"""Build the system prompt for the job application email.

		Args:
			tone: "Casual", "Semi-Professional" or "Professional"; any other
				value falls back to the "Semi-Professional" wording.
			word_limit: Target length, interpolated into the prompt text.

		Returns:
			The system prompt string.
		"""
		wording_by_tone = {
			"Casual": "friendly and conversational while maintaining professionalism",
			"Semi-Professional": "professional yet warm and personable",
			"Professional": "highly formal, polished, and business-oriented"
		}
		# Unknown tones use the middle-of-the-road wording.
		if tone in wording_by_tone:
			style = wording_by_tone[tone]
		else:
			style = wording_by_tone["Semi-Professional"]

		prompt = f"""You are a professional career coach writing job application emails.
	Write an email that is {style}.
	Target approximately {word_limit} words (be flexible by ±20 words).
	Include proper email format with subject line.
	Highlight relevant experience and express interest.
	Keep all information truthful to the resume.
	DO NOT fabricate any experience."""
		return prompt

	def get_resume_system_prompt(output_format):
		"""Build the system prompt for resume tailoring.

		Args:
			output_format: "latex" appends the modular-LaTeX instructions;
				any other value asks for clean plain resume content.

		Returns:
			The shared rules followed by the format-specific instructions.
		"""
		shared_rules = """You are a professional resume writer optimizing resumes for ATS systems.
	Tailor the resume to match the job description by:
	1. Adjusting keywords to match job requirements
	2. Reordering/emphasizing relevant experience
	3. Rewriting bullet points for clarity and impact
	4. Highlighting transferable skills

	CRITICAL RULES:
	- NEVER add experience, skills, or education not in original resume
	- NEVER change dates, company names, or titles
	- ONLY rephrase and reorganize existing content
	- Keep all information factually accurate"""

		latex_instructions = """

	IMPORTANT FOR MODULAR LATEX:
	- If you see "% INCLUDED FILE:" markers, that means the resume uses modular structure
	- Update ALL sections (main file AND included files) to match the job
	- Maintain the exact same file structure with "% MAIN FILE:" and "% INCLUDED FILE:" markers
	- Keep all \\input{} and \\include{} commands unchanged
	- Return the COMPLETE updated content for ALL files with proper markers

	Return valid LaTeX code suitable for Overleaf compilation with proper formatting."""

		plain_instructions = "\n\nReturn ONLY the tailored resume content in a clean, professional format."

		format_specific = latex_instructions if output_format == "latex" else plain_instructions
		return shared_rules + format_specific

	def extract_latex_from_zip(zip_path):
		"""Extract LaTeX content from a zip file (Overleaf export) including all component files.

		The main file is chosen by name (main.tex, resume.tex, cv.tex,
		document.tex, in that order) and otherwise is the first .tex entry.
		Files referenced from the main file through \\input{...} or
		\\include{...} are read as well, looked up relative to the archive
		root and relative to the main file's folder.

		Args:
			zip_path: Path to the zip archive.

		Returns:
			A 4-tuple ``(combined_content, format, main_file, included_files)``.
			On success ``format`` is "latex", ``combined_content`` holds the
			main file and every included file separated by "% MAIN FILE:" /
			"% INCLUDED FILE:" marker lines, and ``included_files`` maps
			archive paths to their text. On failure the tuple is
			``(error message, "error", None, {})``.
		"""
		import posixpath
		import re

		try:
			with zipfile.ZipFile(zip_path, 'r') as zip_ref:
				# Find all .tex files
				tex_files = [f for f in zip_ref.namelist() if f.endswith('.tex') and not f.startswith('__MACOSX')]

				if not tex_files:
					return "No .tex files found in zip", "error", None, {}

				# Priority: main.tex > resume.tex > cv.tex > document.tex > first .tex file
				main_file = None
				for priority_name in ['main.tex', 'resume.tex', 'cv.tex', 'document.tex']:
					main_file = next(
						(tex_file for tex_file in tex_files if tex_file.lower().endswith(priority_name)),
						None
					)
					if main_file:
						break

				if not main_file:
					main_file = tex_files[0]

				# Read the main tex file
				with zip_ref.open(main_file) as f:
					main_content = f.read().decode('utf-8', errors='ignore')

				# Patterns for \input{file}, \include{file}, \input{folder/file}.
				# The alternation must be a plain "|": an escaped "\|" would only
				# match a literal pipe character and never find any include.
				input_pattern = r'\\(?:input|include)\{([^}]+)\}'
				main_dir = posixpath.dirname(main_file)
				available = set(tex_files)
				included_files = {}

				for match in re.findall(input_pattern, main_content):
					# Handle both with and without .tex extension
					target = match.strip()
					if not target.lower().endswith('.tex'):
						target += '.tex'

					# LaTeX resolves includes from the main file's folder; Overleaf
					# exports usually keep the main file at the archive root.
					possible_paths = [posixpath.normpath(target)]
					if main_dir:
						possible_paths.append(posixpath.normpath(posixpath.join(main_dir, target)))

					for possible_path in possible_paths:
						if possible_path == main_file or possible_path not in available:
							continue
						try:
							with zip_ref.open(possible_path) as f:
								included_files[possible_path] = f.read().decode('utf-8', errors='ignore')
							break
						except Exception:
							# Best effort: an unreadable component is skipped, not fatal.
							continue

				# Combine all content for the AI (main + all components)
				combined_content = f"% MAIN FILE: {main_file}\n{main_content}\n\n"
				for included_path, included_content in included_files.items():
					combined_content += f"\n% INCLUDED FILE: {included_path}\n{included_content}\n\n"

				return combined_content, "latex", main_file, included_files

		except Exception as e:
			return f"Error extracting zip: {str(e)}", "error", None, {}

	def extract_text_from_file(file_path):
		"""Extract text from uploaded resume (PDF, LaTeX, or ZIP).

		Args:
			file_path: A path string, an ``os.PathLike``, an upload object
				exposing its path as a string ``name`` attribute, or raw PDF
				bytes. ``None`` means nothing was uploaded.

		Returns:
			A 2-tuple ``(text, format)``. ``format`` is "latex" (.tex or
			.zip), "pdf", "text" (any other path, read as UTF-8), "unknown"
			(no file or unsupported input; ``text`` is a short message) or
			"error" (``text`` is the error message).
		"""
		def _pdf_text(stream):
			# Joins the text of every page; pages with no extractable text
			# (extract_text() returning None or "") contribute an empty line.
			if PdfReader is None:
				raise RuntimeError("PyPDF2 is not installed, cannot read PDF files")
			pages = PdfReader(stream).pages
			return "\n".join((page.extract_text() or "") for page in pages).strip()

		try:
			if file_path is None:
				return "No file uploaded", "unknown"

			# Normalise the accepted input shapes to a path string. PathLike is
			# checked first because pathlib paths also have a ``name`` attribute
			# that holds only the basename.
			if isinstance(file_path, os.PathLike):
				file_path = os.fsdecode(file_path)
			elif not isinstance(file_path, (str, bytes)):
				named_path = getattr(file_path, 'name', None)
				if isinstance(named_path, str):
					file_path = named_path

			# Handle file path (string) from Gradio File component
			if isinstance(file_path, str):
				lowered = file_path.lower()
				if lowered.endswith('.zip'):
					# ZIP file (Overleaf export); only text and format are needed here
					text, format_type, _main_file, _included_files = extract_latex_from_zip(file_path)
					return text, format_type
				if lowered.endswith('.tex'):
					# LaTeX file; undecodable bytes are dropped, as in the ZIP path
					with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
						return f.read(), "latex"
				if lowered.endswith('.pdf'):
					# PDF file
					with open(file_path, 'rb') as f:
						return _pdf_text(f), "pdf"
				# Try to read as text
				with open(file_path, 'r', encoding='utf-8') as f:
					return f.read(), "text"

			# Handle bytes (treated as the content of a PDF)
			if isinstance(file_path, bytes):
				return _pdf_text(io.BytesIO(file_path)), "pdf"

			return "Unsupported file type", "unknown"
		except Exception as e:
			return f"Error reading file: {str(e)}", "error"

	def generate_with_gpt(system_prompt, user_prompt):
		"""Request a completion from the OpenAI chat model.

		Args:
			system_prompt: Text sent as the "system" message.
			user_prompt: Text sent as the "user" message.

		Returns:
			The content of the first choice, or a string beginning with
			"❌ Error: " when anything in the call raises.
		"""
		chat_messages = [
			{"role": "system", "content": system_prompt},
			{"role": "user", "content": user_prompt}
		]
		try:
			completion = get_openai_client().chat.completions.create(
				model=OPENAI_MODEL,
				messages=chat_messages,
				temperature=0.7,
				max_tokens=2000
			)
			first_choice = completion.choices[0]
			return first_choice.message.content
		except Exception as exc:
			# Failures are reported as text so the UI can show them in place.
			return "❌ Error: " + str(exc)

	def generate_with_claude(system_prompt, user_prompt):
		"""Request a completion from the Anthropic messages API.

		Args:
			system_prompt: Text passed as the ``system`` parameter.
			user_prompt: Text sent as the single "user" message.

		Returns:
			The text of the first content block, or a string beginning with
			"❌ Error: " when anything in the call raises.
		"""
		conversation = [
			{"role": "user", "content": user_prompt}
		]
		try:
			reply = get_anthropic_client().messages.create(
				model=CLAUDE_MODEL,
				max_tokens=2000,
				system=system_prompt,
				messages=conversation,
				temperature=0.7
			)
			first_block = reply.content[0]
			return first_block.text
		except Exception as exc:
			# Failures are reported as text so the UI can show them in place.
			return "❌ Error: " + str(exc)

	def generate_application_materials(resume_file, job_description, model, linkedin_tone, email_tone,
			linkedin_word_limit, email_word_limit, linkedin_custom_prompt, email_custom_prompt):
		"""Generate LinkedIn message, email, and tailored resume.

		Args:
			resume_file: Uploaded resume, as accepted by extract_text_from_file.
			job_description: Job posting text; at least 50 characters required.
			model: "GPT-4o" selects the OpenAI generator, anything else Claude.
			linkedin_tone, email_tone: Tone names for the two messages.
			linkedin_word_limit, email_word_limit: Target lengths.
			linkedin_custom_prompt, email_custom_prompt: Optional extra
				instructions appended to the respective user prompt.

		Returns:
			A 4-tuple ``(linkedin_message, email, tailored_resume, file_format)``.
			When validation or resume extraction fails, the first element is
			the message to show, the next two are empty and the format is
			"unknown".
		"""
		if not resume_file:
			return "⚠️ Please upload your resume", "", "", "unknown"

		if not job_description or len(job_description.strip()) < 50:
			return "⚠️ Please provide a detailed job description (at least 50 characters)", "", "", "unknown"

		# Extract resume text and detect format
		resume_text, file_format = extract_text_from_file(resume_file)

		# Failure is signalled by the returned format, not by the text: a resume
		# may legitimately contain the word "Error", and "unknown" means the
		# text is only a notice such as "Unsupported file type".
		if file_format in ("error", "unknown"):
			return resume_text, "", "", "unknown"

		if not resume_text or not resume_text.strip():
			return "⚠️ No text could be extracted from the uploaded resume", "", "", "unknown"

		# Select model
		generate_fn = generate_with_gpt if model == "GPT-4o" else generate_with_claude

		# Generate LinkedIn message with custom tone and word limit
		linkedin_system = get_linkedin_system_prompt(linkedin_tone, linkedin_word_limit)
		linkedin_prompt = f"""
	Original Resume:
	{resume_text}

	Job Description:
	{job_description}

	Write a LinkedIn message to the recruiter for this position following the tone and length guidelines.
	"""

		# Add custom instructions if provided
		if linkedin_custom_prompt and linkedin_custom_prompt.strip():
			linkedin_prompt += f"\n\nADDITIONAL INSTRUCTIONS FROM USER:\n{linkedin_custom_prompt.strip()}"

		linkedin_msg = generate_fn(linkedin_system, linkedin_prompt)

		# Generate Email with custom tone and word limit
		email_system = get_email_system_prompt(email_tone, email_word_limit)
		email_prompt = f"""
	Original Resume:
	{resume_text}

	Job Description:
	{job_description}

	Write an email to the hiring manager for this position following the tone and length guidelines.
	"""

		# Add custom instructions if provided
		if email_custom_prompt and email_custom_prompt.strip():
			email_prompt += f"\n\nADDITIONAL INSTRUCTIONS FROM USER:\n{email_custom_prompt.strip()}"

		email_content = generate_fn(email_system, email_prompt)

		# Generate Tailored Resume; the prompt depends on the detected format
		resume_system = get_resume_system_prompt(file_format)
		resume_prompt = f"""
	Original Resume:
	{resume_text}

	Job Description:
	{job_description}

	Tailor this resume to match the job description. Remember: ONLY optimize existing content, NEVER fabricate.
	"""
		tailored_resume = generate_fn(resume_system, resume_prompt)

		return linkedin_msg, email_content, tailored_resume, file_format

	def create_output_file(content, original_file_path, file_format):
		"""Create output file (PDF, LaTeX, or ZIP) from tailored resume content.

		Files are written to the "tailored_resumes" folder, named after the
		uploaded file with a "_tailored" suffix ("resume" when no path is
		given).

		Args:
			content: Tailored resume text. Empty text and generator/extractor
				error messages produce no file.
			original_file_path: Path of the uploaded resume, or None.
			file_format: "latex" writes a .tex file, or a .zip when the upload
				was a .zip; any other value renders a PDF.

		Returns:
			The path of the written file, or None when there is nothing to
			write or writing failed.
		"""
		# Only text that IS an error message is rejected. A plain substring
		# test for "Error" would also reject valid resumes mentioning the word.
		error_prefixes = ("❌ Error:", "Error reading file:", "Error extracting zip:")
		if not content or not content.strip():
			return None
		if content.lstrip().startswith(error_prefixes):
			return None

		try:
			# Create output directory
			os.makedirs("tailored_resumes", exist_ok=True)

			# Get base filename
			if original_file_path:
				base_name = os.path.splitext(os.path.basename(original_file_path))[0]
			else:
				base_name = "resume"

			# Handle LaTeX files (including from zip)
			if file_format == "latex":
				# If original was a zip, preserve all files and update all .tex files
				if original_file_path and original_file_path.lower().endswith('.zip'):
					output_path = f"tailored_resumes/{base_name}_tailored.zip"
					try:
						_write_tailored_zip(content, original_file_path, output_path)
					except Exception as e:
						print(f"Error preserving ZIP contents: {str(e)}")
						import traceback
						traceback.print_exc()
						# Fallback: create simple zip with just the tailored tex
						with zipfile.ZipFile(output_path, 'w') as zipf:
							zipf.writestr('resume_tailored.tex', content.encode('utf-8'))
					return output_path

				# Single .tex file
				output_path = f"tailored_resumes/{base_name}_tailored.tex"
				with open(output_path, 'w', encoding='utf-8') as f:
					f.write(content)
				return output_path

			# Handle PDF creation
			output_path = f"tailored_resumes/{base_name}_tailored.pdf"
			_write_resume_pdf(content, output_path)
			return output_path
		except Exception as e:
			print(f"Output file creation error: {str(e)}")
			return None

	def _split_marked_latex(content):
		"""Split AI output into {filename: text} using the file marker lines."""
		import re

		if "% MAIN FILE:" not in content and "% INCLUDED FILE:" not in content:
			return {}

		# Plain "|" alternation: an escaped "\|" would only match a literal pipe
		# and the markers would never be recognised.
		file_pattern = r'% (?:MAIN|INCLUDED) FILE: (.+?)\n(.*?)(?=\n% (?:MAIN|INCLUDED) FILE:|$)'
		return {
			filename.strip(): file_content.strip()
			for filename, file_content in re.findall(file_pattern, content, re.DOTALL)
		}

	def _pick_main_tex(tex_files):
		"""Return the main .tex entry by name priority, else the first one, else None."""
		for priority_name in ['main.tex', 'resume.tex', 'cv.tex', 'document.tex']:
			for tex_file in tex_files:
				if tex_file.lower().endswith(priority_name):
					return tex_file
		return tex_files[0] if tex_files else None

	def _write_tailored_zip(content, original_zip_path, output_path):
		"""Copy the original archive to output_path with tailored .tex content swapped in."""
		file_contents = _split_marked_latex(content)

		with zipfile.ZipFile(original_zip_path, 'r') as original_zip:
			all_files = [f for f in original_zip.namelist() if not f.startswith('__MACOSX')]
			tex_files = [f for f in all_files if f.endswith('.tex')]

			if file_contents:
				# Modular structure - update each file the AI returned
				replacements = {
					tex_file: file_contents[tex_file]
					for tex_file in tex_files
					if tex_file in file_contents
				}
			else:
				# Single file structure - the whole output replaces the main file
				main_file = _pick_main_tex(tex_files)
				replacements = {main_file: content} if main_file else {}

			with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as new_zip:
				for item in all_files:
					if item in replacements:
						new_zip.writestr(item, replacements[item].encode('utf-8'))
					else:
						# Non-.tex files and untouched .tex files are kept as they were
						new_zip.writestr(item, original_zip.read(item))

	def _write_resume_pdf(content, output_path):
		"""Render the resume text to a PDF, mapping '#'/'##' prefixed lines to headings."""
		from xml.sax.saxutils import escape

		doc = SimpleDocTemplate(output_path, pagesize=letter)
		styles = getSampleStyleSheet()
		story = []

		for line in content.split('\n'):
			line = line.strip()
			if not line:
				story.append(Spacer(1, 0.2*inch))
				continue

			if line.startswith('#'):
				# One '#' is a header, two or more a subheader. Only the leading
				# marker is removed so a '#' inside the text (e.g. "C#") survives.
				level = len(line) - len(line.lstrip('#'))
				style = styles['Heading1'] if level == 1 else styles['Heading2']
				text = line.lstrip('#').strip()
			else:
				# Body text
				style = styles['BodyText']
				text = line

			# Paragraph parses its text as XML-like markup, so &, < and > must
			# be escaped for plain text.
			story.append(Paragraph(escape(text), style))

		doc.build(story)

	# Custom CSS passed to gr.Blocks (visual style shared with the python-cpp-optimizer app)
	modern_css = """
	/* Global Styles */
	.gradio-container {
	font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif !important;
	}

	/* Header Section */
	.modern-header {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	padding: 40px;
	border-radius: 16px;
	text-align: center;
	margin-bottom: 32px;
	box-shadow: 0 10px 30px rgba(102, 126, 234, 0.3);
	}

	.modern-header h1 {
	margin: 0 0 8px 0;
	font-size: 36px;
	font-weight: 700;
	letter-spacing: -0.02em;
	}

	.modern-header p {
	margin: 0;
	font-size: 16px;
	opacity: 0.95;
	font-weight: 400;
	}

	/* Warning Box */
	.warning-box {
	background: #fef3c7;
	border: 2px solid #f59e0b;
	border-radius: 12px;
	padding: 20px;
	margin: 24px 0;
	box-shadow: 0 4px 12px rgba(245, 158, 11, 0.1);
	}

	.warning-box h3 {
	margin: 0 0 8px 0;
	color: #92400e;
	font-size: 18px;
	font-weight: 600;
	}

	.warning-box p {
	margin: 0;
	color: #78350f;
	font-size: 14px;
	line-height: 1.6;
	}

	/* Modern Button */
	.modern-button {
	background: linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%) !important;
	color: white !important;
	border: none !important;
	border-radius: 12px !important;
	padding: 14px 28px !important;
	font-weight: 600 !important;
	font-size: 16px !important;
	cursor: pointer !important;
	transition: all 0.2s ease !important;
	box-shadow: 0 4px 6px rgba(59, 130, 246, 0.2) !important;
	}

	.modern-button:hover {
	transform: translateY(-2px) !important;
	box-shadow: 0 8px 12px rgba(59, 130, 246, 0.3) !important;
	}

	/* Model Selector */
	.model-selector {
	background: white !important;
	border: 2px solid #e2e8f0 !important;
	border-radius: 12px !important;
	padding: 12px 16px !important;
	font-size: 16px !important;
	}

	/* Output Sections */
	.output-section {
	background: #f8fafc !important;
	border: 2px solid #e2e8f0 !important;
	border-radius: 12px !important;
	padding: 16px !important;
	font-family: 'Monaco', 'Menlo', monospace !important;
	font-size: 14px !important;
	}
	"""

	# Create Gradio interface
	def create_interface():
		"""Build the Gradio Blocks UI and wire up its event handlers.

		The layout has a header and an ethical-use notice, then two columns:
		inputs (resume upload, job description, model choice, tone/length
		options) on the left and outputs (LinkedIn message, email, tailored
		resume, download) on the right.

		Returns:
			The ``gr.Blocks`` app, not yet launched.
		"""
		with gr.Blocks(css=modern_css, title="Job Application Optimizer", theme=gr.themes.Soft()) as app:

			# Header
			gr.HTML("""
	<div class="modern-header">
	<h1>💼 Job Application Optimizer</h1>
	<p>AI-powered resume tailoring + personalized messaging</p>
	</div>
	""")

			# Warning
			gr.HTML("""
	<div class="warning-box">
	<h3>⚠️ Ethical Use Only</h3>
	<p><strong>This tool ONLY optimizes existing resume content.</strong><br>
	It rewrites bullet points, adjusts keywords, and highlights relevant experience.<br>
	<strong>It NEVER fabricates experience, skills, or education.</strong><br>
	All outputs remain truthful to your original resume.</p>
	</div>
	""")

			# Main Content
			with gr.Row():
				# Left column: inputs
				with gr.Column(scale=1):
					gr.Markdown("### 📄 Upload Your Resume")
					resume_upload = gr.File(
						label="Resume File (PDF, LaTeX .tex, or Overleaf ZIP)",
						file_types=[".pdf", ".tex", ".zip"]
					)

					gr.Markdown("### 📝 Job Description")
					job_desc_input = gr.TextArea(
						label="Paste the full job description here",
						lines=8,
						placeholder="Paste the complete job posting including requirements, responsibilities, and qualifications..."
					)

					gr.Markdown("### 🤖 AI Model")
					model_selector = gr.Dropdown(
						["GPT-4o", "Claude-3.5-Sonnet"],
						label="Select Model",
						value="GPT-4o",
						elem_classes=["model-selector"]
					)

					# Customization options
					with gr.Accordion("⚙️ Customization Options", open=False):
						gr.Markdown("#### 💬 LinkedIn Message")
						with gr.Row():
							linkedin_tone = gr.Dropdown(
								["Casual", "Semi-Professional", "Professional"],
								label="Tone",
								value="Semi-Professional"
							)
							linkedin_words = gr.Slider(
								minimum=30,
								maximum=150,
								value=75,
								step=5,
								label="Word Limit"
							)

						linkedin_custom_prompt = gr.TextArea(
							label="Custom Instructions (Optional)",
							placeholder="e.g., 'Mention my interest in remote work' or 'Emphasize my leadership experience'",
							lines=2
						)

						gr.Markdown("#### 📧 Email")
						with gr.Row():
							email_tone = gr.Dropdown(
								["Casual", "Semi-Professional", "Professional"],
								label="Tone",
								value="Professional"
							)
							email_words = gr.Slider(
								minimum=100,
								maximum=400,
								value=250,
								step=10,
								label="Word Limit"
							)

						email_custom_prompt = gr.TextArea(
							label="Custom Instructions (Optional)",
							placeholder="e.g., 'Mention my availability for interview' or 'Highlight my recent project'",
							lines=2
						)

					generate_btn = gr.Button("✨ Generate Application Materials", elem_classes=["modern-button"])

				# Right column: generated outputs and download
				with gr.Column(scale=1):
					gr.Markdown("### 💬 LinkedIn Message")
					linkedin_output = gr.TextArea(
						label="Short message for recruiter",
						lines=5,
						elem_classes=["output-section"]
					)

					gr.Markdown("### 📧 Email to Hiring Manager")
					email_output = gr.TextArea(
						label="Professional email",
						lines=12,
						elem_classes=["output-section"]
					)

					gr.Markdown("### 📑 Tailored Resume")
					resume_output = gr.TextArea(
						label="Optimized resume content",
						lines=15,
						elem_classes=["output-section"]
					)

					# Hidden state to store file format
					# (filled by the generate handler, read by the download handler)
					file_format_state = gr.State(value="pdf")

					download_btn = gr.Button("⬇️ Download Tailored Resume")
					pdf_download = gr.File(label="Download Resume File")

			# Event handlers
			# The input order matches the parameter order of
			# generate_application_materials; the outputs match its 4-tuple.
			generate_btn.click(
				fn=generate_application_materials,
				inputs=[
					resume_upload, job_desc_input, model_selector,
					linkedin_tone, email_tone,
					linkedin_words, email_words,
					linkedin_custom_prompt, email_custom_prompt
				],
				outputs=[linkedin_output, email_output, resume_output, file_format_state],
				show_progress=True
			)

			def handle_download(content, file, file_format):
				"""Turn the resume text box content into a downloadable file.

				``file`` is the current value of the upload component: None,
				a path string, or an object whose ``name`` attribute is used
				as the path.
				"""
				if file is None:
					return create_output_file(content, None, file_format)
				# Handle file path (string) from Gradio - pass full path for ZIP files
				if isinstance(file, str):
					return create_output_file(content, file, file_format)
				# Handle file object with name attribute
				return create_output_file(content, getattr(file, 'name', None), file_format)

			download_btn.click(
				fn=handle_download,
				inputs=[resume_output, resume_upload, file_format_state],
				outputs=pdf_download
			)

		return app

	# Launch with password protection
	if __name__ == "__main__":
		app = create_interface()

		# Check if running on Hugging Face Spaces
		is_huggingface = os.getenv("SPACE_ID") is not None

		# Settings shared by both environments: a single "user" login protected
		# by APP_PASSWORD.
		launch_options = {
			"auth": [("user", APP_PASSWORD)],
			"auth_message": "🔐 Enter credentials to access Job Application Optimizer",
			"show_error": True,
		}

		if is_huggingface:
			# Hugging Face Spaces configuration
			print("🚀 Launching Job Application Optimizer on Hugging Face Spaces")
		else:
			# Local development configuration
			print("🚀 Launching Job Application Optimizer")
			# NOTE(review): outside Spaces the app is published through a share
			# link and bound to all network interfaces.
			launch_options["share"] = True  # Enable shareable link
			launch_options["server_name"] = "0.0.0.0"  # Allow external access

		# The password itself is never printed: stdout usually ends up in logs.
		print("🔐 Password protection enabled")
		if "APP_PASSWORD" not in os.environ:
			print("⚠️ APP_PASSWORD is not set; the built-in default password is in use. Set APP_PASSWORD before sharing this app.")

		app.launch(**launch_options)