Spaces:

sk31415
/

resume-forge

Sleeping

App Files Files Community

resume-forge / app /main.py

sk31415

updated model

31e3d39 2 months ago

raw

history blame contribute delete

17 kB

	"""
	Resume Optimizer API
	- Parses uploaded resumes (PDF/DOCX/TXT)
	- Uses Arcee Trinity via OpenRouter to optimize for job descriptions
	- Compiles to PDF via latex.ytotech.com
	"""

	import os
	import re
	import json
	import base64
	import tempfile
	from typing import Optional
	from fastapi import FastAPI, UploadFile, File, Form, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import Response
	from fastapi.staticfiles import StaticFiles
	import httpx

	# PDF parsing
	import fitz # PyMuPDF

	app = FastAPI(title="Resume Optimizer API")

	# CORS for frontend.
	# Allowed origins come from the comma-separated CORS_ALLOW_ORIGINS environment
	# variable and default to "*" (any origin), which is what was hard-coded before.
	# Credentials are disabled on purpose: a wildcard origin combined with
	# allow_credentials=True lets any website issue credentialed requests, and none
	# of the endpoints visible in this file read cookies or auth headers.
	_cors_allow_origins = [
	    origin.strip()
	    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
	    if origin.strip()
	] or ["*"]

	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=_cors_allow_origins,
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Configuration
	# The API key is read from the environment once, at import time. It defaults to
	# an empty string; call_openrouter() refuses to run with an empty key.
	OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
	# Chat-completions endpoint that call_openrouter() POSTs to.
	OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
	# Remote LaTeX build endpoint that compile_latex() POSTs to.
	LATEX_API_URL = "https://latex.ytotech.com/builds/sync"

	# LaTeX Template
	# Skeleton of the generated resume. The <<NAME>>, <<CONTACT_LINE>>,
	# <<EDUCATION_CONTENT>>, <<EXPERIENCE_CONTENT>>, <<PROJECTS_CONTENT>> and
	# <<SKILLS_CONTENT>> markers are substituted by build_latex_from_json().
	# It is a raw string so the LaTeX backslashes need no doubling.
	LATEX_TEMPLATE = r"""
	\documentclass[a4paper,10pt]{extarticle}

	\usepackage[utf8]{inputenc}
	\usepackage{geometry}
	\usepackage{textcomp}
	\geometry{a4paper, margin=0.5in}
	\usepackage{titlesec}
	\usepackage{enumitem}
	\usepackage{hyperref}
	\setlist{noitemsep,leftmargin=*}
	\titleformat{\section}{\Large\bfseries}{\thesection}{1em}{}[\titlerule]
	\titlespacing*{\section}{0pt}{0.5em}{0.5em}
	\pagestyle{empty}

	\begin{document}

	\begin{center}
	\textbf{\Large <<NAME>>}\\[2pt]
	<<CONTACT_LINE>>
	\end{center}

	\section*{EDUCATION}
	<<EDUCATION_CONTENT>>

	\section*{EXPERIENCE}
	<<EXPERIENCE_CONTENT>>

	\section*{PROJECTS}
	<<PROJECTS_CONTENT>>

	\section*{SKILLS}
	<<SKILLS_CONTENT>>

	\end{document}
	"""


	def extract_text_from_pdf(pdf_bytes: bytes) -> str:
	    """Extract the text of every page of a PDF using PyMuPDF.

	    Args:
	        pdf_bytes: Raw bytes of the PDF file.

	    Returns:
	        The text of all pages concatenated in page order.

	    Raises:
	        Whatever ``fitz.open`` / ``page.get_text`` raise for unreadable input;
	        the document is closed either way.
	    """
	    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
	    try:
	        # join() avoids building the result with repeated string concatenation.
	        return "".join(page.get_text() for page in doc)
	    finally:
	        # Close the document even when a page fails to extract.
	        doc.close()


	def _extract_text_from_docx(content: bytes) -> Optional[str]:
	    """Return the paragraph text of a .docx payload, or None if it is not one.

	    A .docx file is a zip archive whose main body lives in
	    ``word/document.xml``. Text runs are ``<w:t>`` elements and paragraphs end
	    with ``</w:p>``. The XML is scanned with a regex rather than an XML parser
	    so untrusted uploads never reach entity expansion.
	    """
	    import html
	    import io
	    import zipfile

	    max_xml_bytes = 20 * 1024 * 1024  # guard against decompression bombs

	    try:
	        with zipfile.ZipFile(io.BytesIO(content)) as archive:
	            if archive.getinfo("word/document.xml").file_size > max_xml_bytes:
	                return ""
	            document_xml = archive.read("word/document.xml")
	    except (zipfile.BadZipFile, KeyError):
	        # Not a zip at all (e.g. legacy .doc) or a zip without a Word body.
	        return None

	    xml_text = document_xml.decode("utf-8", errors="ignore")
	    paragraphs = []
	    for chunk in xml_text.split("</w:p>"):
	        # "<w:t>" or "<w:t attr=...>" only; must not match <w:tab/>, <w:tbl>, ...
	        runs = re.findall(r"<w:t(?:\s[^>]*)?>([^<]*)</w:t>", chunk)
	        if runs:
	            paragraphs.append(html.unescape("".join(runs)))
	    return "\n".join(paragraphs)


	def extract_text_from_file(content: bytes, filename: str) -> str:
	    """Extract plain text from an uploaded file, chosen by file extension.

	    Args:
	        content: Raw bytes of the uploaded file.
	        filename: Original file name; only its last dot-separated part is used.

	    Returns:
	        Text extracted from the file. ``pdf`` goes through
	        extract_text_from_pdf(); ``doc``/``docx`` payloads that are real .docx
	        archives are unpacked and their paragraphs joined with newlines; every
	        other case (including legacy binary ``.doc``) is decoded as UTF-8 with
	        undecodable bytes dropped.
	    """
	    ext = filename.lower().rsplit('.', 1)[-1]

	    if ext == 'pdf':
	        return extract_text_from_pdf(content)

	    if ext in ('doc', 'docx'):
	        docx_text = _extract_text_from_docx(content)
	        if docx_text is not None:
	            return docx_text

	    # txt, unknown extensions and non-archive .doc files: best-effort decode.
	    return content.decode('utf-8', errors='ignore')


	# Single-pass translation table: each special character maps straight to its
	# final LaTeX form, so the braces inside "\textbackslash{}" are never re-escaped.
	_LATEX_ESCAPE_TABLE = str.maketrans({
	    '\\': r'\textbackslash{}',
	    '&': r'\&',
	    '%': r'\%',
	    '$': r'\$',
	    '#': r'\#',
	    '_': r'\_',
	    '{': r'\{',
	    '}': r'\}',
	    '~': r'\textasciitilde{}',
	    '^': r'\textasciicircum{}',
	})


	def escape_latex(text: str) -> str:
	    """Escape the characters that are special to LaTeX.

	    Args:
	        text: Text to escape. ``None`` is treated as an empty string and
	            non-string values are converted with ``str()``.

	    Returns:
	        The text with ``\\ & % $ # _ { } ~ ^`` replaced by LaTeX-safe forms.
	    """
	    if text is None:
	        return ""
	    return str(text).translate(_LATEX_ESCAPE_TABLE)


	async def call_openrouter(prompt: str, system_prompt: str) -> str:
	    """Send one chat-completion request to OpenRouter and return the reply text.

	    Args:
	        prompt: Content of the user message.
	        system_prompt: Content of the system message.

	    Returns:
	        The ``content`` of the first choice's message.

	    Raises:
	        HTTPException: 500 when OPENROUTER_API_KEY is empty; 502 when the
	            request cannot be completed or the reply has an unexpected shape;
	            the upstream status code when OpenRouter answers with a non-200.
	    """
	    if not OPENROUTER_API_KEY:
	        raise HTTPException(status_code=500, detail="OPENROUTER_API_KEY not configured")

	    headers = {
	        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
	        "Content-Type": "application/json",
	        "HTTP-Referer": "https://resume-optimizer.app",
	    }
	    payload = {
	        "model": "arcee-ai/trinity-large-preview:free",
	        "messages": [
	            {"role": "system", "content": system_prompt},
	            {"role": "user", "content": prompt},
	        ],
	        "max_tokens": 4000,
	        "temperature": 0.3,
	    }

	    try:
	        async with httpx.AsyncClient(timeout=120.0) as client:
	            response = await client.post(OPENROUTER_URL, headers=headers, json=payload)
	    except httpx.HTTPError as exc:
	        # Timeouts and connection failures would otherwise escape unhandled.
	        raise HTTPException(
	            status_code=502,
	            detail=f"OpenRouter request failed: {exc}"
	        ) from exc

	    if response.status_code != 200:
	        raise HTTPException(
	            status_code=response.status_code,
	            detail=f"OpenRouter API error: {response.text}"
	        )

	    try:
	        content = response.json()["choices"][0]["message"]["content"]
	    except (ValueError, KeyError, IndexError, TypeError) as exc:
	        # A 200 reply may still lack "choices" or not be JSON at all.
	        raise HTTPException(
	            status_code=502,
	            detail=f"Unexpected OpenRouter response: {response.text[:500]}"
	        ) from exc

	    if not isinstance(content, str) or not content.strip():
	        raise HTTPException(status_code=502, detail="OpenRouter returned an empty completion")

	    return content


	async def compile_latex(latex_content: str) -> bytes:
	    """Compile a LaTeX document to PDF through the remote build service.

	    Args:
	        latex_content: Complete LaTeX source, sent as the single main resource.

	    Returns:
	        The raw response body of a successful (2xx) build.

	    Raises:
	        HTTPException: 502 when the service cannot be reached; 500 when it
	            answers with a non-2xx status or an empty body.
	    """
	    payload = {
	        "compiler": "pdflatex",
	        "resources": [
	            {"main": True, "content": latex_content}
	        ]
	    }

	    try:
	        async with httpx.AsyncClient(timeout=60.0) as client:
	            response = await client.post(LATEX_API_URL, json=payload)
	    except httpx.HTTPError as exc:
	        raise HTTPException(
	            status_code=502,
	            detail=f"LaTeX compilation service unreachable: {exc}"
	        ) from exc

	    # Accept any 2xx status code (200, 201, etc.)
	    if not (200 <= response.status_code < 300):
	        # Try to extract error message if it's text
	        try:
	            error_text = response.text[:1000] if response.text else "Unknown error"
	        except Exception:
	            # Narrowed from a bare except so cancellation and interpreter
	            # exit signals are no longer swallowed here.
	            error_text = "LaTeX compilation failed (binary response)"

	        raise HTTPException(
	            status_code=500,
	            detail=f"LaTeX compilation failed. Status: {response.status_code}. Error: {error_text}"
	        )

	    if not response.content:
	        raise HTTPException(status_code=500, detail="LaTeX compilation returned an empty document")

	    return response.content


	# System message sent with every optimization request. It tells the model to
	# reply with a single JSON object; the keys listed in the "Output Format"
	# section are the ones build_latex_from_json() reads.
	# NOTE(review): the rules forbid inventing metrics, while critical instruction 4
	# allows estimating them "if reasonable" - confirm which one is intended.
	SYSTEM_PROMPT = """You are an expert resume writer, ATS optimization specialist, and hiring manager in this field.

	Your task is to optimize the resume for the specific job description provided, maximizing relevance, clarity, and keyword alignment without fabricating experience.

	OBJECTIVES:
	1. Tailor the resume specifically to the job description
	2. Optimize for ATS keyword matching and natural language ranking
	3. Improve impact using XYZ-style bullet points (X=What, Y=How/Tools, Z=Outcome/Metrics)
	4. Clearly emphasize the most important skills, tools, and concepts from the JD
	5. Reorder experiences to highlight most relevant roles first

	RULES & CONSTRAINTS:
	❌ Do NOT invent skills, experience, metrics, or credentials
	✅ You may rephrase, restructure, and quantify only if logically implied from the original resume
	✅ Preserve truthful seniority and scope
	✅ Keep formatting clean and ATS-safe
	✅ Focus on semantic relevance, not keyword stuffing

	FORMATTING REQUIREMENTS:
	Bullet Point Style (XYZ Format):
	- X = What I did (action + responsibility)
	- Y = How I did it (tools, methods, technologies, frameworks)
	- Z = Outcome or impact (metrics, results, achievements)
	- Example: "Developed Django REST API to improve data processing speed, reducing query time by 40%"

	Bullet Length: Keep each bullet to 1-2 lines max

	Keyword Optimization:
	- Identify top keywords from the job description
	- Naturally integrate into experience bullets, skills, and education sections
	- Prioritize semantic relevance over keyword stuffing
	- Use industry-standard terminology that matches the JD

	Output Format:
	Return ONLY a JSON object (no markdown, code blocks, or explanations) with this exact structure:

	{
	"name": "Full Name",
	"email": "email@example.com",
	"linkedin_url": "https://linkedin.com/in/username (or empty string if not in original resume)",
	"linkedin_display": "linkedin.com/in/username (or empty string if not in original resume)",
	"github_url": "https://github.com/username (ONLY if explicitly stated in original resume, otherwise empty string)",
	"github_display": "github.com/username (ONLY if explicitly stated in original resume, otherwise empty string)",
	"education": [
	{
	"institution": "University Name",
	"expected": "May 2024",
	"degree": "B.S. in Computer Science",
	"coursework": "Relevant courses from original resume",
	"honors": "Honors, GPA, awards if applicable"
	}
	],
	"experience": [
	{
	"company": "Company Name",
	"department": "Department if applicable",
	"location": "City, State",
	"role": "Job Title",
	"dates": "Month Year – Month Year",
	"bullets": [
	"Action verb + responsibility (X) using Key Tool or Technology (Y) to achieve quantified result (Z)",
	"XYZ format bullet with JD keywords naturally integrated, measurable impact emphasized"
	]
	}
	],
	"projects": [
	{
	"name": "Project Name",
	"technologies": "Tech1, Tech2, Tech3",
	"location": "City, State or Online",
	"role": "Your Role",
	"dates": "Month Year – Month Year",
	"bullets": [
	"Built Framework solution achieving Quantified Result",
	"Implemented Key Feature using Technologies, measurable impact"
	]
	}
	],
	"skills": {
	"technical": "Skill1, Skill2, Skill3 (prioritize JD-relevant skills first)",
	"communication": "Relevant soft skills from JD"
	}
	}

	CRITICAL INSTRUCTIONS:
	1. Analyze the job description deeply for keywords, required skills, technologies, and seniority level
	2. Reorder experience entries by relevance (most relevant to JD first)
	3. Use XYZ format for all bullet points to maximize impact
	4. Extract and highlight metrics from original resume; estimate only if reasonable
	5. Match tone and language to the job description's seniority and industry
	6. Preserve all truthful information from original resume
	7. Include all education, experience, and project sections from the original resume
	8. Keep skills section focused on JD-relevant competencies
	9. ONLY include GitHub/LinkedIn URLs if they are EXPLICITLY stated in the original resume - use empty strings otherwise

	Begin optimization now. Return ONLY the JSON object, no additional text."""


	def _latex_text(value) -> str:
	    """Turn a JSON value from the model into escaped LaTeX body text."""
	    if value is None:
	        return ""
	    if isinstance(value, (list, tuple)):
	        # The model sometimes returns a list where a string was requested.
	        value = ", ".join(str(item) for item in value if item is not None)
	    return escape_latex(str(value))


	def _latex_url(value) -> str:
	    """Drop characters that could close or escape an href argument."""
	    if not isinstance(value, str):
	        return ""
	    return re.sub(r"[\\{}\s]", "", value)


	def _latex_itemize(items) -> str:
	    """Render non-empty, already-escaped items as an itemize environment.

	    Returns an empty string when there is nothing to list, because an
	    itemize environment without any item is a LaTeX error.
	    """
	    rows = [f"\\item {item}" for item in items if item]
	    if not rows:
	        return ""
	    return "\n".join(["\\begin{itemize}", *rows, "\\end{itemize}"])


	def _dict_entries(value) -> list:
	    """Return the dict elements of a JSON array; anything else yields []."""
	    if not isinstance(value, list):
	        return []
	    return [entry for entry in value if isinstance(entry, dict)]


	def _latex_entry(heading: str, entry: dict) -> str:
	    """Render one experience/project entry: heading row, role row, bullets."""
	    bullets = entry.get("bullets")
	    if isinstance(bullets, str):
	        bullets = [bullets]
	    if not isinstance(bullets, list):
	        bullets = []

	    parts = [
	        "\\noindent",
	        f"\\textbf{{{heading}}} \\hfill {_latex_text(entry.get('location'))} \\\\",
	        f"\\textit{{{_latex_text(entry.get('role'))}}} \\hfill {_latex_text(entry.get('dates'))}",
	        _latex_itemize(_latex_text(bullet) for bullet in bullets),
	    ]
	    return "\n".join(part for part in parts if part)


	def build_latex_from_json(data: dict) -> str:
	    """Build the resume LaTeX document from the model's structured JSON.

	    Args:
	        data: Parsed JSON object with the optional keys ``name``, ``email``,
	            ``linkedin_url``/``linkedin_display``, ``github_url``/
	            ``github_display``, ``education``, ``experience``, ``projects``
	            and ``skills``. Missing, null or wrongly typed values are
	            tolerated and rendered as empty.

	    Returns:
	        LATEX_TEMPLATE with every ``<<...>>`` placeholder filled in. All text
	        is LaTeX-escaped and URLs are stripped of backslashes, braces and
	        whitespace, so model output cannot inject LaTeX commands.
	    """
	    # "\\|" keeps the runtime text "$\|$" without the invalid "\|" escape.
	    separator = " $\\|$ "

	    # Header info
	    name = _latex_text(data.get("name", "Name"))

	    # Build contact links only if they're provided
	    contact_links = []
	    email = _latex_url(data.get("email", "email@example.com"))
	    if email:
	        contact_links.append(f"\\href{{mailto:{email}}}{{{escape_latex(email)}}}")

	    for url_key, display_key, bare_site in (
	        ("linkedin_url", "linkedin_display", "https://linkedin.com"),
	        ("github_url", "github_display", "https://github.com"),
	    ):
	        url = _latex_url(data.get(url_key, ""))
	        # A bare site URL means the model had no real profile link to report.
	        if not url or url == bare_site:
	            continue
	        display = _latex_text(data.get(display_key)) or escape_latex(url)
	        contact_links.append(f"\\href{{{url}}}{{{display}}}")

	    contact_line = separator.join(contact_links)

	    # Build education section
	    education_blocks = []
	    for edu in _dict_entries(data.get("education")):
	        coursework = _latex_text(edu.get("coursework"))
	        honors = _latex_text(edu.get("honors"))
	        parts = [
	            "\\noindent",
	            f"\\textbf{{{_latex_text(edu.get('institution'))}}} \\hfill "
	            f"\\textbf{{{_latex_text(edu.get('expected'))}}} \\\\",
	            _latex_text(edu.get("degree")),
	            _latex_itemize([
	                f"\\textbf{{Coursework: }}{coursework}" if coursework else "",
	                f"\\textbf{{Honors:}} {honors}" if honors else "",
	            ]),
	        ]
	        education_blocks.append("\n".join(part for part in parts if part))

	    # Build experience section
	    experience_blocks = []
	    for exp in _dict_entries(data.get("experience")):
	        heading = _latex_text(exp.get("company"))
	        department = _latex_text(exp.get("department"))
	        if department:
	            heading = f"{heading}{separator}{department}"
	        experience_blocks.append(_latex_entry(heading, exp))

	    # Build projects section
	    project_blocks = []
	    for proj in _dict_entries(data.get("projects")):
	        heading = _latex_text(proj.get("name"))
	        tech = _latex_text(proj.get("technologies"))
	        if tech:
	            heading = f"{heading}{separator}{tech}"
	        project_blocks.append(_latex_entry(heading, proj))

	    # Build skills section
	    skills = data.get("skills")
	    if isinstance(skills, dict):
	        technical = _latex_text(skills.get("technical"))
	        communication = _latex_text(skills.get("communication"))
	    else:
	        technical, communication = _latex_text(skills), ""
	    skills_content = _latex_itemize([
	        f"\\textbf{{Technical:}} {technical}" if technical else "",
	        f"\\textbf{{Communication:}} {communication}" if communication else "",
	    ])

	    # Assemble final LaTeX in one pass, so text that happens to contain a
	    # "<<PLACEHOLDER>>" marker is never substituted a second time.
	    sections = {
	        "NAME": name,
	        "CONTACT_LINE": contact_line,
	        "EDUCATION_CONTENT": "\n\n".join(education_blocks),
	        "EXPERIENCE_CONTENT": "\n\n".join(experience_blocks),
	        "PROJECTS_CONTENT": "\n\n".join(project_blocks),
	        "SKILLS_CONTENT": skills_content,
	    }
	    return re.sub(
	        r"<<([A-Z_]+)>>",
	        lambda match: sections.get(match.group(1), match.group(0)),
	        LATEX_TEMPLATE,
	    )


	@app.get("/api/")
	async def root():
	    """Identify the service and report that it is running."""
	    banner = dict(message="Resume Optimizer API", status="running")
	    return banner


	@app.get("/api/health")
	async def health():
	    """Liveness probe: always answers with a fixed healthy status."""
	    report = dict(status="healthy")
	    return report


	async def _read_resume_text(resume: UploadFile) -> str:
	    """Read the upload and return its text, or raise HTTP 400 if unusable."""
	    content = await resume.read()
	    try:
	        resume_text = extract_text_from_file(content, resume.filename or "resume.pdf")
	    except HTTPException:
	        raise
	    except Exception as exc:
	        # A corrupt or mislabelled upload is a client error, not a server fault.
	        raise HTTPException(
	            status_code=400,
	            detail="Could not extract text from resume"
	        ) from exc

	    if not resume_text.strip():
	        raise HTTPException(status_code=400, detail="Could not extract text from resume")
	    return resume_text


	def _build_optimization_prompt(resume_text: str, job_description: str) -> str:
	    """Compose the user message holding the resume and the job description."""
	    return (
	        "Here is the original resume:\n\n"
	        f"{resume_text}\n\n"
	        "Here is the job description to optimize for:\n\n"
	        f"{job_description}\n\n"
	        "Please optimize this resume for the job description. "
	        "Return only the JSON object as specified."
	    )


	def _parse_llm_json(llm_response: str) -> dict:
	    """Decode the JSON object that starts at the first "{" of the reply.

	    Text before the object (such as a markdown fence) and anything after it
	    is ignored, so trailing commentary containing braces no longer breaks
	    parsing.

	    Raises:
	        ValueError: No "{" is present, or the object is not valid JSON
	            (``json.JSONDecodeError`` is a ``ValueError``).
	    """
	    start = llm_response.find("{")
	    if start == -1:
	        raise ValueError("No JSON found in response")
	    parsed, _end = json.JSONDecoder().raw_decode(llm_response, start)
	    return parsed


	@app.post("/api/optimize")
	async def optimize_resume(
	    resume: UploadFile = File(...),
	    job_description: str = Form(...)
	):
	    """
	    Optimize a resume for a specific job description.
	    Returns a compiled PDF.

	    Raises HTTPException 400 when no text can be extracted from the upload
	    and 500 when the model reply cannot be parsed as JSON; errors raised by
	    call_openrouter() and compile_latex() propagate unchanged.
	    """

	    # Read and extract text from resume
	    resume_text = await _read_resume_text(resume)

	    # Call OpenRouter
	    prompt = _build_optimization_prompt(resume_text, job_description)
	    llm_response = await call_openrouter(prompt, SYSTEM_PROMPT)

	    # Parse JSON from response
	    try:
	        resume_data = _parse_llm_json(llm_response)
	    except ValueError as e:
	        raise HTTPException(
	            status_code=500,
	            detail=f"Failed to parse LLM response: {str(e)}\n\nResponse: {llm_response[:500]}"
	        )

	    # Build LaTeX from structured data
	    latex_content = build_latex_from_json(resume_data)

	    # Compile to PDF
	    pdf_bytes = await compile_latex(latex_content)

	    return Response(
	        content=pdf_bytes,
	        media_type="application/pdf",
	        headers={
	            "Content-Disposition": "attachment; filename=optimized_resume.pdf"
	        }
	    )


	@app.post("/api/optimize-json")
	async def optimize_resume_json(
	    resume: UploadFile = File(...),
	    job_description: str = Form(...)
	):
	    """
	    Optimize a resume and return the structured JSON data (for debugging).

	    On a parse failure this returns the error and the raw model reply in the
	    response body instead of raising.
	    """

	    resume_text = await _read_resume_text(resume)

	    prompt = _build_optimization_prompt(resume_text, job_description)
	    llm_response = await call_openrouter(prompt, SYSTEM_PROMPT)

	    try:
	        resume_data = _parse_llm_json(llm_response)
	    except ValueError as e:
	        return {"error": str(e), "raw_response": llm_response}

	    return {
	        "optimized_data": resume_data,
	        "latex": build_latex_from_json(resume_data)
	    }


	# Mount static files LAST so API routes take precedence.
	# The mount sits at "/" and serves the relative "static" directory;
	# html=True enables HTML mode in StaticFiles.
	app.mount("/", StaticFiles(directory="static", html=True), name="static")


	if __name__ == "__main__":
	    # Direct execution: serve the app with uvicorn on all interfaces, port 7860.
	    import uvicorn

	    bind_options = {"host": "0.0.0.0", "port": 7860}
	    uvicorn.run(app, **bind_options)