# resume-forge / app/main.py
# (repository page header captured with this file: "sk31415's picture", "updated model", "31e3d39")
"""
Resume Optimizer API
- Parses uploaded resumes (PDF/DOCX/TXT)
- Uses Arcee Trinity via OpenRouter to optimize for job descriptions
- Compiles to PDF via latex.ytotech.com
"""
import os
import re
import json
import base64
import tempfile
from typing import Optional
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import Response
from fastapi.staticfiles import StaticFiles
import httpx
# PDF parsing
import fitz # PyMuPDF
app = FastAPI(title="Resume Optimizer API")

# CORS for frontend.
# Allowed origins come from the comma-separated CORS_ALLOW_ORIGINS environment
# variable and default to "*" (any origin), as before.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # Credentialed requests are only enabled for an explicit origin list:
    # a wildcard origin combined with credentials would let any website make
    # credentialed calls to this API.
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Configuration
# The OpenRouter key is read from the environment once, at import time;
# it falls back to an empty string when the variable is unset.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Remote endpoints: chat completions (LLM) and synchronous LaTeX builds (PDF).
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
LATEX_API_URL = "https://latex.ytotech.com/builds/sync"
# LaTeX Template
# Raw string holding the whole resume document. The <<...>> markers
# (<<NAME>>, <<CONTACT_LINE>>, <<EDUCATION_CONTENT>>, <<EXPERIENCE_CONTENT>>,
# <<PROJECTS_CONTENT>>, <<SKILLS_CONTENT>>) are placeholders that
# build_latex_from_json() replaces with generated LaTeX.
LATEX_TEMPLATE = r"""
\documentclass[a4paper,10pt]{extarticle}
\usepackage[utf8]{inputenc}
\usepackage{geometry}
\usepackage{textcomp}
\geometry{a4paper, margin=0.5in}
\usepackage{titlesec}
\usepackage{enumitem}
\usepackage{hyperref}
\setlist{noitemsep,leftmargin=*}
\titleformat{\section}{\Large\bfseries}{\thesection}{1em}{}[\titlerule]
\titlespacing*{\section}{0pt}{0.5em}{0.5em}
\pagestyle{empty}
\begin{document}
\begin{center}
\textbf{\Large <<NAME>>}\\[2pt]
<<CONTACT_LINE>>
\end{center}
\section*{EDUCATION}
<<EDUCATION_CONTENT>>
\section*{EXPERIENCE}
<<EXPERIENCE_CONTENT>>
\section*{PROJECTS}
<<PROJECTS_CONTENT>>
\section*{SKILLS}
<<SKILLS_CONTENT>>
\end{document}
"""
def extract_text_from_pdf(pdf_bytes: bytes) -> str:
    """Extract text from a PDF using PyMuPDF.

    Args:
        pdf_bytes: Raw bytes of the PDF file.

    Returns:
        The text of every page, concatenated in page order with no
        separator added between pages.

    Raises:
        Whatever PyMuPDF raises when the bytes cannot be opened as a PDF.
    """
    # The context manager closes the document even when extracting a page
    # fails part-way through; join() avoids repeated string re-allocation.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
# XML namespace prefix (ElementTree "{uri}" form) of WordprocessingML tags.
_WORD_NS = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"


def _extract_docx_text(content: bytes) -> Optional[str]:
    """Return the visible text of a .docx file, or None if it is not one.

    A .docx file is a ZIP archive whose main text lives in
    ``word/document.xml``. Paragraphs are separated by newlines; tabs and
    line breaks inside runs are preserved.
    """
    import io
    import zipfile
    import xml.etree.ElementTree as ET

    try:
        with zipfile.ZipFile(io.BytesIO(content)) as archive:
            root = ET.fromstring(archive.read("word/document.xml"))
    except (zipfile.BadZipFile, KeyError, ET.ParseError):
        # Not a ZIP, no main document part, or malformed XML.
        return None

    paragraph = f"{_WORD_NS}p"
    run = f"{_WORD_NS}r"
    text = f"{_WORD_NS}t"
    tab = f"{_WORD_NS}tab"
    breaks = (f"{_WORD_NS}br", f"{_WORD_NS}cr")
    pieces = []

    def collect(node, in_run: bool) -> None:
        for child in node:
            tag = child.tag
            if tag == paragraph:
                if pieces:
                    pieces.append("\n")
                collect(child, False)
            elif tag == run:
                collect(child, True)
            # Only tags inside a run are content: the same <w:tab> tag is
            # also used for tab-stop definitions in paragraph properties.
            elif in_run and tag == text:
                pieces.append(child.text or "")
            elif in_run and tag == tab:
                pieces.append("\t")
            elif in_run and tag in breaks:
                pieces.append("\n")
            else:
                collect(child, in_run)

    collect(root, False)
    return "".join(pieces)


def extract_text_from_file(content: bytes, filename: str) -> str:
    """Extract text from an uploaded file based on its extension.

    Args:
        content: Raw bytes of the uploaded file.
        filename: Original file name; only the part after the last dot is
            used, case-insensitively.

    Returns:
        Text for ``pdf`` (via PyMuPDF) and ``docx`` (parsed from the archive).
        Every other extension, and any ``doc``/``docx`` upload that is not a
        valid Word archive, is decoded as UTF-8 with undecodable bytes
        dropped.
    """
    ext = filename.lower().rsplit('.', 1)[-1]
    if ext == 'pdf':
        return extract_text_from_pdf(content)
    if ext in ('doc', 'docx'):
        # Decoding the compressed archive bytes as UTF-8 yields garbage, so
        # parse it properly; mislabelled/legacy files fall through below.
        docx_text = _extract_docx_text(content)
        if docx_text is not None:
            return docx_text
    return content.decode('utf-8', errors='ignore')
# Single-pass translation table. Replacing characters one pass at a time would
# re-escape the braces of "\textbackslash{}" when "{" and "}" are handled.
_LATEX_ESCAPES = str.maketrans({
    '\\': r'\textbackslash{}',
    '&': r'\&',
    '%': r'\%',
    '$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
    '~': r'\textasciitilde{}',
    '^': r'\textasciicircum{}',
    # These three are not syntax characters, but are expected to print as
    # other glyphs in pdflatex's default font encoding.
    '<': r'\textless{}',
    '>': r'\textgreater{}',
    '|': r'\textbar{}',
})


def escape_latex(text: str) -> str:
    """Escape special LaTeX characters in plain text.

    Args:
        text: Plain text that must appear literally in the document.

    Returns:
        The text with every LaTeX-special character replaced by its escaped
        form. Each input character is translated exactly once, so the
        replacement sequences themselves are never escaped again.
    """
    return text.translate(_LATEX_ESCAPES)
async def call_openrouter(prompt: str, system_prompt: str) -> str:
    """Call the OpenRouter chat-completions API with Arcee Trinity.

    Args:
        prompt: Content of the user message.
        system_prompt: Content of the system message.

    Returns:
        The content of the first choice's message.

    Raises:
        HTTPException: 500 when OPENROUTER_API_KEY is not configured; 504 when
            the request times out; 502 when the request fails at the transport
            level or the 200 response does not have the expected shape; the
            upstream status code when OpenRouter answers with a non-200.
    """
    if not OPENROUTER_API_KEY:
        raise HTTPException(status_code=500, detail="OPENROUTER_API_KEY not configured")

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://resume-optimizer.app",
    }
    payload = {
        "model": "arcee-ai/trinity-large-preview:free",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        "max_tokens": 4000,
        "temperature": 0.3,
    }

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(OPENROUTER_URL, headers=headers, json=payload)
    except httpx.TimeoutException as exc:
        raise HTTPException(status_code=504, detail="OpenRouter API request timed out") from exc
    except httpx.HTTPError as exc:
        raise HTTPException(status_code=502, detail=f"OpenRouter API request failed: {exc}") from exc

    if response.status_code != 200:
        raise HTTPException(
            status_code=response.status_code,
            detail=f"OpenRouter API error: {response.text}"
        )

    # A 200 response can still carry an error payload or an empty choice
    # list; report that as a gateway error instead of a raw KeyError.
    try:
        content = response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        raise HTTPException(
            status_code=502,
            detail=f"Unexpected OpenRouter response: {response.text[:500]}"
        ) from exc
    if not isinstance(content, str):
        raise HTTPException(
            status_code=502,
            detail=f"Unexpected OpenRouter response: {response.text[:500]}"
        )
    return content
async def compile_latex(latex_content: str) -> bytes:
    """Compile LaTeX to PDF using latex.ytotech.com.

    Args:
        latex_content: Complete LaTeX source of the document.

    Returns:
        The raw response body of a successful (2xx) build.

    Raises:
        HTTPException: 504 when the request times out; 502 when it fails at
            the transport level; 500 when the service answers with a non-2xx
            status (the detail carries up to 1000 characters of its body).
    """
    payload = {
        "compiler": "pdflatex",
        "resources": [
            {"main": True, "content": latex_content}
        ]
    }
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.post(LATEX_API_URL, json=payload)
    except httpx.TimeoutException as exc:
        raise HTTPException(status_code=504, detail="LaTeX compilation request timed out") from exc
    except httpx.HTTPError as exc:
        raise HTTPException(status_code=502, detail=f"LaTeX compilation request failed: {exc}") from exc

    # Accept any 2xx status code (200, 201, etc.)
    if not (200 <= response.status_code < 300):
        # Best effort: the error body is normally text, but fall back to a
        # generic message if it cannot be decoded.
        try:
            error_text = response.text[:1000] if response.text else "Unknown error"
        except (UnicodeError, LookupError):
            error_text = "LaTeX compilation failed (binary response)"
        raise HTTPException(
            status_code=500,
            detail=f"LaTeX compilation failed. Status: {response.status_code}. Error: {error_text}"
        )
    return response.content
# System prompt passed to call_openrouter() by both optimize endpoints.
# The JSON structure it requests (name, email, *_url / *_display, education,
# experience, projects, skills) is the structure build_latex_from_json() reads.
SYSTEM_PROMPT = """You are an expert resume writer, ATS optimization specialist, and hiring manager in this field.
Your task is to optimize the resume for the specific job description provided, maximizing relevance, clarity, and keyword alignment without fabricating experience.
OBJECTIVES:
1. Tailor the resume specifically to the job description
2. Optimize for ATS keyword matching and natural language ranking
3. Improve impact using XYZ-style bullet points (X=What, Y=How/Tools, Z=Outcome/Metrics)
4. Clearly emphasize the most important skills, tools, and concepts from the JD
5. Reorder experiences to highlight most relevant roles first
RULES & CONSTRAINTS:
❌ Do NOT invent skills, experience, metrics, or credentials
✅ You may rephrase, restructure, and quantify only if logically implied from the original resume
✅ Preserve truthful seniority and scope
✅ Keep formatting clean and ATS-safe
✅ Focus on semantic relevance, not keyword stuffing
FORMATTING REQUIREMENTS:
Bullet Point Style (XYZ Format):
- X = What I did (action + responsibility)
- Y = How I did it (tools, methods, technologies, frameworks)
- Z = Outcome or impact (metrics, results, achievements)
- Example: "Developed Django REST API to improve data processing speed, reducing query time by 40%"
Bullet Length: Keep each bullet to 1-2 lines max
Keyword Optimization:
- Identify top keywords from the job description
- Naturally integrate into experience bullets, skills, and education sections
- Prioritize semantic relevance over keyword stuffing
- Use industry-standard terminology that matches the JD
Output Format:
Return ONLY a JSON object (no markdown, code blocks, or explanations) with this exact structure:
{
"name": "Full Name",
"email": "email@example.com",
"linkedin_url": "https://linkedin.com/in/username (or empty string if not in original resume)",
"linkedin_display": "linkedin.com/in/username (or empty string if not in original resume)",
"github_url": "https://github.com/username (ONLY if explicitly stated in original resume, otherwise empty string)",
"github_display": "github.com/username (ONLY if explicitly stated in original resume, otherwise empty string)",
"education": [
{
"institution": "University Name",
"expected": "May 2024",
"degree": "B.S. in Computer Science",
"coursework": "Relevant courses from original resume",
"honors": "Honors, GPA, awards if applicable"
}
],
"experience": [
{
"company": "Company Name",
"department": "Department if applicable",
"location": "City, State",
"role": "Job Title",
"dates": "Month Year – Month Year",
"bullets": [
"Action verb + responsibility (X) using Key Tool or Technology (Y) to achieve quantified result (Z)",
"XYZ format bullet with JD keywords naturally integrated, measurable impact emphasized"
]
}
],
"projects": [
{
"name": "Project Name",
"technologies": "Tech1, Tech2, Tech3",
"location": "City, State or Online",
"role": "Your Role",
"dates": "Month Year – Month Year",
"bullets": [
"Built Framework solution achieving Quantified Result",
"Implemented Key Feature using Technologies, measurable impact"
]
}
],
"skills": {
"technical": "Skill1, Skill2, Skill3 (prioritize JD-relevant skills first)",
"communication": "Relevant soft skills from JD"
}
}
CRITICAL INSTRUCTIONS:
1. Analyze the job description deeply for keywords, required skills, technologies, and seniority level
2. Reorder experience entries by relevance (most relevant to JD first)
3. Use XYZ format for all bullet points to maximize impact
4. Extract and highlight metrics from original resume; estimate only if reasonable
5. Match tone and language to the job description's seniority and industry
6. Preserve all truthful information from original resume
7. Include all education, experience, and project sections from the original resume
8. Keep skills section focused on JD-relevant competencies
9. ONLY include GitHub/LinkedIn URLs if they are EXPLICITLY stated in the original resume - use empty strings otherwise
Begin optimization now. Return ONLY the JSON object, no additional text."""
# Characters that could end or escape the URL argument of an href command
# are percent-encoded (written as "\%XX" so the percent sign is literal).
_HREF_UNSAFE = str.maketrans({"\\": r"\%5C", "{": r"\%7B", "}": r"\%7D"})


def _plain(value, default: str = "") -> str:
    """Return value as a string, using default for None (JSON null)."""
    return default if value is None else str(value)


def _tex(value) -> str:
    """Return value as stripped, LaTeX-escaped text ("" for None)."""
    return escape_latex(_plain(value).strip())


def _href_target(value) -> str:
    """Return value as a URL that cannot break out of an href argument."""
    return _plain(value).strip().translate(_HREF_UNSAFE)


def _dict_entries(value) -> list:
    """Return the dict members of value when it is a list, else []."""
    if not isinstance(value, list):
        return []
    return [entry for entry in value if isinstance(entry, dict)]


def _itemize(items: list) -> str:
    """Render items as an itemize environment, or "" when there are none."""
    # LaTeX rejects an itemize environment that contains no item at all.
    if not items:
        return ""
    body = "\n".join(f"\\item {item}" for item in items)
    return f"\\begin{{itemize}}\n{body}\n\\end{{itemize}}"


def _bullet_items(entry: dict) -> list:
    """Return the escaped, non-empty bullet strings of an entry."""
    raw = entry.get("bullets")
    if not isinstance(raw, list):
        return []
    escaped = (_tex(bullet) for bullet in raw)
    return [bullet for bullet in escaped if bullet]


def build_latex_from_json(data: dict) -> str:
    """Build the LaTeX resume document from structured JSON data.

    Args:
        data: Parsed LLM output with the optional keys ``name``, ``email``,
            ``linkedin_url``/``linkedin_display``, ``github_url``/
            ``github_display``, ``education``, ``experience``, ``projects``
            and ``skills``. Missing keys, nulls and values of an unexpected
            type are treated as empty.

    Returns:
        LATEX_TEMPLATE with every placeholder replaced. All values are
        escaped (text) or sanitised (link targets) before insertion, since
        they ultimately come from an uploaded document.
    """
    # Header info
    name = escape_latex(_plain(data.get("name"), "Name"))

    # Build contact links only if they're provided
    contact_links = []
    email = _plain(data.get("email")).strip()
    if email:
        contact_links.append(
            f"\\href{{mailto:{_href_target(email)}}}{{{escape_latex(email)}}}"
        )
    for site, bare_url in (
        ("linkedin", "https://linkedin.com"),
        ("github", "https://github.com"),
    ):
        url = _plain(data.get(site + "_url")).strip()
        if url and url != bare_url:
            # Fall back to the URL itself so the link is never invisible.
            display = _plain(data.get(site + "_display")).strip() or url
            contact_links.append(
                f"\\href{{{_href_target(url)}}}{{{escape_latex(display)}}}"
            )
    contact_line = " $|$ ".join(contact_links)

    # Build education section
    education_blocks = []
    for edu in _dict_entries(data.get("education")):
        header = (
            f"\\textbf{{{_tex(edu.get('institution'))}}} "
            f"\\hfill \\textbf{{{_tex(edu.get('expected'))}}}"
        )
        degree = _tex(edu.get("degree"))
        if degree:
            header += " \\\\\n" + degree
        details = []
        coursework = _tex(edu.get("coursework"))
        if coursework:
            details.append(f"\\textbf{{Coursework: }}{coursework}")
        honors = _tex(edu.get("honors"))
        if honors:
            details.append(f"\\textbf{{Honors:}} {honors}")
        parts = ["\\noindent", header, _itemize(details)]
        education_blocks.append("\n".join(part for part in parts if part))

    # Build experience section
    experience_blocks = []
    for exp in _dict_entries(data.get("experience")):
        company = _tex(exp.get("company"))
        department = _tex(exp.get("department"))
        if department:
            company_line = f"\\textbf{{{company} $|$ {department}}}"
        else:
            company_line = f"\\textbf{{{company}}}"
        header = (
            f"{company_line} \\hfill {_tex(exp.get('location'))} \\\\\n"
            f"\\textit{{{_tex(exp.get('role'))}}} \\hfill {_tex(exp.get('dates'))}"
        )
        parts = ["\\noindent", header, _itemize(_bullet_items(exp))]
        experience_blocks.append("\n".join(part for part in parts if part))

    # Build projects section
    project_blocks = []
    for proj in _dict_entries(data.get("projects")):
        title = _tex(proj.get("name"))
        tech = _tex(proj.get("technologies"))
        if tech:
            title = f"{title} $|$ {tech}"
        header = (
            f"\\textbf{{{title}}} \\hfill {_tex(proj.get('location'))} \\\\\n"
            f"\\textit{{{_tex(proj.get('role'))}}} \\hfill {_tex(proj.get('dates'))}"
        )
        parts = ["\\noindent", header, _itemize(_bullet_items(proj))]
        project_blocks.append("\n".join(part for part in parts if part))

    # Build skills section
    skills = data.get("skills")
    if not isinstance(skills, dict):
        skills = {}
    skill_items = []
    technical = _tex(skills.get("technical"))
    if technical:
        skill_items.append(f"\\textbf{{Technical:}} {technical}")
    communication = _tex(skills.get("communication"))
    if communication:
        skill_items.append(f"\\textbf{{Communication:}} {communication}")

    # Assemble final LaTeX in one pass, so placeholder-like text inside the
    # data is never substituted a second time.
    sections = {
        "NAME": name,
        "CONTACT_LINE": contact_line,
        "EDUCATION_CONTENT": "\n\n".join(education_blocks),
        "EXPERIENCE_CONTENT": "\n\n".join(experience_blocks),
        "PROJECTS_CONTENT": "\n\n".join(project_blocks),
        "SKILLS_CONTENT": _itemize(skill_items),
    }
    return re.sub(
        r"<<([A-Z_]+)>>",
        lambda match: sections.get(match.group(1), match.group(0)),
        LATEX_TEMPLATE,
    )
@app.get("/api/")
async def root():
    """Return the service name together with a fixed "running" status."""
    return dict(message="Resume Optimizer API", status="running")
@app.get("/api/health")
async def health():
    """Health probe: always answers with a "healthy" status."""
    return dict(status="healthy")
async def _read_resume_text(resume: UploadFile) -> str:
    """Read the upload and return its text, or raise a 400 HTTPException."""
    content = await resume.read()
    try:
        resume_text = extract_text_from_file(content, resume.filename or "resume.pdf")
    except Exception as exc:
        # Request boundary: a corrupt or mislabelled upload makes the parser
        # raise; that is a client error, not a server fault.
        raise HTTPException(
            status_code=400, detail="Could not extract text from resume"
        ) from exc
    if not resume_text.strip():
        raise HTTPException(status_code=400, detail="Could not extract text from resume")
    return resume_text


def _build_optimization_prompt(resume_text: str, job_description: str) -> str:
    """Return the user message that pairs the resume with the job description."""
    if not job_description.strip():
        raise HTTPException(status_code=400, detail="Job description must not be empty")
    return f"""Here is the original resume:
{resume_text}
Here is the job description to optimize for:
{job_description}
Please optimize this resume for the job description. Return only the JSON object as specified."""


def _parse_resume_json(llm_response: str) -> dict:
    """Parse the JSON object that starts at the first "{" of the response.

    Text before the object (for example a markdown fence) and anything after
    it is ignored.

    Raises:
        ValueError: when the response has no "{" or the object is invalid
            (json.JSONDecodeError is a ValueError subclass).
    """
    start = llm_response.find("{")
    if start == -1:
        raise ValueError("No JSON found in response")
    resume_data, _end = json.JSONDecoder().raw_decode(llm_response, start)
    return resume_data


@app.post("/api/optimize")
async def optimize_resume(
    resume: UploadFile = File(...),
    job_description: str = Form(...)
):
    """
    Optimize a resume for a specific job description.
    Returns a compiled PDF.

    Raises HTTPException 400 when no text can be extracted from the upload or
    the job description is blank, and 500 when the LLM response does not
    contain a valid JSON object. Errors raised by the OpenRouter and LaTeX
    calls propagate unchanged.
    """
    # Read and extract text from resume
    resume_text = await _read_resume_text(resume)
    # Build prompt for LLM
    prompt = _build_optimization_prompt(resume_text, job_description)
    # Call OpenRouter
    llm_response = await call_openrouter(prompt, SYSTEM_PROMPT)
    # Parse JSON from response
    try:
        resume_data = _parse_resume_json(llm_response)
    except ValueError as exc:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to parse LLM response: {exc}\n\nResponse: {llm_response[:500]}"
        ) from exc
    # Build LaTeX from structured data
    latex_content = build_latex_from_json(resume_data)
    # Compile to PDF
    pdf_bytes = await compile_latex(latex_content)
    return Response(
        content=pdf_bytes,
        media_type="application/pdf",
        headers={
            "Content-Disposition": "attachment; filename=optimized_resume.pdf"
        }
    )


@app.post("/api/optimize-json")
async def optimize_resume_json(
    resume: UploadFile = File(...),
    job_description: str = Form(...)
):
    """
    Optimize a resume and return the structured JSON data (for debugging).

    On success returns ``optimized_data`` (the parsed LLM output) and
    ``latex`` (the generated source). When the LLM output cannot be parsed,
    returns ``error`` and ``raw_response`` instead of raising.
    """
    resume_text = await _read_resume_text(resume)
    prompt = _build_optimization_prompt(resume_text, job_description)
    llm_response = await call_openrouter(prompt, SYSTEM_PROMPT)
    try:
        resume_data = _parse_resume_json(llm_response)
    except ValueError as exc:
        return {"error": str(exc), "raw_response": llm_response}
    return {
        "optimized_data": resume_data,
        "latex": build_latex_from_json(resume_data)
    }
# Mount static files LAST so API routes take precedence
# NOTE(review): "static" is a relative directory path — presumably resolved
# against the process working directory at startup; confirm the launch directory.
app.mount("/", StaticFiles(directory="static", html=True), name="static")
# When executed directly, serve the app with uvicorn on all interfaces
# (0.0.0.0), port 7860.
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)