| """ | |
| Resume Optimizer API | |
| - Parses uploaded resumes (PDF/DOCX/TXT) | |
| - Uses Arcee Trinity via OpenRouter to optimize for job descriptions | |
| - Compiles to PDF via latex.ytotech.com | |
| """ | |
| import os | |
| import re | |
| import json | |
| import base64 | |
| import tempfile | |
| from typing import Optional | |
| from fastapi import FastAPI, UploadFile, File, Form, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import Response | |
| from fastapi.staticfiles import StaticFiles | |
| import httpx | |
| # PDF parsing | |
| import fitz # PyMuPDF | |
| app = FastAPI(title="Resume Optimizer API") | |
| # CORS for frontend | |
| app.add_middleware( | |
| CORSMiddleware, | |
| allow_origins=["*"], # In production, restrict this | |
| allow_credentials=True, | |
| allow_methods=["*"], | |
| allow_headers=["*"], | |
| ) | |
| # Configuration | |
| OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "") | |
| OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions" | |
| LATEX_API_URL = "https://latex.ytotech.com/builds/sync" | |
| # LaTeX Template | |
| LATEX_TEMPLATE = r""" | |
| \documentclass[a4paper,10pt]{extarticle} | |
| \usepackage[utf8]{inputenc} | |
| \usepackage{geometry} | |
| \usepackage{textcomp} | |
| \geometry{a4paper, margin=0.5in} | |
| \usepackage{titlesec} | |
| \usepackage{enumitem} | |
| \usepackage{hyperref} | |
| \setlist{noitemsep,leftmargin=*} | |
| \titleformat{\section}{\Large\bfseries}{\thesection}{1em}{}[\titlerule] | |
| \titlespacing*{\section}{0pt}{0.5em}{0.5em} | |
| \pagestyle{empty} | |
| \begin{document} | |
| \begin{center} | |
| \textbf{\Large <<NAME>>}\\[2pt] | |
| <<CONTACT_LINE>> | |
| \end{center} | |
| \section*{EDUCATION} | |
| <<EDUCATION_CONTENT>> | |
| \section*{EXPERIENCE} | |
| <<EXPERIENCE_CONTENT>> | |
| \section*{PROJECTS} | |
| <<PROJECTS_CONTENT>> | |
| \section*{SKILLS} | |
| <<SKILLS_CONTENT>> | |
| \end{document} | |
| """ | |

def extract_text_from_pdf(pdf_bytes: bytes) -> str:
    """Extract text from a PDF using PyMuPDF."""
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    text = ""
    for page in doc:
        text += page.get_text()
    doc.close()
    return text


def extract_text_from_file(content: bytes, filename: str) -> str:
    """Extract text based on file type."""
    ext = filename.lower().split('.')[-1]
    if ext == 'pdf':
        return extract_text_from_pdf(content)
    elif ext == 'txt':
        return content.decode('utf-8', errors='ignore')
    elif ext in ['doc', 'docx']:
        # For simplicity, try to extract as text
        # In production, use python-docx
        return content.decode('utf-8', errors='ignore')
    else:
        return content.decode('utf-8', errors='ignore')
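
# The DOCX branch above just decodes raw bytes, which usually yields garbage for
# real .docx files. A fuller extractor would use python-docx; this is a minimal,
# unwired sketch (assumes the optional `python-docx` package is installed):
def extract_text_from_docx(content: bytes) -> str:
    """Extract paragraph text from DOCX bytes via python-docx (optional dependency)."""
    import io
    from docx import Document  # provided by the python-docx package
    document = Document(io.BytesIO(content))
    return "\n".join(paragraph.text for paragraph in document.paragraphs)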

def escape_latex(text: str) -> str:
    """Escape special LaTeX characters."""
    # Replace backslashes with a sentinel first so the braces introduced by
    # \textbackslash{} are not themselves re-escaped by the brace replacements below.
    text = text.replace('\\', '\x00')
    replacements = [
        ('&', r'\&'),
        ('%', r'\%'),
        ('$', r'\$'),
        ('#', r'\#'),
        ('_', r'\_'),
        ('{', r'\{'),
        ('}', r'\}'),
        ('~', r'\textasciitilde{}'),
        ('^', r'\textasciicircum{}'),
    ]
    for old, new in replacements:
        text = text.replace(old, new)
    return text.replace('\x00', r'\textbackslash{}')
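
# For reference, escape_latex("R&D cut costs by 50% on _core_ SKUs")
# returns r"R\&D cut costs by 50\% on \_core\_ SKUs".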

async def call_openrouter(prompt: str, system_prompt: str) -> str:
    """Call the OpenRouter API with Arcee Trinity."""
    if not OPENROUTER_API_KEY:
        raise HTTPException(status_code=500, detail="OPENROUTER_API_KEY not configured")
    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(
            OPENROUTER_URL,
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
                "HTTP-Referer": "https://resume-optimizer.app",
            },
            json={
                "model": "arcee-ai/trinity-large-preview:free",
                "messages": [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt},
                ],
                "max_tokens": 4000,
                "temperature": 0.3,
            },
        )
        if response.status_code != 200:
            raise HTTPException(
                status_code=response.status_code,
                detail=f"OpenRouter API error: {response.text}",
            )
        data = response.json()
        return data["choices"][0]["message"]["content"]

async def compile_latex(latex_content: str) -> bytes:
    """Compile LaTeX to PDF using latex.ytotech.com."""
    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            LATEX_API_URL,
            json={
                "compiler": "pdflatex",
                "resources": [
                    {"main": True, "content": latex_content}
                ],
            },
        )
        # Accept any 2xx status code (200, 201, etc.)
        if not (200 <= response.status_code < 300):
            # Try to extract an error message if the body is text
            try:
                error_text = response.text[:1000] if response.text else "Unknown error"
            except Exception:
                error_text = "LaTeX compilation failed (binary response)"
            raise HTTPException(
                status_code=500,
                detail=f"LaTeX compilation failed. Status: {response.status_code}. Error: {error_text}",
            )
        return response.content
| SYSTEM_PROMPT = """You are an expert resume writer, ATS optimization specialist, and hiring manager in this field. | |
| Your task is to optimize the resume for the specific job description provided, maximizing relevance, clarity, and keyword alignment without fabricating experience. | |
| OBJECTIVES: | |
| 1. Tailor the resume specifically to the job description | |
| 2. Optimize for ATS keyword matching and natural language ranking | |
| 3. Improve impact using XYZ-style bullet points (X=What, Y=How/Tools, Z=Outcome/Metrics) | |
| 4. Clearly emphasize the most important skills, tools, and concepts from the JD | |
| 5. Reorder experiences to highlight most relevant roles first | |
| RULES & CONSTRAINTS: | |
| ❌ Do NOT invent skills, experience, metrics, or credentials | |
| ✅ You may rephrase, restructure, and quantify only if logically implied from the original resume | |
| ✅ Preserve truthful seniority and scope | |
| ✅ Keep formatting clean and ATS-safe | |
| ✅ Focus on semantic relevance, not keyword stuffing | |
| FORMATTING REQUIREMENTS: | |
| Bullet Point Style (XYZ Format): | |
| - X = What I did (action + responsibility) | |
| - Y = How I did it (tools, methods, technologies, frameworks) | |
| - Z = Outcome or impact (metrics, results, achievements) | |
| - Example: "Developed Django REST API to improve data processing speed, reducing query time by 40%" | |
| Bullet Length: Keep each bullet to 1-2 lines max | |
| Keyword Optimization: | |
| - Identify top keywords from the job description | |
| - Naturally integrate into experience bullets, skills, and education sections | |
| - Prioritize semantic relevance over keyword stuffing | |
| - Use industry-standard terminology that matches the JD | |
| Output Format: | |
| Return ONLY a JSON object (no markdown, code blocks, or explanations) with this exact structure: | |
| { | |
| "name": "Full Name", | |
| "email": "email@example.com", | |
| "linkedin_url": "https://linkedin.com/in/username (or empty string if not in original resume)", | |
| "linkedin_display": "linkedin.com/in/username (or empty string if not in original resume)", | |
| "github_url": "https://github.com/username (ONLY if explicitly stated in original resume, otherwise empty string)", | |
| "github_display": "github.com/username (ONLY if explicitly stated in original resume, otherwise empty string)", | |
| "education": [ | |
| { | |
| "institution": "University Name", | |
| "expected": "May 2024", | |
| "degree": "B.S. in Computer Science", | |
| "coursework": "Relevant courses from original resume", | |
| "honors": "Honors, GPA, awards if applicable" | |
| } | |
| ], | |
| "experience": [ | |
| { | |
| "company": "Company Name", | |
| "department": "Department if applicable", | |
| "location": "City, State", | |
| "role": "Job Title", | |
| "dates": "Month Year – Month Year", | |
| "bullets": [ | |
| "Action verb + responsibility (X) using Key Tool or Technology (Y) to achieve quantified result (Z)", | |
| "XYZ format bullet with JD keywords naturally integrated, measurable impact emphasized" | |
| ] | |
| } | |
| ], | |
| "projects": [ | |
| { | |
| "name": "Project Name", | |
| "technologies": "Tech1, Tech2, Tech3", | |
| "location": "City, State or Online", | |
| "role": "Your Role", | |
| "dates": "Month Year – Month Year", | |
| "bullets": [ | |
| "Built Framework solution achieving Quantified Result", | |
| "Implemented Key Feature using Technologies, measurable impact" | |
| ] | |
| } | |
| ], | |
| "skills": { | |
| "technical": "Skill1, Skill2, Skill3 (prioritize JD-relevant skills first)", | |
| "communication": "Relevant soft skills from JD" | |
| } | |
| } | |
| CRITICAL INSTRUCTIONS: | |
| 1. Analyze the job description deeply for keywords, required skills, technologies, and seniority level | |
| 2. Reorder experience entries by relevance (most relevant to JD first) | |
| 3. Use XYZ format for all bullet points to maximize impact | |
| 4. Extract and highlight metrics from original resume; estimate only if reasonable | |
| 5. Match tone and language to the job description's seniority and industry | |
| 6. Preserve all truthful information from original resume | |
| 7. Include all education, experience, and project sections from the original resume | |
| 8. Keep skills section focused on JD-relevant competencies | |
| 9. ONLY include GitHub/LinkedIn URLs if they are EXPLICITLY stated in the original resume - use empty strings otherwise | |
| Begin optimization now. Return ONLY the JSON object, no additional text.""" | |

def build_latex_from_json(data: dict) -> str:
    """Build a LaTeX document from structured JSON data."""
    # Header info
    name = escape_latex(data.get("name", "Name"))
    email = data.get("email", "email@example.com")
    linkedin_url = data.get("linkedin_url", "")
    linkedin_display = data.get("linkedin_display", "")
    github_url = data.get("github_url", "")
    github_display = data.get("github_display", "")

    # Build contact links only if they're provided
    contact_links = [f"\\href{{mailto:{email}}}{{{email}}}"]
    if linkedin_url and linkedin_url != "https://linkedin.com":
        contact_links.append(f"\\href{{{linkedin_url}}}{{{linkedin_display}}}")
    if github_url and github_url != "https://github.com":
        contact_links.append(f"\\href{{{github_url}}}{{{github_display}}}")
    contact_line = " $|$ ".join(contact_links)

    # Build education section
    education_lines = []
    for edu in data.get("education", []):
        institution = escape_latex(edu.get("institution", ""))
        expected = escape_latex(edu.get("expected", ""))
        degree = escape_latex(edu.get("degree", ""))
        coursework = escape_latex(edu.get("coursework", ""))
        honors = escape_latex(edu.get("honors", ""))
        edu_block = f"""\\noindent
\\textbf{{{institution}}} \\hfill \\textbf{{{expected}}} \\\\
{degree}
\\begin{{itemize}}
    \\item \\textbf{{Coursework:}} {coursework}
    \\item \\textbf{{Honors:}} {honors}
\\end{{itemize}}"""
        education_lines.append(edu_block)

    # Build experience section
    experience_lines = []
    for exp in data.get("experience", []):
        company = escape_latex(exp.get("company", ""))
        department = exp.get("department", "")
        location = escape_latex(exp.get("location", ""))
        role = escape_latex(exp.get("role", ""))
        dates = escape_latex(exp.get("dates", ""))
        if department:
            company_line = f"\\textbf{{{company} $|$ {escape_latex(department)}}}"
        else:
            company_line = f"\\textbf{{{company}}}"
        bullets = "\n    ".join([f"\\item {escape_latex(b)}" for b in exp.get("bullets", [])])
        exp_block = f"""\\noindent
{company_line} \\hfill {location} \\\\
\\textit{{{role}}} \\hfill {dates}
\\begin{{itemize}}
    {bullets}
\\end{{itemize}}"""
        experience_lines.append(exp_block)

    # Build projects section
    project_lines = []
    for proj in data.get("projects", []):
        name_p = escape_latex(proj.get("name", ""))
        tech = escape_latex(proj.get("technologies", ""))
        location = escape_latex(proj.get("location", ""))
        role = escape_latex(proj.get("role", ""))
        dates = escape_latex(proj.get("dates", ""))
        bullets = "\n    ".join([f"\\item {escape_latex(b)}" for b in proj.get("bullets", [])])
        proj_block = f"""\\noindent
\\textbf{{{name_p} $|$ {tech}}} \\hfill {location} \\\\
\\textit{{{role}}} \\hfill {dates}
\\begin{{itemize}}
    {bullets}
\\end{{itemize}}"""
        project_lines.append(proj_block)

    # Build skills section
    skills = data.get("skills", {})
    technical = escape_latex(skills.get("technical", ""))
    communication = escape_latex(skills.get("communication", ""))
    skills_content = f"""\\begin{{itemize}}
    \\item \\textbf{{Technical:}} {technical}
    \\item \\textbf{{Communication:}} {communication}
\\end{{itemize}}"""

    # Assemble the final LaTeX document
    latex = LATEX_TEMPLATE
    latex = latex.replace("<<NAME>>", name)
    latex = latex.replace("<<CONTACT_LINE>>", contact_line)
    latex = latex.replace("<<EDUCATION_CONTENT>>", "\n\n".join(education_lines))
    latex = latex.replace("<<EXPERIENCE_CONTENT>>", "\n\n".join(experience_lines))
    latex = latex.replace("<<PROJECTS_CONTENT>>", "\n\n".join(project_lines))
    latex = latex.replace("<<SKILLS_CONTENT>>", skills_content)
    return latex
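
# Illustrative local check of the JSON -> LaTeX path (fabricated sample data,
# not part of the API); empty URL fields simply drop out of the contact line:
#   sample = {
#       "name": "Ada Lovelace",
#       "email": "ada@example.com",
#       "education": [], "experience": [], "projects": [],
#       "skills": {"technical": "Python, FastAPI, LaTeX", "communication": "Technical writing"},
#   }
#   print(build_latex_from_json(sample))  # output can be fed to compile_latex()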

@app.get("/api")  # route path assumed
async def root():
    return {"message": "Resume Optimizer API", "status": "running"}


@app.get("/api/health")  # route path assumed
async def health():
    return {"status": "healthy"}

@app.post("/api/optimize")  # route path assumed
async def optimize_resume(
    resume: UploadFile = File(...),
    job_description: str = Form(...)
):
    """
    Optimize a resume for a specific job description.
    Returns a compiled PDF.
    """
    # Read and extract text from the resume
    content = await resume.read()
    resume_text = extract_text_from_file(content, resume.filename or "resume.pdf")
    if not resume_text.strip():
        raise HTTPException(status_code=400, detail="Could not extract text from resume")

    # Build the prompt for the LLM
    prompt = f"""Here is the original resume:
{resume_text}
Here is the job description to optimize for:
{job_description}
Please optimize this resume for the job description. Return only the JSON object as specified."""

    # Call OpenRouter
    llm_response = await call_openrouter(prompt, SYSTEM_PROMPT)

    # Parse JSON from the response
    try:
        # Try to extract a JSON object from the response
        json_match = re.search(r'\{[\s\S]*\}', llm_response)
        if json_match:
            resume_data = json.loads(json_match.group())
        else:
            raise ValueError("No JSON found in response")
    except (json.JSONDecodeError, ValueError) as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to parse LLM response: {str(e)}\n\nResponse: {llm_response[:500]}"
        )

    # Build LaTeX from the structured data
    latex_content = build_latex_from_json(resume_data)

    # Compile to PDF
    pdf_bytes = await compile_latex(latex_content)

    return Response(
        content=pdf_bytes,
        media_type="application/pdf",
        headers={
            "Content-Disposition": "attachment; filename=optimized_resume.pdf"
        },
    )

@app.post("/api/optimize-json")  # route path assumed
async def optimize_resume_json(
    resume: UploadFile = File(...),
    job_description: str = Form(...)
):
    """
    Optimize a resume and return the structured JSON data (for debugging).
    """
    content = await resume.read()
    resume_text = extract_text_from_file(content, resume.filename or "resume.pdf")
    if not resume_text.strip():
        raise HTTPException(status_code=400, detail="Could not extract text from resume")

    prompt = f"""Here is the original resume:
{resume_text}
Here is the job description to optimize for:
{job_description}
Please optimize this resume for the job description. Return only the JSON object as specified."""

    llm_response = await call_openrouter(prompt, SYSTEM_PROMPT)

    try:
        json_match = re.search(r'\{[\s\S]*\}', llm_response)
        if json_match:
            resume_data = json.loads(json_match.group())
        else:
            raise ValueError("No JSON found in response")
    except (json.JSONDecodeError, ValueError) as e:
        return {"error": str(e), "raw_response": llm_response}

    return {
        "optimized_data": resume_data,
        "latex": build_latex_from_json(resume_data),
    }

# Mount static files LAST so API routes take precedence
app.mount("/", StaticFiles(directory="static", html=True), name="static")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
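
# Example client call (sketch; the /api/optimize path above is an assumption):
#   import httpx
#   with open("resume.pdf", "rb") as f:
#       resp = httpx.post(
#           "http://localhost:7860/api/optimize",
#           files={"resume": ("resume.pdf", f, "application/pdf")},
#           data={"job_description": "Senior Python backend engineer ..."},
#           timeout=300.0,
#       )
#   resp.raise_for_status()
#   with open("optimized_resume.pdf", "wb") as out:
#       out.write(resp.content)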