# NOTE: source recovered from a Hugging Face Spaces page; the "Spaces / Sleeping"
# status banner from the scrape was removed so the file parses as Python.
| # full corrected app.py | |
| import streamlit as st | |
| import requests | |
| import pdfplumber | |
| import docx | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| from groq import Groq | |
| from reportlab.lib.pagesizes import A4 | |
| from reportlab.lib import colors | |
| from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
| from reportlab.platypus import ( | |
| SimpleDocTemplate, | |
| Paragraph, | |
| Spacer, | |
| ListFlowable, | |
| ListItem, | |
| Table, | |
| TableStyle, | |
| Image as RLImage, | |
| ) | |
| from reportlab.lib.units import mm | |
| from reportlab.pdfbase import pdfmetrics | |
| from reportlab.pdfbase.ttfonts import TTFont | |
| import io | |
| from PIL import Image | |
| import tempfile | |
| import os | |
| from typing import List | |
# -----------------------------
# CONFIG
# -----------------------------
REMOTEOK_URL = "https://remoteok.com/api"
EMBED_MODEL = "BAAI/bge-small-en-v1.5"
AI_MODEL = "openai/gpt-oss-120b"  # Groq model

# -----------------------------
# CACHED MODELS
# -----------------------------
# Fix: the section says "CACHED MODELS" but neither loader was cached, so the
# SentenceTransformer weights and the Groq client were rebuilt on every
# Streamlit rerun (every widget interaction). st.cache_resource keeps one
# instance per server process.
@st.cache_resource(show_spinner=False)
def load_embedding_model():
    """Load (once per process) the sentence-transformer embedding model."""
    return SentenceTransformer(EMBED_MODEL)

model = load_embedding_model()

@st.cache_resource(show_spinner=False)
def init_groq():
    """Create (once per process) the Groq client.

    api_key is None when the GROQ_API_KEY secret is unset; calls will then
    fail at request time, matching the original behavior.
    """
    return Groq(api_key=st.secrets.get("GROQ_API_KEY", None))

groq_client = init_groq()
# -----------------------------
# UTIL / PARSING FUNCTIONS
# -----------------------------
def extract_text_from_resume(file) -> str:
    """Return the plain text of an uploaded PDF or DOCX resume.

    Dispatches on the uploaded file's extension. For any other extension a
    Streamlit error is shown and "" is returned.
    """
    filename = getattr(file, "name", "").lower()
    if filename.endswith(".pdf"):
        with pdfplumber.open(file) as pdf:
            # extract_text() may return None for image-only pages.
            pages = [page.extract_text() or "" for page in pdf.pages]
        return "".join(pages)
    if filename.endswith(".docx"):
        document = docx.Document(file)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)
    st.error("Unsupported file type. Please upload PDF or DOCX.")
    return ""
def fetch_jobs() -> List[dict]:
    """Fetch the current RemoteOK job feed; return [] on any failure."""
    try:
        resp = requests.get(REMOTEOK_URL, timeout=10)
        if resp.status_code == 200:
            # The first element of the feed is API metadata, not a job.
            return resp.json()[1:]
    except Exception as exc:
        st.warning(f"Failed to fetch jobs: {exc}")
    # Non-200 responses and exceptions both fall through to an empty list.
    return []
def embed_texts(texts):
    """Encode *texts* into L2-normalized numpy embedding vectors."""
    vectors = model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
    return vectors
def match_jobs(resume_text, jobs, top_k=5):
    """Rank *jobs* by semantic similarity to *resume_text*.

    Returns up to *top_k* ``(job_dict, score)`` pairs, highest score first.
    Embeddings are normalized, so the faiss inner-product index computes
    cosine similarity.
    """
    if not jobs:
        return []
    job_texts = [
        f"{job.get('position','')} {job.get('company','')} {job.get('description','')}"
        for job in jobs
    ]
    resume_vec = embed_texts([resume_text])
    job_vecs = embed_texts(job_texts)
    index = faiss.IndexFlatIP(job_vecs.shape[1])
    index.add(job_vecs)
    # Bug fix: searching for more neighbours than stored vectors makes faiss
    # pad results with index -1, which previously aliased jobs[-1] (the last
    # job appeared with a bogus score). Clamp k and skip any padding.
    k = min(int(top_k), len(jobs))
    scores, idx = index.search(resume_vec, k)
    return [
        (jobs[i], float(score))
        for i, score in zip(idx[0], scores[0])
        if i >= 0
    ]
# -----------------------------
# AI GENERATION (unchanged)
# -----------------------------
def generate_resume(resume_text, job):
    """Ask the Groq chat model for a resume tailored to *job*.

    Returns the raw model text, which the prompt instructs to follow a
    Summary / Skills / Experience / Education layout with '-----' underlines.
    """
    prompt = f"""
You are an AI career assistant.
Given this resume:\n{resume_text}\n
and this job description:\n{job.get('description','')}\n
Generate a structured resume in this format:
Summary
-----------------
[2-3 line summary tailored for the job]
Skills
-----------------
- Skill 1
- Skill 2
- Skill 3
Experience
-----------------
Job Title | Company | Dates
• Achievement 1
• Achievement 2
Education
-----------------
Degree | Institution | Year
"""
    # temperature 0.7: some phrasing variety while staying on-format.
    chat_completion = groq_client.chat.completions.create(
        model=AI_MODEL,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
    )
    return chat_completion.choices[0].message.content
def generate_cover_letter(resume_text, job, name, email, phone):
    """Ask the Groq chat model for a one-page cover letter tailored to *job*.

    The candidate's name/email/phone are embedded in the prompt so the model
    produces a ready-to-send sign-off. Returns the raw model text.
    """
    prompt = f"""
You are an AI career assistant.
Given this resume:\n{resume_text}\n
and this job description:\n{job.get('description','')}\n
Generate a professional, one-page cover letter tailored to this role.
Format it like this:
Dear Hiring Manager,
[Intro paragraph: Show enthusiasm and alignment with company/role]
[Body paragraph: Highlight 2-3 most relevant skills/experiences from resume]
[Closing paragraph: Express eagerness and thank them]
Sincerely,
{name}
{email} | {phone}
"""
    # temperature 0.7: some phrasing variety while staying on-format.
    chat_completion = groq_client.chat.completions.create(
        model=AI_MODEL,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
    )
    return chat_completion.choices[0].message.content
# -----------------------------
# PDF BUILDING - returns bytes
# -----------------------------
def build_pdf(content: str,
              title: str = "Resume",
              name: str = "John Doe",
              email: str = "john.doe@email.com",
              phone: str = "+1 234 567 890",
              profile_image_bytes: bytes = None) -> bytes:
    """Render *content* as a polished A4 PDF and return the raw PDF bytes.

    Fix: the previous version only contained placeholder comments where the
    story construction belonged, so ``doc.build(story)`` raised NameError.
    This implements the story: a name/contact header (optional headshot),
    then one flowable per content line, recognising the format the AI
    prompts request — section titles followed by '-----' underlines, and
    '-'/'•' bullet lines.

    :param content: plain text from the AI generators (or user-edited).
    :param title: stored in the PDF document metadata.
    :param name: candidate name for the header.
    :param email: candidate email for the header contact line.
    :param phone: candidate phone for the header contact line.
    :param profile_image_bytes: optional raw image bytes for a headshot.
    :return: the finished PDF as bytes (suitable for st.download_button).
    """
    from html import escape  # Paragraph parses XML-like markup; escape user text

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=A4,
        title=title,
        leftMargin=30,
        rightMargin=30,
        topMargin=30,
        bottomMargin=30,
    )
    styles = getSampleStyleSheet()
    name_style = ParagraphStyle("CandidateName", parent=styles["Title"],
                                fontSize=18, spaceAfter=2)
    contact_style = ParagraphStyle("ContactLine", parent=styles["Normal"],
                                   textColor=colors.grey, spaceAfter=6)
    heading_style = ParagraphStyle("SectionHeading", parent=styles["Heading2"],
                                   spaceBefore=6, spaceAfter=2)

    story = []
    # Optional headshot — best-effort: a corrupt image must not kill the export.
    if profile_image_bytes:
        try:
            story.append(RLImage(io.BytesIO(profile_image_bytes),
                                 width=28 * mm, height=28 * mm))
        except Exception:
            pass
    story.append(Paragraph(escape(name), name_style))
    story.append(Paragraph(escape(f"{email} | {phone}"), contact_style))
    story.append(Spacer(1, 4 * mm))

    lines = content.splitlines()
    for pos, raw in enumerate(lines):
        line = raw.strip()
        if not line:
            story.append(Spacer(1, 2 * mm))
            continue
        if set(line) == {"-"}:
            # A row of dashes is the prompt-format underline; the line above
            # it was already emitted as a heading, so drop the rule itself.
            continue
        next_line = lines[pos + 1].strip() if pos + 1 < len(lines) else ""
        if next_line and set(next_line) == {"-"}:
            story.append(Paragraph(escape(line), heading_style))
        elif line.startswith(("-", "•")):
            story.append(Paragraph(f"• {escape(line.lstrip('-• ').strip())}",
                                   styles["Normal"]))
        else:
            story.append(Paragraph(escape(line), styles["Normal"]))

    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()  # return bytes, not the BytesIO object
# -----------------------------
# STREAMLIT UI (unchanged logic)
# -----------------------------
# Page-level configuration must be the first Streamlit call of the script.
st.set_page_config(page_title="MATCHHIVE - AI Job Matcher", layout="wide", initial_sidebar_state="expanded")

# Inject small CSS tweaks for buttons/cards; requires unsafe_allow_html.
st.markdown(
    """
<style>
.stButton>button { border-radius: 8px; padding:8px 12px; }
.download-btn { background-color:#2ECC71 !important; color:white !important; }
.job-card { padding:10px; border:1px solid #E5E7EB; border-radius:8px; margin-bottom:8px; }
</style>
""",
    unsafe_allow_html=True,
)

# Header area with optional logo upload (logo is display-only; it is not
# embedded in generated PDFs — see build_pdf's profile_image_bytes for that).
col1, col2 = st.columns([1, 6])
with col1:
    logo_file = st.file_uploader("Upload logo (optional)", type=["png", "jpg", "jpeg"], help="Optional: upload your company/app logo")
    if logo_file:
        img = Image.open(logo_file)
        st.image(img, width=100)
with col2:
    st.title("MATCHHIVE - AI Job Matcher")
    st.caption("Upload a resume, match to jobs, generate tailored resumes & cover letters (PDF).")
# Sidebar: user contact info + options
with st.sidebar:
    st.header("Candidate Info")
    # Contact details flow into the generated cover letter and PDF headers.
    name = st.text_input("Full Name", "John Doe")
    email = st.text_input("Email", "john.doe@email.com")
    phone = st.text_input("Phone", "+1 234 567 890")
    profile_pic = st.file_uploader("Profile photo (optional)", type=["png", "jpg", "jpeg"], help="Small circular/headshot for resume header")
    st.markdown("---")
    st.header("Job Filters (optional)")
    # Both filters are case-insensitive substring matches (see job_matches_filters).
    location_filter = st.text_input("Location keyword (e.g. Remote, USA, Canada)", "")
    keyword_filter = st.text_input("Job keyword (e.g. Python, ML, DevOps)", "")
    # Match scores are cosine similarities of normalized embeddings; the
    # slider covers the practically useful [0, 1] range.
    min_score = st.slider("Minimum match score", min_value=0.0, max_value=1.0, value=0.0, step=0.01)
    top_k = st.number_input("Number of matches to show", min_value=1, max_value=20, value=5)
    st.markdown("---")
    st.caption("Note: Job data comes from remoteok.com API and match scores are semantic similarity approximations.")
# Main upload & processing area.
# Pipeline: upload -> extract text -> fetch jobs -> filter -> semantic match
# -> per-match AI generation and PDF export.
st.header("Upload Resume (PDF or DOCX)")
resume_file = st.file_uploader("Upload your resume", type=["pdf", "docx"])
if not resume_file:
    st.info("Please upload a resume (PDF or DOCX) to start matching.")
else:
    with st.spinner("Extracting resume text..."):
        resume_text = extract_text_from_resume(resume_file)
    if not resume_text.strip():
        st.error("Could not extract text from the resume. Try a different file or ensure the PDF is text-based (not scanned).")
    else:
        # Fetch jobs and filter
        with st.spinner("Fetching remote jobs..."):
            jobs = fetch_jobs()

        # Apply simple filters: plain case-insensitive substring tests
        # against the sidebar's location/keyword inputs.
        def job_matches_filters(job):
            if location_filter:
                loc = job.get("location") or job.get("company_location") or ""
                if location_filter.lower() not in str(loc).lower():
                    return False
            if keyword_filter:
                combined = f"{job.get('position','')} {job.get('company','')} {job.get('description','')}"
                if keyword_filter.lower() not in combined.lower():
                    return False
            return True

        filtered_jobs = [j for j in jobs if job_matches_filters(j)]

        # Do matching & display results
        with st.spinner("Computing semantic match scores..."):
            matches = match_jobs(resume_text, filtered_jobs, top_k=top_k)
            matches = [(job, score) for job, score in matches if score >= min_score]
        if not matches:
            st.warning("No matches found with given filters/score. Try lowering minimum score or removing filters.")
        else:
            st.subheader(f"Top {len(matches)} Matches")
            for job, score in matches:
                title = job.get("position", "Unknown Position")
                company = job.get("company", "Unknown Company")
                url = job.get("url", "#")
                posted = job.get("date", "")
                exp_label = f"{title} at {company} — Score: {score:.2f}"
                with st.expander(exp_label, expanded=False):
                    st.markdown(f"**Location:** {job.get('location','N/A')} \n**Posted:** {posted} \n[View Job Posting]({url})")
                    st.markdown("---")
                    cols = st.columns([1, 1, 1])
                    # NOTE(review): nested buttons — Streamlit reruns the whole
                    # script on every interaction, so clicking an inner button
                    # (e.g. "Export ... as PDF") makes the outer button read
                    # False again and the generated text disappears. Persisting
                    # results in st.session_state would fix this; documented
                    # here, not changed.
                    if cols[0].button("Generate Resume (AI)", key=f"resume_{job.get('id', title)}"):
                        with st.spinner("Generating tailored resume..."):
                            tailored_resume = generate_resume(resume_text, job)
                        tab1, tab2 = st.tabs(["Tailored Resume", "Cover Letter"])
                        with tab1:
                            edited_resume = st.text_area("Tailored Resume (editable)", tailored_resume, height=300)
                            if st.button("Export Tailored Resume as PDF", key=f"export_resume_{job.get('id', title)}"):
                                prof_bytes = None
                                if profile_pic:
                                    prof_bytes = profile_pic.getvalue()
                                pdf_bytes = build_pdf(edited_resume, title="Resume", name=name, email=email, phone=phone, profile_image_bytes=prof_bytes)
                                st.download_button(
                                    label="📥 Download Resume (PDF)",
                                    data=pdf_bytes,
                                    file_name=f"{name.replace(' ', '_')}_resume.pdf",
                                    mime="application/pdf",
                                )
                        with tab2:
                            # NOTE(review): this button lives in cols[1] but its
                            # result renders inside tab2, which only exists
                            # after "Generate Resume (AI)" was clicked — the
                            # cover-letter flow is only reachable on that rerun.
                            if cols[1].button("Generate Cover Letter (AI)", key=f"clgen_{job.get('id', title)}"):
                                with st.spinner("Generating cover letter..."):
                                    tailored_cl = generate_cover_letter(resume_text, job, name, email, phone)
                                edited_cl = st.text_area("Cover Letter (editable)", tailored_cl, height=300, key=f"cltext_{job.get('id', title)}")
                                if st.button("Export Cover Letter as PDF", key=f"export_cl_{job.get('id', title)}"):
                                    prof_bytes = None
                                    if profile_pic:
                                        prof_bytes = profile_pic.getvalue()
                                    pdf_bytes = build_pdf(edited_cl, title="Cover Letter", name=name, email=email, phone=phone, profile_image_bytes=prof_bytes)
                                    st.download_button(
                                        label="📥 Download Cover Letter (PDF)",
                                        data=pdf_bytes,
                                        file_name=f"{name.replace(' ', '_')}_cover_letter.pdf",
                                        mime="application/pdf",
                                    )
                    if cols[2].button("Show Job Description", key=f"desc_{job.get('id', title)}"):
                        st.info(job.get("description", "No description available"))
            st.success("Done — select a match and generate your tailored resume or cover letter.")