"""MATCHHIVE - AI Job Matcher (Streamlit app).

Upload a resume (PDF or DOCX), rank job postings fetched from the RemoteOK
API against it by embedding similarity, and generate a tailored resume or
cover letter with a Groq-hosted model, exportable as PDF.
"""

import io
import os
import tempfile
from typing import List
from xml.sax.saxutils import escape

import docx
import faiss
import pdfplumber
import requests
import streamlit as st
from groq import Groq
from PIL import Image
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import mm
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.platypus import (
    Image as RLImage,
    ListFlowable,
    ListItem,
    Paragraph,
    SimpleDocTemplate,
    Spacer,
    Table,
    TableStyle,
)
from sentence_transformers import SentenceTransformer

# Issued before anything else touches Streamlit (including the cached model
# loaders below): set_page_config has historically been required to be the
# first Streamlit command of a script run.
st.set_page_config(
    page_title="MATCHHIVE - AI Job Matcher",
    layout="wide",
    initial_sidebar_state="expanded",
)

# -----------------------------
# CONFIG
# -----------------------------
REMOTEOK_URL = "https://remoteok.com/api"
EMBED_MODEL = "BAAI/bge-small-en-v1.5"
AI_MODEL = "openai/gpt-oss-120b"  # Groq model

# Line prefixes treated as bullet items when laying out generated text.
_BULLET_PREFIXES = ("- ", "• ", "* ")
# Characters that make up an "underline" rule such as "-----------------".
_RULE_CHARS = frozenset("-=_")


# -----------------------------
# CACHED MODELS
# -----------------------------
@st.cache_resource
def load_embedding_model():
    """Load the sentence-embedding model once per server process."""
    return SentenceTransformer(EMBED_MODEL)


model = load_embedding_model()


@st.cache_resource
def init_groq():
    """Create the Groq client using the GROQ_API_KEY Streamlit secret.

    ``None`` is passed as the key when the secret is absent.
    """
    return Groq(api_key=st.secrets.get("GROQ_API_KEY", None))


groq_client = init_groq()


# -----------------------------
# UTIL / PARSING FUNCTIONS
# -----------------------------
def extract_text_from_resume(file) -> str:
    """Extract plain text from an uploaded PDF or DOCX file.

    Args:
        file: File-like object with a ``name`` attribute (e.g. a Streamlit
            upload); the extension of ``name`` selects the parser.

    Returns:
        The extracted text, or ``""`` (after showing an error in the UI)
        when the extension is neither ``.pdf`` nor ``.docx``.
    """
    lowered = getattr(file, "name", "").lower()
    if lowered.endswith(".pdf"):
        with pdfplumber.open(file) as pdf:
            # Join pages with a newline so the last word of one page does not
            # fuse with the first word of the next.
            return "\n".join(page.extract_text() or "" for page in pdf.pages)
    if lowered.endswith(".docx"):
        document = docx.Document(file)
        return "\n".join(p.text for p in document.paragraphs)
    st.error("Unsupported file type. Please upload PDF or DOCX.")
    return ""


def fetch_jobs() -> List[dict]:
    """Fetch job postings from the RemoteOK API.

    Returns:
        The list of job dicts with the first (metadata) entry dropped, or an
        empty list when the request fails; failures are reported with
        ``st.warning`` rather than raised.
    """
    try:
        resp = requests.get(REMOTEOK_URL, timeout=10)
        if resp.status_code != 200:
            st.warning(f"Failed to fetch jobs: HTTP {resp.status_code}")
            return []
        payload = resp.json()
    except (requests.RequestException, ValueError) as e:
        st.warning(f"Failed to fetch jobs: {e}")
        return []
    if not isinstance(payload, list):
        st.warning("Failed to fetch jobs: unexpected response format")
        return []
    # payload[0] is skipped as metadata; keep only dict entries so that later
    # ``job.get(...)`` calls cannot fail on a malformed item.
    return [job for job in payload[1:] if isinstance(job, dict)]


def embed_texts(texts):
    """Encode ``texts`` into L2-normalized embedding vectors (NumPy array)."""
    return model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)


def match_jobs(resume_text, jobs, top_k=5):
    """Rank ``jobs`` by embedding similarity to ``resume_text``.

    Args:
        resume_text: Plain text of the candidate's resume.
        jobs: List of job dicts; ``position``, ``company`` and
            ``description`` are combined into the text that gets embedded.
        top_k: Maximum number of matches to return.

    Returns:
        A list of ``(job, score)`` tuples, best match first, with at most
        ``min(top_k, len(jobs))`` entries.
    """
    if not jobs:
        return []
    # Never ask the index for more neighbours than it holds: Faiss pads the
    # missing results with index -1, which would silently select jobs[-1].
    k = min(int(top_k), len(jobs))
    if k <= 0:
        return []
    job_texts = [
        f"{job.get('position','')} {job.get('company','')} {job.get('description','')}"
        for job in jobs
    ]
    resume_vec = embed_texts([resume_text])
    job_vecs = embed_texts(job_texts)
    # Inner product over normalized vectors.
    index = faiss.IndexFlatIP(job_vecs.shape[1])
    index.add(job_vecs)
    scores, idx = index.search(resume_vec, k)
    return [
        (jobs[i], float(score))
        for i, score in zip(idx[0], scores[0])
        if i >= 0  # defensive: skip any padding entries
    ]


def job_matches_filters(job, location_filter, keyword_filter):
    """Return True when ``job`` passes the optional sidebar filters.

    Both filters are case-insensitive substring checks; an empty filter
    always passes. The location is read from ``location`` or, failing that,
    ``company_location``.
    """
    if location_filter:
        loc = job.get("location") or job.get("company_location") or ""
        if location_filter.lower() not in str(loc).lower():
            return False
    if keyword_filter:
        combined = f"{job.get('position','')} {job.get('company','')} {job.get('description','')}"
        if keyword_filter.lower() not in combined.lower():
            return False
    return True


# -----------------------------
# AI GENERATION
# -----------------------------
def _complete(prompt):
    """Send ``prompt`` as a single user message and return the reply text."""
    chat_completion = groq_client.chat.completions.create(
        model=AI_MODEL,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
    )
    # Normalize a missing message body to "" so callers always get a str.
    return chat_completion.choices[0].message.content or ""


def generate_resume(resume_text, job):
    """Ask the model for a structured resume tailored to ``job``.

    Args:
        resume_text: Plain text of the candidate's existing resume.
        job: Job dict; only its ``description`` is sent to the model.

    Returns:
        The generated resume as plain text.
    """
    prompt = f"""
You are an AI career assistant.
Given this resume:\n{resume_text}\n
and this job description:\n{job.get('description','')}\n
Generate a structured resume in this format:

Summary
-----------------
[2-3 line summary tailored for the job]

Skills
-----------------
- Skill 1
- Skill 2
- Skill 3

Experience
-----------------
Job Title | Company | Dates
• Achievement 1
• Achievement 2

Education
-----------------
Degree | Institution | Year
"""
    return _complete(prompt)


def generate_cover_letter(resume_text, job, name, email, phone):
    """Ask the model for a one-page cover letter tailored to ``job``.

    Args:
        resume_text: Plain text of the candidate's existing resume.
        job: Job dict; only its ``description`` is sent to the model.
        name: Candidate name used in the sign-off.
        email: Candidate email used in the sign-off.
        phone: Candidate phone number used in the sign-off.

    Returns:
        The generated cover letter as plain text.
    """
    prompt = f"""
You are an AI career assistant.
Given this resume:\n{resume_text}\n
and this job description:\n{job.get('description','')}\n
Generate a professional, one-page cover letter tailored to this role.
Format it like this:

Dear Hiring Manager,

[Intro paragraph: Show enthusiasm and alignment with company/role]

[Body paragraph: Highlight 2-3 most relevant skills/experiences from resume]

[Closing paragraph: Express eagerness and thank them]

Sincerely,
{name}
{email} | {phone}
"""
    return _complete(prompt)


# -----------------------------
# PDF BUILDING
# -----------------------------
def _is_rule_line(line):
    """Return True for an underline such as ``-----------------``."""
    return len(line) >= 3 and set(line) <= _RULE_CHARS


def _load_profile_image(image_bytes, size):
    """Return a square ReportLab image flowable, or None if unreadable."""
    try:
        # ReportLab decodes images lazily, so validate with Pillow first;
        # otherwise a corrupt upload would only fail inside doc.build().
        Image.open(io.BytesIO(image_bytes)).verify()
    except Exception:  # Pillow raises assorted types for corrupt input
        return None
    return RLImage(io.BytesIO(image_bytes), width=size, height=size)


def _content_to_flowables(content, heading_style, body_style):
    """Convert generated plain text into a list of ReportLab flowables.

    A line followed by a rule line (or starting with ``#``) becomes a section
    heading, lines starting with ``- ``, ``• `` or ``* `` are grouped into
    bullet lists, blank lines become small spacers, and everything else is a
    body paragraph. Text is XML-escaped because Paragraph parses markup.
    """
    flowables = []
    pending_bullets = []

    def flush_bullets():
        if pending_bullets:
            items = [
                ListItem(Paragraph(escape(text), body_style))
                for text in pending_bullets
            ]
            flowables.append(
                ListFlowable(items, bulletType="bullet", leftIndent=14)
            )
            pending_bullets.clear()

    lines = [raw.strip() for raw in (content or "").splitlines()]
    for pos, line in enumerate(lines):
        if not line:
            flush_bullets()
            flowables.append(Spacer(1, 4))
            continue
        if _is_rule_line(line):
            # The rule only marks the preceding line as a heading.
            continue
        following = lines[pos + 1] if pos + 1 < len(lines) else ""
        if _is_rule_line(following) or line.startswith("#"):
            flush_bullets()
            flowables.append(Paragraph(escape(line.lstrip("# ")), heading_style))
        elif line.startswith(_BULLET_PREFIXES):
            pending_bullets.append(line[2:].strip())
        else:
            flush_bullets()
            flowables.append(Paragraph(escape(line), body_style))
    flush_bullets()
    return flowables


def build_pdf(content: str, title: str = "Resume", name: str = "John Doe", email: str = "john.doe@email.com", phone: str = "+1 234 567 890", profile_image_bytes: bytes = None) -> bytes:
    """Build a PDF document from generated text and return its raw bytes.

    Args:
        content: Plain-text body (resume or cover letter) to lay out.
        title: Document title, stored in the PDF metadata.
        name: Candidate name shown in the header.
        email: Candidate email shown in the header.
        phone: Candidate phone number shown in the header.
        profile_image_bytes: Optional encoded image placed next to the
            header; ignored when missing or unreadable.

    Returns:
        The rendered PDF as ``bytes``.
    """
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=A4,
        leftMargin=30,
        rightMargin=30,
        topMargin=30,
        bottomMargin=30,
        title=title,
    )
    styles = getSampleStyleSheet()
    name_style = ParagraphStyle(
        "HeaderName",
        parent=styles["Title"],
        alignment=0,  # left-aligned
        fontSize=20,
        leading=24,
        spaceAfter=2,
    )
    contact_style = ParagraphStyle(
        "HeaderContact",
        parent=styles["Normal"],
        fontSize=9,
        leading=12,
        textColor=colors.grey,
    )
    heading_style = ParagraphStyle(
        "SectionHeading",
        parent=styles["Heading2"],
        textColor=colors.HexColor("#1F3A5F"),
        spaceBefore=10,
        spaceAfter=4,
    )
    body_style = ParagraphStyle(
        "Body", parent=styles["Normal"], fontSize=10, leading=14
    )

    header_lines = [
        Paragraph(escape(name), name_style),
        Paragraph(escape(f"{email} | {phone}"), contact_style),
    ]
    photo = None
    if profile_image_bytes:
        photo = _load_profile_image(profile_image_bytes, 25 * mm)

    story = []
    if photo is not None:
        photo_col = 30 * mm
        header = Table(
            [[photo, header_lines]],
            colWidths=[photo_col, doc.width - photo_col],
        )
        header.setStyle(
            TableStyle(
                [
                    ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
                    ("LEFTPADDING", (0, 0), (-1, -1), 0),
                ]
            )
        )
        story.append(header)
    else:
        story.extend(header_lines)
    story.append(Spacer(1, 6 * mm))
    story.extend(_content_to_flowables(content, heading_style, body_style))

    doc.build(story)
    return buffer.getvalue()


# -----------------------------
# STREAMLIT UI HELPERS
# -----------------------------
def _generate_into_state(state_key, generator, *args):
    """Run an AI generator and keep its output in ``st.session_state``.

    Storing the text (instead of a local variable) keeps it available on the
    reruns that every later widget interaction triggers.
    """
    try:
        st.session_state[state_key] = generator(*args)
    except Exception as exc:  # UI boundary: report instead of crashing
        st.error(f"AI generation failed: {exc}")


def _render_export(text, *, title, export_label, download_label, file_name,
                   key, name, email, phone, profile_pic):
    """Render the "export as PDF" button and, once clicked, the download."""
    if st.button(export_label, key=f"export_{key}"):
        prof_bytes = profile_pic.getvalue() if profile_pic else None
        pdf_bytes = build_pdf(
            text,
            title=title,
            name=name,
            email=email,
            phone=phone,
            profile_image_bytes=prof_bytes,
        )
        st.download_button(
            label=download_label,
            data=pdf_bytes,
            file_name=file_name,
            mime="application/pdf",
            key=f"download_{key}",
        )


# -----------------------------
# STREAMLIT UI
# -----------------------------
# Hook for custom CSS; the markup is currently empty.
st.markdown(
    """ """,
    unsafe_allow_html=True,
)

# Header area with optional logo upload
col1, col2 = st.columns([1, 6])
with col1:
    logo_file = st.file_uploader(
        "Upload logo (optional)",
        type=["png", "jpg", "jpeg"],
        help="Optional: upload your company/app logo",
    )
    if logo_file:
        img = Image.open(logo_file)
        st.image(img, width=100)
with col2:
    st.title("MATCHHIVE - AI Job Matcher")
    st.caption("Upload a resume, match to jobs, generate tailored resumes & cover letters (PDF).")

# Sidebar: user contact info + options
with st.sidebar:
    st.header("Candidate Info")
    name = st.text_input("Full Name", "John Doe")
    email = st.text_input("Email", "john.doe@email.com")
    phone = st.text_input("Phone", "+1 234 567 890")
    profile_pic = st.file_uploader(
        "Profile photo (optional)",
        type=["png", "jpg", "jpeg"],
        help="Small circular/headshot for resume header",
    )
    st.markdown("---")
    st.header("Job Filters (optional)")
    location_filter = st.text_input("Location keyword (e.g. Remote, USA, Canada)", "")
    keyword_filter = st.text_input("Job keyword (e.g. Python, ML, DevOps)", "")
    min_score = st.slider("Minimum match score", min_value=0.0, max_value=1.0, value=0.0, step=0.01)
    top_k = st.number_input("Number of matches to show", min_value=1, max_value=20, value=5)
    st.markdown("---")
    st.caption("Note: Job data comes from remoteok.com API and match scores are semantic similarity approximations.")

# Main upload & processing area
st.header("Upload Resume (PDF or DOCX)")
resume_file = st.file_uploader("Upload your resume", type=["pdf", "docx"])

if not resume_file:
    st.info("Please upload a resume (PDF or DOCX) to start matching.")
else:
    with st.spinner("Extracting resume text..."):
        resume_text = extract_text_from_resume(resume_file)

    if not resume_text.strip():
        st.error("Could not extract text from the resume. Try a different file or ensure the PDF is text-based (not scanned).")
    else:
        # Fetch jobs and apply the simple sidebar filters
        with st.spinner("Fetching remote jobs..."):
            jobs = fetch_jobs()
        filtered_jobs = [
            j for j in jobs
            if job_matches_filters(j, location_filter, keyword_filter)
        ]

        # Do matching & display results
        with st.spinner("Computing semantic match scores..."):
            matches = match_jobs(resume_text, filtered_jobs, top_k=top_k)
        matches = [(job, score) for job, score in matches if score >= min_score]

        if not matches:
            st.warning("No matches found with given filters/score. Try lowering minimum score or removing filters.")
        else:
            st.subheader(f"Top {len(matches)} Matches")
            for rank, (job, score) in enumerate(matches):
                title = job.get("position", "Unknown Position")
                company = job.get("company", "Unknown Company")
                url = job.get("url", "#")
                posted = job.get("date", "")
                # Fall back to rank + title so jobs without an id still get
                # distinct widget keys.
                job_key = str(job.get("id") or f"{rank}_{title}")
                # These keys double as the text areas' widget keys: seeding
                # them before the widget is created sets its content, and the
                # user's edits then persist across reruns.
                resume_key = f"resumetext_{job_key}"
                cl_key = f"cltext_{job_key}"
                file_stem = name.replace(" ", "_")

                exp_label = f"{title} at {company} — Score: {score:.2f}"
                with st.expander(exp_label, expanded=False):
                    # Two trailing spaces before "\n" force Markdown line breaks.
                    st.markdown(
                        f"**Location:** {job.get('location','N/A')}  \n"
                        f"**Posted:** {posted}  \n"
                        f"[View Job Posting]({url})"
                    )
                    st.markdown("---")

                    cols = st.columns([1, 1, 1])
                    if cols[0].button("Generate Resume (AI)", key=f"resume_{job_key}"):
                        with st.spinner("Generating tailored resume..."):
                            _generate_into_state(resume_key, generate_resume, resume_text, job)
                    if cols[1].button("Generate Cover Letter (AI)", key=f"clgen_{job_key}"):
                        with st.spinner("Generating cover letter..."):
                            _generate_into_state(
                                cl_key, generate_cover_letter,
                                resume_text, job, name, email, phone,
                            )
                    if cols[2].button("Show Job Description", key=f"desc_{job_key}"):
                        st.info(job.get("description", "No description available"))

                    # Rendered outside the button branches: a button is only
                    # True for the single rerun right after its click, so
                    # anything nested under it would vanish on the next click.
                    tab1, tab2 = st.tabs(["Tailored Resume", "Cover Letter"])
                    with tab1:
                        if resume_key in st.session_state:
                            edited_resume = st.text_area(
                                "Tailored Resume (editable)", height=300, key=resume_key
                            )
                            _render_export(
                                edited_resume,
                                title="Resume",
                                export_label="Export Tailored Resume as PDF",
                                download_label="📥 Download Resume (PDF)",
                                file_name=f"{file_stem}_resume.pdf",
                                key=f"resume_{job_key}",
                                name=name,
                                email=email,
                                phone=phone,
                                profile_pic=profile_pic,
                            )
                    with tab2:
                        if cl_key in st.session_state:
                            edited_cl = st.text_area(
                                "Cover Letter (editable)", height=300, key=cl_key
                            )
                            _render_export(
                                edited_cl,
                                title="Cover Letter",
                                export_label="Export Cover Letter as PDF",
                                download_label="📥 Download Cover Letter (PDF)",
                                file_name=f"{file_stem}_cover_letter.pdf",
                                key=f"cl_{job_key}",
                                name=name,
                                email=email,
                                phone=phone,
                                profile_pic=profile_pic,
                            )

            st.success("Done — select a match and generate your tailored resume or cover letter.")