File size: 12,809 Bytes
932470d
04515a0
67d61f0
 
 
 
 
 
 
d154480
67d61f0
 
 
 
 
 
 
 
 
 
d170281
67d61f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d170281
67d61f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
932470d
67d61f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
932470d
67d61f0
 
 
 
 
 
932470d
d154480
932470d
d154480
 
67d61f0
 
 
 
 
 
 
 
d154480
 
932470d
 
 
 
67d61f0
932470d
 
d154480
 
 
932470d
d154480
67d61f0
932470d
67d61f0
 
 
 
 
 
 
 
 
 
 
 
165223f
67d61f0
 
 
 
 
 
 
 
 
 
 
 
165223f
67d61f0
 
 
 
 
d154480
67d61f0
 
 
 
 
d154480
67d61f0
c3fdb9a
67d61f0
 
 
 
 
d154480
67d61f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
932470d
67d61f0
 
932470d
67d61f0
 
 
 
 
 
 
 
 
 
 
 
932470d
67d61f0
 
932470d
67d61f0
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
# full corrected app.py
import streamlit as st
import requests
import pdfplumber
import docx
from sentence_transformers import SentenceTransformer
import faiss
from groq import Groq
from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import (
    SimpleDocTemplate,
    Paragraph,
    Spacer,
    ListFlowable,
    ListItem,
    Table,
    TableStyle,
    Image as RLImage,
)
from reportlab.lib.units import mm
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import io
from PIL import Image
import tempfile
import os
from typing import List

# -----------------------------
# CONFIG
# -----------------------------
# Endpoint requested by fetch_jobs(); its response is parsed as JSON.
REMOTEOK_URL = "https://remoteok.com/api"
# Model name passed to SentenceTransformer when the embedding model is loaded.
EMBED_MODEL = "BAAI/bge-small-en-v1.5"
# Model name sent with every Groq chat-completion request.
AI_MODEL = "openai/gpt-oss-120b"   # Groq model

# -----------------------------
# CACHED MODELS
# -----------------------------
@st.cache_resource
def load_embedding_model():
    """Build the SentenceTransformer named by ``EMBED_MODEL``.

    Decorated with ``st.cache_resource`` so Streamlit can hand back the same
    instance instead of constructing it again on each script rerun.
    """
    encoder = SentenceTransformer(EMBED_MODEL)
    return encoder


# Module-level handle used by embed_texts().
model = load_embedding_model()

@st.cache_resource
def init_groq():
    """Create the Groq client from the ``GROQ_API_KEY`` Streamlit secret.

    The key is looked up with ``st.secrets.get`` and is ``None`` when the
    secret is absent; that value is passed to ``Groq`` unchanged.
    """
    api_key = st.secrets.get("GROQ_API_KEY", None)
    return Groq(api_key=api_key)


# Module-level client used by the AI generation helpers.
groq_client = init_groq()

# -----------------------------
# UTIL / PARSING FUNCTIONS
# -----------------------------
def extract_text_from_resume(file) -> str:
    """Extract plain text from an uploaded PDF or DOCX resume.

    The format is chosen from the object's ``name`` attribute using a
    case-insensitive extension match; an object without a usable ``name``
    is treated as unsupported.

    Args:
        file: File-like object exposing ``name``, such as the value returned
            by ``st.file_uploader``.

    Returns:
        The extracted text, with PDF pages and DOCX paragraphs separated by
        newlines. Returns ``""`` (after reporting via ``st.error``) when the
        extension is neither ``.pdf`` nor ``.docx``.
    """
    filename = (getattr(file, "name", "") or "").lower()

    if filename.endswith(".pdf"):
        with pdfplumber.open(file) as pdf:
            # A page may yield no text (e.g. a scanned image); treat it as empty.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
        # Join with newlines so the last word of one page does not fuse with
        # the first word of the next page.
        return "\n".join(page_texts)

    if filename.endswith(".docx"):
        document = docx.Document(file)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    st.error("Unsupported file type. Please upload PDF or DOCX.")
    return ""

def fetch_jobs() -> List[dict]:
    """Download the job list from ``REMOTEOK_URL``.

    Every failure mode is reported with ``st.warning`` and results in an
    empty list, so callers never have to handle an exception.

    Returns:
        The job dictionaries from the response with the leading metadata
        element removed, or ``[]`` when the request fails, the status is not
        200, or the body is not a JSON list.
    """
    try:
        resp = requests.get(REMOTEOK_URL, timeout=10)
    except requests.RequestException as exc:
        st.warning(f"Failed to fetch jobs: {exc}")
        return []

    if resp.status_code != 200:
        # Previously this case returned [] silently, which looked like
        # "no matches" to the user.
        st.warning(f"Failed to fetch jobs: HTTP {resp.status_code}")
        return []

    try:
        payload = resp.json()
    except ValueError as exc:  # JSON decoding errors derive from ValueError
        st.warning(f"Failed to fetch jobs: {exc}")
        return []

    if not isinstance(payload, list):
        st.warning("Failed to fetch jobs: unexpected response format")
        return []

    # Skip the first element (metadata) and keep only dict entries, because
    # downstream code calls .get() on every job.
    return [entry for entry in payload[1:] if isinstance(entry, dict)]

def embed_texts(texts):
    """Encode ``texts`` with the module-level embedding ``model``.

    The encoder is asked for NumPy output with normalized embeddings.
    """
    encode_options = {"convert_to_numpy": True, "normalize_embeddings": True}
    return model.encode(texts, **encode_options)

def match_jobs(resume_text, jobs, top_k=5):
    """Rank ``jobs`` by embedding similarity to ``resume_text``.

    Each job is represented by its position, company and description joined
    into one string. Resume and job embeddings come from ``embed_texts`` and
    are compared with a FAISS inner-product index.

    Args:
        resume_text: Plain text of the candidate's resume.
        jobs: Sequence of job dictionaries; may be empty or ``None``.
        top_k: Maximum number of matches to return.

    Returns:
        A list of ``(job, score)`` tuples in the order FAISS returns them,
        containing at most ``min(top_k, len(jobs))`` entries. Returns ``[]``
        when there are no jobs or ``top_k`` is not positive.
    """
    if not jobs or top_k <= 0:
        return []

    job_texts = [
        f"{job.get('position','')} {job.get('company','')} {job.get('description','')}"
        for job in jobs
    ]
    resume_vec = embed_texts([resume_text])
    job_vecs = embed_texts(job_texts)

    index = faiss.IndexFlatIP(job_vecs.shape[1])
    index.add(job_vecs)

    # Never request more neighbours than there are jobs: FAISS pads missing
    # results with label -1, and jobs[-1] would silently return the last job
    # with a meaningless score.
    k = min(int(top_k), len(jobs))
    scores, idx = index.search(resume_vec, k)

    return [
        (jobs[i], float(score))
        for i, score in zip(idx[0], scores[0])
        if i >= 0  # defensive: drop any padding label
    ]

# -----------------------------
# AI GENERATION
# -----------------------------
def generate_resume(resume_text, job):
    """Ask the Groq model for a resume tailored to ``job``.

    Args:
        resume_text: Plain text of the candidate's existing resume.
        job: Job dictionary; only its ``description`` entry is read
            (missing descriptions become an empty string).

    Returns:
        The content of the first choice in the chat-completion response.
    """
    # The bullet characters in the template were mis-encoded ("β€’"); they
    # are restored to "•" so the model sees the intended format.
    prompt = f"""
You are an AI career assistant.
Given this resume:\n{resume_text}\n
and this job description:\n{job.get('description','')}\n
Generate a structured resume in this format:

Summary
-----------------
[2-3 line summary tailored for the job]

Skills
-----------------
- Skill 1
- Skill 2
- Skill 3

Experience
-----------------
Job Title | Company | Dates
• Achievement 1
• Achievement 2

Education
-----------------
Degree | Institution | Year
"""
    chat_completion = groq_client.chat.completions.create(
        model=AI_MODEL,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
    )
    return chat_completion.choices[0].message.content

def generate_cover_letter(resume_text, job, name, email, phone):
    """Request a one-page cover letter for ``job`` from the Groq model.

    The prompt embeds the resume text, the job's ``description`` (empty when
    absent) and a signature block made from ``name``, ``email`` and ``phone``.

    Returns:
        The content of the first choice in the chat-completion response.
    """
    job_description = job.get('description','')
    prompt = f"""
You are an AI career assistant.
Given this resume:\n{resume_text}\n
and this job description:\n{job_description}\n
Generate a professional, one-page cover letter tailored to this role.
Format it like this:

Dear Hiring Manager,

[Intro paragraph: Show enthusiasm and alignment with company/role]
[Body paragraph: Highlight 2-3 most relevant skills/experiences from resume]
[Closing paragraph: Express eagerness and thank them]

Sincerely,
{name}
{email} | {phone}
"""
    messages = [{"role": "user", "content": prompt}]
    response = groq_client.chat.completions.create(
        model=AI_MODEL,
        messages=messages,
        temperature=0.7,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

# -----------------------------
# PDF BUILDING (returns raw PDF bytes)
# -----------------------------
def _is_rule_line(line: str) -> bool:
    """Return True when *line* is a divider made only of '-', '=' or '_'."""
    stripped = line.strip()
    return len(stripped) >= 3 and set(stripped) <= set("-=_")


def build_pdf(content: str,
              title: str = "Resume",
              name: str = "John Doe",
              email: str = "john.doe@email.com",
              phone: str = "+1 234 567 890",
              profile_image_bytes: bytes = None) -> bytes:
    """Render plain-text resume or cover-letter content as an A4 PDF.

    The previous version called ``doc.build(story)`` without ever defining
    ``story``; the flowables are now assembled here.

    Layout:
        * Header with ``name`` and an "email | phone" line, placed beside the
          profile photo when ``profile_image_bytes`` is given.
        * ``title`` as a top-level heading.
        * ``content`` parsed line by line: a line followed by a divider line
          (``-----``) becomes a section heading, lines starting with
          ``- ``, ``• `` or ``* `` become bullet items, blank lines add
          spacing, and everything else is a body paragraph.

    Args:
        content: Text to render; ``None`` is treated as empty.
        title: Document heading, also stored as the PDF title.
        name: Candidate name shown in the header and stored as PDF author.
        email: Contact e-mail shown in the header.
        phone: Contact phone number shown in the header.
        profile_image_bytes: Optional raw image bytes for the header photo.

    Returns:
        The generated PDF as raw bytes.
    """
    # Paragraph interprets XML-like markup, so free text (which may contain
    # "&" or "<") must be escaped before it is handed over.
    from xml.sax.saxutils import escape

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=A4,
        leftMargin=30,
        rightMargin=30,
        topMargin=30,
        bottomMargin=30,
        title=title,
        author=name,
    )
    styles = getSampleStyleSheet()
    name_style = ParagraphStyle(
        "HeaderName", parent=styles["Title"], alignment=0, spaceAfter=2
    )
    contact_style = ParagraphStyle(
        "HeaderContact", parent=styles["Normal"], textColor=colors.grey, spaceAfter=6
    )
    section_style = ParagraphStyle(
        "SectionHeading", parent=styles["Heading2"], spaceBefore=10, spaceAfter=4
    )
    body_style = styles["BodyText"]

    story = []

    # --- Header: name + contact line, optionally next to the photo ---------
    header_block = []
    if name:
        header_block.append(Paragraph(escape(name), name_style))
    contact_line = " | ".join(part for part in (email, phone) if part)
    if contact_line:
        header_block.append(Paragraph(escape(contact_line), contact_style))

    if profile_image_bytes:
        photo_col = 30 * mm
        # Leave room for the frame's default 6pt padding on each side.
        text_col = doc.width - photo_col - 12
        photo = RLImage(io.BytesIO(profile_image_bytes), width=25 * mm, height=25 * mm)
        header = Table([[photo, header_block or ""]], colWidths=[photo_col, text_col])
        header.setStyle(
            TableStyle(
                [
                    ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
                    ("LEFTPADDING", (0, 0), (-1, -1), 0),
                ]
            )
        )
        story.append(header)
    else:
        story.extend(header_block)

    if title:
        story.append(Paragraph(escape(title), styles["Heading1"]))
    story.append(Spacer(1, 6))

    # --- Body: headings, bullet lists and paragraphs -----------------------
    bullet_markers = ("- ", "• ", "* ")
    lines = (content or "").splitlines()
    pending_bullets = []
    for pos, raw_line in enumerate(lines):
        line = raw_line.strip()
        is_bullet = line.startswith(bullet_markers)

        # A run of bullet lines ends at the first non-bullet line.
        if pending_bullets and not is_bullet:
            story.append(ListFlowable(pending_bullets, bulletType="bullet", leftIndent=14))
            pending_bullets = []

        if not line:
            story.append(Spacer(1, 4))
        elif _is_rule_line(line):
            # The divider itself is not printed; it only marks the line
            # above it as a section heading.
            continue
        elif is_bullet:
            item_text = line[2:].strip()
            pending_bullets.append(ListItem(Paragraph(escape(item_text), body_style)))
        elif pos + 1 < len(lines) and _is_rule_line(lines[pos + 1]):
            story.append(Paragraph(escape(line), section_style))
        else:
            story.append(Paragraph(escape(line), body_style))

    if pending_bullets:
        story.append(ListFlowable(pending_bullets, bulletType="bullet", leftIndent=14))

    doc.build(story)
    return buffer.getvalue()

# -----------------------------
# STREAMLIT UI
# -----------------------------
st.set_page_config(
    page_title="MATCHHIVE - AI Job Matcher",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS injected once per run: rounded buttons plus the .download-btn
# and .job-card helper classes.
_CUSTOM_CSS = """
    <style>
    .stButton>button { border-radius: 8px; padding:8px 12px; }
    .download-btn { background-color:#2ECC71 !important; color:white !important; }
    .job-card { padding:10px; border:1px solid #E5E7EB; border-radius:8px; margin-bottom:8px; }
    </style>
    """
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Header area with optional logo upload
# Narrow column for the optional logo, wide column for the title text.
logo_col, heading_col = st.columns([1, 6])

with logo_col:
    logo_file = st.file_uploader(
        "Upload logo (optional)",
        type=["png", "jpg", "jpeg"],
        help="Optional: upload your company/app logo",
    )
    if logo_file:
        # Preview the uploaded logo at a fixed width.
        st.image(Image.open(logo_file), width=100)

with heading_col:
    st.title("MATCHHIVE - AI Job Matcher")
    st.caption("Upload a resume, match to jobs, generate tailored resumes & cover letters (PDF).")

# Sidebar: user contact info + options
with st.sidebar:
    # Contact details used in generated cover letters and PDF exports.
    st.header("Candidate Info")
    name = st.text_input("Full Name", value="John Doe")
    email = st.text_input("Email", value="john.doe@email.com")
    phone = st.text_input("Phone", value="+1 234 567 890")
    profile_pic = st.file_uploader(
        "Profile photo (optional)",
        type=["png", "jpg", "jpeg"],
        help="Small circular/headshot for resume header",
    )

    st.markdown("---")

    # Optional filters applied to the fetched jobs and their match scores.
    st.header("Job Filters (optional)")
    location_filter = st.text_input(
        "Location keyword (e.g. Remote, USA, Canada)",
        value="",
    )
    keyword_filter = st.text_input(
        "Job keyword (e.g. Python, ML, DevOps)",
        value="",
    )
    min_score = st.slider(
        "Minimum match score",
        min_value=0.0,
        max_value=1.0,
        value=0.0,
        step=0.01,
    )
    top_k = st.number_input(
        "Number of matches to show",
        min_value=1,
        max_value=20,
        value=5,
    )

    st.markdown("---")
    st.caption("Note: Job data comes from remoteok.com API and match scores are semantic similarity approximations.")

# Main upload & processing area
st.header("Upload Resume (PDF or DOCX)")
resume_file = st.file_uploader("Upload your resume", type=["pdf", "docx"])
if not resume_file:
    st.info("Please upload a resume (PDF or DOCX) to start matching.")
else:
    with st.spinner("Extracting resume text..."):
        resume_text = extract_text_from_resume(resume_file)

    if not resume_text.strip():
        st.error("Could not extract text from the resume. Try a different file or ensure the PDF is text-based (not scanned).")
    else:
        # NOTE: Streamlit reruns this script on every interaction, so the
        # fetch and matching below execute again after each button click.
        with st.spinner("Fetching remote jobs..."):
            jobs = fetch_jobs()

        def job_matches_filters(job):
            """Return True when the job passes the sidebar location/keyword filters."""
            if location_filter:
                loc = job.get("location") or job.get("company_location") or ""
                if location_filter.lower() not in str(loc).lower():
                    return False
            if keyword_filter:
                combined = f"{job.get('position','')} {job.get('company','')} {job.get('description','')}"
                if keyword_filter.lower() not in combined.lower():
                    return False
            return True

        filtered_jobs = [j for j in jobs if job_matches_filters(j)]

        with st.spinner("Computing semantic match scores..."):
            matches = match_jobs(resume_text, filtered_jobs, top_k=top_k)

        matches = [(job, score) for job, score in matches if score >= min_score]

        if not matches:
            st.warning("No matches found with given filters/score. Try lowering minimum score or removing filters.")
        else:
            st.subheader(f"Top {len(matches)} Matches")
            prof_bytes = profile_pic.getvalue() if profile_pic else None
            file_stem = name.replace(" ", "_")

            for rank, (job, score) in enumerate(matches):
                title = job.get("position", "Unknown Position")
                company = job.get("company", "Unknown Company")
                url = job.get("url", "#")
                posted = job.get("date", "")

                # Widget keys must be unique; fall back to title/company/rank
                # when the job carries no id so duplicate titles do not clash.
                job_key = job.get("id")
                if job_key is None:
                    job_key = f"{title}_{company}_{rank}"

                # Generated text lives in session state under the text-area
                # keys. A button is only True during the rerun its click
                # triggers, so content nested under a button would vanish as
                # soon as any other widget is used.
                resume_state = f"resumetext_{job_key}"
                cl_state = f"cltext_{job_key}"

                exp_label = f"{title} at {company} — Score: {score:.2f}"
                with st.expander(exp_label, expanded=False):
                    st.markdown(f"**Location:** {job.get('location','N/A')}  \n**Posted:** {posted}  \n[View Job Posting]({url})")
                    st.markdown("---")
                    cols = st.columns([1, 1, 1])

                    if cols[0].button("Generate Resume (AI)", key=f"resume_{job_key}"):
                        with st.spinner("Generating tailored resume..."):
                            st.session_state[resume_state] = generate_resume(resume_text, job) or ""

                    if cols[1].button("Generate Cover Letter (AI)", key=f"clgen_{job_key}"):
                        with st.spinner("Generating cover letter..."):
                            st.session_state[cl_state] = generate_cover_letter(resume_text, job, name, email, phone) or ""

                    has_resume = resume_state in st.session_state
                    has_cl = cl_state in st.session_state
                    if has_resume or has_cl:
                        tab1, tab2 = st.tabs(["Tailored Resume", "Cover Letter"])
                        with tab1:
                            if has_resume:
                                # The key ties the editor to session state, so
                                # edits survive reruns.
                                edited_resume = st.text_area("Tailored Resume (editable)", key=resume_state, height=300)
                                if st.button("Export Tailored Resume as PDF", key=f"export_resume_{job_key}"):
                                    pdf_bytes = build_pdf(edited_resume, title="Resume", name=name, email=email, phone=phone, profile_image_bytes=prof_bytes)
                                    st.download_button(
                                        label="📥 Download Resume (PDF)",
                                        data=pdf_bytes,
                                        file_name=f"{file_stem}_resume.pdf",
                                        mime="application/pdf",
                                        key=f"dl_resume_{job_key}",
                                    )
                            else:
                                st.info('Click "Generate Resume (AI)" to create a tailored resume.')
                        with tab2:
                            if has_cl:
                                edited_cl = st.text_area("Cover Letter (editable)", key=cl_state, height=300)
                                if st.button("Export Cover Letter as PDF", key=f"export_cl_{job_key}"):
                                    pdf_bytes = build_pdf(edited_cl, title="Cover Letter", name=name, email=email, phone=phone, profile_image_bytes=prof_bytes)
                                    st.download_button(
                                        label="📥 Download Cover Letter (PDF)",
                                        data=pdf_bytes,
                                        file_name=f"{file_stem}_cover_letter.pdf",
                                        mime="application/pdf",
                                        key=f"dl_cl_{job_key}",
                                    )
                            else:
                                st.info('Click "Generate Cover Letter (AI)" to create a cover letter.')

                    if cols[2].button("Show Job Description", key=f"desc_{job_key}"):
                        st.info(job.get("description", "No description available"))

            st.success("Done — select a match and generate your tailored resume or cover letter.")