Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,84 +1,198 @@
|
|
| 1 |
import streamlit as st
|
|
|
|
| 2 |
import requests
|
| 3 |
-
import fitz # PyMuPDF for PDF extraction
|
| 4 |
from sentence_transformers import SentenceTransformer, util
|
| 5 |
import torch
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
# -
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
@st.cache_resource
|
| 11 |
-
def load_model():
|
| 12 |
-
return SentenceTransformer("all-MiniLM-L6-v2")
|
| 13 |
-
|
| 14 |
-
model = load_model()
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
"""Extract text from uploaded PDF."""
|
| 19 |
-
text = ""
|
| 20 |
-
doc = fitz.open(stream=uploaded_file.read(), filetype="pdf")
|
| 21 |
-
for page in doc:
|
| 22 |
-
text += page.get_text("text")
|
| 23 |
-
return text
|
| 24 |
-
|
| 25 |
-
def fetch_jobs():
|
| 26 |
-
"""Fetch jobs from RemoteOK API."""
|
| 27 |
-
url = "https://remoteok.com/api"
|
| 28 |
try:
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
except Exception as e:
|
| 33 |
-
st.error(f"
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
def
|
| 37 |
-
"""
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
return []
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
cv_embedding = model.encode(cv_text, convert_to_tensor=True)
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import fitz # PyMuPDF
|
| 3 |
import requests
|
|
|
|
| 4 |
from sentence_transformers import SentenceTransformer, util
|
| 5 |
import torch
|
| 6 |
+
import re
|
| 7 |
+
import io
|
| 8 |
|
| 9 |
+
# Load pre-trained Sentence Transformer model.
# Using a smaller, efficient model suitable for HF Spaces.
# Cached with st.cache_resource: Streamlit reruns this whole script on every
# user interaction, and without caching the model would be reloaded from disk
# each time (the pre-diff version of this file cached it the same way).
@st.cache_resource
def _load_model():
    """Load the embedding model once and share it across Streamlit reruns."""
    return SentenceTransformer('all-MiniLM-L6-v2')

model = _load_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
+
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of an uploaded PDF file.

    Args:
        pdf_file: A binary file-like object (e.g. Streamlit's UploadedFile)
            positioned at the start of a PDF document.

    Returns:
        The concatenated text of all pages, or None if the file could not be
        parsed (the error is also surfaced in the Streamlit UI).
    """
    try:
        pdf_bytes = pdf_file.read()
        # Use the document as a context manager so the underlying MuPDF
        # resources are released deterministically; the previous version
        # never closed the document, leaking it on every upload.
        with fitz.open(stream=io.BytesIO(pdf_bytes), filetype="pdf") as pdf_document:
            text = ""
            for page in pdf_document:
                text += page.get_text()
            return text
    except Exception as e:
        st.error(f"Error reading PDF file: {e}")
        return None
|
| 26 |
+
|
| 27 |
+
def extract_keywords(text):
    """Extract a de-duplicated list of known skill keywords from free text.

    A simple keyword extractor for skills, technologies, and certifications;
    it can be replaced with a more sophisticated NLP model if needed.
    Matching is case-insensitive, and each skill is reported once using the
    spelling of its first occurrence in the text.

    Args:
        text: The text to scan (e.g. extracted CV text); may be None or empty.

    Returns:
        A list of unique skill strings; empty when nothing matches.
    """
    if not text:
        return []
    # Basic regex skill spotting. Lookarounds replace \b because a trailing
    # \b after a non-word character (the "+" in "C++") can never match when
    # the keyword is followed by whitespace or punctuation.
    skills_pattern = (
        r'(?<!\w)(Python|Java|C\+\+|JavaScript|SQL|React|Node\.js|Angular|Vue'
        r'|AWS|Azure|GCP|Docker|Kubernetes|TensorFlow|PyTorch|Scikit-learn'
        r'|Pandas|NumPy|Git)(?!\w)'
    )
    matches = re.findall(skills_pattern, text, re.IGNORECASE)
    # Bug fix: set(matches) treated "Python" and "python" as distinct skills
    # because matching is case-insensitive but set membership is not.
    unique = {}
    for match in matches:
        unique.setdefault(match.casefold(), match)
    return list(unique.values())
|
| 39 |
+
|
| 40 |
+
def fetch_remoteok_jobs():
    """Fetch the latest job listings from the RemoteOK public API.

    Returns:
        A list of job dicts (the API's leading metadata/legal element is
        stripped), or an empty list on any network or parsing failure; errors
        are surfaced in the Streamlit UI.
    """
    try:
        # A timeout is essential here: without one a slow or unreachable API
        # would hang the entire Streamlit script run indefinitely.
        response = requests.get('https://remoteok.com/api', timeout=15)
        response.raise_for_status()  # Raise an exception for bad status codes
        jobs = response.json()
        # The first element is often a header/legal notice, so we skip it
        return jobs[1:] if isinstance(jobs, list) and len(jobs) > 1 else []
    except requests.exceptions.RequestException as e:
        st.error(f"Failed to fetch jobs from RemoteOK: {e}")
        return []
    except ValueError:
        st.error("Failed to parse JSON response from RemoteOK.")
        return []
|
| 54 |
|
| 55 |
+
def calculate_similarity(cv_text, job_description):
    """Return the cosine similarity between a CV and a job description.

    Both texts are embedded with the module-level sentence-transformer model;
    empty or missing input short-circuits to 0.0.
    """
    # Nothing to compare — treat as no match at all.
    if not (cv_text and job_description):
        return 0.0

    # Embed both passages, then score them against each other.
    embeddings = [
        model.encode(passage, convert_to_tensor=True)
        for passage in (cv_text, job_description)
    ]
    score = util.pytorch_cos_sim(embeddings[0], embeddings[1])
    return score.item()
|
| 67 |
+
|
| 68 |
+
def generate_match_explanation(cv_keywords, job_description):
    """Generate a brief explanation of why the job is a good match.

    This is a simplified implementation; for more advanced explanations a
    generative model (like GPT or T5) could be used.

    Args:
        cv_keywords: Skill keywords extracted from the CV.
        job_description: The job posting text to search.

    Returns:
        A one-sentence explanation naming up to three shared skills, or a
        generic text-similarity explanation when no keyword overlaps.
    """
    common_keywords = [
        keyword for keyword in cv_keywords
        # Bug fix: a trailing r'\b' never matches after keywords that end in
        # a non-word character (e.g. "C++"), so use explicit word-character
        # lookarounds instead of word boundaries.
        if re.search(r'(?<!\w)' + re.escape(keyword) + r'(?!\w)',
                     job_description, re.IGNORECASE)
    ]

    if not common_keywords:
        return "This job aligns with your general profile based on the overall text similarity."

    explanation = f"This role is a strong match because it requires skills you possess, such as: **{', '.join(common_keywords[:3])}**."
    return explanation
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def notify_user(job):
    """Placeholder notification hook for high-scoring job matches."""
    # A production version would integrate with an email/messaging service;
    # for the MVP we only write to stdout, which shows up in the HF Spaces
    # server logs.
    message = (
        f"Notification Triggered: New high-match job found - "
        f"'{job['position']}' at {job['company']}. "
        f"Match: {job['match_score']:.2f}%"
    )
    print(message)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
# --- Streamlit App UI ---
# NOTE(review): emoji below were mojibake in the scraped source ("π€", "π");
# restored to plausible originals — confirm against the deployed Space.
st.set_page_config(page_title="AI Job Matcher", page_icon="🤖", layout="wide")

st.title("🤖 AI-Powered Job Matcher")
st.markdown("""
Upload your CV, and this app will scan job platforms to find the best matches for your profile.
It uses sentence embeddings to understand the context of your skills and the job requirements.
""")

# --- State Management ---
# Streamlit reruns this script top-to-bottom on every interaction, so all
# cross-run state must live in st.session_state.
if 'cv_text' not in st.session_state:
    st.session_state.cv_text = None
if 'cv_keywords' not in st.session_state:
    st.session_state.cv_keywords = []
if 'jobs' not in st.session_state:
    st.session_state.jobs = []
if 'processed' not in st.session_state:
    st.session_state.processed = False


# --- Sidebar for CV Upload and Controls ---
with st.sidebar:
    st.header("1. Upload Your CV")
    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

    if uploaded_file is not None:
        if st.button("Process CV"):
            with st.spinner('Analyzing your CV...'):
                st.session_state.cv_text = extract_text_from_pdf(uploaded_file)
                if st.session_state.cv_text:
                    st.session_state.cv_keywords = extract_keywords(st.session_state.cv_text)
                    st.success("CV processed successfully!")
                    st.session_state.processed = False  # Reset processed state to allow re-matching
                else:
                    st.error("Could not extract text from the CV.")

    if st.session_state.cv_text:
        st.subheader("Detected Skills & Keywords:")
        if st.session_state.cv_keywords:
            st.write(", ".join(st.session_state.cv_keywords))
        else:
            st.write("No specific keywords detected. Matching will be based on overall text.")

    st.header("2. Select Job Platforms")
    use_remoteok = st.checkbox("RemoteOK", value=True)
    # Add other platforms here as they are implemented
    # use_upwork = st.checkbox("Upwork (Not Implemented)", disabled=True)
    # use_freelancer = st.checkbox("Freelancer (Not Implemented)", disabled=True)


# --- Main Content Area for Job Matching and Display ---
if st.session_state.cv_text:
    if st.button("🚀 Find My Dream Job", type="primary"):
        st.session_state.processed = False
        st.session_state.jobs = []
        matched_jobs = []

        with st.spinner("Fetching and analyzing jobs... This may take a moment."):
            if use_remoteok:
                fetched_jobs = fetch_remoteok_jobs()
                if fetched_jobs:
                    for job in fetched_jobs:
                        # Ensure job has a description, position, and company
                        if 'description' in job and 'position' in job and 'company' in job:
                            description_text = job['description']
                            similarity_score = calculate_similarity(st.session_state.cv_text, description_text)
                            job['match_score'] = similarity_score * 100
                            matched_jobs.append(job)

        if matched_jobs:
            # Sort jobs by match score in descending order
            st.session_state.jobs = sorted(matched_jobs, key=lambda x: x['match_score'], reverse=True)
            st.session_state.processed = True

            # Trigger notifications for high-matching jobs (e.g., > 70%)
            for job in st.session_state.jobs:
                if job['match_score'] > 70:
                    notify_user(job)
        else:
            st.warning("No jobs found or there was an issue with the job platforms. Please try again.")

    if st.session_state.processed and st.session_state.jobs:
        st.header("🏆 Top Job Matches For You")
        top_n = 10
        for job in st.session_state.jobs[:top_n]:
            with st.container(border=True):
                col1, col2 = st.columns([4, 1])
                with col1:
                    st.subheader(job.get('position', 'N/A'))
                    st.write(f"**Company:** {job.get('company', 'N/A')}")
                    tags = job.get('tags', [])
                    if tags:
                        st.write(f"**Tags:** `{'`, `'.join(tags[:5])}`")

                    explanation = generate_match_explanation(st.session_state.cv_keywords, job.get('description', ''))
                    st.info(f"**Why it's a match:** {explanation}")

                    st.markdown(f"[View Job Posting]({job.get('url', '#')})", unsafe_allow_html=True)

                with col2:
                    match_score = job.get('match_score', 0)
                    # Bug fix: cosine similarity can be negative, and
                    # st.progress raises for values outside 0-100 — clamp.
                    st.progress(max(0, min(100, int(match_score))))
                    st.metric(label="Match Score", value=f"{match_score:.2f}%")
else:
    st.info("Upload your CV and select job platforms to get started.")
|