Alpha108 committed on
Commit
c3fdb9a
·
verified ·
1 Parent(s): d170281

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -160
app.py CHANGED
@@ -2,45 +2,40 @@ import streamlit as st
2
  import os
3
  import requests
4
  import re
5
- from datetime import datetime
6
  import fitz # PyMuPDF
7
  from docx import Document
8
- from collections import Counter
9
  import json
10
 
11
  # --- Configuration ---
12
  st.set_page_config(
13
- page_title="AI Job Finder",
14
  page_icon="πŸ€–",
15
  layout="wide",
16
  initial_sidebar_state="expanded",
17
  )
18
 
19
  # --- Hugging Face Secrets & API Keys ---
20
- # Try to get the API key from Streamlit secrets (for deployed apps)
21
  try:
22
  SCRAPINGDOG_API_KEY = st.secrets["SCRAPINGDOG_API_KEY"]
23
  except (KeyError, AttributeError):
24
- # Fallback to environment variable (for local development)
25
  SCRAPINGDOG_API_KEY = os.getenv("SCRAPINGDOG_API_KEY")
26
 
27
- # --- Helper Functions & Classes ---
28
 
29
  def parse_cv(uploaded_file):
30
- """Parses the uploaded CV file and returns its text content."""
31
  try:
32
  file_type = uploaded_file.type
33
  if "pdf" in file_type:
34
  with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
35
- text = "".join(page.get_text() for page in doc)
36
- return text
37
- elif "vnd.openxmlformats-officedocument.wordprocessingml.document" in file_type: # DOCX
38
  doc = Document(uploaded_file)
39
- text = "\n".join([para.text for para in doc.paragraphs])
40
- return text
41
  elif "text/plain" in file_type:
42
- text = uploaded_file.getvalue().decode("utf-8")
43
- return text
44
  else:
45
  st.error(f"Unsupported file type: {file_type}")
46
  return None
@@ -48,88 +43,59 @@ def parse_cv(uploaded_file):
48
  st.error(f"Error parsing CV: {e}")
49
  return None
50
 
51
- def extract_keywords(text, top_n=25):
52
- """Extracts the most common words from text to be used as keywords."""
53
  if not text:
54
  return []
55
- # Basic regex to find words, removing simple punctuation
56
- words = re.findall(r'\b[a-zA-Z-]{3,}\b', text.lower())
 
 
 
 
 
 
 
 
 
 
57
 
58
- # A simple list of common English stop words.
59
- # For a more robust solution, a library like NLTK would be better.
60
- stop_words = set([
61
- 'and', 'the', 'is', 'in', 'it', 'of', 'for', 'on', 'with', 'as', 'at', 'by',
62
- 'to', 'a', 'an', 'that', 'this', 'i', 'you', 'he', 'she', 'we', 'they', 'was',
63
- 'were', 'be', 'been', 'are', 'has', 'have', 'had', 'do', 'does', 'did', 'but',
64
- 'if', 'or', 'so', 'not', 'from', 'about', 'more', 'my', 'your', 'our', 'their',
65
- 'experience', 'work', 'skills', 'responsibilities', 'project', 'projects'
66
- ])
67
 
68
- filtered_words = [word for word in words if word not in stop_words]
69
- word_counts = Counter(filtered_words)
70
- return [word for word, _ in word_counts.most_common(top_n)]
 
 
 
 
71
 
72
  def safe_get(data, key, default='N/A'):
73
- """Safely get a value from a dictionary."""
74
  return data.get(key, default) if data else default
75
 
76
  class JobDataNormalizer:
77
- """Normalizes job data from different sources into a common schema."""
78
- @staticmethod
79
- def normalize_remoteok(job):
80
- return {
81
- "id": safe_get(job, 'id'),
82
- "title": safe_get(job, 'position'),
83
- "company": safe_get(job, 'company'),
84
- "location": safe_get(job, 'location', "Remote"),
85
- "description": safe_get(job, 'description'),
86
- "url": safe_get(job, 'url'),
87
- "date_posted": safe_get(job, 'date'),
88
- "source": "RemoteOK"
89
- }
90
-
91
  @staticmethod
92
  def normalize_linkedin(job):
93
- return {
94
- "id": hash(safe_get(job, 'link')), # Create a simple ID
95
  "title": safe_get(job, 'title'),
96
  "company": safe_get(job, 'company'),
97
  "location": safe_get(job, 'location'),
98
  "description": safe_get(job, 'description'),
99
- "url": safe_get(job, 'link'),
100
  "date_posted": safe_get(job, 'date'),
 
101
  "source": "LinkedIn"
102
  }
103
 
104
- # --- API Agent Functions ---
105
-
106
- def search_remoteok(keywords):
107
- """Searches for jobs on RemoteOK based on keywords."""
108
- all_jobs = []
109
- try:
110
- response = requests.get("https://remoteok.com/api")
111
- response.raise_for_status()
112
- jobs_data = response.json()
113
-
114
- # The first item is a legal notice, so we skip it
115
- for job in jobs_data[1:]:
116
- job_text = f"{job.get('position', '')} {job.get('company', '')} {' '.join(job.get('tags', []))}".lower()
117
- if any(keyword.lower() in job_text for keyword in keywords):
118
- all_jobs.append(JobDataNormalizer.normalize_remoteok(job))
119
- except requests.exceptions.RequestException as e:
120
- st.error(f"Error fetching from RemoteOK: {e}")
121
- except json.JSONDecodeError:
122
- st.error("Failed to parse RemoteOK response.")
123
- return all_jobs
124
-
125
- def search_linkedin(keywords, location):
126
- """Searches for jobs on LinkedIn via ScrapingDog API."""
127
  if not SCRAPINGDOG_API_KEY:
128
- st.warning("ScrapingDog API key not found. Cannot search LinkedIn.")
129
- st.info("Please add your API key to your Hugging Face secrets with the name `SCRAPINGDOG_API_KEY`.")
130
  return []
131
 
132
- all_jobs = []
133
  query = " ".join(keywords)
134
  api_url = f"https://api.scrapingdog.com/linkedinjobs/?api_key={SCRAPINGDOG_API_KEY}&q={query}&geoid={location}"
135
 
@@ -138,45 +104,39 @@ def search_linkedin(keywords, location):
138
  response.raise_for_status()
139
  jobs_data = response.json()
140
  if isinstance(jobs_data, list):
141
- for job in jobs_data:
142
- all_jobs.append(JobDataNormalizer.normalize_linkedin(job))
143
  except requests.exceptions.HTTPError as e:
144
- st.error(f"ScrapingDog API Error: {e}. Check your API key and usage limits.")
145
  except requests.exceptions.RequestException as e:
146
- st.error(f"Network error while contacting ScrapingDog: {e}")
147
  except json.JSONDecodeError:
148
- st.error("Failed to parse LinkedIn job data. The API might have returned an invalid response.")
149
-
150
- return all_jobs
151
 
152
  # --- UI Rendering ---
153
 
154
  def display_job(job):
155
  """Renders a single job listing in a card format."""
156
- source_colors = {"RemoteOK": "#ff4b4b", "LinkedIn": "#0077b5"}
157
- color = source_colors.get(job['source'], "#f0f2f6")
158
-
159
  st.markdown(f"""
160
  <div style="border: 1px solid #e1e4e8; border-radius: 8px; padding: 16px; margin-bottom: 16px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
161
- <h3 style="margin-bottom: 8px;"><a href="{job['url']}" target="_blank" style="text-decoration: none; color: #0366d6;">{job['title']}</a></h3>
162
  <p style="margin: 0;"><strong>🏒 Company:</strong> {job['company']}</p>
163
  <p style="margin: 0;"><strong>πŸ“ Location:</strong> {job['location']}</p>
164
  <p style="margin: 0; color: #586069;"><strong>πŸ—“οΈ Posted:</strong> {job['date_posted']}</p>
165
- <div style="margin-top: 12px; display: flex; align-items: center;">
166
- <span style="background-color: {color}; color: white; padding: 4px 8px; border-radius: 12px; font-size: 12px; font-weight: bold;">{job['source']}</span>
167
  </div>
168
  </div>
169
  """, unsafe_allow_html=True)
170
  with st.expander("Show Job Description Snippet"):
171
- # Strip HTML tags for cleaner display
172
  clean_description = re.sub('<[^<]+?>', '', job['description'])
173
  st.write(clean_description[:500] + "...")
174
 
175
- # --- Main Application Logic ---
176
 
177
  # Initialize session state
178
- if 'keywords' not in st.session_state:
179
- st.session_state.keywords = []
180
  if 'jobs' not in st.session_state:
181
  st.session_state.jobs = []
182
  if 'searched' not in st.session_state:
@@ -185,99 +145,66 @@ if 'searched' not in st.session_state:
185
  # --- Sidebar ---
186
  with st.sidebar:
187
  st.image("https://images.emojiterra.com/twitter/v14.0/512px/1f916.png", width=80)
188
- st.title("AI Job Finder")
189
  st.markdown("""
190
- Welcome! This app helps you find relevant job postings by analyzing your CV.
191
 
192
- **How it works:**
193
- 1. **Upload your CV** (PDF, DOCX, TXT).
194
- 2. The app **extracts key skills**.
195
- 3. **Select/add skills** to search for.
196
- 4. **Search** across multiple job platforms.
197
- """)
198
 
199
- st.header("API Key Setup")
200
- st.markdown("""
201
- To search on LinkedIn, you need a **ScrapingDog API key**.
202
- - Get a free key at [scrapingdog.com](https://www.scrapingdog.com/).
203
- - In your Hugging Face Space, go to **Settings > Secrets** and add a secret named `SCRAPINGDOG_API_KEY` with your key as the value.
204
  """)
205
 
206
- # --- Main Content ---
207
  st.header("1. Upload Your CV")
208
  uploaded_file = st.file_uploader(
209
- "Upload your CV to automatically extract keywords.",
210
- type=["pdf", "docx", "txt"],
211
- accept_multiple_files=False
212
  )
213
 
214
  if uploaded_file:
215
- with st.spinner("Analyzing your CV... 🧠"):
216
  cv_text = parse_cv(uploaded_file)
217
  if cv_text:
218
- st.session_state.keywords = extract_keywords(cv_text)
219
- st.success("CV analyzed successfully! Keywords have been extracted below.")
220
 
221
- st.header("2. Select and Refine Your Keywords")
222
- manual_keywords_input = st.text_input(
223
- "Add more keywords (comma-separated)",
224
- placeholder="e.g., python, data science, machine learning"
225
  )
226
 
227
- # Combine CV keywords with manually added ones
228
- manual_keywords = [k.strip() for k in manual_keywords_input.split(',') if k.strip()]
229
- combined_keywords = sorted(list(set(st.session_state.keywords + manual_keywords)))
230
 
231
- selected_keywords = st.multiselect(
232
- "Choose the keywords you want to search for:",
233
- options=combined_keywords,
234
- default=st.session_state.keywords
235
  )
236
 
237
- st.header("3. Search for Jobs")
238
- location = st.text_input("Enter Location (e.g., 'United States' or leave empty for remote)", "Remote")
239
-
240
- col1, col2 = st.columns(2)
241
- with col1:
242
- if st.button("πŸš€ Search Jobs", type="primary", use_container_width=True):
243
- if not selected_keywords:
244
- st.warning("Please select at least one keyword to search.")
245
- else:
246
- st.session_state.jobs = [] # Clear previous results
247
- st.session_state.searched = True
248
- with st.spinner("Searching across job platforms... This may take a moment."):
249
- remoteok_jobs = search_remoteok(selected_keywords)
250
- linkedin_jobs = search_linkedin(selected_keywords, location)
251
-
252
- # Combine and deduplicate
253
- all_jobs = remoteok_jobs + linkedin_jobs
254
- unique_jobs = []
255
- seen_jobs = set()
256
-
257
- for job in all_jobs:
258
- identifier = (job['title'], job['company'], job['url'])
259
- if identifier not in seen_jobs:
260
- unique_jobs.append(job)
261
- seen_jobs.add(identifier)
262
-
263
- # Sort by date
264
- unique_jobs.sort(key=lambda x: x.get('date_posted', ''), reverse=True)
265
- st.session_state.jobs = unique_jobs
266
- st.success(f"Found {len(unique_jobs)} unique jobs!")
267
 
268
- with col2:
269
- if st.button("Reset", use_container_width=True):
270
- st.session_state.keywords = []
 
271
  st.session_state.jobs = []
272
- st.session_state.searched = False
273
- st.rerun()
274
-
 
275
 
276
  # --- Display Results ---
277
  if st.session_state.searched:
278
- st.header(f"πŸ’Ό Job Listings ({len(st.session_state.jobs)} Found)")
279
  if st.session_state.jobs:
280
  for job in st.session_state.jobs:
281
  display_job(job)
282
  else:
283
- st.info("No jobs found matching your criteria. Try different keywords or broaden your search.")
 
 
2
  import os
3
  import requests
4
  import re
 
5
  import fitz # PyMuPDF
6
  from docx import Document
 
7
  import json
8
 
9
# --- Configuration ---
st.set_page_config(
    page_title="LinkedIn Job Finder",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# --- Hugging Face Secrets & API Keys ---
# Prefer Streamlit secrets (deployed app on Hugging Face); fall back to an
# environment variable for local development. `AttributeError` is caught too
# because `st.secrets` may not exist at all outside a Streamlit runtime.
try:
    SCRAPINGDOG_API_KEY = st.secrets["SCRAPINGDOG_API_KEY"]
except (KeyError, AttributeError):
    SCRAPINGDOG_API_KEY = os.getenv("SCRAPINGDOG_API_KEY")
24
 
25
+ # --- Core Functions ---
26
 
27
def parse_cv(uploaded_file):
    """Parses text from uploaded PDF, DOCX, or TXT files.

    Args:
        uploaded_file: A Streamlit UploadedFile; its MIME type selects
            the parser used.

    Returns:
        The extracted text as a string, or None when the file type is
        unsupported or parsing fails (an error is shown in the UI).
    """
    try:
        file_type = uploaded_file.type
        if "pdf" in file_type:
            # PyMuPDF reads the raw bytes; concatenate text of every page.
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                return "".join(page.get_text() for page in doc)
        elif "vnd.openxmlformats-officedocument.wordprocessingml.document" in file_type:
            # DOCX: join paragraph texts with newlines.
            word_doc = Document(uploaded_file)
            return "\n".join(para.text for para in word_doc.paragraphs)
        elif "text/plain" in file_type:
            return uploaded_file.getvalue().decode("utf-8")
        else:
            st.error(f"Unsupported file type: {file_type}")
            return None
    except Exception as e:
        # Surface parsing problems to the user instead of crashing the app.
        st.error(f"Error parsing CV: {e}")
        return None
45
 
46
def extract_technical_skills(text):
    """Extracts technical skills from text using a predefined list and regex.

    Each known skill is matched case-insensitively as a whole token.

    Args:
        text: Raw CV text (may be empty or None).

    Returns:
        Sorted list of matched skill names in their canonical casing;
        empty list for empty input.
    """
    if not text:
        return []

    # Comprehensive list of technical skills (can be expanded)
    skills_list = [
        'Python', 'Java', 'C++', 'C#', 'JavaScript', 'TypeScript', 'Go', 'Rust', 'Ruby', 'PHP', 'Swift', 'Kotlin',
        'SQL', 'NoSQL', 'PostgreSQL', 'MySQL', 'MongoDB', 'Redis', 'Cassandra', 'GraphQL',
        'React', 'Angular', 'Vue.js', 'Node.js', 'Django', 'Flask', 'Spring Boot', 'Ruby on Rails',
        'TensorFlow', 'PyTorch', 'scikit-learn', 'Keras', 'Pandas', 'NumPy', 'Matplotlib',
        'AWS', 'Azure', 'Google Cloud', 'GCP', 'Docker', 'Kubernetes', 'Terraform', 'Ansible',
        'CI/CD', 'Jenkins', 'Git', 'GitHub', 'GitLab', 'Linux', 'Bash', 'PowerShell',
        'Agile', 'Scrum', 'JIRA', 'Data Science', 'Machine Learning', 'Deep Learning', 'NLP',
        'Big Data', 'Hadoop', 'Spark', 'Cybersecurity', 'API', 'REST', 'Microservices'
    ]

    found_skills = set()
    text_lower = text.lower()

    # Use explicit word-character lookarounds instead of r'\b': a plain \b
    # never matches next to a non-word character, so skills ending in '+'
    # or '#' (e.g. 'C++', 'C#') would otherwise never be found.
    for skill in skills_list:
        pattern = r'(?<!\w)' + re.escape(skill.lower()) + r'(?!\w)'
        if re.search(pattern, text_lower):
            found_skills.add(skill)

    return sorted(found_skills)
73
 
74
def safe_get(data, key, default='N/A'):
    """Safely gets a value from a dictionary."""
    # Treat None (and any falsy mapping) as "no data available".
    if not data:
        return default
    return data.get(key, default)
77
 
78
class JobDataNormalizer:
    """Normalizes LinkedIn job data into a common schema."""

    @staticmethod
    def normalize_linkedin(job):
        """Maps a raw ScrapingDog LinkedIn record onto the app's job schema."""
        # hash() of the link gives a cheap per-process identifier.
        normalized = {"id": hash(safe_get(job, 'link'))}
        # (output key, source key) pairs; missing values fall back to 'N/A'.
        field_map = (
            ("title", "title"),
            ("company", "company"),
            ("location", "location"),
            ("description", "description"),
            ("date_posted", "date"),
            ("job_url", "link"),
        )
        for out_key, src_key in field_map:
            normalized[out_key] = safe_get(job, src_key)
        normalized["source"] = "LinkedIn"
        return normalized
92
 
93
def search_linkedin_jobs(keywords, location):
    """Searches for jobs on LinkedIn via the ScrapingDog API.

    Args:
        keywords: Iterable of skill strings; joined into one search query.
        location: Value passed as the API's `geoid` parameter.
            NOTE(review): ScrapingDog's docs describe `geoid` as a numeric
            LinkedIn geo id; free-text locations may not filter as
            intended — confirm against the API documentation.

    Returns:
        List of normalized job dicts, or [] on any error (errors are
        shown in the UI).
    """
    if not SCRAPINGDOG_API_KEY:
        st.error("Please set SCRAPINGDOG_API_KEY in Hugging Face secrets.")
        return []

    # Let requests build the query string so characters such as '#', '+',
    # '&' and spaces in skills (e.g. "C#", "C++") are percent-encoded
    # instead of corrupting the URL ('#' would start a fragment).
    params = {
        "api_key": SCRAPINGDOG_API_KEY,
        "q": " ".join(keywords),
        "geoid": location,
    }
    try:
        response = requests.get(
            "https://api.scrapingdog.com/linkedinjobs/",
            params=params,
            timeout=30,  # don't hang the Streamlit app on a stalled request
        )
        response.raise_for_status()
        jobs_data = response.json()
        if isinstance(jobs_data, list):
            return [JobDataNormalizer.normalize_linkedin(job) for job in jobs_data]
    except requests.exceptions.HTTPError as e:
        st.error(f"API Error: {e}. Check your ScrapingDog API key and usage limits.")
    except requests.exceptions.RequestException as e:
        st.error(f"Network error: {e}")
    except json.JSONDecodeError:
        st.error("Failed to parse API response. The service might be temporarily down.")
    return []
 
115
 
116
  # --- UI Rendering ---
117
 
118
def display_job(job):
    """Renders a single job listing in a card format.

    Args:
        job: Normalized job dict (keys: job_url, title, company, location,
            date_posted, source, description).
    """
    st.markdown(f"""
    <div style="border: 1px solid #e1e4e8; border-radius: 8px; padding: 16px; margin-bottom: 16px; box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
        <h3 style="margin-bottom: 8px;"><a href="{job['job_url']}" target="_blank" style="text-decoration: none; color: #0077b5;">{job['title']}</a></h3>
        <p style="margin: 0;"><strong>🏢 Company:</strong> {job['company']}</p>
        <p style="margin: 0;"><strong>📍 Location:</strong> {job['location']}</p>
        <p style="margin: 0; color: #586069;"><strong>🗓️ Posted:</strong> {job['date_posted']}</p>
        <div style="margin-top: 12px;">
            <span style="background-color: #0077b5; color: white; padding: 4px 8px; border-radius: 12px; font-size: 12px; font-weight: bold;">{job['source']}</span>
        </div>
    </div>
    """, unsafe_allow_html=True)
    with st.expander("Show Job Description Snippet"):
        # Strip HTML tags for a cleaner plain-text preview.
        clean_description = re.sub('<[^<]+?>', '', job['description'])
        snippet = clean_description[:500]
        # Only append an ellipsis when the text was actually truncated.
        st.write(snippet + "..." if len(clean_description) > 500 else snippet)
134
 
135
# --- Main Application ---

# Initialize session state with defaults on first run.
for _state_key, _state_default in (("skills", []), ("jobs", []), ("searched", False)):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# --- Sidebar ---
with st.sidebar:
    st.image("https://images.emojiterra.com/twitter/v14.0/512px/1f916.png", width=80)
    st.title("LinkedIn Job Finder")
    st.markdown("""
    Find your next role on LinkedIn by leveraging the power of AI.

    **How to use:**
    1. **Upload your CV** to automatically identify your technical skills.
    2. **Refine the skills list** by adding or removing keywords.
    3. **Enter a location** and hit search!

    **API Key Required:**
    This app uses the ScrapingDog API. You'll need to get a free API key and set it up in your Hugging Face Space secrets as `SCRAPINGDOG_API_KEY`.
    """)

# --- Main Content Panel ---
st.header("1. Upload Your CV")
uploaded_file = st.file_uploader(
    "Upload to extract technical skills (PDF, DOCX, TXT). Personal details are ignored.",
    type=["pdf", "docx", "txt"]
)

if uploaded_file:
    with st.spinner("Analyzing CV for technical skills... 🧠"):
        cv_text = parse_cv(uploaded_file)
        if cv_text:
            st.session_state.skills = extract_technical_skills(cv_text)
            st.success("Successfully extracted skills from your CV!")

st.header("2. Refine Skills and Search")
manual_keywords = st.text_input(
    "Add more skills or keywords (comma-separated)",
    placeholder="e.g., Go, Cybersecurity, REST"
)

# Merge CV-derived skills with manually typed ones, deduplicated and sorted.
added_skills = [k.strip() for k in manual_keywords.split(',') if k.strip()]
combined_skills = sorted(set(st.session_state.skills + added_skills))

selected_skills = st.multiselect(
    "Select the skills to search for:",
    options=combined_skills,
    default=st.session_state.skills
)

location = st.text_input("Enter Location", "Remote")

if st.button("🚀 Search Jobs on LinkedIn", type="primary", use_container_width=True):
    if not selected_skills:
        st.warning("Please select at least one skill to search.")
    else:
        st.session_state.jobs = []  # clear previous results
        st.session_state.searched = True
        with st.spinner("Searching LinkedIn... This may take a moment."):
            jobs = search_linkedin_jobs(selected_skills, location)
            # Newest postings first.
            st.session_state.jobs = sorted(jobs, key=lambda x: x.get('date_posted', ''), reverse=True)

# --- Display Results ---
if st.session_state.searched:
    st.header(f"💼 Job Results ({len(st.session_state.jobs)} Found)")
    if st.session_state.jobs:
        for job in st.session_state.jobs:
            display_job(job)
    else:
        st.info("No jobs found for the selected keywords. Try refining your search.")