Spaces:

Rohit-Katkar2003
/

JOBScraper

Sleeping

File size: 27,792 Bytes

dd351ae
ec4f2f9
 
 
 
 
 
 
dd351ae
 
5ec0cca
ec4f2f9
dd351ae
 
 
 
 
ec4f2f9
dd351ae
 
 
 
ec4f2f9
dd351ae
 
 
 
ec4f2f9
 
dd351ae
ec4f2f9
 
dd351ae
 
 
 
ec4f2f9
dd351ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec4f2f9
dd351ae
 
ec4f2f9
dd351ae
 
 
 
 
 
 
 
 
 
 
 
 
ec4f2f9
dd351ae
 
ec4f2f9
 
dd351ae
 
 
 
 
 
ec4f2f9
dd351ae
 
 
ec4f2f9
 
 
 
 
 
 
 
dd351ae
 
ec4f2f9
dd351ae
 
 
 
 
ec4f2f9
dd351ae
 
 
 
 
ec4f2f9
 
dd351ae
 
 
 
ec4f2f9
 
 
5ec0cca
ec4f2f9
 
 
5ec0cca
 
 
ec4f2f9
5ec0cca
ec4f2f9
5ec0cca
ec4f2f9
 
 
 
 
5ec0cca
ec4f2f9
 
5ec0cca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec4f2f9
 
5ec0cca
 
 
ec4f2f9
 
5ec0cca
 
 
 
 
 
 
 
 
 
ec4f2f9
 
5ec0cca
ec4f2f9
5ec0cca
 
ec4f2f9
 
 
 
 
5ec0cca
ec4f2f9
 
 
 
 
 
 
 
 
 
 
5ec0cca
ec4f2f9
5ec0cca
 
 
 
ec4f2f9
 
 
5ec0cca
ec4f2f9
 
5ec0cca
 
 
 
 
 
ec4f2f9
 
 
 
 
 
 
 
 
 
 
5ec0cca
ec4f2f9
 
5ec0cca
ec4f2f9
 
 
5ec0cca
 
ec4f2f9
5ec0cca
 
 
 
ec4f2f9
5ec0cca
ec4f2f9
 
 
5ec0cca
ec4f2f9
 
5ec0cca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec4f2f9
 
5ec0cca
ec4f2f9
 
 
 
 
 
5ec0cca
 
 
ec4f2f9
 
 
5ec0cca
 
 
 
 
 
 
 
 
ec4f2f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd351ae
ec4f2f9
 
 
 
 
 
 
 
dd351ae
ec4f2f9
 
 
 
dd351ae
ec4f2f9
 
 
 
 
 
dd351ae
ec4f2f9
dd351ae
ec4f2f9

import gradio as gr
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import random
from urllib.parse import quote_plus
import pandas as pd
import requests
from bs4 import BeautifulSoup
import shutil # Needed to find the binary
# --- LinkedIn scraper functions ---

def linkedin_job_search_engine(field, location=None, date_posted=None, experience_level=None):
    """Fetch LinkedIn's public job-search page and parse its job cards.

    Args:
        field: Search keywords (job title); URL-encoded into ``keywords``.
        location: Optional location text; added as ``location`` when truthy.
        date_posted: Optional label from the "Posted Within" dropdown. "Any Time"
            and labels missing from the mapping below add no date filter.
        experience_level: Optional integer 0-9, translated to ``f_E=1`` ..
            ``f_E=10``. Values outside that mapping add no filter.

    Returns:
        A ``(message, jobs)`` tuple. ``jobs`` is a list of dicts with the keys
        ``Title``, ``Company``, ``Location`` and ``Job Link``. It is empty when
        the request fails, returns a non-200 status, or no cards are parsed.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
    }

    base_url = f"https://www.linkedin.com/jobs/search/?keywords={quote_plus(field)}"

    if location:
        base_url += f"&location={quote_plus(location)}"

    if date_posted:
        # Values are "r<seconds>" windows for the f_TPR query parameter.
        # NOTE(review): 28800 s is 8 hours although the label says 7; the label is
        # kept unchanged because the UI dropdown passes it verbatim.
        date_filters = {
            "Any Time": "",
            "Recent (Last 24 hours)": "r86400",
            "7 Hour ago": "r28800",
            "12 hour ago": "r43200",
            "Past Week": "r604800",
            "Past Month": "r2592000",
        }
        filter_value = date_filters.get(date_posted, "")
        if filter_value:
            base_url += f"&f_TPR={filter_value}"

    if experience_level is not None:
        # Index i maps to f_E=i+1; unknown values add nothing (no dangling "&").
        exp_level_map = {index: f"f_E={index + 1}" for index in range(10)}
        exp_filter = exp_level_map.get(experience_level)
        if exp_filter:
            base_url += f"&{exp_filter}"

    try:
        # A timeout keeps a stalled connection from hanging the UI callback forever.
        response = requests.get(base_url, headers=headers, timeout=15)
    except requests.RequestException as exc:
        return f"Failed to fetch jobs: {exc}", []

    if response.status_code != 200:
        return f"Failed to fetch jobs. Status code: {response.status_code}", []

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_of(element):
        """Return the stripped text of a parsed element, or 'N/A' when absent."""
        return element.text.strip() if element else 'N/A'

    jobs = []
    for card in soup.find_all('div', class_='base-card'):
        # Easy Apply postings are deliberately excluded; check first to skip parsing.
        if card.find('span', class_='easy-apply-label'):
            continue

        link_elem = card.find('a', class_='base-card__full-link')
        # .get() tolerates an anchor that has no href attribute.
        job_link = link_elem.get('href', '#') if link_elem else '#'

        jobs.append({
            'Title': text_of(card.find('span', class_='sr-only')),
            'Company': text_of(card.find('h4', class_='base-search-card__subtitle')),
            'Location': text_of(card.find('span', class_='job-search-card__location')),
            'Job Link': job_link,
        })

    return f"Found {len(jobs)} jobs", jobs

def format_results(job_title, location, date_posted, experience_level):
    """Run the LinkedIn search and render the hits as a Markdown table.

    Args:
        job_title: Search keywords forwarded to the LinkedIn search.
        location: Location text forwarded to the LinkedIn search.
        date_posted: "Posted Within" label forwarded to the LinkedIn search.
        experience_level: Experience index as a digit string (or an int); anything
            that is not a plain non-negative integer falls back to 0.

    Returns:
        A ``(message, markdown)`` tuple. ``markdown`` is the results table, or
        a "No jobs found" notice when the search returned nothing.
    """
    import html

    # Accept ints as well as the dropdown's strings; None/blank/non-digits -> 0.
    level_text = "" if experience_level is None else str(experience_level).strip()
    try:
        exp_level_int = int(level_text) if level_text.isdigit() else 0
    except ValueError:
        # str.isdigit() accepts a few characters (e.g. superscripts) int() rejects.
        exp_level_int = 0

    message, jobs = linkedin_job_search_engine(job_title, location, date_posted, exp_level_int)
    if not jobs:
        return message, "No jobs found 😢"

    def cell(value):
        """Make scraped text safe for one Markdown table cell."""
        # Collapse newlines/runs of whitespace so the row stays on one line,
        # neutralise raw HTML, and escape pipes so they do not split the cell.
        flat = " ".join(str(value).split())
        return html.escape(flat, quote=False).replace("|", "\\|")

    table_header = """
| 📌 Title | 🏢 Company | 📍 Location | 🔗 Apply |
|---|---|---|---|
"""
    rows = []
    for job in jobs:
        # Escape quotes too: the link is placed inside a double-quoted attribute.
        link = html.escape(str(job['Job Link']), quote=True)
        apply_button = f'<a href="{link}" target="_blank" rel="noopener noreferrer">👉 Apply Now</a>'
        rows.append(
            f"| 💼 {cell(job['Title'])} | 🏢 {cell(job['Company'])} "
            f"| 📍 {cell(job['Location'])} | {apply_button} |\n"
        )

    return message, table_header + "".join(rows)

# --- All-jobs (Google search) scraper functions ---

def _locate_chrome_binary():
    """Return the path of an installed Chrome/Chromium binary, or None."""
    import os

    for candidate in ("google-chrome", "chromium-browser", "chromium"):
        found = shutil.which(candidate)
        if found:
            return found
    # Only use the conventional path when the file is really there; otherwise
    # let the driver fall back to its own discovery.
    fallback = "/usr/bin/google-chrome"
    return fallback if os.path.exists(fallback) else None


def _resolve_result_url(href):
    """Turn a search-result href into a destination URL, or None to skip it."""
    from urllib.parse import urlparse, parse_qs

    if not href or not href.startswith("http"):
        return None

    parsed = urlparse(href)
    # Unwrap Google's "/url?q=<target>" redirect BEFORE filtering google.com,
    # otherwise every redirect-wrapped result would be discarded.
    if parsed.path == "/url" and "google." in parsed.netloc:
        params = parse_qs(parsed.query)
        target = params.get("q") or params.get("url")
        if not target or not target[0].startswith("http"):
            return None
        href = target[0]

    if "google.com" in href or "youtube.com" in href:
        return None
    return href


def _collect_result_urls(elements, num_results):
    """Extract up to ``num_results`` unique destination URLs from elements."""
    urls = []
    for index, element in enumerate(elements):
        try:
            href = element.get_attribute("href")
            if not href:
                # Title elements such as <h3> carry no href; use the nearest
                # enclosing anchor when there is one.
                anchors = element.find_elements(By.XPATH, "./ancestor::a[1]")
                href = anchors[0].get_attribute("href") if anchors else None

            url = _resolve_result_url(href)
            if url and url not in urls:
                urls.append(url)
                print(f"{len(urls)}. {url}")
                if len(urls) >= num_results:
                    break
        except Exception as e:
            # A stale or detached element must not abort the whole extraction.
            print(f"❌ Error processing result {index}: {str(e)[:100]}...")
            continue
    return urls


def get_search_urls(search_url, num_results=20, is_query=True, headless=True):
    """Load a search page in Chrome and return the result links found on it.

    Args:
        search_url: A query typed into Google's search box when ``is_query``
            is true, otherwise a full URL that is opened directly.
        num_results: Maximum number of unique URLs to return.
        is_query: Selects between the two meanings of ``search_url`` above.
        headless: Run Chrome with ``--headless=new`` when true.

    Returns:
        A list of unique result URLs (google.com and youtube.com links are
        excluded). The list is empty when nothing could be extracted or any
        error occurred; errors are printed, never raised.
    """
    options = Options()

    if headless:
        options.add_argument("--headless=new")

    # --- Standard options ---
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")  # Important for containerized envs
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--disable-extensions")
    options.add_argument("--disable-plugins-discovery")
    options.add_argument("--disable-web-security")
    options.add_argument("--allow-running-insecure-content")
    options.add_argument("--window-size=1920,1080")

    # --- Binary location ---
    chrome_executable = _locate_chrome_binary()
    if chrome_executable:
        print(f"Setting Chrome binary location to: {chrome_executable}")
        options.binary_location = chrome_executable
    else:
        print("Warning: Could not find Chrome/Chromium executable. Proceeding with default (might fail).")

    # --- User agent ---
    # NOTE(review): this UA advertises Chrome 119; confirm it matches the Chrome
    # version actually installed where this runs.
    options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36")

    # --- Disable automation indicators ---
    options.add_argument("--disable-automation")
    options.add_argument("--disable-infobars")
    # NOTE(review): some undetected_chromedriver/Chrome combinations may reject
    # these experimental options at start-up — confirm against the runtime logs.
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)

    driver = None  # Lets the finally-block know whether there is anything to quit.
    try:
        driver = uc.Chrome(options=options)

        # Hide the navigator.webdriver flag from page scripts.
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

        # --- Perform search or navigation ---
        if is_query:
            print("Navigating to Google...")
            driver.get("https://www.google.com")

            # Random delay to mimic human behavior
            time.sleep(random.uniform(2, 4))

            print(f"Searching for: {search_url}")
            search_box = driver.find_element(By.NAME, "q")
            search_box.clear()
            search_box.send_keys(search_url)
            search_box.submit()
        else:
            print(f"Navigating to: {search_url}")
            driver.get(search_url)

        # Wait for page to load
        time.sleep(random.uniform(3, 5))

        # --- Check for blocking (warning only; extraction is still attempted) ---
        page_source = driver.page_source.lower()
        block_markers = ("blocked", "captcha", "unusual traffic", "sorry")
        if any(marker in page_source for marker in block_markers):
            print("⚠️ Detected potential blocking (CAPTCHA, 'Sorry' page, etc.). Page might not have loaded correctly.")

        # --- Extract URLs ---
        # Google changes its markup frequently, so several selectors are tried.
        selectors_to_try = [
            "h3 a",                 # Direct link within h3
            ".LC20lb.DKV0Md",       # Result title class
            ".g a[href^='http']",   # Link within result div starting with http
            ".yuRUbf a",
            ".tF2Cxc a",
        ]

        urls = []
        for selector in selectors_to_try:
            try:
                elements = driver.find_elements(By.CSS_SELECTOR, selector)
            except Exception as e:
                print(f"✗ Selector '{selector}' failed during find_elements: {str(e)[:100]}...")
                continue

            if not elements:
                continue

            print(f"✓ Found {len(elements)} potential results with selector: '{selector}'")
            urls = _collect_result_urls(elements, num_results)
            if urls:
                break
            # Matching elements is not enough: keep trying until a selector
            # actually produces links.
            print(f"✗ Selector '{selector}' matched elements but yielded no usable URLs; trying the next one.")

        if not urls:
            print("❌ No results found with any selector. Printing page info for debugging...")
            print(f"Page title: '{driver.title}'")
            print(f"Current URL: {driver.current_url}")
            return []

        print(f"✓ Successfully extracted {len(urls)} URLs")
        return urls

    except Exception as e:
        print(f"❌ Critical error during driver execution: {e}")
        import traceback
        traceback.print_exc()  # Full traceback for debugging
        return []

    finally:
        # --- Ensure the browser process is always released ---
        if driver:
            try:
                driver.quit()
                print("Driver quit successfully.")
            except Exception as e:
                print(f"Error quitting driver: {e}")  # Log error but don't crash
        else:
            print("Driver was not initialized, nothing to quit.")

def search_job(portal, job_title, job_type, location, posting, experience_level=""):
    """Build the Google search URL that targets one job portal.

    The query has the form ``<title>+<site filter>+<job type>``; portals that
    support it also get ``+<location>`` and experience keywords appended.

    Args:
        portal: Key identifying the portal (e.g. ``'indeed'``, ``'lever'``).
        job_title: Job title keywords.
        job_type: Work arrangement keyword (e.g. ``'remote'``). Blank or
            ``'any'`` adds no keyword, since it means "no preference".
        location: Location text; only used by portals flagged for it.
        posting: Value for Google's ``tbs=qdr:`` recency filter (e.g. ``'d'``,
            ``'w'``, ``'h4'``). Blank omits the filter entirely.
        experience_level: One of "Entry Level", "Mid Level", "Senior Level",
            "Executive"; any other value (including "Any") adds no keywords.

    Returns:
        The search URL, or ``""`` when ``portal`` is not recognised.
    """
    # Already URL-encoded keyword groups; "%2B" is a literal plus ("5+ years").
    experience_terms = {
        "Entry Level": "entry+level+junior+fresher",
        "Mid Level": "mid+level+2-5+years",
        "Senior Level": "senior+lead+5%2B+years",
        "Executive": "director+manager+executive+head",
    }

    # portal -> (pre-encoded site filter, append location?, append experience?)
    portal_specs = {
        'indeed': ("site:indeed.com", True, True),
        'greenhouse': ("site:greenhouse.io", False, False),
        'lever': ("site:lever.co", False, False),
        'ashby': ("site:ashbyhq.com", False, False),
        'pinpoint': ("site:pinpointhq.com", True, True),
        'job_subdomain': ("site:jobs.*", False, False),
        'careers_page': (
            "(site%3Acareers.*%20OR%20site%3A*%2Fcareers%2F*%20OR%20site%3A*%2Fcareer%2F*)",
            True,
            True,
        ),
        'talent_subdomain': ("site:talent.*", False, False),
        'paylocity': ("site:recruiting.paylocity.com", False, False),
        'keka': ("site:keka.com", True, True),
        'workable': ("site:jobs.workable.com", False, False),
        'breezyHR': ("site:breezy.hr", False, False),
        'wellfound': ("site:wellfound.com", True, False),
        'y_combinator': ("site:workatastartup.com", False, False),
        'oracle_cloud': ("site:oraclecloud.com", False, False),
        'workday': ("site:myworkdayjobs.com", False, False),
        'recruitee': ("site:recruitee.com", False, False),
        'rippling': ("(site%3Arippling.com%20OR%20site%3Arippling-ats.com)", False, False),
        'gusto': ("site:jobs.gusto.com", False, False),
        'teamtailor': ("site:teamtailor.com", False, False),
        'smartrecruiters': ("site:jobs.smartrecruiters.com", False, False),
        'builtin': ("site:builtin.com/job/", False, False),
        'glassdoor': ("site:glassdoor.com/job-listing/", False, False),
        'all_jobs': (
            "(site%3A*%2Femployment%2F*%20OR%20site%3A*%2Fopportunities%2F*"
            "%20OR%20site%3A*%2Fopenings%2F*%20OR%20site%3A*%2Fjoin-us%2F*"
            "%20OR%20site%3A*%2Fwork-with-us%2F*)",
            False,
            False,
        ),
    }

    spec = portal_specs.get(portal)
    if spec is None:
        return ""
    site_filter, use_location, use_experience = spec

    terms = [quote_plus(job_title or ""), site_filter]

    job_type_text = (job_type or "").strip()
    if job_type_text and job_type_text.lower() != "any":
        terms.append(quote_plus(job_type_text))

    if use_location and location:
        terms.append(quote_plus(location))

    if use_experience:
        # Appended as-is: the terms are pre-encoded, so quoting them again would
        # turn the "+" separators into literal plus signs.
        terms.append(experience_terms.get(experience_level, ""))

    url = "https://www.google.com/search?q=" + "+".join(term for term in terms if term)
    if posting:
        url += f"&tbs=qdr:{quote_plus(posting)}"
    return url

def _render_results_table(df):
    """Render the results DataFrame as an HTML table with clickable URL cells."""
    import html

    cell_style = "padding: 8px; border: 1px solid #ddd;"
    parts = [
        "<table border='1' class='dataframe' style='width:100%; border-collapse: collapse;'>",
        "<thead><tr style='background-color: #f2f2f2;'>",
    ]
    parts.extend(
        f"<th style='padding: 8px; text-align: left; border: 1px solid #ddd;'>{html.escape(str(col))}</th>"
        for col in df.columns
    )
    parts.append("</tr></thead><tbody>")

    for _, row in df.iterrows():
        parts.append("<tr>")
        for col in df.columns:
            # Values come from user input and scraped pages: escape them (quotes
            # included, because the href below is a single-quoted attribute).
            safe_value = html.escape(str(row[col]), quote=True)
            if col == 'URL':
                parts.append(
                    f"<td style='{cell_style}'><a href='{safe_value}' target='_blank' "
                    f"style='color: #1f77b4;' rel='noopener noreferrer'>Apply Now</a></td>"
                )
            else:
                parts.append(f"<td style='{cell_style}'>{safe_value}</td>")
        parts.append("</tr>")

    parts.append("</tbody></table>")
    return "".join(parts)


def search_jobs_interface(job_title, job_type, location, posting, experience_level, selected_portals, num_results):
    """Search every selected portal and package the results for the Gradio UI.

    Args:
        job_title: Job title keywords; must not be blank.
        job_type: Work arrangement keyword forwarded to the URL builder.
        location: Location text forwarded to the URL builder.
        posting: Recency filter value forwarded to the URL builder.
        experience_level: Experience label forwarded to the URL builder.
        selected_portals: Portal keys to search; must not be empty.
        num_results: Maximum number of URLs to collect per portal.

    Returns:
        A ``(progress_text, dataframe_or_None, html)`` tuple. The DataFrame
        holds one row per URL found and is ``None`` on validation failure or
        when nothing was found; ``html`` is the table or a red notice.
    """
    # "not job_title" also covers None, which has no .strip().
    if not job_title or not job_title.strip():
        return "❌ Please enter a job title", None, "<p style='color:red;'>❌ Please enter a job title</p>"

    if not selected_portals:
        return "❌ Please select at least one job portal", None, "<p style='color:red;'>❌ Please select at least one job portal</p>"

    # UI sliders may deliver a float; the limit is a count.
    per_portal_limit = int(num_results)

    all_results = []
    progress_messages = []

    for portal in selected_portals:
        # One failing portal is reported in the progress log, not propagated,
        # so the remaining portals are still searched.
        try:
            progress_messages.append(f"🔍 Searching {portal}...")

            search_url = search_job(portal, job_title, job_type, location, posting, experience_level)
            if not search_url:
                progress_messages.append(f"❌ Invalid portal: {portal}")
                continue

            # search_url is already a full Google URL, hence is_query=False.
            urls = get_search_urls(search_url, num_results=per_portal_limit, is_query=False, headless=True)
            all_results.extend(
                {
                    'Portal': portal.title(),
                    'Job Title': job_title,
                    'Location': location,
                    'Job Type': job_type,
                    'Experience Level': experience_level,
                    'URL': url,
                }
                for url in urls
            )
            progress_messages.append(f"✅ Found {len(urls)} jobs on {portal}")

        except Exception as e:
            progress_messages.append(f"❌ Error searching {portal}: {str(e)}")

    progress_summary = "\n".join(progress_messages)
    progress_summary += f"\n\n📊 Total Results: {len(all_results)} jobs found"

    if not all_results:
        no_results_html = "<p style='color:red;'>❌ No jobs found. Try different search parameters.</p>"
        return progress_summary + "\n\n❌ No jobs found. Try different search parameters.", None, no_results_html

    df = pd.DataFrame(all_results)
    return progress_summary, df, _render_results_table(df)


# --- Define the Gradio interface with Tabs ---

def create_gradio_interface():
    """Assemble the two-tab Gradio Blocks app and return it (not launched).

    Tab 1 wires the LinkedIn form to ``format_results``; tab 2 wires the
    Google-based multi-portal form to ``search_jobs_interface``.
    """
    # --- Static option data for the widgets below ---
    portal_choices = [
        'indeed', 'greenhouse', 'lever', 'ashby', 'pinpoint',
        'job_subdomain', 'careers_page', 'talent_subdomain',
        'paylocity', 'keka', 'workable', 'breezyHR', 'wellfound',
        'y_combinator', 'oracle_cloud', 'workday', 'recruitee',
        'rippling', 'gusto', 'teamtailor', 'smartrecruiters',
        'builtin', 'glassdoor', 'all_jobs'
    ]
    portals_checked_by_default = [
        'indeed', 'greenhouse', 'lever', 'builtin', 'glassdoor',
        'job_subdomain', 'careers_page', 'talent_subdomain',
        'paylocity', 'keka', 'workable', 'breezyHR', 'wellfound',
        'y_combinator', 'oracle_cloud', 'workday', 'recruitee',
        'rippling', 'gusto', 'teamtailor', 'smartrecruiters', 'all_jobs'
    ]
    linkedin_date_choices = [
        "Any Time", "Recent (Last 24 hours)", "7 Hour ago", "12 hour ago",
        "Past Week", "Past Month"
    ]
    linkedin_level_choices = list(map(str, range(10)))  # "0" .. "9"
    # (label shown, value passed to the search function) pairs
    google_posting_choices = [
        ('4 hour ago', 'h4'), ('8 hour ago', 'h8'), ('12 hour ago', 'h12'),
        ("Last 24 hours", "d"), ('2 days ago', 'h48'), ('3 days ago', 'h72'),
        ("Last week", "w"), ("Last month", "m"), ("Any time", "")
    ]

    with gr.Blocks(title="AI Job Search Engine", theme=gr.themes.Soft()) as demo:

        gr.Markdown("# 🚀 AI-Powered Job Search Engine")

        with gr.Tabs():
            # ---------- Tab 1: LinkedIn ----------
            with gr.TabItem("LinkedIn Jobs"):
                gr.Markdown("## 🔍 Search Jobs on LinkedIn")
                with gr.Row():
                    # Left column: search form
                    with gr.Column(scale=2):
                        li_title = gr.Textbox(
                            label="💼 Job Title",
                            placeholder="e.g., AI ML Engineer, Data Scientist",
                            value="AI ML Engineer"
                        )
                        with gr.Row():
                            li_location = gr.Textbox(
                                label="📍 Location",
                                placeholder="e.g., Pune, Mumbai, Bangalore",
                                value="Pune"
                            )
                            li_posted = gr.Dropdown(
                                label="📅 Posted Within",
                                choices=linkedin_date_choices,
                                value="Past Week"
                            )
                        with gr.Row():
                            li_level = gr.Dropdown(
                                label="⭐ Experience Level (Years)",
                                choices=linkedin_level_choices,
                                value="0"
                            )
                            # Hidden, unused textbox that shares the row with the dropdown.
                            _spare_slot = gr.Textbox(visible=False)

                        li_search = gr.Button("🔍 Search LinkedIn Jobs", variant="primary")

                    # Right column: status message and rendered listings
                    with gr.Column(scale=3):
                        li_message = gr.Textbox(
                            label="📈 Message",
                            lines=2,
                            max_lines=5,
                            interactive=False
                        )
                        li_listings = gr.Markdown(
                            label="📋 Job Listings"
                        )

                li_search.click(
                    fn=format_results,
                    inputs=[li_title, li_location, li_posted, li_level],
                    outputs=[li_message, li_listings]
                )

            # ---------- Tab 2: Google-based multi-portal search ----------
            with gr.TabItem("All Jobs (Google Search)"):
                gr.Markdown("## 🌐 Search Jobs across the Web (via Google)")
                with gr.Row():
                    # Left column: search form
                    with gr.Column(scale=2):

                        web_title = gr.Textbox(
                            label="💼 Job Title",
                            placeholder="e.g., AI ML Engineer, Data Scientist, Software Developer",
                            value="AI ML"
                        )

                        with gr.Row():
                            web_job_type = gr.Dropdown(
                                label="🏢 Job Type",
                                choices=["remote", "on-site", "hybrid", "any"],
                                value="remote"
                            )

                            web_location = gr.Textbox(
                                label="📍 Location",
                                placeholder="e.g., Pune, Mumbai, Bangalore",
                                value="pune"
                            )

                        with gr.Row():
                            web_posted = gr.Dropdown(
                                label="📅 Posted Within",
                                choices=google_posting_choices,
                                value="d"
                            )

                            web_level = gr.Dropdown(
                                label="⭐ Experience Level",
                                choices=["Any", "Entry Level", "Mid Level", "Senior Level", "Executive"],
                                value="Any"
                            )

                        web_portals = gr.CheckboxGroup(
                            label="🌐 Select Job Portals",
                            choices=portal_choices,
                            value=portals_checked_by_default
                        )

                        web_limit = gr.Slider(
                            label="📊 Results per Portal",
                            minimum=1,
                            maximum=30,
                            value=20,
                            step=1
                        )

                        web_search = gr.Button("🔍 Search Jobs", variant="primary", size="lg")

                    # Right column: progress log and clickable results table
                    with gr.Column(scale=3):
                        web_progress = gr.Textbox(
                            label="📈 Search Progress",
                            lines=10,
                            max_lines=15,
                            interactive=False
                        )

                        web_table_html = gr.HTML(
                            label="📋 Clickable Job Results"
                        )

                # The callback returns three values; the DataFrame goes to a
                # hidden component so only the HTML table is shown.
                hidden_frame = gr.Dataframe(visible=False)
                web_search.click(
                    fn=search_jobs_interface,
                    inputs=[web_title, web_job_type, web_location, web_posted, web_level, web_portals, web_limit],
                    outputs=[web_progress, hidden_frame, web_table_html]
                )

    return demo

# Launch the application
if __name__ == "__main__":
    # Only when run as a script: build the Blocks UI, then start the Gradio
    # server with its default launch settings.
    app = create_gradio_interface()
    app.launch()