# Standard library
import random
import shutil  # Used to locate the Chrome/Chromium binary on the host
import time
from urllib.parse import quote_plus, urlparse, parse_qs

# Third party
import gradio as gr
import pandas as pd
import requests
import undetected_chromedriver as uc
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


# --- LinkedIn Scraper Functions ---

def linkedin_job_search_engine(field, location=None, date_posted=None, experience_level=None):
    """Scrape LinkedIn's public (no-login) job-search results page.

    Args:
        field: Job title / keywords to search for.
        location: Optional location filter string.
        date_posted: Optional UI label (e.g. "Past Week") mapped to
            LinkedIn's ``f_TPR`` time-posted filter.
        experience_level: Optional int index 0-9 mapped to LinkedIn's
            ``f_E`` experience filter.

    Returns:
        ``(message, jobs)`` where ``message`` is a status string and ``jobs``
        is a list of dicts with Title / Company / Location / Job Link.
        Easy-Apply listings are skipped.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
    }
    base_url = f"https://www.linkedin.com/jobs/search/?keywords={quote_plus(field)}"
    if location:
        base_url += f"&location={quote_plus(location)}"

    if date_posted:
        # UI labels -> LinkedIn f_TPR values (seconds of look-back).
        date_filters = {
            "Any Time": "",
            "Recent (Last 24 hours)": "r86400",
            "7 Hour ago": "r28800",
            "12 hour ago": "r43200",
            "Past Week": "r604800",
            "Past Month": "r2592000",
        }
        filter_value = date_filters.get(date_posted, "")
        if filter_value:
            base_url += f"&f_TPR={filter_value}"

    if experience_level is not None:
        # Dropdown index i (0-9) -> "f_E=i+1".
        # NOTE(review): LinkedIn documents f_E values 1-6; 7-10 are kept from
        # the original code but may be ignored by LinkedIn — verify.
        exp_level_map = {i: f"f_E={i + 1}" for i in range(10)}
        base_url += f"&{exp_level_map.get(experience_level, '')}"

    response = requests.get(base_url, headers=headers)
    if response.status_code != 200:
        return f"Failed to fetch jobs. Status code: {response.status_code}", []

    soup = BeautifulSoup(response.text, "html.parser")
    job_cards = soup.find_all("div", class_="base-card")

    jobs = []
    for job in job_cards:
        title_elem = job.find("span", class_="sr-only")
        job_title = title_elem.text.strip() if title_elem else "N/A"
        company_elem = job.find("h4", class_="base-search-card__subtitle")
        company_name = company_elem.text.strip() if company_elem else "N/A"
        location_elem = job.find("span", class_="job-search-card__location")
        job_location = location_elem.text.strip() if location_elem else "N/A"
        job_link_elem = job.find("a", class_="base-card__full-link")
        job_link = job_link_elem["href"] if job_link_elem else "#"

        # Skip Easy Apply jobs — they cannot be opened as external applications.
        if job.find("span", class_="easy-apply-label"):
            continue

        jobs.append({
            "Title": job_title,
            "Company": company_name,
            "Location": job_location,
            "Job Link": job_link,
        })

    return f"Found {len(jobs)} jobs", jobs


def format_results(job_title, location, date_posted, experience_level):
    """Run the LinkedIn search and render results as a Markdown table.

    Returns:
        ``(message, markdown)`` suitable for the Gradio Textbox/Markdown outputs.
    """
    # The dropdown supplies experience level as a string ("0".."9").
    try:
        exp_level_int = int(experience_level) if experience_level and experience_level.isdigit() else 0
    except ValueError:
        exp_level_int = 0  # Fall back to entry level on malformed input

    message, jobs = linkedin_job_search_engine(job_title, location, date_posted, exp_level_int)
    if not jobs:
        return message, "No jobs found 😢"

    # Table header.
    table_md = """
| šŸ“Œ Title | šŸ¢ Company | šŸ“ Location | šŸ”— Apply |
|---|---|---|---|
"""
    # Rows with links that open in a new tab.
    for job in jobs:
        title = job["Title"]
        company = job["Company"]
        loc = job["Location"]
        link = job["Job Link"]
        # BUG FIX: the original f-string contained no placeholder, so the job
        # link was silently dropped. Render an HTML anchor (gr.Markdown allows
        # inline HTML) so the link opens in a new tab.
        apply_button = f'<a href="{link}" target="_blank">šŸ‘‰ Apply Now</a>'
        table_md += f"| šŸ’¼ {title} | šŸ¢ {company} | šŸ“ {loc} | {apply_button} |\n"
    return message, table_md


# --- All Jobs Scraper Functions ---

def get_search_urls(search_url, num_results=20, is_query=True, headless=True):
    """Drive undetected-chromedriver to collect result URLs from Google.

    Args:
        search_url: Either a search query string (``is_query=True``) or a
            full URL to navigate to directly.
        num_results: Maximum number of URLs to return.
        is_query: When True, type ``search_url`` into Google's search box.
        headless: Run Chrome headless (required in containerized hosts).

    Returns:
        List of de-duplicated external result URLs (Google/YouTube excluded).
        Returns ``[]`` on failure; the driver is always quit in ``finally``.
    """
    options = Options()
    if headless:
        # "--headless=new" is the preferred mode for recent Chrome versions.
        options.add_argument("--headless=new")

    # Standard hardening / container options.
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")  # Important in containers
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--disable-extensions")
    options.add_argument("--disable-plugins-discovery")
    options.add_argument("--disable-web-security")
    options.add_argument("--allow-running-insecure-content")
    options.add_argument("--window-size=1920,1080")

    # Crucial for Hugging Face Spaces: set the browser binary explicitly.
    chrome_executable = (
        shutil.which("google-chrome")
        or shutil.which("chromium-browser")
        or shutil.which("chromium")
        or "/usr/bin/google-chrome"  # Fallback common path
    )
    if chrome_executable:
        print(f"Setting Chrome binary location to: {chrome_executable}")
        options.binary_location = chrome_executable
    else:
        print("Warning: Could not find Chrome/Chromium executable. Proceeding with default (might fail).")

    # NOTE(review): this UA should match the Chrome version actually installed
    # on the host — adjust if the Space's Chrome differs.
    options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36")

    # Disable automation indicators.
    options.add_argument("--disable-automation")
    options.add_argument("--disable-infobars")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)

    driver = None
    try:
        # Let undetected-chromedriver auto-detect the Chrome major version.
        driver = uc.Chrome(options=options)

        # Remove the navigator.webdriver flag that betrays automation.
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

        if is_query:
            print("Navigating to Google...")
            driver.get("https://www.google.com")
            # Random delay to mimic human behavior.
            time.sleep(random.uniform(2, 4))
            print(f"Searching for: {search_url}")
            search_box = driver.find_element(By.NAME, "q")
            search_box.clear()
            search_box.send_keys(search_url)
            search_box.submit()
        else:
            print(f"Navigating to: {search_url}")
            driver.get(search_url)

        # Wait for the results page to load.
        time.sleep(random.uniform(3, 5))

        # Heuristic block detection (CAPTCHA / "sorry" interstitial).
        page_source = driver.page_source.lower()
        if ("blocked" in page_source or "captcha" in page_source
                or "unusual traffic" in page_source or "sorry" in page_source):
            print("āš ļø Detected potential blocking (CAPTCHA, 'Sorry' page, etc.). Page might not have loaded correctly.")

        urls = []

        # Google rotates its result markup; try several selectors in order.
        selectors_to_try = [
            "h3 a",               # Direct link within h3
            ".LC20lb.DKV0Md",     # Result title class
            ".g a[href^='http']", # Link within result div starting with http
            ".yuRUbf a",          # Another common Google class
            ".tF2Cxc a",          # Another common structure
        ]
        results = []
        for selector in selectors_to_try:
            try:
                temp_results = driver.find_elements(By.CSS_SELECTOR, selector)
                if temp_results:
                    results = temp_results
                    print(f"āœ“ Found {len(results)} potential results with selector: '{selector}'")
                    break
            except Exception as e:
                print(f"āœ— Selector '{selector}' failed during find_elements: {str(e)[:100]}...")
                continue

        if not results:
            print("āŒ No results found with any selector. Printing page info for debugging...")
            print(f"Page title: '{driver.title}'")
            print(f"Current URL: {driver.current_url}")
            return []

        # Extract, clean, and de-duplicate result hrefs.
        for i, result in enumerate(results):
            try:
                url = result.get_attribute("href")
                # Keep only absolute external links; drop Google/YouTube hosts.
                if url and url.startswith("http") and "google.com" not in url and "youtube.com" not in url:
                    # Unwrap Google's "/url?q=..." redirect when present.
                    parsed_url = urlparse(url)
                    if "url" in parsed_url.path:
                        query_params = parse_qs(parsed_url.query)
                        if "q" in query_params:
                            url = query_params["q"][0]
                    if url not in urls:
                        urls.append(url)
                        print(f"{len(urls)}. {url}")
                    if len(urls) >= num_results:
                        break
            except Exception as e:
                print(f"āŒ Error processing result {i}: {str(e)[:100]}...")
                continue

        print(f"āœ“ Successfully extracted {len(urls)} URLs")
        return urls

    except Exception as e:
        print(f"āŒ Critical error during driver execution: {e}")
        import traceback
        traceback.print_exc()
        return []
    finally:
        # Always release the browser, even on failure.
        if driver:
            try:
                driver.quit()
                print("Driver quit successfully.")
            except Exception as e:
                print(f"Error quitting driver: {e}")  # Log but don't crash
        else:
            print("Driver was not initialized, nothing to quit.")


def search_job(portal, job_title, job_type, location, posting, experience_level=""):
    """Build the Google dork URL for one job portal.

    Args:
        portal: Key into the portal table below (e.g. "indeed", "lever").
        job_title: Search keywords.
        job_type: "remote" / "on-site" / "hybrid" / "any".
        location: Location string (only some portals use it).
        posting: Google ``tbs=qdr:`` recency code (e.g. "d", "w", "h4").
        experience_level: UI label; expanded into extra query keywords.

    Returns:
        The search URL string, or "" for an unknown portal.
    """
    # Expand the experience level into extra search keywords.
    experience_query = ""
    if experience_level and experience_level != "Any":
        if experience_level == "Entry Level":
            experience_query = "+entry+level+junior+fresher"
        elif experience_level == "Mid Level":
            experience_query = "+mid+level+2-5+years"
        elif experience_level == "Senior Level":
            experience_query = "+senior+lead+5++years"
        elif experience_level == "Executive":
            experience_query = "+director+manager+executive+head"

    job_portal_with_link = {
        'indeed': f'https://www.google.com/search?q={quote_plus(job_title)}+site:indeed.com+{quote_plus(job_type)}+{quote_plus(location)}{experience_query}&tbs=qdr:{quote_plus(posting)}',
        'greenhouse': f'https://www.google.com/search?q={quote_plus(job_title)}+site:greenhouse.io+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        # BUG FIX: the original passed job_type where the keywords belong, so
        # the job title was never searched on lever.co.
        'lever': f'https://www.google.com/search?q={quote_plus(job_title)}+site:lever.co+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'ashby': f'https://www.google.com/search?q={quote_plus(job_title)}+site:ashbyhq.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'pinpoint': f'https://www.google.com/search?q={quote_plus(job_title)}+site:pinpointhq.com+{quote_plus(job_type)}+{quote_plus(location)}{experience_query}&tbs=qdr:{quote_plus(posting)}',
        'job_subdomain': f'https://www.google.com/search?q={quote_plus(job_title)}+site:jobs.*+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        # BUG FIX: experience_query is already "+"-joined; quote_plus() was
        # double-encoding it ("%2B"), unlike every other portal entry.
        'careers_page': f'https://www.google.com/search?q={quote_plus(job_title)}+(site%3Acareers.*%20OR%20site%3A*%2Fcareers%2F*%20OR%20site%3A*%2Fcareer%2F*)+{quote_plus(job_type)}+{quote_plus(location)}{experience_query}&tbs=qdr:{quote_plus(posting)}',
        'talent_subdomain': f'https://www.google.com/search?q={quote_plus(job_title)}+site:talent.*+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'paylocity': f'https://www.google.com/search?q={quote_plus(job_title)}+site:recruiting.paylocity.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'keka': f'https://www.google.com/search?q={quote_plus(job_title)}+site:keka.com+{quote_plus(job_type)}+{quote_plus(location)}{experience_query}&tbs=qdr:{quote_plus(posting)}',
        'workable': f'https://www.google.com/search?q={quote_plus(job_title)}+site:jobs.workable.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'breezyHR': f'https://www.google.com/search?q={quote_plus(job_title)}+site:breezy.hr+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'wellfound': f'https://www.google.com/search?q={quote_plus(job_title)}+site:wellfound.com+{quote_plus(job_type)}+{quote_plus(location)}&tbs=qdr:{quote_plus(posting)}',
        'y_combinator': f'https://www.google.com/search?q={quote_plus(job_title)}+site:workatastartup.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'oracle_cloud': f'https://www.google.com/search?q={quote_plus(job_title)}+site:oraclecloud.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'workday': f'https://www.google.com/search?q={quote_plus(job_title)}+site:myworkdayjobs.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'recruitee': f'https://www.google.com/search?q={quote_plus(job_title)}+site:recruitee.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'rippling': f'https://www.google.com/search?q={quote_plus(job_title)}+(site%3Arippling.com%20OR%20site%3Arippling-ats.com)+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'gusto': f'https://www.google.com/search?q={quote_plus(job_title)}+site:jobs.gusto.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'teamtailor': f'https://www.google.com/search?q={quote_plus(job_title)}+site:teamtailor.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'smartrecruiters': f'https://www.google.com/search?q={quote_plus(job_title)}+site:jobs.smartrecruiters.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'builtin': f'https://www.google.com/search?q={quote_plus(job_title)}+site:builtin.com/job/+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'glassdoor': f'https://www.google.com/search?q={quote_plus(job_title)}+site:glassdoor.com/job-listing/+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'all_jobs': f'https://www.google.com/search?q={quote_plus(job_title)}+(site%3A*%2Femployment%2F*%20OR%20site%3A*%2Fopportunities%2F*%20OR%20site%3A*%2Fopenings%2F*%20OR%20site%3A*%2Fjoin-us%2F*%20OR%20site%3A*%2Fwork-with-us%2F*)+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
    }
    return job_portal_with_link.get(portal, "")


def search_jobs_interface(job_title, job_type, location, posting, experience_level, selected_portals, num_results):
    """Main handler for the "All Jobs" Gradio tab.

    Returns:
        (progress_text, dataframe_or_None, html_results) — the three outputs
        wired to the search button.
    """
    # NOTE(review): the HTML fragments below were reconstructed — the original
    # file's markup tags were stripped in transit; verify styling against the
    # deployed app.
    if not job_title.strip():
        return (
            "āŒ Please enter a job title",
            None,
            "<div style='color: red; text-align: center; padding: 20px;'>āŒ Please enter a job title</div>",
        )
    if not selected_portals:
        return (
            "āŒ Please select at least one job portal",
            None,
            "<div style='color: red; text-align: center; padding: 20px;'>āŒ Please select at least one job portal</div>",
        )

    all_results = []
    progress_messages = []

    for portal in selected_portals:
        try:
            progress_messages.append(f"šŸ” Searching {portal}...")
            search_url = search_job(portal, job_title, job_type, location, posting, experience_level)
            if search_url:
                # Selenium fetch of the Google results page for this portal.
                urls = get_search_urls(search_url, num_results=num_results, is_query=False, headless=True)
                for url in urls:
                    all_results.append({
                        'Portal': portal.title(),
                        'Job Title': job_title,
                        'Location': location,
                        'Job Type': job_type,
                        'Experience Level': experience_level,
                        'URL': url,  # Raw URL kept for the DataFrame
                    })
                progress_messages.append(f"āœ… Found {len(urls)} jobs on {portal}")
            else:
                progress_messages.append(f"āŒ Invalid portal: {portal}")
        except Exception as e:
            progress_messages.append(f"āŒ Error searching {portal}: {str(e)}")

    progress_summary = "\n".join(progress_messages)
    progress_summary += f"\n\nšŸ“Š Total Results: {len(all_results)} jobs found"

    if all_results:
        df = pd.DataFrame(all_results)
        # Build an HTML table so the URL column is clickable in gr.HTML.
        html_table = "<table border='1' style='width:100%; border-collapse: collapse;'>"
        html_table += "<tr>"
        for col in df.columns:
            html_table += f"<th style='padding: 8px; text-align: left;'>{col}</th>"
        html_table += "</tr>"
        for _, row in df.iterrows():
            html_table += "<tr>"
            for col in df.columns:
                cell_value = row[col]
                if col == 'URL':
                    # Clickable apply link, opening in a new tab.
                    html_table += f"<td style='padding: 8px;'><a href='{cell_value}' target='_blank'>Apply Now</a></td>"
                else:
                    html_table += f"<td style='padding: 8px;'>{cell_value}</td>"
            html_table += "</tr>"
        html_table += "</table>"
        return progress_summary, df, html_table

    no_results_html = "<div style='text-align: center; padding: 20px;'>āŒ No jobs found. Try different search parameters.</div>"
    return progress_summary + "\n\nāŒ No jobs found. Try different search parameters.", None, no_results_html


# --- Define the Gradio interface with Tabs ---

def create_gradio_interface():
    """Assemble the two-tab Gradio Blocks app and return it (not launched)."""
    # Available job portals for the All Jobs scraper.
    job_portals = [
        'indeed', 'greenhouse', 'lever', 'ashby', 'pinpoint', 'job_subdomain',
        'careers_page', 'talent_subdomain', 'paylocity', 'keka', 'workable',
        'breezyHR', 'wellfound', 'y_combinator', 'oracle_cloud', 'workday',
        'recruitee', 'rippling', 'gusto', 'teamtailor', 'smartrecruiters',
        'builtin', 'glassdoor', 'all_jobs',
    ]

    with gr.Blocks(title="AI Job Search Engine", theme=gr.themes.Soft()) as app:
        gr.Markdown("# šŸš€ AI-Powered Job Search Engine")

        with gr.Tabs():
            # --- Tab 1: LinkedIn Jobs Scraper ---
            with gr.TabItem("LinkedIn Jobs"):
                gr.Markdown("## šŸ” Search Jobs on LinkedIn")
                with gr.Row():
                    with gr.Column(scale=2):
                        linkedin_job_title = gr.Textbox(
                            label="šŸ’¼ Job Title",
                            placeholder="e.g., AI ML Engineer, Data Scientist",
                            value="AI ML Engineer"
                        )
                        with gr.Row():
                            linkedin_location = gr.Textbox(
                                label="šŸ“ Location",
                                placeholder="e.g., Pune, Mumbai, Bangalore",
                                value="Pune"
                            )
                            linkedin_date_posted = gr.Dropdown(
                                label="šŸ“… Posted Within",
                                choices=["Any Time", "Recent (Last 24 hours)", "7 Hour ago", "12 hour ago", "Past Week", "Past Month"],
                                value="Past Week"
                            )
                        with gr.Row():
                            # LinkedIn uses 0-9 for experience levels.
                            linkedin_experience_level = gr.Dropdown(
                                label="⭐ Experience Level (Years)",
                                choices=[str(i) for i in range(10)],  # 0 to 9
                                value="0"  # Default to Entry Level
                            )
                            # Placeholder for future inputs if needed.
                            dummy = gr.Textbox(visible=False)
                        linkedin_search_btn = gr.Button("šŸ” Search LinkedIn Jobs", variant="primary")
                    with gr.Column(scale=3):
                        linkedin_result_msg = gr.Textbox(
                            label="šŸ“ˆ Message",
                            lines=2,
                            max_lines=5,
                            interactive=False
                        )
                        linkedin_result_display = gr.Markdown(
                            label="šŸ“‹ Job Listings"
                        )
                # Connect LinkedIn search function.
                linkedin_search_btn.click(
                    fn=format_results,
                    inputs=[linkedin_job_title, linkedin_location, linkedin_date_posted, linkedin_experience_level],
                    outputs=[linkedin_result_msg, linkedin_result_display]
                )

            # --- Tab 2: All Jobs Scraper (Google-based) ---
            with gr.TabItem("All Jobs (Google Search)"):
                gr.Markdown("## 🌐 Search Jobs across the Web (via Google)")
                with gr.Row():
                    with gr.Column(scale=2):
                        job_title = gr.Textbox(
                            label="šŸ’¼ Job Title",
                            placeholder="e.g., AI ML Engineer, Data Scientist, Software Developer",
                            value="AI ML"
                        )
                        with gr.Row():
                            job_type = gr.Dropdown(
                                label="šŸ¢ Job Type",
                                choices=["remote", "on-site", "hybrid", "any"],
                                value="remote"
                            )
                            location = gr.Textbox(
                                label="šŸ“ Location",
                                placeholder="e.g., Pune, Mumbai, Bangalore",
                                value="pune"
                            )
                        with gr.Row():
                            posting = gr.Dropdown(
                                label="šŸ“… Posted Within",
                                choices=[('4 hour ago', 'h4'), ('8 hour ago', 'h8'), ('12 hour ago', 'h12'), ("Last 24 hours", "d"), ('2 days ago', 'h48'), ('3 days ago', 'h72'), ("Last week", "w"), ("Last month", "m"), ("Any time", "")],
                                value="d"
                            )
                            experience_level = gr.Dropdown(
                                label="⭐ Experience Level",
                                choices=["Any", "Entry Level", "Mid Level", "Senior Level", "Executive"],
                                value="Any"
                            )
                        selected_portals = gr.CheckboxGroup(
                            label="🌐 Select Job Portals",
                            choices=job_portals,
                            value=['indeed', 'greenhouse', 'lever', 'builtin', 'glassdoor', 'job_subdomain',
                                   'careers_page', 'talent_subdomain', 'paylocity', 'keka', 'workable',
                                   'breezyHR', 'wellfound', 'y_combinator', 'oracle_cloud', 'workday',
                                   'recruitee', 'rippling', 'gusto', 'teamtailor', 'smartrecruiters', 'all_jobs']
                        )
                        num_results = gr.Slider(
                            label="šŸ“Š Results per Portal",
                            minimum=1,
                            maximum=30,
                            value=20,
                            step=1
                        )
                        search_btn = gr.Button("šŸ” Search Jobs", variant="primary", size="lg")
                    with gr.Column(scale=3):
                        progress_output = gr.Textbox(
                            label="šŸ“ˆ Search Progress",
                            lines=10,
                            max_lines=15,
                            interactive=False
                        )
                        # HTML component for clickable links.
                        html_output = gr.HTML(
                            label="šŸ“‹ Clickable Job Results"
                        )
                # Connect the search function; DataFrame output is kept hidden.
                search_btn.click(
                    fn=search_jobs_interface,
                    inputs=[job_title, job_type, location, posting, experience_level, selected_portals, num_results],
                    outputs=[progress_output, gr.Dataframe(visible=False), html_output]
                )

    return app


# Launch the application
if __name__ == "__main__":
    app = create_gradio_interface()
    app.launch()