import gradio as gr
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import random
from urllib.parse import quote_plus
import pandas as pd
import requests
from bs4 import BeautifulSoup
import shutil # Needed to find the binary
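
# Runtime dependencies (inferred from the imports above); in a fresh environment,
# something like the following should install them:
#   pip install gradio undetected-chromedriver selenium pandas requests beautifulsoup4
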
# --- LinkedIn Scraper Functions ---

def linkedin_job_search_engine(field, location=None, date_posted=None, experience_level=None):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
    }

    base_url = f"https://www.linkedin.com/jobs/search/?keywords={quote_plus(field)}"

    if location:
        base_url += f"&location={quote_plus(location)}"

    if date_posted:
        date_filters = {
            "Any Time": "",
            "Recent (Last 24 hours)": "r86400",
            "Past 8 Hours": "r28800",   # 28,800 s = 8 hours
            "Past 12 Hours": "r43200",  # 43,200 s = 12 hours
            "Past Week": "r604800",
            "Past Month": "r2592000"
        }
        filter_value = date_filters.get(date_posted, "")
        if filter_value:
            base_url += f"&f_TPR={filter_value}"

    if experience_level is not None:
        # Note: LinkedIn's f_E filter denotes experience tiers (1 = Internship ... 6 = Executive),
        # not years of experience; values above 6 are generally ignored by LinkedIn.
        exp_level_map = {
            0: "f_E=1",
            1: "f_E=2",
            2: "f_E=3",
            3: "f_E=4",
            4: "f_E=5",
            5: "f_E=6",
            6: "f_E=7",
            7: "f_E=8",
            8: "f_E=9",
            9: "f_E=10"
        }
        base_url += f"&{exp_level_map.get(experience_level, '')}"

    response = requests.get(base_url, headers=headers)
    if response.status_code != 200:
        return f"Failed to fetch jobs. Status code: {response.status_code}", []

    soup = BeautifulSoup(response.text, 'html.parser')
    job_cards = soup.find_all('div', class_='base-card')

    jobs = []
    for job in job_cards:
        title_elem = job.find('span', class_='sr-only')
        job_title = title_elem.text.strip() if title_elem else 'N/A'

        company_elem = job.find('h4', class_='base-search-card__subtitle')
        company_name = company_elem.text.strip() if company_elem else 'N/A'

        location_elem = job.find('span', class_='job-search-card__location')
        job_location = location_elem.text.strip() if location_elem else 'N/A'

        job_link_elem = job.find('a', class_='base-card__full-link')
        job_link = job_link_elem['href'] if job_link_elem else '#'

        easy_apply_elem = job.find('span', class_='easy-apply-label')
        if easy_apply_elem:
            continue  # Skip Easy Apply listings

        jobs.append({
            'Title': job_title,
            'Company': company_name,
            'Location': job_location,
            'Job Link': job_link
        })

    return f"Found {len(jobs)} jobs", jobs

def format_results(job_title, location, date_posted, experience_level):
    # The dropdown supplies experience_level as a string ("0"-"9");
    # convert it to an integer index, defaulting to 0 (entry level) on anything unexpected.
    try:
        exp_level_int = int(experience_level) if experience_level and experience_level.isdigit() else 0
    except ValueError:
        exp_level_int = 0  # Fallback to entry level

    message, jobs = linkedin_job_search_engine(job_title, location, date_posted, exp_level_int)
    if not jobs:
        return message, "No jobs found 😢"

    # Create table header
    table_md = """
| 📌 Title | 🏢 Company | 📍 Location | 🔗 Apply |
|---|---|---|---|
"""
    # Add rows with links opening in new tab
    for job in jobs:
        title = job['Title']
        company = job['Company']
        loc = job['Location']
        link = job['Job Link']
        # Ensure link is absolute or handle relative links if necessary
        apply_button = f'<a href="{link}" target="_blank" rel="noopener noreferrer">👉 Apply Now</a>'
        table_md += f"| 💼 {title} | 🏢 {company} | 📍 {loc} | {apply_button} |\n"

    return message, table_md

# --- All Jobs Scraper Functions (Google-search based) ---

def get_search_urls(search_url, num_results=20, is_query=True, headless=True):
    """Your existing Selenium function - modified for Hugging Face Spaces"""
    options = Options()

    if headless:
        # Ensure headless is set correctly for newer Chrome versions
        # --headless=new is generally preferred
        options.add_argument("--headless=new")

    # --- Standard Options ---
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage") # Important for containerized envs
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--disable-extensions")
    options.add_argument("--disable-plugins-discovery")
    options.add_argument("--disable-web-security")
    options.add_argument("--allow-running-insecure-content")
    # options.add_argument("--disable-features=VizDisplayCompositor") # Can sometimes cause issues, try disabling
    options.add_argument("--window-size=1920,1080")

    # --- Crucial for Hugging Face Spaces: Set Binary Location Explicitly ---
    # Try common paths or use shutil.which
    chrome_executable = (
        shutil.which("google-chrome") or
        shutil.which("chromium-browser") or
        shutil.which("chromium") or
        "/usr/bin/google-chrome" # Fallback common path
        # Add more potential paths if needed based on your space logs
    )

    if chrome_executable:
        print(f"Setting Chrome binary location to: {chrome_executable}")
        options.binary_location = chrome_executable
    else:
        print("Warning: Could not find Chrome/Chromium executable. Proceeding with default (might fail).")
        # If not found, uc.Chrome might try its default, but explicit is better.

    # --- User Agent ---
    # Ensure this UA matches the *actual* Chrome version available on Hugging Face
    # You might need to adjust this. Check Hugging Face docs or logs for Chrome version.
    options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36")

    # --- Disable Automation Indicators ---
    options.add_argument("--disable-automation")
    options.add_argument("--disable-infobars")
    # Exclude the `enable-automation` switch itself
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)


    driver = None # Initialize driver variable
    try:
        # --- Initialize undetected_chromedriver ---
        # Pass the options object
        # Specifying version_main might help, but often letting it auto-detect is better.
        # If you know the exact Chrome version on Hugging Face, use it.
        # version_main = 119 # Example, adjust if needed or remove
        driver = uc.Chrome(options=options)  # version_main omitted; uc auto-detects the installed Chrome

        # --- Execute Script to Remove Webdriver Flag ---
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

        # --- Perform Search or Navigation ---
        if is_query:
            print("Navigating to Google...")
            # Ensure clean URL
            driver.get("https://www.google.com")

            # Add random delay to mimic human behavior
            time.sleep(random.uniform(2, 4))

            print(f"Searching for: {search_url}")
            search_box = driver.find_element(By.NAME, "q")
            search_box.clear()
            search_box.send_keys(search_url)
            search_box.submit()
        else:
            print(f"Navigating to: {search_url}")
            driver.get(search_url)

        # Wait for page to load
        time.sleep(random.uniform(3, 5))

        # --- Check for Blocking ---
        page_source = driver.page_source.lower()
        if "blocked" in page_source or "captcha" in page_source or "unusual traffic" in page_source or "sorry" in page_source: # Add "sorry"
            print("⚠️ Detected potential blocking (CAPTCHA, 'Sorry' page, etc.). Page might not have loaded correctly.")
            # Consider returning an empty list or raising an exception here
            # return [] # Or handle as appropriate

        urls = []

        # --- Extract URLs ---
        # Try multiple selectors as Google changes them frequently
        selectors_to_try = [
            "h3 a", # Direct link within h3
            ".LC20lb.DKV0Md", # More specific Google result title class
            ".g a[href^='http']", # Link within result div starting with http
            ".yuRUbf a", # Another common Google class
            ".tF2Cxc a" # Another common structure
            # Add more if needed, inspect the HTML in Spaces if this fails
        ]

        results = []
        successful_selector = None

        for selector in selectors_to_try:
            try:
                temp_results = driver.find_elements(By.CSS_SELECTOR, selector)
                if temp_results:
                    results = temp_results
                    successful_selector = selector
                    print(f"✓ Found {len(results)} potential results with selector: '{selector}'")
                    break
            except Exception as e:
                print(f"✗ Selector '{selector}' failed during find_elements: {str(e)[:100]}...")
                continue

        if not results:
            print("❌ No results found with any selector. Printing page info for debugging...")
            print(f"Page title: '{driver.title}'")
            print(f"Current URL: {driver.current_url}")
            # Limiting source print length for logs
            # print(f"Page source snippet: {driver.page_source[:2000]}...")
            # Consider saving source for detailed debugging if needed locally
            # with open("debug_page_spaces.html", "w", encoding="utf-8") as f:
            #     f.write(driver.page_source)
            # print("📄 Debug page source saved (if file system allows).")

            return []

        # --- Process Results ---
        for i, result in enumerate(results):
            try:
                 # Get the href directly from the element found by the selector
                 url = result.get_attribute("href")

                 # Validate and clean URL
                 if url and url.startswith("http") and "google.com" not in url and "youtube.com" not in url:
                     # Remove Google redirect if present (more robust check)
                     from urllib.parse import urlparse, parse_qs
                     parsed_url = urlparse(url)
                     if 'url' in parsed_url.path: # Check path for /url
                          query_params = parse_qs(parsed_url.query)
                          if 'q' in query_params:
                              url = query_params['q'][0]

                     if url not in urls:
                         urls.append(url)
                         print(f"{len(urls)}. {url}")

                         if len(urls) >= num_results:
                             break

            except Exception as e:
                print(f"❌ Error processing result {i}: {str(e)[:100]}...")
                continue

        print(f"✓ Successfully extracted {len(urls)} URLs")
        return urls

    except Exception as e:
        print(f"❌ Critical error during driver execution: {e}")
        import traceback
        traceback.print_exc() # Print full traceback for debugging
        return []

    finally:
        # --- Ensure Driver Quits ---
        if driver:
            try:
                driver.quit()
                print("Driver quit successfully.")
            except Exception as e:
                print(f"Error quitting driver: {e}") # Log error but don't crash
        else:
            print("Driver was not initialized, nothing to quit.")

def search_job(portal, job_title, job_type, location, posting, experience_level=""):
    """Enhanced job search function with experience levels"""

    # Add experience level to search query if provided
    experience_query = ""
    if experience_level and experience_level != "Any":
        # More specific queries might be needed depending on how sites filter
        if experience_level == "Entry Level":
            experience_query = "+entry+level+junior+fresher"
        elif experience_level == "Mid Level":
            experience_query = "+mid+level+2-5+years"
        elif experience_level == "Senior Level":
            experience_query = "+senior+lead+5++years"
        elif experience_level == "Executive":
            experience_query = "+director+manager+executive+head"

    job_portal_with_link = {
        'indeed': f'https://www.google.com/search?q={quote_plus(job_title)}+site:indeed.com+{quote_plus(job_type)}+{quote_plus(location)}{experience_query}&tbs=qdr:{quote_plus(posting)}',
        'greenhouse': f'https://www.google.com/search?q={quote_plus(job_title)}+site:greenhouse.io+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'lever': f'https://www.google.com/search?q={quote_plus(job_title)}+site:lever.co+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'ashby': f'https://www.google.com/search?q={quote_plus(job_title)}+site:ashbyhq.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'pinpoint': f'https://www.google.com/search?q={quote_plus(job_title)}+site:pinpointhq.com+{quote_plus(job_type)}+{quote_plus(location)}{experience_query}&tbs=qdr:{quote_plus(posting)}',
        'job_subdomain': f'https://www.google.com/search?q={quote_plus(job_title)}+site:jobs.*+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'careers_page': f'https://www.google.com/search?q={quote_plus(job_title)}+(site%3Acareers.*%20OR%20site%3A*%2Fcareers%2F*%20OR%20site%3A*%2Fcareer%2F*)+{quote_plus(job_type)}+{quote_plus(location)}{experience_query}&tbs=qdr:{quote_plus(posting)}',
        'talent_subdomain': f'https://www.google.com/search?q={quote_plus(job_title)}+site:talent.*+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'paylocity': f'https://www.google.com/search?q={quote_plus(job_title)}+site:recruiting.paylocity.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'keka': f'https://www.google.com/search?q={quote_plus(job_title)}+site:keka.com+{quote_plus(job_type)}+{quote_plus(location)}{experience_query}&tbs=qdr:{quote_plus(posting)}',
        'workable': f'https://www.google.com/search?q={quote_plus(job_title)}+site:jobs.workable.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'breezyHR': f'https://www.google.com/search?q={quote_plus(job_title)}+site:breezy.hr+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'wellfound': f'https://www.google.com/search?q={quote_plus(job_title)}+site:wellfound.com+{quote_plus(job_type)}+{quote_plus(location)}&tbs=qdr:{quote_plus(posting)}',
        'y_combinator': f'https://www.google.com/search?q={quote_plus(job_title)}+site:workatastartup.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'oracle_cloud': f'https://www.google.com/search?q={quote_plus(job_title)}+site:oraclecloud.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'workday': f'https://www.google.com/search?q={quote_plus(job_title)}+site:myworkdayjobs.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'recruitee': f'https://www.google.com/search?q={quote_plus(job_title)}+site:recruitee.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'rippling': f'https://www.google.com/search?q={quote_plus(job_title)}+(site%3Arippling.com%20OR%20site%3Arippling-ats.com)+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'gusto': f'https://www.google.com/search?q={quote_plus(job_title)}+site:jobs.gusto.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'teamtailor': f'https://www.google.com/search?q={quote_plus(job_title)}+site:teamtailor.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'smartrecruiters': f'https://www.google.com/search?q={quote_plus(job_title)}+site:jobs.smartrecruiters.com+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'builtin': f'https://www.google.com/search?q={quote_plus(job_title)}+site:builtin.com/job/+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'glassdoor': f'https://www.google.com/search?q={quote_plus(job_title)}+site:glassdoor.com/job-listing/+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}',
        'all_jobs': f'https://www.google.com/search?q={quote_plus(job_title)}+(site%3A*%2Femployment%2F*%20OR%20site%3A*%2Fopportunities%2F*%20OR%20site%3A*%2Fopenings%2F*%20OR%20site%3A*%2Fjoin-us%2F*%20OR%20site%3A*%2Fwork-with-us%2F*)+{quote_plus(job_type)}&tbs=qdr:{quote_plus(posting)}'
    }

    return job_portal_with_link.get(portal, "")
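
# Illustrative example (hypothetical values): search_job('greenhouse', 'ML Engineer', 'remote', 'pune', 'w')
# returns 'https://www.google.com/search?q=ML+Engineer+site:greenhouse.io+remote&tbs=qdr:w',
# i.e. a site-restricted Google query limited to results from the past week.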

def search_jobs_interface(job_title, job_type, location, posting, experience_level, selected_portals, num_results):
    """Main function to handle the Gradio interface"""

    if not job_title.strip():
        return "❌ Please enter a job title", None, "<p style='color:red;'>❌ Please enter a job title</p>"

    if not selected_portals:
        return "❌ Please select at least one job portal", None, "<p style='color:red;'>❌ Please select at least one job portal</p>"

    all_results = []
    progress_messages = []

    for portal in selected_portals:
        try:
            progress_messages.append(f"🔍 Searching {portal}...")

            # Get search URL for the portal
            search_url = search_job(portal, job_title, job_type, location, posting, experience_level)

            if search_url:
                # Use the selenium function to get job URLs
                urls = get_search_urls(search_url, num_results=num_results, is_query=False, headless=True)

                for url in urls:
                    all_results.append({
                        'Portal': portal.title(),
                        'Job Title': job_title,
                        'Location': location,
                        'Job Type': job_type,
                        'Experience Level': experience_level,
                        'URL': url # Keep raw URL for DataFrame if needed
                    })

                progress_messages.append(f"✅ Found {len(urls)} jobs on {portal}")
            else:
                progress_messages.append(f"❌ Invalid portal: {portal}")

        except Exception as e:
            progress_messages.append(f"❌ Error searching {portal}: {str(e)}")

    # Create progress summary
    progress_summary = "\n".join(progress_messages)
    progress_summary += f"\n\n📊 Total Results: {len(all_results)} jobs found"

    # Generate HTML table with clickable links
    if all_results:
        df = pd.DataFrame(all_results)
        # Create HTML table string
        html_table = "<table border='1' class='dataframe' style='width:100%; border-collapse: collapse;'>"
        html_table += "<thead><tr style='background-color: #f2f2f2;'>"
        for col in df.columns:
            html_table += f"<th style='padding: 8px; text-align: left; border: 1px solid #ddd;'>{col}</th>"
        html_table += "</tr></thead><tbody>"

        for _, row in df.iterrows():
            html_table += "<tr>"
            for col in df.columns:
                cell_value = row[col]
                if col == 'URL':
                    # Make URL clickable
                    html_table += f"<td style='padding: 8px; border: 1px solid #ddd;'><a href='{cell_value}' target='_blank' style='color: #1f77b4;' rel='noopener noreferrer'>Apply Now</a></td>"
                else:
                    html_table += f"<td style='padding: 8px; border: 1px solid #ddd;'>{cell_value}</td>"
            html_table += "</tr>"
        html_table += "</tbody></table>"
        return progress_summary, df, html_table # Return DataFrame and HTML
    else:
        no_results_html = "<p style='color:red;'>❌ No jobs found. Try different search parameters.</p>"
        return progress_summary + "\n\n❌ No jobs found. Try different search parameters.", None, no_results_html


# --- Define the Gradio interface with Tabs ---

def create_gradio_interface():
    # Available job portals for All Jobs Scraper
    job_portals = [
        'indeed', 'greenhouse', 'lever', 'ashby', 'pinpoint',
        'job_subdomain', 'careers_page', 'talent_subdomain',
        'paylocity', 'keka', 'workable', 'breezyHR', 'wellfound',
        'y_combinator', 'oracle_cloud', 'workday', 'recruitee',
        'rippling', 'gusto', 'teamtailor', 'smartrecruiters',
        'builtin', 'glassdoor', 'all_jobs'
    ]

    with gr.Blocks(title="AI Job Search Engine", theme=gr.themes.Soft()) as app:

        gr.Markdown("# 🚀 AI-Powered Job Search Engine")

        with gr.Tabs():
            # --- Tab 1: LinkedIn Jobs Scraper ---
            with gr.TabItem("LinkedIn Jobs"):
                gr.Markdown("## 🔍 Search Jobs on LinkedIn")
                with gr.Row():
                    with gr.Column(scale=2):
                        # LinkedIn search parameters
                        linkedin_job_title = gr.Textbox(
                            label="💼 Job Title",
                            placeholder="e.g., AI ML Engineer, Data Scientist",
                            value="AI ML Engineer"
                        )
                        with gr.Row():
                            linkedin_location = gr.Textbox(
                                label="📍 Location",
                                placeholder="e.g., Pune, Mumbai, Bangalore",
                                value="Pune"
                            )
                            linkedin_date_posted = gr.Dropdown(
                                label="📅 Posted Within",
                                choices=["Any Time", "Recent (Last 24 hours)","7 Hour ago","12 hour ago", "Past Week", "Past Month"],
                                value="Past Week"
                            )
                        with gr.Row():
                            # LinkedIn uses 0-9 for experience levels
                            linkedin_experience_level = gr.Dropdown(
                                label="⭐ Experience Level (Years)",
                                choices=[str(i) for i in range(10)], # 0 to 9
                                value="0" # Default to Entry Level (0)
                            )
                            # Hidden placeholder to keep the row layout balanced
                            dummy = gr.Textbox(visible=False)

                        linkedin_search_btn = gr.Button("🔍 Search LinkedIn Jobs", variant="primary")

                    with gr.Column(scale=3):
                        # LinkedIn Results section
                        linkedin_result_msg = gr.Textbox(
                            label="📈 Message",
                            lines=2,
                            max_lines=5,
                            interactive=False
                        )
                        linkedin_result_display = gr.Markdown(
                            label="📋 Job Listings"
                        )

                # Connect LinkedIn search function
                linkedin_search_btn.click(
                    fn=format_results,
                    inputs=[linkedin_job_title, linkedin_location, linkedin_date_posted, linkedin_experience_level],
                    outputs=[linkedin_result_msg, linkedin_result_display]
                )

            # --- Tab 2: All Jobs Scraper (Google-based) ---
            with gr.TabItem("All Jobs (Google Search)"):
                gr.Markdown("## 🌐 Search Jobs across the Web (via Google)")
                with gr.Row():
                    with gr.Column(scale=2):

                        # Job search parameters
                        job_title = gr.Textbox(
                            label="💼 Job Title",
                            placeholder="e.g., AI ML Engineer, Data Scientist, Software Developer",
                            value="AI ML"
                        )

                        with gr.Row():
                            job_type = gr.Dropdown(
                                label="🏢 Job Type",
                                choices=["remote", "on-site", "hybrid", "any"],
                                value="remote"
                            )

                            location = gr.Textbox(
                                label="📍 Location",
                                placeholder="e.g., Pune, Mumbai, Bangalore",
                                value="pune"
                            )

                        with gr.Row():
                            posting = gr.Dropdown(
                                label="📅 Posted Within",
                                choices=[('4 hour ago','h4'),('8 hour ago','h8'),('12 hour ago','h12'),("Last 24 hours", "d"),('2 days ago','h48'),('3 days ago' , 'h72'), ("Last week", "w"), ("Last month", "m"), ("Any time", "")],
                                value="d"
                            )

                            experience_level = gr.Dropdown(
                                label="⭐ Experience Level",
                                choices=["Any", "Entry Level", "Mid Level", "Senior Level", "Executive"],
                                value="Any"
                            )

                        # Job portals selection
                        selected_portals = gr.CheckboxGroup(
                            label="🌐 Select Job Portals",
                            choices=job_portals,
                            value=['indeed', 'greenhouse', 'lever', 'builtin', 'glassdoor',
                                   'job_subdomain', 'careers_page', 'talent_subdomain',
                                'paylocity', 'keka', 'workable', 'breezyHR', 'wellfound',
                                'y_combinator', 'oracle_cloud', 'workday', 'recruitee',
                                'rippling', 'gusto', 'teamtailor', 'smartrecruiters','all_jobs']
                        )

                        num_results = gr.Slider(
                            label="📊 Results per Portal",
                            minimum=1,
                            maximum=30,
                            value=20,
                            step=1
                        )

                        # Search button
                        search_btn = gr.Button("🔍 Search Jobs", variant="primary", size="lg")

                    with gr.Column(scale=3):
                        # Results section
                        progress_output = gr.Textbox(
                            label="📈 Search Progress",
                            lines=10,
                            max_lines=15,
                            interactive=False
                        )

                        # HTML component for clickable links
                        html_output = gr.HTML(
                            label="📋 Clickable Job Results"
                        )

                # Connect the search function
                search_btn.click(
                    fn=search_jobs_interface,
                    inputs=[job_title, job_type, location, posting, experience_level, selected_portals, num_results],
                    outputs=[progress_output, gr.Dataframe(visible=False), html_output]
                )

    return app

# Launch the application
if __name__ == "__main__":
    app = create_gradio_interface()
    app.launch()