AmnaHassan commited on
Commit
4302a23
·
verified ·
1 Parent(s): c85382d

Upload 3 files

Browse files
Files changed (3) hide show
  1. .gitignore +3 -0
  2. app.py +62 -0
  3. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .env
2
+ *.pyc
3
+ __pycache__/
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import requests
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.options import Options
6
+ from selenium.webdriver.common.by import By
7
+ from selenium.webdriver.common.keys import Keys
8
+ import time
9
+ import pandas as pd
10
+
11
def get_driver():
    """Create a remote Chrome WebDriver seeded with browser headers from ScrapeOps.

    Reads the API key from the ``SCRAPEOPS_API_KEY`` environment variable,
    fetches a header set from the ScrapeOps browser-headers endpoint, and
    connects to the Selenium hub at ``http://chrome:4444/wd/hub``.

    Returns:
        selenium.webdriver.Remote: a connected remote Chrome driver.

    Raises:
        requests.HTTPError: if the ScrapeOps API returns an error status.
        requests.Timeout: if the API does not respond within 10 seconds.
        KeyError: if the API response has no ``result`` field.
    """
    api_key = os.environ.get('SCRAPEOPS_API_KEY')
    # timeout= so a dead endpoint fails fast instead of hanging the app forever.
    response = requests.get(
        f'http://headers.scrapeops.io/v1/browser-headers?api_key={api_key}',
        timeout=10,
    )
    # Surface HTTP errors (bad key, quota) explicitly rather than letting the
    # ['result'] lookup below fail with a confusing KeyError.
    response.raise_for_status()
    headers = response.json()['result']

    chrome_options = Options()
    # NOTE(review): passing HTTP headers as `--key=value` Chrome CLI flags is
    # almost certainly a no-op -- Chrome flags are not request headers. Confirm
    # intent; setting headers usually needs a proxy or CDP. Kept as-is since
    # changing it would alter behavior.
    for key, value in headers.items():
        chrome_options.add_argument(f'--{key}={value}')

    # Remote driver: assumes a Selenium/Chrome container reachable as `chrome`
    # (e.g. docker-compose service) -- TODO confirm deployment topology.
    return webdriver.Remote(
        command_executor='http://chrome:4444/wd/hub',
        options=chrome_options
    )
24
+
25
def scrape_linkedin_jobs(search_query, location, num_pages):
    """Scrape LinkedIn job postings and return them as a DataFrame.

    Args:
        search_query: job title / keywords to search for.
        location: location string to filter jobs by.
        num_pages: number of result pages to walk.

    Returns:
        pandas.DataFrame: one row per scraped job (empty if nothing was
        collected -- the scraping logic itself is still a placeholder).
    """
    driver = get_driver()
    # Bug fix: `jobs_data` was returned below without ever being defined,
    # which raised NameError on every call. Initialize it up front so the
    # placeholder body returns an empty DataFrame instead of crashing.
    jobs_data = []
    try:
        # Navigate to LinkedIn Jobs
        driver.get('https://www.linkedin.com/jobs/')

        # ... rest of your scraping logic ...
        # (should append one dict per job to `jobs_data`)
    finally:
        # Always release the remote browser session, even when scraping
        # raises -- otherwise the Selenium hub leaks a Chrome instance.
        driver.quit()

    return pd.DataFrame(jobs_data)
36
+
37
# ---- Streamlit UI ----
st.title('LinkedIn Job Scraper')

# Gather scraping parameters from the user (defaults mirror a common search).
job_title = st.text_input('Enter job title', 'Data Scientist')
job_location = st.text_input('Enter location', 'New York')
pages_to_scrape = st.number_input('Number of pages to scrape', min_value=1, max_value=20, value=5)

if st.button('Scrape Jobs'):
    # Let the user know the (potentially slow) scrape has started.
    st.info('Scraping in progress... Please wait.')
    results = scrape_linkedin_jobs(job_title, job_location, pages_to_scrape)

    # Show what was collected.
    st.success(f"Scraped {len(results)} jobs!")
    st.dataframe(results)

    # Offer the same data as a CSV download.
    st.download_button(
        label="Download data as CSV",
        data=results.to_csv(index=False),
        file_name="linkedin_jobs.csv",
        mime="text/csv",
    )

st.warning('Note: Web scraping may violate LinkedIn\'s terms of service. Use responsibly and for educational purposes only.')
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ selenium
3
+ pandas
4
+ requests