Spaces:

AmnaHassan
/

Web-Scrapper-linkedin

Paused

App Files Files Community

AmnaHassan commited on Jul 27, 2024

Commit

4302a23

verified ·

1 Parent(s): c85382d

Upload 3 files

Browse files

Files changed (3) hide show

.gitignore +3 -0
app.py +62 -0
requirements.txt +4 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+.env
+*.pyc
+__pycache__/

app.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import streamlit as st
+import os
+import requests
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+import time
+import pandas as pd
+def get_driver():
+    api_key = os.environ.get('SCRAPEOPS_API_KEY')
+    response = requests.get(f'http://headers.scrapeops.io/v1/browser-headers?api_key={api_key}')
+    headers = response.json()['result']
+    chrome_options = Options()
+    for key, value in headers.items():
+        chrome_options.add_argument(f'--{key}={value}')
+    return webdriver.Remote(
+        command_executor='http://chrome:4444/wd/hub',
+        options=chrome_options
+    )
+def scrape_linkedin_jobs(search_query, location, num_pages):
+    driver = get_driver()
+    # Navigate to LinkedIn Jobs
+    driver.get('https://www.linkedin.com/jobs/')
+    # ... rest of your scraping logic ...
+    driver.quit()
+    return pd.DataFrame(jobs_data)
+# Streamlit app
+st.title('LinkedIn Job Scraper')
+# Input fields
+search_query = st.text_input('Enter job title', 'Data Scientist')
+location = st.text_input('Enter location', 'New York')
+num_pages = st.number_input('Number of pages to scrape', min_value=1, max_value=20, value=5)
+if st.button('Scrape Jobs'):
+    st.info('Scraping in progress... Please wait.')
+    df = scrape_linkedin_jobs(search_query, location, num_pages)
+    # Display results
+    st.success(f"Scraped {len(df)} jobs!")
+    st.dataframe(df)
+    # Download button
+    csv = df.to_csv(index=False)
+    st.download_button(
+        label="Download data as CSV",
+        data=csv,
+        file_name="linkedin_jobs.csv",
+        mime="text/csv",
+    )
+st.warning('Note: Web scraping may violate LinkedIn\'s terms of service. Use responsibly and for educational purposes only.')

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+streamlit
+selenium
+pandas
+requests