# Spaces: Paused
| import streamlit as st | |
| import requests | |
| import numpy as np | |
| from PIL import Image | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
| import requests | |
| import pandas as pd | |
| import numpy as np | |
| from bs4 import BeautifulSoup | |
| import bs4 | |
| from urllib.request import urlopen | |
| import time | |
| import re | |
| import time | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import matplotlib as mpl | |
| import plotly | |
| import plotly.express as px | |
| import plotly.graph_objs as go | |
| import plotly.offline as py | |
| from plotly.offline import iplot | |
| from plotly.subplots import make_subplots | |
| import plotly.figure_factory as ff | |
| from selenium.webdriver.common.desired_capabilities import DesiredCapabilities | |
| from selenium import webdriver | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.common.keys import Keys | |
| from selenium.webdriver.support.ui import WebDriverWait | |
| from selenium.webdriver.support import expected_conditions as EC | |
| from selenium.webdriver.chrome.service import Service | |
| import requests | |
| import platform | |
| import zipfile | |
| import os | |
| import subprocess | |
| import streamlit as st | |
| import numpy as np | |
| import pandas as pd | |
| import time | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.service import Service | |
| from selenium.webdriver.chrome.options import Options | |
| from webdriver_manager.chrome import ChromeDriverManager | |
| from wuzzuf_scraper import Wuzzuf_scrapping | |
| from linkedin_scraper import LINKEDIN_Scrapping | |
| from data_analysis import map_bubble, linkedin_exp, wuzzuf_exp | |
# Set up Streamlit page configuration: tab title, icon and wide layout.
st.set_page_config(
    page_title="My Web_Scrap Page",
    page_icon=":tada:",
    layout="wide",
)

# ---- HEADER SECTION ----
# Two columns inside one container: intro text on the left, the `im.gif`
# image on the right.
with st.container():
    left_column, right_column = st.columns(2)

    left_column.subheader("Hi! I am Yassmen :wave:")
    left_column.title("An Electronics and Communication Engineer")
    left_column.write("In this app we will scrap jobs from LinkedIn and Wuzzuf websites, let's get it started :boom:")
    left_column.write("[Reach me >](https://www.linkedin.com/in/yassmen-youssef-48439a166/)")

    right_column.image("im.gif", use_column_width=True)

# Sidebar selections
webs = ["Wuzzuf", "Linkedin"]
jobs = ["Machine Learning", "AI Engineer", "Data Analysis", "Software Testing"]
nums = np.arange(1, 1000)  # selectable job counts: 1 through 999

# The three choices below are read by the scraping section further down.
site = st.sidebar.selectbox("Select one website", webs)
job = st.sidebar.selectbox("Select one job", jobs)
num_jobs = st.sidebar.selectbox("Select number of jobs you want to scrap", nums)
| # Function to get Selenium driver | |
| from selenium import webdriver | |
| from selenium.webdriver.firefox.service import Service as FirefoxService | |
| from webdriver_manager.firefox import GeckoDriverManager | |
def get_driver():
    """Create a headless Chrome WebDriver.

    Returns:
        The new ``webdriver.Chrome`` instance, or ``None`` if constructing it
        raised. In the failure case the exception text is shown to the user
        through ``st.error``, so callers must check for ``None`` before use.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_flags = (
        "--headless",  # Run in headless mode
        "--no-sandbox",
        "--disable-dev-shm-usage",
    )
    for flag in chrome_flags:
        chrome_options.add_argument(flag)

    try:
        return webdriver.Chrome(options=chrome_options)
    except Exception as exc:
        st.error(f"Error initializing WebDriver: {exc}")
        return None
| import streamlit as st | |
| from streamlit_option_menu import option_menu | |
| import streamlit.components.v1 as components | |
n2 = pd.DataFrame()


def _render_frame(target, frame):
    """Show *frame* in *target*, degrading gracefully if rendering fails.

    Tries the interactive ``dataframe`` widget first, then a stringified copy
    via ``write``, and finally a static ``table``. The fallbacks are a
    deliberate best-effort, but only ``Exception`` is caught so that
    ``KeyboardInterrupt`` / ``SystemExit`` are never swallowed.
    """
    try:
        target.dataframe(frame)
    except Exception:
        try:
            target.write(frame.astype(str).set_index(frame.index.astype(str)))
        except Exception:
            target.table(frame)


def _scrape_and_show(scrape, explore):
    """Run one scraper and render its result in three tabs.

    Args:
        scrape: Callable invoked as ``scrape(job, num_jobs, driver)``; its
            return value is displayed as the data table.
        explore: Callable invoked with the scraped data to draw the
            "Data Exploration" tab.

    Returns:
        The scraped data, or ``None`` when no WebDriver could be created.
    """
    with st.container():
        st.write("---")
        tab1, tab2, tab3 = st.tabs([" Data", " Bubble Map", "Data Exploration"])

        with tab1:
            with st.spinner('✨Now loading...'):
                time.sleep(5)
                driver = get_driver()
                if driver is None:
                    # get_driver() has already reported the failure via
                    # st.error; there is nothing to scrape or clean up.
                    return None
                try:
                    scraped = scrape(job, num_jobs, driver)
                finally:
                    # Always release the browser, even if the scraper raises.
                    driver.quit()
            _render_frame(tab1, scraped)

        with tab2:
            map_bubble(scraped)

        with tab3:
            explore(scraped)

    return scraped


if st.sidebar.button('Start Scrapping'):
    if site == "Wuzzuf":
        n1 = _scrape_and_show(Wuzzuf_scrapping, wuzzuf_exp)
    elif site == "Linkedin":
        n1 = _scrape_and_show(LINKEDIN_Scrapping, linkedin_exp)