Spaces:

Yassmen
/

Job.web.scrapping

Paused

App Files Files Community

Yassmen committed on Oct 27, 2024

Commit

6343888

verified ·

1 Parent(s): 6d228e9

Create data_analysis.py

Browse files

Files changed (1) hide show

data_analysis.py +208 -0

data_analysis.py ADDED Viewed

	@@ -0,0 +1,208 @@

+#### function to show a map of the job locations
+import time
+import matplotlib.pyplot as plt
+import seaborn as sns
+import matplotlib as mpl
+import plotly
+import plotly.express as px
+import plotly.graph_objs as go
+import plotly.offline as py
+from plotly.offline import iplot
+from plotly.subplots import make_subplots
+import plotly.figure_factory as ff
+def map_bubble(df):
+  import requests
+  import urllib.parse
+  g =[]
+  for i  in range(len(df.Location)):
+    if  df.Location.loc[i].split(","):
+      g.append(df.Location.loc[i].split(",")[0])
+    else:
+      g.append(df.Location.loc[i])
+  df['new_loc']=g
+  if 'country' in df.columns:
+    df["full_location"] = df["new_loc"] + ", " +df["country"]
+    dict_cities = dict(df.full_location.value_counts())
+  else :
+    dict_cities = dict(df.new_loc.value_counts())
+  lat = []
+  lon = []
+  bubble_df = pd.DataFrame()
+  add=[]
+  val=[]
+  try:
+    for address in dict_cities.keys():
+      url = 'https://nominatim.openstreetmap.org/search/' + urllib.parse.quote(address) +'?format=json'
+      response = requests.get(url).json()
+      lat.append(response[0]["lat"])
+      lon.append(response[0]["lon"])
+      add.append(address)
+      val.append(dict_cities[address])
+  except:
+    pass
+  bubble_df['address'] =add
+  bubble_df['lat'] = lat
+  bubble_df['lon'] = lon
+  bubble_df['value'] = val
+  # import the library
+  import folium
+  # Make an empty map
+  m = folium.Map(location=[20,0], tiles="OpenStreetMap", zoom_start=2)
+  # add marker one by one on the map
+  for i in range(0,len(bubble_df)):
+    folium.Circle(
+        location=[bubble_df.iloc[i]['lat'], bubble_df.iloc[i]['lon']],
+        popup=bubble_df.iloc[i][['address','value']].values,
+        radius=float(bubble_df.iloc[i]['value'])*500,
+        color='#69b3a2',
+        fill=True,
+        fill_color='#69b3a2'
+    ).add_to(m)
+  m
+  # Show the map again
+  return m
+##########################
+#########################
+#### wuzzuf analysis
+def wuzzuf_exp(df1):
+  top10_job_title = df1['Title'].value_counts()[:10]
+  fig1 = px.bar(y=top10_job_title.values,
+              x=top10_job_title.index,
+              color = top10_job_title.index,
+              color_discrete_sequence=px.colors.sequential.deep,
+              text=top10_job_title.values,
+              title= 'Top 10 Job Titles',
+              template= 'plotly_dark')
+  fig1.update_layout(height=500,width=500,
+      xaxis_title="Job Titles",
+      yaxis_title="count",
+      font = dict(size=17,family="Franklin Gothic"))
+  st.plotly_chart(fig1)
+  type_grouped = df1['Career_Level'].value_counts()
+  #e_type = ['Full-Time','Part-Time','Contract','Freelance']
+  e_type =dict(df1['Career_Level'].value_counts()).keys()
+  fig2 = px.bar(x = e_type, y = type_grouped.values,
+        color = type_grouped.index,
+        color_discrete_sequence=px.colors.sequential.dense,
+        template = 'plotly_dark',
+        text = type_grouped.values, title = 'Career Level Distribution')
+  fig2.update_layout( height=500, width=500,
+      xaxis_title="Career Level",
+      yaxis_title="count",
+      font = dict(size=17,family="Franklin Gothic"))
+  fig2.update_traces(width=0.5)
+  st.plotly_chart(fig2)
+  residence = df1['Location'].value_counts()
+  top10_employee_location = residence[:10]
+  fig3 = px.bar(y=top10_employee_location.values,
+              x=top10_employee_location.index,
+              color = top10_employee_location.index,
+              color_discrete_sequence=px.colors.sequential.deep,
+              text=top10_employee_location.values,
+              title= 'Top 10 Location of job',
+              template= 'plotly_dark')
+  fig3.update_layout(height=500,width=500,
+      xaxis_title="Location of job",
+      yaxis_title="count",
+      font = dict(size=17,family="Franklin Gothic"))
+  st.plotly_chart(fig3)
+  type_grouped = df1['Experience_Needed'].value_counts()
+  #e_type = ['Full-Time','Part-Time','Contract','Freelance']
+  e_type =dict(df1['Experience_Needed'].value_counts()).keys()
+  fig4 = px.bar(x = e_type, y = type_grouped.values,
+        color = type_grouped.index,
+        color_discrete_sequence=px.colors.sequential.dense,
+        template = 'plotly_dark',
+        text = type_grouped.values, title = ' Experience Level Distribution')
+  fig4.update_layout(height=500,width=500,
+      xaxis_title=" Experience Level (years)",
+      yaxis_title="count",
+      font = dict(size=17,family="Franklin Gothic"))
+  fig4.update_traces(width=0.5)
+  st.plotly_chart(fig4)
+  return
+#########################
+### linkedin analysis
+def linkedin_exp(df1):
+  top10_job_title = df1['Title'].value_counts()[:10]
+  fig1 = px.bar(y=top10_job_title.values,
+              x=top10_job_title.index,
+              color = top10_job_title.index,
+              color_discrete_sequence=px.colors.sequential.deep,
+              text=top10_job_title.values,
+              title= 'Top 10 Job Titles',
+              template= 'plotly_dark')
+  fig1.update_layout(height=500,width=500,
+      xaxis_title="Job Titles",
+      yaxis_title="count",
+      font = dict(size=17,family="Franklin Gothic"))
+  st.plotly_chart(fig1)
+  type_grouped = df1['Employment type'].value_counts()
+  #e_type = ['Full-Time','Part-Time','Contract','Freelance']
+  e_type =dict(df1['Employment type'].value_counts()).keys()
+  fig2 = px.bar(x = e_type, y = type_grouped.values,
+        color = type_grouped.index,
+        color_discrete_sequence=px.colors.sequential.dense,
+        template = 'plotly_dark',
+        text = type_grouped.values, title = 'Employment type Distribution')
+  fig2.update_layout( height=500, width=500,
+      xaxis_title="Employment type",
+      yaxis_title="count",
+      font = dict(size=17,family="Franklin Gothic"))
+  fig2.update_traces(width=0.5)
+  st.plotly_chart(fig2)
+  residence = df1['Location'].value_counts()
+  top10_employee_location = residence[:10]
+  fig3 = px.bar(y=top10_employee_location.values,
+              x=top10_employee_location.index,
+              color = top10_employee_location.index,
+              color_discrete_sequence=px.colors.sequential.deep,
+              text=top10_employee_location.values,
+              title= 'Top 10 Location of job',
+              template= 'plotly_dark')
+  fig3.update_layout(height=500,width=500,
+      xaxis_title="Location of job",
+      yaxis_title="count",
+      font = dict(size=17,family="Franklin Gothic"))
+  st.plotly_chart(fig3)
+  type_grouped = df1['Seniority level'].value_counts()
+  #e_type = ['Full-Time','Part-Time','Contract','Freelance']
+  e_type =dict(df1['Seniority level'].value_counts()).keys()
+  fig4 = px.bar(x = e_type, y = type_grouped.values,
+        color = type_grouped.index,
+        color_discrete_sequence=px.colors.sequential.dense,
+        template = 'plotly_dark',
+        text = type_grouped.values, title = 'Seniority level Distribution')
+  fig4.update_layout(height=500,width=500,
+      xaxis_title="Seniority level",
+      yaxis_title="count",
+      font = dict(size=17,family="Franklin Gothic"))
+  fig4.update_traces(width=0.5)
+  st.plotly_chart(fig4)
+  return