razaali10 committed on
Commit
b5f50bc
·
verified ·
1 Parent(s): c41f38a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -45
app.py CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
3
  import json
4
  import smtplib
5
  from email.message import EmailMessage
6
- from typing import Dict
7
 
8
  from jobspy import scrape_jobs
9
  import groq
@@ -23,6 +23,12 @@ def remove_duplicates(df: pd.DataFrame) -> pd.DataFrame:
23
  return df.drop_duplicates("__dedup__").drop(columns="__dedup__")
24
 
25
 
 
 
 
 
 
 
26
  # ======================================================
27
  # Optional Email Helper
28
  # ======================================================
@@ -49,12 +55,8 @@ def send_email_with_csv(recipient_email: str, df: pd.DataFrame):
49
  msg["Subject"] = "Your Job Search Results"
50
  msg["From"] = email_from
51
  msg["To"] = recipient_email
52
-
53
  msg.set_content(
54
- "Hello,\n\n"
55
- "Attached is the CSV file containing your job search results.\n\n"
56
- "Regards,\n"
57
- "Private Job Search Tool"
58
  )
59
 
60
  csv_data = df.to_csv(index=False)
@@ -67,7 +69,7 @@ def send_email_with_csv(recipient_email: str, df: pd.DataFrame):
67
 
68
 
69
  # ======================================================
70
- # AI helper
71
  # ======================================================
72
 
73
  def extract_search_parameters(client, prompt: str) -> Dict[str, str]:
@@ -98,77 +100,141 @@ def extract_search_parameters(client, prompt: str) -> Dict[str, str]:
98
 
99
 
100
  # ======================================================
101
- # Job scraping
102
  # ======================================================
103
 
104
  @st.cache_data(ttl=3600)
105
- def get_indeed_jobs(search_term: str, location: str) -> pd.DataFrame:
 
 
 
 
 
106
  try:
107
  jobs = scrape_jobs(
108
  site_name=["indeed"],
109
  search_term=search_term,
110
  location=location,
111
  results_wanted=100,
112
- hours_old=72,
113
- country_indeed="Canada"
 
114
  )
115
  return pd.DataFrame(jobs)
116
  except Exception:
117
  return pd.DataFrame()
118
 
119
 
 
 
 
 
 
120
  # ======================================================
121
  # Streamlit App
122
  # ======================================================
123
 
124
  def main():
125
  st.set_page_config(page_title="Private Job Search", layout="centered")
126
- st.title("📄 Private Job Search & Download")
127
 
 
128
  job_prompt = st.text_area(
129
  "Describe the job you are looking for",
130
- placeholder="e.g. Civil Engineer, Planner, Water Resources Engineer in Alberta",
131
  height=120
132
  )
133
 
134
  api_key = st.text_input("Groq API Key", type="password")
135
 
136
- send_email = st.checkbox("📧 Send results by email (optional)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
- email_address = None
139
- if send_email:
140
- email_address = st.text_input("Email address")
 
141
 
142
- if st.button(
143
- "🔍 Search Jobs",
144
- disabled=not job_prompt or not api_key
145
- ):
 
 
 
 
 
 
 
 
 
146
  client = groq.Client(api_key=api_key)
147
 
148
  with st.spinner("Understanding your request..."):
149
  params = extract_search_parameters(client, job_prompt)
150
 
151
- with st.spinner("Searching jobs..."):
152
- jobs_df = get_indeed_jobs(
153
- params["search_term"],
154
- params["location"]
155
- )
156
-
157
- if jobs_df.empty:
158
- st.warning("No jobs found.")
159
- return
160
 
161
- jobs_df.fillna("", inplace=True)
162
- jobs_df = remove_duplicates(jobs_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
- st.success(f"✅ Found {len(jobs_df)} unique jobs")
 
 
165
 
166
- # --------------------------------------------------
167
- # DOWNLOAD OPTION (ALWAYS AVAILABLE)
168
- # --------------------------------------------------
169
 
 
170
  csv_data = jobs_df.to_csv(index=False).encode("utf-8")
171
-
172
  st.download_button(
173
  label="⬇️ Download Jobs (CSV)",
174
  data=csv_data,
@@ -176,18 +242,12 @@ def main():
176
  mime="text/csv"
177
  )
178
 
179
- # --------------------------------------------------
180
- # OPTIONAL EMAIL
181
- # --------------------------------------------------
182
-
183
  if send_email:
184
  if not email_address:
185
  st.warning("Please enter an email address.")
186
  elif not email_secrets_available():
187
- st.warning(
188
- "Email configuration not found. "
189
- "Download is still available."
190
- )
191
  else:
192
  with st.spinner("Sending email..."):
193
  try:
@@ -196,6 +256,16 @@ def main():
196
  except Exception as e:
197
  st.error(f"Failed to send email: {e}")
198
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  if __name__ == "__main__":
201
  main()
 
3
  import json
4
  import smtplib
5
  from email.message import EmailMessage
6
+ from typing import Dict, List
7
 
8
  from jobspy import scrape_jobs
9
  import groq
 
23
  return df.drop_duplicates("__dedup__").drop(columns="__dedup__")
24
 
25
 
26
def compute_keyword_score(text: str, keywords: List[str]) -> int:
    """Score *text* by counting case-insensitive keyword occurrences.

    Each keyword is trimmed of surrounding whitespace and lower-cased before
    matching, so ``" Water "`` and ``"water"`` score identically. Matching is
    plain substring counting (``str.count``), which means ``"water"`` also
    matches inside ``"wastewater"`` and overlapping hits are not counted twice.

    Args:
        text: The text to search; ``None`` or an empty string scores 0.
        keywords: Keywords to look for; blank entries are ignored and
            ``None`` is treated as an empty list.

    Returns:
        The total number of keyword hits across all keywords.
    """
    haystack = (text or "").lower()
    if not haystack:
        return 0

    # Normalise once: the same stripped form is used both for the blank
    # filter and for the match, so padded keywords are not silently missed.
    needles = (k.strip().lower() for k in keywords or ())
    return sum(haystack.count(needle) for needle in needles if needle)
30
+
31
+
32
  # ======================================================
33
  # Optional Email Helper
34
  # ======================================================
 
55
  msg["Subject"] = "Your Job Search Results"
56
  msg["From"] = email_from
57
  msg["To"] = recipient_email
 
58
  msg.set_content(
59
+ "Hello,\n\nAttached is the CSV file containing your job search results.\n\nRegards,\nPrivate Job Search Tool"
 
 
 
60
  )
61
 
62
  csv_data = df.to_csv(index=False)
 
69
 
70
 
71
  # ======================================================
72
+ # AI helper (intent extraction)
73
  # ======================================================
74
 
75
  def extract_search_parameters(client, prompt: str) -> Dict[str, str]:
 
100
 
101
 
102
  # ======================================================
103
+ # Job scraping (Per-board toggles)
104
  # ======================================================
105
 
106
  @st.cache_data(ttl=3600)
107
def get_indeed_jobs(
    search_term: str,
    location: str,
    radius_km: int,
    posted_within_days: int
) -> pd.DataFrame:
    """Scrape Indeed (Canada) postings for a search term and location.

    Args:
        search_term: Job title / keywords forwarded to the scraper.
        location: Free-text location forwarded to the scraper.
        radius_km: Search radius in kilometres. It is converted to miles
            because the scraper takes its radius through ``distance``
            (miles) rather than a ``radius`` keyword.
        posted_within_days: Maximum posting age in days; forwarded as
            ``hours_old`` (days * 24).

    Returns:
        A DataFrame with up to 100 postings, or an empty DataFrame when the
        scrape fails for any reason (best-effort; this function never raises).

    Note:
        When this function is wrapped by a caching decorator, the empty frame
        returned after a failure is cached like any other result.
    """
    import logging

    km_per_mile = 1.609344

    try:
        jobs = scrape_jobs(
            site_name=["indeed"],
            search_term=search_term,
            location=location,
            results_wanted=100,
            hours_old=posted_within_days * 24,
            country_indeed="Canada",
            # Converted inside the ``try`` so a bad radius value degrades to
            # "no results" like every other failure instead of raising.
            distance=round(radius_km / km_per_mile),
        )
        return pd.DataFrame(jobs)
    except Exception:
        # Deliberate best-effort: the UI treats an empty frame as "no jobs".
        # Log the cause so failures are diagnosable instead of invisible.
        logging.getLogger(__name__).exception("Indeed scrape failed")
        return pd.DataFrame()
126
 
127
 
128
def get_other_board_stub(board_name: str) -> pd.DataFrame:
    """Placeholder scraper for job boards that are not implemented yet.

    ``board_name`` is accepted for call-site symmetry but is not used; the
    result is always an empty DataFrame.
    """
    no_results = pd.DataFrame()
    return no_results
131
+
132
+
133
  # ======================================================
134
  # Streamlit App
135
  # ======================================================
136
 
137
  def main():
138
  st.set_page_config(page_title="Private Job Search", layout="centered")
139
+ st.title("📄 Private Job Search, Rank & Download")
140
 
141
+ # --- Inputs ---
142
  job_prompt = st.text_area(
143
  "Describe the job you are looking for",
144
+ placeholder="e.g. Civil Engineer, Water Resources, Transportation in Alberta",
145
  height=120
146
  )
147
 
148
  api_key = st.text_input("Groq API Key", type="password")
149
 
150
+ # --- Per-job-board toggles ---
151
+ st.subheader("Job Boards")
152
+ colb1, colb2, colb3 = st.columns(3)
153
+ with colb1:
154
+ use_indeed = st.checkbox("Indeed", value=True)
155
+ with colb2:
156
+ use_glassdoor = st.checkbox("Glassdoor (coming soon)", value=False, disabled=True)
157
+ with colb3:
158
+ use_linkedin = st.checkbox("LinkedIn (coming soon)", value=False, disabled=True)
159
+
160
+ # --- Filters ---
161
+ st.subheader("Filters")
162
+ posted_within_days = st.slider(
163
+ "Posted within last (days)",
164
+ min_value=1, max_value=30, value=7
165
+ )
166
 
167
+ radius_km = st.slider(
168
+ "Search radius (km)",
169
+ min_value=5, max_value=100, value=25, step=5
170
+ )
171
 
172
+ # --- Keyword ranking ---
173
+ keywords_raw = st.text_input(
174
+ "Keyword ranking (comma-separated)",
175
+ placeholder="water, wastewater, stormwater, EPANET, WNTR"
176
+ )
177
+ keywords = [k.strip() for k in keywords_raw.split(",") if k.strip()]
178
+
179
+ # --- Optional email ---
180
+ send_email = st.checkbox("📧 Send results by email (optional)")
181
+ email_address = st.text_input("Email address") if send_email else None
182
+
183
+ # --- Action ---
184
+ if st.button("🔍 Search Jobs", disabled=not job_prompt or not api_key):
185
  client = groq.Client(api_key=api_key)
186
 
187
  with st.spinner("Understanding your request..."):
188
  params = extract_search_parameters(client, job_prompt)
189
 
190
+ all_jobs = []
 
 
 
 
 
 
 
 
191
 
192
+ with st.spinner("Searching job boards..."):
193
+ if use_indeed:
194
+ indeed_df = get_indeed_jobs(
195
+ params["search_term"],
196
+ params["location"],
197
+ radius_km,
198
+ posted_within_days
199
+ )
200
+ if not indeed_df.empty:
201
+ indeed_df["source"] = "Indeed"
202
+ all_jobs.append(indeed_df)
203
+
204
+ # Future boards (toggle-safe)
205
+ if use_glassdoor:
206
+ all_jobs.append(get_other_board_stub("Glassdoor"))
207
+ if use_linkedin:
208
+ all_jobs.append(get_other_board_stub("LinkedIn"))
209
+
210
+ if not all_jobs:
211
+ st.warning("No jobs found.")
212
+ return
213
+
214
+ jobs_df = pd.concat(all_jobs, ignore_index=True)
215
+ jobs_df.fillna("", inplace=True)
216
+ jobs_df = remove_duplicates(jobs_df)
217
+
218
+ # --- Keyword ranking ---
219
+ if keywords:
220
+ jobs_df["keyword_score"] = jobs_df.apply(
221
+ lambda r: compute_keyword_score(
222
+ f"{r.get('title','')} {r.get('description','')}",
223
+ keywords
224
+ ),
225
+ axis=1
226
+ )
227
+ else:
228
+ jobs_df["keyword_score"] = 0
229
 
230
+ # Sort by keyword score (descending); no date tie-breaker is applied
231
+ sort_cols = ["keyword_score"]
232
+ jobs_df = jobs_df.sort_values(sort_cols, ascending=[False])
233
 
234
+ st.success(f"✅ Found {len(jobs_df)} jobs")
 
 
235
 
236
+ # --- Download always available ---
237
  csv_data = jobs_df.to_csv(index=False).encode("utf-8")
 
238
  st.download_button(
239
  label="⬇️ Download Jobs (CSV)",
240
  data=csv_data,
 
242
  mime="text/csv"
243
  )
244
 
245
+ # --- Optional email ---
 
 
 
246
  if send_email:
247
  if not email_address:
248
  st.warning("Please enter an email address.")
249
  elif not email_secrets_available():
250
+ st.warning("Email not configured. Download is still available.")
 
 
 
251
  else:
252
  with st.spinner("Sending email..."):
253
  try:
 
256
  except Exception as e:
257
  st.error(f"Failed to send email: {e}")
258
 
259
+ # --- Preview ---
260
+ st.subheader("Preview (Top Results)")
261
+ preview_cols = [
262
+ c for c in [
263
+ "source", "title", "company", "location",
264
+ "keyword_score", "date_posted", "job_url"
265
+ ] if c in jobs_df.columns
266
+ ]
267
+ st.dataframe(jobs_df[preview_cols].head(20), use_container_width=True)
268
+
269
 
270
  if __name__ == "__main__":
271
  main()