File size: 9,584 Bytes
42f62ee
 
 
a11d230
 
b5f50bc
85d4550
e994c7a
 
 
42f62ee
e994c7a
a11d230
e994c7a
85d4550
a11d230
c41f38a
e994c7a
 
 
 
878881a
c41f38a
85d4550
a11d230
b5f50bc
 
23ee159
b5f50bc
 
e994c7a
c41f38a
e994c7a
a11d230
c41f38a
 
 
 
 
 
 
 
 
 
 
a11d230
 
 
 
 
 
 
 
c41f38a
a11d230
 
 
b5f50bc
a11d230
 
 
c41f38a
a11d230
 
 
 
 
 
 
e994c7a
b5f50bc
e994c7a
42f62ee
e994c7a
42f62ee
e994c7a
a11d230
e994c7a
85d4550
 
a11d230
85d4550
42f62ee
85d4550
42f62ee
85d4550
 
a11d230
42f62ee
 
85d4550
 
42f62ee
85d4550
42f62ee
 
85d4550
c41f38a
85d4550
42f62ee
e994c7a
23ee159
e994c7a
85d4550
a11d230
b5f50bc
 
 
 
 
 
42f62ee
a11d230
 
 
 
 
b5f50bc
 
 
42f62ee
a11d230
878881a
42f62ee
 
85d4550
e994c7a
 
 
878881a
a11d230
c41f38a
b5f50bc
85d4550
23ee159
 
 
a11d230
 
23ee159
a11d230
 
85d4550
c41f38a
85d4550
23ee159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5f50bc
23ee159
 
 
 
 
b5f50bc
23ee159
b5f50bc
 
23ee159
 
 
b5f50bc
c41f38a
b5f50bc
 
23ee159
 
 
 
b5f50bc
85d4550
23ee159
 
 
b5f50bc
 
23ee159
b5f50bc
 
 
23ee159
 
 
b5f50bc
 
 
23ee159
 
 
 
 
 
 
a11d230
42f62ee
a11d230
e994c7a
85d4550
23ee159
 
 
 
 
 
 
 
 
 
b5f50bc
 
23ee159
 
 
 
 
 
b5f50bc
85d4550
23ee159
 
 
c41f38a
23ee159
 
 
 
 
 
 
 
 
 
 
c41f38a
23ee159
 
 
 
 
 
 
 
 
 
c41f38a
 
 
 
 
 
 
 
23ee159
 
 
c41f38a
 
 
 
b5f50bc
c41f38a
 
 
 
 
 
 
878881a
23ee159
 
 
 
b5f50bc
 
23ee159
b5f50bc
 
 
23ee159
 
 
 
b5f50bc
85d4550
42f62ee
85d4550
878881a
c41f38a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
import streamlit as st
import pandas as pd
import json
import smtplib
from email.message import EmailMessage
from typing import Dict, List

from jobspy import scrape_jobs
import groq


# ======================================================
# Utilities
# ======================================================

def remove_duplicates(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with duplicate job postings removed.

    Two rows are duplicates when their ``title``, ``company``, ``location``
    and ``job_url`` values, compared as strings, all match. The first
    occurrence of each posting is kept and the original row order is
    preserved.

    A key column that is absent from ``df`` is treated as an empty string for
    every row, so partial scraper output does not raise.

    The input frame is not modified: no helper column is added to it.
    """
    key_columns = ["title", "company", "location", "job_url"]

    # Build a string-typed view of just the key columns; missing ones are
    # filled with "" so they do not influence the comparison.
    keys = df.reindex(columns=key_columns, fill_value="").astype(str)

    # Positional mask so a non-unique index on ``df`` cannot cause alignment
    # problems when selecting rows.
    keep_mask = ~keys.duplicated(keep="first").to_numpy()
    return df.loc[keep_mask].copy()


def compute_keyword_score(text: str, keywords: List[str]) -> int:
    """Count case-insensitive keyword occurrences in ``text``.

    Each non-empty keyword contributes the number of non-overlapping times it
    appears as a substring of ``text``; the contributions are summed. A falsy
    ``text`` (``None`` or ``""``) scores 0, and empty keywords are skipped.
    """
    haystack = text.lower() if text else ""

    score = 0
    for keyword in keywords:
        if not keyword:
            continue
        score += haystack.count(keyword.lower())
    return score


# ======================================================
# Optional Email Helper
# ======================================================

def email_secrets_available() -> bool:
    """Report whether every SMTP setting needed for emailing is configured.

    Returns True only when all of ``SMTP_SERVER``, ``SMTP_PORT``,
    ``SMTP_USER``, ``SMTP_PASSWORD`` and ``EMAIL_FROM`` are present in
    ``st.secrets``. Returns False when any is missing, including the case
    where the app has no secrets file at all.
    """
    required = (
        "SMTP_SERVER",
        "SMTP_PORT",
        "SMTP_USER",
        "SMTP_PASSWORD",
        "EMAIL_FROM",
    )
    try:
        return all(key in st.secrets for key in required)
    except FileNotFoundError:
        # Streamlit raises a FileNotFoundError-derived error on the first
        # secrets lookup when no secrets.toml exists. For this "is email
        # configured?" probe, that simply means "no".
        return False


def send_email_with_csv(recipient_email: str, df: pd.DataFrame):
    """Email ``df`` as a CSV attachment to ``recipient_email``.

    SMTP connection settings are read from ``st.secrets`` (``SMTP_SERVER``,
    ``SMTP_PORT``, ``SMTP_USER``, ``SMTP_PASSWORD``, ``EMAIL_FROM``). The
    connection is upgraded with STARTTLS before logging in.

    Raises:
        KeyError: if a required secret is missing.
        smtplib.SMTPException / OSError / ssl.SSLError: on connection, TLS,
            authentication or delivery failure. Callers are expected to
            catch and report these.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import ssl

    smtp_server = st.secrets["SMTP_SERVER"]
    smtp_port = int(st.secrets["SMTP_PORT"])
    smtp_user = st.secrets["SMTP_USER"]
    smtp_password = st.secrets["SMTP_PASSWORD"]
    email_from = st.secrets["EMAIL_FROM"]

    msg = EmailMessage()
    msg["Subject"] = "Your Job Search Results"
    msg["From"] = email_from
    msg["To"] = recipient_email
    msg.set_content(
        "Hello,\n\nAttached is the CSV file containing your job search results.\n\nRegards,\nPrivate Job Search Tool"
    )

    # A str payload is attached as text/csv.
    csv_data = df.to_csv(index=False)
    msg.add_attachment(csv_data, subtype="csv", filename="job_results.csv")

    # Without an explicit context, starttls() does not verify the server
    # certificate or hostname; the default context enables both.
    tls_context = ssl.create_default_context()

    # The timeout prevents an unreachable server from hanging the app.
    with smtplib.SMTP(smtp_server, smtp_port, timeout=30) as server:
        server.starttls(context=tls_context)
        server.login(smtp_user, smtp_password)
        server.send_message(msg)


# ======================================================
# AI helper (intent extraction)
# ======================================================

def extract_search_parameters(client, prompt: str) -> Dict[str, str]:
    """Ask the LLM to turn a free-text request into search parameters.

    Args:
        client: An object exposing ``chat.completions.create(...)`` whose
            response has ``choices[0].message.content`` (the Groq client is
            what the app passes in).
        prompt: The user's free-text job description.

    Returns:
        A dict with exactly two string keys, ``"search_term"`` and
        ``"location"``. Whenever the model's reply cannot be used (not JSON,
        not a JSON object, or a key is missing or empty), the affected value
        falls back to ``prompt`` for ``search_term`` and ``"Canada"`` for
        ``location``.

    Errors raised by the API call itself propagate to the caller.
    """
    system_prompt = """
    Extract job search parameters.
    Return JSON ONLY:

    {
      "search_term": "<job title or keywords>",
      "location": "<city, province/state, or country>"
    }
    """

    defaults = {"search_term": prompt, "location": "Canada"}

    response = client.chat.completions.create(
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        temperature=0.2,
        max_tokens=200,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ]
    )

    try:
        raw = response.choices[0].message.content or ""
        # Models often wrap the object in markdown fences or add a sentence
        # around it; parse only the outermost {...} span.
        start, end = raw.find("{"), raw.rfind("}")
        if start == -1 or end < start:
            return dict(defaults)
        parsed = json.loads(raw[start:end + 1])
    except (ValueError, TypeError, IndexError, AttributeError):
        # Malformed JSON or an unexpectedly shaped response: best-effort
        # fallback to searching with the raw prompt.
        return dict(defaults)

    if not isinstance(parsed, dict):
        return dict(defaults)

    # Guarantee both keys exist and hold non-empty strings so callers can
    # index the result safely.
    result = {}
    for key, fallback in defaults.items():
        value = parsed.get(key)
        cleaned = value.strip() if isinstance(value, str) else ""
        result[key] = cleaned or fallback
    return result


# ======================================================
# Job scraping
# ======================================================

@st.cache_data(ttl=3600)
def _scrape_indeed_cached(
    search_term: str,
    location: str,
    radius_km: int,
    posted_within_days: int
) -> pd.DataFrame:
    """Scrape Indeed and cache successful results for one hour.

    Exceptions are deliberately not caught here, so a failed scrape is never
    stored in the cache.
    """
    jobs = scrape_jobs(
        site_name=["indeed"],
        search_term=search_term,
        location=location,
        results_wanted=100,
        hours_old=posted_within_days * 24,
        country_indeed="Canada",
        radius=radius_km
    )
    return pd.DataFrame(jobs)


def get_indeed_jobs(
    search_term: str,
    location: str,
    radius_km: int,
    posted_within_days: int
) -> pd.DataFrame:
    """Fetch up to 100 Indeed (Canada) postings matching the search.

    Args:
        search_term: Job title or keywords to search for.
        location: Location string passed to the scraper.
        radius_km: Search radius passed through as the scraper's ``radius``.
        posted_within_days: Maximum posting age in days (converted to hours
            for the scraper).

    Returns:
        A DataFrame of postings, or an empty DataFrame if scraping failed.
        Successful results are cached for an hour per argument combination;
        failures are logged and not cached, so the next call retries.
    """
    try:
        return _scrape_indeed_cached(
            search_term,
            location,
            radius_km,
            posted_within_days
        )
    except Exception:
        # Best-effort boundary: the UI treats an empty frame as "no jobs".
        # Log the cause instead of discarding it silently.
        import logging

        logging.getLogger(__name__).exception(
            "Indeed scrape failed for %r in %r", search_term, location
        )
        return pd.DataFrame()


# ======================================================
# Streamlit App
# ======================================================

def _resolve_location(selected_city: str, custom_city: str, ai_location: str) -> str:
    """Choose the search location from the dropdown, custom text, or AI guess."""
    if selected_city == "Use AI / Prompt Location":
        return ai_location
    if selected_city == "Custom city...":
        # An empty custom field falls back to the AI-extracted location.
        return custom_city.strip() or ai_location
    return selected_city


def main():
    """Render the Streamlit job-search page and run a search on demand.

    The page collects a free-text job description, a Groq API key, a
    location choice, filters and optional ranking keywords. When the search
    button is pressed it extracts search parameters with the LLM, scrapes
    Indeed, de-duplicates and ranks the postings by keyword score, then
    offers a CSV download, an optional email, and a preview of the top 20.
    """
    st.set_page_config(page_title="Private Job Search", layout="centered")
    st.title("📄 Private Job Search, Rank & Download")

    # --------------------------------------------------
    # Job description
    # --------------------------------------------------
    job_prompt = st.text_area(
        "Describe the job you are looking for",
        placeholder="e.g. Civil Engineer, Water Resources, Transportation",
        height=120
    )

    api_key = st.text_input("Groq API Key", type="password")

    # --------------------------------------------------
    # City selection
    # --------------------------------------------------
    st.subheader("Location")

    predefined_cities = [
        "Use AI / Prompt Location",
        "Calgary, AB",
        "Edmonton, AB",
        "Toronto, ON",
        "Vancouver, BC",
        "Mississauga, ON",
        "Brampton, ON",
        "Ottawa, ON",
        "Hamilton, ON",
        "Custom city..."
    ]

    selected_city = st.selectbox("Select city", predefined_cities)

    custom_city = ""
    if selected_city == "Custom city...":
        custom_city = st.text_input(
            "Enter city (e.g., Red Deer, AB or Surrey, BC)"
        )

    # --------------------------------------------------
    # Job boards
    # --------------------------------------------------
    st.subheader("Job Boards")
    use_indeed = st.checkbox("Indeed", value=True)

    # --------------------------------------------------
    # Filters
    # --------------------------------------------------
    st.subheader("Filters")

    posted_within_days = st.slider(
        "Posted within last (days)",
        min_value=1,
        max_value=30,
        value=7
    )

    radius_km = st.slider(
        "Search radius (km)",
        min_value=5,
        max_value=100,
        value=25,
        step=5
    )

    # --------------------------------------------------
    # Keyword ranking
    # --------------------------------------------------
    keywords_raw = st.text_input(
        "Keyword ranking (comma-separated)",
        placeholder="water, wastewater, stormwater, EPANET"
    )
    keywords = [k.strip() for k in keywords_raw.split(",") if k.strip()]

    # --------------------------------------------------
    # Optional email
    # --------------------------------------------------
    send_email = st.checkbox("📧 Send results by email (optional)")
    email_address = st.text_input("Email address") if send_email else None

    # --------------------------------------------------
    # Action
    # --------------------------------------------------
    search_clicked = st.button(
        "🔍 Search Jobs",
        disabled=not job_prompt or not api_key
    )
    if not search_clicked:
        return

    # Check the cheap precondition first so no LLM call is spent on a search
    # that cannot run.
    if not use_indeed:
        st.warning("No job boards selected.")
        return

    # UI boundary: report API/auth failures as a message, not a traceback.
    try:
        client = groq.Client(api_key=api_key)
        with st.spinner("Understanding your request..."):
            params = extract_search_parameters(client, job_prompt)
    except Exception as exc:
        st.error(f"Could not interpret your request: {exc}")
        return

    # Tolerate a missing or empty key in the extracted parameters.
    if not isinstance(params, dict):
        params = {}
    search_term = params.get("search_term") or job_prompt
    ai_location = params.get("location") or "Canada"

    # Resolve final location
    location = _resolve_location(selected_city, custom_city, ai_location)

    with st.spinner("Searching jobs..."):
        jobs_df = get_indeed_jobs(
            search_term,
            location,
            radius_km,
            posted_within_days
        )

    if jobs_df.empty:
        st.warning("No jobs found.")
        return

    jobs_df = remove_duplicates(jobs_df.fillna(""))

    # Keyword ranking
    jobs_df["keyword_score"] = jobs_df.apply(
        lambda r: compute_keyword_score(
            f"{r.get('title','')} {r.get('description','')}",
            keywords
        ),
        axis=1
    )

    # Stable sort: postings with equal scores (e.g. all zero when no keywords
    # are given) keep the order the scraper returned them in.
    jobs_df = jobs_df.sort_values(
        by="keyword_score",
        ascending=False,
        kind="mergesort"
    )

    st.success(f"✅ Found {len(jobs_df)} jobs for **{location}**")

    # --------------------------------------------------
    # Download
    # --------------------------------------------------
    csv_data = jobs_df.to_csv(index=False).encode("utf-8")
    st.download_button(
        label="⬇️ Download Jobs (CSV)",
        data=csv_data,
        file_name="job_results.csv",
        mime="text/csv"
    )

    # --------------------------------------------------
    # Optional email
    # --------------------------------------------------
    if send_email:
        if not email_address:
            st.warning("Please enter an email address.")
        elif not email_secrets_available():
            st.warning("Email not configured. Download is still available.")
        else:
            with st.spinner("Sending email..."):
                try:
                    send_email_with_csv(email_address, jobs_df)
                    st.success(f"📧 Results emailed to {email_address}")
                except Exception as e:
                    st.error(f"Failed to send email: {e}")

    # --------------------------------------------------
    # Preview
    # --------------------------------------------------
    st.subheader("Preview (Top 20 Results)")
    preview_cols = [
        c for c in [
            "title", "company", "location",
            "keyword_score", "date_posted", "job_url"
        ] if c in jobs_df.columns
    ]
    st.dataframe(
        jobs_df[preview_cols].head(20),
        use_container_width=True
    )


# Call main() only when this file is run as the main module, not when it is
# imported.
if __name__ == "__main__":
    main()