Alpha108 committed on
Commit
165223f
·
verified ·
1 Parent(s): bfc0ccf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -73
app.py CHANGED
@@ -1,84 +1,198 @@
1
  import streamlit as st
 
2
  import requests
3
- import fitz # PyMuPDF for PDF extraction
4
  from sentence_transformers import SentenceTransformer, util
5
  import torch
 
 
6
 
7
# -------------------- CONFIG --------------------
# Page metadata (browser-tab title, icon and wide layout) for the app.
st.set_page_config(page_title="AI Job Matcher", page_icon="🤖", layout="wide")


@st.cache_resource
def load_model():
    """Return the all-MiniLM-L6-v2 sentence-embedding model.

    Decorated with ``st.cache_resource`` so the model object is cached by
    Streamlit instead of being constructed on every call.
    """
    return SentenceTransformer("all-MiniLM-L6-v2")


# Module-level handle used by the matching code below.
model = load_model()
15
 
16
- # -------------------- FUNCTIONS --------------------
17
def extract_text_from_pdf(uploaded_file):
    """Extract the plain text of every page of an uploaded PDF.

    Args:
        uploaded_file: File-like object whose ``read()`` returns the PDF bytes.

    Returns:
        The text of all pages concatenated in page order.
    """
    # Open the document as a context manager so it is closed even when a
    # page fails to parse; the original never closed it.
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
        return "".join(page.get_text("text") for page in doc)
24
-
25
def fetch_jobs(timeout=10):
    """Fetch job postings from the RemoteOK API.

    Args:
        timeout: Seconds to wait for the HTTP response before giving up.
            Without a timeout ``requests.get`` can block indefinitely.

    Returns:
        The decoded JSON list without its first element, or an empty list
        when the request fails, the status is not 200, or the payload is not
        a list. Failures are also reported through ``st.error``.
    """
    url = "https://remoteok.com/api"
    try:
        response = requests.get(
            url, headers={"User-Agent": "Mozilla/5.0"}, timeout=timeout
        )
        if response.status_code == 200:
            payload = response.json()
            # Guard the slice: a non-list payload (e.g. an error object)
            # cannot be sliced with [1:].
            if isinstance(payload, list):
                return payload[1:]  # skip metadata
    except (requests.exceptions.RequestException, ValueError) as e:
        st.error(f"⚠️ Error fetching jobs: {e}")
    return []
35
-
36
def match_jobs(cv_text, jobs, top_n=5, max_jobs=30):
    """Rank job postings by embedding similarity to the CV text.

    Args:
        cv_text: Text extracted from the CV.
        jobs: Sequence of job dicts; the ``position``, ``company``,
            ``description`` and ``url`` keys are read with ``.get``.
        top_n: Maximum number of matches to return.
        max_jobs: Only the first ``max_jobs`` postings are scored
            (previously a hard-coded demo limit of 30).

    Returns:
        A list of dicts with ``title``, ``company``, ``url`` and ``score``
        (cosine similarity times 100, rounded to 2 decimals), sorted by score
        in descending order and truncated to ``top_n``. Empty when the CV
        text is blank or there are no jobs.
    """
    if not cv_text or not cv_text.strip():
        return []

    candidates = jobs[:max_jobs]
    if not candidates:
        # Nothing to score, so skip the (comparatively expensive) CV encoding.
        return []

    titles = [job.get("position", "") for job in candidates]
    companies = [job.get("company", "") for job in candidates]
    job_texts = [
        f"{title} at {company}. {job.get('description', '')}"
        for title, company, job in zip(titles, companies, candidates)
    ]

    cv_embedding = model.encode(cv_text, convert_to_tensor=True)
    # Encode all postings in a single batch instead of one call per job.
    job_embeddings = model.encode(job_texts, convert_to_tensor=True)
    similarities = util.cos_sim(cv_embedding, job_embeddings)[0].tolist()

    results = [
        {
            "title": title,
            "company": company,
            "url": job.get("url", ""),
            "score": round(similarity * 100, 2),
        }
        for title, company, job, similarity in zip(
            titles, companies, candidates, similarities
        )
    ]
    return sorted(results, key=lambda x: x["score"], reverse=True)[:top_n]
61
-
62
# -------------------- UI --------------------
st.title("🤖 AI Freelancer Job Matcher")
st.markdown("Upload your **CV (PDF)** and get real-time job matches from RemoteOK.")

uploaded_file = st.file_uploader("📄 Upload your CV", type=["pdf"])

if uploaded_file is not None:
    # Extract the CV text and show a short preview (first 1000 characters).
    cv_text = extract_text_from_pdf(uploaded_file)
    st.success("✅ CV uploaded & text extracted!")
    st.text_area("Extracted CV Text", cv_text[:1000], height=200)

    jobs = fetch_jobs()
    if jobs:
        results = match_jobs(cv_text, jobs)

        # One entry per match: title/company, score and a link to the posting.
        st.subheader("🎯 Top Job Matches")
        for r in results:
            st.markdown(f"**{r['title']}** at *{r['company']}*")
            st.write(f"Match Score: {r['score']}%")
            st.markdown(f"[View Job Posting]({r['url']})")
            st.markdown("---")
    else:
        st.warning("⚠️ No jobs fetched. Try again later.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import fitz # PyMuPDF
3
  import requests
 
4
  from sentence_transformers import SentenceTransformer, util
5
  import torch
6
+ import re
7
+ import io
8
 
9
# Load the pre-trained Sentence Transformer model.
# A small, efficient model suitable for HF Spaces.
#
# A Streamlit script is re-executed from the top on every widget interaction,
# so constructing the model directly at module level rebuilds it on each
# rerun. st.cache_resource keeps one shared instance instead.
# show_spinner=False keeps the loader from rendering anything before
# st.set_page_config() is called further down.
# NOTE(review): older Streamlit releases reject any output before
# set_page_config -- confirm against the deployed version.
@st.cache_resource(show_spinner=False)
def _load_model():
    """Return the shared all-MiniLM-L6-v2 sentence-embedding model."""
    return SentenceTransformer('all-MiniLM-L6-v2')


model = _load_model()
 
 
 
 
 
12
 
13
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page from an uploaded PDF file.

    Args:
        pdf_file: File-like object whose ``read()`` returns the PDF bytes.

    Returns:
        The text of all pages concatenated in page order, or ``None`` when
        the PDF cannot be read (the error is also shown via ``st.error``).
    """
    try:
        pdf_bytes = pdf_file.read()
        # Context manager closes the document even if a page fails to parse;
        # the original left it open.
        with fitz.open(stream=io.BytesIO(pdf_bytes), filetype="pdf") as pdf_document:
            return "".join(page.get_text() for page in pdf_document)
    except Exception as e:
        # UI boundary: report any parsing failure to the user instead of
        # crashing the app.
        st.error(f"Error reading PDF file: {e}")
        return None
26
+
27
# Canonical spellings of the skills, technologies and tools recognized by
# extract_keywords. Extend this tuple to detect more keywords.
_SKILL_NAMES = (
    "Python", "Java", "C++", "JavaScript", "SQL", "React", "Node.js",
    "Angular", "Vue", "AWS", "Azure", "GCP", "Docker", "Kubernetes",
    "TensorFlow", "PyTorch", "Scikit-learn", "Pandas", "NumPy", "Git",
)
_CANONICAL_SKILL = {name.casefold(): name for name in _SKILL_NAMES}
# Lookarounds are used instead of \b: a trailing \b after "C++" requires a
# word character to follow the "+", so "C++ " or "C++." would never match.
_SKILL_PATTERN = re.compile(
    r"(?<!\w)(" + "|".join(re.escape(name) for name in _SKILL_NAMES) + r")(?!\w)",
    re.IGNORECASE,
)


def extract_keywords(text):
    """Extract known skill keywords from free text.

    A simple regex-based extractor; it can be replaced with a more
    sophisticated NLP model if needed.

    Args:
        text: Text to scan; ``None`` or an empty string yields no keywords.

    Returns:
        The unique skills found, in canonical spelling (e.g. "python" is
        reported as "Python") and in order of first appearance.
    """
    if not text:
        return []
    found = {}
    for match in _SKILL_PATTERN.finditer(text):
        raw = match.group(1)
        # Fall back to the raw match for exotic case-insensitive matches that
        # do not casefold onto a canonical key.
        found.setdefault(_CANONICAL_SKILL.get(raw.casefold(), raw), None)
    return list(found)
39
+
40
def fetch_remoteok_jobs(timeout=10):
    """Fetch the latest jobs from the RemoteOK API.

    Args:
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests.get`` can block indefinitely and freeze the app.

    Returns:
        The list of job entries without the first element, or an empty list
        when the request fails, the body is not valid JSON, or the payload is
        not a list with more than one element. Failures are also reported
        through ``st.error``.
    """
    try:
        response = requests.get(
            'https://remoteok.com/api',
            # Browser-like User-Agent.
            # NOTE(review): presumably the API rejects the default
            # python-requests agent -- verify.
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=timeout,
        )
        response.raise_for_status()  # Raise an exception for bad status codes
        jobs = response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"Failed to fetch jobs from RemoteOK: {e}")
        return []
    except ValueError:
        st.error("Failed to parse JSON response from RemoteOK.")
        return []

    # The first element is often a header/legal notice, so we skip it.
    if isinstance(jobs, list) and len(jobs) > 1:
        return jobs[1:]
    return []
54
 
55
# Single-entry memo of the most recently encoded CV. The same CV text is
# compared against every fetched job, so caching it avoids re-encoding the CV
# once per job.
_cv_embedding_cache = {}


def _encode_cv(cv_text):
    """Return the embedding of ``cv_text``, reusing the last result for the same text."""
    if cv_text not in _cv_embedding_cache:
        _cv_embedding_cache.clear()  # keep at most one CV in memory
        _cv_embedding_cache[cv_text] = model.encode(cv_text, convert_to_tensor=True)
    return _cv_embedding_cache[cv_text]


def calculate_similarity(cv_text, job_description):
    """Calculate the cosine similarity between CV text and a job description.

    Args:
        cv_text: Text extracted from the CV.
        job_description: Text of the job posting.

    Returns:
        The cosine similarity of the two sentence embeddings as a float, or
        ``0.0`` when either text is missing, empty or whitespace-only.
    """
    if not cv_text or not job_description:
        return 0.0
    # Whitespace-only text carries no content to compare.
    if not cv_text.strip() or not job_description.strip():
        return 0.0

    cv_embedding = _encode_cv(cv_text)
    job_embedding = model.encode(job_description, convert_to_tensor=True)

    # cos_sim returns a 1x1 tensor for two single embeddings.
    return util.cos_sim(cv_embedding, job_embedding).item()
67
+
68
def generate_match_explanation(cv_keywords, job_description):
    """Generate a brief explanation of why a job matches the CV.

    This is a simplified implementation. For more advanced explanations a
    generative model (like GPT or T5) could be used.

    Args:
        cv_keywords: Keywords detected in the CV.
        job_description: Text of the job posting.

    Returns:
        A sentence naming up to three CV keywords that also occur in the job
        description (case-insensitive, whole-token match), or a generic
        sentence when none occur.
    """
    description = job_description or ''
    common_keywords = []
    seen = set()
    for keyword in cv_keywords or ():
        if not keyword:
            continue
        folded = keyword.casefold()
        if folded in seen:
            # "python" and "Python" are the same skill; list it once.
            continue
        # Lookarounds instead of \b so keywords ending in punctuation,
        # such as "C++", still match before a space or end of text.
        pattern = r'(?<!\w)' + re.escape(keyword) + r'(?!\w)'
        if re.search(pattern, description, re.IGNORECASE):
            seen.add(folded)
            common_keywords.append(keyword)

    if not common_keywords:
        return "This job aligns with your general profile based on the overall text similarity."

    explanation = f"This role is a strong match because it requires skills you possess, such as: **{', '.join(common_keywords[:3])}**."
    return explanation
84
+
85
+
86
def notify_user(job):
    """Placeholder notification hook for a high-match job.

    In a real-world application this would integrate with an email or
    messaging service. For the MVP the message is only printed to stdout.

    Args:
        job: Job dict; ``position``, ``company`` and ``match_score`` are read.
            Missing fields fall back to ``'N/A'`` / ``0.0`` so a partial job
            record cannot raise ``KeyError``.
    """
    position = job.get('position', 'N/A')
    company = job.get('company', 'N/A')
    match_score = job.get('match_score', 0.0)
    log_message = f"Notification Triggered: New high-match job found - '{position}' at {company}. Match: {match_score:.2f}%"
    print(log_message)  # For MVP, we just log to console.
92
+
93
+
94
# --- Streamlit App UI ---
# Page metadata: browser-tab title, icon and wide layout.
st.set_page_config(page_title="AI Job Matcher", page_icon="🤖", layout="wide")

# Page heading and a short description of what the app does.
st.title("🤖 AI-Powered Job Matcher")
st.markdown("""
Upload your CV, and this app will scan job platforms to find the best matches for your profile.
It uses sentence embeddings to understand the context of your skills and the job requirements.
""")
102
+
103
# --- State Management ---
# Seed each session-state key on the first run only; values already stored
# from earlier reruns are left untouched.
_SESSION_DEFAULTS = {
    'cv_text': None,       # extracted CV text, or None until a CV is processed
    'cv_keywords': [],     # keywords detected in the CV
    'jobs': [],            # scored jobs from the last search
    'processed': False,    # True once a search produced results
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
112
+
113
+
114
# --- Sidebar for CV Upload and Controls ---
with st.sidebar:
    st.header("1. Upload Your CV")
    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

    # The button is only rendered once a file has been chosen.
    if uploaded_file is not None and st.button("Process CV"):
        with st.spinner('Analyzing your CV...'):
            # Drop everything derived from a previously processed CV first,
            # so a failed extraction cannot leave stale keywords or matches.
            st.session_state.cv_keywords = []
            st.session_state.jobs = []
            st.session_state.processed = False  # Reset processed state to allow re-matching

            st.session_state.cv_text = extract_text_from_pdf(uploaded_file)
            if st.session_state.cv_text:
                st.session_state.cv_keywords = extract_keywords(st.session_state.cv_text)
                st.success("CV processed successfully!")
            else:
                st.error("Could not extract text from the CV.")

    if st.session_state.cv_text:
        st.subheader("Detected Skills & Keywords:")
        if st.session_state.cv_keywords:
            st.write(", ".join(st.session_state.cv_keywords))
        else:
            st.write("No specific keywords detected. Matching will be based on overall text.")

    st.header("2. Select Job Platforms")
    use_remoteok = st.checkbox("RemoteOK", value=True)
    # Add other platforms here as they are implemented
    # use_upwork = st.checkbox("Upwork (Not Implemented)", disabled=True)
    # use_freelancer = st.checkbox("Freelancer (Not Implemented)", disabled=True)
142
+
143
+
144
# --- Main Content Area for Job Matching and Display ---
if st.session_state.cv_text:
    if st.button("🚀 Find My Dream Job", type="primary"):
        # Start each search from a clean slate so a failed run never shows
        # matches from an earlier one.
        st.session_state.processed = False
        st.session_state.jobs = []
        matched_jobs = []

        with st.spinner("Fetching and analyzing jobs... This may take a moment."):
            if use_remoteok:
                fetched_jobs = fetch_remoteok_jobs() or []
                for job in fetched_jobs:
                    # Score only dict entries that carry a description,
                    # position and company.
                    if not isinstance(job, dict):
                        continue
                    if 'description' in job and 'position' in job and 'company' in job:
                        similarity_score = calculate_similarity(
                            st.session_state.cv_text, job['description']
                        )
                        job['match_score'] = similarity_score * 100
                        matched_jobs.append(job)

        if matched_jobs:
            # Sort jobs by match score in descending order
            st.session_state.jobs = sorted(
                matched_jobs, key=lambda x: x['match_score'], reverse=True
            )
            st.session_state.processed = True

            # Trigger notifications for high-matching jobs (e.g., > 70%)
            for job in st.session_state.jobs:
                if job['match_score'] > 70:
                    notify_user(job)
        else:
            st.warning("No jobs found or there was an issue with the job platforms. Please try again.")

    if st.session_state.processed and st.session_state.jobs:
        st.header("🏆 Top Job Matches For You")
        top_n = 10
        for job in st.session_state.jobs[:top_n]:
            with st.container(border=True):
                col1, col2 = st.columns([4, 1])
                with col1:
                    st.subheader(job.get('position', 'N/A'))
                    st.write(f"**Company:** {job.get('company', 'N/A')}")
                    tags = job.get('tags', [])
                    if tags:
                        st.write(f"**Tags:** `{'`, `'.join(map(str, tags[:5]))}`")

                    explanation = generate_match_explanation(
                        st.session_state.cv_keywords, job.get('description', '')
                    )
                    st.info(f"**Why it's a match:** {explanation}")

                    # The URL comes from an external API, so render it as a
                    # plain Markdown link; raw HTML is deliberately not
                    # enabled here.
                    st.markdown(f"[View Job Posting]({job.get('url', '#')})")

                with col2:
                    match_score = job.get('match_score', 0)
                    # Cosine similarity can be negative; clamp to the 0-100
                    # integer range that st.progress accepts.
                    st.progress(max(0, min(100, int(match_score))))
                    st.metric(label="Match Score", value=f"{match_score:.2f}%")
else:
    st.info("Upload your CV and select job platforms to get started.")