Anupam007 committed on
Commit
659694c
·
verified ·
1 Parent(s): 7fefece

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +708 -0
app.py ADDED
@@ -0,0 +1,708 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import re
4
+ import json
5
+ import random
6
+ import time
7
+ import smtplib
8
+ import requests
9
+ import numpy as np
10
+ import pandas as pd
11
+ from email.mime.text import MIMEText
12
+ from email.mime.multipart import MIMEMultipart
13
+ from email.mime.application import MIMEApplication
14
+ from datetime import datetime, timedelta
15
+ from PyPDF2 import PdfReader
16
+ from bs4 import BeautifulSoup
17
+ from sentence_transformers import SentenceTransformer
18
+ from sklearn.metrics.pairwise import cosine_similarity
19
+ import torch
20
+ import logging
21
+ import gradio as gr
22
+ from google.colab import files
23
+
24
# Set up logging: console output via basicConfig plus a best-effort log file.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
log_file = "/content/application_log.txt"
try:
    # logging.FileHandler opens the file immediately and raises when the
    # parent directory is missing, which would abort the import on any host
    # that has no /content directory. Create it if possible, otherwise keep
    # console logging only.
    os.makedirs(os.path.dirname(log_file), exist_ok=True)
    logging.getLogger().addHandler(logging.FileHandler(log_file))
except OSError as log_error:
    logging.warning(f"File logging disabled, cannot open {log_file}: {log_error}")

# Set up GPU if available
if torch.cuda.is_available():
    device = torch.device("cuda")
    logging.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    logging.info("GPU not available, using CPU instead")
36
+
37
# Initialize the sentence transformer model
@torch.no_grad()
def initialize_model():
    """Build the 'paraphrase-MiniLM-L6-v2' SentenceTransformer on ``device``.

    Returns:
        The constructed SentenceTransformer instance.

    Raises:
        Exception: whatever the constructor raised; the failure is logged
        first and then re-raised unchanged.
    """
    logging.info("Initializing sentence transformer model")
    try:
        return SentenceTransformer('paraphrase-MiniLM-L6-v2', device=device)
    except Exception as load_error:
        logging.error(f"Failed to initialize model: {load_error}")
        raise


# Shared module-level model instance used by calculate_match_score.
model = initialize_model()
49
+
50
# Function to extract text from a PDF resume
def extract_resume_text(pdf_file_path):
    """Return the concatenated text of every page in the PDF at *pdf_file_path*.

    Pages for which ``extract_text()`` yields nothing are skipped.

    Raises:
        Exception: with the prefix "Error extracting text from PDF: " for any
        failure, including the case where no text could be extracted at all.
    """
    logging.info("Extracting resume text")
    try:
        with open(pdf_file_path, 'rb') as pdf_handle:
            page_texts = (page.extract_text() for page in PdfReader(pdf_handle).pages)
            # The generator must be consumed while the file is still open.
            text = "".join(chunk for chunk in page_texts if chunk)
        if not text.strip():
            raise Exception("No text extracted from PDF. Ensure the PDF is not image-based.")
        logging.info(f"Extracted resume text (first 200 chars): {text[:200]}")
        return text
    except Exception as extract_error:
        logging.error(f"Error extracting text from PDF: {extract_error}")
        raise Exception(f"Error extracting text from PDF: {extract_error}")
68
+
69
# Function to parse resume and extract key information
def parse_resume(resume_text):
    """Extract structured fields from raw resume text using regular expressions.

    Only the first candidate section is parsed. Sections are introduced by a
    header of the form "IM A. SAMPLE <roman numeral>"; when no such header is
    present the whole text is treated as one "Unknown Candidate".

    Args:
        resume_text: Plain text of the resume.

    Returns:
        dict with the keys ``skills``, ``education``, ``experience``,
        ``personal_info`` (name, optional email/phone, address or
        "Not found"), ``react_experience``, ``redux_experience``,
        ``javascript_experience`` (year counts as strings, default "0"),
        ``education_details`` and ``work_history``.
    """
    logging.info("Parsing resume")
    parsed_info = {
        "skills": [],
        "education": [],
        "experience": [],
        "personal_info": {},
        "react_experience": "0",
        "redux_experience": "0",
        "javascript_experience": "0",
        "education_details": [],
        "work_history": []
    }

    # Split resume into sections based on candidate headers.
    # re.split with one capture group yields [prefix, name, body, name, body, ...].
    candidate_pattern = r'(IM A\. SAMPLE [IVX]+)\s*'
    candidate_sections = re.split(candidate_pattern, resume_text, flags=re.IGNORECASE)
    candidates = list(zip(candidate_sections[1::2], candidate_sections[2::2]))
    if not candidates:
        candidates = [("Unknown Candidate", resume_text)]

    candidate_name, candidate_text = candidates[0]
    parsed_info["personal_info"]["name"] = candidate_name.strip()
    logging.info(f"Parsed candidate name: {candidate_name}")

    # Extract email
    email_pattern = r'[\w\.-]+@[\w\.-]+\.\w+'
    email_matches = re.findall(email_pattern, candidate_text, re.IGNORECASE)
    if email_matches:
        parsed_info["personal_info"]["email"] = email_matches[0]
    else:
        logging.warning("No email found in resume")

    # Extract phone number
    phone_pattern = r'\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}'
    phone_matches = re.findall(phone_pattern, candidate_text)
    if phone_matches:
        parsed_info["personal_info"]["phone"] = phone_matches[0]
    else:
        logging.warning("No phone number found in resume")

    # Extract address
    address_pattern = r'(\d+\s+[A-Za-z\s]+,\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5})'
    address_matches = re.findall(address_pattern, candidate_text, re.IGNORECASE)
    if address_matches:
        parsed_info["personal_info"]["address"] = address_matches[0]
    else:
        parsed_info["personal_info"]["address"] = "Not found"
        logging.warning("No address found in resume")

    # Extract skills (expanded list, deliberately permissive substring matching)
    skill_keywords = [
        "python", "java", "javascript", "html", "css", "sql", "react",
        "node", "aws", "azure", "docker", "git", "c++", "visual basic",
        "perl", "asp", "php", "cobol", "xml", "asp.net", "quickbooks",
        "ms office", "ms access", "spss", "typescript", "angular", "vue",
        "mysql", "mongodb", "linux", "bash", "kubernetes", "jenkins"
    ]
    resume_lower = candidate_text.lower()
    # All keywords are already lowercase; a plain substring test also covers
    # the "keyword followed by a space" case the original checked separately.
    parsed_info["skills"] = [skill for skill in skill_keywords if skill in resume_lower]
    if not parsed_info["skills"]:
        logging.warning("No skills extracted from resume")

    # Extract specific experience ("<n> years of experience with <tech>")
    patterns = {
        "react_experience": r'(\d+)[\s\+]*(years?|yrs?)[\s\+]*(?:of)?[\s\+]*(?:experience)?[\s\+]*(?:with|in)?[\s\+]*React',
        "redux_experience": r'(\d+)[\s\+]*(years?|yrs?)[\s\+]*(?:of)?[\s\+]*(?:experience)?[\s\+]*(?:with|in)?[\s\+]*Redux',
        "javascript_experience": r'(\d+)[\s\+]*(years?|yrs?)[\s\+]*(?:of)?[\s\+]*(?:experience)?[\s\+]*(?:with|in)?[\s\+]*(?:JavaScript|JS)'
    }

    for key, pattern in patterns.items():
        matches = re.findall(pattern, candidate_text, re.IGNORECASE)
        if matches:
            # Each match is (number, unit); keep the number of the first hit.
            parsed_info[key] = matches[0][0]
        else:
            logging.debug(f"No {key} found in resume")

    # Extract education.
    # The pattern must expose exactly three groups (degree, institution, year)
    # because the comprehension below unpacks three values per match. The
    # previous pattern captured only degree and year, so any match raised
    # ValueError. The optional possessive is now "(?:'?s)?" directly after the
    # degree keyword, so it no longer swallows the "o" of "of".
    education_pattern = (
        r'(?i)(bachelor|master|phd|b\.s\.|m\.s\.|b\.a\.|m\.a\.|mba|associate|certificate)'
        r'(?:[\'’]?s)?\s*'
        r'([A-Za-z\s,]+?)'
        r'(?:\(|,|\n)'
        r'((?:19|20)\d{2}|Expected[^\n]*|June|Jan|Summer|Fall|Spring)'
    )
    education_matches = re.findall(education_pattern, candidate_text)
    parsed_info["education_details"] = [
        {"degree": deg, "institution": inst.strip(" ,\t\n"), "year": year.strip()}
        for deg, inst, year in education_matches
    ]
    parsed_info["education"] = [
        f"{edu['degree']} from {edu['institution']} ({edu['year']})"
        for edu in parsed_info["education_details"]
    ]
    if not parsed_info["education"]:
        logging.warning("No education details extracted from resume")

    # Extract experience periods ("2010 - 2015", "2018 to present")
    experience_pattern = r'(?i)(\d{4})\s*(?:-|to)\s*(present|\d{4})'
    experience_matches = re.findall(experience_pattern, candidate_text)
    parsed_info["experience"] = [f"{start}-{end}" for start, end in experience_matches]
    if not parsed_info["experience"]:
        logging.warning("No experience periods extracted from resume")

    # Extract work history details ("Role, Company, Location (years)")
    work_history_pattern = r'(?i)([A-Za-z\s\/-]+),\s*([A-Za-z\s]+),\s*([A-Za-z\s]+)\s*\(([\d\s-]+|present|Summer|Fall|Spring|Jan|June)\)'
    work_history_matches = re.findall(work_history_pattern, candidate_text)
    parsed_info["work_history"] = [
        {"role": role.strip(), "company": company.strip(), "location": location.strip(), "years": years.strip()}
        for role, company, location, years in work_history_matches
    ]
    if not parsed_info["work_history"]:
        logging.warning("No work history extracted from resume")

    logging.info(f"Parsed resume info: {json.dumps(parsed_info, indent=2)}")
    return parsed_info
182
+
183
# Function to scrape LinkedIn jobs
def search_jobs(job_title, location, num_jobs=5, skills=None):
    """Scrape LinkedIn's public job search page, falling back to mock jobs.

    Args:
        job_title: Search keywords, also used in fallback titles/descriptions.
        location: Search location, also the default location of each job.
        num_jobs: Maximum number of jobs to return (exact count for mock data).
        skills: Optional list of candidate skills used to synthesise a
            description when a card has none, and as mock requirements when
            the job title matches no known category.

    Returns:
        list of job dicts with the keys ``id``, ``title``, ``company``,
        ``location``, ``description``, ``posting_date``, ``salary_range``,
        ``application_url``, ``email`` and ``requires_form``.

    Any failure while scraping (HTTP error, parsing problem, zero results) is
    logged and replaced by ``num_jobs`` generated mock jobs.

    NOTE(review): the ``email`` field is guessed from the company name
    ("careers@<company>.com"), not read from the posting.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from urllib.parse import quote

    # Avoid a shared mutable default; work on a private copy.
    skills = list(skills) if skills else []
    fallback_requirements = ', '.join(skills[:2] if skills else ['Java', 'SQL'])

    logging.info(f"Scraping LinkedIn jobs for {job_title} in {location}")
    try:
        # quote() encodes spaces as %20 like the old manual replace, and also
        # escapes characters such as '&' or '#' that would corrupt the query.
        url = (
            "https://www.linkedin.com/jobs/search/"
            f"?keywords={quote(job_title)}&location={quote(location)}&f_E=2"
        )
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

        response = requests.get(url, headers=headers, timeout=5)
        if response.status_code != 200:
            logging.error(f"LinkedIn request failed with status {response.status_code}")
            raise Exception(f"HTTP {response.status_code}")

        soup = BeautifulSoup(response.text, 'html.parser')
        job_cards = soup.find_all('div', class_='base-card')[:num_jobs]
        jobs = []

        for i, card in enumerate(job_cards):
            title = card.find('h3', class_='base-search-card__title')
            company = card.find('h4', class_='base-search-card__subtitle')
            job_location = card.find('span', class_='job-search-card__location')
            description = card.find('div', class_='show-more-less-html__markup') or card.find('p')
            link = card.find('a', class_='base-card__full-link')

            title_text = title.get_text(strip=True) if title else f"{job_title} - Entry"
            company_text = company.get_text(strip=True) if company else f"Company {i+1}"
            location_text = job_location.get_text(strip=True) if job_location else location
            if description:
                description_text = description.get_text(strip=True)[:500]
            else:
                description_text = f"Entry-level position for {job_title}. Requirements: {fallback_requirements}."

            # Guard on the same element that is indexed: the old code tested
            # for *any* <a> but indexed the class-filtered lookup, which raised
            # TypeError and threw away every scraped job.
            href = link.get('href') if link is not None else None
            application_url = href if href else f"https://linkedin.com/jobs/{i}"

            email = f"careers@{company_text.lower().replace(' ', '').replace('&', '')}.com"

            jobs.append({
                "id": f"linkedin_job_{i}",
                "title": title_text,
                "company": company_text,
                "location": location_text,
                "description": description_text,
                "posting_date": datetime.now().strftime("%Y-%m-%d"),
                "salary_range": "$40,000 - $60,000",
                "application_url": application_url,
                "email": email,
                "requires_form": random.choice([True, False])
            })

        if not jobs:
            logging.warning("No jobs found on LinkedIn, falling back to mock data")
            raise Exception("No jobs found")

        logging.info(f"Scraped {len(jobs)} LinkedIn jobs")
        return jobs
    except Exception as e:
        # Deliberate best-effort boundary: any scraping problem degrades to
        # generated mock data instead of failing the whole run.
        logging.error(f"Error in LinkedIn job search: {str(e)}")
        mock_jobs = []
        companies = ["TechCorp", "DataSys", "InnoTech", "FutureSoft", "CodeWizards"]
        job_descriptions = [
            f"Seeking an entry-level {job_title} to join our team. Learn and grow with hands-on projects under mentorship.",
            f"Looking for a motivated {job_title} to contribute to innovative solutions. Perfect for recent graduates."
        ]
        tech_keywords = {
            "software engineer": ["Java", "Python", "JavaScript", "SQL", "HTML", "CSS", "Git"],
            "frontend developer": ["JavaScript", "HTML", "CSS", "React"],
            "data analyst": ["Python", "SQL", "Excel", "SPSS"],
            "systems analyst": ["SQL", "Visual Basic", "Database Management"]
        }

        # Prefer role-specific keywords; otherwise fall back to the candidate's
        # skills, then to a generic default.
        job_title_lower = job_title.lower()
        relevant_keywords = next(
            (v for k, v in tech_keywords.items() if k in job_title_lower),
            skills[:3] if skills else ["Java", "SQL", "JavaScript"]
        )

        for i in range(num_jobs):
            company = random.choice(companies)
            job_desc = random.choice(job_descriptions)
            selected_keywords = random.sample(relevant_keywords, min(2, len(relevant_keywords)))
            requirements = f"Requirements: {', '.join(selected_keywords)}."

            # This description used to be built and then discarded in favour
            # of one that echoed the candidate's own skills.
            full_description = f"{job_desc} {requirements}"

            mock_jobs.append({
                "id": f"mock_job_{i}",
                "title": f"{job_title} - Entry",
                "company": company,
                "location": location,
                "description": full_description,
                "posting_date": (datetime.now() - timedelta(days=random.randint(1, 7))).strftime("%Y-%m-%d"),
                "salary_range": "$40,000 - $60,000",
                "application_url": f"https://example.com/jobs/{i}",
                "email": f"careers@{company.lower().replace(' ', '')}.com",
                "requires_form": random.choice([True, False])
            })
        logging.info(f"Fell back to {len(mock_jobs)} mock jobs")
        return mock_jobs
280
+
281
# Function to calculate match score
def calculate_match_score(resume_text, job_description):
    """Score resume/job similarity as cosine similarity of embeddings, times 100.

    Only resume lines mentioning one of a fixed set of skill keywords are
    embedded; when no line qualifies the whole lower-cased resume is used.

    Returns:
        The similarity multiplied by 100, or 0.0 if anything fails (the
        failure is logged).
    """
    logging.info("Calculating match score")
    skill_markers = (
        'java', 'sql', 'javascript', 'python', 'html', 'css', 'react', 'node', 'aws', 'azure', 'docker', 'git'
    )
    try:
        lowered_resume = resume_text.lower()
        # Use entire resume text if skills section is too sparse
        matching_lines = [
            line for line in lowered_resume.split('\n')
            if any(marker in line for marker in skill_markers)
        ]
        skills_section = ' '.join(matching_lines)
        if not skills_section:
            skills_section = lowered_resume
            logging.warning("No specific skills section found, using full resume text for matching")

        resume_vector = model.encode(skills_section, convert_to_tensor=True)
        job_vector = model.encode(job_description, convert_to_tensor=True)
        # cosine_similarity expects 2-D arrays, hence the (1, -1) reshape.
        resume_matrix = resume_vector.cpu().numpy().reshape(1, -1)
        job_matrix = job_vector.cpu().numpy().reshape(1, -1)
        score = cosine_similarity(resume_matrix, job_matrix)[0][0] * 100
        logging.info(f"Match score calculated: {score}%")
        return score
    except Exception as score_error:
        logging.error(f"Error calculating match score: {score_error}")
        return 0.0
303
+
304
# Function to generate entry-level cover letter
def generate_cover_letter(resume_info, job_info):
    """Render the entry-level cover letter for one job.

    Args:
        resume_info: Parsed resume; reads ``skills`` (first two are mentioned)
            and, if present, ``personal_info.name`` for the signature.
        job_info: Job dict; reads ``title`` and ``company``.

    Returns:
        The letter text, chosen with ``random.choice`` from the template list
        (currently a single template).
    """
    logging.info(f"Generating cover letter for {job_info['title']}")
    company_name = job_info["company"]
    job_title = job_info["title"]
    if resume_info["skills"]:
        skills_text = ", ".join(resume_info["skills"][:2])
    else:
        skills_text = "technical skills"
    name = resume_info.get('personal_info', {}).get('name', 'Your Name')

    # Paragraphs are separated by one blank line; the signature follows
    # "Sincerely," on the next line.
    paragraphs = [
        f"Dear Hiring Manager at {company_name},",
        f"I am excited to apply for the {job_title} position. As a recent graduate with skills in {skills_text}, I am eager to contribute to your team and grow under your mentorship.",
        f"{company_name}'s innovative projects inspire me, and I am committed to learning and delivering value in an entry-level role.",
        "Thank you for considering my application. I look forward to discussing how I can contribute.",
        f"Sincerely,\n{name}",
    ]
    templates = ["\n\n".join(paragraphs)]
    return random.choice(templates)
325
+
326
# Function to generate job application form
def generate_job_form(resume_info, job_info):
    """Build the application-form dict for one job from the parsed resume.

    The address is expected as comma-separated segments whose last segment
    ends in "<state> <zip>", e.g. "123 Main St, Springfield, IL 62704" or
    "123 Main St, Springfield IL 62704". The placeholder "Not found" (written
    by parse_resume when no address matched) is treated as no address instead
    of being split into state="Not" / zip="found".

    Args:
        resume_info: Parsed resume dict; every key is optional.
        job_info: Job dict; reads ``id``, ``title`` and ``company``.

    Returns:
        dict with job metadata, ``personal_info``, ``experience``,
        ``preferences``, ``education``, ``skills`` and ``work_history``.
    """
    logging.info(f"Generating job form for {job_info['id']}")
    personal_info = resume_info.get("personal_info", {})
    address = personal_info.get("address", "") or ""
    if address == "Not found":
        address = ""

    segments = [part.strip() for part in address.split(",")] if address else []
    street = segments[0] if segments else ""

    # The last segment carries "<state> <zip>", optionally preceded by the city.
    tail_tokens = segments[-1].split() if segments else []
    state_zip = tail_tokens[-2:]
    state = state_zip[0] if state_zip else ""
    zip_code = state_zip[1] if len(state_zip) > 1 else ""

    city_tokens = tail_tokens[:-2]
    if city_tokens:
        city = " ".join(city_tokens)
    elif len(segments) >= 3:
        # "street, city, ST zip": the city is its own segment. Previously it
        # was always left empty for this layout.
        city = segments[-2]
    else:
        city = ""

    return {
        "job_title": job_info["title"],
        "company": job_info["company"],
        "application_date": datetime.now().strftime("%Y-%m-%d"),
        "personal_info": {
            "name": personal_info.get("name", ""),
            "email": personal_info.get("email", ""),
            "phone": personal_info.get("phone", ""),
            "address": street,
            "city": city,
            "state": state,
            "zip": zip_code,
            "country": "USA"
        },
        "experience": {
            "react_js": resume_info.get("react_experience", "0"),
            "redux_js": resume_info.get("redux_experience", "0"),
            "javascript": resume_info.get("javascript_experience", "0")
        },
        "preferences": {
            "onsite_work": "Yes",
            "commuting": "Yes",
            "relocation": "Yes",
            "remote_work": "Yes"
        },
        "education": resume_info.get("education", []),
        "skills": resume_info.get("skills", []),
        "work_history": resume_info.get("work_history", [])
    }
366
+
367
# Function to save job application form
def save_job_form(form_data, job_id):
    """Write *form_data* as indented JSON under /content and return its path.

    Returns:
        The file path on success, or ``None`` when writing failed (the error
        is logged).
    """
    logging.info(f"Saving job form for {job_id}")
    filename = f"/content/job_application_form_{job_id}.json"
    try:
        with open(filename, "w") as form_file:
            json.dump(form_data, form_file, indent=2)
    except Exception as save_error:
        logging.error(f"Error saving form: {save_error}")
        return None
    else:
        return filename
378
+
379
# Function to test SMTP login
def test_smtp_login(user_email, user_password):
    """Validate the app password's shape, then attempt a Gmail SMTP login.

    The password is stripped of surrounding whitespace and must be exactly
    16 ASCII letters/digits before any network connection is attempted.

    Returns:
        ``(ok, message)`` where *ok* is True only when the login succeeded and
        *message* is a user-facing explanation.
    """
    logging.info(f"Testing SMTP login for {user_email}")
    user_password = user_password.strip()

    # Cheap local checks first, so obviously malformed passwords never reach
    # the network.
    password_length = len(user_password)
    if password_length != 16:
        logging.error(f"Invalid app-specific password length: {password_length} characters")
        return False, "SMTP login failed: App-specific password must be exactly 16 characters. Generate a new one at https://myaccount.google.com/security > App passwords > Select app: Mail > Generate."

    is_alphanumeric = re.match(r'^[a-zA-Z0-9]+$', user_password)
    if not is_alphanumeric:
        logging.error("Invalid app-specific password format: contains invalid characters")
        return False, "SMTP login failed: App-specific password contains invalid characters. Use only letters and numbers."

    try:
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=5) as smtp_connection:
            smtp_connection.starttls()
            smtp_connection.login(user_email, user_password)
            logging.info("SMTP login successful")
            return True, "SMTP login successful"
    except smtplib.SMTPAuthenticationError:
        logging.error("SMTP authentication failed: Invalid email or password")
        return False, "SMTP login failed: Invalid email or app-specific password. Ensure 2-Factor Authentication is enabled (https://myaccount.google.com/security > 2-Step Verification) and use a new app-specific password."
    except Exception as smtp_error:
        logging.error(f"SMTP login failed: {smtp_error}")
        return False, f"SMTP login failed: {smtp_error}. Check network connection or try again later."
401
+
402
# Function to send application email
def send_application(resume_file_path, cover_letter, job_info, user_email, user_password, form_data=None, applicant_name=None):
    """Email the cover letter, resume PDF and optional form JSON via Gmail SMTP.

    Args:
        resume_file_path: Path of the PDF to attach.
        cover_letter: Plain-text body of the email.
        job_info: Job dict; reads ``email``, ``title`` and ``id``.
        user_email: Sender address, also used as SMTP login.
        user_password: Gmail app password (surrounding whitespace is stripped).
        form_data: Optional form dict; when given it is written with
            save_job_form and attached as JSON.
        applicant_name: Optional name appended to the subject. When omitted,
            the module-level ``resume_info`` set by process_application is
            consulted if it exists; previously that global was read
            unconditionally, so a standalone call failed with NameError.

    Returns:
        dict describing the outcome: ``status`` ("success" or "error"),
        ``message``, ``to``, ``from``, ``subject``, ``body``,
        ``resume_attached``, ``form_attached`` and ``sent_time``. Failures are
        reported through the dict, never raised.
    """
    logging.info(f"Sending application to {job_info['email']}")

    if applicant_name is None:
        shared_resume_info = globals().get("resume_info") or {}
        applicant_name = shared_resume_info.get("personal_info", {}).get("name")
    subject = f"Application for {job_info['title']}"
    if applicant_name:
        subject = f"{subject} - {applicant_name}"

    # Track what was really attached so the error report does not claim
    # attachments that never happened.
    resume_attached = False
    form_attached = False
    try:
        msg = MIMEMultipart()
        msg['From'] = user_email
        msg['To'] = job_info['email']
        msg['Subject'] = subject

        msg.attach(MIMEText(cover_letter, 'plain'))

        # Attach resume
        with open(resume_file_path, 'rb') as f:
            resume_attachment = MIMEApplication(f.read(), _subtype='pdf')
        resume_attachment.add_header('Content-Disposition', 'attachment', filename=os.path.basename(resume_file_path))
        msg.attach(resume_attachment)
        resume_attached = True

        # Attach form if required
        if form_data:
            form_filename = save_job_form(form_data, job_info['id'])
            if form_filename:
                with open(form_filename, 'rb') as f:
                    form_attachment = MIMEApplication(f.read(), _subtype='json')
                form_attachment.add_header('Content-Disposition', 'attachment', filename=os.path.basename(form_filename))
                msg.attach(form_attachment)
                form_attached = True

        # Send email
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=5) as server:
            server.starttls()
            server.login(user_email, user_password.strip())
            server.sendmail(user_email, job_info['email'], msg.as_string())

        logging.info(f"Application sent successfully to {job_info['email']}")
        status, message = "success", "Application sent successfully"
    except Exception as e:
        # Boundary handler: callers expect a result dict for every job.
        logging.error(f"Error sending email: {str(e)}")
        status, message = "error", f"Failed to send email: {str(e)}"

    return {
        "status": status,
        "message": message,
        "to": job_info["email"],
        "from": user_email,
        "subject": subject,
        "body": cover_letter,
        "resume_attached": resume_attached,
        "form_attached": form_attached,
        "sent_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }
459
+
460
# Function to predict interview likelihood
def predict_interview_likelihood(match_score):
    """Map a match score to a likelihood label.

    Thresholds are strict lower bounds: above 85 is "Very High", above 70 is
    "High", above 50 is "Medium", anything else is "Low".
    """
    bands = (
        (85, "Very High"),
        (70, "High"),
        (50, "Medium"),
    )
    for lower_bound, label in bands:
        if match_score > lower_bound:
            return label
    return "Low"
470
+
471
# Function to simulate interview scheduling
def schedule_interviews(applications, min_interviews=5):
    """Create a mock interview schedule for a random subset of applications.

    The subset size is the larger of *min_interviews* and 20% of the
    applications, capped at the number of applications. Interviews start
    tomorrow and fill six fixed time slots per day in order.

    Args:
        applications: Result dicts, each with a ``job`` dict providing
            ``company``, ``title`` and ``email``.
        min_interviews: Minimum number of interviews to schedule when enough
            applications exist.

    Returns:
        list of dicts with ``company``, ``job_title``, ``date``, ``time``,
        ``email`` and ``status``.
    """
    logging.info("Scheduling mock interviews")
    application_count = len(applications)
    wanted = max(min_interviews, int(application_count * 0.2))
    interview_candidates = random.sample(applications, min(wanted, application_count))

    first_day = datetime.now() + timedelta(days=1)
    time_slots = [
        "09:00 AM", "10:00 AM", "11:00 AM", "01:00 PM", "02:00 PM", "03:00 PM"
    ]
    slots_per_day = len(time_slots)

    interview_schedule = []
    for position, application in enumerate(interview_candidates):
        # Fill one day's slots before moving on to the next day.
        day_offset, slot_index = divmod(position, slots_per_day)
        job = application["job"]
        interview_day = first_day + timedelta(days=day_offset)
        interview_schedule.append({
            "company": job["company"],
            "job_title": job["title"],
            "date": interview_day.strftime("%Y-%m-%d"),
            "time": time_slots[slot_index],
            "email": job["email"],
            "status": "Scheduled (Mock)"
        })

    logging.info(f"Scheduled {len(interview_schedule)} mock interviews")
    return interview_schedule
496
+
497
# Main application processing function
def process_application(resume_file, job_title, location, user_email, user_password, num_applications=5, progress=gr.Progress()):
    """Run the whole pipeline: validate, parse resume, find jobs, apply, schedule.

    Args:
        resume_file: Path (str) to the uploaded PDF resume.
        job_title: Job title to search for.
        location: Location to search in.
        user_email: Sender Gmail address.
        user_password: Gmail app password.
        num_applications: Number of jobs to apply to (int, 1 to 50).
        progress: Gradio progress reporter, called with a fraction and ``desc``.

    Returns:
        On success, a summary dict with ``resume_info``, ``results``,
        ``interview_schedule`` and counters. Validation or SMTP-login failures
        return ``{"error": message}``; any unexpected exception returns a dict
        with ``error`` plus empty/zero summary fields.

    Side effects: stores the parsed resume in the module-level
    ``resume_info`` and copies the resume to /content/resume.pdf.
    """
    global resume_info
    progress(0, desc="Starting processing...")
    try:
        # Validate inputs
        progress(0.1, desc="Validating inputs...")
        required_fields = [resume_file, job_title, location, user_email, user_password]
        if not all(required_fields):
            return {"error": "All fields are required"}
        if not re.match(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$", user_email):
            return {"error": "Invalid email format"}
        count_is_valid = isinstance(num_applications, int) and 1 <= num_applications <= 50
        if not count_is_valid:
            return {"error": "Number of applications must be between 1 and 50"}
        is_pdf_path = bool(resume_file) and isinstance(resume_file, str) and resume_file.lower().endswith('.pdf')
        if not is_pdf_path:
            return {"error": "Resume must be a valid PDF file path"}

        # Test SMTP login
        progress(0.2, desc="Testing SMTP login...")
        login_ok, login_message = test_smtp_login(user_email, user_password)
        if not login_ok:
            return {"error": login_message}

        # Save uploaded resume
        progress(0.3, desc="Processing resume...")
        resume_path = "/content/resume.pdf"
        if not os.path.exists(resume_file):
            return {"error": f"Resume file not found at {resume_file}"}
        with open(resume_path, "wb") as destination, open(resume_file, "rb") as source:
            destination.write(source.read())

        resume_text = extract_resume_text(resume_path)
        resume_info = parse_resume(resume_text)

        # Search jobs
        progress(0.4, desc="Searching jobs...")
        jobs = search_jobs(job_title, location, num_applications, resume_info["skills"])
        job_count = len(jobs)

        results = []
        for index, job in enumerate(jobs):
            progress(0.5 + (index / job_count) * 0.4, desc=f"Processing application {index+1}/{job_count}...")
            match_score = calculate_match_score(resume_text, job["description"])
            cover_letter = generate_cover_letter(resume_info, job)

            form_data = None
            if job.get("requires_form", False):
                form_data = generate_job_form(resume_info, job)
            if form_data:
                job["form_filename"] = save_job_form(form_data, job["id"])

            outcome = send_application(resume_path, cover_letter, job, user_email, user_password, form_data)

            results.append({
                "job": job,
                "match_score": round(match_score, 2),
                "interview_likelihood": predict_interview_likelihood(match_score),
                "application_status": outcome["status"],
                "application_message": outcome.get("message", ""),
                "form_data": form_data
            })

        progress(0.9, desc="Scheduling interviews...")
        # Best matches first, so results[0] carries the top score.
        results.sort(key=lambda entry: entry["match_score"], reverse=True)
        interview_schedule = schedule_interviews(results)

        progress(1.0, desc="Finalizing results...")
        statuses = [entry["application_status"] for entry in results]
        return {
            "resume_info": resume_info,
            "results": results,
            "interview_schedule": interview_schedule,
            "total_applications": len(results),
            "successful_applications": statuses.count("success"),
            "failed_applications": statuses.count("error"),
            "top_match_score": results[0]["match_score"] if results else 0,
            "forms_generated": len([entry for entry in results if entry.get("form_data") is not None])
        }
    except Exception as processing_error:
        logging.error(f"Error processing application: {processing_error}")
        return {
            "error": str(processing_error),
            "resume_info": None,
            "results": [],
            "interview_schedule": [],
            "total_applications": 0,
            "successful_applications": 0,
            "failed_applications": 0,
            "top_match_score": 0,
            "forms_generated": 0
        }
585
+
586
# Function to format results
def format_results(results):
    """Render the pipeline result dict as a Markdown report.

    Args:
        results: Dict returned by process_application. A truthy ``error``
            entry short-circuits to an error message with troubleshooting
            hints.

    Returns:
        Markdown text with resume analysis, application summary, interview
        schedule and per-job details.

    After building the report, the generated form files and the log file are
    offered for download through ``files.download``. That step is best-effort:
    a missing /content directory or a failing download is logged and the
    report is still returned, where previously the exception discarded it.
    """
    logging.info("Formatting results")
    if "error" in results and results["error"]:
        return f"Error: {results['error']}\n\n**Troubleshooting**:\n- **SMTP Error**: Follow these steps:\n 1. Enable 2-Factor Authentication: https://myaccount.google.com/security > 2-Step Verification.\n 2. Generate an app-specific password: https://myaccount.google.com/security > App passwords > Select app: Mail > Generate.\n 3. Enter the 16-character password without spaces.\n- **No Jobs Found**: LinkedIn may have blocked the request. Try reducing the number of applications or wait 5 minutes.\n- **No Output**: Check the public URL in Colab output and ensure sufficient RAM (Runtime > Change runtime type > High-RAM)."

    resume_info = results["resume_info"]
    application_results = results["results"]
    interview_schedule = results["interview_schedule"]
    personal_info = resume_info.get('personal_info', {})

    # Collect fragments and join once instead of repeated string +=.
    parts = [
        "## Resume Analysis\n",
        f"- Name: {personal_info.get('name', 'Not found')}\n",
        f"- Email: {personal_info.get('email', 'Not found')}\n",
        f"- Phone: {personal_info.get('phone', 'Not found')}\n",
        f"- Address: {personal_info.get('address', 'Not found')}\n",
        f"- Skills: {', '.join(resume_info['skills']) or 'None'}\n",
        f"- Education: {', '.join(resume_info['education']) or 'None'}\n",
        f"- Experience: {', '.join(resume_info['experience']) or 'None'}\n",
        "\n## Application Results\n",
        f"- Total Applications: {results['total_applications']}\n",
        f"- Successful: {results['successful_applications']}\n",
        f"- Failed: {results['failed_applications']}\n",
        f"- Top Match Score: {results['top_match_score']}%\n",
        f"- Forms Generated: {results['forms_generated']}\n",
        f"- Scheduled Interviews: {len(interview_schedule)} (Note: These are mock schedules pending real company responses)\n\n",
        "## Interview Schedule\n",
    ]

    for i, interview in enumerate(interview_schedule, 1):
        parts.append(f"### {i}. {interview['job_title']} at {interview['company']}\n")
        parts.append(f"- Date: {interview['date']}\n")
        parts.append(f"- Time: {interview['time']}\n")
        parts.append(f"- Email: {interview['email']}\n")
        parts.append(f"- Status: {interview['status']}\n\n")

    parts.append("## Job Matches\n")
    applied_on = datetime.now().strftime('%Y-%m-%d')
    for i, result in enumerate(application_results, 1):
        job = result["job"]
        parts.append(f"### {i}. {job['title']} at {job['company']}\n")
        parts.append(f"- Location: {job['location']}\n")
        parts.append(f"- Match Score: {result['match_score']}%\n")
        parts.append(f"- Interview Likelihood: {result['interview_likelihood']}\n")
        parts.append(f"- Status: {result['application_status'].upper()}\n")
        if job.get("requires_form", False):
            parts.append(f"- Form: {job.get('form_filename', 'Generated')}\n")
        if result["application_status"] == "error":
            parts.append(f"- Error: {result['application_message']}\n")
        parts.append(f"- Email: {job['email']}\n")
        parts.append(f"- Description: {job['description']}\n")
        parts.append(f"- Applied: {applied_on}\n\n")

    output = "".join(parts)

    # Download forms and log file (best-effort, see docstring).
    try:
        form_files = [
            f for f in os.listdir('/content')
            if f.startswith("job_application_form_") and f.endswith(".json")
        ]
        for form_file in form_files:
            files.download(f"/content/{form_file}")
        files.download(log_file)
        logging.info("Results formatted and files downloaded")
    except Exception as download_error:
        logging.warning(f"Results formatted; file download skipped: {download_error}")

    return output
646
+
647
# Gradio interface
def gradio_interface(resume_file, job_title, location, user_email, user_password, num_applications):
    """Gradio callback: stage the uploaded resume, run the pipeline, format it.

    Args:
        resume_file: Path of the uploaded file as handed over by Gradio.
        job_title, location, user_email, user_password: Form field values.
        num_applications: Requested number of applications; falsy means 5.

    Returns:
        The Markdown report, or an "Error: ..." string when anything fails.
    """
    logging.info("Starting Gradio interface processing")
    try:
        if num_applications:
            num_applications = int(num_applications)
        else:
            num_applications = 5
        # Save the uploaded resume file to a temporary path
        resume_path = "/content/uploaded_resume.pdf"
        with open(resume_path, "wb") as destination, open(resume_file, "rb") as upload:
            destination.write(upload.read())
        pipeline_results = process_application(
            resume_path, job_title, location, user_email, user_password, num_applications
        )
        return format_results(pipeline_results)
    except ValueError:
        logging.error("Invalid number of applications")
        return "Error: Number of applications must be an integer between 1 and 50."
    except Exception as interface_error:
        logging.error(f"Gradio interface error: {interface_error}")
        return f"Error: {interface_error}"
665
+
666
# Setup instructions for Gradio in Colab
def setup_and_run():
    """Build the Gradio interface and launch it, preferring a public share link.

    Launching with ``share=True`` is attempted up to three times. If every
    attempt fails, one launch without sharing is tried. All failures are
    logged and printed; nothing is raised.

    ``launch()`` returns the tuple ``(app, local_url, share_url)``. The URLs
    are unpacked before being reported; previously the whole tuple was
    logged and printed as if it were the URL.
    """
    print("Installing dependencies...")
    # Dependencies (install manually in a notebook cell):
    # !pip install PyPDF2 beautifulsoup4 sentence-transformers scikit-learn torch numpy pandas requests gradio
    print("Starting Gradio interface...")

    iface = gr.Interface(
        fn=gradio_interface,
        inputs=[
            gr.File(label="Upload Resume (PDF)", file_types=[".pdf"]),
            gr.Textbox(label="Job Title (e.g., Software Engineer)", placeholder="Software Engineer"),
            gr.Textbox(label="Location (e.g., India)", placeholder="India"),
            gr.Textbox(label="Your Gmail Address", placeholder="diabeteseducation61@gmail.com"),
            gr.Textbox(label="Your Gmail App-Specific Password (16 characters, no spaces)", type="password"),
            gr.Number(label="Number of Applications (default 5)", value=5, minimum=1, maximum=50)
        ],
        outputs=gr.Markdown(label="Results"),
        title="Job Application Automator",
        description="Upload your resume and apply to entry-level jobs. **Important**: To generate a Gmail app-specific password:\n1. Enable 2-Factor Authentication: https://myaccount.google.com/security > 2-Step Verification.\n2. Generate an app-specific password: https://myaccount.google.com/security > App passwords > Select app: Mail > Select device: Other > Generate.\n3. Use the 16-character password without spaces."
    )
    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            logging.info(f"Attempting to launch Gradio (attempt {attempt + 1}/{max_attempts})")
            _, local_url, share_url = iface.launch(share=True, quiet=True)
            # share_url is only set when the tunnel was created.
            public_url = share_url or local_url
            logging.info(f"Gradio launched successfully with public URL: {public_url}")
            print(f"Gradio interface launched at: {public_url}")
            break
        except Exception as e:
            logging.error(f"Gradio launch failed (attempt {attempt + 1}): {str(e)}")
            if attempt == max_attempts - 1:
                print(f"Failed to launch Gradio with share=True after {max_attempts} attempts: {str(e)}")
                print("Falling back to local tunnel...")
                try:
                    _, local_url, _ = iface.launch(quiet=True)
                    logging.info(f"Gradio launched locally with URL: {local_url}")
                    print(f"Gradio interface launched locally at: {local_url}")
                except Exception as local_e:
                    logging.error(f"Local Gradio launch failed: {str(local_e)}")
                    print(f"Local launch failed: {str(local_e)}. Check Colab resources or try again.")


if __name__ == "__main__":
    setup_and_run()