TKM03 committed on
Commit
1d2993e
·
verified ·
1 Parent(s): 52a747a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -53
app.py CHANGED
@@ -1,26 +1,50 @@
1
- import re
2
  import os
 
3
  import shutil
4
  import PyPDF2
5
  import gradio as gr
6
  from transformers import pipeline
7
-
8
- # Load classification model
9
- text_classifier = pipeline("text-classification", model="saattrupdan/job-listing-filtering-model")
10
-
11
- # Label mapping for binary classification
12
- LABEL_MAP = {
13
- "LABEL_0": "Irrelevant",
14
- "LABEL_1": "Relevant"
15
- }
16
-
17
- # Global variable to store the filtered files per label
18
- classified_files = {
19
- "Relevant": [],
20
- "Irrelevant": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  }
22
 
23
-
24
  def clean_resume_text(text):
25
  text = re.sub(r'http\S+', ' ', text)
26
  text = re.sub(r'#\S+', '', text)
@@ -29,7 +53,6 @@ def clean_resume_text(text):
29
  text = re.sub(r'[^\x00-\x7f]', ' ', text)
30
  return re.sub(r'\s+', ' ', text).strip()
31
 
32
-
33
  def extract_resume_text(file):
34
  try:
35
  reader = PyPDF2.PdfReader(file)
@@ -42,61 +65,60 @@ def extract_resume_text(file):
42
  except Exception as e:
43
  return None, f"Error reading PDF: {str(e)}"
44
 
45
-
46
- def classify_and_store(files):
47
- predictions = {}
48
- classified_files["Relevant"] = []
49
- classified_files["Irrelevant"] = []
50
-
51
- if os.path.exists("filtered_resumes"):
52
- shutil.rmtree("filtered_resumes")
53
- os.makedirs("filtered_resumes", exist_ok=True)
54
 
55
  for file in files:
56
  file_name = os.path.basename(file.name)
57
  resume_text, error = extract_resume_text(file)
58
  if error:
59
- predictions[file_name] = {"error": error}
60
  continue
61
 
62
  cleaned_text = clean_resume_text(resume_text)
63
- result = text_classifier(cleaned_text[:512])
64
- label = result[0]['label']
65
- score = round(result[0]['score'], 4)
66
- status = LABEL_MAP.get(label, "Unknown")
67
-
68
- predictions[file_name] = {
69
- "Relevance": status,
70
- "Confidence Score": score
71
- }
72
-
73
- dest_path = f"filtered_resumes/{file_name}"
74
- with open(file.name, "rb") as f_in, open(dest_path, "wb") as f_out:
75
- shutil.copyfileobj(f_in, f_out)
76
 
77
- classified_files[status].append(dest_path)
 
 
 
78
 
79
- return predictions
 
80
 
 
 
81
 
82
- def get_resumes_by_category(category):
83
- return classified_files.get(category, [])
84
 
 
 
 
 
 
85
 
86
  # Gradio UI
87
- with gr.Blocks(title="Resume Classifier & Category Filter") as demo:
88
- gr.Markdown("## 📂 Resume Relevance Classifier\nUpload resumes and view based on relevance category.")
 
89
 
90
  file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resume PDFs")
91
- classify_button = gr.Button("🧠 Classify Resumes")
 
92
 
93
- relevance_output = gr.JSON(label="Classification Results")
 
94
 
95
- category_dropdown = gr.Dropdown(choices=["Relevant", "Irrelevant"], label="Select Resume Category to View")
96
- filtered_files_output = gr.File(label="Filtered Resumes", file_types=[".pdf"], file_count="multiple")
 
97
 
98
- classify_button.click(fn=classify_and_store, inputs=[file_input], outputs=[relevance_output])
99
- category_dropdown.change(fn=get_resumes_by_category, inputs=[category_dropdown], outputs=[filtered_files_output])
100
 
101
  if __name__ == "__main__":
102
- demo.launch()
 
 
1
  import os
2
+ import re
3
  import shutil
4
  import PyPDF2
5
  import gradio as gr
6
  from transformers import pipeline
7
+ from collections import defaultdict
8
+
9
+ # Load job classification model
10
+ text_classifier = pipeline("text-classification", model="serbog/distilbert-jobCategory_410k")
11
+
12
+ # Expanded label map (you can update based on actual model labels returned)
13
+ CATEGORY_MAP = {
14
+ "LABEL_0": "Information Technology / Software Engineering",
15
+ "LABEL_1": "Healthcare / Medical / Nursing",
16
+ "LABEL_2": "Finance / Accounting / Auditing",
17
+ "LABEL_3": "Engineering / Mechanical / Civil / Electrical",
18
+ "LABEL_4": "Education / Training / Teaching",
19
+ "LABEL_5": "Sales / Marketing / Business Development",
20
+ "LABEL_6": "Customer Service / Support",
21
+ "LABEL_7": "Human Resources / Recruitment / Talent Acquisition",
22
+ "LABEL_8": "Legal / Compliance",
23
+ "LABEL_9": "Administration / Clerical",
24
+ "LABEL_10": "Operations / Logistics / Supply Chain",
25
+ "LABEL_11": "Management / Strategy / Consulting",
26
+ "LABEL_12": "Science / Research / R&D",
27
+ "LABEL_13": "Design / UI-UX / Creative",
28
+ "LABEL_14": "Manufacturing / Production",
29
+ "LABEL_15": "Hospitality / Tourism / Travel",
30
+ "LABEL_16": "Construction / Architecture",
31
+ "LABEL_17": "Media / Communication / PR / Journalism",
32
+ "LABEL_18": "Procurement / Purchasing",
33
+ "LABEL_19": "Security / Safety",
34
+ "LABEL_20": "Real Estate / Property Management",
35
+ "LABEL_21": "Energy / Oil & Gas / Utilities",
36
+ "LABEL_22": "Agriculture / Environmental / Forestry",
37
+ "LABEL_23": "Transportation / Automotive / Aviation",
38
+ "LABEL_24": "Retail / Merchandising / E-commerce",
39
+ "LABEL_25": "Data Science / Machine Learning / AI",
40
+ "LABEL_26": "Product Management / Project Management",
41
+ "LABEL_27": "Quality Assurance / Control",
42
+ "LABEL_28": "Telecommunication / Network Engineering",
43
+ "LABEL_29": "Entrepreneurship / Startups / Freelancing",
44
+ "LABEL_30": "Other / Miscellaneous"
45
  }
46
 
47
+ # Helper functions
48
  def clean_resume_text(text):
49
  text = re.sub(r'http\S+', ' ', text)
50
  text = re.sub(r'#\S+', '', text)
 
53
  text = re.sub(r'[^\x00-\x7f]', ' ', text)
54
  return re.sub(r'\s+', ' ', text).strip()
55
 
 
56
  def extract_resume_text(file):
57
  try:
58
  reader = PyPDF2.PdfReader(file)
 
65
  except Exception as e:
66
  return None, f"Error reading PDF: {str(e)}"
67
 
68
+ def classify_resumes(files):
69
+ categorized = defaultdict(list)
70
+ label_scores = {}
71
+ os.makedirs("classified_resumes", exist_ok=True)
 
 
 
 
 
72
 
73
  for file in files:
74
  file_name = os.path.basename(file.name)
75
  resume_text, error = extract_resume_text(file)
76
  if error:
 
77
  continue
78
 
79
  cleaned_text = clean_resume_text(resume_text)
80
+ result = text_classifier(cleaned_text[:512])[0]
81
+ label = result['label']
82
+ score = round(result['score'], 4)
83
+ category = CATEGORY_MAP.get(label, label)
 
 
 
 
 
 
 
 
 
84
 
85
+ # Save to relevant folder
86
+ cat_folder = os.path.join("classified_resumes", category.replace(" ", "_"))
87
+ os.makedirs(cat_folder, exist_ok=True)
88
+ save_path = os.path.join(cat_folder, file_name)
89
 
90
+ with open(file.name, "rb") as f_in, open(save_path, "wb") as f_out:
91
+ shutil.copyfileobj(f_in, f_out)
92
 
93
+ categorized[category].append(save_path)
94
+ label_scores[file_name] = {"Predicted Job Category": category, "Confidence Score": score}
95
 
96
+ return label_scores, categorized
 
97
 
98
+ def show_category_files(selected_category):
99
+ category_path = os.path.join("classified_resumes", selected_category.replace(" ", "_"))
100
+ if not os.path.exists(category_path):
101
+ return []
102
+ return [os.path.join(category_path, f) for f in os.listdir(category_path) if f.endswith(".pdf")]
103
 
104
  # Gradio UI
105
+ with gr.Blocks(title="🧠 Resume Screening & Categorization") as demo:
106
+ gr.Markdown("""## 📄 Resume Screening by Job Role/Industry
107
+ Upload resumes below. The app classifies each into categories like IT, HR, Sales, etc. Then click on any category to view/download relevant resumes.""")
108
 
109
  file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resume PDFs")
110
+ classify_button = gr.Button("📊 Classify Resumes")
111
+ output_json = gr.JSON(label="Classification Summary")
112
 
113
+ category_dropdown = gr.Dropdown(label="Select Category to View Files", choices=sorted(list(CATEGORY_MAP.values())))
114
+ resume_file_list = gr.File(label="Filtered Resumes in Selected Category", file_count="multiple")
115
 
116
+ def update_dropdown_options(files):
117
+ _, cat_data = classify_resumes(files)
118
+ return sorted(list(cat_data.keys()))
119
 
120
+ classify_button.click(fn=classify_resumes, inputs=[file_input], outputs=[output_json, category_dropdown])
121
+ category_dropdown.change(fn=show_category_files, inputs=[category_dropdown], outputs=[resume_file_list])
122
 
123
  if __name__ == "__main__":
124
+ demo.launch()