TKM03 committed on
Commit
0ea0dd9
·
verified ·
1 Parent(s): 8ab2e60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -103
app.py CHANGED
@@ -5,115 +5,44 @@ import PyPDF2
5
  import gradio as gr
6
  from transformers import pipeline
7
 
8
- # Load multi-class resume classifier model
9
- text_classifier = pipeline("text-classification", model="liberatoratif/BERT-resume-job-recommender", top_k=1)
10
 
11
- # Map label to readable category names
12
  CATEGORY_MAP = {
13
- "LABEL_0": "Data Science / Machine Learning / AI",
14
- "LABEL_1": "Information Technology / Software Engineering",
15
  "LABEL_2": "Sales / Marketing / Business Development",
16
- "LABEL_3": "Finance / Accounting / Auditing",
17
- "LABEL_4": "Human Resources / Recruitment / Talent Acquisition",
18
- "LABEL_5": "Product Management / Project Management",
19
  "LABEL_6": "Engineering / Mechanical / Civil / Electrical",
20
- "LABEL_7": "Operations / Logistics / Supply Chain",
21
- "LABEL_8": "Design / UI-UX / Creative",
22
- "LABEL_9": "Legal / Compliance",
23
- "LABEL_10": "Healthcare / Medical / Nursing",
24
- "LABEL_11": "Customer Service / Support",
25
- "LABEL_12": "Education / Training / Teaching",
26
- "LABEL_13": "Entrepreneurship / Startups / Freelancing",
27
- "LABEL_14": "Retail / Merchandising / E-commerce",
28
- "LABEL_15": "Media / Communication / PR / Journalism",
29
- "LABEL_16": "Manufacturing / Production",
30
- "LABEL_17": "Administration / Clerical",
31
- "LABEL_18": "Quality Assurance / Control",
32
- "LABEL_19": "Construction / Architecture",
33
- "LABEL_20": "Science / Research / R&D",
34
- "LABEL_21": "Real Estate / Property Management",
35
- "LABEL_22": "Security / Safety",
36
- "LABEL_23": "Procurement / Purchasing",
37
- "LABEL_24": "Hospitality / Tourism / Travel",
38
- "LABEL_25": "Telecommunication / Network Engineering",
39
- "LABEL_26": "Transportation / Automotive / Aviation",
40
- "LABEL_27": "Energy / Oil & Gas / Utilities",
41
- "LABEL_28": "Management / Strategy / Consulting",
42
- "LABEL_29": "Other / Miscellaneous"
43
  }
44
 
 
 
45
 
46
  def clean_text(text):
47
  text = re.sub(r'http\S+', ' ', text)
48
- text = re.sub(r'[^\w\s]', ' ', text)
49
- return re.sub(r'\s+', ' ', text).strip()
50
-
51
-
52
- def extract_text_from_pdf(file):
53
- try:
54
- reader = PyPDF2.PdfReader(file)
55
- text = " ".join(page.extract_text() or "" for page in reader.pages)
56
- return clean_text(text), None if text.strip() else "No text found."
57
- except Exception as e:
58
- return None, str(e)
59
-
60
-
61
- def classify_resumes(files):
62
- results = {}
63
- category_to_files = {}
64
-
65
- if os.path.exists("classified_resumes"):
66
- shutil.rmtree("classified_resumes")
67
- os.makedirs("classified_resumes", exist_ok=True)
68
-
69
- for file in files:
70
- file_name = os.path.basename(file.name)
71
- text, error = extract_text_from_pdf(file)
72
-
73
- if error:
74
- results[file_name] = {"error": error}
75
- continue
76
-
77
- pred = text_classifier(text[:512])[0][0]
78
- label = pred['label']
79
- category = CATEGORY_MAP.get(label, "Unknown")
80
- score = round(pred['score'], 4)
81
-
82
- results[file_name] = {
83
- "Predicted Job Category": label,
84
- "Category Name": category,
85
- "Confidence Score": score
86
- }
87
-
88
- # Save file in category folder
89
- cat_dir = os.path.join("classified_resumes", category)
90
- os.makedirs(cat_dir, exist_ok=True)
91
- dest_path = os.path.join(cat_dir, file_name)
92
- with open(file.name, "rb") as f_in, open(dest_path, "wb") as f_out:
93
- shutil.copyfileobj(f_in, f_out)
94
-
95
- category_to_files.setdefault(category, []).append(dest_path)
96
-
97
- return results, list(category_to_files.keys()), category_to_files
98
-
99
-
100
- def show_files_by_category(selected_category, category_to_files):
101
- return category_to_files.get(selected_category, [])
102
-
103
-
104
- with gr.Blocks(title="Resume Category Classifier") as demo:
105
- gr.Markdown("## 📂 Resume Screening by Job Category")
106
- file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resume PDFs")
107
- classify_btn = gr.Button("🔍 Classify Resumes")
108
-
109
- results_output = gr.JSON(label="Prediction Results")
110
- category_dropdown = gr.Dropdown(label="Select Category to View Resumes", interactive=True)
111
- file_output = gr.File(label="Download Filtered Resumes", file_types=[".pdf"], file_count="multiple")
112
-
113
- category_files_state = gr.State({})
114
-
115
- classify_btn.click(fn=classify_resumes, inputs=[file_input], outputs=[results_output, category_dropdown, category_files_state])
116
- category_dropdown.change(fn=show_files_by_category, inputs=[category_dropdown, category_files_state], outputs=[file_output])
117
-
118
- if __name__ == "__main__":
119
- demo.launch()
 
5
  import gradio as gr
6
  from transformers import pipeline
7
 
8
+ # Load classification model
9
+ classifier = pipeline("text-classification", model="liberatoratif/BERT-resume-job-recommender", truncation=True)
10
 
11
+ # Manually defined category map (expand as needed)
12
  CATEGORY_MAP = {
13
+ "LABEL_0": "Information Technology / Software Engineering",
14
+ "LABEL_1": "Finance / Accounting / Auditing",
15
  "LABEL_2": "Sales / Marketing / Business Development",
16
+ "LABEL_3": "Human Resources / Recruitment / Talent Acquisition",
17
+ "LABEL_4": "Healthcare / Medical / Nursing",
18
+ "LABEL_5": "Education / Training / Teaching",
19
  "LABEL_6": "Engineering / Mechanical / Civil / Electrical",
20
+ "LABEL_7": "Product Management / Project Management",
21
+ "LABEL_8": "Customer Service / Support",
22
+ "LABEL_9": "Operations / Logistics / Supply Chain",
23
+ "LABEL_10": "Design / UI-UX / Creative",
24
+ "LABEL_11": "Legal / Compliance",
25
+ "LABEL_12": "Entrepreneurship / Startups / Freelancing",
26
+ "LABEL_13": "Media / Communication / PR / Journalism",
27
+ "LABEL_14": "Quality Assurance / Control",
28
+ "LABEL_15": "Administration / Clerical",
29
+ "LABEL_16": "Science / Research / R&D",
30
+ "LABEL_17": "Construction / Architecture",
31
+ "LABEL_18": "Retail / Merchandising / E-commerce",
32
+ "LABEL_19": "Security / Safety",
33
+ "LABEL_20": "Real Estate / Property Management",
34
+ "LABEL_21": "Manufacturing / Production",
35
+ "LABEL_22": "Telecommunication / Network Engineering",
36
+ "LABEL_23": "Transportation / Automotive / Aviation",
37
+ "LABEL_24": "Energy / Oil & Gas / Utilities",
38
+ "LABEL_25": "Other / Miscellaneous"
 
 
 
 
39
  }
40
 
41
+ classified_files_by_category = {} # Global dictionary to store categorized files
42
+
43
 
44
  def clean_text(text):
45
  text = re.sub(r'http\S+', ' ', text)
46
+ text = re.sub(r'#\S+', '', text)
47
+ text = re.sub(r'@\S+', ' ', text)
48
+ text = re.sub(r'[^