Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,115 +5,44 @@ import PyPDF2
|
|
| 5 |
import gradio as gr
|
| 6 |
from transformers import pipeline
|
| 7 |
|
| 8 |
-
# Load
|
| 9 |
-
|
| 10 |
|
| 11 |
-
#
|
| 12 |
CATEGORY_MAP = {
|
| 13 |
-
"LABEL_0": "
|
| 14 |
-
"LABEL_1": "
|
| 15 |
"LABEL_2": "Sales / Marketing / Business Development",
|
| 16 |
-
"LABEL_3": "
|
| 17 |
-
"LABEL_4": "
|
| 18 |
-
"LABEL_5": "
|
| 19 |
"LABEL_6": "Engineering / Mechanical / Civil / Electrical",
|
| 20 |
-
"LABEL_7": "
|
| 21 |
-
"LABEL_8": "
|
| 22 |
-
"LABEL_9": "
|
| 23 |
-
"LABEL_10": "
|
| 24 |
-
"LABEL_11": "
|
| 25 |
-
"LABEL_12": "
|
| 26 |
-
"LABEL_13": "
|
| 27 |
-
"LABEL_14": "
|
| 28 |
-
"LABEL_15": "
|
| 29 |
-
"LABEL_16": "
|
| 30 |
-
"LABEL_17": "
|
| 31 |
-
"LABEL_18": "
|
| 32 |
-
"LABEL_19": "
|
| 33 |
-
"LABEL_20": "
|
| 34 |
-
"LABEL_21": "
|
| 35 |
-
"LABEL_22": "
|
| 36 |
-
"LABEL_23": "
|
| 37 |
-
"LABEL_24": "
|
| 38 |
-
"LABEL_25": "
|
| 39 |
-
"LABEL_26": "Transportation / Automotive / Aviation",
|
| 40 |
-
"LABEL_27": "Energy / Oil & Gas / Utilities",
|
| 41 |
-
"LABEL_28": "Management / Strategy / Consulting",
|
| 42 |
-
"LABEL_29": "Other / Miscellaneous"
|
| 43 |
}
|
| 44 |
|
|
|
|
|
|
|
| 45 |
|
| 46 |
def clean_text(text):
|
| 47 |
text = re.sub(r'http\S+', ' ', text)
|
| 48 |
-
text = re.sub(r'
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
def extract_text_from_pdf(file):
    """Extract the text of a PDF and run it through ``clean_text``.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts; the caller passes the
            uploaded file object.

    Returns:
        A ``(text, error)`` pair:

        * ``(cleaned_text, None)`` when at least one page produced text;
        * ``(cleaned_text, "No text found.")`` when every page was empty;
        * ``(None, message)`` when reading the PDF raised. ``message`` is
          never empty, so callers can rely on ``if error:``.
    """
    try:
        reader = PyPDF2.PdfReader(file)
        # The `or ""` keeps the join working for pages that yield no text.
        text = " ".join(page.extract_text() or "" for page in reader.pages)
        return clean_text(text), None if text.strip() else "No text found."
    except Exception as e:
        # Deliberate best-effort boundary: one unreadable upload must not
        # abort the whole batch. Some exceptions stringify to "", which the
        # caller would mistake for success, so fall back to the class name.
        return None, str(e) or type(e).__name__
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
def classify_resumes(files):
    """Classify uploaded resume PDFs and copy each one into a category folder.

    The ``classified_resumes`` directory is deleted and recreated on every
    call, so it only ever holds the files from the latest run.

    Args:
        files: Uploaded items. Each item is either an object exposing its
            on-disk path as ``.name`` or a plain path string. ``None`` or an
            empty list means nothing was uploaded.

    Returns:
        A 3-tuple ``(results, categories, category_to_files)``:

        * ``results`` maps each file name to ``{"error": ...}`` or to its
          predicted label, category name and confidence score;
        * ``categories`` lists the category names that received a file;
        * ``category_to_files`` maps each category name to the paths of the
          copies made under ``classified_resumes``.
    """
    results = {}
    category_to_files = {}

    if os.path.exists("classified_resumes"):
        shutil.rmtree("classified_resumes")
    os.makedirs("classified_resumes", exist_ok=True)

    # An empty upload arrives as None; treat it as an empty batch instead of
    # failing with "NoneType is not iterable".
    for file in files or []:
        # Accept both file wrappers (path in .name) and bare path strings.
        source_path = getattr(file, "name", file)
        file_name = os.path.basename(source_path)
        text, error = extract_text_from_pdf(file)

        # `text is None` covers an extraction failure that reported a falsy
        # error, which would otherwise crash on the slice below.
        if error or text is None:
            results[file_name] = {"error": error or "Could not read file."}
            continue

        # Only the first 512 characters are classified. The [0][0] indexing
        # assumes the classifier returns a nested list of predictions per
        # input — TODO confirm against how text_classifier is constructed.
        pred = text_classifier(text[:512])[0][0]
        label = pred['label']
        category = CATEGORY_MAP.get(label, "Unknown")
        score = round(pred['score'], 4)

        results[file_name] = {
            "Predicted Job Category": label,
            "Category Name": category,
            "Confidence Score": score,
        }

        # Category names contain "/", which os.path.join would interpret as
        # path separators and turn into nested directories. Flatten them so
        # each category maps to exactly one folder.
        folder_name = category.replace("/", "-").replace("\\", "-")
        cat_dir = os.path.join("classified_resumes", folder_name)
        os.makedirs(cat_dir, exist_ok=True)
        dest_path = os.path.join(cat_dir, file_name)
        shutil.copyfile(source_path, dest_path)

        category_to_files.setdefault(category, []).append(dest_path)

    return results, list(category_to_files), category_to_files
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
def show_files_by_category(selected_category, category_to_files):
    """Return the file paths recorded for the selected category.

    Args:
        selected_category: Category name chosen in the dropdown.
        category_to_files: Mapping of category name to a list of file paths.

    Returns:
        The list stored under ``selected_category``, or an empty list when
        the category is not present or no mapping exists yet.
    """
    # The state may be empty or None before any classification has run.
    if not category_to_files:
        return []
    return category_to_files.get(selected_category, [])
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
def _classify_and_refresh(files):
    """Run ``classify_resumes`` and expose its categories as dropdown choices."""
    results, categories, category_to_files = classify_resumes(files)
    # Returning a bare list to a Dropdown output sets its *value*, leaving the
    # option list empty; gr.update is what replaces the *choices*.
    return results, gr.update(choices=categories, value=None), category_to_files


with gr.Blocks(title="Resume Category Classifier") as demo:
    gr.Markdown("## 📂 Resume Screening by Job Category")
    file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resume PDFs")
    classify_btn = gr.Button("🔍 Classify Resumes")

    results_output = gr.JSON(label="Prediction Results")
    category_dropdown = gr.Dropdown(label="Select Category to View Resumes", interactive=True)
    file_output = gr.File(label="Download Filtered Resumes", file_types=[".pdf"], file_count="multiple")

    # Holds the category -> file paths mapping between the two events below.
    category_files_state = gr.State({})

    classify_btn.click(
        fn=_classify_and_refresh,
        inputs=[file_input],
        outputs=[results_output, category_dropdown, category_files_state],
    )
    category_dropdown.change(
        fn=show_files_by_category,
        inputs=[category_dropdown, category_files_state],
        outputs=[file_output],
    )

if __name__ == "__main__":
    demo.launch()
|
|
|
|
| 5 |
import gradio as gr
|
| 6 |
from transformers import pipeline
|
| 7 |
|
| 8 |
+
# Load classification model
|
| 9 |
+
# Built once at module import and reused for every prediction.
# truncation=True is passed through to the pipeline — presumably so inputs
# longer than the model's maximum sequence length are cut rather than
# rejected; confirm against the transformers pipeline documentation.
classifier = pipeline("text-classification", model="liberatoratif/BERT-resume-job-recommender", truncation=True)
|
| 10 |
|
| 11 |
+
# Manually defined category map (expand as needed)
|
| 12 |
# Maps the classifier's raw "LABEL_n" outputs to human-readable category names.
# NOTE(review): these names are assigned by hand; verify that the indices match
# the label order the model was trained with before relying on them.
CATEGORY_MAP = {
    "LABEL_0": "Information Technology / Software Engineering",
    "LABEL_1": "Finance / Accounting / Auditing",
    "LABEL_2": "Sales / Marketing / Business Development",
    "LABEL_3": "Human Resources / Recruitment / Talent Acquisition",
    "LABEL_4": "Healthcare / Medical / Nursing",
    "LABEL_5": "Education / Training / Teaching",
    "LABEL_6": "Engineering / Mechanical / Civil / Electrical",
    "LABEL_7": "Product Management / Project Management",
    "LABEL_8": "Customer Service / Support",
    "LABEL_9": "Operations / Logistics / Supply Chain",
    "LABEL_10": "Design / UI-UX / Creative",
    "LABEL_11": "Legal / Compliance",
    "LABEL_12": "Entrepreneurship / Startups / Freelancing",
    "LABEL_13": "Media / Communication / PR / Journalism",
    "LABEL_14": "Quality Assurance / Control",
    "LABEL_15": "Administration / Clerical",
    "LABEL_16": "Science / Research / R&D",
    "LABEL_17": "Construction / Architecture",
    "LABEL_18": "Retail / Merchandising / E-commerce",
    "LABEL_19": "Security / Safety",
    "LABEL_20": "Real Estate / Property Management",
    "LABEL_21": "Manufacturing / Production",
    "LABEL_22": "Telecommunication / Network Engineering",
    "LABEL_23": "Transportation / Automotive / Aviation",
    "LABEL_24": "Energy / Oil & Gas / Utilities",
    "LABEL_25": "Other / Miscellaneous",
}
|
| 40 |
|
| 41 |
+
# NOTE(review): this dict lives at module level, so every caller in the
# process reads and writes the same object. The code that fills it is not
# visible here — confirm that sharing it between concurrent users is intended.
classified_files_by_category = {} # Global dictionary to store categorized files
|
| 42 |
+
|
| 43 |
|
| 44 |
def clean_text(text):
|
| 45 |
text = re.sub(r'http\S+', ' ', text)
|
| 46 |
+
text = re.sub(r'#\S+', '', text)
|
| 47 |
+
text = re.sub(r'@\S+', ' ', text)
|
| 48 |
+
text = re.sub(r'[^
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|