Spaces:

TKM03
/

RESUME_FILTERING

Sleeping

App Files Community

TKM03 committed on Jul 2, 2025

Commit

0ea0dd9

verified ·

1 Parent(s): 8ab2e60

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -103

app.py CHANGED Viewed

@@ -5,115 +5,44 @@ import PyPDF2
 import gradio as gr
 from transformers import pipeline
-# Load multi-class resume classifier model
-text_classifier = pipeline("text-classification", model="liberatoratif/BERT-resume-job-recommender", top_k=1)
-# Map label to readable category names
 CATEGORY_MAP = {
-    "LABEL_0": "Data Science / Machine Learning / AI",
-    "LABEL_1": "Information Technology / Software Engineering",
     "LABEL_2": "Sales / Marketing / Business Development",
-    "LABEL_3": "Finance / Accounting / Auditing",
-    "LABEL_4": "Human Resources / Recruitment / Talent Acquisition",
-    "LABEL_5": "Product Management / Project Management",
     "LABEL_6": "Engineering / Mechanical / Civil / Electrical",
-    "LABEL_7": "Operations / Logistics / Supply Chain",
-    "LABEL_8": "Design / UI-UX / Creative",
-    "LABEL_9": "Legal / Compliance",
-    "LABEL_10": "Healthcare / Medical / Nursing",
-    "LABEL_11": "Customer Service / Support",
-    "LABEL_12": "Education / Training / Teaching",
-    "LABEL_13": "Entrepreneurship / Startups / Freelancing",
-    "LABEL_14": "Retail / Merchandising / E-commerce",
-    "LABEL_15": "Media / Communication / PR / Journalism",
-    "LABEL_16": "Manufacturing / Production",
-    "LABEL_17": "Administration / Clerical",
-    "LABEL_18": "Quality Assurance / Control",
-    "LABEL_19": "Construction / Architecture",
-    "LABEL_20": "Science / Research / R&D",
-    "LABEL_21": "Real Estate / Property Management",
-    "LABEL_22": "Security / Safety",
-    "LABEL_23": "Procurement / Purchasing",
-    "LABEL_24": "Hospitality / Tourism / Travel",
-    "LABEL_25": "Telecommunication / Network Engineering",
-    "LABEL_26": "Transportation / Automotive / Aviation",
-    "LABEL_27": "Energy / Oil & Gas / Utilities",
-    "LABEL_28": "Management / Strategy / Consulting",
-    "LABEL_29": "Other / Miscellaneous"
 }
 def clean_text(text):
     text = re.sub(r'http\S+', ' ', text)
-    text = re.sub(r'[^\w\s]', ' ', text)
-    return re.sub(r'\s+', ' ', text).strip()
-def extract_text_from_pdf(file):
-    try:
-        reader = PyPDF2.PdfReader(file)
-        text = " ".join(page.extract_text() or "" for page in reader.pages)
-        return clean_text(text), None if text.strip() else "No text found."
-    except Exception as e:
-        return None, str(e)
-def classify_resumes(files):
-    results = {}
-    category_to_files = {}
-    if os.path.exists("classified_resumes"):
-        shutil.rmtree("classified_resumes")
-    os.makedirs("classified_resumes", exist_ok=True)
-    for file in files:
-        file_name = os.path.basename(file.name)
-        text, error = extract_text_from_pdf(file)
-        if error:
-            results[file_name] = {"error": error}
-            continue
-        pred = text_classifier(text[:512])[0][0]
-        label = pred['label']
-        category = CATEGORY_MAP.get(label, "Unknown")
-        score = round(pred['score'], 4)
-        results[file_name] = {
-            "Predicted Job Category": label,
-            "Category Name": category,
-            "Confidence Score": score
-        }
-        # Save file in category folder
-        cat_dir = os.path.join("classified_resumes", category)
-        os.makedirs(cat_dir, exist_ok=True)
-        dest_path = os.path.join(cat_dir, file_name)
-        with open(file.name, "rb") as f_in, open(dest_path, "wb") as f_out:
-            shutil.copyfileobj(f_in, f_out)
-        category_to_files.setdefault(category, []).append(dest_path)
-    return results, list(category_to_files.keys()), category_to_files
-def show_files_by_category(selected_category, category_to_files):
-    return category_to_files.get(selected_category, [])
-with gr.Blocks(title="Resume Category Classifier") as demo:
-    gr.Markdown("## 📂 Resume Screening by Job Category")
-    file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resume PDFs")
-    classify_btn = gr.Button("🔍 Classify Resumes")
-    results_output = gr.JSON(label="Prediction Results")
-    category_dropdown = gr.Dropdown(label="Select Category to View Resumes", interactive=True)
-    file_output = gr.File(label="Download Filtered Resumes", file_types=[".pdf"], file_count="multiple")
-    category_files_state = gr.State({})
-    classify_btn.click(fn=classify_resumes, inputs=[file_input], outputs=[results_output, category_dropdown, category_files_state])
-    category_dropdown.change(fn=show_files_by_category, inputs=[category_dropdown, category_files_state], outputs=[file_output])
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 from transformers import pipeline
+# Load classification model
+classifier = pipeline("text-classification", model="liberatoratif/BERT-resume-job-recommender", truncation=True)
+# Manually defined category map (expand as needed)
 CATEGORY_MAP = {
+    "LABEL_0": "Information Technology / Software Engineering",
+    "LABEL_1": "Finance / Accounting / Auditing",
     "LABEL_2": "Sales / Marketing / Business Development",
+    "LABEL_3": "Human Resources / Recruitment / Talent Acquisition",
+    "LABEL_4": "Healthcare / Medical / Nursing",
+    "LABEL_5": "Education / Training / Teaching",
     "LABEL_6": "Engineering / Mechanical / Civil / Electrical",
+    "LABEL_7": "Product Management / Project Management",
+    "LABEL_8": "Customer Service / Support",
+    "LABEL_9": "Operations / Logistics / Supply Chain",
+    "LABEL_10": "Design / UI-UX / Creative",
+    "LABEL_11": "Legal / Compliance",
+    "LABEL_12": "Entrepreneurship / Startups / Freelancing",
+    "LABEL_13": "Media / Communication / PR / Journalism",
+    "LABEL_14": "Quality Assurance / Control",
+    "LABEL_15": "Administration / Clerical",
+    "LABEL_16": "Science / Research / R&D",
+    "LABEL_17": "Construction / Architecture",
+    "LABEL_18": "Retail / Merchandising / E-commerce",
+    "LABEL_19": "Security / Safety",
+    "LABEL_20": "Real Estate / Property Management",
+    "LABEL_21": "Manufacturing / Production",
+    "LABEL_22": "Telecommunication / Network Engineering",
+    "LABEL_23": "Transportation / Automotive / Aviation",
+    "LABEL_24": "Energy / Oil & Gas / Utilities",
+    "LABEL_25": "Other / Miscellaneous"
 }
+classified_files_by_category = {}  # Global dictionary to store categorized files
 def clean_text(text):
     text = re.sub(r'http\S+', ' ', text)
+    text = re.sub(r'#\S+', '', text)
+    text = re.sub(r'@\S+', ' ', text)
+    text = re.sub(r'[^