Spaces:

TKM03
/

RESUME_FILTERING

Sleeping

App Files Community

TKM03 committed on Jul 2, 2025

Commit

1d2993e

verified ·

1 Parent(s): 52a747a

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -53

app.py CHANGED Viewed

@@ -1,26 +1,50 @@
-import re
 import os
 import shutil
 import PyPDF2
 import gradio as gr
 from transformers import pipeline
-# Load classification model
-text_classifier = pipeline("text-classification", model="saattrupdan/job-listing-filtering-model")
-# Label mapping for binary classification
-LABEL_MAP = {
-    "LABEL_0": "Irrelevant",
-    "LABEL_1": "Relevant"
-}
-# Global variable to store the filtered files per label
-classified_files = {
-    "Relevant": [],
-    "Irrelevant": []
 }
 def clean_resume_text(text):
     text = re.sub(r'http\S+', ' ', text)
     text = re.sub(r'#\S+', '', text)
@@ -29,7 +53,6 @@ def clean_resume_text(text):
     text = re.sub(r'[^\x00-\x7f]', ' ', text)
     return re.sub(r'\s+', ' ', text).strip()
 def extract_resume_text(file):
     try:
         reader = PyPDF2.PdfReader(file)
@@ -42,61 +65,60 @@ def extract_resume_text(file):
     except Exception as e:
         return None, f"Error reading PDF: {str(e)}"
-def classify_and_store(files):
-    predictions = {}
-    classified_files["Relevant"] = []
-    classified_files["Irrelevant"] = []
-    if os.path.exists("filtered_resumes"):
-        shutil.rmtree("filtered_resumes")
-    os.makedirs("filtered_resumes", exist_ok=True)
     for file in files:
         file_name = os.path.basename(file.name)
         resume_text, error = extract_resume_text(file)
         if error:
-            predictions[file_name] = {"error": error}
             continue
         cleaned_text = clean_resume_text(resume_text)
-        result = text_classifier(cleaned_text[:512])
-        label = result[0]['label']
-        score = round(result[0]['score'], 4)
-        status = LABEL_MAP.get(label, "Unknown")
-        predictions[file_name] = {
-            "Relevance": status,
-            "Confidence Score": score
-        }
-        dest_path = f"filtered_resumes/{file_name}"
-        with open(file.name, "rb") as f_in, open(dest_path, "wb") as f_out:
-            shutil.copyfileobj(f_in, f_out)
-        classified_files[status].append(dest_path)
-    return predictions
-def get_resumes_by_category(category):
-    return classified_files.get(category, [])
 # Gradio UI
-with gr.Blocks(title="Resume Classifier & Category Filter") as demo:
-    gr.Markdown("## 📂 Resume Relevance Classifier\nUpload resumes and view based on relevance category.")
     file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resume PDFs")
-    classify_button = gr.Button("🧠 Classify Resumes")
-    relevance_output = gr.JSON(label="Classification Results")
-    category_dropdown = gr.Dropdown(choices=["Relevant", "Irrelevant"], label="Select Resume Category to View")
-    filtered_files_output = gr.File(label="Filtered Resumes", file_types=[".pdf"], file_count="multiple")
-    classify_button.click(fn=classify_and_store, inputs=[file_input], outputs=[relevance_output])
-    category_dropdown.change(fn=get_resumes_by_category, inputs=[category_dropdown], outputs=[filtered_files_output])
 if __name__ == "__main__":
-    demo.launch()

 import os
+import re
 import shutil
 import PyPDF2
 import gradio as gr
 from transformers import pipeline
+from collections import defaultdict
+# Load job classification model
+text_classifier = pipeline("text-classification", model="serbog/distilbert-jobCategory_410k")
+# Expanded label map (update it to match the labels the model actually returns)
+CATEGORY_MAP = {
+    "LABEL_0": "Information Technology / Software Engineering",
+    "LABEL_1": "Healthcare / Medical / Nursing",
+    "LABEL_2": "Finance / Accounting / Auditing",
+    "LABEL_3": "Engineering / Mechanical / Civil / Electrical",
+    "LABEL_4": "Education / Training / Teaching",
+    "LABEL_5": "Sales / Marketing / Business Development",
+    "LABEL_6": "Customer Service / Support",
+    "LABEL_7": "Human Resources / Recruitment / Talent Acquisition",
+    "LABEL_8": "Legal / Compliance",
+    "LABEL_9": "Administration / Clerical",
+    "LABEL_10": "Operations / Logistics / Supply Chain",
+    "LABEL_11": "Management / Strategy / Consulting",
+    "LABEL_12": "Science / Research / R&D",
+    "LABEL_13": "Design / UI-UX / Creative",
+    "LABEL_14": "Manufacturing / Production",
+    "LABEL_15": "Hospitality / Tourism / Travel",
+    "LABEL_16": "Construction / Architecture",
+    "LABEL_17": "Media / Communication / PR / Journalism",
+    "LABEL_18": "Procurement / Purchasing",
+    "LABEL_19": "Security / Safety",
+    "LABEL_20": "Real Estate / Property Management",
+    "LABEL_21": "Energy / Oil & Gas / Utilities",
+    "LABEL_22": "Agriculture / Environmental / Forestry",
+    "LABEL_23": "Transportation / Automotive / Aviation",
+    "LABEL_24": "Retail / Merchandising / E-commerce",
+    "LABEL_25": "Data Science / Machine Learning / AI",
+    "LABEL_26": "Product Management / Project Management",
+    "LABEL_27": "Quality Assurance / Control",
+    "LABEL_28": "Telecommunication / Network Engineering",
+    "LABEL_29": "Entrepreneurship / Startups / Freelancing",
+    "LABEL_30": "Other / Miscellaneous"
 }
+# Helper functions
 def clean_resume_text(text):
     text = re.sub(r'http\S+', ' ', text)
     text = re.sub(r'#\S+', '', text)
     text = re.sub(r'[^\x00-\x7f]', ' ', text)
     return re.sub(r'\s+', ' ', text).strip()
 def extract_resume_text(file):
     try:
         reader = PyPDF2.PdfReader(file)
     except Exception as e:
         return None, f"Error reading PDF: {str(e)}"
+def classify_resumes(files):
+    categorized = defaultdict(list)
+    label_scores = {}
+    os.makedirs("classified_resumes", exist_ok=True)
     for file in files:
         file_name = os.path.basename(file.name)
         resume_text, error = extract_resume_text(file)
         if error:
             continue
         cleaned_text = clean_resume_text(resume_text)
+        result = text_classifier(cleaned_text[:512])[0]
+        label = result['label']
+        score = round(result['score'], 4)
+        category = CATEGORY_MAP.get(label, label)
+        # Save to relevant folder
+        cat_folder = os.path.join("classified_resumes", category.replace(" ", "_"))
+        os.makedirs(cat_folder, exist_ok=True)
+        save_path = os.path.join(cat_folder, file_name)
+        with open(file.name, "rb") as f_in, open(save_path, "wb") as f_out:
+            shutil.copyfileobj(f_in, f_out)
+        categorized[category].append(save_path)
+        label_scores[file_name] = {"Predicted Job Category": category, "Confidence Score": score}
+    return label_scores, categorized
+def show_category_files(selected_category):
+    category_path = os.path.join("classified_resumes", selected_category.replace(" ", "_"))
+    if not os.path.exists(category_path):
+        return []
+    return [os.path.join(category_path, f) for f in os.listdir(category_path) if f.endswith(".pdf")]
 # Gradio UI
+with gr.Blocks(title="🧠 Resume Screening & Categorization") as demo:
+    gr.Markdown("""## 📄 Resume Screening by Job Role/Industry
+Upload resumes below. The app classifies each into categories like IT, HR, Sales, etc. Then click on any category to view/download relevant resumes.""")
     file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resume PDFs")
+    classify_button = gr.Button("📊 Classify Resumes")
+    output_json = gr.JSON(label="Classification Summary")
+    category_dropdown = gr.Dropdown(label="Select Category to View Files", choices=sorted(list(CATEGORY_MAP.values())))
+    resume_file_list = gr.File(label="Filtered Resumes in Selected Category", file_count="multiple")
+    def update_dropdown_options(files):
+        _, cat_data = classify_resumes(files)
+        return sorted(list(cat_data.keys()))
+    classify_button.click(fn=classify_resumes, inputs=[file_input], outputs=[output_json, category_dropdown])
+    category_dropdown.change(fn=show_category_files, inputs=[category_dropdown], outputs=[resume_file_list])
 if __name__ == "__main__":
+    demo.launch()